commit_id,repo,msg,filename,diff,label,partition 3218043d6d3a019756607643cf65574fbfef5d7a,tensorflow/tensorflow,"Internal change PiperOrigin-RevId: 411896058 Change-Id: Ia031058247e3cf382957a6662d3f9e1cbb481ca2",op_level_cost_estimator.cc,"@@ -2153,7 +2153,7 @@ OpInfo::TensorProperties OpLevelCostEstimator::DescribeTensor( } /* static */ -OpLevelCostEstimator::ConvolutionDimensions +StatusOr OpLevelCostEstimator::OpDimensionsFromInputs( const TensorShapeProto& original_image_shape, const OpInfo& op_info, bool* found_unknown_shapes) { @@ -2190,6 +2190,11 @@ OpLevelCostEstimator::OpDimensionsFromInputs( std::vector strides = GetStrides(op_info); int64_t sx = strides[x_index]; int64_t sy = strides[y_index]; + if (sx == 0 || sy == 0) { + return errors::InvalidArgument( + ""Stride must be > 0 for Height and Width, but got ("", sy, "", "", sx, + "")""); + } const auto padding = GetPadding(op_info); int64_t ox = GetOutputSize(ix, kx, sx, padding); @@ -2206,8 +2211,9 @@ Status OpLevelCostEstimator::PredictMaxPool(const OpContext& op_context, bool found_unknown_shapes = false; const auto& op_info = op_context.op_info; // x: op_info.inputs(0) - ConvolutionDimensions dims = OpDimensionsFromInputs( - op_info.inputs(0).shape(), op_info, &found_unknown_shapes); + TF_ASSIGN_OR_RETURN(ConvolutionDimensions dims, + OpDimensionsFromInputs(op_info.inputs(0).shape(), op_info, + &found_unknown_shapes)); // kx * ky - 1 comparisons per output (kx * xy > 1) // or 1 copy per output (kx * k1 = 1). int per_output_ops = dims.kx * dims.ky == 1 ? 1 : dims.kx * dims.ky - 1; @@ -2248,8 +2254,9 @@ Status OpLevelCostEstimator::PredictMaxPoolGrad(const OpContext& op_context, op_info.ShortDebugString()); } - ConvolutionDimensions dims = OpDimensionsFromInputs( - op_info.inputs(0).shape(), op_info, &found_unknown_shapes); + TF_ASSIGN_OR_RETURN(ConvolutionDimensions dims, + OpDimensionsFromInputs(op_info.inputs(0).shape(), op_info, + &found_unknown_shapes)); int64_t ops = 0; if (dims.kx == 1 && dims.ky == 1) { @@ -2324,8 +2331,9 @@ Status OpLevelCostEstimator::PredictAvgPool(const OpContext& op_context, bool found_unknown_shapes = false; const auto& op_info = op_context.op_info; // x: op_info.inputs(0) - ConvolutionDimensions dims = OpDimensionsFromInputs( - op_info.inputs(0).shape(), op_info, &found_unknown_shapes); + TF_ASSIGN_OR_RETURN(ConvolutionDimensions dims, + OpDimensionsFromInputs(op_info.inputs(0).shape(), op_info, + &found_unknown_shapes)); // kx * ky - 1 additions and 1 multiplication per output. 
int64_t ops = dims.batch * dims.ox * dims.oy * dims.oz * dims.kx * dims.ky; @@ -2382,8 +2390,9 @@ Status OpLevelCostEstimator::PredictAvgPoolGrad(const OpContext& op_context, found_unknown_shapes = true; } - ConvolutionDimensions dims = - OpDimensionsFromInputs(x_shape, op_info, &found_unknown_shapes); + TF_ASSIGN_OR_RETURN( + ConvolutionDimensions dims, + OpDimensionsFromInputs(x_shape, op_info, &found_unknown_shapes)); int64_t ops = 0; if (dims.kx <= dims.sx && dims.ky <= dims.sy) { @@ -2409,8 +2418,9 @@ Status OpLevelCostEstimator::PredictFusedBatchNorm( // offset: op_info.inputs(2) // mean: op_info.inputs(3) --> only for inference // variance: op_info.inputs(4) --> only for inference - ConvolutionDimensions dims = OpDimensionsFromInputs( - op_info.inputs(0).shape(), op_info, &found_unknown_shapes); + TF_ASSIGN_OR_RETURN(ConvolutionDimensions dims, + OpDimensionsFromInputs(op_info.inputs(0).shape(), op_info, + &found_unknown_shapes)); const bool is_training = IsTraining(op_info); int64_t ops = 0; @@ -2459,8 +2469,9 @@ Status OpLevelCostEstimator::PredictFusedBatchNormGrad( // scale: op_info.inputs(2) // mean: op_info.inputs(3) // variance or inverse of variance: op_info.inputs(4) - ConvolutionDimensions dims = OpDimensionsFromInputs( - op_info.inputs(1).shape(), op_info, &found_unknown_shapes); + TF_ASSIGN_OR_RETURN(ConvolutionDimensions dims, + OpDimensionsFromInputs(op_info.inputs(1).shape(), op_info, + &found_unknown_shapes)); int64_t ops = 0; const auto rsqrt_cost = Eigen::internal::functor_traits< ",1,test 3218043d6d3a019756607643cf65574fbfef5d7a,tensorflow/tensorflow,"Internal change PiperOrigin-RevId: 411896058 Change-Id: Ia031058247e3cf382957a6662d3f9e1cbb481ca2",op_level_cost_estimator.h,"@@ -290,7 +290,7 @@ class OpLevelCostEstimator { bool* found_unknown_shapes); // For Pooling, FusedBatchNorm, and their grad ops. - static ConvolutionDimensions OpDimensionsFromInputs( + static StatusOr OpDimensionsFromInputs( const TensorShapeProto& original_image_shape, const OpInfo& op_info, bool* found_unknown_shapes); ",1,test 3218043d6d3a019756607643cf65574fbfef5d7a,tensorflow/tensorflow,"Internal change PiperOrigin-RevId: 411896058 Change-Id: Ia031058247e3cf382957a6662d3f9e1cbb481ca2",op_level_cost_estimator_test.cc,"@@ -24,6 +24,7 @@ limitations under the License. 
#include ""tensorflow/core/framework/tensor_shape.h"" #include ""tensorflow/core/framework/tensor_shape.pb.h"" #include ""tensorflow/core/framework/types.h"" +#include ""tensorflow/core/platform/status_matchers.h"" #include ""tensorflow/core/platform/test.h"" #include ""tensorflow/core/protobuf/device_properties.pb.h"" @@ -558,9 +559,10 @@ class OpLevelCostEstimatorTest : public ::testing::Test { } bool found_unknown_shapes; - auto dims = OpLevelCostEstimator::OpDimensionsFromInputs( - op_context.op_info.inputs(0).shape(), op_context.op_info, - &found_unknown_shapes); + TF_ASSERT_OK_AND_ASSIGN( + auto dims, OpLevelCostEstimator::OpDimensionsFromInputs( + op_context.op_info.inputs(0).shape(), op_context.op_info, + &found_unknown_shapes)); Padding padding_enum; if (padding == ""VALID"") { padding_enum = Padding::VALID; @@ -581,6 +583,38 @@ class OpLevelCostEstimatorTest : public ::testing::Test { EXPECT_EQ(padding_enum, dims.padding); } + StatusOr + CallOpDimensionsFromInputs(const int n, const int h, const int w, const int c, + const int kx, const int ky, const int sx, + const int sy, const string& data_format, + const string& padding) { + OpContext op_context; + + const std::vector x = {n, h, w, c}; + const std::vector ksize = {1, kx, ky, 1}; + std::vector strides; + if (data_format == ""NHWC"") { + strides = {1, sy, sx, 1}; + } else { + strides = {1, 1, sy, sx}; + } + + auto& op_info = op_context.op_info; + SetCpuDevice(&op_info); + op_info.set_op(""MaxPool""); + + DescribeTensor4D(x[0], x[1], x[2], x[3], op_info.add_inputs()); + auto* attr = op_info.mutable_attr(); + SetAttrValue(data_format, &(*attr)[""data_format""]); + SetAttrValue(padding, &(*attr)[""padding""]); + SetAttrValue(strides, &(*attr)[""strides""]); + SetAttrValue(ksize, &(*attr)[""ksize""]); + bool found_unknown_shapes; + return OpLevelCostEstimator::OpDimensionsFromInputs( + op_context.op_info.inputs(0).shape(), op_context.op_info, + &found_unknown_shapes); + } + OpLevelCostEstimator estimator_; }; @@ -1383,6 +1417,26 @@ TEST_F(OpLevelCostEstimatorTest, OpDimensionsFromInputs) { } } +TEST_F(OpLevelCostEstimatorTest, OpDimensionsFromInputsError) { + std::vector paddings = {""VALID"", ""SAME""}; + std::vector formats = {""NHWC"", ""NCHW""}; + for (const auto& p : paddings) { + for (const auto& f : formats) { + // n, h, w, c, kx, ky, sx, sy, data_format, padding. 
+ ASSERT_THAT( + CallOpDimensionsFromInputs(10, 14, 14, 3840, 3, 3, 0, 2, f, p), + testing::StatusIs( + error::INVALID_ARGUMENT, + ""Stride must be > 0 for Height and Width, but got (2, 0)"")); + ASSERT_THAT( + CallOpDimensionsFromInputs(10, 14, 14, 3840, 3, 3, 2, 0, f, p), + testing::StatusIs( + error::INVALID_ARGUMENT, + ""Stride must be > 0 for Height and Width, but got (0, 2)"")); + } + } +} + TEST_F(OpLevelCostEstimatorTest, PredictMaxPool) { auto predict_max_pool = [this](const int n, const int in, const int c, const int k, const int s, ",1,test 23968a8bf65b009120c43b5ebcceaf52dbc9e943,tensorflow/tensorflow,"Fix out of bound access in DequantizeOp by adding check for axis < input dimension PiperOrigin-RevId: 411214268 Change-Id: I3249d2a69ddc82f182c589a3a5bbfb71543f4b29",dequantize_op.cc,"@@ -94,6 +94,11 @@ class DequantizeOp : public OpKernel { const Tensor& input_min_tensor = ctx->input(1); const Tensor& input_max_tensor = ctx->input(2); + OP_REQUIRES( + ctx, axis_ < input.dims(), + errors::InvalidArgument(""Axis must be less than input dimension("", + input.dims(), ""), got "", axis_)); + int num_slices = 1; if (axis_ > -1) { num_slices = input.dim_size(axis_); ",1,train b64638ec5ccaa77b7c1eb90958e3d85ce381f91b,tensorflow/tensorflow,"Fix Integer overflow error in Dequantize op shape function, by adding a bound check on axis. PiperOrigin-RevId: 412121389 Change-Id: I3088dbad9e90f9998d406b618c16694388a9dfb4",array_ops.cc,"@@ -24,6 +24,7 @@ limitations under the License. #include ""tensorflow/core/framework/types.h"" #include ""tensorflow/core/framework/types.pb.h"" #include ""tensorflow/core/lib/core/errors.h"" +#include ""tensorflow/core/platform/types.h"" #include ""tensorflow/core/util/mirror_pad_mode.h"" #include ""tensorflow/core/util/padding.h"" #include ""tensorflow/core/util/strided_slice_op.h"" @@ -3028,6 +3029,12 @@ REGISTER_OP(""Dequantize"") return errors::InvalidArgument(""axis should be at least -1, got "", axis); } + auto input_dims = c->Rank(c->input(0)); + if (axis > input_dims) { + return errors::InvalidArgument( + ""Axis must be less than input dimension("", input_dims, ""), got "", + axis); + } const int minmax_rank = (axis == -1) ? 0 : 1; TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); ShapeHandle minmax; @@ -3035,6 +3042,13 @@ REGISTER_OP(""Dequantize"") TF_RETURN_IF_ERROR(c->WithRank(c->input(2), minmax_rank, &minmax)); if (axis != -1) { ShapeHandle input; + if (axis >= kint32max) { + // Check int32 max bound for a corner case to prevent integer flow + // when input actually has kint32max rank and above bound check is not + // triggered. + return errors::InvalidArgument( + ""Axis cannot be >= kint32max value, got "", axis); + } TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; TF_RETURN_IF_ERROR( ",1,train b64638ec5ccaa77b7c1eb90958e3d85ce381f91b,tensorflow/tensorflow,"Fix Integer overflow error in Dequantize op shape function, by adding a bound check on axis. 
PiperOrigin-RevId: 412121389 Change-Id: I3088dbad9e90f9998d406b618c16694388a9dfb4",array_ops_test.py,"@@ -1704,6 +1704,21 @@ class QuantizeAndDequantizeTest(test_util.TensorFlowTestCase): output_grad = gradient_checker_v2.compute_gradient(f, [input_tensor]) self.assertAllClose(output_grad[0], np.zeros([1, 4, 4])) + def testOutOfBoundAxis(self): + input_tensor = constant_op.constant([1., 1.]) + input_min = [0] + input_max = [1] + q_input, _, _ = array_ops.quantize(input_tensor, 0, 1, dtypes.qint32) + error = (errors.InvalidArgumentError, ValueError) + with self.assertRaisesRegex(error, + r"".*Axis must be less than input dimension.*""): + self.evaluate( + gen_array_ops.dequantize( + input=q_input, + min_range=input_min, + max_range=input_max, + axis=2**31 - 1)) + @test_util.run_all_in_graph_and_eager_modes class SortedSearchTest(test_util.TensorFlowTestCase): ",1,train 37c01fb5e25c3d80213060460196406c43d31995,tensorflow/tensorflow,"Fix out of bound error in ReverseSequence Op shape function PiperOrigin-RevId: 411896080 Change-Id: I7e59a38e2f960886edf2b6c54ed5a84e86a9b193",array_ops.cc,"@@ -1653,11 +1653,21 @@ REGISTER_OP(""ReverseSequence"") return errors::InvalidArgument( ""batch_dim must be < input rank: "", batch_dim, "" vs. "", input_rank); } + if (seq_dim >= input_rank) { return errors::InvalidArgument( ""seq_dim must be < input rank: "", seq_dim, "" vs. "", input_rank); } + // To prevent out of bound access when calling c->Dim(input, batch_dim), + // batch_dim range [-1 * input rank, input rank) is allowed. However, + // the op implementation has a stricter bound for batch_dim requiring >= 0 + // value. Thus, perform strict check here. + if (batch_dim < 0) { + return errors::InvalidArgument(""batch_dim must be >=0, got "", + batch_dim); + } + DimensionHandle batch_dim_dim = c->Dim(input, batch_dim); TF_RETURN_IF_ERROR( c->Merge(batch_dim_dim, c->Dim(seq_lens_shape, 0), &batch_dim_dim)); ",1,train 58b34c6c8250983948b5a781b426f6aa01fd47af,tensorflow/tensorflow,"Fix integer overflow leading to divide by zero error in Unravel index kernel when dimensions product exceeds max int value. PiperOrigin-RevId: 413250052 Change-Id: I9450b6e8acecd2e881a64b882e2b7c70e8e9289a",unravel_index_op.cc,"@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include + +#include ""tensorflow/core/framework/types.pb.h"" +#include ""tensorflow/core/platform/types.h"" #define EIGEN_USE_THREADS #include ""tensorflow/core/framework/op_kernel.h"" @@ -35,7 +39,8 @@ typedef Eigen::ThreadPoolDevice CPUDevice; template class UnravelIndexOp : public OpKernel { public: - explicit UnravelIndexOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + explicit UnravelIndexOp(OpKernelConstruction* ctx) + : OpKernel(ctx), dtidx_(DataTypeToEnum::v()) {} void Compute(OpKernelContext* ctx) override { const Tensor& indices_tensor = ctx->input(0); @@ -54,12 +59,31 @@ class UnravelIndexOp : public OpKernel { auto dims = dims_tensor.vec(); // Make sure dims does not contain a zero + double prod = 1; + uint64_t limit; + if (dtidx_ == DataType::DT_INT64) { + limit = kint64max; + } else { + limit = kint32max; + } + for (int i = 0; i < dims.size(); i++) { OP_REQUIRES( ctx, dims(i) != 0, errors::InvalidArgument(""Input dims cannot contain a dim of zero, "" ""but dims contains zero at index "", i)); + OP_REQUIRES(ctx, dims(i) > 0, + errors::InvalidArgument( + ""Input dims cannot be negative. 
Got dim = "", dims(i), + "" at index "", i)); + // Check interger overflow + OP_REQUIRES( + ctx, prod <= limit / dims(i), + errors::InvalidArgument(""Input dims product is causing integer "" + ""overflow: ("", + dims, "")"")); + prod = (prod * dims(i)); } // Check to make sure indices is not out of boundary @@ -132,6 +156,7 @@ class UnravelIndexOp : public OpKernel { strides_shifted.reshape(reshape).broadcast(bcast); } } + const DataType dtidx_; }; #define REGISTER_KERNEL(type) \ ",1,train 58b34c6c8250983948b5a781b426f6aa01fd47af,tensorflow/tensorflow,"Fix integer overflow leading to divide by zero error in Unravel index kernel when dimensions product exceeds max int value. PiperOrigin-RevId: 413250052 Change-Id: I9450b6e8acecd2e881a64b882e2b7c70e8e9289a",array_ops_test.py,"@@ -1580,6 +1580,20 @@ class UnravelIndexTest(test_util.TensorFlowTestCase): dims = constant_op.constant([3, 0], dtype=dtype) self.evaluate(array_ops.unravel_index(indices=indices, dims=dims)) + def testUnravelIndexIntegerOverflow(self): + with self.cached_session(): + for dtype in [dtypes.int32, dtypes.int64]: + with self.assertRaisesRegex( + errors.InvalidArgumentError, + r""Input dims product is causing integer overflow""): + indices = constant_op.constant(-0x100000, dtype=dtype) + if dtype == dtypes.int32: + value = 0x10000000 + else: + value = 0x7FFFFFFFFFFFFFFF + dims = constant_op.constant([value, value], dtype=dtype) + self.evaluate(array_ops.unravel_index(indices=indices, dims=dims)) + class GuaranteeConstOpTest(test_util.TensorFlowTestCase): ",1,train 002408c3696b173863228223d535f9de72a101a9,tensorflow/tensorflow,"Add negative bound check for row and column pooling_sequence in FractionalAvgPoolGrad op to avoid out of bound heap access PiperOrigin-RevId: 413837346 Change-Id: I2b86034101df31bee161abcb781755e236c7bccd",fractional_avg_pool_op.cc,"@@ -311,15 +311,26 @@ class FractionalAvgPoolGradOp : public OpKernel { for (int64_t b = 0; b < out_batch; ++b) { for (int64_t r = 0; r < out_rows; ++r) { const int64_t in_row_start = row_seq_tensor_flat(r); + int64_t in_row_end = overlapping_ ? row_seq_tensor_flat(r + 1) : row_seq_tensor_flat(r + 1) - 1; in_row_end = std::min(in_row_end, in_max_row_index); + OP_REQUIRES(context, in_row_start >= 0 && in_row_end >= 0, + errors::InvalidArgument( + ""Row sequence tensor values must not be negative, got "", + row_seq_tensor_flat)); + for (int64_t c = 0; c < out_cols; ++c) { const int64_t in_col_start = col_seq_tensor_flat(c); int64_t in_col_end = overlapping_ ? 
col_seq_tensor_flat(c + 1) : col_seq_tensor_flat(c + 1) - 1; in_col_end = std::min(in_col_end, in_max_col_index); + OP_REQUIRES( + context, in_col_start >= 0 && in_col_end >= 0, + errors::InvalidArgument( + ""Column sequence tensor values must not be negative, got "", + col_seq_tensor_flat)); const int64_t num_elements_in_pooling_cell = (in_row_end - in_row_start + 1) * (in_col_end - in_col_start + 1); const int64_t out_index = (b * out_rows + r) * out_cols + c; ",1,train 002408c3696b173863228223d535f9de72a101a9,tensorflow/tensorflow,"Add negative bound check for row and column pooling_sequence in FractionalAvgPoolGrad op to avoid out of bound heap access PiperOrigin-RevId: 413837346 Change-Id: I2b86034101df31bee161abcb781755e236c7bccd",fractional_avg_pool_op_test.py,"@@ -20,6 +20,7 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_nn_ops @@ -306,6 +307,32 @@ class FractionalAvgTest(test.TestCase): input_b, row_seq, col_seq, overlapping) self.assertSequenceEqual(expected.shape, actual.shape) + def testNegativeSeqValuesForGradOp(self): + with self.assertRaisesRegex( + errors.InvalidArgumentError, + r""Row sequence tensor values must not be negative.*""): + y = nn_ops.gen_nn_ops.fractional_avg_pool_grad( + orig_input_tensor_shape=[2, 2, 2, 2], + out_backprop=[[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, + 12]]]], + row_pooling_sequence=[-10, 1, 2, 3], + col_pooling_sequence=[1, 2, 3, 4], + overlapping=True) + + self.evaluate(y) + with self.assertRaisesRegex( + errors.InvalidArgumentError, + r""Column sequence tensor values must not be negative.*""): + z = nn_ops.gen_nn_ops.fractional_avg_pool_grad( + orig_input_tensor_shape=[2, 2, 2, 2], + out_backprop=[[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, + 12]]]], + row_pooling_sequence=[10, 1, 2, 3], + col_pooling_sequence=[1, 2, -3, 4], + overlapping=True) + + self.evaluate(z) + class FractionalAvgPoolGradTest(test.TestCase): """"""Tests for FractionalAvgPoolGrad. ",1,train 08d7b00c0a5a20926363849f611729f53f3ec022,tensorflow/tensorflow,"Fix Segfault in Concat V2 shape function. PiperOrigin-RevId: 412120654 Change-Id: I3ff915faea694f9ad8b00024e9af2de9909011be",common_shape_fns.cc,"@@ -2005,7 +2005,7 @@ Status ConcatShapeHelper(InferenceContext* c, int start_value_index, } // Minimum required number of dimensions. - const int min_rank = concat_dim < 0 ? -concat_dim : concat_dim + 1; + const int64 min_rank = concat_dim < 0 ? -concat_dim : concat_dim + 1; ShapeHandle output_before; ShapeHandle output_after; ",1,test 08d7b00c0a5a20926363849f611729f53f3ec022,tensorflow/tensorflow,"Fix Segfault in Concat V2 shape function. 
PiperOrigin-RevId: 412120654 Change-Id: I3ff915faea694f9ad8b00024e9af2de9909011be",concat_op_test.py,"@@ -16,6 +16,7 @@ import numpy as np +from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl @@ -570,6 +571,17 @@ class ConcatOpTest(test.TestCase): t2 = [2] gen_array_ops.concat_v2([t1, t2], 1).eval() + def testConcatInvalidAxisInTfFunction(self): + + @def_function.function + def concat_wrapper(): + y = gen_array_ops.concat_v2( + values=[[1, 2, 3], [4, 5, 6]], axis=0xb500005b) + return y + + with self.assertRaises(ValueError): + concat_wrapper() + def testConcatNegativeAxis(self): with test_util.use_gpu(): t1 = [[1, 2, 3], [4, 5, 6]] ",1,test e3749a6d5d1e8d11806d4a2e9cc3123d1a90b75e,tensorflow/tensorflow,"[tf.data] Set limit on number of threads used in threadpool_dataset. PiperOrigin-RevId: 410922677 Change-Id: Ib25814a99043ab10805b5d2d7088ae0e0b7b04fd",threadpool_dataset_op.cc,"@@ -39,6 +39,22 @@ namespace experimental { PrivateThreadPoolDatasetOp::kDatasetType; /* static */ constexpr const char* const PrivateThreadPoolDatasetOp::kDatasetOp; +namespace { +// To prevent integer overflow issues when allocating threadpool memory for an +// unreasonable number of threads. +constexpr int kThreadLimit = 65536; + +Status ValidateNumThreads(int32_t num_threads) { + if (num_threads < 0) { + return errors::InvalidArgument(""`num_threads` must be >= 0""); + } + if (num_threads >= kThreadLimit) { + return errors::InvalidArgument(""`num_threads` must be < "", kThreadLimit); + } + return Status::OK(); +} +} // namespace + class ThreadPoolResource : public ResourceBase { public: ThreadPoolResource(Env* env, const ThreadOptions& thread_options, @@ -83,9 +99,7 @@ class ThreadPoolHandleOp : public OpKernel { OP_REQUIRES_OK(ctx, ctx->GetAttr(""num_threads"", &num_threads_)); OP_REQUIRES_OK(ctx, ctx->GetAttr(""max_intra_op_parallelism"", &max_intra_op_parallelism_)); - OP_REQUIRES( - ctx, num_threads_ > 0, - errors::InvalidArgument(""`num_threads` must be greater than zero."")); + OP_REQUIRES_OK(ctx, ValidateNumThreads(num_threads_)); } // The resource is deleted from the resource manager only when it is private @@ -531,8 +545,7 @@ void PrivateThreadPoolDatasetOp::MakeDatasetFromOptions(OpKernelContext* ctx, DatasetBase* input, int32_t num_threads, DatasetBase** output) { - OP_REQUIRES(ctx, num_threads >= 0, - errors::InvalidArgument(""`num_threads` must be >= 0"")); + OP_REQUIRES_OK(ctx, ValidateNumThreads(num_threads)); *output = new Dataset(ctx, DatasetContext(DatasetContext::Params( {PrivateThreadPoolDatasetOp::kDatasetType, @@ -546,8 +559,7 @@ void PrivateThreadPoolDatasetOp::MakeDataset(OpKernelContext* ctx, int64_t num_threads = 0; OP_REQUIRES_OK( ctx, ParseScalarArgument(ctx, ""num_threads"", &num_threads)); - OP_REQUIRES(ctx, num_threads >= 0, - errors::InvalidArgument(""`num_threads` must be >= 0"")); + OP_REQUIRES_OK(ctx, ValidateNumThreads(num_threads)); *output = new Dataset(ctx, input, num_threads); } ",1,train f68fdab93fb7f4ddb4eb438c8fe052753c9413e8,tensorflow/tensorflow,"Add a check for pad width to be a positive value. 
PiperOrigin-RevId: 413275853 Change-Id: I261a8db9dabf5ce48a806a9e58129080c9fac619",string_ngrams_op.cc,"@@ -152,6 +152,16 @@ class StringNGramsOp : public tensorflow::OpKernel { // We don't have to worry about dynamic padding sizes here: if padding // was dynamic, every sequence would have had sufficient padding to // generate at least one ngram. + + // If reached here, pad_width should be > 0, pad_width_ = -1, + // which indicates max(ngram_widths) - 1 cannot be used here since + // ngram_width is not known. + OP_REQUIRES( + context, pad_width_ >= 0, + errors::InvalidArgument(""Pad width should be >= 0 when "" + ""preserve_short_sequences is True and "" + ""ngram_widths are not provided, got "", + pad_width_)); int ngram_width = data_length + 2 * pad_width_; auto output_start = &ngrams_data[output_start_idx]; int num_ngrams = 1; ",1,train f68fdab93fb7f4ddb4eb438c8fe052753c9413e8,tensorflow/tensorflow,"Add a check for pad width to be a positive value. PiperOrigin-RevId: 413275853 Change-Id: I261a8db9dabf5ce48a806a9e58129080c9fac619",raw_ops_test.py,"@@ -28,7 +28,6 @@ from tensorflow.python.platform import test @test_util.run_all_in_graph_and_eager_modes -@test_util.disable_tfrt class RawOpsTest(test.TestCase, parameterized.TestCase): def testSimple(self): @@ -63,8 +62,9 @@ class RawOpsTest(test.TestCase, parameterized.TestCase): @parameterized.parameters([[0, 8]], [[-1, 6]]) def testStringNGramsBadDataSplits(self, splits): data = [""aa"", ""bb"", ""cc"", ""dd"", ""ee"", ""ff""] - with self.assertRaisesRegex(errors.InvalidArgumentError, - ""Invalid split value""): + with self.assertRaisesRegex( + errors.InvalidArgumentError, + r""Invalid split value|First split value must be 0""): self.evaluate( gen_string_ops.string_n_grams( data=data, @@ -76,6 +76,25 @@ class RawOpsTest(test.TestCase, parameterized.TestCase): pad_width=0, preserve_short_sequences=False)) + def testStringSplit(self): + data = [""123456""] + data_splits = [0, 1] + separator = ""a"" * 15 + ngram_widths = [] + pad_width = -5 + left_pad = right_pad = """" + with self.assertRaisesRegex(errors.InvalidArgumentError, + ""Pad width should be >= 0""): + self.evaluate(gen_string_ops.string_n_grams( + data=data, + data_splits=data_splits, + separator=separator, + ngram_widths=ngram_widths, + left_pad=left_pad, + right_pad=right_pad, + pad_width=pad_width, + preserve_short_sequences=True)) + def testGetSessionHandle(self): if context.executing_eagerly(): with self.assertRaisesRegex( ",1,train f57315566d7094f322b784947093406c2aea0d7d,tensorflow/tensorflow,"Add a check for Key being scalar tensor for MapStage and OrderedMapStage ops. According to documentation[1][2], key must be int64 value, but this wasn't enforced and the ops would fail with check failure for non-scalar key value. 
[1]https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/ordered-map-stage [2]https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/map-stage PiperOrigin-RevId: 413822112 Change-Id: I9d118faf990e6361900aa32272eff486ad9f0e2e",map_stage_op.cc,"@@ -536,6 +536,11 @@ class MapStageOp : public OpKernel { OP_REQUIRES(ctx, key_tensor->NumElements() > 0, errors::InvalidArgument(""key must not be empty"")); + OP_REQUIRES(ctx, key_tensor->NumElements() == 1, + errors::InvalidArgument( + ""key must be an int64 scalar, got tensor with shape: "", + key_tensor->shape())); + // Create copy for insertion into Staging Area Tensor key(*key_tensor); ",1,test f57315566d7094f322b784947093406c2aea0d7d,tensorflow/tensorflow,"Add a check for Key being scalar tensor for MapStage and OrderedMapStage ops. According to documentation[1][2], key must be int64 value, but this wasn't enforced and the ops would fail with check failure for non-scalar key value. [1]https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/ordered-map-stage [2]https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/map-stage PiperOrigin-RevId: 413822112 Change-Id: I9d118faf990e6361900aa32272eff486ad9f0e2e",map_stage_op_test.py,"@@ -12,8 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -from tensorflow.python.framework import errors +import numpy as np + +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops @@ -28,7 +31,7 @@ class MapStageTest(test.TestCase): @test_util.run_deprecated_v1 def testSimple(self): - with ops.Graph().as_default() as G: + with ops.Graph().as_default() as g: with ops.device('/cpu:0'): x = array_ops.placeholder(dtypes.float32) pi = array_ops.placeholder(dtypes.int64) @@ -40,9 +43,9 @@ class MapStageTest(test.TestCase): k, y = stager.get(gi) y = math_ops.reduce_max(math_ops.matmul(y, y)) - G.finalize() + g.finalize() - with self.session(graph=G) as sess: + with self.session(graph=g) as sess: sess.run(stage, feed_dict={x: -1, pi: 0}) for i in range(10): _, yval = sess.run([stage, y], feed_dict={x: i, pi: i + 1, gi: i}) @@ -50,7 +53,7 @@ class MapStageTest(test.TestCase): @test_util.run_deprecated_v1 def testMultiple(self): - with ops.Graph().as_default() as G: + with ops.Graph().as_default() as g: with ops.device('/cpu:0'): x = array_ops.placeholder(dtypes.float32) pi = array_ops.placeholder(dtypes.int64) @@ -62,9 +65,9 @@ class MapStageTest(test.TestCase): k, (z, y) = stager.get(gi) y = math_ops.reduce_max(z * math_ops.matmul(y, y)) - G.finalize() + g.finalize() - with self.session(graph=G) as sess: + with self.session(graph=g) as sess: sess.run(stage, feed_dict={x: -1, pi: 0}) for i in range(10): _, yval = sess.run([stage, y], feed_dict={x: i, pi: i + 1, gi: i}) @@ -73,26 +76,25 @@ class MapStageTest(test.TestCase): @test_util.run_deprecated_v1 def testDictionary(self): - with ops.Graph().as_default() as G: + with ops.Graph().as_default() as g: with ops.device('/cpu:0'): x = array_ops.placeholder(dtypes.float32) pi = array_ops.placeholder(dtypes.int64) gi = array_ops.placeholder(dtypes.int64) v = 2. 
* (array_ops.zeros([128, 128]) + x) with ops.device(test.gpu_device_name()): - stager = data_flow_ops.MapStagingArea( - [dtypes.float32, dtypes.float32], - shapes=[[], [128, 128]], - names=['x', 'v']) + stager = data_flow_ops.MapStagingArea([dtypes.float32, dtypes.float32], + shapes=[[], [128, 128]], + names=['x', 'v']) stage = stager.put(pi, {'x': x, 'v': v}) key, ret = stager.get(gi) z = ret['x'] y = ret['v'] y = math_ops.reduce_max(z * math_ops.matmul(y, y)) - G.finalize() + g.finalize() - with self.session(graph=G) as sess: + with self.session(graph=g) as sess: sess.run(stage, feed_dict={x: -1, pi: 0}) for i in range(10): _, yval = sess.run([stage, y], feed_dict={x: i, pi: i + 1, gi: i}) @@ -102,7 +104,7 @@ class MapStageTest(test.TestCase): def testColocation(self): gpu_dev = test.gpu_device_name() - with ops.Graph().as_default() as G: + with ops.Graph().as_default() as g: with ops.device('/cpu:0'): x = array_ops.placeholder(dtypes.float32) v = 2. * (array_ops.zeros([128, 128]) + x) @@ -119,58 +121,56 @@ class MapStageTest(test.TestCase): self.assertEqual(y.device, '/device:CPU:0') self.assertEqual(z[0].device, '/device:CPU:0') - G.finalize() + g.finalize() @test_util.run_deprecated_v1 def testPeek(self): - with ops.Graph().as_default() as G: + with ops.Graph().as_default() as g: with ops.device('/cpu:0'): x = array_ops.placeholder(dtypes.int32, name='x') pi = array_ops.placeholder(dtypes.int64) gi = array_ops.placeholder(dtypes.int64) p = array_ops.placeholder(dtypes.int32, name='p') with ops.device(test.gpu_device_name()): - stager = data_flow_ops.MapStagingArea( - [ - dtypes.int32, - ], shapes=[[]]) + stager = data_flow_ops.MapStagingArea([ + dtypes.int32, + ], shapes=[[]]) stage = stager.put(pi, [x], [0]) peek = stager.peek(gi) size = stager.size() - G.finalize() + g.finalize() n = 10 - with self.session(graph=G) as sess: + with self.session(graph=g) as sess: for i in range(n): sess.run(stage, feed_dict={x: i, pi: i}) for i in range(n): - self.assertTrue(sess.run(peek, feed_dict={gi: i})[0] == i) + self.assertEqual(sess.run(peek, feed_dict={gi: i})[0], i) - self.assertTrue(sess.run(size) == 10) + self.assertEqual(sess.run(size), 10) @test_util.run_deprecated_v1 def testSizeAndClear(self): - with ops.Graph().as_default() as G: + with ops.Graph().as_default() as g: with ops.device('/cpu:0'): x = array_ops.placeholder(dtypes.float32, name='x') pi = array_ops.placeholder(dtypes.int64) gi = array_ops.placeholder(dtypes.int64) v = 2. 
* (array_ops.zeros([128, 128]) + x) with ops.device(test.gpu_device_name()): - stager = data_flow_ops.MapStagingArea( - [dtypes.float32, dtypes.float32], - shapes=[[], [128, 128]], - names=['x', 'v']) + stager = data_flow_ops.MapStagingArea([dtypes.float32, dtypes.float32], + shapes=[[], [128, 128]], + names=['x', 'v']) stage = stager.put(pi, {'x': x, 'v': v}) size = stager.size() clear = stager.clear() - G.finalize() + g.finalize() - with self.session(graph=G) as sess: + with self.session(graph=g) as sess: sess.run(stage, feed_dict={x: -1, pi: 3}) self.assertEqual(sess.run(size), 1) sess.run(stage, feed_dict={x: -1, pi: 1}) @@ -182,22 +182,23 @@ class MapStageTest(test.TestCase): def testCapacity(self): capacity = 3 - with ops.Graph().as_default() as G: + with ops.Graph().as_default() as g: with ops.device('/cpu:0'): x = array_ops.placeholder(dtypes.int32, name='x') pi = array_ops.placeholder(dtypes.int64, name='pi') gi = array_ops.placeholder(dtypes.int64, name='gi') with ops.device(test.gpu_device_name()): - stager = data_flow_ops.MapStagingArea( - [ - dtypes.int32, - ], capacity=capacity, shapes=[[]]) + stager = data_flow_ops.MapStagingArea([ + dtypes.int32, + ], + capacity=capacity, + shapes=[[]]) stage = stager.put(pi, [x], [0]) get = stager.get() size = stager.size() - G.finalize() + g.finalize() from six.moves import queue as Queue import threading @@ -205,7 +206,7 @@ class MapStageTest(test.TestCase): queue = Queue.Queue() n = 8 - with self.session(graph=G) as sess: + with self.session(graph=g) as sess: # Stage data in a separate thread which will block # when it hits the staging area's capacity and thus # not fill the queue with n tokens @@ -234,13 +235,13 @@ class MapStageTest(test.TestCase): capacity)) # Should have capacity elements in the staging area - self.assertTrue(sess.run(size) == capacity) + self.assertEqual(sess.run(size), capacity) # Clear the staging area completely for i in range(n): sess.run(get) - self.assertTrue(sess.run(size) == 0) + self.assertEqual(sess.run(size), 0) @test_util.run_deprecated_v1 def testMemoryLimit(self): @@ -248,28 +249,28 @@ class MapStageTest(test.TestCase): chunk = 200 * 1024 # 256K capacity = memory_limit // chunk - with ops.Graph().as_default() as G: + with ops.Graph().as_default() as g: with ops.device('/cpu:0'): x = array_ops.placeholder(dtypes.uint8, name='x') pi = array_ops.placeholder(dtypes.int64, name='pi') gi = array_ops.placeholder(dtypes.int64, name='gi') with ops.device(test.gpu_device_name()): - stager = data_flow_ops.MapStagingArea( - [dtypes.uint8], memory_limit=memory_limit, shapes=[[]]) + stager = data_flow_ops.MapStagingArea([dtypes.uint8], + memory_limit=memory_limit, + shapes=[[]]) stage = stager.put(pi, [x], [0]) get = stager.get() size = stager.size() - G.finalize() + g.finalize() from six.moves import queue as Queue import threading - import numpy as np queue = Queue.Queue() n = 8 - with self.session(graph=G) as sess: + with self.session(graph=g) as sess: # Stage data in a separate thread which will block # when it hits the staging area's capacity and thus # not fill the queue with n tokens @@ -299,56 +300,57 @@ class MapStageTest(test.TestCase): capacity)) # Should have capacity elements in the staging area - self.assertTrue(sess.run(size) == capacity) + self.assertEqual(sess.run(size), capacity) # Clear the staging area completely for i in range(n): sess.run(get) - self.assertTrue(sess.run(size) == 0) + self.assertEqual(sess.run(size), 0) @test_util.run_deprecated_v1 def testOrdering(self): import six import 
random - with ops.Graph().as_default() as G: + with ops.Graph().as_default() as g: with ops.device('/cpu:0'): x = array_ops.placeholder(dtypes.int32, name='x') pi = array_ops.placeholder(dtypes.int64, name='pi') gi = array_ops.placeholder(dtypes.int64, name='gi') with ops.device(test.gpu_device_name()): - stager = data_flow_ops.MapStagingArea( - [ - dtypes.int32, - ], shapes=[[]], ordered=True) + stager = data_flow_ops.MapStagingArea([ + dtypes.int32, + ], + shapes=[[]], + ordered=True) stage = stager.put(pi, [x], [0]) get = stager.get() size = stager.size() - G.finalize() + g.finalize() n = 10 - with self.session(graph=G) as sess: + with self.session(graph=g) as sess: # Keys n-1..0 keys = list(reversed(six.moves.range(n))) for i in keys: sess.run(stage, feed_dict={pi: i, x: i}) - self.assertTrue(sess.run(size) == n) + self.assertEqual(sess.run(size), n) # Check that key, values come out in ascending order for i, k in enumerate(reversed(keys)): get_key, values = sess.run(get) self.assertTrue(i == k == get_key == values) - self.assertTrue(sess.run(size) == 0) + self.assertEqual(sess.run(size), 0) @test_util.run_deprecated_v1 def testPartialDictInsert(self): - with ops.Graph().as_default() as G: + with ops.Graph().as_default() as g: with ops.device('/cpu:0'): x = array_ops.placeholder(dtypes.float32) f = array_ops.placeholder(dtypes.float32) @@ -366,41 +368,39 @@ class MapStageTest(test.TestCase): size = stager.size() isize = stager.incomplete_size() - G.finalize() + g.finalize() - with self.session(graph=G) as sess: + with self.session(graph=g) as sess: # 0 complete and incomplete entries - self.assertTrue(sess.run([size, isize]) == [0, 0]) + self.assertEqual(sess.run([size, isize]), [0, 0]) # Stage key 0, x and f tuple entries sess.run(stage_xf, feed_dict={pi: 0, x: 1, f: 2}) - self.assertTrue(sess.run([size, isize]) == [0, 1]) + self.assertEqual(sess.run([size, isize]), [0, 1]) # Stage key 1, x and f tuple entries sess.run(stage_xf, feed_dict={pi: 1, x: 1, f: 2}) - self.assertTrue(sess.run([size, isize]) == [0, 2]) + self.assertEqual(sess.run([size, isize]), [0, 2]) # Now complete key 0 with tuple entry v sess.run(stage_v, feed_dict={pi: 0, v: 1}) # 1 complete and 1 incomplete entry - self.assertTrue(sess.run([size, isize]) == [1, 1]) + self.assertEqual(sess.run([size, isize]), [1, 1]) # We can now obtain tuple associated with key 0 - self.assertTrue( - sess.run([key, ret], feed_dict={ - gi: 0 - }) == [0, { + self.assertEqual( + sess.run([key, ret], feed_dict={gi: 0}), + [0, { 'x': 1, 'f': 2, 'v': 1 }]) # 0 complete and 1 incomplete entry - self.assertTrue(sess.run([size, isize]) == [0, 1]) + self.assertEqual(sess.run([size, isize]), [0, 1]) # Now complete key 1 with tuple entry v sess.run(stage_v, feed_dict={pi: 1, v: 3}) # We can now obtain tuple associated with key 1 - self.assertTrue( - sess.run([key, ret], feed_dict={ - gi: 1 - }) == [1, { + self.assertEqual( + sess.run([key, ret], feed_dict={gi: 1}), + [1, { 'x': 1, 'f': 2, 'v': 3 @@ -408,7 +408,7 @@ class MapStageTest(test.TestCase): @test_util.run_deprecated_v1 def testPartialIndexInsert(self): - with ops.Graph().as_default() as G: + with ops.Graph().as_default() as g: with ops.device('/cpu:0'): x = array_ops.placeholder(dtypes.float32) f = array_ops.placeholder(dtypes.float32) @@ -424,35 +424,35 @@ class MapStageTest(test.TestCase): size = stager.size() isize = stager.incomplete_size() - G.finalize() + g.finalize() - with self.session(graph=G) as sess: + with self.session(graph=g) as sess: # 0 complete and incomplete entries - 
self.assertTrue(sess.run([size, isize]) == [0, 0]) + self.assertEqual(sess.run([size, isize]), [0, 0]) # Stage key 0, x and f tuple entries sess.run(stage_xf, feed_dict={pi: 0, x: 1, f: 2}) - self.assertTrue(sess.run([size, isize]) == [0, 1]) + self.assertEqual(sess.run([size, isize]), [0, 1]) # Stage key 1, x and f tuple entries sess.run(stage_xf, feed_dict={pi: 1, x: 1, f: 2}) - self.assertTrue(sess.run([size, isize]) == [0, 2]) + self.assertEqual(sess.run([size, isize]), [0, 2]) # Now complete key 0 with tuple entry v sess.run(stage_v, feed_dict={pi: 0, v: 1}) # 1 complete and 1 incomplete entry - self.assertTrue(sess.run([size, isize]) == [1, 1]) + self.assertEqual(sess.run([size, isize]), [1, 1]) # We can now obtain tuple associated with key 0 - self.assertTrue(sess.run([key, ret], feed_dict={gi: 0}) == [0, [1, 1, 2]]) + self.assertEqual(sess.run([key, ret], feed_dict={gi: 0}), [0, [1, 1, 2]]) # 0 complete and 1 incomplete entry - self.assertTrue(sess.run([size, isize]) == [0, 1]) + self.assertEqual(sess.run([size, isize]), [0, 1]) # Now complete key 1 with tuple entry v sess.run(stage_v, feed_dict={pi: 1, v: 3}) # We can now obtain tuple associated with key 1 - self.assertTrue(sess.run([key, ret], feed_dict={gi: 1}) == [1, [1, 3, 2]]) + self.assertEqual(sess.run([key, ret], feed_dict={gi: 1}), [1, [1, 3, 2]]) @test_util.run_deprecated_v1 def testPartialDictGetsAndPeeks(self): - with ops.Graph().as_default() as G: + with ops.Graph().as_default() as g: with ops.device('/cpu:0'): x = array_ops.placeholder(dtypes.float32) f = array_ops.placeholder(dtypes.float32) @@ -476,40 +476,38 @@ class MapStageTest(test.TestCase): size = stager.size() isize = stager.incomplete_size() - G.finalize() + g.finalize() - with self.session(graph=G) as sess: + with self.session(graph=g) as sess: # 0 complete and incomplete entries - self.assertTrue(sess.run([size, isize]) == [0, 0]) + self.assertEqual(sess.run([size, isize]), [0, 0]) # Stage key 0, x and f tuple entries sess.run(stage_xf, feed_dict={pi: 0, x: 1, f: 2}) - self.assertTrue(sess.run([size, isize]) == [0, 1]) + self.assertEqual(sess.run([size, isize]), [0, 1]) # Stage key 1, x and f tuple entries sess.run(stage_xf, feed_dict={pi: 1, x: 1, f: 2}) - self.assertTrue(sess.run([size, isize]) == [0, 2]) + self.assertEqual(sess.run([size, isize]), [0, 2]) # Now complete key 0 with tuple entry v sess.run(stage_v, feed_dict={pi: 0, v: 1}) # 1 complete and 1 incomplete entry - self.assertTrue(sess.run([size, isize]) == [1, 1]) + self.assertEqual(sess.run([size, isize]), [1, 1]) # We can now peek at 'x' and 'f' values associated with key 0 - self.assertTrue(sess.run(peek_xf, feed_dict={pei: 0}) == {'x': 1, 'f': 2}) + self.assertEqual(sess.run(peek_xf, feed_dict={pei: 0}), {'x': 1, 'f': 2}) # Peek at 'v' value associated with key 0 - self.assertTrue(sess.run(peek_v, feed_dict={pei: 0}) == {'v': 1}) + self.assertEqual(sess.run(peek_v, feed_dict={pei: 0}), {'v': 1}) # 1 complete and 1 incomplete entry - self.assertTrue(sess.run([size, isize]) == [1, 1]) + self.assertEqual(sess.run([size, isize]), [1, 1]) # We can now obtain 'x' and 'f' values associated with key 0 - self.assertTrue( - sess.run([key_xf, get_xf], feed_dict={ - gi: 0 - }) == [0, { + self.assertEqual( + sess.run([key_xf, get_xf], feed_dict={gi: 0}), [0, { 'x': 1, 'f': 2 }]) # Still have 1 complete and 1 incomplete entry - self.assertTrue(sess.run([size, isize]) == [1, 1]) + self.assertEqual(sess.run([size, isize]), [1, 1]) # We can no longer get 'x' and 'f' from key 0 with 
self.assertRaises(errors.InvalidArgumentError) as cm: @@ -517,40 +515,36 @@ class MapStageTest(test.TestCase): exc_str = (""Tensor at index '0' for key '0' "" 'has already been removed.') - self.assertTrue(exc_str in cm.exception.message) + self.assertIn(exc_str, cm.exception.message) # Obtain 'v' value associated with key 0 - self.assertTrue( - sess.run([key_v, get_v], feed_dict={ - gi: 0 - }) == [0, { + self.assertEqual( + sess.run([key_v, get_v], feed_dict={gi: 0}), [0, { 'v': 1 }]) # 0 complete and 1 incomplete entry - self.assertTrue(sess.run([size, isize]) == [0, 1]) + self.assertEqual(sess.run([size, isize]), [0, 1]) # Now complete key 1 with tuple entry v sess.run(stage_v, feed_dict={pi: 1, v: 1}) # 1 complete and 1 incomplete entry - self.assertTrue(sess.run([size, isize]) == [1, 0]) + self.assertEqual(sess.run([size, isize]), [1, 0]) # Pop without key to obtain 'x' and 'f' values associated with key 1 - self.assertTrue(sess.run([pop_key_xf, pop_xf]) == [1, {'x': 1, 'f': 2}]) + self.assertEqual(sess.run([pop_key_xf, pop_xf]), [1, {'x': 1, 'f': 2}]) # still 1 complete and 1 incomplete entry - self.assertTrue(sess.run([size, isize]) == [1, 0]) + self.assertEqual(sess.run([size, isize]), [1, 0]) # We can now obtain 'x' and 'f' values associated with key 1 - self.assertTrue( - sess.run([pop_key_v, pop_v], feed_dict={ - pi: 1 - }) == [1, { + self.assertEqual( + sess.run([pop_key_v, pop_v], feed_dict={pi: 1}), [1, { 'v': 1 }]) # Nothing is left - self.assertTrue(sess.run([size, isize]) == [0, 0]) + self.assertEqual(sess.run([size, isize]), [0, 0]) @test_util.run_deprecated_v1 def testPartialIndexGets(self): - with ops.Graph().as_default() as G: + with ops.Graph().as_default() as g: with ops.device('/cpu:0'): x = array_ops.placeholder(dtypes.float32) f = array_ops.placeholder(dtypes.float32) @@ -568,28 +562,72 @@ class MapStageTest(test.TestCase): size = stager.size() isize = stager.incomplete_size() - G.finalize() + g.finalize() - with self.session(graph=G) as sess: + with self.session(graph=g) as sess: # Stage complete tuple sess.run(stage_xvf, feed_dict={pi: 0, x: 1, f: 2, v: 3}) - self.assertTrue(sess.run([size, isize]) == [1, 0]) + self.assertEqual(sess.run([size, isize]), [1, 0]) # Partial get using indices - self.assertTrue( - sess.run([key_xf, get_xf], feed_dict={ - gi: 0 - }) == [0, [1, 2]]) + self.assertEqual( + sess.run([key_xf, get_xf], feed_dict={gi: 0}), [0, [1, 2]]) # Still some of key 0 left - self.assertTrue(sess.run([size, isize]) == [1, 0]) + self.assertEqual(sess.run([size, isize]), [1, 0]) # Partial get of remaining index - self.assertTrue(sess.run([key_v, get_v], feed_dict={gi: 0}) == [0, [3]]) + self.assertEqual(sess.run([key_v, get_v], feed_dict={gi: 0}), [0, [3]]) # All gone - self.assertTrue(sess.run([size, isize]) == [0, 0]) + self.assertEqual(sess.run([size, isize]), [0, 0]) + + @test_util.run_deprecated_v1 + def testNonScalarKeyOrderedMap(self): + with ops.Graph().as_default() as g: + x = array_ops.placeholder(dtypes.float32) + v = 2. 
* (array_ops.zeros([128, 128]) + x) + t = data_flow_ops.gen_data_flow_ops.ordered_map_stage( + key=constant_op.constant(value=[1], shape=(1, 3), dtype=dtypes.int64), + indices=np.array([[6]]), + values=[x, v], + dtypes=[dtypes.int64], + capacity=0, + memory_limit=0, + container='container1', + shared_name='', + name=None) + + g.finalize() + + with self.session(graph=g) as sess: + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'key must be an int64 scalar'): + sess.run(t, feed_dict={x: 1}) + + @test_util.run_deprecated_v1 + def testNonScalarKeyUnorderedMap(self): + with ops.Graph().as_default() as g: + x = array_ops.placeholder(dtypes.float32) + v = 2. * (array_ops.zeros([128, 128]) + x) + t = data_flow_ops.gen_data_flow_ops.map_stage( + key=constant_op.constant(value=[1], shape=(1, 3), dtype=dtypes.int64), + indices=np.array([[6]]), + values=[x, v], + dtypes=[dtypes.int64], + capacity=0, + memory_limit=0, + container='container1', + shared_name='', + name=None) + + g.finalize() + + with self.session(graph=g) as sess: + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'key must be an int64 scalar'): + sess.run(t, feed_dict={x: 1}) if __name__ == '__main__': ",1,test ba4e8ac4dc2991e350d5cc407f8598c8d4ee70fb,tensorflow/tensorflow,"Fix potential divide by zero error when executing FractionalMaxPool, when pooling ratio is higher than input size for a particular dimension. PiperOrigin-RevId: 412151722 Change-Id: I06e57cbb8eca43816eff79eac264fa7aae8f7163",fractional_max_pool_op.cc,"@@ -83,6 +83,13 @@ class FractionalMaxPoolOp : public OpKernel { std::vector output_size(tensor_in_and_out_dims); for (int i = 0; i < tensor_in_and_out_dims; ++i) { input_size[i] = tensor_in.dim_size(i); + + OP_REQUIRES( + context, input_size[i] >= pooling_ratio_[i], + errors::InvalidArgument(""Pooling ratio is higher than input "" + ""dimension size for dimension "", + i, "". Input dim size: "", input_size[i], + "" pooling ratio: "", pooling_ratio_[i])); } // Output size. for (int i = 0; i < tensor_in_and_out_dims; ++i) { ",1,train ba4e8ac4dc2991e350d5cc407f8598c8d4ee70fb,tensorflow/tensorflow,"Fix potential divide by zero error when executing FractionalMaxPool, when pooling ratio is higher than input size for a particular dimension. PiperOrigin-RevId: 412151722 Change-Id: I06e57cbb8eca43816eff79eac264fa7aae8f7163",fractional_max_pool_op_test.py,"@@ -20,6 +20,7 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_nn_ops @@ -319,6 +320,24 @@ class FractionalMaxPoolTest(test.TestCase): nn_ops.fractional_max_pool( rand_mat, [1, 1.5, 1.5, 1], seed=1, seed2=1, deterministic=True) + def testPoolingRatio(self): + with self.cached_session() as _: + with self.assertRaisesRegex( + errors.InvalidArgumentError, + r""Pooling ratio is higher than input dimension size for dimension 1.*"" + ): + result = nn_ops.gen_nn_ops.fractional_max_pool( + value=constant_op.constant( + value=[[[[1, 4, 2, 3]]]], dtype=dtypes.int64), + pooling_ratio=[1.0, 1.44, 1.73, 1.0], + pseudo_random=False, + overlapping=False, + deterministic=False, + seed=0, + seed2=0, + name=None) + self.evaluate(result) + class FractionalMaxPoolGradTest(test.TestCase): """"""Tests for FractionalMaxPoolGrad. 
",1,train 965b97e4a9650495cda5a8c210ef6684b4b9eceb,tensorflow/tensorflow,"Properly validate sparse tensor in `SparseTensorSliceDataset` Existing validation was incomplete. PiperOrigin-RevId: 415375048 Change-Id: I14cd18f29ede73286f3ffac35171bd15828997e9",sparse_tensor_slice_dataset_op.cc,"@@ -240,28 +240,29 @@ class SparseTensorSliceDatasetOp : public DatasetOpKernel { OP_REQUIRES_OK(ctx, ctx->input(""dense_shape"", &dense_shape)); OP_REQUIRES(ctx, TensorShapeUtils::IsMatrix(indices->shape()), - errors::InvalidArgument( - ""Input indices should be a matrix but received shape "", - indices->shape().DebugString())); - - const auto num_indices = indices->NumElements(); - const auto num_values = values->NumElements(); - if (num_indices == 0 || num_values == 0) { - OP_REQUIRES(ctx, num_indices == num_values, - errors::InvalidArgument( - ""If indices or values are empty, the other one must also "" - ""be. Got indices of shape "", - indices->shape().DebugString(), "" and values of shape "", - values->shape().DebugString())); - } + errors::InvalidArgument(""Input indices must be a matrix. Got: "", + indices->shape().DebugString())); OP_REQUIRES(ctx, TensorShapeUtils::IsVector(values->shape()), - errors::InvalidArgument( - ""Input values should be a vector but received shape "", - indices->shape().DebugString())); + errors::InvalidArgument(""Input values must be a vector. Got: "", + values->shape().DebugString())); OP_REQUIRES(ctx, TensorShapeUtils::IsVector(dense_shape->shape()), + errors::InvalidArgument(""Input shape must be a vector. Got: "", + dense_shape->shape().DebugString())); + OP_REQUIRES( + ctx, values->shape().dim_size(0) == indices->shape().dim_size(0), + errors::InvalidArgument( + ""Number of values must match first dimension of indices. "", ""Got "", + values->shape().dim_size(0), + "" values, indices shape: "", indices->shape().DebugString())); + OP_REQUIRES( + ctx, dense_shape->shape().dim_size(0) == indices->shape().dim_size(1), + errors::InvalidArgument( + ""Number of dimensions must match second dimension of indices. "", + ""Got "", dense_shape->shape().dim_size(0), + "" dimensions, indices shape: "", indices->shape().DebugString())); + OP_REQUIRES(ctx, dense_shape->NumElements() > 0, errors::InvalidArgument( - ""Input shape should be a vector but received shape "", - dense_shape->shape().DebugString())); + ""The shape argument requires at least one element."")); // We currently ensure that `sparse_tensor` is ordered in the // batch dimension. ",1,train 965b97e4a9650495cda5a8c210ef6684b4b9eceb,tensorflow/tensorflow,"Properly validate sparse tensor in `SparseTensorSliceDataset` Existing validation was incomplete. PiperOrigin-RevId: 415375048 Change-Id: I14cd18f29ede73286f3ffac35171bd15828997e9",from_sparse_tensor_slices_test.py,"@@ -134,6 +134,25 @@ class FromSparseTensorSlicesTest(test_base.DatasetTestBase, with self.assertRaises(errors.InvalidArgumentError): sess.run(init_op, feed_dict={st: sparse_feed}) + @combinations.generate(combinations.combine(tf_api_version=1, mode=[""graph""])) + def testEmptySparseTensorSlicesInvalid2(self): + """"""Test a dataset based on invalid `tf.sparse.SparseTensor`."""""" + st = array_ops.sparse_placeholder(dtypes.float64) + iterator = dataset_ops.make_initializable_iterator( + dataset_ops.Dataset.from_sparse_tensor_slices(st)) + init_op = iterator.initializer + + with self.cached_session() as sess: + # Test with an empty sparse tensor but with non empty values. 
+ empty_indices = [[]] + empty_values = [] + dense_shape = [1, 1] + sparse_feed = sparse_tensor.SparseTensorValue(empty_indices, empty_values, + dense_shape) + # Here, we expect the test to fail when running the feed. + with self.assertRaises(errors.InvalidArgumentError): + sess.run(init_op, feed_dict={st: sparse_feed}) + @combinations.generate(combinations.combine(tf_api_version=2, mode=[""eager""])) def testFromSparseTensorSlicesError(self): with self.assertRaises(AttributeError): ",1,train 7019ce4f68925fd01cdafde26f8d8c938f47e6f9,tensorflow/tensorflow,"Fix check-fail when bincount ops are passed invalid values. PiperOrigin-RevId: 415063028 Change-Id: I20f8dc09933ddca1111c4efbf9a3a1e863215d02",bincount_op.cc,"@@ -276,6 +276,9 @@ class DenseBincountOp : public OpKernel { const Tensor& size_t = ctx->input(1); const Tensor& weights = ctx->input(2); + OP_REQUIRES(ctx, size_t.dims() == 0, + errors::InvalidArgument(""Shape must be rank 0 but is rank "", + size_t.dims())); Tidx size = size_t.scalar()(); OP_REQUIRES( ctx, size >= 0, @@ -372,6 +375,9 @@ class SparseBincountOp : public OpKernel { const auto weights = ctx->input(4).flat(); const int64_t weights_size = weights.size(); + OP_REQUIRES(ctx, size_t.dims() == 0, + errors::InvalidArgument(""Shape must be rank 0 but is rank "", + size_t.dims())); Tidx size = size_t.scalar()(); OP_REQUIRES( ctx, size >= 0, @@ -462,6 +468,9 @@ class RaggedBincountOp : public OpKernel { const auto weights = ctx->input(3).flat(); const int64_t weights_size = weights.size(); + OP_REQUIRES(ctx, size_t.dims() == 0, + errors::InvalidArgument(""Shape must be rank 0 but is rank "", + size_t.dims())); Tidx size = size_t.scalar()(); OP_REQUIRES( ctx, size >= 0, ",1,test 7019ce4f68925fd01cdafde26f8d8c938f47e6f9,tensorflow/tensorflow,"Fix check-fail when bincount ops are passed invalid values. PiperOrigin-RevId: 415063028 Change-Id: I20f8dc09933ddca1111c4efbf9a3a1e863215d02",math_ops.cc,"@@ -1699,6 +1699,11 @@ REGISTER_OP(""Bincount"") return Status::OK(); } + if (size_tensor->dims() != 0) { + return errors::InvalidArgument(""Shape must be rank 0 but is rank "", + size_tensor->dims()); + } + // Return `[size]` shape if size is known. int32_t size_val = size_tensor->scalar()(); if (size_val < 0) { @@ -1730,6 +1735,10 @@ REGISTER_OP(""DenseBincount"") c->set_output(0, c->UnknownShape()); return Status::OK(); } + if (size_tensor->dims() != 0) { + return errors::InvalidArgument(""Shape must be rank 0 but is rank "", + size_tensor->dims()); + } int64_t size_val; DataType dtype; @@ -1771,6 +1780,10 @@ REGISTER_OP(""SparseBincount"") c->set_output(0, c->UnknownShape()); return Status::OK(); } + if (size_tensor->dims() != 0) { + return errors::InvalidArgument(""Shape must be rank 0 but is rank "", + size_tensor->dims()); + } int64_t size_val; DataType dtype; ",1,test 7019ce4f68925fd01cdafde26f8d8c938f47e6f9,tensorflow/tensorflow,"Fix check-fail when bincount ops are passed invalid values. 
PiperOrigin-RevId: 415063028 Change-Id: I20f8dc09933ddca1111c4efbf9a3a1e863215d02",bincount_op_test.py,"@@ -344,6 +344,14 @@ class BincountOpTest(test_util.TensorFlowTestCase, parameterized.TestCase): gen_math_ops.dense_bincount( input=[[[1, 2, 3], [0, 3, 2]]], weights=[], size=10)) + @test_util.run_in_graph_and_eager_modes + def test_size_is_not_scalar(self): # b/206619828 + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + ""Shape must be rank 0 but is rank 1""): + self.evaluate( + gen_math_ops.dense_bincount( + input=[0], size=[1, 1], weights=[3], binary_output=False)) + class SparseBincountOpTest(test_util.TensorFlowTestCase, parameterized.TestCase): @@ -511,6 +519,19 @@ class SparseBincountOpTest(test_util.TensorFlowTestCase, weights=[], binary_output=True))) + @test_util.run_in_graph_and_eager_modes + def test_size_is_not_scalar(self): # b/206619828 + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + ""Shape must be rank 0 but is rank 1""): + self.evaluate( + gen_math_ops.sparse_bincount( + indices=[[0], [1]], + values=[0, 0], + dense_shape=[1, 1], + size=[1, 1], + weights=[0, 0], + binary_output=False)) + class RaggedBincountOpTest(test_util.TensorFlowTestCase, parameterized.TestCase): @@ -650,6 +671,19 @@ class RaggedBincountOpTest(test_util.TensorFlowTestCase, size=size, binary_output=True))) + @test_util.run_in_graph_and_eager_modes + def test_size_is_not_scalar(self): # b/206619828 + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + ""Shape must be rank 0 but is rank 1""): + self.evaluate( + gen_math_ops.ragged_bincount( + splits=[0, 0, 1], + values=[1], + size=[1, 1], + weights=[0, 0, 0], + binary_output=False, + name=None)) + if __name__ == ""__main__"": googletest.main() ",1,test 6f4d3e8139ec724dbbcb40505891c81dd1052c4a,tensorflow/tensorflow,"Prevent crash due to integer overflow followed by allocating negative sized array. PiperOrigin-RevId: 414891322 Change-Id: I5df390e0dc1d9f115209293708950cdf9306931c",count_ops.cc,"@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include + #include ""absl/container/flat_hash_map.h"" #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/op_requires.h"" @@ -23,6 +25,9 @@ limitations under the License. namespace tensorflow { +// Don't allocate too large `BatchedMap` objects +static int kMaxBatches = std::numeric_limits::max(); + template using BatchedMap = std::vector>; @@ -235,6 +240,10 @@ class SparseCount : public OpKernel { bool is_1d = shape.NumElements() == 1; int num_batches = is_1d ? 1 : shape_vector(0); + OP_REQUIRES( + context, 0 < num_batches && num_batches < kMaxBatches, + errors::InvalidArgument(""Cannot allocate "", num_batches, + "" batches, is the dense shape too wide?"")); const auto values_values = values.flat(); const auto weight_values = weights.flat(); ",1,train 53b0dd6dc5957652f35964af16b892ec9af4a559,tensorflow/tensorflow,"Fix nullptr exception in QuantizedMaxPool op when empty list is sent to min_input or max_input parameters. PiperOrigin-RevId: 413960973 Change-Id: I9e3ded593f3c4eabf0d6d5dc356e6a19a3ad2682",quantized_pooling_ops.cc,"@@ -15,6 +15,8 @@ limitations under the License. // See docs in ../ops/nn_ops.cc. 
+#include ""tensorflow/core/framework/op_requires.h"" +#include ""tensorflow/core/platform/errors.h"" #define EIGEN_USE_THREADS #include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" @@ -117,6 +119,18 @@ class QuantizedMaxPoolingOp : public MaxPoolingOp { : MaxPoolingOp(context) {} void Compute(OpKernelContext* context) override { + auto min_input_tensor = context->input(1); + auto max_input_tensor = context->input(2); + OP_REQUIRES( + context, min_input_tensor.NumElements() == 1, + errors::InvalidArgument( + ""min_input must be a scalar float value, got tensor with shape "", + min_input_tensor.shape())); + OP_REQUIRES( + context, max_input_tensor.NumElements() == 1, + errors::InvalidArgument( + ""max_input must be a scalar float value, got tensor with shape "", + max_input_tensor.shape())); const float min_input = context->input(1).flat()(0); const float max_input = context->input(2).flat()(0); MaxPoolingOp::Compute(context); ",1,train 2b7100d6cdff36aa21010a82269bc05a6d1cc74a,tensorflow/tensorflow,"Cleanup and remove duplicate validation in `SparseCount`. We have valdiation that is duplicated, checking different conditions, in different formats and failing to capture all cases. This should fix all the previous bugs. PiperOrigin-RevId: 414886981 Change-Id: Ibf0bba0beb057b76d505324bb9487565daf95f01",count_ops.cc,"@@ -185,6 +185,27 @@ class SparseCount : public OpKernel { errors::InvalidArgument( ""Input indices must be a 2-dimensional tensor. Got: "", indices.shape().DebugString())); + OP_REQUIRES(context, TensorShapeUtils::IsVector(values.shape()), + errors::InvalidArgument(""Input values must be a vector. Got: "", + values.shape().DebugString())); + OP_REQUIRES(context, TensorShapeUtils::IsVector(shape.shape()), + errors::InvalidArgument(""Input shape must be a vector. Got: "", + shape.shape().DebugString())); + OP_REQUIRES(context, + values.shape().dim_size(0) == indices.shape().dim_size(0), + errors::InvalidArgument( + ""Number of values must match first dimension of indices."", + ""Got "", values.shape().dim_size(0), + "" values, indices shape: "", indices.shape().DebugString())); + OP_REQUIRES( + context, shape.shape().dim_size(0) == indices.shape().dim_size(1), + errors::InvalidArgument( + ""Number of dimensions must match second dimension of indices."", + ""Got "", shape.shape().dim_size(0), + "" dimensions, indices shape: "", indices.shape().DebugString())); + OP_REQUIRES(context, shape.NumElements() > 0, + errors::InvalidArgument( + ""The shape argument requires at least one element."")); if (use_weights) { OP_REQUIRES( @@ -195,28 +216,11 @@ class SparseCount : public OpKernel { ""; values shape: "", values.shape().DebugString())); } - OP_REQUIRES(context, shape.NumElements() != 0, - errors::InvalidArgument( - ""The shape argument requires at least one element."")); - bool is_1d = shape.NumElements() == 1; auto shape_vector = shape.flat(); int num_batches = is_1d ? 1 : shape_vector(0); int num_values = values.NumElements(); - for (int b = 0; b < shape_vector.size(); b++) { - OP_REQUIRES(context, shape_vector(b) >= 0, - errors::InvalidArgument( - ""Elements in dense_shape must be >= 0. 
Instead got:"", - shape.DebugString())); - } - - OP_REQUIRES(context, num_values == indices.shape().dim_size(0), - errors::InvalidArgument( - ""Number of values must match first dimension of indices."", - ""Got "", num_values, - "" values, indices shape: "", indices.shape().DebugString())); - const auto indices_values = indices.matrix(); const auto values_values = values.flat(); const auto weight_values = weights.flat(); @@ -225,16 +229,6 @@ class SparseCount : public OpKernel { T max_value = 0; - OP_REQUIRES(context, num_values <= indices.shape().dim_size(0), - errors::InvalidArgument( - ""The first dimension of indices must be equal to or "" - ""greather than number of values. ( "", - indices.shape().dim_size(0), "" vs. "", num_values, "" )"")); - OP_REQUIRES(context, indices.shape().dim_size(1) > 0, - errors::InvalidArgument(""The second dimension of indices must "" - ""be greater than 0. Received: "", - indices.shape().dim_size(1))); - for (int idx = 0; idx < num_values; ++idx) { int batch = is_1d ? 0 : indices_values(idx, 0); if (batch >= num_batches) { ",1,train adbbabdb0d3abb3cdeac69e38a96de1d678b24b3,tensorflow/tensorflow,"Further validate sparse tensor for `SparseCount`: indices must be valid within dense shape. PiperOrigin-RevId: 414888122 Change-Id: I4552bd74c135ecd4bcb5448acc0a3ce9402d8286",count_ops.cc,"@@ -206,6 +206,23 @@ class SparseCount : public OpKernel { OP_REQUIRES(context, shape.NumElements() > 0, errors::InvalidArgument( ""The shape argument requires at least one element."")); + // Validate indices: each index must be valid for the corresponding + // dimension. This could be possibly done better. + const auto indices_values = indices.matrix(); + const auto shape_vector = shape.vec(); + int num_values = values.NumElements(); // same as first dim of indices + int rank = indices.shape().dim_size(1); + for (int i = 0; i < num_values; ++i) { + for (int j = 0; j < rank; ++j) { + OP_REQUIRES( + context, + indices_values(i, j) >= 0 && indices_values(i, j) < shape_vector(j), + errors::InvalidArgument( + ""Invalid index value at "", i, "": dimension "", j, "" has value "", + indices_values(i, j), "" which is not in [0, "", shape_vector(j), + "") (as given by dense shape "", shape.DebugString())); + } + } if (use_weights) { OP_REQUIRES( @@ -217,11 +234,8 @@ class SparseCount : public OpKernel { } bool is_1d = shape.NumElements() == 1; - auto shape_vector = shape.flat(); int num_batches = is_1d ? 1 : shape_vector(0); - int num_values = values.NumElements(); - const auto indices_values = indices.matrix(); const auto values_values = values.flat(); const auto weight_values = weights.flat(); ",1,test e5b0eec199c2d03de54fd6a7fd9275692218e2bc,tensorflow/tensorflow,"[lite] Add validation check for dilation height/width to be positive integers. PiperOrigin-RevId: 416429178 Change-Id: If7cdcddca54486434d9b2f06e7e2b401d7c3ee25",depthwise_conv.cc,"@@ -115,6 +115,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4); TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 4); + TF_LITE_ENSURE(context, params->dilation_height_factor > 0); + TF_LITE_ENSURE(context, params->dilation_width_factor > 0); const TfLiteType data_type = input->type; ",1,train 8c6f391a2282684a25cbfec7687bd5d35261a209,tensorflow/tensorflow,"[lite] Add check for bias_size is zero to avoid division by zero. This shouldn't happen for properly converted models. 
Just safety check PiperOrigin-RevId: 416383645 Change-Id: If8e508bf696ae8ecfb927e69c139a8ccf7fe60cb",common.h,"@@ -75,6 +75,7 @@ float ActivationFunction(float x) { inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size, const float* bias_data, int array_size, float* array_data) { + if (bias_size == 0) return; // Note: see b/132215220: in May 2019 we thought it would be OK to replace // this with the Eigen one-liner: // return (array.colwise() + bias).cwiseMin(clamp_max).cwiseMin(clamp_max). ",1,train a1e1511dde36b3f8aa27a6ec630838e7ea40e091,tensorflow/tensorflow,"[lite] Update TfLiteIntArrayCreate to return size_t PiperOrigin-RevId: 416439896 Change-Id: I847f69b68d1ddaff4b1e925a09b8b69c1756653b",common.c,"@@ -21,10 +21,10 @@ limitations under the License. #include #endif // TF_LITE_STATIC_MEMORY -int TfLiteIntArrayGetSizeInBytes(int size) { +size_t TfLiteIntArrayGetSizeInBytes(int size) { static TfLiteIntArray dummy; - int computed_size = sizeof(dummy) + sizeof(dummy.data[0]) * size; + size_t computed_size = sizeof(dummy) + sizeof(dummy.data[0]) * size; #if defined(_MSC_VER) // Context for why this is needed is in http://b/189926408#comment21 computed_size -= sizeof(dummy.data[0]); @@ -51,7 +51,7 @@ int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size, #ifndef TF_LITE_STATIC_MEMORY TfLiteIntArray* TfLiteIntArrayCreate(int size) { - int alloc_size = TfLiteIntArrayGetSizeInBytes(size); + size_t alloc_size = TfLiteIntArrayGetSizeInBytes(size); if (alloc_size <= 0) return NULL; TfLiteIntArray* ret = (TfLiteIntArray*)malloc(alloc_size); if (!ret) return ret; ",1,train a1e1511dde36b3f8aa27a6ec630838e7ea40e091,tensorflow/tensorflow,"[lite] Update TfLiteIntArrayCreate to return size_t PiperOrigin-RevId: 416439896 Change-Id: I847f69b68d1ddaff4b1e925a09b8b69c1756653b",common.h,"@@ -98,7 +98,7 @@ typedef struct TfLiteIntArray { // Given the size (number of elements) in a TfLiteIntArray, calculate its size // in bytes. -int TfLiteIntArrayGetSizeInBytes(int size); +size_t TfLiteIntArrayGetSizeInBytes(int size); #ifndef TF_LITE_STATIC_MEMORY // Create a array of a given `size` (uninitialized entries). ",1,train 1de49725a5fc4e48f1a3b902ec3599ee99283043,tensorflow/tensorflow,"[lite] Check for overflow when creating required bytes. PiperOrigin-RevId: 417629001 Change-Id: Ia7feb3ea8e988f4fd4b3c98c1a1fed4557d99fd7",embedding_lookup_sparse.cc,"@@ -72,6 +72,7 @@ limitations under the License. 
#include ""tensorflow/lite/kernels/internal/tensor_ctypes.h"" #include ""tensorflow/lite/kernels/internal/tensor_utils.h"" #include ""tensorflow/lite/kernels/kernel_util.h"" +#include ""tensorflow/lite/util.h"" namespace tflite { namespace ops { @@ -175,25 +176,33 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TfLiteIntArray* output_shape = TfLiteIntArrayCreate(output_rank); TF_LITE_ENSURE(context, output_shape != nullptr); int k = 0; - int embedding_size = 1; - int lookup_size = 1; + size_t embedding_size = 1; + size_t lookup_size = 1; for (int i = 0; i < lookup_rank - 1; i++, k++) { - const int dim = dense_shape->data.i32[i]; - lookup_size *= dim; + const size_t dim = dense_shape->data.i32[i]; + TF_LITE_ENSURE_MSG( + context, + MultiplyAndCheckOverflow(lookup_size, dim, &lookup_size) == kTfLiteOk, + ""Lookup size overflowed.""); output_shape->data[k] = dim; } for (int i = 1; i < embedding_rank; i++, k++) { - const int dim = SizeOfDimension(value, i); - embedding_size *= dim; + const size_t dim = SizeOfDimension(value, i); + TF_LITE_ENSURE_MSG(context, + MultiplyAndCheckOverflow(embedding_size, dim, + &embedding_size) == kTfLiteOk, + ""Embedding size overflowed.""); output_shape->data[k] = dim; } TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, output, output_shape)); - const int output_size = lookup_size * embedding_size; + const size_t output_size = lookup_size * embedding_size; TfLiteTensorRealloc(output_size * sizeof(float), output); float* output_ptr = GetTensorData(output); const float* weights_ptr = GetTensorData(weights); const float* value_ptr = GetTensorData(value); + // Makes sure reallocation was successful. + TF_LITE_ENSURE(context, output_ptr != nullptr); std::fill_n(output_ptr, output_size, 0.0f); ",1,train a4e401da71458d253b05e41f28637b65baf64be4,tensorflow/tensorflow,"Prevent segfault in `embedding_lookup_sparse.cc` Previous fixes missed one additional case. PiperOrigin-RevId: 417676944 Change-Id: I8ab412155cf9b1e897448a6611d209eaa7ca9e66",embedding_lookup_sparse.cc,"@@ -159,6 +159,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 3, &weights)); const TfLiteTensor* value; TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 4, &value)); + const size_t values_size = NumElements(value); const int lookup_rank = SizeOfDimension(indices, 1); const int embedding_rank = NumDimensions(value); @@ -253,6 +254,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { current_squares_weight += w * w; current_total_weight += w; for (int k = 0; k < embedding_size; k++) { + // only index if indices are valid + if (current_output_offset + k < 0) continue; + if (current_output_offset + k >= output_size) continue; + if (example_embedding_offset + k < 0) continue; + if (example_embedding_offset + k >= values_size) continue; output_ptr[current_output_offset + k] += value_ptr[example_embedding_offset + k] * w; } ",1,train f19be71717c497723ba0cea0379e84f061a75e01,tensorflow/tensorflow,"[lite] Move MultiplyAndCheckOverflow to util to be able to share it. PiperOrigin-RevId: 416897229 Change-Id: I5feb44881bdcbb6ed911da4f17c55bb978754059",subgraph.cc,"@@ -690,27 +690,6 @@ TfLiteStatus Subgraph::CheckInputAndOutputForOverlap(const int* input_indices, return kTfLiteOk; } -namespace { -// Multiply two sizes and return true if overflow occurred; -// This is based off tensorflow/overflow.h but is simpler as we already -// have unsigned numbers. 
It is also generalized to work where sizeof(size_t) -// is not 8. -TfLiteStatus MultiplyAndCheckOverflow(size_t a, size_t b, size_t* product) { - // Multiplying a * b where a and b are size_t cannot result in overflow in a - // size_t accumulator if both numbers have no non-zero bits in their upper - // half. - constexpr size_t size_t_bits = 8 * sizeof(size_t); - constexpr size_t overflow_upper_half_bit_position = size_t_bits / 2; - *product = a * b; - // If neither integers have non-zero bits past 32 bits can't overflow. - // Otherwise check using slow devision. - if (TFLITE_EXPECT_FALSE((a | b) >> overflow_upper_half_bit_position != 0)) { - if (a != 0 && *product / a != b) return kTfLiteError; - } - return kTfLiteOk; -} -} // namespace - TfLiteStatus Subgraph::BytesRequired(TfLiteType type, const int* dims, size_t dims_size, size_t* bytes) { TF_LITE_ENSURE(&context_, bytes != nullptr); ",1,train f19be71717c497723ba0cea0379e84f061a75e01,tensorflow/tensorflow,"[lite] Move MultiplyAndCheckOverflow to util to be able to share it. PiperOrigin-RevId: 416897229 Change-Id: I5feb44881bdcbb6ed911da4f17c55bb978754059",util.cc,"@@ -27,6 +27,7 @@ limitations under the License. #include ""tensorflow/lite/builtin_ops.h"" #include ""tensorflow/lite/c/common.h"" +#include ""tensorflow/lite/core/macros.h"" #include ""tensorflow/lite/schema/schema_generated.h"" namespace tflite { @@ -176,4 +177,19 @@ bool IsValidationSubgraph(const char* name) { // NOLINTNEXTLINE: can't use absl::StartsWith as absl is not allowed. return name && std::string(name).find(kValidationSubgraphNamePrefix) == 0; } + +TfLiteStatus MultiplyAndCheckOverflow(size_t a, size_t b, size_t* product) { + // Multiplying a * b where a and b are size_t cannot result in overflow in a + // size_t accumulator if both numbers have no non-zero bits in their upper + // half. + constexpr size_t size_t_bits = 8 * sizeof(size_t); + constexpr size_t overflow_upper_half_bit_position = size_t_bits / 2; + *product = a * b; + // If neither integers have non-zero bits past 32 bits can't overflow. + // Otherwise check using slow devision. + if (TFLITE_EXPECT_FALSE((a | b) >> overflow_upper_half_bit_position != 0)) { + if (a != 0 && *product / a != b) return kTfLiteError; + } + return kTfLiteOk; +} } // namespace tflite ",1,train f19be71717c497723ba0cea0379e84f061a75e01,tensorflow/tensorflow,"[lite] Move MultiplyAndCheckOverflow to util to be able to share it. PiperOrigin-RevId: 416897229 Change-Id: I5feb44881bdcbb6ed911da4f17c55bb978754059",util.h,"@@ -99,6 +99,12 @@ constexpr char kValidationSubgraphNamePrefix[] = ""VALIDATION:""; // Checks whether the prefix of the subgraph name indicates the subgraph is a // validation subgraph. bool IsValidationSubgraph(const char* name); + +// Multiply two sizes and return true if overflow occurred; +// This is based off tensorflow/overflow.h but is simpler as we already +// have unsigned numbers. It is also generalized to work where sizeof(size_t) +// is not 8. +TfLiteStatus MultiplyAndCheckOverflow(size_t a, size_t b, size_t* product); } // namespace tflite #endif // TENSORFLOW_LITE_UTIL_H_ ",1,train f19be71717c497723ba0cea0379e84f061a75e01,tensorflow/tensorflow,"[lite] Move MultiplyAndCheckOverflow to util to be able to share it. PiperOrigin-RevId: 416897229 Change-Id: I5feb44881bdcbb6ed911da4f17c55bb978754059",util_test.cc,"@@ -22,6 +22,7 @@ limitations under the License. 
#include #include +#include ""tensorflow/lite/c/c_api_types.h"" #include ""tensorflow/lite/c/common.h"" #include ""tensorflow/lite/schema/schema_generated.h"" @@ -130,5 +131,12 @@ TEST(ValidationSubgraph, NameIsDetected) { EXPECT_TRUE(IsValidationSubgraph(""VALIDATION:main"")); } +TEST(MultiplyAndCheckOverflow, Validate) { + size_t res = 0; + EXPECT_TRUE(MultiplyAndCheckOverflow(1, 2, &res) == kTfLiteOk); + EXPECT_FALSE(MultiplyAndCheckOverflow(static_cast(123456789023), + 1223423425, &res) == kTfLiteOk); +} + } // namespace } // namespace tflite ",1,train 6364463d6f5b6254cac3d6aedf999b6a96225038,tensorflow/tensorflow,"[lite] Add some safety checks to avoid out of bound access for sparsity format PiperOrigin-RevId: 416910386 Change-Id: Ic0b4dc048dc4b5a6309c572b8c4c9f776e4db60a",sparsity_format_converter.cc,"@@ -282,10 +282,12 @@ void FormatConverter::InitSparseToDenseConverter( block_size_.resize(block_map_.size()); for (int i = 0; i < original_rank; i++) { if (block_dim < block_map_.size() && block_map_[block_dim] == i) { - int orig_dim = traversal_order_[original_rank + block_dim]; - block_size_[block_dim] = dense_size[orig_dim]; - blocked_shape_[i] = dense_shape_[i] / dense_size[orig_dim]; - block_dim++; + if (original_rank + block_dim < traversal_order_.size()) { + int orig_dim = traversal_order_[original_rank + block_dim]; + block_size_[block_dim] = dense_size[orig_dim]; + blocked_shape_[i] = dense_shape_[i] / dense_size[orig_dim]; + block_dim++; + } } else { blocked_shape_[i] = dense_shape_[i]; } @@ -328,13 +330,15 @@ void FormatConverter::Populate(const T* src_data, std::vector indices, Populate(src_data, indices, level + 1, prev_idx * shape_of_level + i, src_data_ptr, dest_data); } - } else { + } else if (prev_idx + 1 < dim_metadata_[metadata_idx].size()) { const auto& array_segments = dim_metadata_[metadata_idx]; const auto& array_indices = dim_metadata_[metadata_idx + 1]; for (int i = array_segments[prev_idx]; i < array_segments[prev_idx + 1]; i++) { - indices[level] = array_indices[i]; - Populate(src_data, indices, level + 1, i, src_data_ptr, dest_data); + if (i < array_indices.size() && level < indices.size()) { + indices[level] = array_indices[i]; + Populate(src_data, indices, level + 1, i, src_data_ptr, dest_data); + } } } } ",1,train 6c0b2b70eeee588591680f5b7d5d38175fd7cdf6,tensorflow/tensorflow,"[lite] add validation check for sparse fully connected PiperOrigin-RevId: 417629354 Change-Id: If96171c4bd4f5fdb01d6368d6deab19d1c9beca7",fully_connected.cc,"@@ -928,6 +928,36 @@ TfLiteStatus EvalShuffledQuantized(TfLiteContext* context, TfLiteNode* node, return kTfLiteOk; } +// Verifies that sparsity values are valid given input/weight/output. 
+bool VerifySparsity(const RuntimeShape& weights_shape, + const RuntimeShape& input_shape, + const RuntimeShape& output_shape, + const TfLiteSparsity* sparsity) { + const int weights_dims_count = weights_shape.DimensionsCount(); + const int output_dims_count = output_shape.DimensionsCount(); + const int w0_size = sparsity->dim_metadata[0].dense_size; + const int accum_depth = weights_shape.Dims(weights_dims_count - 1); + const int output_elements = output_shape.FlatSize(); + const int input_elements = input_shape.FlatSize(); + const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1); + const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2, + output_shape, output_dims_count - 1); + const int max_batch_index = batches - 1; + const int max_output = max_batch_index * output_depth + w0_size; + const int max_batch_depth = accum_depth * max_batch_index; + + // Verify output size is enough. + if (output_elements < max_output) return false; + + // Verify index from sparse in input is valid. + for (int i = 0; i < sparsity->dim_metadata[1].array_indices->size; ++i) { + if (input_elements <= + max_batch_depth + sparsity->dim_metadata[1].array_indices->data[i]) + return false; + } + return true; +} + template TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, TfLiteFullyConnectedParams* params, OpData* data, @@ -968,24 +998,32 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, ""Unsupported sparse fully-connected weight format.""); return kTfLiteError; } + const auto& input_shape = GetTensorShape(input); + const auto& filter_shape = GetTensorShape(filter); + const auto& output_shape = GetTensorShape(output); + const auto& bias_shape = GetTensorShape(bias); + if (!VerifySparsity(filter_shape, input_shape, output_shape, &sparsity)) { + TF_LITE_KERNEL_LOG(context, ""Invalid sparse fully-connected format.""); + return kTfLiteError; + } if (sparsity.dim_metadata_size == kDimMetadataSizeRandomSparse) { // Random sparse. optimized_ops::FullyConnectedSparseWeight( - sparsity, op_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + sparsity, op_params, // Disable formatting + input_shape, GetTensorData(input), // Disable formatting + filter_shape, GetTensorData(filter), // Disable formatting + bias_shape, GetTensorData(bias), // Disable formatting + output_shape, GetTensorData(output)); } else if (sparsity.dim_metadata_size == kDimMetadataSizeBlockSparse && sparsity.dim_metadata[2].dense_size == 4) { // Block sparse with block size of 1x4. optimized_ops::FullyConnectedSparseWeight1x4( - sparsity, op_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output), + sparsity, op_params, // Disable formatting + input_shape, GetTensorData(input), // Disable formatting + filter_shape, GetTensorData(filter), // Disable formatting + bias_shape, GetTensorData(bias), // Disable formatting + output_shape, GetTensorData(output), CpuBackendContext::GetFromContext(context)); } else { TF_LITE_KERNEL_LOG(context, ",1,train 14fea662350e7c26eb5fe1be2ac31704e5682ee6,tensorflow/tensorflow,"Prevent `CHECK`-fail when decoding resource handles from proto In certain scenarios, the proto might contain tensors that have too many elements (overflow). 
This is a `CHECK`-fail in general, but we should prevent this, given how many CVEs caused by that we have received this year (a large fraction of 200). PiperOrigin-RevId: 408049766 Change-Id: I2ac20b247aa8ed9110846fbdb7a0a9401f2c168c",resource_handle.cc,"@@ -17,8 +17,11 @@ limitations under the License. #include ""absl/strings/str_format.h"" #include ""tensorflow/core/framework/resource_handle.pb.h"" +#include ""tensorflow/core/framework/tensor_shape.h"" #include ""tensorflow/core/lib/core/errors.h"" #include ""tensorflow/core/lib/strings/strcat.h"" +#include ""tensorflow/core/platform/errors.h"" +#include ""tensorflow/core/platform/macros.h"" namespace tensorflow { @@ -28,7 +31,15 @@ namespace tensorflow { ResourceHandle::ResourceHandle() {} ResourceHandle::ResourceHandle(const ResourceHandleProto& proto) { - FromProto(proto); + TF_CHECK_OK(FromProto(proto)); +} + +Status ResourceHandle::BuildResourceHandle(const ResourceHandleProto& proto, + ResourceHandle* out) { + if (out == nullptr) + return errors::Internal( + ""BuildResourceHandle() was called with nullptr for the output""); + return out->FromProto(proto); } ResourceHandle::~ResourceHandle() {} @@ -46,7 +57,7 @@ void ResourceHandle::AsProto(ResourceHandleProto* proto) const { } } -void ResourceHandle::FromProto(const ResourceHandleProto& proto) { +Status ResourceHandle::FromProto(const ResourceHandleProto& proto) { set_device(proto.device()); set_container(proto.container()); set_name(proto.name()); @@ -55,10 +66,16 @@ void ResourceHandle::FromProto(const ResourceHandleProto& proto) { std::vector dtypes_and_shapes; for (const auto& dtype_and_shape : proto.dtypes_and_shapes()) { DataType dtype = dtype_and_shape.dtype(); - PartialTensorShape shape(dtype_and_shape.shape()); + PartialTensorShape shape; + Status s = PartialTensorShape::BuildPartialTensorShape( + dtype_and_shape.shape(), &shape); + if (!s.ok()) { + return s; + } dtypes_and_shapes.push_back(DtypeAndPartialTensorShape{dtype, shape}); } dtypes_and_shapes_ = std::move(dtypes_and_shapes); + return Status::OK(); } string ResourceHandle::SerializeAsString() const { @@ -69,9 +86,7 @@ string ResourceHandle::SerializeAsString() const { bool ResourceHandle::ParseFromString(const string& s) { ResourceHandleProto proto; - const bool status = proto.ParseFromString(s); - if (status) FromProto(proto); - return status; + return proto.ParseFromString(s) && FromProto(proto).ok(); } string ResourceHandle::DebugString() const { @@ -140,7 +155,9 @@ bool DecodeResourceHandleList(std::unique_ptr d, if (!proto.ParseFromArray(d->Data(sizes[i]), sizes[i])) { return false; } - ps[i].FromProto(proto); + if (!ps[i].FromProto(proto).ok()) { + return false; + } } return true; } ",1,train 14fea662350e7c26eb5fe1be2ac31704e5682ee6,tensorflow/tensorflow,"Prevent `CHECK`-fail when decoding resource handles from proto In certain scenarios, the proto might contain tensors that have too many elements (overflow). This is a `CHECK`-fail in general, but we should prevent this, given how many CVEs caused by that we have received this year (a large fraction of 200). PiperOrigin-RevId: 408049766 Change-Id: I2ac20b247aa8ed9110846fbdb7a0a9401f2c168c",resource_handle.h,"@@ -46,6 +46,11 @@ class ResourceHandle { ResourceHandle(const ResourceHandleProto& proto); ~ResourceHandle(); + // Use this factory method if the `proto` comes from user controlled input, to + // prevent a denial of service. 
+ static Status BuildResourceHandle(const ResourceHandleProto& proto, + ResourceHandle* out); + // Unique name for the device containing the resource. const std::string& device() const { return device_; } @@ -91,7 +96,7 @@ class ResourceHandle { // Conversion to and from ResourceHandleProto void AsProto(ResourceHandleProto* proto) const; - void FromProto(const ResourceHandleProto& proto); + Status FromProto(const ResourceHandleProto& proto); // Serialization via ResourceHandleProto std::string SerializeAsString() const; ",1,train 14fea662350e7c26eb5fe1be2ac31704e5682ee6,tensorflow/tensorflow,"Prevent `CHECK`-fail when decoding resource handles from proto In certain scenarios, the proto might contain tensors that have too many elements (overflow). This is a `CHECK`-fail in general, but we should prevent this, given how many CVEs caused by that we have received this year (a large fraction of 200). PiperOrigin-RevId: 408049766 Change-Id: I2ac20b247aa8ed9110846fbdb7a0a9401f2c168c",tensor.cc,"@@ -537,6 +537,46 @@ TensorBuffer* FromProtoField(Allocator* a, const TensorProto& in, int64_t n) { return buf; } +// Separate implementation for `ResourceHandle` to handle the case when the +// proto for the resource is invalid. See `resource_handle.h` constructor and +// static factory builder. +template <> +TensorBuffer* FromProtoField(Allocator* a, + const TensorProto& in, int64_t n) { + CHECK_GT(n, 0); + Buffer* buf = new Buffer(a, n); + ResourceHandle* data = buf->template base(); + if (data == nullptr) { + buf->Unref(); + return nullptr; + } + const int64_t in_n = ProtoHelper::NumElements(in); + if (in_n <= 0) { + std::fill_n(data, n, ResourceHandle()); + } else { + // If tensor shape says we have n < in_n elements in the output tensor + // then make sure to only decode the first n out of the in_n elements in the + // in tensors. In all other cases, we decode all in_n elements of in and set + // the remaining elements up to n to be the default ResourceHandle() value. + const int64_t real_n = n < in_n ? n : in_n; + for (int64_t i = 0; i < real_n; ++i) { + Status s = ResourceHandle::BuildResourceHandle(in.resource_handle_val(i), + &data[i]); + if (!s.ok()) { + LOG(ERROR) << ""Could not decode resource handle from proto \"""" + << in.resource_handle_val(i).ShortDebugString() + << ""\"", returned status: "" << s.ToString(); + buf->Unref(); + return nullptr; + } + } + for (int64_t i = in_n; i < n; ++i) { + data[i] = ResourceHandle(); + } + } + return buf; +} + template <> TensorBuffer* FromProtoField(Allocator* a, const TensorProto& in, int64_t n) { ",1,train c2b31ff2d3151acb230edc3f5b1832d2c713a9e0,tensorflow/tensorflow,"Remove a `DCHECK`-fail, log an error instead. `DCHECK` in debug mode results in crashes. TensorFlow has had multiple vulnerabilities due to this. Outside of debug mode, `DCHECK` is a no-op. A better alternative is to report an error to the log buffer and continue. This should happen both in debug mode and in prod mode. 
PiperOrigin-RevId: 408375925 Change-Id: Id5b3e19c73f3fbe0cc4bba26ca44ff9607bb6356",op_def_util.cc,"@@ -821,9 +821,10 @@ bool RepeatedAttrDefEqual( const protobuf::RepeatedPtrField& a2) { std::unordered_map a1_set; for (const OpDef::AttrDef& def : a1) { - DCHECK(a1_set.find(def.name()) == a1_set.end()) - << ""AttrDef names must be unique, but '"" << def.name() - << ""' appears more than once""; + if (a1_set.find(def.name()) != a1_set.end()) { + LOG(ERROR) << ""AttrDef names must be unique, but '"" << def.name() + << ""' appears more than once""; + } a1_set[def.name()] = &def; } for (const OpDef::AttrDef& def : a2) { ",1,train 97282c6d0d34476b6ba033f961590b783fa184cd,tensorflow/tensorflow,"Prevent a crash due to heap OOB write in grappler. PiperOrigin-RevId: 408318417 Change-Id: If095feb8c001e3a8ac4a85b7387b81e8309df47d",graph_properties.cc,"@@ -1134,7 +1134,12 @@ class SymbolicShapeRefiner { GetUnknownOutputShape(node, output_port); InferenceContext* ctx = GetContext(node); if (ctx == nullptr) { - return errors::InvalidArgument(""Missing context""); + return errors::InvalidArgument(""SetUnknownShape: Missing context""); + } + if (output_port < 0 || output_port >= ctx->num_outputs()) { + return errors::InvalidArgument( + ""SetUnknownShape: output_port must be in [0, "", ctx->num_outputs(), + "") but was "", output_port); } ctx->set_output(output_port, shape); return Status::OK(); ",1,train 1b54cadd19391b60b6fcccd8d076426f7221d5e8,tensorflow/tensorflow,"Add missing validation to sparse dense cwise ops. PiperOrigin-RevId: 415543133 Change-Id: I5baf3284e919338afb96178c468ad3d3cb0d956c",sparse_dense_binary_op_shared.cc,"@@ -78,11 +78,24 @@ class SparseDenseBinaryOpShared : public OpKernel { ""but received shapes: "", values_t->shape().DebugString(), "" and "", shape_t->shape().DebugString())); + OP_REQUIRES( + ctx, TensorShapeUtils::IsVector(shape_t->shape()), + errors::InvalidArgument(""Input sp_shape must be a vector. Got: "", + shape_t->shape().DebugString())); OP_REQUIRES( ctx, values_t->dim_size(0) == indices_t->dim_size(0), errors::InvalidArgument( ""The first dimension of values and indices should match. ("", values_t->dim_size(0), "" vs. "", indices_t->dim_size(0), "")"")); + OP_REQUIRES( + ctx, shape_t->shape().dim_size(0) == indices_t->shape().dim_size(1), + errors::InvalidArgument( + ""Number of dimensions must match second dimension of indices. "", + ""Got "", shape_t->shape().dim_size(0), + "" dimensions, indices shape: "", indices_t->shape().DebugString())); + OP_REQUIRES(ctx, shape_t->NumElements() > 0, + errors::InvalidArgument( + ""The shape argument requires at least one element."")); const auto indices_mat = indices_t->matrix(); const auto shape_vec = shape_t->vec(); ",1,test e952a89b7026b98fe8cbe626514a93ed68b7c510,tensorflow/tensorflow,"Prevent overflow in sparse dense cwise ops. PiperOrigin-RevId: 415543171 Change-Id: I22dab7c41be2121ab5efe5403ca0e2f9b7cb24b8",sparse_dense_binary_op_shared.cc,"@@ -99,7 +99,9 @@ class SparseDenseBinaryOpShared : public OpKernel { const auto indices_mat = indices_t->matrix(); const auto shape_vec = shape_t->vec(); - const auto lhs_dims = BCast::FromShape(TensorShape(shape_vec)); + TensorShape lhs_shape; + OP_REQUIRES_OK(ctx, TensorShape::BuildTensorShape(shape_vec, &lhs_shape)); + const auto lhs_dims = BCast::FromShape(lhs_shape); const auto rhs_dims = BCast::FromShape(dense_t->shape()); BCast b(lhs_dims, rhs_dims, false); // false for keeping the same num dims. 
",1,train a68f68061e263a88321c104a6c911fe5598050a8,tensorflow/tensorflow,"Replace faulty overflow check with a builder for `TensorShape`. Prevents an integer overflow that was not caught before. PiperOrigin-RevId: 415381595 Change-Id: I76585ddedc912bd9f4a390aeafa8e2ced1a28863",sparse_tensors_map_ops.cc,"@@ -263,22 +263,10 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp { ""Rank of input SparseTensor should be > 1, but saw rank: "", rank)); auto input_shape_vec = input_shape->vec(); - int new_num_elements = 1; - bool overflow_ocurred = false; - for (int i = 0; i < input_shape_vec.size(); i++) { - new_num_elements = - MultiplyWithoutOverflow(new_num_elements, input_shape_vec(i)); - if (new_num_elements < 0) { - overflow_ocurred = true; - break; - } - } - - OP_REQUIRES( - context, !overflow_ocurred, - errors::Internal(""Encountered overflow from large input shape."")); - TensorShape tensor_input_shape(input_shape_vec); + TensorShape tensor_input_shape; + OP_REQUIRES_OK(context, TensorShape::BuildTensorShape(input_shape_vec, + &tensor_input_shape)); gtl::InlinedVector std_order(rank); std::iota(std_order.begin(), std_order.end(), 0); SparseTensor input_st; ",1,test b51b82fe65ebace4475e3c54eb089c18a4403f1c,tensorflow/tensorflow,"Add missing validation to `AddManySparseToTensorsMap`. Sparse tensors have a set of requirements for the 3 components and not all of them were checked. PiperOrigin-RevId: 415358027 Change-Id: I96cbb672999cd1da772c22fabbd15507e32e12dc",sparse_tensors_map_ops.cc,"@@ -231,16 +231,29 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp { errors::InvalidArgument( ""Input indices should be a matrix but received shape "", input_indices->shape().DebugString())); - OP_REQUIRES(context, TensorShapeUtils::IsVector(input_values->shape()), errors::InvalidArgument( ""Input values should be a vector but received shape "", input_values->shape().DebugString())); - OP_REQUIRES(context, TensorShapeUtils::IsVector(input_shape->shape()), errors::InvalidArgument( ""Input shape should be a vector but received shape "", input_shape->shape().DebugString())); + OP_REQUIRES( + context, + input_values->shape().dim_size(0) == input_indices->shape().dim_size(0), + errors::InvalidArgument( + ""Number of values must match first dimension of indices. "", ""Got "", + input_values->shape().dim_size(0), + "" values, indices shape: "", input_indices->shape().DebugString())); + OP_REQUIRES( + context, + input_shape->shape().dim_size(0) == input_indices->shape().dim_size(1), + errors::InvalidArgument( + ""Number of dimensions must match second dimension of indices. "", + ""Got "", input_shape->shape().dim_size(0), + "" dimensions, indices shape: "", + input_indices->shape().DebugString())); int rank = input_shape->NumElements(); ",1,train 8a513cec4bec15961fbfdedcaa5376522980455c,tensorflow/tensorflow,"Prevent null dereference read in `SpecializeType()` For some adversarial protos, the attribute for a key might not exist. PiperOrigin-RevId: 408382090 Change-Id: Ie7eabe532c9ff280fce5dce1f6cdb93c76c2e040",full_type_util.cc,"@@ -22,6 +22,7 @@ limitations under the License. 
#include ""tensorflow/core/framework/op_def.pb.h"" #include ""tensorflow/core/framework/types.h"" #include ""tensorflow/core/platform/statusor.h"" +#include ""tensorflow/core/protobuf/error_codes.pb.h"" namespace tensorflow { @@ -102,7 +103,11 @@ StatusOr SpecializeType(const AttrSlice& attrs, auto* arg = t->mutable_args(i); if (arg->type_id() == TFT_VAR) { const auto* attr = attrs.Find(arg->s()); - DCHECK(attr != nullptr); + if (attr == nullptr) { + return Status( + error::INVALID_ARGUMENT, + absl::StrCat(""Could not find an attribute for key "", arg->s())); + } if (attr->value_case() == AttrValue::kList) { const auto& attr_list = attr->list(); arg->set_type_id(TFT_PRODUCT); ",1,train 5b491cd5e41ad63735161cec9c2a568172c8b6a3,tensorflow/tensorflow,"Validate `proto.dtype()` before calling `set_dtype()`. This prevents a `DCHECK`-fail when the proto contains an invalid dtype for a tensor shape with 0 elements or for an incomplete tensor shape. PiperOrigin-RevId: 408369083 Change-Id: Ia21a3e3d62a90d642a4561f08f3b543e5ad00c46",tensor.cc,"@@ -983,6 +983,15 @@ bool Tensor::FromProto(Allocator* a, const TensorProto& proto) { dtype_error = true, dtype_error = true); } if (dtype_error || p == nullptr) return false; + } else { + // Handle the case of empty tensors (N = 0) or tensors with incomplete shape + // (N = -1). All other values of `shape.num_elements()` should be invalid by + // construction. + // Here, we just need to validate that the `proto.dtype()` value is valid. + bool dtype_error = false; + CASES_WITH_DEFAULT(proto.dtype(), break, dtype_error = true, + dtype_error = true); + if (dtype_error) return false; } shape_ = shape; set_dtype(proto.dtype()); ",1,train cb164786dc891ea11d3a900e90367c339305dc7b,tensorflow/tensorflow,"Properly handle the case where `SpecializeType()` returns an error `Status`. If the error case in `SpecializeType()` is reached, then we would get a crash when trying to access the value of an errorenous `StatusOr` object PiperOrigin-RevId: 408380069 Change-Id: If3c3fc876dcf9384d5ec7a4985adc68c23ea7318",shape_inference.cc,"@@ -170,7 +170,10 @@ void InferenceContext::PreInputInit( const std::vector& input_tensors_as_shapes) { // TODO(mdan): This is also done at graph construction. Run only here instead? const auto ret = full_type::SpecializeType(attrs_, op_def); - DCHECK(ret.status().ok()) << ""while instantiating types: "" << ret.status(); + if (!ret.status().ok()) { + construction_status_ = ret.status(); + return; + } ret_types_ = ret.ValueOrDie(); input_tensors_ = input_tensors; ",1,train ef1d027be116f25e25bb94a60da491c2cf55bd0b,tensorflow/tensorflow,"Prevent copying uninitialized data in `AssignOp`. This prevents harder to debug undefined behaviors that cannot be traced back to the original tensor after assignments occur earlier in the graph execution. Several of these undefined behaviors are just reference bindings to null pointers, which are caught when running under ubsan/asan. PiperOrigin-RevId: 408654780 Change-Id: Iad2ec40d43f5fd7ea016c20283356c12d5ddeab1",assign_op.h,"@@ -50,6 +50,12 @@ class AssignOp : public OpKernel { // We always return the input ref. context->forward_ref_input_to_ref_output(0, 0); + // Prevent copying uninitialized data, to solve harder to debug undefined + // behaviors that cannot be traced back to the original tensor. 
+ OP_REQUIRES( + context, rhs.IsInitialized(), + errors::Internal(""Right hand side of AssignOp is not initialized"")); + // We can't always know how this value will be used downstream, so make // conservative assumptions in specifying constraints on the memory // allocation attributes, unless the Grappler graph analysis determined that ",1,train 0657c83d08845cc434175934c642299de2c0f042,tensorflow/tensorflow,"Fix heap OOB read/write due to incorrect indexing. PiperOrigin-RevId: 408578046 Change-Id: Ifc9ffea49e5890f55fcb2c27568611052c3ddcfa",full_type_util.cc,"@@ -100,7 +100,7 @@ StatusOr SpecializeType(const AttrSlice& attrs, // verifications are needed, they should be done by separately, and in a // way that can be reused for type inference. for (int j = 0; j < t->args_size(); j++) { - auto* arg = t->mutable_args(i); + auto* arg = t->mutable_args(j); if (arg->type_id() == TFT_VAR) { const auto* attr = attrs.Find(arg->s()); if (attr == nullptr) { ",1,train fcd18ce3101f245b083b30655c27b239dc72221e,tensorflow/tensorflow,"Prevent integer overflow in `OpLevelCostEstimator::CalculateTensorSize`. In order to not change the API, we return a negative value in case of overflow. A better fix is to change the API to return a status instead. PiperOrigin-RevId: 408713061 Change-Id: I3771475b0c72a2844a3854086966562fd33f2da5",op_level_cost_estimator.cc,"@@ -1555,7 +1555,13 @@ int64_t OpLevelCostEstimator::CalculateTensorSize( int64_t count = CalculateTensorElementCount(tensor, found_unknown_shapes); int size = DataTypeSize(BaseType(tensor.dtype())); VLOG(2) << ""Count: "" << count << "" DataTypeSize: "" << size; - return count * size; + int64_t tensor_size = MultiplyWithoutOverflow(count, size); + if (tensor_size < 0) { + VLOG(1) << ""Overflow encountered when computing tensor size, multiplying "" + << count << "" with "" << size; + return -1; + } + return tensor_size; } int64_t OpLevelCostEstimator::CalculateInputSize(const OpInfo& op_info, ",1,train b9bd6cfd1c50e6807846af9a86f9b83cafc9c8ae,tensorflow/tensorflow,"Prevent integer overflow in `OpLevelCostEstimator::CalculateOutputSize`. In order to not change the API, we return a negative value in case of overflow. A better fix is to change the API to return a status instead. PiperOrigin-RevId: 408701427 Change-Id: Idf31e7f0bf18ca824d084fdd355e1f653f145c20",op_level_cost_estimator.cc,"@@ -27,6 +27,7 @@ limitations under the License. #include ""tensorflow/core/grappler/costs/op_context.h"" #include ""tensorflow/core/grappler/costs/utils.h"" #include ""tensorflow/core/platform/errors.h"" +#include ""tensorflow/core/util/overflow.h"" namespace tensorflow { namespace grappler { @@ -1607,7 +1608,14 @@ int64_t OpLevelCostEstimator::CalculateOutputSize(const OpInfo& op_info, auto output_shape = MaybeGetMinimumShape(original_output_shape, num_dims, found_unknown_shapes); for (const auto& dim : output_shape.dim()) { - output_size *= dim.size(); + int64_t new_output_size = + MultiplyWithoutOverflow(output_size, dim.size()); + if (new_output_size < 0) { + VLOG(1) << ""Overflow encountered when estimating cost, multiplying "" + << output_size << "" with "" << dim.size(); + return -1; + } + output_size = new_output_size; } total_output_size += output_size; VLOG(1) << ""Output Size: "" << output_size ",1,train 4f38b1ac8e42727e18a2f0bde06d3bee8e77b250,tensorflow/tensorflow,"Prevent null dereference read in `GetInitOp`. We have a map of maps. 
We test that the key exists in the first map but then we don't have any validation that this also means the second map has the needed key. In the scenarios where this is not the case, we'll dereference a nullptr, if we don't have this check PiperOrigin-RevId: 408739325 Change-Id: If9bb7ed759aba1f3b56a34913f209508dbaf65ce",loader_util.cc,"@@ -34,9 +34,14 @@ Status GetInitOp(const string& export_dir, const MetaGraphDef& meta_graph_def, const auto& init_op_sig_it = meta_graph_def.signature_def().find(kSavedModelInitOpSignatureKey); if (init_op_sig_it != sig_def_map.end()) { - *init_op_name = init_op_sig_it->second.outputs() - .find(kSavedModelInitOpSignatureKey) - ->second.name(); + const auto& sig_def_outputs = init_op_sig_it->second.outputs(); + const auto& sig_def_outputs_it = + sig_def_outputs.find(kSavedModelInitOpSignatureKey); + if (sig_def_outputs_it == sig_def_outputs.end()) { + return errors::FailedPrecondition(""Could not find output "", + kSavedModelInitOpSignatureKey); + } + *init_op_name = sig_def_outputs_it->second.name(); return Status::OK(); } ",1,train c79ccba517dbb1a0ccb9b01ee3bd2a63748b60dd,tensorflow/tensorflow,"Fix memory leak when a graph node is invalid. If a graph node is invalid but a kernel is created then we set the kernel back to `nullptr` but we forget to delete it. Hence, we get a memory leak. PiperOrigin-RevId: 408968108 Change-Id: I1d8a9d0d8988ed5e08be8b9f2004ce1b4cd11b7c",immutable_executor_state.cc,"@@ -131,6 +131,7 @@ Status ImmutableExecutorState::Initialize(const Graph& graph) { Status s = params_.create_kernel(n->properties(), &item->kernel); if (!s.ok()) { + params_.delete_kernel(item->kernel); item->kernel = nullptr; s = AttachDef(s, *n); return s; ",1,train 92dba16749fae36c246bec3f9ba474d9ddeb7662,tensorflow/tensorflow,"Prevent a null-pointer dereference / `CHECK`-fail in grappler. PiperOrigin-RevId: 409187354 Change-Id: I369c249cca32e6c56ec193f0ebbf2f2768fc7d43",dependency_optimizer.cc,"@@ -75,8 +75,10 @@ bool DependencyOptimizer::SafeToRemoveIdentity(const NodeDef& node) const { } const NodeDef* input = node_map_->GetNode(NodeName(node.input(0))); - CHECK(input != nullptr) << ""node = "" << node.name() - << "" input = "" << node.input(0); + if (input == nullptr) { + VLOG(1) << ""node = "" << node.name() << "" input = "" << node.input(0); + return false; + } // Don't remove Identity nodes corresponding to Variable reads or following // Recv. if (IsVariable(*input) || IsRecv(*input)) { ",1,train 1361fb7e29449629e1df94d44e0427ebec8c83c7,tensorflow/tensorflow,"Fix abort caused by allocating a too large vector. We need to make sure that the number of dimensions in a shape is within limits. PiperOrigin-RevId: 408997911 Change-Id: If59e1c23f2ec9c2d4ff4d8632fd62b2a7773a4eb",shape_inference.cc,"@@ -14,6 +14,8 @@ limitations under the License. ==============================================================================*/ #include ""tensorflow/core/framework/shape_inference.h"" +#include + #include ""tensorflow/core/framework/bounds_check.h"" #include ""tensorflow/core/framework/full_type_util.h"" #include ""tensorflow/core/framework/node_def.pb.h"" @@ -789,6 +791,19 @@ Status InferenceContext::InternalMakeShapeFromTensor( return ReturnUnknownShape(out); } const auto num_dims = Value(shape_dim); + // TODO(mihaimaruseac): Should be `TensorShape::MaxDimensions()` as we are + // not able to materialize shapes with more than this number of dimensions + // but then shape inference would fail for operations such as + // `tf.range`/`tf.ones`, etc. 
where the shape is not really materialized, + // only used during the inference. Hence, just prevent doing a `reserve` + // with a very large argument. + const int64_t max_dimensions = 1 << 20; + if (num_dims >= max_dimensions) { + return errors::Internal( + ""Cannot create a tensor with "", num_dims, + "" dimensions, as these would be more than maximum of "", + max_dimensions); + } std::vector dims; dims.reserve(num_dims); for (int i = 0; i < num_dims; i++) dims.push_back(UnknownDim()); ",1,train 1fb27733f943295d874417630edd3b38b34ce082,tensorflow/tensorflow,"Remove `CHECK`-fails from `IsSimplifiableReshape` PiperOrigin-RevId: 409164987 Change-Id: I58c7dd459ff348c3dbae95e00c4c5e63b30a4e65",constant_folding.cc,"@@ -1689,7 +1689,11 @@ Status ConstantFolding::IsSimplifiableReshape( if (!IsReshape(node)) { return errors::Internal(""Node "", node.name(), "" is not a Reshape node""); } - CHECK_LE(2, node.input_size()); + if (2 > node.input_size()) { + return errors::Internal(""Node "", node.name(), + "" must have at most 2 inputs but has "", + node.input_size()); + } const NodeDef* new_shape = node_map_->GetNode(node.input(1)); if (!IsReallyConstant(*new_shape)) { return errors::Internal(""Node "", node.name(), "" has shape "", @@ -1707,7 +1711,11 @@ Status ConstantFolding::IsSimplifiableReshape( if (!s.ok()) { return errors::Internal(""Could not evaluate node "", node.name()); } - CHECK_EQ(1, outputs.size()); + if (outputs.size() != 1) { + return errors::Internal(""Node "", node.name(), + "" must have exactly 1 output but has "", + outputs.size()); + } const std::vector& props = properties.GetInputProperties(node.name()); ",1,train 240655511cd3e701155f944a972db71b6c0b1bb6,tensorflow/tensorflow,"Eliminate `CHECK`-fails from `IsSimplifiableReshape` via `MakeShape()` PiperOrigin-RevId: 409166738 Change-Id: I7f0a3590b8acae3f3e3e2fe636e1f5ef285693cf",constant_folding.cc,"@@ -1741,14 +1741,16 @@ Status ConstantFolding::IsSimplifiableReshape( int32_t dim = outputs[0]->flat()(i); shp.push_back(dim); } - TF_CHECK_OK(TensorShapeUtils::MakeShape(shp, &new_dims)); + s = TensorShapeUtils::MakeShape(shp, &new_dims); + if (!s.ok()) return s; } else { std::vector shp; for (int i = 0; i < outputs[0]->NumElements(); ++i) { int64_t dim = outputs[0]->flat()(i); shp.push_back(dim); } - TF_CHECK_OK(TensorShapeUtils::MakeShape(shp, &new_dims)); + s = TensorShapeUtils::MakeShape(shp, &new_dims); + if (!s.ok()) return s; } if (!shape.IsCompatibleWith(new_dims)) { ",1,train ebc1a2ffe5a7573d905e99bd0ee3568ee07c12c1,tensorflow/tensorflow,"Make `IsSimplifiableReshape` return `Status` instead of `bool`. This is to allow remove `CHECK`-fails in subsequent commits. 
PiperOrigin-RevId: 409160987 Change-Id: I3f050218a3832271395c4372a0b8ea05f1c03d80",constant_folding.cc,"@@ -1684,15 +1684,17 @@ Status ConstantFolding::FoldGraph( return Status::OK(); } -bool ConstantFolding::IsSimplifiableReshape( +Status ConstantFolding::IsSimplifiableReshape( const NodeDef& node, const GraphProperties& properties) const { if (!IsReshape(node)) { - return false; + return errors::Internal(""Node "", node.name(), "" is not a Reshape node""); } CHECK_LE(2, node.input_size()); const NodeDef* new_shape = node_map_->GetNode(node.input(1)); if (!IsReallyConstant(*new_shape)) { - return false; + return errors::Internal(""Node "", node.name(), "" has shape "", + new_shape->DebugString(), + "" which is not a constant""); } TensorVector outputs; auto outputs_cleanup = gtl::MakeCleanup([&outputs] { @@ -1703,22 +1705,25 @@ bool ConstantFolding::IsSimplifiableReshape( Status s = EvaluateNode(*new_shape, TensorVector(), &outputs); if (!s.ok()) { - return false; + return errors::Internal(""Could not evaluate node "", node.name()); } CHECK_EQ(1, outputs.size()); const std::vector& props = properties.GetInputProperties(node.name()); if (props.empty()) { - return false; + return errors::Internal(""Node "", node.name(), "" has no properties""); } const OpInfo::TensorProperties& prop = props[0]; if (prop.dtype() == DT_INVALID) { - return false; + return errors::Internal(""Node "", node.name(), "" has property "", + prop.DebugString(), "" with invalid dtype""); } const PartialTensorShape shape(prop.shape()); if (!shape.IsFullyDefined()) { - return false; + return errors::Internal(""Node "", node.name(), "" has property "", + prop.DebugString(), "" with shape "", + shape.DebugString(), "" which is not fully defined""); } PartialTensorShape new_dims; @@ -1738,7 +1743,12 @@ bool ConstantFolding::IsSimplifiableReshape( TF_CHECK_OK(TensorShapeUtils::MakeShape(shp, &new_dims)); } - return shape.IsCompatibleWith(new_dims); + if (!shape.IsCompatibleWith(new_dims)) { + return errors::Internal(""Expected shape "", shape.DebugString(), + ""to be compatible with "", new_dims.DebugString()); + } + + return Status::OK(); } #define IS_VALUE_CASE(DTYPE, VALUE) \ @@ -2925,7 +2935,7 @@ bool ConstantFolding::SimplifyReduction(GraphDef* optimized_graph, bool ConstantFolding::SimplifyReshape(const GraphProperties& properties, bool use_shape_info, NodeDef* node) { if (!use_shape_info || node->attr().count(""T"") == 0 || - !IsSimplifiableReshape(*node, properties)) { + !IsSimplifiableReshape(*node, properties).ok()) { return false; } DataType output_type = node->attr().at(""T"").type(); ",1,train ebc1a2ffe5a7573d905e99bd0ee3568ee07c12c1,tensorflow/tensorflow,"Make `IsSimplifiableReshape` return `Status` instead of `bool`. This is to allow remove `CHECK`-fails in subsequent commits. 
PiperOrigin-RevId: 409160987 Change-Id: I3f050218a3832271395c4372a0b8ea05f1c03d80",constant_folding.h,"@@ -129,8 +129,8 @@ class ConstantFolding : public GraphOptimizer { Status FoldGraph(const GraphProperties& properties, GraphDef* output, absl::flat_hash_set* nodes_to_not_simplify); - bool IsSimplifiableReshape(const NodeDef& node, - const GraphProperties& properties) const; + Status IsSimplifiableReshape(const NodeDef& node, + const GraphProperties& properties) const; Status SimplifyGraph(GraphDef* optimized_graph, GraphProperties* properties, absl::flat_hash_set* nodes_to_not_simplify); Status SimplifyNode(NodeDef* node, GraphDef* optimized_graph, ",1,train c2426bba00a01de6913738df8fa78e0215fcce02,tensorflow/tensorflow,"Use `PartialTensorShape` instead of `TensorShape`. `TensorShape` constructor throws a CHECK-fail if shape is partial/overflows which the other doesn't. We are only determining the number of elements in the shape and partial shape should be used as it returns negative number when needed. PiperOrigin-RevId: 409205384 Change-Id: Ia56542ff9ec758f2c9ffc7e4dcc9fa7eecd86e7b",attr_value_util.cc,"@@ -45,7 +45,7 @@ constexpr int kMaxTensorNestDepth = 100; // not fully defined return -1. int64_t TensorByteSize(const TensorProto& t) { // num_elements returns -1 if shape is not fully defined. - int64_t num_elems = TensorShape(t.tensor_shape()).num_elements(); + int64_t num_elems = PartialTensorShape(t.tensor_shape()).num_elements(); return num_elems < 0 ? -1 : num_elems * DataTypeSize(t.dtype()); } ",1,train a7c02f1a9bbc35473969618a09ee5f9f5d3e52d9,tensorflow/tensorflow,"Validate real and expected type of arguments to cwise ops. Without this validation, it is possible to trigger a `CHECK`-fail denial of service. This is a rollforward of a previous commit which was rolled back as it was relying on RTTI. This time we don't use RTTI, we replace `typeid(Tin).name()` with a double function call, `DataTypeString(DataTypeToEnum::v())`. PiperOrigin-RevId: 409340416 Change-Id: I96080b2796729a3a9b65e7c68307ac276070f2f0",cwise_ops_common.h,"@@ -87,7 +87,17 @@ class BinaryOp : public BinaryOpShared { void Compute(OpKernelContext* ctx) override { const Tensor& input_0 = ctx->input(0); + OP_REQUIRES(ctx, input_0.dtype() == DataTypeToEnum::v(), + errors::InvalidArgument( + ""Expected tensor of type "", + DataTypeString(DataTypeToEnum::v()), "" but got type "", + DataTypeString(input_0.dtype()))); const Tensor& input_1 = ctx->input(1); + OP_REQUIRES(ctx, input_1.dtype() == DataTypeToEnum::v(), + errors::InvalidArgument( + ""Expected tensor of type "", + DataTypeString(DataTypeToEnum::v()), "" but got type "", + DataTypeString(input_1.dtype()))); const Device& eigen_device = ctx->eigen_device(); bool error = false; bool* const error_ptr = Functor::has_errors ? &error : nullptr; ",1,train e746adbfcfee15e9cfdb391ff746c765b99bdf9b,tensorflow/tensorflow,"Prevent use after free in `DecodePng` kernel. We are cleaning up the memory in `decode` and then we are using an `OP_REQUIRES` to check an invariant on the `decode` data. 
PiperOrigin-RevId: 409299145 Change-Id: I4eb93aaca52483eb202e89b78df07fbb2f6cb254",decode_image_op.cc,"@@ -339,7 +339,6 @@ class DecodeImageV2Op : public OpKernel { if (width != static_cast(decode.width) || width <= 0 || width >= (1LL << 27) || height != static_cast(decode.height) || height <= 0 || height >= (1LL << 27) || total_size >= (1LL << 29)) { - png::CommonFreeDecode(&decode); OP_REQUIRES(context, false, errors::InvalidArgument(""PNG size too large for int: "", decode.width, "" by "", decode.height)); ",1,train ab51e5b813573dc9f51efa335aebcf2994125ee9,tensorflow/tensorflow,"Prevent memory leak in decoding PNG images. PiperOrigin-RevId: 409300653 Change-Id: I6182124c545989cef80cefd439b659095920763b",decode_image_op.cc,"@@ -18,6 +18,8 @@ limitations under the License. #include #include +#include ""tensorflow/core/lib/gtl/cleanup.h"" + #define EIGEN_USE_THREADS #include ""absl/strings/escaping.h"" @@ -326,6 +328,16 @@ class DecodeImageV2Op : public OpKernel { context, png::CommonInitDecode(input, channels_, channel_bits, &decode), errors::InvalidArgument(""Invalid PNG. Failed to initialize decoder."")); + // If we reach this point, then there is data in `decode` which must be + // freed by the time we end execution in this function. We cannot call + // `png::CommonFreeDecode()` before an `OP_REQUIRES` because if + // `OP_REQUIRES` constraint is satisfied then the data would be freed + // prematurely. Instead, let's use a `Cleanup` object. + auto cleanup = gtl::MakeCleanup([&decode]() { + std::cerr << ""Cleanup called...\n""; + png::CommonFreeDecode(&decode); + }); + // Verify that width and height are not too large: // - verify width and height don't overflow int. // - width can later be multiplied by channels_ and sizeof(uint16), so ",1,train 3d89911481ba6ebe8c88c1c0b595412121e6c645,tensorflow/tensorflow,"Eliminate `CHECK`-fail from `function.cc`. PiperOrigin-RevId: 409414744 Change-Id: Ic854e12ab2edb88b165d32e2d632c4ee654d71ad",function.cc,"@@ -181,7 +181,9 @@ class FunctionInstantiationHelper { DataTypeVector dtypes; TF_RETURN_IF_ERROR( ArgNumType(attr_values, arg_def, &is_type_list, &dtypes)); - CHECK_GE(dtypes.size(), size_t{1}); + if (dtypes.size() < size_t{1}) { + return errors::Internal(""Expected a list of at least one dtype""); + } int arg_index = result_.nodes.size(); TF_RETURN_IF_ERROR( AddItem(arg_def.name(), {true, arg_index, 0, is_type_list, dtypes})); ",1,train dcc21c7bc972b10b6fb95c2fb0f4ab5a59680ec2,tensorflow/tensorflow,"Eliminate debug `CHECK`-fail from `function.cc` PiperOrigin-RevId: 409416119 Change-Id: I8376ee464d434e9b970ff0ad49edfdaa2a273cfe",function.cc,"@@ -191,7 +191,11 @@ class FunctionInstantiationHelper { for (size_t i = 0; i < dtypes.size(); ++i) { TF_RETURN_IF_ERROR(AddItem(strings::StrCat(arg_def.name(), "":"", i), {true, arg_index, 0, false, {dtypes[i]}})); - DCHECK_EQ(arg_index, result_.nodes.size()); + if (arg_index != result_.nodes.size()) { + return errors::Internal( + ""Expected arg_index to be equal to the number of nodes in result."", + "" Got "", arg_index, "" and "", result_.nodes.size()); + } string name = arg_def.name(); if (dtypes.size() > 1) { strings::StrAppend(&name, ""_"", i); ",1,train 0aaaae6eca5a7175a193696383f582f53adab23f,tensorflow/tensorflow,"Prevent overflow in grappler cost estimation of crop&resize op. The crop parameters are user controlled, so we should make sure a user can not trigger an overflow maliciously. 
PiperOrigin-RevId: 409670234 Change-Id: I7994734a98b037c5642e051240329d16f959aae4",op_level_cost_estimator.cc,"@@ -2681,27 +2681,42 @@ Status OpLevelCostEstimator::PredictCropAndResize(const OpContext& op_context, // calculation differs from rough estimate in implementation, as it separates // out cost per box from cost per pixel and cost per element. + // Since crop arguments are user controlled, check for overflow. + int64_t crop_area = MultiplyWithoutOverflow(crop_height, crop_width); + if (crop_area < 0) + return errors::InvalidArgument(""Cannot estimate cost, multiplying "", + crop_height, "" with "", crop_width, + "" would overflow""); + int64_t crop_volume = MultiplyWithoutOverflow(crop_area, num_boxes); + if (crop_volume < 0) + return errors::InvalidArgument(""Cannot estimate cost, multiplying "", + crop_area, "" with "", num_boxes, + "" would overflow""); + int64_t crop_depth = MultiplyWithoutOverflow(crop_height, num_boxes); + if (crop_depth < 0) + return errors::InvalidArgument(""Cannot estimate cost, multiplying "", + crop_height, "" with "", num_boxes, + "" would overflow""); + // Ops for variables height_scale and width_scale. int64_t ops = (sub_cost * 6 + mul_cost * 2 + div_cost * 2) * num_boxes; // Ops for variable in_y. - ops += (mul_cost * 2 + sub_cost + add_cost) * crop_height * num_boxes; + ops += (mul_cost * 2 + sub_cost + add_cost) * crop_depth; // Ops for variable in_x (same computation across both branches). - ops += (mul_cost * 2 + sub_cost + add_cost) * crop_height * crop_width * - num_boxes; + ops += (mul_cost * 2 + sub_cost + add_cost) * crop_volume; // Specify op_cost based on the method. if (use_bilinear_interp) { // Ops for variables top_y_index, bottom_y_index, y_lerp. - ops += (floor_cost + ceil_cost + sub_cost) * crop_height * num_boxes; + ops += (floor_cost + ceil_cost + sub_cost) * crop_depth; // Ops for variables left_x, right_x, x_lerp; - ops += (floor_cost + ceil_cost + sub_cost) * crop_height * crop_width * - num_boxes; + ops += (floor_cost + ceil_cost + sub_cost) * crop_volume; // Ops for innermost loop across depth. ops += (cast_to_float_cost * 4 + add_cost * 3 + sub_cost * 3 + mul_cost * 3) * output_elements; } else /* method == ""nearest"" */ { // Ops for variables closest_x_index and closest_y_index. - ops += round_cost * 2 * crop_height * crop_width * num_boxes; + ops += round_cost * 2 * crop_volume; // Ops for innermost loop across depth. ops += cast_to_float_cost * output_elements; } ",1,train 6b5adc0877de832b2a7c189532dbbbc64622eeb6,tensorflow/tensorflow,"Prevent `CHECK`-fail when building reference tensor. The tensor constructor does not allow reference dtypes, as these should not show up explicitly. However, when passed these invalid types instead of building an invalid object the constructor crashes via a `CHECK`-fail. We have a static builder that properly handles this case but is not applicable given current usage. Instead, before calling the constructor, we can check that the dtype is not a reference type and return an error otherwise, given that the dtype is user controlled so malicious users can trigger denial of service. 
PiperOrigin-RevId: 409662503 Change-Id: I5892f831fde7f276cd7ab34519cf6b8061c71a59",constant_folding.cc,"@@ -1363,6 +1363,11 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node, input_tensor.ToString(), "" has a dtype of DT_INVALID."")); } + if (IsRefType(raw_val.dtype())) { + return errors::InvalidArgument( + ""Not allowed to construct a tensor with reference dtype, got "", + DataTypeString(raw_val.dtype())); + } Tensor* value = new Tensor(raw_val.dtype(), raw_val.tensor_shape()); if (!value->FromProto(raw_val)) { delete (value); ",1,train 045deec1cbdebb27d817008ad5df94d96a08b1bf,tensorflow/tensorflow,"Prevent null pointer dereference in `mutable_graph_view` PiperOrigin-RevId: 409684472 Change-Id: I577eb9d9ac470fcec0501423171e739a4ec0cb5c",mutable_graph_view.cc,"@@ -68,6 +68,9 @@ bool IsIdentityConsumingSwitch(const MutableGraphView& graph, } NodeDef* input_node = graph.GetNode(tensor_id.node()); + if (input_node == nullptr) { + return false; + } return IsSwitch(*input_node); } return false; ",1,train 0a365c029e437be0349c31f8d4c9926b69fa3fa1,tensorflow/tensorflow,"Prevent null pointer dereference in constant folding. Under certain conditions, an invalid protobuf saved model with invalid nodes would be loaded. During optimization phase, Grappler optimizer will then dereference a null pointer. PiperOrigin-RevId: 409683530 Change-Id: I1f10340a7ec384bc9bc587300390f1078cf5caa0",constant_folding.cc,"@@ -3505,6 +3505,9 @@ bool ConstantFolding::MulConvPushDown(GraphDef* optimized_graph, NodeDef* node, NodeDef* mul_left_child = node_map_->GetNode(node->input(0)); NodeDef* mul_right_child = node_map_->GetNode(node->input(1)); + if (mul_left_child == nullptr || mul_right_child == nullptr) { + return false; + } // One child must be constant, and the second must be Conv op. const bool left_child_is_constant = IsReallyConstant(*mul_left_child); const bool right_child_is_constant = IsReallyConstant(*mul_right_child); ",1,test 955059813cc325dc1db5e2daa6221271406d4439,tensorflow/tensorflow,"Check for type inference error on node construction. PiperOrigin-RevId: 409415804 Change-Id: Ieb6e020906b96f522bf8e2fa103715ddbbdc434a",graph.cc,"@@ -561,6 +561,11 @@ Node* Graph::AddNode(NodeDef node_def, Status* status) { VLOG(3) << ""AddNode: found type constructor for "" << node_def.name(); const auto ctor_type = full_type::SpecializeType(AttrSlice(node_def), op_reg_data->op_def); + if (!ctor_type.ok()) { + *status = errors::InvalidArgument(""type error: "", + ctor_type.status().ToString()); + return nullptr; + } const FullTypeDef ctor_typedef = ctor_type.ValueOrDie(); if (ctor_typedef.type_id() != TFT_UNSET) { *(node_def.mutable_experimental_type()) = ctor_typedef; ",1,test 448a16182065bd08a202d9057dd8ca541e67996c,tensorflow/tensorflow,"Prevent stack overflow when FunctionLib in GraphDef has a self-recursive function. It is likely that no recursivity is supported, but we should handle this separately. PiperOrigin-RevId: 414860329 Change-Id: I02a2270e86282b37362ddd485eeef16fb986a9e0",loader.cc,"@@ -25,6 +25,7 @@ limitations under the License. 
#include ""tensorflow/core/framework/attr_value.pb.h"" #include ""tensorflow/core/framework/function.pb.h"" #include ""tensorflow/core/framework/node_def.pb.h"" +#include ""tensorflow/core/framework/op_def.pb.h"" #include ""tensorflow/core/framework/tensor.pb.h"" #include ""tensorflow/core/lib/io/path.h"" #include ""tensorflow/core/lib/monitoring/counter.h"" @@ -99,6 +100,19 @@ static Status ValidateNode(const NodeDef& node) { return Status::OK(); } +static Status ValidateFunctionNotRecursive(const FunctionDef& function) { + const auto& function_name = function.signature().name(); + for (const auto& node : function.node_def()) { + if (node.op() == function_name) { + return errors::FailedPrecondition( + ""Function "", function_name, + "" is self recursive and TensorFlow does not support this scenario.""); + } + } + + return Status::OK(); +} + static Status ValidateSavedTensors(const GraphDef& graph_def) { for (const auto& node : graph_def.node()) { TF_RETURN_IF_ERROR(ValidateNode(node)); @@ -110,6 +124,10 @@ static Status ValidateSavedTensors(const GraphDef& graph_def) { for (const auto& node : function.node_def()) { TF_RETURN_IF_ERROR(ValidateNode(node)); } + + // Also check that there is no recursivity in the library + // TODO(mihaimaruseac): Do more than self-recursivity + TF_RETURN_IF_ERROR(ValidateFunctionNotRecursive(function)); } } ",1,train c99d98cd189839dcf51aee94e7437b54b31f8abd,tensorflow/tensorflow,"Handle invalid inputs instead of crashing. PiperOrigin-RevId: 409549744 Change-Id: I7f5935b34b53f7e426a5462fcc027bdbf5dcda24",graph.cc,"@@ -222,10 +222,16 @@ void Node::RunForwardTypeInference() { const auto& node_t = node->def().experimental_type(); if (node_t.type_id() != TFT_UNSET) { int ix = input_idx[i]; - DCHECK(ix < node_t.args_size()) - << ""input "" << i << "" should have an output "" << ix - << "" but instead only has "" << node_t.args_size() - << "" outputs: "" << node_t.DebugString(); + if (ix >= node_t.args_size()) { + LOG(WARNING) << name() << "" has bad type information: input "" << i + << "" should have an output "" << ix + << "" but instead only has "" << node_t.args_size() + << "" outputs: "" << node_t.DebugString() + << ""\nThis indicates either "" + ""a bug in op registration or a corrupted graph.""; + ClearTypeInfo(); + return; + } input_types.emplace_back(node_t.args(ix)); } else { input_types.emplace_back(*no_type); ",1,test 35f0fabb4c178253a964d7aabdbb15c6a398b69a,tensorflow/tensorflow,"Avoid Segfault for scalar shapes. Calling tensor::FromElementsOp with an empty vector of elements and no type causes a segfault. We need to let the FromElementsOp know which scalar type it should have. Also add back the DynamicBroadcastInDimOp canonicalization patterns, which previously prevented this bug from happening. Add a regression test that demonstrates the bug. PiperOrigin-RevId: 417561444 Change-Id: I6d1d6cfb71aabbad6102422625a00bbe253ac95a",tf_cpurt_symbolic_shape_optimization.cc,"@@ -157,6 +157,10 @@ llvm::Optional simplifyBroadcast(ShapeComponentAnalysis& analysis, shapes_found.push_back(*found_shape); maxRank = std::max(maxRank, found_shape->size()); } + if (maxRank == 0) { + return Value(builder->create( + loc, shapes[0].getType(), SmallVector())); + } SmallVector joined_dimensions( maxRank); ",1,train e21af685e1828f7ca65038307df5cc06de4479e8,tensorflow/tensorflow,"Fix Null-pointer dereference in BuildXlaCompilationCache If ConfigProto is not used, then use the default settings which is to allow all devices. 
PiperOrigin-RevId: 420391800 Change-Id: I88161ad7042990aef678e77b597a2fb2c8f815be",xla_platform_info.cc,"@@ -82,11 +82,13 @@ Status BuildXlaCompilationCache(DeviceBase* device, FunctionLibraryRuntime* flr, client_options.set_intra_op_parallelism_threads( device->tensorflow_cpu_worker_threads()->num_threads); - string allowed_gpus = - flr->config_proto()->gpu_options().visible_device_list(); - TF_ASSIGN_OR_RETURN(absl::optional> gpu_ids, - ParseVisibleDeviceList(allowed_gpus)); - client_options.set_allowed_devices(gpu_ids); + if (flr->config_proto()) { + string allowed_gpus = + flr->config_proto()->gpu_options().visible_device_list(); + TF_ASSIGN_OR_RETURN(absl::optional> gpu_ids, + ParseVisibleDeviceList(allowed_gpus)); + client_options.set_allowed_devices(gpu_ids); + } auto client = xla::ClientLibrary::GetOrCreateLocalClient(client_options); if (!client.ok()) { ",1,train eebb96c2830d48597d055d247c0e9aebaea94cd5,tensorflow/tensorflow,"Fix an invalid address vulnerability in `tf.raw_ops.RaggedBincount`. PiperOrigin-RevId: 368293153 Change-Id: I4b4e493d3fd05e7dc55a55de3a041a80a4f275c3",bincount_op.cc,"@@ -420,6 +420,15 @@ class RaggedBincountOp : public OpKernel { int num_values = values.size(); int batch_idx = 0; + OP_REQUIRES(ctx, splits(0) == 0, + errors::InvalidArgument(""Splits must start with 0, not with "", + splits(0))); + + OP_REQUIRES(ctx, splits(num_rows) == num_values, + errors::InvalidArgument( + ""Splits must end with the number of values, got "", + splits(num_rows), "" instead of "", num_values)); + Tensor* out_t; OP_REQUIRES_OK( ctx, ctx->allocate_output(0, TensorShape({num_rows, size}), &out_t)); ",1,train 030af767d357d1b4088c4a25c72cb3906abac489,tensorflow/tensorflow,"Fix `tf.raw_ops.ResourceCountUpTo` null pointer dereference. PiperOrigin-RevId: 368294347 Change-Id: I2c16fbfc9b4966c402c3d8e311f0d665a9c852d8",ndarray_tensor.cc,"@@ -16,6 +16,7 @@ limitations under the License. #include ""tensorflow/python/lib/core/ndarray_tensor.h"" #include +#include #include ""tensorflow/c/eager/tfe_context_internal.h"" #include ""tensorflow/c/tf_tensor_internal.h"" @@ -74,6 +75,13 @@ Status PyArrayDescr_to_TF_DataType(PyArray_Descr* descr, PyObject* key; PyObject* value; Py_ssize_t pos = 0; + + // Return an error if the fields attribute is null. + // Occurs with an improper conversion attempt to resource. + if (descr->fields == nullptr) { + return errors::Internal(""Unexpected numpy data type""); + } + if (PyDict_Next(descr->fields, &pos, &key, &value)) { // In Python 3, the keys of numpy custom struct types are unicode, unlike // Python 2, where the keys are bytes. ",1,train a7116dd3913c4a4afd2a3a938573aa7c785fdfc6,tensorflow/tensorflow,"Validate `MatrixDiagV{2,3}` arguments to prevent breakage. 
PiperOrigin-RevId: 369056033 Change-Id: Ic2018c297d3dd6f252dc1dd3667f1ed5cb1eaa42",matrix_diag_op.cc,"@@ -192,9 +192,22 @@ class MatrixDiagOp : public OpKernel { upper_diag_index = diag_index.flat()(1); } } - num_rows = context->input(2).flat()(0); - num_cols = context->input(3).flat()(0); - padding_value = context->input(4).flat()(0); + + auto& num_rows_tensor = context->input(2); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(num_rows_tensor.shape()), + errors::InvalidArgument(""num_rows must be a scalar"")); + num_rows = num_rows_tensor.flat()(0); + + auto& num_cols_tensor = context->input(3); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(num_cols_tensor.shape()), + errors::InvalidArgument(""num_cols must be a scalar"")); + num_cols = num_cols_tensor.flat()(0); + + auto& padding_value_tensor = context->input(4); + OP_REQUIRES(context, + TensorShapeUtils::IsScalar(padding_value_tensor.shape()), + errors::InvalidArgument(""padding_value must be a scalar"")); + padding_value = padding_value_tensor.flat()(0); } // Size validations. ",1,train b055b9c474cd376259dde8779908f9eeaf097d93,tensorflow/tensorflow,"Fix `tf.raw_ops.RaggedTensorToVariant` invalid resize. PiperOrigin-RevId: 368299574 Change-Id: I751c186325aa0bab397928845e790e60c2d90918",ragged_tensor_to_variant_op.cc,"@@ -159,6 +159,11 @@ class RaggedTensorToVariantOp : public OpKernel { // Unbatch the Ragged Tensor and encode the components. std::vector unbatched_ragged_input; + auto batched_splits_top_vec = + batched_ragged_input.splits(0).vec(); + int num_components = batched_splits_top_vec.size() - 1; + OP_REQUIRES(context, num_components >= 0, + errors::Internal(""Invalid split argument."")); OP_REQUIRES_OK(context, UnbatchRaggedZerothDim( batched_ragged_input, &unbatched_ragged_input)); ",1,train 799f835a3dfa00a4d852defa29b15841eea9d64f,tensorflow/tensorflow,"Fix 2 issues with `Conv3D`. We have an issue where the dimensions are not matching and this causes Eigen to crash on an assert. Then, we have an issue where we accidentally do a division by 0. PiperOrigin-RevId: 369242785 Change-Id: Ie94067b2d41f58699af99ebb5af335ad9defd931",conv_ops_3d.cc,"@@ -69,6 +69,11 @@ struct LaunchConvOp { errors::InvalidArgument(""CPU implementation of Conv3D "" ""currently only supports dilated rates "" ""of 1."")); + OP_REQUIRES(context, filter.dim_size(3) == input.dim_size(input.dims() - 1), + errors::InvalidArgument( + ""Number of channels in filter ("", filter.dim_size(3), + "") must match last dimension of input ("", + input.dim_size(input.dims() - 1), "")"")); functor::CuboidConvolution()( context->eigen_device(), output->tensor(), input.tensor(), filter.tensor(), strides[2], strides[1], @@ -142,6 +147,8 @@ class Conv3DOp : public BinaryOp { const int64 filter_depth = filter.dim_size(3); const int64 out_depth = filter.dim_size(4); + OP_REQUIRES(context, filter_depth != 0, + errors::InvalidArgument(""filter_depth must be non-zero"")); OP_REQUIRES(context, in_depth % filter_depth == 0, errors::InvalidArgument( ""Input depth must be evenly divisible by filter depth: "", ",1,train ff70c47a396ef1e3cb73c90513da4f5cb71bebba,tensorflow/tensorflow,"Fix `tf.raw_ops.GetSessionTensor` and `tf.raw_ops.DeleteSessionTensor` null pointer dereferences. 
PiperOrigin-RevId: 368294154 Change-Id: Ie10f07a0a9a1c2b685e08153d48a0ca4b93f9fc9",session_ops.cc,"@@ -91,7 +91,6 @@ TF_CALL_NUMBER_TYPES(REGISTER_GPU_KERNEL); REGISTER_GPU_KERNEL(bool); #undef REGISTER_GPU_KERNEL - class GetSessionTensorOp : public OpKernel { public: explicit GetSessionTensorOp(OpKernelConstruction* context) @@ -101,7 +100,11 @@ class GetSessionTensorOp : public OpKernel { const Tensor& handle = ctx->input(0); const string& name = handle.scalar()(); Tensor val; - OP_REQUIRES_OK(ctx, ctx->session_state()->GetTensor(name, &val)); + auto session_state = ctx->session_state(); + OP_REQUIRES(ctx, session_state != nullptr, + errors::FailedPrecondition( + ""GetSessionTensor called on null session state"")); + OP_REQUIRES_OK(ctx, session_state->GetTensor(name, &val)); ctx->set_output(0, val); } @@ -122,7 +125,6 @@ TF_CALL_NUMBER_TYPES(REGISTER_GPU_KERNEL); REGISTER_GPU_KERNEL(bool); #undef REGISTER_GPU_KERNEL - class DeleteSessionTensorOp : public OpKernel { public: explicit DeleteSessionTensorOp(OpKernelConstruction* context) @@ -131,7 +133,11 @@ class DeleteSessionTensorOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& handle = ctx->input(0); const string& name = handle.scalar()(); - OP_REQUIRES_OK(ctx, ctx->session_state()->DeleteTensor(name)); + auto session_state = ctx->session_state(); + OP_REQUIRES(ctx, session_state != nullptr, + errors::FailedPrecondition( + ""DeleteSessionTensor called on null session state"")); + OP_REQUIRES_OK(ctx, session_state->DeleteTensor(name)); } TF_DISALLOW_COPY_AND_ASSIGN(DeleteSessionTensorOp); ",1,train b1cc5e5a50e7cee09f2c6eb48eb40ee9c4125025,tensorflow/tensorflow,"Fix `tf.raw_ops.SparseCross` failing CHECK. PiperOrigin-RevId: 368701671 Change-Id: Id805729dd9ba0bda36e4bb309408129b55fb649d",sparse_cross_op.cc,"@@ -27,6 +27,7 @@ limitations under the License. #include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/tensor_shape.h"" #include ""tensorflow/core/framework/types.h"" +#include ""tensorflow/core/framework/types.pb.h"" #include ""tensorflow/core/lib/core/stringpiece.h"" #include ""tensorflow/core/lib/strings/str_util.h"" #include ""tensorflow/core/platform/fingerprint.h"" @@ -460,10 +461,19 @@ int64 CalculateBatchSize(const OpInputList& shapes_list_in, Status ValidateInput(const OpInputList& indices_list_in, const OpInputList& values_list_in, const OpInputList& shapes_list_in, - const OpInputList& dense_list_in) { + const OpInputList& dense_list_in, + const DataType& internal_type) { const auto size = indices_list_in.size(); + // Only perform internal_type check for SparseCrossOp. + // Check if the internal_type is not invalid before doing so. + bool check_type = internal_type != DT_INVALID; // Validates indices_list_in OpInputList. for (int i = 0; i < size; i++) { + if (check_type && indices_list_in[i].dtype() != DT_INT64) { + return errors::InvalidArgument(""Input indices should be of type "", + DT_INT64, "" but received "", + indices_list_in[i].dtype()); + } if (!TensorShapeUtils::IsMatrix(indices_list_in[i].shape())) { return errors::InvalidArgument( ""Input indices should be a matrix but received shape "", @@ -482,6 +492,14 @@ Status ValidateInput(const OpInputList& indices_list_in, values_list_in.size()); } for (int i = 0; i < size; i++) { + // Make sure to avoid the expected type to be string, but input values to be + // int64. 
+ if (check_type && internal_type == DT_STRING && + values_list_in[i].dtype() == DT_INT64) { + return errors::InvalidArgument(""Input values should be of internal type "", + internal_type, "" but received "", + values_list_in[i].dtype()); + } if (!TensorShapeUtils::IsVector(values_list_in[i].shape())) { return errors::InvalidArgument( ""Input values should be a vector but received shape "", @@ -502,6 +520,11 @@ Status ValidateInput(const OpInputList& indices_list_in, shapes_list_in.size()); } for (int i = 0; i < size; i++) { + if (check_type && shapes_list_in[i].dtype() != DT_INT64) { + return errors::InvalidArgument(""Input shape should be of type "", DT_INT64, + "" but received "", + shapes_list_in[i].dtype()); + } if (!TensorShapeUtils::IsVector(shapes_list_in[i].shape())) { return errors::InvalidArgument( ""Input shapes should be a vector but received shape "", @@ -517,6 +540,14 @@ Status ValidateInput(const OpInputList& indices_list_in, // Validates dense_list_in OpInputList for (int i = 0; i < dense_list_in.size(); ++i) { + // Make sure to avoid the expected type to be string, but input values to be + // int64. + if (check_type && internal_type == DT_STRING && + dense_list_in[i].dtype() == DT_INT64) { + return errors::InvalidArgument(""Dense inputs should be of internal type "", + internal_type, "" but received "", + dense_list_in[i].dtype()); + } if (!TensorShapeUtils::IsMatrix(dense_list_in[i].shape())) { return errors::InvalidArgument( ""Dense inputs should be a matrix but received shape "", @@ -698,6 +729,7 @@ class SparseCrossOp : public OpKernel { int64 signed_hash_key_; OP_REQUIRES_OK(context, context->GetAttr(""hash_key"", &signed_hash_key_)); hash_key_ = static_cast(signed_hash_key_); + OP_REQUIRES_OK(context, context->GetAttr(""internal_type"", &internal_type_)); } void Compute(OpKernelContext* context) override { @@ -711,8 +743,10 @@ class SparseCrossOp : public OpKernel { OP_REQUIRES_OK(context, context->input_list(""dense_inputs"", &dense_list_in)); - OP_REQUIRES_OK(context, ValidateInput(indices_list_in, values_list_in, - shapes_list_in, dense_list_in)); + DataType internal_type = internal_type_; + OP_REQUIRES_OK( + context, ValidateInput(indices_list_in, values_list_in, shapes_list_in, + dense_list_in, internal_type)); std::vector>> columns = GenerateColumnsFromInput(indices_list_in, values_list_in, @@ -756,6 +790,7 @@ class SparseCrossOp : public OpKernel { private: int64 num_buckets_; uint64 hash_key_; + DataType internal_type_; }; class SparseCrossV2Op : public OpKernel { @@ -773,8 +808,11 @@ class SparseCrossV2Op : public OpKernel { OP_REQUIRES_OK(context, context->input_list(""dense_inputs"", &dense_list_in)); - OP_REQUIRES_OK(context, ValidateInput(indices_list_in, values_list_in, - shapes_list_in, dense_list_in)); + // Set internal_type to invalid_type so that the check will be ignored. + DataType internal_type = DT_INVALID; + OP_REQUIRES_OK( + context, ValidateInput(indices_list_in, values_list_in, shapes_list_in, + dense_list_in, internal_type)); const Tensor* sep_t; OP_REQUIRES_OK(context, context->input(""sep"", &sep_t)); @@ -832,8 +870,11 @@ class SparseCrossHashedOp : public OpKernel { OP_REQUIRES_OK(context, context->input_list(""dense_inputs"", &dense_list_in)); - OP_REQUIRES_OK(context, ValidateInput(indices_list_in, values_list_in, - shapes_list_in, dense_list_in)); + // Set internal_type to invalid_type so that the check will be ignored. 
+ DataType internal_type = DT_INVALID; + OP_REQUIRES_OK( + context, ValidateInput(indices_list_in, values_list_in, shapes_list_in, + dense_list_in, internal_type)); const Tensor* num_buckets_t; OP_REQUIRES_OK(context, context->input(""num_buckets"", &num_buckets_t)); ",1,train 8f37b52e1320d8d72a9529b2468277791a261197,tensorflow/tensorflow,"Validate some shape requirements for `Conv3DBackpropFilter*` and `Conv3DBackpropInput*` ops. Older versions of Eigen might otherwise crash / produce OOB read on specially crafted inputs. PiperOrigin-RevId: 369293977 Change-Id: I58f51445a93936d7cf8e616f75de17677df36718",conv_grad_ops_3d.cc,"@@ -239,6 +239,20 @@ class Conv3DBackpropInputOp : public OpKernel { input_shape = context->input(0).shape(); } + OP_REQUIRES( + context, input_shape.dim_size(4) == filter_shape.dim_size(3), + errors::InvalidArgument(""input and filter_sizes must have the same "" + ""number of channels. Got "", + input_shape.dim_size(4), "" for input and "", + filter_shape.dim_size(3), "" for filter_sizes"")); + OP_REQUIRES( + context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4), + errors::InvalidArgument(""out_backprop and filter_sizes must have the "" + ""same number of channels. Got "", + out_backprop_shape.dim_size(4), + "" for out_backprop and "", + filter_shape.dim_size(4), "" for filter_sizes"")); + ConvBackpropDimensions dims; OP_REQUIRES_OK(context, ConvBackpropComputeDimensions( ""Conv3DBackpropInputOp"", /*num_spatial_dims=*/3, @@ -346,6 +360,20 @@ class Conv3DCustomBackpropInputOp : public OpKernel { input_shape = context->input(0).shape(); } + OP_REQUIRES( + context, input_shape.dim_size(4) == filter_shape.dim_size(3), + errors::InvalidArgument(""input and filter_sizes must have the same "" + ""number of channels. Got "", + input_shape.dim_size(4), "" for input and "", + filter_shape.dim_size(3), "" for filter_sizes"")); + OP_REQUIRES( + context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4), + errors::InvalidArgument(""out_backprop and filter_sizes must have the "" + ""same number of channels. Got "", + out_backprop_shape.dim_size(4), + "" for out_backprop and "", + filter_shape.dim_size(4), "" for filter_sizes"")); + ConvBackpropDimensions dims; OP_REQUIRES_OK(context, ConvBackpropComputeDimensions( ""Conv3DBackpropInputOp"", /*num_spatial_dims=*/3, @@ -696,6 +724,20 @@ class Conv3DBackpropFilterOp : public OpKernel { filter_shape = context->input(1).shape(); } + OP_REQUIRES( + context, input_shape.dim_size(4) == filter_shape.dim_size(3), + errors::InvalidArgument(""input and filter_sizes must have the same "" + ""number of channels. Got "", + input_shape.dim_size(4), "" for input and "", + filter_shape.dim_size(3), "" for filter_sizes"")); + OP_REQUIRES( + context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4), + errors::InvalidArgument(""out_backprop and filter_sizes must have the "" + ""same number of channels. Got "", + out_backprop_shape.dim_size(4), + "" for out_backprop and "", + filter_shape.dim_size(4), "" for filter_sizes"")); + ConvBackpropDimensions dims; OP_REQUIRES_OK(context, ConvBackpropComputeDimensions( @@ -808,6 +850,20 @@ class Conv3DCustomBackpropFilterOp : public OpKernel { filter_shape = context->input(1).shape(); } + OP_REQUIRES( + context, input_shape.dim_size(4) == filter_shape.dim_size(3), + errors::InvalidArgument(""input and filter_sizes must have the same "" + ""number of channels. 
Got "", + input_shape.dim_size(4), "" for input and "", + filter_shape.dim_size(3), "" for filter_sizes"")); + OP_REQUIRES( + context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4), + errors::InvalidArgument(""out_backprop and filter_sizes must have the "" + ""same number of channels. Got "", + out_backprop_shape.dim_size(4), + "" for out_backprop and "", + filter_shape.dim_size(4), "" for filter_sizes"")); + ConvBackpropDimensions dims; OP_REQUIRES_OK(context, ConvBackpropComputeDimensions( ",1,train c57c0b9f3a4f8684f3489dd9a9ec627ad8b599f5,tensorflow/tensorflow,"Fix the segfault in `tf.raw_ops.SparseCountSparseOutput`. PiperOrigin-RevId: 369264941 Change-Id: I23a96a15b8370c01ee21ba3841e1c7dcbf55e93d",count_ops.cc,"@@ -197,9 +197,17 @@ class SparseCount : public OpKernel { ""The shape argument requires at least one element."")); bool is_1d = shape.NumElements() == 1; - int num_batches = is_1d ? 1 : shape.flat()(0); + auto shape_vector = shape.flat(); + int num_batches = is_1d ? 1 : shape_vector(0); int num_values = values.NumElements(); + for (int b = 0; b < shape_vector.size(); b++) { + OP_REQUIRES(context, shape_vector(b) >= 0, + errors::InvalidArgument( + ""Elements in dense_shape must be >= 0. Instead got:"", + shape.DebugString())); + } + OP_REQUIRES(context, num_values == indices.shape().dim_size(0), errors::InvalidArgument( ""Number of values must match first dimension of indices."", ",1,train 311403edbc9816df80274bd1ea8b3c0c0f22c3fa,tensorflow/tensorflow,"Eliminate a division by 0 in 3D convolutions. Also prevent a CHECK failed introduced in the most recent change. PiperOrigin-RevId: 369322073 Change-Id: I4f609c028f89565fb2b49c3fdd20b63496582bae",conv_grad_ops_3d.cc,"@@ -239,6 +239,14 @@ class Conv3DBackpropInputOp : public OpKernel { input_shape = context->input(0).shape(); } + OP_REQUIRES(context, input_shape.dims() == 5, + errors::InvalidArgument(""input tensor must have 5 dimensions"")); + OP_REQUIRES( + context, filter_shape.dims() == 5, + errors::InvalidArgument(""filter_sizes tensor must have 5 dimensions"")); + OP_REQUIRES( + context, out_backprop_shape.dims() == 5, + errors::InvalidArgument(""out_backprop tensor must have 5 dimensions"")); OP_REQUIRES( context, input_shape.dim_size(4) == filter_shape.dim_size(3), errors::InvalidArgument(""input and filter_sizes must have the same "" @@ -360,6 +368,14 @@ class Conv3DCustomBackpropInputOp : public OpKernel { input_shape = context->input(0).shape(); } + OP_REQUIRES(context, input_shape.dims() == 5, + errors::InvalidArgument(""input tensor must have 5 dimensions"")); + OP_REQUIRES( + context, filter_shape.dims() == 5, + errors::InvalidArgument(""filter_sizes tensor must have 5 dimensions"")); + OP_REQUIRES( + context, out_backprop_shape.dims() == 5, + errors::InvalidArgument(""out_backprop tensor must have 5 dimensions"")); OP_REQUIRES( context, input_shape.dim_size(4) == filter_shape.dim_size(3), errors::InvalidArgument(""input and filter_sizes must have the same "" @@ -444,6 +460,11 @@ class Conv3DCustomBackpropInputOp : public OpKernel { // contraction compared to sharding and matmuls. const bool use_parallel_contraction = dims.batch_size == 1; + OP_REQUIRES( + context, work_unit_size > 0, + errors::InvalidArgument(""input, filter_sizes and out_backprop tensors "" + ""must all have at least 1 element"")); + const size_t shard_size = use_parallel_contraction ? 
1 @@ -724,6 +745,14 @@ class Conv3DBackpropFilterOp : public OpKernel { filter_shape = context->input(1).shape(); } + OP_REQUIRES(context, input_shape.dims() == 5, + errors::InvalidArgument(""input tensor must have 5 dimensions"")); + OP_REQUIRES( + context, filter_shape.dims() == 5, + errors::InvalidArgument(""filter_sizes tensor must have 5 dimensions"")); + OP_REQUIRES( + context, out_backprop_shape.dims() == 5, + errors::InvalidArgument(""out_backprop tensor must have 5 dimensions"")); OP_REQUIRES( context, input_shape.dim_size(4) == filter_shape.dim_size(3), errors::InvalidArgument(""input and filter_sizes must have the same "" @@ -850,6 +879,14 @@ class Conv3DCustomBackpropFilterOp : public OpKernel { filter_shape = context->input(1).shape(); } + OP_REQUIRES(context, input_shape.dims() == 5, + errors::InvalidArgument(""input tensor must have 5 dimensions"")); + OP_REQUIRES( + context, filter_shape.dims() == 5, + errors::InvalidArgument(""filter_sizes tensor must have 5 dimensions"")); + OP_REQUIRES( + context, out_backprop_shape.dims() == 5, + errors::InvalidArgument(""out_backprop tensor must have 5 dimensions"")); OP_REQUIRES( context, input_shape.dim_size(4) == filter_shape.dim_size(3), errors::InvalidArgument(""input and filter_sizes must have the same "" @@ -936,6 +973,11 @@ class Conv3DCustomBackpropFilterOp : public OpKernel { const int64 work_unit_size = size_A + size_B + size_C; + OP_REQUIRES( + context, work_unit_size > 0, + errors::InvalidArgument(""input, filter_sizes and out_backprop tensors "" + ""must all have at least 1 element"")); + const size_t shard_size = (target_working_set_size + work_unit_size - 1) / work_unit_size; ",1,train 69c68ecbb24dff3fa0e46da0d16c821a2dd22d7c,tensorflow/tensorflow,"Fix overflow CHECK issue with `tf.raw_ops.AddManySparseToTensorsMap`. PiperOrigin-RevId: 369492969 Change-Id: I1d70d6c0c92e3d7a25bc3b3aa2a0c0ac9688bf81",sparse_tensors_map_ops.cc,"@@ -21,9 +21,6 @@ limitations under the License. #include #include -#include ""tensorflow/core/framework/op_kernel.h"" -#include ""tensorflow/core/framework/register_types.h"" - #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/register_types.h"" #include ""tensorflow/core/framework/resource_mgr.h"" @@ -31,6 +28,7 @@ limitations under the License. 
#include ""tensorflow/core/framework/tensor_util.h"" #include ""tensorflow/core/framework/types.h"" #include ""tensorflow/core/lib/gtl/inlined_vector.h"" +#include ""tensorflow/core/util/overflow.h"" #include ""tensorflow/core/util/sparse/sparse_tensor.h"" namespace tensorflow { @@ -254,7 +252,22 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp { errors::InvalidArgument( ""Rank of input SparseTensor should be > 1, but saw rank: "", rank)); - TensorShape tensor_input_shape(input_shape->vec()); + auto input_shape_vec = input_shape->vec(); + int new_num_elements = 1; + bool overflow_ocurred = false; + for (int i = 0; i < input_shape_vec.size(); i++) { + new_num_elements = + MultiplyWithoutOverflow(new_num_elements, input_shape_vec(i)); + if (new_num_elements < 0) { + overflow_ocurred = true; + } + } + + OP_REQUIRES( + context, !overflow_ocurred, + errors::Internal(""Encountered overflow from large input shape."")); + + TensorShape tensor_input_shape(input_shape_vec); gtl::InlinedVector std_order(rank); std::iota(std_order.begin(), std_order.end(), 0); SparseTensor input_st; @@ -262,8 +275,7 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp { tensor_input_shape, std_order, &input_st)); - auto input_shape_t = input_shape->vec(); - const int64 N = input_shape_t(0); + const int64 N = input_shape_vec(0); Tensor sparse_handles(DT_INT64, TensorShape({N})); auto sparse_handles_t = sparse_handles.vec(); @@ -274,7 +286,7 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp { // minibatch entries. TensorShape output_shape; OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( - input_shape_t.data() + 1, + input_shape_vec.data() + 1, input_shape->NumElements() - 1, &output_shape)); // Get groups by minibatch dimension ",1,test fca9874a9b42a2134f907d2fb46ab774a831404a,tensorflow/tensorflow,"Prevent another division by zero. PiperOrigin-RevId: 369338598 Change-Id: I55471d363e401fdcf8d259670ad4eef672b731e2",conv_grad_shape_utils.cc,"@@ -127,6 +127,10 @@ Status ConvBackpropComputeDimensionsV2( // dimensions of the filter Tensor. VLOG(2) << ""input vs filter_in depth "" << dims->in_depth << "" "" << filter_shape.dim_size(num_dims - 2); + if (filter_shape.dim_size(num_dims - 2) <= 0) { + return errors ::InvalidArgument( + label, "": filter depth must be strictly greated than zero""); + } if (dims->in_depth % filter_shape.dim_size(num_dims - 2)) { return errors::InvalidArgument( label, "": input depth must be evenly divisible by filter depth""); ",1,test 2be2cdf3a123e231b16f766aa0e27d56b4606535,tensorflow/tensorflow,"Prevent yet another division by zero PiperOrigin-RevId: 369343977 Change-Id: I1a60da4cf512e60fd91e069c16e026544632fe7f",conv_grad_input_ops.h,"@@ -649,6 +649,11 @@ class Conv2DCustomBackpropInputOp : public OpKernel { dims.batch_size == 1 || thread_work_unit_size >= min_thread_work_unit_size; + OP_REQUIRES( + context, work_unit_size > 0, + errors::InvalidArgument(""input, filter_sizes and out_backprop tensors "" + ""must all have at least 1 element"")); + const size_t shard_size = use_parallel_contraction ? 1 ",1,train b12aa1d44352de21d1a6faaf04172d8c2508b42b,tensorflow/tensorflow,"Fix one more FPE. 
PiperOrigin-RevId: 369346568 Change-Id: I840fd575962adc879713a4c9cc59e6da3331caa7",conv_ops.cc,"@@ -260,6 +260,11 @@ struct LaunchConv2DOp { const int64 out_depth = output->dim_size(3); const int64 patch_depth = filter.dim_size(2); + if (patch_depth <= 0) { + ctx->SetStatus(errors::InvalidArgument( + ""filter depth must be stricly positive, got "", patch_depth)); + return; + } if (in_depth % patch_depth != 0) { ctx->SetStatus(errors::InvalidArgument( ""input depth must be evenly divisible by filter depth: "", in_depth, @@ -268,6 +273,11 @@ struct LaunchConv2DOp { } const int64 num_groups = in_depth / patch_depth; + if (num_groups <= 0) { + ctx->SetStatus(errors::InvalidArgument( + ""number of groups must be stricly positive, got "", num_groups)); + return; + } if (out_depth % num_groups != 0 || out_depth < num_groups) { ctx->SetStatus(errors::InvalidArgument( ""output depth must be evenly divisible by number of groups: "", @@ -536,6 +546,9 @@ Status ComputeConv2DDimension(const Conv2DParameters& params, errors::InvalidArgument(""Patch depth too large"")); const int in_depth = static_cast(in_depth_raw); const int patch_depth = static_cast(patch_depth_raw); + TF_REQUIRES(patch_depth > 0, + errors::InvalidArgument( + ""filter depth must be stricly positive, got "", patch_depth)); TF_REQUIRES(in_depth % patch_depth == 0, errors::InvalidArgument( ""input depth must be evenly divisible by filter depth: "", ",1,test cfa91be9863a91d5105a3b4941096044ab32036b,tensorflow/tensorflow,"Fix one FPE and remove two CHECK-fails. PiperOrigin-RevId: 369349640 Change-Id: I1fedbfc2b5bab635c5cb51f103d7c9176f79831a",quantized_conv_ops.cc,"@@ -18,6 +18,8 @@ limitations under the License. #include #include +#include ""tensorflow/core/platform/errors.h"" + #define EIGEN_USE_THREADS #define GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK @@ -227,8 +229,12 @@ class Im2ColConvFunctor { return; } - CHECK_GT(output_width, 0); - CHECK_GT(output_height, 0); + OP_REQUIRES( + context, output_width > 0, + errors::InvalidArgument(""output_width must be strictly positive"")); + OP_REQUIRES( + context, output_height > 0, + errors::InvalidArgument(""output_height must be strictly positive"")); int filter_left_offset; int filter_top_offset; if (padding == VALID) { @@ -255,6 +261,9 @@ class Im2ColConvFunctor { // by the width, then the height. This is the standard memory order in the // image world if it helps to visualize it. const int filter_value_count = filter_width * filter_height * input_depth; + OP_REQUIRES(context, filter_value_count > 0, + errors::InvalidArgument( + ""filter patch must contain at least one element"")); const int64 patches_per_chunk = kMaxChunkSize / (filter_value_count * sizeof(T1)); const int64 chunk_value_count = ",1,train a1b11d2fdd1e51bfe18bb1ede804f60abfa92da6,tensorflow/tensorflow,"Fix one division by zero PiperOrigin-RevId: 369474832 Change-Id: I1082858ed78d9b2e4738ce30b231955973d49e1e",quantized_mul_op.cc,"@@ -347,6 +347,11 @@ class QuantizedMulOp : public OpKernel { tensor_num_elements = x.NumElements(); tensor_offset = offset_x; } + if (vector_num_elements == 0) { + context->SetStatus( + errors::InvalidArgument(""vector must have at least 1 element"")); + return; + } VectorTensorMultiply( vector_data, vector_offset, vector_num_elements, tensor_data, tensor_offset, tensor_num_elements, z_data); ",1,train f851613f8f0fb0c838d160ced13c134f778e3ce7,tensorflow/tensorflow,"Fix heap buffer overflow caused by rounding. This was hard to fix. 
Due to the way we compute the pixels that influence an output pixel in resized images, for certain input configuration we might have issued a read to a pixel that is outside of boundary of the original image. This is because of floating errors that affected truncation results. PiperOrigin-RevId: 369757871 Change-Id: If89425fff930983829a2168203c11858883eebc9",quantized_resize_bilinear_op.cc,"@@ -64,6 +64,8 @@ inline void ComputeInterpolationWeights( std::max(static_cast(in_f), static_cast(0)); interpolation->upper[i] = std::min(static_cast(std::ceil(in)), in_size - 1); + interpolation->lower[i] = + std::min(interpolation->lower[i], interpolation->upper[i]); interpolation->lerp[i] = in - in_f; interpolation->ilerp[i] = static_cast((in - in_f) * (1 << resolution)); ",1,train e6a7c7cc18c3aaad1ae0872cb0a959f5c923d2bd,tensorflow/tensorflow,"Remove `OP_REQUIRES` call from helper function. Since `OP_REQUIRES` macro expands to a `return;` (among other), calling it in a helper function only ends the helper function's execution earlier, but the kernel will still run from start to end. Thus, all the expected validations are actually broken/useless as the code ploughs through the next crash anyway. PiperOrigin-RevId: 369524386 Change-Id: I54f6cf9328445675ccc392e661b04336b229c9da",sparse_cholesky_op.cc,"@@ -17,6 +17,8 @@ limitations under the License. #include #include +#include ""tensorflow/core/framework/op_requires.h"" + #define EIGEN_USE_THREADS #include ""third_party/eigen3/Eigen/Core"" @@ -82,8 +84,8 @@ class CSRSparseCholeskyCPUOp : public OpKernel { int64 num_rows; int batch_size; - ValidateInputs(ctx, *input_matrix, input_permutation_indices, &batch_size, - &num_rows); + OP_REQUIRES_OK(ctx, ValidateInputs(*input_matrix, input_permutation_indices, + &batch_size, &num_rows)); // Allocate batch pointers. Tensor batch_ptr(cpu_allocator(), DT_INT32, TensorShape({batch_size + 1})); @@ -226,49 +228,48 @@ class CSRSparseCholeskyCPUOp : public OpKernel { } private: - void ValidateInputs(OpKernelContext* ctx, - const CSRSparseMatrix& sparse_matrix, - const Tensor& permutation_indices, int* batch_size, - int64* num_rows) { - OP_REQUIRES(ctx, sparse_matrix.dtype() == DataTypeToEnum::value, - errors::InvalidArgument( - ""Asked for a CSRSparseMatrix of type "", - DataTypeString(DataTypeToEnum::value), - "" but saw dtype: "", DataTypeString(sparse_matrix.dtype()))); + Status ValidateInputs(const CSRSparseMatrix& sparse_matrix, + const Tensor& permutation_indices, int* batch_size, + int64* num_rows) { + if (sparse_matrix.dtype() != DataTypeToEnum::value) + return errors::InvalidArgument( + ""Asked for a CSRSparseMatrix of type "", + DataTypeString(DataTypeToEnum::value), + "" but saw dtype: "", DataTypeString(sparse_matrix.dtype())); const Tensor& dense_shape = sparse_matrix.dense_shape(); const int rank = dense_shape.dim_size(0); - OP_REQUIRES(ctx, rank == 2 || rank == 3, - errors::InvalidArgument(""sparse matrix must have rank 2 or 3; "", - ""but dense_shape has size "", rank)); + if (rank < 2 || rank > 3) + return errors::InvalidArgument(""sparse matrix must have rank 2 or 3; "", + ""but dense_shape has size "", rank); const int row_dim = (rank == 2) ? 
0 : 1; auto dense_shape_vec = dense_shape.vec(); *num_rows = dense_shape_vec(row_dim); const int64 num_cols = dense_shape_vec(row_dim + 1); - OP_REQUIRES(ctx, *num_rows == num_cols, - errors::InvalidArgument(""sparse matrix must be square; got: "", - *num_rows, "" != "", num_cols)); + if (*num_rows != num_cols) + return errors::InvalidArgument( + ""sparse matrix must be square; got: "", *num_rows, "" != "", num_cols); const TensorShape& perm_shape = permutation_indices.shape(); - OP_REQUIRES( - ctx, perm_shape.dims() + 1 == rank, - errors::InvalidArgument( - ""sparse matrix must have the same rank as permutation; got: "", rank, - "" != "", perm_shape.dims(), "" + 1."")); - OP_REQUIRES( - ctx, perm_shape.dim_size(rank - 2) == *num_rows, - errors::InvalidArgument( - ""permutation must have the same number of elements in each batch "" - ""as the number of rows in sparse matrix; got: "", - perm_shape.dim_size(rank - 2), "" != "", *num_rows)); + if (perm_shape.dims() + 1 != rank) + return errors::InvalidArgument( + ""sparse matrix must have the same rank as permutation; got: "", rank, + "" != "", perm_shape.dims(), "" + 1.""); + if (perm_shape.dim_size(rank - 2) != *num_rows) + return errors::InvalidArgument( + ""permutation must have the same number of elements in each batch "" + ""as the number of rows in sparse matrix; got: "", + perm_shape.dim_size(rank - 2), "" != "", *num_rows); *batch_size = sparse_matrix.batch_size(); if (*batch_size > 1) { - OP_REQUIRES( - ctx, perm_shape.dim_size(0) == *batch_size, - errors::InvalidArgument(""permutation must have the same batch size "" - ""as sparse matrix; got: "", - perm_shape.dim_size(0), "" != "", *batch_size)); + if (perm_shape.dim_size(0) != *batch_size) + return errors::InvalidArgument( + ""permutation must have the same batch size "" + ""as sparse matrix; got: "", + perm_shape.dim_size(0), "" != "", *batch_size); } + + return Status::OK(); } }; ",1,test 26eb323554ffccd173e8a79a8c05c15b685ae4d1,tensorflow/tensorflow,"Fix null CHECK issue with `tf.raw_ops.EncodePng`. PiperOrigin-RevId: 369717714 Change-Id: I24136cd99c20b8466671f4f93b670ef6f6dd1250",encode_png_op.cc,"@@ -54,6 +54,8 @@ class EncodePngOp : public OpKernel { OP_REQUIRES(context, image.dims() == 3, errors::InvalidArgument(""image must be 3-dimensional"", image.shape().DebugString())); + OP_REQUIRES(context, image.NumElements() > 0, + errors::Internal(""Invalid image provided."")); OP_REQUIRES( context, FastBoundsCheck(image.NumElements(), std::numeric_limits::max()), ",1,train 44b7f486c0143f68b56c34e2d01e146ee445134a,tensorflow/tensorflow,"Fix out of bounds read in `ragged_cross_op.cc`. PiperOrigin-RevId: 369757702 Change-Id: Ie6e5d2c21513a8d56bf41fcf35960caf76e890f9",ragged_cross_op.cc,"@@ -21,6 +21,7 @@ limitations under the License. 
#include ""tensorflow/core/framework/register_types.h"" #include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/tensor_shape.h"" +#include ""tensorflow/core/platform/errors.h"" #include ""tensorflow/core/platform/fingerprint.h"" #include ""tensorflow/core/util/util.h"" #include ""tensorflow/core/util/work_sharder.h"" @@ -466,16 +467,45 @@ class RaggedCrossOp : public OpKernel { int next_dense = 0; for (char c : input_order_) { if (c == 'R') { + if (next_ragged >= ragged_values_list.size()) + return errors::InvalidArgument( + ""input_order \"""", input_order_, + ""\"" specifies reading a ragged tensor value at index "", + next_ragged, "" from a list of "", ragged_values_list.size(), + "" values.""); + if (next_ragged >= ragged_splits_list.size()) + return errors::InvalidArgument( + ""input_order \"""", input_order_, + ""\"" specifies reading a ragged tensor split at index "", + next_ragged, "" from a list of "", ragged_splits_list.size(), + "" splits.""); TF_RETURN_IF_ERROR(BuildRaggedFeatureReader( ragged_values_list[next_ragged], ragged_splits_list[next_ragged], features)); next_ragged++; } else if (c == 'S') { + if (next_sparse >= sparse_values_list.size()) + return errors::InvalidArgument( + ""input_order \"""", input_order_, + ""\"" specifies reading a sparse tensor value at index "", + next_sparse, "" from a list of "", sparse_values_list.size(), + "" values.""); + if (next_sparse >= sparse_indices_list.size()) + return errors::InvalidArgument( + ""input_order \"""", input_order_, + ""\"" specifies reading a sparse tensor index at index "", + next_sparse, "" from a list of "", sparse_indices_list.size(), + "" indices.""); TF_RETURN_IF_ERROR(BuildSparseFeatureReader( sparse_indices_list[next_sparse], sparse_values_list[next_sparse], batch_size, features)); next_sparse++; } else if (c == 'D') { + if (next_dense >= dense_list.size()) + return errors::InvalidArgument( + ""input_order \"""", input_order_, + ""\"" specifies reading a dense tensor at index "", next_dense, + "" from a list of "", dense_list.size(), "" tensors.""); TF_RETURN_IF_ERROR( BuildDenseFeatureReader(dense_list[next_dense++], features)); } else { ",1,train b432a38fe0e1b4b904a6c222cbce794c39703e87,tensorflow/tensorflow,"Fix overflow CHECK issue with `tf.raw_ops.DrawBoundingBoxes`. PiperOrigin-RevId: 369753591 Change-Id: I3b45fc98ee0d28a3c20b7e9c995aa647c976ec40",draw_bounding_box_op.cc,"@@ -147,22 +147,46 @@ class DrawBoundingBoxesOp : public OpKernel { // At this point, {min,max}_box_{row,col}_clamp are inside the // image. 
- CHECK_GE(min_box_row_clamp, 0); - CHECK_GE(max_box_row_clamp, 0); - CHECK_LT(min_box_row_clamp, height); - CHECK_LT(max_box_row_clamp, height); - CHECK_GE(min_box_col_clamp, 0); - CHECK_GE(max_box_col_clamp, 0); - CHECK_LT(min_box_col_clamp, width); - CHECK_LT(max_box_col_clamp, width); + OP_REQUIRES( + context, min_box_row_clamp >= 0, + errors::InvalidArgument(""Min box row clamp is less than 0."")); + OP_REQUIRES( + context, max_box_row_clamp >= 0, + errors::InvalidArgument(""Max box row clamp is less than 0."")); + OP_REQUIRES(context, min_box_row_clamp <= height, + errors::InvalidArgument( + ""Min box row clamp is greater than height."")); + OP_REQUIRES(context, max_box_row_clamp <= height, + errors::InvalidArgument( + ""Max box row clamp is greater than height."")); + + OP_REQUIRES( + context, min_box_col_clamp >= 0, + errors::InvalidArgument(""Min box col clamp is less than 0."")); + OP_REQUIRES( + context, max_box_col_clamp >= 0, + errors::InvalidArgument(""Max box col clamp is less than 0."")); + OP_REQUIRES(context, min_box_col_clamp <= width, + errors::InvalidArgument( + ""Min box col clamp is greater than width."")); + OP_REQUIRES(context, max_box_col_clamp <= width, + errors::InvalidArgument( + ""Max box col clamp is greater than width."")); // At this point, the min_box_row and min_box_col are either // in the image or above/left of it, and max_box_row and // max_box_col are either in the image or below/right or it. - CHECK_LT(min_box_row, height); - CHECK_GE(max_box_row, 0); - CHECK_LT(min_box_col, width); - CHECK_GE(max_box_col, 0); + + OP_REQUIRES( + context, min_box_row <= height, + errors::InvalidArgument(""Min box row is greater than height."")); + OP_REQUIRES(context, max_box_row >= 0, + errors::InvalidArgument(""Max box row is less than 0."")); + OP_REQUIRES( + context, min_box_col <= width, + errors::InvalidArgument(""Min box col is greater than width."")); + OP_REQUIRES(context, max_box_col >= 0, + errors::InvalidArgument(""Max box col is less than 0."")); // Draw top line. 
if (min_box_row >= 0) { ",1,train efea03b38fb8d3b81762237dc85e579cc5fc6e87,tensorflow/tensorflow,"Validate inputs to `QuantizedMul` PiperOrigin-RevId: 369756982 Change-Id: I00d960cc3b9316fd7a86bd37a44e341c96e17624",quantized_mul_op.cc,"@@ -284,10 +284,22 @@ class QuantizedMulOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& x = context->input(0); const Tensor& y = context->input(1); - const float min_x = context->input(2).flat()(0); - const float max_x = context->input(3).flat()(0); - const float min_y = context->input(4).flat()(0); - const float max_y = context->input(5).flat()(0); + auto& min_x_tensor = context->input(2); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_x_tensor.shape()), + errors::InvalidArgument(""min_x must be a scalar"")); + const float min_x = min_x_tensor.flat()(0); + auto& max_x_tensor = context->input(3); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_x_tensor.shape()), + errors::InvalidArgument(""max_x must be a scalar"")); + const float max_x = max_x_tensor.flat()(0); + auto& min_y_tensor = context->input(4); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_y_tensor.shape()), + errors::InvalidArgument(""min_y must be a scalar"")); + const float min_y = min_y_tensor.flat()(0); + auto& max_y_tensor = context->input(5); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_y_tensor.shape()), + errors::InvalidArgument(""max_y must be a scalar"")); + const float max_y = max_y_tensor.flat()(0); BCast bcast(BCast::FromShape(x.shape()), BCast::FromShape(y.shape())); if (!bcast.IsValid()) { ",1,train a324ac84e573fba362a5e53d4e74d5de6729933e,tensorflow/tensorflow,"Validate arguments to `QuantizedReshape`. Ensure that validations from `Reshape` also terminate `QuantizedReshape` on failure. PiperOrigin-RevId: 369775421 Change-Id: If8c5342267aceea65b7cb83a4b183304886f1ce8",quantized_reshape_op.cc,"@@ -17,6 +17,7 @@ limitations under the License. #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/register_types.h"" +#include ""tensorflow/core/framework/tensor_shape.h"" #include ""tensorflow/core/framework/tensor_types.h"" #include ""tensorflow/core/framework/types.h"" #include ""tensorflow/core/kernels/reshape_op.h"" @@ -30,9 +31,29 @@ class QuantizedReshapeOp : public ReshapeOp { void Compute(OpKernelContext* ctx) override { // This call processes inputs 1 and 2 to write output 0. 
ReshapeOp::Compute(ctx); + if (!ctx->status().ok()) { + return; + } + + const auto& input_min_float_tensor = ctx->input(2); + const auto& input_min_float_shape = input_min_float_tensor.shape(); + OP_REQUIRES(ctx, + TensorShapeUtils::IsScalar(input_min_float_shape) || + (TensorShapeUtils::IsVector(input_min_float_shape) && + (input_min_float_shape.dim_size(0) == 1)), + errors::InvalidArgument( + ""input_min must be a scalar or a vector of 1 element"")); + const float input_min_float = input_min_float_tensor.flat()(0); + const auto& input_max_float_tensor = ctx->input(3); + const auto& input_max_float_shape = input_max_float_tensor.shape(); + OP_REQUIRES(ctx, + TensorShapeUtils::IsScalar(input_max_float_shape) || + (TensorShapeUtils::IsVector(input_max_float_shape) && + (input_max_float_shape.dim_size(0) == 1)), + errors::InvalidArgument( + ""input_max must be a scalar or a vector of 1 element"")); + const float input_max_float = input_max_float_tensor.flat()(0); - const float input_min_float = ctx->input(2).flat()(0); - const float input_max_float = ctx->input(3).flat()(0); Tensor* output_min = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(1, TensorShape({}), &output_min)); output_min->flat()(0) = input_min_float; ",1,train f6c40f0c6cbf00d46c7717a26419f2062f2f8694,tensorflow/tensorflow,"Validate min and max arguments to `QuantizedResizeBilinear`. PiperOrigin-RevId: 369765091 Change-Id: I33be8b78273ab7d08b97541692fe05cb7f94963a",quantized_resize_bilinear_op.cc,"@@ -702,8 +702,14 @@ class QuantizedResizeBilinearOp : public OpKernel { } void Compute(OpKernelContext* context) override { - const float in_min = context->input(2).flat()(0); - const float in_max = context->input(3).flat()(0); + const auto& in_min_tensor = context->input(2); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(in_min_tensor.shape()), + errors::InvalidArgument(""min must be a scalar"")); + const float in_min = in_min_tensor.flat()(0); + const auto& in_max_tensor = context->input(3); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(in_max_tensor.shape()), + errors::InvalidArgument(""max must be a scalar"")); + const float in_max = in_max_tensor.flat()(0); ImageResizerState st(align_corners_, false); st.ValidateAndCreateOutput(context); ",1,train c570e2ecfc822941335ad48f6e10df4e21f11c96,tensorflow/tensorflow,"Fix issues in Conv2DBackpropFilter. PiperOrigin-RevId: 369772454 Change-Id: I49b465f2ae2ce91def61b56cea8000197d5177d8",conv_grad_filter_ops.cc,"@@ -495,6 +495,14 @@ class Conv2DCustomBackpropFilterOp : public OpKernel { const int filter_total_size = dims.spatial_dims[0].filter_size * dims.spatial_dims[1].filter_size * dims.in_depth; + OP_REQUIRES( + context, + filter_total_size * dims.out_depth == filter_backprop->NumElements(), + errors::InvalidArgument( + ""filter_size does not have enough elements, requested "", + filter_total_size * dims.out_depth, "", got "", + filter_backprop->NumElements())); + // The output image size is the spatial size of the output. 
const int output_image_size = dims.spatial_dims[0].output_size * dims.spatial_dims[1].output_size; @@ -518,6 +526,11 @@ class Conv2DCustomBackpropFilterOp : public OpKernel { const size_t work_unit_size = size_A + size_B + size_C; + OP_REQUIRES( + context, work_unit_size != 0, + errors::InvalidArgument( + ""Work size for convolution would be 0, which is not acceptable"")); + const size_t shard_size = (target_working_set_size + work_unit_size - 1) / work_unit_size; ",1,train 4f663d4b8f0bec1b48da6fa091a7d29609980fa4,tensorflow/tensorflow,"Allowlist certain data types to avoid a seg fault. PiperOrigin-RevId: 356326671 Change-Id: I23b65b52e93798cb5a6744632d31b0f88c6b6b31",immutable_constant_op.cc,"@@ -17,6 +17,8 @@ limitations under the License. #include +#include ""tensorflow/core/framework/types.pb.h"" + namespace tensorflow { namespace { @@ -86,6 +88,9 @@ ImmutableConstantOp::ImmutableConstantOp(OpKernelConstruction* context) OP_REQUIRES_OK(context, context->GetAttr(kMemoryRegionNameAttr, ®ion_name_)); OP_REQUIRES_OK(context, context->GetAttr(kDTypeAttr, &dtype_)); + OP_REQUIRES(context, dtype_ != DT_RESOURCE && dtype_ != DT_VARIANT, + errors::InvalidArgument( + ""Resource and variant dtypes are invalid for this op."")); OP_REQUIRES_OK(context, context->GetAttr(kShapeAttr, &shape_)); } ",1,train ba424dd8f16f7110eea526a8086f1a155f14f22b,tensorflow/tensorflow,"Enhance validation of ngram op and handle case of 0 tokens. PiperOrigin-RevId: 369940178 Change-Id: Ia82f42c09d14efe76e7dc013505b832a42282f0b",string_ngrams_op.cc,"@@ -61,16 +61,28 @@ class StringNGramsOp : public tensorflow::OpKernel { OP_REQUIRES_OK(context, context->input(""data_splits"", &splits)); const auto& splits_vec = splits->flat(); - // Validate that the splits are valid indices into data + // Validate that the splits are valid indices into data, only if there are + // splits specified. const int input_data_size = data->flat().size(); const int splits_vec_size = splits_vec.size(); - for (int i = 0; i < splits_vec_size; ++i) { - bool valid_splits = splits_vec(i) >= 0; - valid_splits = valid_splits && (splits_vec(i) <= input_data_size); - OP_REQUIRES( - context, valid_splits, - errors::InvalidArgument(""Invalid split value "", splits_vec(i), - "", must be in [0,"", input_data_size, ""]"")); + if (splits_vec_size > 0) { + int prev_split = splits_vec(0); + OP_REQUIRES(context, prev_split == 0, + errors::InvalidArgument(""First split value must be 0, got "", + prev_split)); + for (int i = 1; i < splits_vec_size; ++i) { + bool valid_splits = splits_vec(i) >= prev_split; + valid_splits = valid_splits && (splits_vec(i) <= input_data_size); + OP_REQUIRES(context, valid_splits, + errors::InvalidArgument( + ""Invalid split value "", splits_vec(i), "", must be in ["", + prev_split, "", "", input_data_size, ""]"")); + prev_split = splits_vec(i); + } + OP_REQUIRES(context, prev_split == input_data_size, + errors::InvalidArgument( + ""Last split value must be data size. 
Expected "", + input_data_size, "", got "", prev_split)); } int num_batch_items = splits_vec.size() - 1; @@ -174,13 +186,31 @@ class StringNGramsOp : public tensorflow::OpKernel { ngram->append(left_pad_); ngram->append(separator_); } + // Only output first num_tokens - 1 pairs of data and separator for (int n = 0; n < num_tokens - 1; ++n) { ngram->append(data[data_start_index + n]); ngram->append(separator_); } - ngram->append(data[data_start_index + num_tokens - 1]); - for (int n = 0; n < right_padding; ++n) { - ngram->append(separator_); + // Handle case when there are no tokens or no right padding as these can + // result in consecutive separators. + if (num_tokens > 0) { + // If we have tokens, then output last and then pair each separator with + // the right padding that follows, to ensure ngram ends either with the + // token or with the right pad. + ngram->append(data[data_start_index + num_tokens - 1]); + for (int n = 0; n < right_padding; ++n) { + ngram->append(separator_); + ngram->append(right_pad_); + } + } else { + // If we don't have tokens, then the last item inserted into the ngram + // has been the separator from the left padding loop above. Hence, + // output right pad and separator and make sure to finish with a + // padding, not a separator. + for (int n = 0; n < right_padding - 1; ++n) { + ngram->append(right_pad_); + ngram->append(separator_); + } ngram->append(right_pad_); } ",1,test ba424dd8f16f7110eea526a8086f1a155f14f22b,tensorflow/tensorflow,"Enhance validation of ngram op and handle case of 0 tokens. PiperOrigin-RevId: 369940178 Change-Id: Ia82f42c09d14efe76e7dc013505b832a42282f0b",string_ngrams_op_test.cc,"@@ -542,6 +542,40 @@ TEST_F(NgramKernelTest, TestEmptyInput) { assert_int64_equal(expected_splits, *GetOutput(1)); } +TEST_F(NgramKernelTest, TestNoTokens) { + MakeOp(""|"", {3}, ""L"", ""R"", -1, false); + // Batch items are: + // 0: + // 1: ""a"" + AddInputFromArray(TensorShape({1}), {""a""}); + AddInputFromArray(TensorShape({3}), {0, 0, 1}); + TF_ASSERT_OK(RunOpKernel()); + + std::vector expected_values( + {""L|L|R"", ""L|R|R"", // no input in first split + ""L|L|a"", ""L|a|R"", ""a|R|R""}); // second split + std::vector expected_splits({0, 2, 5}); + + assert_string_equal(expected_values, *GetOutput(0)); + assert_int64_equal(expected_splits, *GetOutput(1)); +} + +TEST_F(NgramKernelTest, TestNoTokensNoPad) { + MakeOp(""|"", {3}, """", """", 0, false); + // Batch items are: + // 0: + // 1: ""a"" + AddInputFromArray(TensorShape({1}), {""a""}); + AddInputFromArray(TensorShape({3}), {0, 0, 1}); + TF_ASSERT_OK(RunOpKernel()); + + std::vector expected_values({}); + std::vector expected_splits({0, 0, 0}); + + assert_string_equal(expected_values, *GetOutput(0)); + assert_int64_equal(expected_splits, *GetOutput(1)); +} + TEST_F(NgramKernelTest, ShapeFn) { ShapeInferenceTestOp op(""StringNGrams""); INFER_OK(op, ""?;?"", ""[?];[?]""); ",1,test ea3b43e98c32c97b35d52b4c66f9107452ca8fb2,tensorflow/tensorflow,"Fix `tf.raw_ops.CTCGreedyDecoder` CHECK failure. 
PiperOrigin-RevId: 369960465 Change-Id: If0b8b3264d5a47a24ac0970ed7b81ce6b4921fae",ctc_decoder_ops.cc,"@@ -232,6 +232,8 @@ class CTCGreedyDecoderOp : public OpKernel { int prev_indices = -1; for (int t = 0; t < seq_len_t(b); ++t) { int max_class_indices; + OP_REQUIRES(ctx, input_list_t[t].dimension(1) > 0, + errors::InvalidArgument(""Invalid input dimensions."")); log_prob_t(b, 0) += -RowMax(input_list_t[t], b, &max_class_indices); if (max_class_indices != blank_index && ",1,train 20431e9044cf2ad3c0323c34888b192f3289af6b,tensorflow/tensorflow,"Fix `tf.raw_ops.QuantizeAndDequantizeV4Grad` CHECK failure. PiperOrigin-RevId: 370532425 Change-Id: I767721be266851b63d8fe55e7ac6be0af6017f6c",quantize_and_dequantize_op.cc,"@@ -160,7 +160,17 @@ class QuantizeAndDequantizeV4GradientOp : public OpKernel { errors::InvalidArgument(""gradient and input must be the same size"")); const int depth = (axis_ == -1) ? 1 : input.dim_size(axis_); const Tensor& input_min_tensor = ctx->input(2); + OP_REQUIRES(ctx, + input_min_tensor.dims() == 0 || input_min_tensor.dims() == 1, + errors::InvalidArgument( + ""Input min tensor must have dimension 1. Recieved "", + input_min_tensor.dims(), ""."")); const Tensor& input_max_tensor = ctx->input(3); + OP_REQUIRES(ctx, + input_max_tensor.dims() == 0 || input_max_tensor.dims() == 1, + errors::InvalidArgument( + ""Input max tensor must have dimension 1. Recieved "", + input_max_tensor.dims(), ""."")); if (axis_ != -1) { OP_REQUIRES( ctx, input_min_tensor.dim_size(0) == depth, ",1,train 1e922ccdf6bf46a3a52641f99fd47d54c1decd13,tensorflow/tensorflow,"Fix crash in `SparseTensorToCSRSparseMatrixCPUFunctor` PiperOrigin-RevId: 370110290 Change-Id: I4451e92661a55c2180f80d38b67a9b50bf5edec5",kernels.cc,"@@ -22,6 +22,7 @@ limitations under the License. #include ""tensorflow/core/framework/tensor_types.h"" #include ""tensorflow/core/lib/core/errors.h"" #include ""tensorflow/core/lib/core/status.h"" +#include ""tensorflow/core/platform/errors.h"" namespace tensorflow { namespace functor { @@ -63,6 +64,11 @@ Status SparseTensorToCSRSparseMatrixCPUFunctor::operator()( for (int64 i = 0; i < total_nnz; ++i) { // For now, the rows pointers store the corresponding row counts. + int64 ix = indices(i, 0) + 1; + if (ix >= csr_row_ptr.size()) { + return errors::InvalidArgument(""Got an index "", ix, + "" that is outside of csr_row_ptr""); + } csr_row_ptr(indices(i, 0) + 1) += 1; csr_col_ind(i) = indices(i, 1); } ",1,train 67784700869470d65d5f2ef20aeb5e97c31673cb,tensorflow/tensorflow,"Prevent division by 0 in `QuantizedBiasAdd`. PiperOrigin-RevId: 370117454 Change-Id: I3804e2ac8dcc6d3afcc92e27853e2325a017ca4d",quantized_bias_add_op.cc,"@@ -56,6 +56,8 @@ class QuantizedBiasAddOp : public OpKernel { ""Must provide as many biases as the last dimension "" ""of the input tensor: "", bias.shape().DebugString(), "" vs. 
"", input.shape().DebugString())); + OP_REQUIRES(context, bias.NumElements() > 0, + errors::InvalidArgument(""Must provide at least 1 bias"")); Tensor* output = nullptr; OP_REQUIRES_OK(context, ",1,train d6ed5bcfe1dcab9e85a4d39931bd18d99018e75b,tensorflow/tensorflow,"Add missing validation in `QuantizedBatchNormWithGlobalNormalization` PiperOrigin-RevId: 370123451 Change-Id: Id234d6dab1ec21230bb8e503dba30f899af87f33",quantized_batch_norm_op.cc,"@@ -173,20 +173,50 @@ class QuantizedBatchNormOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); - const float input_min = context->input(1).flat()(0); - const float input_max = context->input(2).flat()(0); + const auto& input_min_tensor = context->input(1); + OP_REQUIRES(context, input_min_tensor.NumElements() == 1, + errors::InvalidArgument(""input_min must have 1 element"")); + const float input_min = input_min_tensor.flat()(0); + const auto& input_max_tensor = context->input(2); + OP_REQUIRES(context, input_max_tensor.NumElements() == 1, + errors::InvalidArgument(""input_max must have 1 element"")); + const float input_max = input_max_tensor.flat()(0); const Tensor& mean = context->input(3); - const float mean_min = context->input(4).flat()(0); - const float mean_max = context->input(5).flat()(0); + const auto& mean_min_tensor = context->input(4); + OP_REQUIRES(context, mean_min_tensor.NumElements() == 1, + errors::InvalidArgument(""mean_min must have 1 element"")); + const float mean_min = mean_min_tensor.flat()(0); + const auto& mean_max_tensor = context->input(5); + OP_REQUIRES(context, mean_max_tensor.NumElements() == 1, + errors::InvalidArgument(""mean_max must have 1 element"")); + const float mean_max = mean_max_tensor.flat()(0); const Tensor& var = context->input(6); - const float var_min = context->input(7).flat()(0); - const float var_max = context->input(8).flat()(0); + const auto& var_min_tensor = context->input(7); + OP_REQUIRES(context, var_min_tensor.NumElements() == 1, + errors::InvalidArgument(""var_min must have 1 element"")); + const float var_min = var_min_tensor.flat()(0); + const auto& var_max_tensor = context->input(8); + OP_REQUIRES(context, var_max_tensor.NumElements() == 1, + errors::InvalidArgument(""var_max must have 1 element"")); + const float var_max = var_max_tensor.flat()(0); const Tensor& beta = context->input(9); - const float beta_min = context->input(10).flat()(0); - const float beta_max = context->input(11).flat()(0); + const auto& beta_min_tensor = context->input(10); + OP_REQUIRES(context, beta_min_tensor.NumElements() == 1, + errors::InvalidArgument(""beta_min must have 1 element"")); + const float beta_min = beta_min_tensor.flat()(0); + const auto& beta_max_tensor = context->input(11); + OP_REQUIRES(context, beta_max_tensor.NumElements() == 1, + errors::InvalidArgument(""beta_max must have 1 element"")); + const float beta_max = beta_max_tensor.flat()(0); const Tensor& gamma = context->input(12); - const float gamma_min = context->input(13).flat()(0); - const float gamma_max = context->input(14).flat()(0); + const auto& gamma_min_tensor = context->input(13); + OP_REQUIRES(context, gamma_min_tensor.NumElements() == 1, + errors::InvalidArgument(""gamma_min must have 1 element"")); + const float gamma_min = gamma_min_tensor.flat()(0); + const auto& gamma_max_tensor = context->input(14); + OP_REQUIRES(context, gamma_max_tensor.NumElements() == 1, + errors::InvalidArgument(""gamma_max must have 1 element"")); + const float gamma_max = 
gamma_max_tensor.flat()(0); OP_REQUIRES(context, input.dims() == 4, errors::InvalidArgument(""input must be 4-dimensional"", @@ -203,6 +233,33 @@ class QuantizedBatchNormOp : public OpKernel { OP_REQUIRES(context, gamma.dims() == 1, errors::InvalidArgument(""gamma must be 1-dimensional"", gamma.shape().DebugString())); + OP_REQUIRES(context, mean.NumElements() > 1, + errors::InvalidArgument(""Must have at least a mean value"", + gamma.shape().DebugString())); + OP_REQUIRES(context, mean.NumElements() > 1, + errors::InvalidArgument(""Must have at least a mean value"")); + const auto last_dim = input.shape().dims() - 1; + OP_REQUIRES(context, + mean.shape().dim_size(0) == input.shape().dim_size(last_dim), + errors::InvalidArgument(""Must provide as many means as the "" + ""last dimension of the input tensor: "", + mean.shape().DebugString(), "" vs. "", + input.shape().DebugString())); + OP_REQUIRES( + context, mean.shape().dim_size(0) == var.shape().dim_size(0), + errors::InvalidArgument( + ""Mean and variance tensors must have the same shape: "", + mean.shape().DebugString(), "" vs. "", var.shape().DebugString())); + OP_REQUIRES( + context, mean.shape().dim_size(0) == beta.shape().dim_size(0), + errors::InvalidArgument( + ""Mean and beta tensors must have the same shape: "", + mean.shape().DebugString(), "" vs. "", beta.shape().DebugString())); + OP_REQUIRES( + context, mean.shape().dim_size(0) == gamma.shape().dim_size(0), + errors::InvalidArgument( + ""Mean and gamma tensors must have the same shape: "", + mean.shape().DebugString(), "" vs. "", gamma.shape().DebugString())); Tensor* output = nullptr; OP_REQUIRES_OK(context, ",1,train 744009c9e5cc5d0447f0dc39d055f917e1fd9e16,tensorflow/tensorflow,"Validate work in `QuantizedAdd`, ensure at least one element. PiperOrigin-RevId: 370127996 Change-Id: I57c6f3e01afdeada84737820a131590137463855",quantized_add_op.cc,"@@ -538,6 +538,8 @@ class QuantizedAddOp : public OpKernel { tensor_min = min_x; tensor_max = max_x; } + OP_REQUIRES(context, vector_num_elements > 0, + errors::InvalidArgument(""Must have some elements to add"")); VectorTensorAddition( vector_data, vector_min, vector_max, vector_num_elements, tensor_data, tensor_min, tensor_max, tensor_num_elements, min_z_value, max_z_value, ",1,train 548b5eaf23685d86f722233d8fbc21d0a4aecb96,tensorflow/tensorflow,"Fix divide by zero error in `fractional_pool_common.cc`. PiperOrigin-RevId: 371126221 Change-Id: Iea4b2f363aaeb116ab460e3bc592c687484af344",fractional_avg_pool_op.cc,"@@ -80,6 +80,10 @@ class FractionalAvgPoolOp : public OpKernel { std::vector output_size(tensor_in_and_out_dims); for (int i = 0; i < tensor_in_and_out_dims; ++i) { input_size[i] = tensor_in.dim_size(i); + OP_REQUIRES( + context, pooling_ratio_[i] <= input_size[i], + errors::InvalidArgument( + ""Pooling ratio cannot be bigger than input tensor dim size."")); } // Output size. for (int i = 0; i < tensor_in_and_out_dims; ++i) { ",1,train 480641e3599775a8895254ffbc0fc45621334f68,tensorflow/tensorflow,"Validate (and ensure validation sticks) inputs for `MatrixTriangularSolve`. 
PiperOrigin-RevId: 370282444 Change-Id: Iaed61a0b0727cc42c830658b72eb69f785f48dc5",matrix_triangular_solve_op_impl.h,"@@ -162,6 +162,9 @@ class BaseMatrixTriangularSolveOp : public OpKernel { const Tensor& in1 = ctx->input(1); ValidateInputTensors(ctx, in0, in1); + if (!ctx->status().ok()) { + return; + } MatMulBCast bcast(in0.shape().dim_sizes(), in1.shape().dim_sizes()); OP_REQUIRES( @@ -230,13 +233,22 @@ class MatrixTriangularSolveOp private: void ValidateInputTensors(OpKernelContext* ctx, const Tensor& in0, const Tensor& in1) override { + const auto in0_num_dims = in0.dims(); OP_REQUIRES( - ctx, in0.dims() >= 2, - errors::InvalidArgument(""In[0] ndims must be >= 2: "", in0.dims())); + ctx, in0_num_dims >= 2, + errors::InvalidArgument(""In[0] ndims must be >= 2: "", in0_num_dims)); + const auto in1_num_dims = in1.dims(); OP_REQUIRES( - ctx, in1.dims() >= 2, - errors::InvalidArgument(""In[0] ndims must be >= 2: "", in1.dims())); + ctx, in1_num_dims >= 2, + errors::InvalidArgument(""In[1] ndims must be >= 2: "", in1_num_dims)); + + const auto in0_last_dim = in0.dim_size(in0_num_dims - 1); + const auto in0_prev_dim = in0.dim_size(in0_num_dims - 2); + OP_REQUIRES(ctx, in0_last_dim == in0_prev_dim, + errors::InvalidArgument( + ""In[0] matrices in the last dimensions must be square ("", + in0_last_dim, "" =/= "", in0_prev_dim, "")"")); } }; ",1,train 704866eabe03a9aeda044ec91a8d0c83fc1ebdbe,tensorflow/tensorflow,"Fix overflow CHECK issue with `tf.raw_ops.UnsortedSegmentJoin`. PiperOrigin-RevId: 370766155 Change-Id: I33e7c6626224e1060a8a4ab51ad5d861c6d4c63e",unsorted_segment_join_op.cc,"@@ -90,6 +90,8 @@ class UnsortedSegmentJoinOp : public OpKernel { const int32 segment_dims = segment_id_shape.dims(); const Tensor& num_segments_tensor = context->input(2); + OP_REQUIRES(context, num_segments_tensor.NumElements() != 0, + errors::InvalidArgument(""Number of segments cannot be empty."")); auto num_segments = num_segments_tensor.scalar()(); OP_REQUIRES(context, segment_dims != 0, ",1,train 99085e8ff02c3763a0ec2263e44daec416f6a387,tensorflow/tensorflow,"Fix `tf.raw_ops.QuantizeAndDequantizeV3` array index failure. PiperOrigin-RevId: 370577691 Change-Id: Ifeae64212f6bcd139435824fa2748d1329213c4c",quantize_and_dequantize_op.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include ""tensorflow/core/framework/op_requires.h"" #define EIGEN_USE_THREADS #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ @@ -234,6 +235,10 @@ class QuantizeAndDequantizeV3Op : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); + OP_REQUIRES(ctx, axis_ < input.dims(), + errors::InvalidArgument( + ""Axis requested is larger than input dimensions. Axis: "", + axis_, "" Input Dimensions: "", input.dims())); const int depth = (axis_ == -1) ? 1 : input.dim_size(axis_); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); ",1,test da5ff2daf618591f64b2b62d9d9803951b945e9f,tensorflow/tensorflow,"Fix FPE issue with `tf.raw_ops.DenseCountSparseOutput`. 
PiperOrigin-RevId: 370946862 Change-Id: I3752584ad04aaecb327ff6793a9640ac56acfe7a",count_ops.cc,"@@ -122,6 +122,9 @@ class DenseCount : public OpKernel { int num_batch_elements = 1; for (int i = 0; i < num_batch_dimensions; ++i) { + OP_REQUIRES(context, data.shape().dim_size(i) != 0, + errors::InvalidArgument( + ""Invalid input: Shapes dimension cannot be 0."")); num_batch_elements *= data.shape().dim_size(i); } int num_value_elements = data.shape().num_elements() / num_batch_elements; ",1,test 1a2a87229d1d61e23a39373777c056161eb4084d,tensorflow/tensorflow,"Fix FPE issue with `tf.raw_ops.FusedBatchNorm`. PiperOrigin-RevId: 370948185 Change-Id: If0c8e0320062ed6363e94ff5fe38e6a301f69ac2",fused_batch_norm_op.cc,"@@ -293,6 +293,9 @@ struct FusedBatchNorm { const CPUDevice& d = context->eigen_device(); const int depth = x.dimension(3); + OP_REQUIRES( + context, depth != 0, + errors::Internal(""The 4th element in the input shape cannot be 0."")); const int size = x.size(); const int rest_size = size / depth; Eigen::DSizes rest_by_depth(rest_size, depth); ",1,train 4071d8e2f6c45c1955a811fee757ca2adbe462c1,tensorflow/tensorflow,"Fix FPE issue with `tf.raw_ops.Reverse`. PiperOrigin-RevId: 371176973 Change-Id: Ic6d483bfc95313ec2299c2d1c956cfe96c96626c",reverse_op.cc,"@@ -155,6 +155,12 @@ class ReverseOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); + // If input is provided, check to make sure the first dimension is valid. + if (input.dims() > 0) { + OP_REQUIRES( + context, input.dim_size(0) != 0, + errors::InvalidArgument(""Invalid input first dimension. Found 0."")); + } const Tensor& dims = context->input(1); if (TensorShapeUtils::IsScalar(input.shape())) { ",1,train 7f283ff806b2031f407db64c4d3edcda8fb9f9f5,tensorflow/tensorflow,"Fix FPE issue in external Eigen source code issue with `tf.raw_ops.SparseMatMul`. PiperOrigin-RevId: 370992919 Change-Id: Icfb276fef5fb40928b27c3e44608d2aca72c9fd7",sparse_matmul_op.cc,"@@ -1039,6 +1039,10 @@ class SparseMatMulOp : public OpKernel { if (transpose_b) { // TODO(agarwal): avoid transposing the matrix here and directly handle // transpose in CreateDenseSlices. + OP_REQUIRES(ctx, right->dim_size(0) != 0, + errors::InvalidArgument(""b has an entry 0 in it's shape."")); + OP_REQUIRES(ctx, right->dim_size(1) != 0, + errors::InvalidArgument(""b has an entry 0 in it's shape."")); right_tr.reset( new Tensor(right->dtype(), TensorShape({right->dim_size(1), right->dim_size(0)}))); ",1,train 8ba6fa29cd8bf9cef9b718dc31c78c73081f5b31,tensorflow/tensorflow,"Fix heap-buffer-overflow issue with `tf.raw_ops.SparseSplit`. PiperOrigin-RevId: 371242872 Change-Id: I482bb3d12602c7c3cc9446f97fb9f584bb98e9a4",sparse_tensor.h,"@@ -527,6 +527,10 @@ inline Status SparseTensor::Split(const SparseTensor& input_tensor, for (int i = 0; i < input_tensor.indices().dim_size(0); ++i) { const int dim = input_tensor.indices().matrix()(i, split_dim); int slice_index = GetSliceIndex(dim, split_size, residual); + if (slice_index >= num_values.size()) { + return errors::InvalidArgument(""Slice index "", slice_index, + "" is larger than num_split.""); + } num_values[slice_index]++; } ",1,train 51300ba1cc2f487aefec6e6631fef03b0e08b298,tensorflow/tensorflow,"Fix heap buffer overflow in tf.raw_ops.UnicodeEncode. 
PiperOrigin-RevId: 371717714 Change-Id: If33443b28f158e58078f1268f6b92f2728d219e0",unicode_ops.cc,"@@ -533,6 +533,17 @@ class UnicodeEncodeOp : public OpKernel { const Tensor& input_splits = context->input(1); const auto input_splits_flat = input_splits.flat(); + // Operation will treat first argument in input_splits as if it were zero + // regardless of its actual value since splits should begin with zero and + // end with the length of the input values vector. + OP_REQUIRES( + context, input_splits_flat(0) == 0, + errors::InvalidArgument(""First value in input_splits must be zero."")); + OP_REQUIRES(context, + input_splits_flat(input_splits_flat.size() - 1) == + input_tensor_flat.size(), + errors::InvalidArgument(""Last value in input_splits must be "" + ""equal to length of input_tensor."")); // Since we limit to a 2-D input (flat_values of rank 1 and a single splits // tensor), our output dimension will be 1 with it's size equal to the // number of splits (outer dimension or ragged tensor). @@ -548,6 +559,14 @@ class UnicodeEncodeOp : public OpKernel { for (int i = 1; i < input_splits_flat.size(); ++i) { icu::UnicodeString unicode_string; icu::UnicodeStringAppendable appendable_unicode_string(unicode_string); + OP_REQUIRES( + context, input_splits_flat(i - 1) <= input_splits_flat(i), + errors::InvalidArgument( + ""Values in input_splits must be equal or in ascending order."")); + OP_REQUIRES( + context, input_splits_flat(i) <= input_tensor_flat.size(), + errors::InvalidArgument(""Values in input_splits must be less than or "" + ""equal to input_tensor length."")); for (; idx < input_splits_flat(i); ++idx) { int32 code_point = input_tensor_flat(idx); // Check for invalid code point ",1,test a84358aa12f0b1518e606095ab9cfddbf597c121,tensorflow/tensorflow,"Fix heap-buffer-overflow issue with `tf.raw_ops.RaggedTensorToTensor`. PiperOrigin-RevId: 371986929 Change-Id: I79ab962a22c5867f36f7f45b780a1ac881b1dbdd",ragged_tensor_to_tensor_op.cc,"@@ -313,6 +313,12 @@ class RaggedTensorToTensorBaseOp : public OpKernel { output_index_multiplier, output_size, result); return tensorflow::Status::OK(); case RowPartitionType::ROW_SPLITS: + if (row_partition_tensor.size() - 1 > parent_output_index.size()) { + return errors::InvalidArgument( + ""Row partition size is greater than output size: "", + row_partition_tensor.size() - 1, "" > "", + parent_output_index.size()); + } CalculateOutputIndexRowSplit( context, row_partition_tensor, parent_output_index, output_index_multiplier, output_size, result); ",1,test 77dd114513d7796e1e2b8aece214a380af26fbf4,tensorflow/tensorflow,"Fix a check fail PiperOrigin-RevId: 372011072 Change-Id: I1062cfaed0aa16884e9a16312483794d188db76f",load_and_remap_matrix_op.cc,"@@ -123,6 +123,11 @@ class LoadAndRemapMatrixOp : public OpKernel { // Processes the checkpoint source and the provided Tensor name. 
const Tensor* ckpt_path_t; OP_REQUIRES_OK(context, context->input(""ckpt_path"", &ckpt_path_t)); + OP_REQUIRES( + context, ckpt_path_t->NumElements() == 1, + errors::InvalidArgument(""The `ckpt_path` tensor must have exactly one "" + ""element, got tensor of shape "", + ckpt_path_t->shape().DebugString())); const string& ckpt_path = ckpt_path_t->scalar()(); const Tensor* old_tensor_name_t; OP_REQUIRES_OK(context, ",1,train 1c56f53be0b722ca657cbc7df461ed676c8642a2,tensorflow/tensorflow,"Fix a check fail in Fast Fourier implementation PiperOrigin-RevId: 372026629 Change-Id: Id05c3362aa575271bc3e06b16316c9037085fc11",fft_ops.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include ""tensorflow/core/platform/errors.h"" #define EIGEN_USE_THREADS // See docs in ../ops/fft_ops.cc. @@ -261,6 +262,9 @@ class FFTCPU : public FFTBase { i == FFTRank ? fft_shape[i - 1] / 2 + 1 : fft_shape[i - 1]; full_fft_shape.AddDim(fft_shape[i - 1]); } + OP_REQUIRES(ctx, full_fft_shape.num_elements() > 0, + errors::InvalidArgument(""Obtained a FFT shape of 0 elements: "", + full_fft_shape.DebugString())); Tensor temp; OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum::v(), ",1,test 31bd5026304677faa8a0b77602c6154171b9aec1,tensorflow/tensorflow,"Prevent check fail in FFT PiperOrigin-RevId: 372031044 Change-Id: I50994e3e8a5d1342d01bde80256f6bf2730ca299",fft_ops.cc,"@@ -222,6 +222,9 @@ class FFTCPU : public FFTBase { input_slice_sizes[i] = fft_shape[i - 1]; temp_shape.AddDim(fft_shape[i - 1]); } + OP_REQUIRES(ctx, temp_shape.num_elements() > 0, + errors::InvalidArgument(""Obtained a FFT shape of 0 elements: "", + temp_shape.DebugString())); auto output = out->flat_inner_dims(); const Eigen::DSizes zero_start_indices; ",1,train f4c364a5d6880557f6f5b6eb5cee2c407f0186b3,tensorflow/tensorflow,"Fix multiple issues in EditDistance PiperOrigin-RevId: 372033948 Change-Id: Ieb957c29894af05bdfeb1a0402fced808dfcfd7b",edit_distance_op.cc,"@@ -64,6 +64,12 @@ Status ValidateShapes(OpKernelContext* ctx, const Tensor& hypothesis_indices, return errors::InvalidArgument( ""truth_shape should be a vector, but got shape: "", truth_shape.shape().DebugString()); + if (hypothesis_values.NumElements() != hypothesis_indices.dim_size(0)) + return errors::InvalidArgument( + ""Expected hypothesis_values.NumElements == "" + ""#rows(hypothesis_indices), their shapes are: "", + hypothesis_values.shape().DebugString(), "" and "", + hypothesis_indices.shape().DebugString()); if (hypothesis_shape.NumElements() != hypothesis_indices.dim_size(1)) return errors::InvalidArgument( ""Expected hypothesis_shape.NumElements == "" @@ -75,6 +81,12 @@ Status ValidateShapes(OpKernelContext* ctx, const Tensor& hypothesis_indices, ""Input SparseTensors must have rank at least 2, but truth_shape "" ""rank is: "", truth_shape.NumElements()); + if (truth_values.NumElements() != truth_indices.dim_size(0)) + return errors::InvalidArgument( + ""Expected truth_values.NumElements == "" + ""#rows(truth_indices), their shapes are: "", + truth_values.shape().DebugString(), "" and "", + truth_indices.shape().DebugString()); if (truth_shape.NumElements() != truth_indices.dim_size(1)) return errors::InvalidArgument( ""Expected truth_shape.NumElements == "" @@ -153,6 +165,11 @@ class EditDistanceOp : public OpKernel { output_shape.AddDim(std::max(hypothesis_st_shape.dim_size(d), truth_st_shape.dim_size(d))); } + const auto 
output_elements = output_shape.num_elements(); + OP_REQUIRES( + ctx, output_elements > 0, + errors::InvalidArgument(""Got output shape "", output_shape.DebugString(), + "" which has 0 elements"")); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(""output"", output_shape, &output)); @@ -185,6 +202,12 @@ class EditDistanceOp : public OpKernel { if (g_truth == g_hypothesis) { auto loc = std::inner_product(g_truth.begin(), g_truth.end(), output_strides.begin(), int64{0}); + OP_REQUIRES( + ctx, loc < output_elements, + errors::Internal(""Got an inner product "", loc, + "" which would require in writing to outside of "" + ""the buffer for the output tensor (max elements "", + output_elements, "")"")); output_t(loc) = gtl::LevenshteinDistance(truth_seq, hypothesis_seq, cmp); if (normalize_) output_t(loc) /= truth_seq.size(); @@ -194,6 +217,12 @@ class EditDistanceOp : public OpKernel { } else if (g_truth > g_hypothesis) { // zero-length truth auto loc = std::inner_product(g_hypothesis.begin(), g_hypothesis.end(), output_strides.begin(), int64{0}); + OP_REQUIRES( + ctx, loc < output_elements, + errors::Internal(""Got an inner product "", loc, + "" which would require in writing to outside of "" + ""the buffer for the output tensor (max elements "", + output_elements, "")"")); output_t(loc) = hypothesis_seq.size(); if (normalize_ && output_t(loc) != 0.0f) { output_t(loc) = std::numeric_limits::infinity(); @@ -202,6 +231,12 @@ class EditDistanceOp : public OpKernel { } else { // zero-length hypothesis auto loc = std::inner_product(g_truth.begin(), g_truth.end(), output_strides.begin(), int64{0}); + OP_REQUIRES( + ctx, loc < output_elements, + errors::Internal(""Got an inner product "", loc, + "" which would require in writing to outside of "" + ""the buffer for the output tensor (max elements "", + output_elements, "")"")); output_t(loc) = (normalize_) ? 1.0 : truth_seq.size(); ++truth_iter; } @@ -212,6 +247,12 @@ class EditDistanceOp : public OpKernel { auto hypothesis_seq = hypothesis_j.values(); auto loc = std::inner_product(g_hypothesis.begin(), g_hypothesis.end(), output_strides.begin(), int64{0}); + OP_REQUIRES( + ctx, loc < output_elements, + errors::Internal(""Got an inner product "", loc, + "" which would require in writing to outside of the "" + ""buffer for the output tensor (max elements "", + output_elements, "")"")); output_t(loc) = hypothesis_seq.size(); if (normalize_ && output_t(loc) != 0.0f) { output_t(loc) = std::numeric_limits::infinity(); @@ -224,6 +265,12 @@ class EditDistanceOp : public OpKernel { auto truth_seq = truth_i.values(); auto loc = std::inner_product(g_truth.begin(), g_truth.end(), output_strides.begin(), int64{0}); + OP_REQUIRES( + ctx, loc < output_elements, + errors::Internal(""Got an inner product "", loc, + "" which would require in writing to outside of the "" + ""buffer for the output tensor (max elements "", + output_elements, "")"")); output_t(loc) = (normalize_) ? 1.0 : truth_seq.size(); ++truth_iter; } ",1,test faa76f39014ed3b5e2c158593b1335522e573c7f,tensorflow/tensorflow,"Fix heap-buffer-overflow issue with `tf.raw_ops.SparseFillEmptyRows`. PiperOrigin-RevId: 372009178 Change-Id: Ia1a9e9691ecaa072f32fb39a0887b2aabd399210",sparse_fill_empty_rows_op.cc,"@@ -228,7 +228,10 @@ void SparseFillEmptyRowsOpImpl(OpKernelContext* context, default_value_t.shape().DebugString()), done); // TODO(ebrevdo): add shape checks between values, indices, - // dense_shape. Also add check that dense rank > 0. + // Also add check that dense rank > 0. 
+ OP_REQUIRES_ASYNC(context, dense_shape_t.NumElements() != 0, + errors::InvalidArgument(""Dense shape cannot be empty.""), + done); using FunctorType = functor::SparseFillEmptyRows; OP_REQUIRES_OK_ASYNC(context, ",1,train 3f6fe4dfef6f57e768260b48166c27d148f3015f,tensorflow/tensorflow,"Add missing validations in dillation ops. PiperOrigin-RevId: 372037158 Change-Id: I4ee304c84a02550c030288a6534000b934fc1599",dilation_ops.cc,"@@ -130,6 +130,7 @@ class DilationOp : public OpKernel { ParseSizes(context, strides_, rates_, padding_, &stride_rows, &stride_cols, &rate_rows, &rate_cols, &pad_top, &pad_left, &out_rows, &out_cols); + if (!context->status().ok()) return; // Output tensor is of the following dimensions: // [ batch, out_rows, out_cols, depth ] @@ -229,6 +230,7 @@ class DilationBackpropInputOp : public OpKernel { ParseSizes(context, strides_, rates_, padding_, &stride_rows, &stride_cols, &rate_rows, &rate_cols, &pad_top, &pad_left, &out_rows, &out_cols); + if (!context->status().ok()) return; // Verify that the incoming gradient tensor has the expected size // [ batch, out_rows, out_cols, depth ] @@ -318,8 +320,10 @@ struct DilationBackpropInput { } } } - in_backprop(b, h_in_max, w_in_max, d) += - out_backprop(b, h_out, w_out, d); + if (h_in_max < input_rows && w_in_max < input_cols) { + in_backprop(b, h_in_max, w_in_max, d) += + out_backprop(b, h_out, w_out, d); + } } } } @@ -349,6 +353,7 @@ class DilationBackpropFilterOp : public OpKernel { ParseSizes(context, strides_, rates_, padding_, &stride_rows, &stride_cols, &rate_rows, &rate_cols, &pad_top, &pad_left, &out_rows, &out_cols); + if (!context->status().ok()) return; // Verify that the incoming gradient tensor has the expected size // [ batch, out_rows, out_cols, depth ] @@ -438,8 +443,10 @@ struct DilationBackpropFilter { } } } - filter_backprop(h_max, w_max, d) += - out_backprop(b, h_out, w_out, d); + if (h_max < filter_rows && w_max < filter_cols) { + filter_backprop(h_max, w_max, d) += + out_backprop(b, h_out, w_out, d); + } } } } ",1,train 7ae2af34087fb4b5c8915279efd03da3b81028bc,tensorflow/tensorflow,"Fix heap-buffer-overflow issue with `tf.raw_ops.SparseDenseCwiseMul`. PiperOrigin-RevId: 372054410 Change-Id: Ifcce0491e2e3816838c87e73be30a1e61b65174d",sparse_dense_binary_op_shared.cc,"@@ -78,6 +78,11 @@ class SparseDenseBinaryOpShared : public OpKernel { ""but received shapes: "", values_t->shape().DebugString(), "" and "", shape_t->shape().DebugString())); + OP_REQUIRES( + ctx, values_t->dim_size(0) == indices_t->dim_size(0), + errors::InvalidArgument( + ""The first dimension of values and indices should match. ("", + values_t->dim_size(0), "" vs. 
"", indices_t->dim_size(0), "")"")); const auto indices_mat = indices_t->matrix(); const auto shape_vec = shape_t->vec(); ",1,train 5e52ef5a461570cfb68f3bdbbebfe972cb4e0fd8,tensorflow/tensorflow,"Fix breakage in parameterized_truncated_normal_op.cc PiperOrigin-RevId: 372041718 Change-Id: Iff79e77a2bb27032423eefcb84211627b27dfe81",parameterized_truncated_normal_op.cc,"@@ -627,6 +627,9 @@ class ParameterizedTruncatedNormalOp : public OpKernel { ctx, TensorShapeUtils::IsVector(shape_tensor.shape()), errors::InvalidArgument(""Input shape should be a vector, got shape: "", shape_tensor.shape().DebugString())); + OP_REQUIRES(ctx, shape_tensor.NumElements() > 0, + errors::InvalidArgument(""Shape tensor must not be empty, got "", + shape_tensor.DebugString())); int32 num_batches = shape_tensor.flat()(0); int32 samples_per_batch = 1; ",1,train ef0c008ee84bad91ec6725ddc42091e19a30cf0e,tensorflow/tensorflow,"Fix out of bound read in requantization_range_op.cc PiperOrigin-RevId: 372129031 Change-Id: Ie684ab98a3840c5186ead3eafffc0e0ed0e8030d",requantization_range_op.cc,"@@ -46,6 +46,10 @@ class RequantizationRangeOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); + OP_REQUIRES(ctx, ctx->input(1).NumElements() > 0, + errors::InvalidArgument(""Input min must not be empty."")); + OP_REQUIRES(ctx, ctx->input(2).NumElements() > 0, + errors::InvalidArgument(""Input max must not be empty."")); const float input_min_float = ctx->input(1).flat()(0); const float input_max_float = ctx->input(2).flat()(0); Tensor* output_min = nullptr; ",1,train dcd7867de0fea4b72a2b34bd41eb74548dc23886,tensorflow/tensorflow,"Fix heap buffer overflow PiperOrigin-RevId: 372132844 Change-Id: Idef9895efaf145f2b1c23d31983601ec980cd5e4",maxpooling_op.cc,"@@ -1014,6 +1014,9 @@ struct LaunchMaxPoolingGradWithArgmax { const int input_start = start * input_size_per_batch; const int input_end = limit * input_size_per_batch; for (int64 index = input_start; index < input_end; index++) { + if (index >= argmax.NumElements()) { + break; + } int64 grad_out_index = argmax_flat(index); if (!include_batch_in_index) { const int64 cur_batch = index / input_size_per_batch; ",1,test 79865b542f9ffdc9caeb255631f7c56f1d4b6517,tensorflow/tensorflow,"Fix memory corruption issue with `tf.raw_ops.DrawBoundingBoxesV2`. PiperOrigin-RevId: 372033910 Change-Id: I8a9f4efc1c8ddaacbc26ec1fbe4bfdd6791c226d",draw_bounding_box_op.cc,"@@ -73,6 +73,12 @@ class DrawBoundingBoxesOp : public OpKernel { errors::InvalidArgument(""Channel depth should be either 1 (GRY), "" ""3 (RGB), or 4 (RGBA)"")); + OP_REQUIRES( + context, boxes.dim_size(2) == 4, + errors::InvalidArgument( + ""The size of the third dimension of the box must be 4. 
Received: "", + boxes.dim_size(2))); + const int64 batch_size = images.dim_size(0); const int64 height = images.dim_size(1); const int64 width = images.dim_size(2); ",1,train f7cc8755ac6683131fdfa7a8a121f9d7a9dec6fb,tensorflow/tensorflow,"Add several missing validations in SDCA PiperOrigin-RevId: 372172877 Change-Id: Id366da962432e18dcbfac847d11e98488bebb70a",sdca_internal.cc,"@@ -99,6 +99,10 @@ Status ModelWeights::Initialize(OpKernelContext* const context) { OpInputList sparse_weights_inputs; TF_RETURN_IF_ERROR( context->input_list(""sparse_weights"", &sparse_weights_inputs)); + if (sparse_indices_inputs.size() != sparse_weights_inputs.size()) + return errors::InvalidArgument( + ""sparse_indices and sparse_weights must have the same length, got "", + sparse_indices_inputs.size(), "" and "", sparse_weights_inputs.size()); OpInputList dense_weights_inputs; TF_RETURN_IF_ERROR( context->input_list(""dense_weights"", &dense_weights_inputs)); @@ -106,10 +110,20 @@ Status ModelWeights::Initialize(OpKernelContext* const context) { OpOutputList sparse_weights_outputs; TF_RETURN_IF_ERROR(context->output_list(""out_delta_sparse_weights"", &sparse_weights_outputs)); + if (sparse_weights_outputs.size() != sparse_weights_inputs.size()) + return errors::InvalidArgument( + ""out_delta_sparse_weights and sparse_weights must have the same "" + ""length, got "", + sparse_weights_outputs.size(), "" and "", sparse_weights_inputs.size()); OpOutputList dense_weights_outputs; TF_RETURN_IF_ERROR( context->output_list(""out_delta_dense_weights"", &dense_weights_outputs)); + if (dense_weights_outputs.size() != dense_weights_inputs.size()) + return errors::InvalidArgument( + ""out_delta_dense_weights and dense_weights must have the same length, "" + ""got "", + dense_weights_outputs.size(), "" and "", dense_weights_inputs.size()); for (int i = 0; i < sparse_weights_inputs.size(); ++i) { Tensor* delta_t; @@ -327,13 +341,28 @@ Status Examples::Initialize(OpKernelContext* const context, OpInputList sparse_example_indices_inputs; TF_RETURN_IF_ERROR(context->input_list(""sparse_example_indices"", &sparse_example_indices_inputs)); + if (sparse_example_indices_inputs.size() != num_sparse_features) + return errors::InvalidArgument( + ""Expected "", num_sparse_features, + "" tensors in sparse_example_indices but got "", + sparse_example_indices_inputs.size()); OpInputList sparse_feature_indices_inputs; TF_RETURN_IF_ERROR(context->input_list(""sparse_feature_indices"", &sparse_feature_indices_inputs)); + if (sparse_feature_indices_inputs.size() != num_sparse_features) + return errors::InvalidArgument( + ""Expected "", num_sparse_features, + "" tensors in sparse_feature_indices but got "", + sparse_feature_indices_inputs.size()); OpInputList sparse_feature_values_inputs; if (num_sparse_features_with_values > 0) { TF_RETURN_IF_ERROR(context->input_list(""sparse_feature_values"", &sparse_feature_values_inputs)); + if (sparse_feature_values_inputs.size() != num_sparse_features_with_values) + return errors::InvalidArgument( + ""Expected "", num_sparse_features_with_values, + "" tensors in sparse_feature_values but got "", + sparse_feature_values_inputs.size()); } const Tensor* example_weights_t; @@ -400,6 +429,13 @@ Status Examples::CreateSparseFeatureRepresentation( sparse_example_indices_inputs[i].template flat(); auto feature_indices = sparse_feature_indices_inputs[i].template flat(); + if (example_indices.size() != feature_indices.size()) { + mutex_lock l(mu); + result = errors::InvalidArgument( + ""Found mismatched 
example_indices and feature_indices ["", + example_indices, ""] vs ["", feature_indices, ""]""); + return; + } // Parse features for each example. Features for a particular example // are at the offsets (start_id, end_id] ",1,train 376c352a37ce5a68b721406dc7e77ac4b6cf483d,tensorflow/tensorflow,"Don't do any work if output tensor is null (prevent div by 0) PiperOrigin-RevId: 372208700 Change-Id: Iea6b6293e887ade8538facfdb50fb931e17f511e",maxpooling_op.cc,"@@ -1088,6 +1088,8 @@ class MaxPoolingGradWithArgmaxOp : public OpKernel { OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( {0}, 0, out_shape, &grad_out)); + if (out_shape.num_elements() == 0) return; // nothing to be done + LaunchMaxPoolingGradWithArgmax::launch( context, params, grad_in, argmax, grad_out, include_batch_in_index_); } ",1,test a3d9f9be9ac2296615644061b40cefcee341dcc4,tensorflow/tensorflow,"Add missing validation to pooling_ops_3d PiperOrigin-RevId: 372218727 Change-Id: I6b9ed4266aa7286c02f1f230d7bea922c1be547e",pooling_ops_3d.cc,"@@ -698,6 +698,19 @@ class MaxPooling3dGradGradOp : public OpKernel { OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( {2}, 0, tensor_out.shape(), &output)); + // Given access patterns in LaunchMaxPooling3dGradGradOp, these tensors must + // have elements. + OP_REQUIRES(context, tensor_in.NumElements() > 0, + errors::InvalidArgument(""received empty tensor tensor_in: "", + tensor_in.DebugString())); + OP_REQUIRES(context, tensor_out.NumElements() > 0, + errors::InvalidArgument(""received empty tensor tensor_out: "", + tensor_out.DebugString())); + OP_REQUIRES( + context, out_grad_backprop.NumElements() > 0, + errors::InvalidArgument(""received empty tensor out_grad_backprop: "", + out_grad_backprop.DebugString())); + LaunchMaxPooling3dGradGradOp::launch( context, params, tensor_in, tensor_out, out_grad_backprop, output); } ",1,test ecf768cbe50cedc0a45ce1ee223146a3d3d26d23,tensorflow/tensorflow,"Add missing validations to reverse_sequence_op PiperOrigin-RevId: 372178683 Change-Id: Iac97ebab5b342f1262c77a7d9bcb4267b305ce5b",reverse_sequence_op.cc,"@@ -115,6 +115,10 @@ class ReverseSequenceOp : public OpKernel { : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr(""batch_dim"", &batch_dim_)); OP_REQUIRES_OK(context, context->GetAttr(""seq_dim"", &seq_dim_)); + OP_REQUIRES(context, batch_dim_ >= 0, + errors::InvalidArgument(""Invalid batch_dim "", batch_dim_)); + OP_REQUIRES(context, seq_dim_ >= 0, + errors::InvalidArgument(""Invalid seq_dim "", seq_dim_)); } void Compute(OpKernelContext* context) override { ",1,train 63c6a29d0f2d692b247f7bf81f8732d6442fad09,tensorflow/tensorflow,"Add missing validation, prevent heap OOB PiperOrigin-RevId: 372246723 Change-Id: I1a454a643810e77d7d14821b342098c56a09fbbf",pooling_ops_3d.cc,"@@ -693,6 +693,7 @@ class MaxPooling3dGradGradOp : public OpKernel { Pool3dParameters params{context, ksize_, stride_, padding_, data_format_, tensor_in.shape()}; + if (!context->status().ok()) return; // params is invalid Tensor* output = nullptr; OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( @@ -710,6 +711,17 @@ class MaxPooling3dGradGradOp : public OpKernel { context, out_grad_backprop.NumElements() > 0, errors::InvalidArgument(""received empty tensor out_grad_backprop: "", out_grad_backprop.DebugString())); + OP_REQUIRES(context, + tensor_in.NumElements() == out_grad_backprop.NumElements(), + errors::InvalidArgument(""tensor_in and out_grad_backprop must "" + ""have same number of elements, got <"", + 
tensor_in.DebugString(), ""> and <"", + out_grad_backprop.DebugString(), "">"")); + OP_REQUIRES( + context, tensor_out.NumElements() == output->NumElements(), + errors::InvalidArgument( + ""tensor_out and output must have same number of elements, got <"", + tensor_out.DebugString(), ""> and <"", output->DebugString(), "">"")); LaunchMaxPooling3dGradGradOp::launch( context, params, tensor_in, tensor_out, out_grad_backprop, output); ",1,train 6fc9141f42f6a72180ecd24021c3e6b36165fe0d,tensorflow/tensorflow,"Fix assertion failure in pooling_ops_3d PiperOrigin-RevId: 372364504 Change-Id: Iecde4fe26b47a8fa935d6e2611b5585ed5777781",pooling_ops_3d.cc,"@@ -383,6 +383,19 @@ struct LaunchAvgPooling3dGradOp { const std::array& output_shape, const std::array& padding, TensorFormat data_format, Tensor* output) { + OP_REQUIRES( + context, tensor_in_shape.dim_size(0) == out_backprop.dim_size(0), + errors::InvalidArgument( + ""Expected first dimension of tensor_in_shape and "" + ""out_backprop to match, got "", + tensor_in_shape.dim_size(0), "" and "", out_backprop.dim_size(0))); + OP_REQUIRES( + context, tensor_in_shape.dim_size(4) == out_backprop.dim_size(4), + errors::InvalidArgument( + ""Expected last dimension of tensor_in_shape and "" + ""out_backprop to match, got "", + tensor_in_shape.dim_size(4), "" and "", out_backprop.dim_size(4))); + output->flat().setZero(); std::array input_size = {{tensor_in_shape.dim_size(3), tensor_in_shape.dim_size(2), ",1,train 12c727cee857fa19be717f336943d95fca4ffe4f,tensorflow/tensorflow,"Validate inputs of `FractionalAvgPoolGrad`. PiperOrigin-RevId: 372420640 Change-Id: Icc583928e6cdc3062e12498e4d2337a8fe3da016",fractional_avg_pool_op.cc,"@@ -250,6 +250,19 @@ class FractionalAvgPoolGradOp : public OpKernel { const int64 out_cols = out_backprop.dim_size(2); const int64 out_depth = out_backprop.dim_size(3); + OP_REQUIRES(context, row_seq_tensor.NumElements() > out_rows, + errors::InvalidArgument(""Given out_backprop shape "", + out_backprop.shape().DebugString(), + "", row_seq_tensor must have at least "", + out_rows + 1, "" elements, but got "", + row_seq_tensor.NumElements())); + OP_REQUIRES(context, col_seq_tensor.NumElements() > out_cols, + errors::InvalidArgument(""Given out_backprop shape "", + out_backprop.shape().DebugString(), + "", col_seq_tensor must have at least "", + out_cols + 1, "" elements, but got "", + col_seq_tensor.NumElements())); + auto row_seq_tensor_flat = row_seq_tensor.flat(); auto col_seq_tensor_flat = col_seq_tensor.flat(); auto orig_input_tensor_shape_flat = orig_input_tensor_shape.flat(); ",1,train a74768f8e4efbda4def9f16ee7e13cf3922ac5f7,tensorflow/tensorflow,"Prevent heap OOB error in `MaxPoolGrad` PiperOrigin-RevId: 372424854 Change-Id: Idac0f23867ad8b0601cafbaaa52d5e64269e63a7",maxpooling_op.cc,"@@ -199,7 +199,9 @@ static void SpatialMaxPoolWithArgMaxHelper( // CHECK(input_backprop_index >= in_start && input_backprop_index < // in_end) FastBoundsCheck(input_backprop_index - in_start, in_end - in_start); - input_backprop_flat(input_backprop_index) += out_backprop_flat(index); + if (index < out_backprop.NumElements()) { + input_backprop_flat(input_backprop_index) += out_backprop_flat(index); + } } } }; ",1,train 32fdcbff9d06d010d908fcc4bd4b36eb3ce15925,tensorflow/tensorflow,"Validate arguments of `FractionalMaxPoolGrad` PiperOrigin-RevId: 372274982 Change-Id: If46b0c442efa4eaef635ce6a476717060420122c",fractional_max_pool_op.cc,"@@ -235,6 +235,20 @@ class FractionalMaxPoolGradOp : public OpKernel { // Just to make it similar to 
FractionalMaxPoolOp. constexpr int tensor_in_and_out_dims = 4; + OP_REQUIRES( + context, tensor_in.dims() == tensor_in_and_out_dims, + errors::InvalidArgument(""orig_input should be a tensor of rank 4, got "", + tensor_in.DebugString())); + OP_REQUIRES(context, tensor_in.NumElements() > 0, + errors::InvalidArgument(""orig_input must not be empty, got "", + tensor_in.DebugString())); + OP_REQUIRES(context, tensor_out.dims() == tensor_in_and_out_dims, + errors::InvalidArgument( + ""orig_output should be a tensor of rank 4, got "", + tensor_out.DebugString())); + OP_REQUIRES(context, tensor_out.NumElements() > 0, + errors::InvalidArgument(""orig_output must not be empty, got "", + tensor_out.DebugString())); std::vector input_size(tensor_in_and_out_dims); std::vector output_size(tensor_in_and_out_dims); for (int i = 0; i < tensor_in_and_out_dims; ++i) { ",1,train b1b323042264740c398140da32e93fb9c2c9f33e,tensorflow/tensorflow,"Fix SEGV in CTC ops PiperOrigin-RevId: 372430279 Change-Id: I7ec2ad9d6f4d0980c33de45d27c6b17df5c6e26f",ctc_decoder_ops.cc,"@@ -70,6 +70,9 @@ class CTCDecodeHelper { if (inputs_shape.dims() != 3) { return errors::InvalidArgument(""inputs is not a 3-Tensor""); } + if (inputs_shape.num_elements() == 0) { + return errors::InvalidArgument(""inputs must not be empty""); + } const int64 max_time = inputs_shape.dim_size(0); const int64 batch_size = inputs_shape.dim_size(1); ",1,train 5899741d0421391ca878da47907b1452f06aaf1b,tensorflow/tensorflow,"Fix heap OOB read in dequantize op. Also fixes SEGV in same op PiperOrigin-RevId: 372437896 Change-Id: I135e94d360c2a1ce374c10f7e0fed1af603dbc02",dequantize_op.cc,"@@ -98,6 +98,18 @@ class DequantizeOp : public OpKernel { if (axis_ > -1) { num_slices = input.dim_size(axis_); } + OP_REQUIRES(ctx, input_min_tensor.NumElements() == num_slices, + errors::InvalidArgument( + ""input_min_tensor must have as many elements as input on "" + ""the dequantization axis ("", + axis_, ""), got "", input_min_tensor.NumElements(), + "", expected "", num_slices)); + OP_REQUIRES(ctx, input_max_tensor.NumElements() == num_slices, + errors::InvalidArgument( + ""input_max_tensor must have as many elements as input on "" + ""the dequantization axis ("", + axis_, ""), got "", input_max_tensor.NumElements(), + "", expected "", num_slices)); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); ",1,train 6972f9dfe325636b3db4e0bc517ee22a159365c0,tensorflow/tensorflow,"Add missing valuidation to FusedBatchNorm. 
PiperOrigin-RevId: 372460336 Change-Id: Ic8c4e4de67c58a741bd87f2e182bed07247d1126",fused_batch_norm_op.cc,"@@ -1282,6 +1282,32 @@ class FusedBatchNormOpBase : public OpKernel { errors::InvalidArgument(""Error during tensor copy."")); } + const auto num_channels = GetTensorDim(x, tensor_format_, 'C'); + OP_REQUIRES( + context, scale.NumElements() == num_channels, + errors::InvalidArgument(""scale must have the same number of elements "" + ""as the channels of x, got "", + scale.NumElements(), "" and "", num_channels)); + OP_REQUIRES( + context, offset.NumElements() == num_channels, + errors::InvalidArgument(""offset must have the same number of elements "" + ""as the channels of x, got "", + offset.NumElements(), "" and "", num_channels)); + if (estimated_mean.NumElements() != 0) { + OP_REQUIRES(context, estimated_mean.NumElements() == num_channels, + errors::InvalidArgument( + ""mean must be empty or have the same number of "" + ""elements as the channels of x, got "", + estimated_mean.NumElements(), "" and "", num_channels)); + } + if (estimated_variance.NumElements() != 0) { + OP_REQUIRES(context, estimated_variance.NumElements() == num_channels, + errors::InvalidArgument( + ""variance must be empty or have the same number of "" + ""elements as the channels of x, got "", + estimated_variance.NumElements(), "" and "", num_channels)); + } + if (has_side_input_) { OP_REQUIRES(context, side_input->shape() == x.shape(), errors::InvalidArgument( @@ -1294,7 +1320,7 @@ class FusedBatchNormOpBase : public OpKernel { // NOTE(ezhulenev): This requirement is coming from implementation // details of cudnnBatchNormalizationForwardTrainingEx. OP_REQUIRES( - context, !is_training_ || x.dim_size(3) % 4 == 0, + context, !is_training_ || num_channels % 4 == 0, errors::InvalidArgument(""FusedBatchNorm with activation requires "" ""channel dimension to be a multiple of 4."")); } ",1,train 4c0ee937c0f61c4fc5f5d32d9bb4c67428012a60,tensorflow/tensorflow,"Prevent overflow in sparse op PiperOrigin-RevId: 372442006 Change-Id: I60fe31cd7e56fb3501e97c63500caf902ddeee96",sparse_split_op.cc,"@@ -63,11 +63,18 @@ class SparseSplitOp : public OpKernel { input_shape.vec()(axis), ""), got "", num_split_)); + // Prevent overflow by constructing the dense shape separately + TensorShape dense_shape; + const auto input_shape_flat = input_shape.flat(); + for (int i = 0; i < input_shape.NumElements(); i++) { + OP_REQUIRES_OK(context, + dense_shape.AddDimWithStatus(input_shape_flat(i))); + } + sparse::SparseTensor sparse_tensor; OP_REQUIRES_OK(context, - sparse::SparseTensor::Create( - input_indices, input_values, - TensorShape(input_shape.vec()), &sparse_tensor)); + sparse::SparseTensor::Create(input_indices, input_values, + dense_shape, &sparse_tensor)); std::vector outputs; OP_REQUIRES_OK(context, sparse::SparseTensor::Split( ",1,train 49847ae69a4e1a97ae7f2db5e217c77721e37948,tensorflow/tensorflow,"Fix division by zero in TFLite padding. 
PiperOrigin-RevId: 370777494 Change-Id: Ic1331e4a1603b9e4c8aa183012a6c8237410aa0f",padding.h,"@@ -44,6 +44,11 @@ inline int ComputePaddingWithOffset(int stride, int dilation_rate, int in_size, inline int ComputeOutSize(TfLitePadding padding, int image_size, int filter_size, int stride, int dilation_rate = 1) { int effective_filter_size = (filter_size - 1) * dilation_rate + 1; + + // TODO(b/186448822): This uses 0 since the function has no other way to + // report error case + if (stride == 0) return 0; + switch (padding) { case kTfLitePaddingSame: return (image_size + stride - 1) / stride; ",1,train 5f7975d09eac0f10ed8a17dbb6f5964977725adc,tensorflow/tensorflow,"Prevent another div by 0 in optimized pooling implementations TFLite PiperOrigin-RevId: 370800091 Change-Id: I2119352f57fb5ca4f2051e0e2d749403304a979b",pooling.cc,"@@ -87,6 +87,10 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) { auto padding = params->padding; int out_width, out_height; + // Prevent division by 0 in optimized pooling implementations + TF_LITE_ENSURE(context, params->stride_height > 0); + TF_LITE_ENSURE(context, params->stride_width > 0); + data->padding = ComputePaddingHeightWidth( params->stride_height, params->stride_width, 1, 1, height, width, params->filter_height, params->filter_width, padding, &out_height, ",1,train 5f7975d09eac0f10ed8a17dbb6f5964977725adc,tensorflow/tensorflow,"Prevent another div by 0 in optimized pooling implementations TFLite PiperOrigin-RevId: 370800091 Change-Id: I2119352f57fb5ca4f2051e0e2d749403304a979b",pooling_test.cc,"@@ -1151,5 +1151,18 @@ TEST(FloatPoolingOpTest, L2PoolPaddingValidSlide1) { EXPECT_THAT(m.GetOutput(), ElementsAreArray({3.5, 6.0, 6.5})); } +#ifdef GTEST_HAS_DEATH_TEST +TEST(FloatPoolingOpTest, MaxPoolWithZeroStride) { + EXPECT_DEATH( + FloatPoolingOpModel m(BuiltinOperator_MAX_POOL_2D, + /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}}, + /*filter_width=*/2, /*filter_height=*/2, + /*output=*/{TensorType_FLOAT32, {}}, + /*padding=*/Padding_VALID, + /*stride_w=*/0, /*stride_h=*/0), + ""Cannot allocate tensors""); +} +#endif + } // namespace } // namespace tflite ",1,train 0d45ea1ca641b21b73bcf9c00e0179cda284e7e7,tensorflow/tensorflow,"Prevent one more div by 0 in TFLite PiperOrigin-RevId: 370800114 Change-Id: I6b956aeb8c458cc6f514408d2e89ffacfe249e57",space_to_depth.cc,"@@ -61,6 +61,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); const int block_size = params->block_size; + TF_LITE_ENSURE(context, block_size > 0); const int input_height = input->dims->data[1]; const int input_width = input->dims->data[2]; int output_height = input_height / block_size; ",1,train 801c1c6be5324219689c98e1bd3e0ca365ee834d,tensorflow/tensorflow,"Fix another division by 0 in TFLite PiperOrigin-RevId: 370800181 Change-Id: I924809166a6131f5075e6d45c455106538d755f9",transpose_conv.cc,"@@ -591,6 +591,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const auto* params = reinterpret_cast(node->builtin_data); + // Prevent divisions by 0 + TF_LITE_ENSURE(context, params->stride_height > 0); + TF_LITE_ENSURE(context, params->stride_width > 0); + // Resize any deferred dynamic tensors if (IsDynamicTensor(output)) { TF_LITE_ENSURE_OK(context, ResizeTensor(context, output_shape, output)); ",1,train 8e45822aa0b9f5df4b4c64f221e64dc930a70a9d,tensorflow/tensorflow,"Handle one more division by 0 in TFLite. 
PiperOrigin-RevId: 370800140 Change-Id: I9ab42e5aaccf02f226d1282611490a54cf7d273e",gather_nd.cc,"@@ -155,6 +155,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, kOutputTensor, &output)); + // Prevent division by 0 in the helper + TF_LITE_ENSURE(context, NumElements(params) > 0); + switch (indices->type) { case kTfLiteInt32: return EvalGatherNd(context, params, indices, output); ",1,test 953f28dca13c92839ba389c055587cfe6c723578,tensorflow/tensorflow,"Prevent a null pointer exception in TFLite PiperOrigin-RevId: 370800206 Change-Id: Idd437ebce4ff224120d8eefc1c14c062173b71d6",maximum_minimum.cc,"@@ -157,35 +157,37 @@ template TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { OpContext op_context(context, node); - switch (op_context.output->type) { - case kTfLiteFloat32: - TFLiteOperation(context, node, op_context); - break; - case kTfLiteUInt8: - TFLiteOperation(context, node, - op_context); - break; - case kTfLiteInt8: - TFLiteOperation(context, node, op_context); - break; - case kTfLiteInt32: - TFLiteOperation(context, node, - op_context); - break; - case kTfLiteInt64: - TFLiteOperation(context, node, - op_context); - break; - case kTfLiteInt16: - TFLiteOperation(context, node, - op_context); - break; - default: - context->ReportError(context, - ""Type %d is currently not supported by Maximum."", - op_context.output->type); - return kTfLiteError; - } + // If inputs have no element, shortcircuit. + if (NumElements(op_context.input1) == 0 || + NumElements(op_context.input2) == 0) { + return kTfLiteOk; + } + + switch (op_context.output->type) { + case kTfLiteFloat32: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteUInt8: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt8: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt32: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt64: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt16: + TFLiteOperation(context, node, op_context); + break; + default: + context->ReportError(context, + ""Type %d is currently not supported by Maximum."", + op_context.output->type); + return kTfLiteError; + } return kTfLiteOk; } ",1,train 9c1dc920d8ffb4893d6c9d27d1f039607b326743,tensorflow/tensorflow,"Prevent infinite loop/stack overflow in TFLite `while` op. PiperOrigin-RevId: 370800333 Change-Id: I6a2e4ff849da339545c449db2af7e11ce6ff02c3",while.cc,"@@ -138,6 +138,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { auto* subgraphs = this_subgraph->GetSubgraphs(); TF_LITE_ENSURE(context, op_data->cond_subgraph_index < subgraphs->size()); TF_LITE_ENSURE(context, op_data->body_subgraph_index < subgraphs->size()); + TF_LITE_ENSURE(context, + op_data->cond_subgraph_index != op_data->body_subgraph_index); Subgraph* cond_subgraph = (*subgraphs)[op_data->cond_subgraph_index].get(); Subgraph* body_subgraph = (*subgraphs)[op_data->body_subgraph_index].get(); ",1,train c6173f5fe66cdbab74f4f869311fe6aae2ba35f4,tensorflow/tensorflow,"TFLite: Error out when the graph has a recurion. Recursion is currently unsupported. PiperOrigin-RevId: 371708957 Change-Id: I8dfad0d85cbfe08e39ae8ea7bad21254ddee5003",subgraph.cc,"@@ -156,6 +156,42 @@ const char* GetTFLiteOpName(const TfLiteRegistration& op_reg) { return tflite::EnumNamesBuiltinOperator()[op_reg.builtin_code]; } +// An utility test to detect if the subgraph is abused: +// 1. 
Detects if recursion exists in the graph (recursion is not currently +// supported. +// 2. Detects if the interpreter / subgraph is used in multiple subgraphs. +// Note: It's clearly documented that the interpreter / subgraph are not +// thread-safe. This serves as a check with possible false negatives +// unless we switch to atomic boolean flags. +class SubgraphGuard { + public: + SubgraphGuard(TfLiteContext* context, bool* is_subgraph_in_use) + : is_subgraph_in_use_(is_subgraph_in_use) { + if (*is_subgraph_in_use_) { + TF_LITE_KERNEL_LOG( + context, + ""Subgraph is already in use. Using an interpreter or a subgraph in "" + ""multiple threads is not supported. Recursion in the graph is not "" + ""supported.""); + status_ = kTfLiteError; + } else { + *is_subgraph_in_use_ = true; + } + } + ~SubgraphGuard() { + // If tht original status was OK, recover the boolean flag. + if (status_ == kTfLiteOk) { + *is_subgraph_in_use_ = false; + } + } + + TfLiteStatus status() const { return status_; } + + private: + TfLiteStatus status_ = kTfLiteOk; + bool* is_subgraph_in_use_; +}; + } // namespace // A trivial implementation of GraphInfo around the Interpreter. @@ -655,6 +691,7 @@ TfLiteStatus Subgraph::BytesRequired(TfLiteType type, const int* dims, TfLiteStatus Subgraph::AllocateTensors() { TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(), ""AllocateTensors""); + if (!consistent_) { ReportError(""AllocateTensors() called on inconsistent model.""); return kTfLiteError; @@ -678,6 +715,12 @@ TfLiteStatus Subgraph::AllocateTensors() { return kTfLiteOk; } + // Note `AllocateTensors` sometimes calls itself recursively above + // for delegates. Therefore only the logic below need to be guarded + // by `SubgraphGuard`. + SubgraphGuard guard(&context_, &is_subgraph_in_use_); + TF_LITE_ENSURE_OK(&context_, guard.status()); + next_execution_plan_index_to_prepare_ = 0; next_execution_plan_index_to_plan_allocation_ = 0; next_original_execution_plan_index_to_prepare_ = 0; @@ -1014,6 +1057,9 @@ TfLiteStatus Subgraph::PrepareOpsAndTensors() { } TfLiteStatus Subgraph::Invoke() { + SubgraphGuard guard(&context_, &is_subgraph_in_use_); + TF_LITE_ENSURE_OK(&context_, guard.status()); + if (!consistent_) { ReportError(""Invoke called on model that is not consistent.""); return kTfLiteError; ",1,train c6173f5fe66cdbab74f4f869311fe6aae2ba35f4,tensorflow/tensorflow,"TFLite: Error out when the graph has a recurion. Recursion is currently unsupported. PiperOrigin-RevId: 371708957 Change-Id: I8dfad0d85cbfe08e39ae8ea7bad21254ddee5003",subgraph.h,"@@ -759,6 +759,10 @@ class Subgraph { // Whether memory planner should be instantiated to retain intermediates for // debugging. bool preserve_all_tensors_ = false; + + // Whether the subgraph is currently in use (e.g. running the `Invoke` + // or `AllocateTensors` functions). + bool is_subgraph_in_use_ = false; }; } // namespace tflite ",1,train c6173f5fe66cdbab74f4f869311fe6aae2ba35f4,tensorflow/tensorflow,"TFLite: Error out when the graph has a recurion. Recursion is currently unsupported. 
PiperOrigin-RevId: 371708957 Change-Id: I8dfad0d85cbfe08e39ae8ea7bad21254ddee5003",while.cc,"@@ -138,8 +138,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { auto* subgraphs = this_subgraph->GetSubgraphs(); TF_LITE_ENSURE(context, op_data->cond_subgraph_index < subgraphs->size()); TF_LITE_ENSURE(context, op_data->body_subgraph_index < subgraphs->size()); - TF_LITE_ENSURE(context, - op_data->cond_subgraph_index != op_data->body_subgraph_index); Subgraph* cond_subgraph = (*subgraphs)[op_data->cond_subgraph_index].get(); Subgraph* body_subgraph = (*subgraphs)[op_data->body_subgraph_index].get(); ",1,train c6173f5fe66cdbab74f4f869311fe6aae2ba35f4,tensorflow/tensorflow,"TFLite: Error out when the graph has a recurion. Recursion is currently unsupported. PiperOrigin-RevId: 371708957 Change-Id: I8dfad0d85cbfe08e39ae8ea7bad21254ddee5003",model_test.cc,"@@ -600,6 +600,25 @@ TEST(BasicFlatBufferModel, TestHandleMalformedModelReuseTensor) { ASSERT_NE(interpreter->AllocateTensors(), kTfLiteOk); } +// Recursion & reentrant are not supported in TFLite. +// The test ensures it fails gracefullly instead of crashing with +// a stack overflow. +TEST(BasicFlatBufferModel, TestUnsupportedRecursion) { + const auto model_path = + ""tensorflow/lite/testdata/unsupported_recursion.bin""; + + std::unique_ptr model = + FlatBufferModel::BuildFromFile(model_path); + ASSERT_NE(model, nullptr); + + tflite::ops::builtin::BuiltinOpResolver resolver; + InterpreterBuilder builder(*model, resolver); + std::unique_ptr interpreter; + ASSERT_EQ(builder(&interpreter), kTfLiteOk); + ASSERT_NE(interpreter, nullptr); + ASSERT_NE(interpreter->AllocateTensors(), kTfLiteOk); +} + // The models here have a buffer index for a tensor pointing to a null buffer. // This results in the tensor being interpreted as read-write, but the model // assumes the tensor is read-only. As such, `interpreter->Invoke()` would ",1,train f8378920345f4f4604202d4ab15ef64b2aceaa16,tensorflow/tensorflow,"Prevent a null pointer dereference in TFLite. PiperOrigin-RevId: 370800353 Change-Id: Ic9c9712ce5c6e384c954dcd640a5bd9ff05c9a05",subgraph.cc,"@@ -1060,10 +1060,17 @@ TfLiteStatus Subgraph::Invoke() { TF_LITE_ENSURE_STATUS(EnsureTensorDataIsReadable(tensor_index)); } if (tensor->data.raw == nullptr && tensor->bytes > 0) { - if (registration.builtin_code == kTfLiteBuiltinReshape && i == 1) { + if (registration.builtin_code == kTfLiteBuiltinReshape && i == 1 && + tensor->dims->size != 1) { // In general, having a tensor here with no buffer will be an error. - // However, for the reshape operator, the second input tensor is only - // used for the shape, not for the data. Thus, null buffer is ok. + // However, for the reshape operator, the second input tensor is + // sometimes only used for the shape, not for the data. Thus, null + // buffer is ok in this situation. + // The situation where null buffer is not ok for reshape operator is + // only when there are 2 inputs given to the node and the one + // corresponding to the shape (i == 1) is a vector that contains all + // dimensions. 
See `GetOutputShape()` function in + // `tensorflow/lite/kernels/reshape.cc` continue; } else { // In all other cases, we need to return an error as otherwise we will ",1,test 2c74674348a4708ced58ad6eb1b23354df8ee044,tensorflow/tensorflow,"Prevent division by 0 PiperOrigin-RevId: 370979352 Change-Id: Ic79191c316d986fc6072ecaebfec9d5f2b924d00",batch_to_space_nd.cc,"@@ -78,6 +78,7 @@ TfLiteStatus ResizeOutputTensor(TfLiteContext* context, int output_batch_size = input_size->data[0]; for (int dim = 0; dim < spatial_dims_num; ++dim) { // Number of batch must be multiple of (block_shape[dim]). + TF_LITE_ENSURE(context, block_shape[dim] != 0); TF_LITE_ENSURE_EQ(context, output_batch_size % block_shape[dim], 0); output_batch_size = output_batch_size / block_shape[dim]; output_size->data[dim + 1] = input_size->data[dim + 1] * block_shape[dim] - ",1,train ff489d95a9006be080ad14feb378f2b4dac35552,tensorflow/tensorflow,"Prevent division by 0. PiperOrigin-RevId: 370962554 Change-Id: I0b9b62f4d8e1046dd88f9433f8dfeaf61a901680",conv.cc,"@@ -545,6 +545,7 @@ TfLiteStatus Prepare(KernelType kernel_type, TfLiteContext* context, // Only one scale factor per batch is typically necessary. See optimized // implementation for why we need to allocate for the height of the inputs // flattened to 2D. + TF_LITE_ENSURE(context, channels_in != 0); const int height = NumElements(input) / channels_in; int scaling_dims[1] = {height}; if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) { @@ -587,6 +588,7 @@ TfLiteStatus Prepare(KernelType kernel_type, TfLiteContext* context, input_offsets->type = kTfLiteInt32; input_offsets->allocation_type = kTfLiteArenaRw; // See above comment for the need to allocate for height of inputs. + TF_LITE_ENSURE(context, channels_in != 0); const int height = NumElements(input) / channels_in; const int input_offset_dims[1] = {height}; if (!TfLiteIntArrayEqualsArray(input_offsets->dims, 1, @@ -886,8 +888,9 @@ TfLiteStatus EvalHybridPerChannel(TfLiteContext* context, TfLiteNode* node, CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); - const int input_size = NumElements(input) / SizeOfDimension(input, 0); const int batch_size = SizeOfDimension(input, 0); + TF_LITE_ENSURE(context, batch_size != 0); + const int input_size = NumElements(input) / batch_size; TfLiteTensor* quantized_input_tensor; TF_LITE_ENSURE_OK(context, GetTemporarySafe(context, node, data->input_quantized_index, @@ -989,8 +992,9 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node, CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); - const int input_size = NumElements(input) / SizeOfDimension(input, 0); const int batch_size = SizeOfDimension(input, 0); + TF_LITE_ENSURE(context, batch_size != 0); + const int input_size = NumElements(input) / batch_size; const float* input_ptr = GetTensorData(input); TfLiteTensor* quantized_input_tensor; ",1,train 106d8f4fb89335a2c52d7c895b7a7485465ca8d9,tensorflow/tensorflow,"Prevent division by 0 in TFLite PiperOrigin-RevId: 370800311 Change-Id: I21ccdbd31c30118acc67df8751807ee2e0b12f91",depth_to_space.cc,"@@ -61,6 +61,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); const int block_size = params->block_size; + TF_LITE_ENSURE(context, block_size > 0); const int input_height = input->dims->data[1]; const int input_width = input->dims->data[2]; const int input_channels = input->dims->data[3]; 
",1,train 106d8f4fb89335a2c52d7c895b7a7485465ca8d9,tensorflow/tensorflow,"Prevent division by 0 in TFLite PiperOrigin-RevId: 370800311 Change-Id: I21ccdbd31c30118acc67df8751807ee2e0b12f91",depth_to_space_test.cc,"@@ -60,6 +60,11 @@ TEST(DepthToSpaceOpModel, BadBlockSize) { EXPECT_DEATH(DepthToSpaceOpModel({TensorType_FLOAT32, {1, 1, 1, 4}}, 4), ""Cannot allocate tensors""); } + +TEST(DepthToSpaceOpModel, NoBlockSize) { + EXPECT_DEATH(DepthToSpaceOpModel({TensorType_FLOAT32, {1, 1, 1, 4}}, 0), + ""Cannot allocate tensors""); +} #endif TEST(DepthToSpaceOpModel, Float32) { ",1,train 106d8f4fb89335a2c52d7c895b7a7485465ca8d9,tensorflow/tensorflow,"Prevent division by 0 in TFLite PiperOrigin-RevId: 370800311 Change-Id: I21ccdbd31c30118acc67df8751807ee2e0b12f91",depth_to_space.cc,"@@ -54,6 +54,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); const int block_size = params->block_size; + TF_LITE_ENSURE(context, block_size > 0); const int input_height = input->dims->data[kHeightRank]; const int input_width = input->dims->data[kWidthRank]; const int input_channels = input->dims->data[kDepthRank]; ",1,train f61c57bd425878be108ec787f4d96390579fb83e,tensorflow/tensorflow,"Prevent division by 0 PiperOrigin-RevId: 370966645 Change-Id: I831bfd96c7eb77b02d7ebb744335f59f6e5728cb",embedding_lookup.cc,"@@ -71,6 +71,10 @@ TfLiteStatus EvalSimple(TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* lookup, const TfLiteTensor* value, TfLiteTensor* output) { const int row_size = SizeOfDimension(value, 0); + if (row_size == 0) { + // Propagate empty tensor if input is empty + return kTfLiteOk; + } const int row_bytes = value->bytes / row_size; char* output_raw = GetTensorData(output); ",1,train 6d36ba65577006affb272335b7c1abd829010708,tensorflow/tensorflow,"Prevent division by 0 PiperOrigin-RevId: 370984990 Change-Id: Ib324955bbeb1cbd97c82fd5d61a00a2697c9a2de",space_to_batch_nd.cc,"@@ -79,6 +79,7 @@ TfLiteStatus ResizeOutputTensor(TfLiteContext* context, for (int dim = 0; dim < spatial_dims_num; ++dim) { int final_dim_size = (input_size->data[dim + 1] + paddings_data[dim * 2] + paddings_data[dim * 2 + 1]); + TF_LITE_ENSURE(context, block_shape[dim] != 0); TF_LITE_ENSURE_EQ(context, final_dim_size % block_shape[dim], 0); output_size->data[dim + 1] = final_dim_size / block_shape[dim]; output_batch_size *= block_shape[dim]; ",1,train 6841e522a3e7d48706a02e8819836e809f738682,tensorflow/tensorflow,"Prevent division by 0 PiperOrigin-RevId: 370995582 Change-Id: I670ffaf52d1ff8823ec31ea5f438f9125b402223",svdf.cc,"@@ -99,6 +99,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const int rank = params->rank; const int batch_size = input->dims->data[0]; const int num_filters = weights_feature->dims->data[0]; + TF_LITE_ENSURE(context, rank != 0); TF_LITE_ENSURE_EQ(context, num_filters % rank, 0); const int num_units = num_filters / rank; const int memory_size = weights_time->dims->data[1]; ",1,train b22786e7e9b7bdb6a56936ff29cc7e9968d7bc1d,tensorflow/tensorflow,"Prevent division by 0 PiperOrigin-RevId: 370998952 Change-Id: I6b1d49079624ee1447d2d9b53a8976fb356cc8f5",split.cc,"@@ -60,6 +60,7 @@ TfLiteStatus ResizeOutputTensors(TfLiteContext* context, TfLiteNode* node, TF_LITE_ENSURE(context, axis_value < NumDimensions(input)); const int input_size = SizeOfDimension(input, axis_value); + TF_LITE_ENSURE(context, num_splits != 0); TF_LITE_ENSURE_MSG(context, input_size % num_splits == 0, ""Not an even split""); 
const int slice_size = input_size / num_splits; ",1,train 3ebedd7e345453d68e279cfc3e4072648e5e12e5,tensorflow/tensorflow,"Prevent division by 0 in OneHot implementation If input indices is degenerate, the implementation would do a divide by zero. See https://github.com/tensorflow/tensorflow/blob/745d57df6d5e9bc568666a2a48ed8dd629c27241/tensorflow/lite/kernels/one_hot.cc#L68-L72 PiperOrigin-RevId: 370966870 Change-Id: Ie018337811c8016b5a1d3a277d00d5f2e19a2058",one_hot.cc,"@@ -69,6 +69,11 @@ void OneHotComputeImpl(const OneHotContext& op_context) { for (int i = 0; i < op_context.axis; ++i) { prefix_dim_size *= op_context.indices->dims->data[i]; } + if (prefix_dim_size == 0) { + // If indices tensor is degenerate, return a degenerate tensor, just like + // TensorFlow does. + return; + } const int suffix_dim_size = NumElements(op_context.indices) / prefix_dim_size; const int depth = *op_context.depth->data.i32; ",1,train 4253f96a58486ffe84b61c0415bb234a4632ee73,tensorflow/tensorflow,"Fix integer overflow in TFLite concat PiperOrigin-RevId: 371013841 Change-Id: I6a4782ce7ca753e23ff31e7fb6aeb7f9d412cd29",concatenation.cc,"@@ -16,6 +16,8 @@ limitations under the License. #include +#include + #include ""tensorflow/lite/c/builtin_op_data.h"" #include ""tensorflow/lite/c/common.h"" #include ""tensorflow/lite/kernels/internal/compatibility.h"" @@ -69,6 +71,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, t->type, input_type); for (int d = 0; d < t0->dims->size; ++d) { if (d == axis) { + // Avoid integer overflow in sum_axis below + TF_LITE_ENSURE(context, t->dims->data[axis] >= 0); + TF_LITE_ENSURE(context, t->dims->data[axis] <= + std::numeric_limits::max() - sum_axis); sum_axis += t->dims->data[axis]; } else { TF_LITE_ENSURE_EQ(context, t->dims->data[d], t0->dims->data[d]); ",1,train cbda3c6b2dbbd3fbdc482ff8c0170a78ec2e97d0,tensorflow/tensorflow,"Prevent divisions by 0 PiperOrigin-RevId: 371003153 Change-Id: Idef56c95b9fcaeb97f87e18c7a674dbeb5173204",depthwise_conv.cc,"@@ -285,8 +285,8 @@ TfLiteStatus ComputeDepthMultiplier(TfLiteContext* context, int16* depth_multiplier) { int num_filter_channels = SizeOfDimension(filter, 3); int num_input_channels = SizeOfDimension(input, 3); + TF_LITE_ENSURE(context, num_input_channels != 0); TF_LITE_ENSURE_EQ(context, num_filter_channels % num_input_channels, 0); - *depth_multiplier = num_filter_channels / num_input_channels; return kTfLiteOk; } @@ -455,8 +455,9 @@ TfLiteStatus EvalHybridPerChannel(TfLiteContext* context, TfLiteNode* node, float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); - const int input_size = NumElements(input) / SizeOfDimension(input, 0); const int batch_size = SizeOfDimension(input, 0); + TF_LITE_ENSURE(context, batch_size != 0); + const int input_size = NumElements(input) / batch_size; TfLiteTensor* input_quantized; TF_LITE_ENSURE_OK(context, GetTemporarySafe(context, node, data->input_quantized_index, ",1,train c59c37e7b2d563967da813fa50fe20b21f4da683,tensorflow/tensorflow,"Prevent array write out-of-bounds. If user passes an invalid axis, then we copy one too many dimensions to the output in the loop below these checks. Even if we didn't do that, there will be further issues with an invalid axis, so we check for that right now. 
PiperOrigin-RevId: 371023299 Change-Id: I9eca37ffc2b29e8e48710f500701270ef0790224",arg_min_max.cc,"@@ -48,6 +48,9 @@ TfLiteStatus ResizeOutput(TfLiteContext* context, const TfLiteTensor* input, axis_value += NumDimensions(input); } + TF_LITE_ENSURE(context, axis_value >= 0); + TF_LITE_ENSURE(context, axis_value < NumDimensions(input)); + // Copy the input dimensions to output except the axis dimension. TfLiteIntArray* output_dims = TfLiteIntArrayCreate(NumDimensions(input) - 1); int j = 0; ",1,train 5117e0851348065ed59c991562c0ec80d9193db2,tensorflow/tensorflow,"Prevent a division by 0 PiperOrigin-RevId: 371007407 Change-Id: Iecf2718de48d6bf5a69b02a9df9deda8ec1b19d3",hashtable_lookup.cc,"@@ -112,6 +112,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 2, &value)); const int num_rows = SizeOfDimension(value, 0); + TF_LITE_ENSURE(context, num_rows != 0); const int row_bytes = value->bytes / num_rows; void* pointer = nullptr; DynamicBuffer buf; ",1,train 7c8cc4ec69cd348e44ad6a2699057ca88faad3e5,tensorflow/tensorflow,"Fix a dangerous integer overflow and a malloc of negative size. PiperOrigin-RevId: 371254154 Change-Id: I250a98a3df26328770167025670235a963a72da0",common.c,"@@ -45,8 +45,10 @@ int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size, #ifndef TF_LITE_STATIC_MEMORY TfLiteIntArray* TfLiteIntArrayCreate(int size) { - TfLiteIntArray* ret = - (TfLiteIntArray*)malloc(TfLiteIntArrayGetSizeInBytes(size)); + int alloc_size = TfLiteIntArrayGetSizeInBytes(size); + if (alloc_size <= 0) return NULL; + TfLiteIntArray* ret = (TfLiteIntArray*)malloc(alloc_size); + if (!ret) return ret; ret->size = size; return ret; } ",1,test 7c8cc4ec69cd348e44ad6a2699057ca88faad3e5,tensorflow/tensorflow,"Fix a dangerous integer overflow and a malloc of negative size. PiperOrigin-RevId: 371254154 Change-Id: I250a98a3df26328770167025670235a963a72da0",embedding_lookup_sparse.cc,"@@ -173,6 +173,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // Resize output tensor. TfLiteIntArray* output_shape = TfLiteIntArrayCreate(output_rank); + TF_LITE_ENSURE(context, output_shape != nullptr); int k = 0; int embedding_size = 1; int lookup_size = 1; ",1,test ae2daeb45abfe2c6dda539cf8d0d6f653d3ef412,tensorflow/tensorflow,"Prevent array OOB read/write PiperOrigin-RevId: 371026165 Change-Id: I26ac6372c87246e03c7eb8c94e84c84d86054b36",split_v.cc,"@@ -96,6 +96,8 @@ TfLiteStatus ResizeOutputTensors(TfLiteContext* context, TfLiteNode* node, } } + TF_LITE_ENSURE(context, axis_value >= 0); + TF_LITE_ENSURE(context, axis_value < NumDimensions(input)); const int input_size = SizeOfDimension(input, axis_value); if (minus_one_index != -1) { ",1,train ba6822bd7b7324ba201a28b2f278c29a98edbef2,tensorflow/tensorflow,"Fix OOB issue with `tf.raw_ops.SparseSparseMinimum`. 
PiperOrigin-RevId: 371005787 Change-Id: Ib686ccc077836e8b980b8b5a03936d36a8ecaf71",sparse_sparse_binary_op_shared.cc,"@@ -180,6 +180,11 @@ class SparseSparseBinaryOpShared : public OpKernel { "" for dimension "", i)); } + OP_REQUIRES( + ctx, a_indices_t->dim_size(1) == b_indices_t->dim_size(1), + errors::InvalidArgument( + ""Indices' dimensions do not match: got "", a_indices_t->dim_size(1), + "" and "", b_indices_t->dim_size(1), "" for the second dimension."")); const int num_dims = a_indices_t->dim_size(1); const auto a_indices_mat = a_indices_t->matrix(); const auto b_indices_mat = b_indices_t->matrix(); ",1,train f6fde895ef9c77d848061c0517f19d0ec2682f3a,tensorflow/tensorflow,"Validate that a and b are proper sparse tensors PiperOrigin-RevId: 373274848 Change-Id: I3a665ac3a29dee9fb69bdf408a939330cb93ea75",sparse_sparse_binary_op_shared.cc,"@@ -150,6 +150,7 @@ class SparseSparseBinaryOpShared : public OpKernel { const int64 a_nnz = a_indices_t->dim_size(0); const int64 b_nnz = b_indices_t->dim_size(0); + const auto a_values = a_values_t->vec(); const auto b_values = b_values_t->vec(); @@ -166,6 +167,14 @@ class SparseSparseBinaryOpShared : public OpKernel { ""Input shapes should be a vector but received shapes "", a_shape_t->shape().DebugString(), "" and "", b_shape_t->shape().DebugString())); + const int num_dims = a_indices_t->dim_size(1); + OP_REQUIRES( + ctx, a_shape_t->NumElements() == num_dims, + errors::InvalidArgument(""Second dimension of a_indices and length of "" + ""a_shape must match, got "", + num_dims, "" and "", a_shape_t->NumElements())); + OP_REQUIRES(ctx, num_dims > 0, + errors::InvalidArgument(""Tensors must not be empty"")); OP_REQUIRES(ctx, a_shape_t->IsSameSize(*b_shape_t), errors::InvalidArgument( ""Operands do not have the same ranks; got shapes: "", @@ -180,12 +189,6 @@ class SparseSparseBinaryOpShared : public OpKernel { "" for dimension "", i)); } - OP_REQUIRES( - ctx, a_indices_t->dim_size(1) == b_indices_t->dim_size(1), - errors::InvalidArgument( - ""Indices' dimensions do not match: got "", a_indices_t->dim_size(1), - "" and "", b_indices_t->dim_size(1), "" for the second dimension."")); - const int num_dims = a_indices_t->dim_size(1); const auto a_indices_mat = a_indices_t->matrix(); const auto b_indices_mat = b_indices_t->matrix(); std::vector a_augmented_values, b_augmented_values; ",1,test b761c9b652af2107cfbc33efd19be0ce41daa33e,tensorflow/tensorflow,"Fix `tf.raw_ops.RaggedTensorToTensor` failing CHECK. 
PiperOrigin-RevId: 368706628 Change-Id: I5c9ea4833f38835ee183ca50d63251dc89c9f3bc",ragged_tensor_to_tensor_op.cc,"@@ -208,7 +208,7 @@ class RaggedTensorToTensorBaseOp : public OpKernel { } void CalculateOutputIndexRowSplit( - const RowPartitionTensor& row_split, + OpKernelContext* context, const RowPartitionTensor& row_split, const vector& parent_output_index, INDEX_TYPE output_index_multiplier, INDEX_TYPE output_size, vector* result) { @@ -233,7 +233,8 @@ class RaggedTensorToTensorBaseOp : public OpKernel { } } if (row_split_size > 0) { - DCHECK_EQ(result->size(), row_split(row_split_size - 1)); + OP_REQUIRES(context, result->size() == row_split(row_split_size - 1), + errors::InvalidArgument(""Invalid row split size."")); } } @@ -259,7 +260,7 @@ class RaggedTensorToTensorBaseOp : public OpKernel { // result[7] = -1 because parent_output_index[value_rowids[6]] == -1 // result[8] = parent_output_index[value_rowids[7]] void CalculateOutputIndexValueRowID( - const RowPartitionTensor& value_rowids, + OpKernelContext* context, const RowPartitionTensor& value_rowids, const vector& parent_output_index, INDEX_TYPE output_index_multiplier, INDEX_TYPE output_size, vector* result) { @@ -293,7 +294,8 @@ class RaggedTensorToTensorBaseOp : public OpKernel { } result->push_back(current_output_index); } - DCHECK_EQ(result->size(), value_rowids.size()); + OP_REQUIRES(context, result->size() == value_rowids.size(), + errors::InvalidArgument(""Invalid row ids."")); } Status CalculateOutputIndex(OpKernelContext* context, int dimension, @@ -307,13 +309,13 @@ class RaggedTensorToTensorBaseOp : public OpKernel { switch (partition_type) { case RowPartitionType::VALUE_ROWIDS: CalculateOutputIndexValueRowID( - row_partition_tensor, parent_output_index, output_index_multiplier, - output_size, result); + context, row_partition_tensor, parent_output_index, + output_index_multiplier, output_size, result); return tensorflow::Status::OK(); case RowPartitionType::ROW_SPLITS: - CalculateOutputIndexRowSplit(row_partition_tensor, parent_output_index, - output_index_multiplier, output_size, - result); + CalculateOutputIndexRowSplit( + context, row_partition_tensor, parent_output_index, + output_index_multiplier, output_size, result); return tensorflow::Status::OK(); default: return errors::InvalidArgument( ",1,test c4d7afb6a5986b04505aca4466ae1951686c80f6,tensorflow/tensorflow,"Fix heap OOB / undefined behavior in `RaggedTensorToTensor` PiperOrigin-RevId: 373244623 Change-Id: I2d6cbbc8c67b238a8815bf58097f7586d87c54f2",ragged_tensor_to_tensor_op.cc,"@@ -207,8 +207,8 @@ class RaggedTensorToTensorBaseOp : public OpKernel { DCHECK_EQ(result->size(), first_dimension); } - void CalculateOutputIndexRowSplit( - OpKernelContext* context, const RowPartitionTensor& row_split, + Status CalculateOutputIndexRowSplit( + const RowPartitionTensor& row_split, const vector& parent_output_index, INDEX_TYPE output_index_multiplier, INDEX_TYPE output_size, vector* result) { @@ -232,10 +232,11 @@ class RaggedTensorToTensorBaseOp : public OpKernel { result->push_back(-1); } } - if (row_split_size > 0) { - OP_REQUIRES(context, result->size() == row_split(row_split_size - 1), - errors::InvalidArgument(""Invalid row split size."")); + if (row_split_size > 0 && result->size() != row_split(row_split_size - 1)) { + return errors::InvalidArgument(""Invalid row split size.""); } + + return Status::OK(); } // Calculate the output index of the first element of a list. 
@@ -259,20 +260,26 @@ class RaggedTensorToTensorBaseOp : public OpKernel { // result[6] = -1 because parent_output_index[value_rowids[6]] == -1 // result[7] = -1 because parent_output_index[value_rowids[6]] == -1 // result[8] = parent_output_index[value_rowids[7]] - void CalculateOutputIndexValueRowID( - OpKernelContext* context, const RowPartitionTensor& value_rowids, + Status CalculateOutputIndexValueRowID( + const RowPartitionTensor& value_rowids, const vector& parent_output_index, INDEX_TYPE output_index_multiplier, INDEX_TYPE output_size, vector* result) { const INDEX_TYPE index_size = value_rowids.size(); result->reserve(index_size); if (index_size == 0) { - return; + return Status::OK(); } INDEX_TYPE current_output_column = 0; INDEX_TYPE current_value_rowid = value_rowids(0); - DCHECK_LT(current_value_rowid, parent_output_index.size()); + + if (current_value_rowid >= parent_output_index.size()) { + return errors::InvalidArgument( + ""Got current_value_rowid="", current_value_rowid, + "" which is not less than "", parent_output_index.size()); + } + INDEX_TYPE current_output_index = parent_output_index[current_value_rowid]; result->push_back(current_output_index); for (INDEX_TYPE i = 1; i < index_size; ++i) { @@ -289,13 +296,23 @@ class RaggedTensorToTensorBaseOp : public OpKernel { } else { current_output_column = 0; current_value_rowid = next_value_rowid; - DCHECK_LT(next_value_rowid, parent_output_index.size()); + + if (next_value_rowid >= parent_output_index.size()) { + return errors::InvalidArgument( + ""Got next_value_rowid="", next_value_rowid, + "" which is not less than "", parent_output_index.size()); + } + current_output_index = parent_output_index[next_value_rowid]; } result->push_back(current_output_index); } - OP_REQUIRES(context, result->size() == value_rowids.size(), - errors::InvalidArgument(""Invalid row ids."")); + + if (result->size() != value_rowids.size()) { + return errors::InvalidArgument(""Invalid row ids.""); + } + + return Status::OK(); } Status CalculateOutputIndex(OpKernelContext* context, int dimension, @@ -308,10 +325,9 @@ class RaggedTensorToTensorBaseOp : public OpKernel { auto partition_type = GetRowPartitionTypeByDimension(dimension); switch (partition_type) { case RowPartitionType::VALUE_ROWIDS: - CalculateOutputIndexValueRowID( - context, row_partition_tensor, parent_output_index, - output_index_multiplier, output_size, result); - return tensorflow::Status::OK(); + return CalculateOutputIndexValueRowID( + row_partition_tensor, parent_output_index, output_index_multiplier, + output_size, result); case RowPartitionType::ROW_SPLITS: if (row_partition_tensor.size() - 1 > parent_output_index.size()) { return errors::InvalidArgument( @@ -319,10 +335,9 @@ class RaggedTensorToTensorBaseOp : public OpKernel { row_partition_tensor.size() - 1, "" > "", parent_output_index.size()); } - CalculateOutputIndexRowSplit( - context, row_partition_tensor, parent_output_index, - output_index_multiplier, output_size, result); - return tensorflow::Status::OK(); + return CalculateOutputIndexRowSplit( + row_partition_tensor, parent_output_index, output_index_multiplier, + output_size, result); default: return errors::InvalidArgument( ""Unsupported partition type:"", ",1,train f94ef358bb3e91d517446454edff6535bcfe8e4a,tensorflow/tensorflow,"Fix `tf.raw_ops.RaggedTensorToTensor` failing CHECK in `tensor.cc`. 
PiperOrigin-RevId: 368300502 Change-Id: I91255d23c4bfd3aa3c029aac773937c09daf3c64",ragged_tensor_to_tensor_op.cc,"@@ -345,6 +345,11 @@ class RaggedTensorToTensorBaseOp : public OpKernel { void Compute(OpKernelContext* context) override { INDEX_TYPE first_dimension; + const Tensor first_partition_tensor = + context->input(kFirstPartitionInputIndex); + OP_REQUIRES(context, first_partition_tensor.NumElements() > 0, + errors::InvalidArgument(""Invalid first partition input. Tensor "" + ""requires at least one element."")); OP_REQUIRES_OK(context, GetFirstDimensionSize(context, &first_dimension)); vector output_size; OP_REQUIRES_OK(context, ",1,train 41727ff06111117bdf86b37db198217fd7a143cc,tensorflow/tensorflow,"Validate that a and b are proper sparse tensors PiperOrigin-RevId: 373248068 Change-Id: I0a2041a0747901b3f00387a6a3bce9bca6b0b3b1",sparse_add_op.cc,"@@ -44,6 +44,11 @@ class SparseAddOp : public OpKernel { b_indices->shape().DebugString())); const int64 a_nnz = a_indices->dim_size(0); const int64 b_nnz = b_indices->dim_size(0); + const int num_dims = a_indices->dim_size(1); + OP_REQUIRES(ctx, b_indices->dim_size(1) == num_dims, + errors::InvalidArgument( + ""Input indices must have the same dimension, got "", + num_dims, "" and "", b_indices->dim_size(1))); OP_REQUIRES_OK(ctx, ctx->input(""a_values"", &a_values_t)); OP_REQUIRES_OK(ctx, ctx->input(""b_values"", &b_values_t)); @@ -72,6 +77,13 @@ class SparseAddOp : public OpKernel { ""Input shapes should be a vector but received shapes "", a_shape->shape().DebugString(), "" and "", b_shape->shape().DebugString())); + OP_REQUIRES( + ctx, a_shape->NumElements() == num_dims, + errors::InvalidArgument(""Second dimension of a_indices and length of "" + ""a_shape must match, got "", + num_dims, "" and "", a_shape->NumElements())); + OP_REQUIRES(ctx, num_dims > 0, + errors::InvalidArgument(""Tesors must not be empty"")); OP_REQUIRES( ctx, a_shape->IsSameSize(*b_shape), errors::InvalidArgument( @@ -100,11 +112,6 @@ class SparseAddOp : public OpKernel { std::vector> entries_to_copy; // from_a?, idx entries_to_copy.reserve(a_nnz + b_nnz); std::vector out_values; - const int num_dims = a_shape->dim_size(0); - - OP_REQUIRES(ctx, num_dims > 0, - errors::InvalidArgument(""Invalid input_a shape. Received: "", - a_shape->DebugString())); // The input and output sparse tensors are assumed to be ordered along // increasing dimension number. ",1,train 6fd02f44810754ae7481838b6a67c5df7f909ca3,tensorflow/tensorflow,"Fix `tf.raw_ops.SparseAdd ` invalid memory access failure. PiperOrigin-RevId: 370568774 Change-Id: I5f73b31c865f2948a1c8dfb7ebd22b3cfb6405bf",sparse_add_op.cc,"@@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include ""tensorflow/core/framework/op_kernel.h"" +#include ""tensorflow/core/framework/op_requires.h"" #include ""tensorflow/core/framework/register_types.h"" #include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/tensor_util.h"" @@ -101,6 +102,10 @@ class SparseAddOp : public OpKernel { std::vector out_values; const int num_dims = a_shape->dim_size(0); + OP_REQUIRES(ctx, num_dims > 0, + errors::InvalidArgument(""Invalid input_a shape. Received: "", + a_shape->DebugString())); + // The input and output sparse tensors are assumed to be ordered along // increasing dimension number. int64 i = 0, j = 0; ",1,test c5b0d5f8ac19888e46ca14b0e27562e7fbbee9a9,tensorflow/tensorflow,"Fix the CHECK failure in tf.raw_ops.QuantizeAndDequantizeV2. 
PiperOrigin-RevId: 371361603 Change-Id: Ia70e34d41adaadddf928e95e5e5c5c97d5bc60d0",quantize_and_dequantize_op.cc,"@@ -72,6 +72,9 @@ class QuantizeAndDequantizeV2Op : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); + OP_REQUIRES( + ctx, axis_ >= -1, + errors::InvalidArgument(""Axis must be at least -1. Found "", axis_)); OP_REQUIRES( ctx, (axis_ == -1 || axis_ < input.shape().dims()), errors::InvalidArgument(""Shape must be at least rank "", axis_ + 1, ",1,train 1d04d7d93f4ed3854abf75d6b712d72c3f70d6b6,tensorflow/tensorflow,"Fix heap-buffer-overflow issue with `tf.raw_ops.SparseReshape`. PiperOrigin-RevId: 371218558 Change-Id: I6a6dc5bf15b50a1d05bdd95e9ba347cb39f40f45",sparse_reshape_op.cc,"@@ -26,6 +26,7 @@ limitations under the License. #include ""tensorflow/core/framework/types.h"" #include ""tensorflow/core/kernels/reshape_util.h"" #include ""tensorflow/core/lib/gtl/inlined_vector.h"" +#include ""tensorflow/core/platform/errors.h"" namespace tensorflow { @@ -38,6 +39,17 @@ class SparseReshapeOp : public OpKernel { explicit SparseReshapeOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { + const Tensor& input_indices_in = context->input(0); + const Tensor& input_shape_in = context->input(1); + + OP_REQUIRES(context, TensorShapeUtils::IsMatrix(input_indices_in.shape()), + errors::InvalidArgument(""Input must be a matrix."")); + OP_REQUIRES(context, TensorShapeUtils::IsVector(input_shape_in.shape()), + errors::InvalidArgument(""Input shape must be a vector."")); + OP_REQUIRES(context, + input_indices_in.dim_size(1) == input_shape_in.dim_size(0), + errors::InvalidArgument( + ""Input tensor rank must match input shape length."")); ReshapeSparseTensor(context, context->input(0), context->input(1), context->input(2), 0 /* output indices index */, 1 /* output shape index */); ",1,train 0ab290774f91a23bebe30a358fde4e53ab4876a0,tensorflow/tensorflow,"Ensure validation sticks in banded_triangular_solve_op PiperOrigin-RevId: 373275480 Change-Id: Id7717cf275b2d6fdb9441fbbe166d555182d2e79",banded_triangular_solve_op.cc,"@@ -217,6 +217,7 @@ class BandedTriangularSolveOpCpu : public OpKernel { const Tensor& in1 = ctx->input(1); ValidateInputTensors(ctx, in0, in1); + if (!ctx->status().ok()) return; MatMulBCast bcast(in0.shape().dim_sizes(), in1.shape().dim_sizes()); OP_REQUIRES( ",1,train 14607c0707040d775e06b6817325640cb4b5864c,tensorflow/tensorflow,"Fix nullptr deref in `tf.raw_ops.CTCLoss`. PiperOrigin-RevId: 372266334 Change-Id: Ic52c3e9f13a38f54482d670907eda1688450862b",ctc_loss_op.cc,"@@ -109,6 +109,9 @@ class CTCLossOp : public OpKernel { const TensorShape& inputs_shape = inputs->shape(); const int64 max_time = inputs_shape.dim_size(0); + OP_REQUIRES(ctx, max_time != 0, + errors::InvalidArgument( + ""Max time or first dimension of input cannot be 0."")); const int64 batch_size = inputs_shape.dim_size(1); const int64 num_classes_raw = inputs_shape.dim_size(2); OP_REQUIRES( ",1,train 4504a081af71514bb1828048363e6540f797005b,tensorflow/tensorflow,"Fix OOB read issue with `tf.raw_ops.CTCLoss`. 
PiperOrigin-RevId: 372242187 Change-Id: I347228ed8c04e1d2eb9d2479ae52f51d1b512c6e",ctc_loss_op.cc,"@@ -100,6 +100,10 @@ class CTCLossOp : public OpKernel { errors::InvalidArgument(""sequence_length is not a vector"")); OP_REQUIRES(ctx, TensorShapeUtils::IsMatrix(labels_indices->shape()), errors::InvalidArgument(""labels_indices is not a matrix"")); + OP_REQUIRES(ctx, labels_indices->dim_size(1) > 1, + errors::InvalidArgument( + ""labels_indices second dimension must be >= 1. Received "", + labels_indices->dim_size(1))); OP_REQUIRES(ctx, TensorShapeUtils::IsVector(labels_values->shape()), errors::InvalidArgument(""labels_values is not a vector"")); ",1,train 698e01511f62a3c185754db78ebce0eee1f0184d,tensorflow/tensorflow,"Fix `tf.io.decode_raw` bugs and update documentation. Fixes cases where specifying `fixed_length` resulted in data loss and even segfault and corruption of the Python interpreter. The fix is subtle but needed due to pointer arithmetic rules. Makes sure that `fixed_length` does not change the output when present but not needed. Eliminates needless copy and cast in the main codepath. PiperOrigin-RevId: 371322725 Change-Id: I514ef67a2961c86422f69d05122d31615e87896c",decode_padded_raw_op.cc,"@@ -19,6 +19,7 @@ limitations under the License. #include ""tensorflow/core/framework/common_shape_fns.h"" #include ""tensorflow/core/framework/op.h"" #include ""tensorflow/core/framework/op_kernel.h"" +#include ""tensorflow/core/framework/op_requires.h"" #include ""tensorflow/core/framework/shape_inference.h"" namespace tensorflow { @@ -83,14 +84,13 @@ class DecodePaddedRawOp : public OpKernel { // can copy the memory directly. if (!convert_data_endianness_ || sizeof(T) == 1) { for (int64 i = 0; i < flat_in.size(); ++i) { - const T* in_data = reinterpret_cast(flat_in(i).data()); - - if (flat_in(i).size() > fixed_length) { - memcpy(out_data, in_data, fixed_length); - } else { - memcpy(out_data, in_data, flat_in(i).size()); - } - out_data += fixed_length; + const auto to_copy = + std::min(flat_in(i).size(), static_cast(fixed_length)); + memcpy(out_data, flat_in(i).data(), to_copy); + // Note: increase out_data by width since it's already of type T* so + // each shift amount is implicitly multiplied by sizeof(T) according to + // pointer arithmetic rules. + out_data += width; } } else { // Otherwise, the data is not in the host's byte order, and rather than a @@ -105,7 +105,10 @@ class DecodePaddedRawOp : public OpKernel { p_in += sizeof(T), p_out += sizeof(T)) { std::reverse_copy(p_in, p_in + sizeof(T), p_out); } - out_data += fixed_length; + // Note: increase out_data by width since it's already of type T* so + // each shift amount is implicitly multiplied by sizeof(T) according to + // pointer arithmetic rules. + out_data += width; } } } ",1,train 698e01511f62a3c185754db78ebce0eee1f0184d,tensorflow/tensorflow,"Fix `tf.io.decode_raw` bugs and update documentation. Fixes cases where specifying `fixed_length` resulted in data loss and even segfault and corruption of the Python interpreter. The fix is subtle but needed due to pointer arithmetic rules. Makes sure that `fixed_length` does not change the output when present but not needed. Eliminates needless copy and cast in the main codepath. PiperOrigin-RevId: 371322725 Change-Id: I514ef67a2961c86422f69d05122d31615e87896c",parsing_ops.py,"@@ -850,8 +850,8 @@ def decode_raw(input_bytes, name=None): r""""""Convert raw bytes from input tensor into numeric tensors. - The input tensor is interpreted as a sequence of bytes. 
These bytes are then - decoded as numbers in the format specified by `out_type`. + Every component of the input tensor is interpreted as a sequence of bytes. + These bytes are then decoded as numbers in the format specified by `out_type`. >>> tf.io.decode_raw(tf.constant(""1""), tf.uint8) @@ -909,22 +909,35 @@ def decode_raw(input_bytes, >>> tf.io.decode_raw(tf.constant([""1212""]), tf.uint16, fixed_length=4) >> # truncated to length of type as it matches fixed_length - >>> tf.io.decode_raw(tf.constant([""1212""]), tf.uint16, fixed_length=2) - - >>> # ignores the second component - >>> tf.io.decode_raw(tf.constant([""12"",""34""]), tf.uint16, fixed_length=2) - - >>> tf.io.decode_raw(tf.constant([""12"",""34""]), tf.uint16, fixed_length=4) - - - This will be fixed on a future release of TensorFlow. + If the input value is larger than `fixed_length`, it is truncated: + + >>> x=''.join([chr(1), chr(2), chr(3), chr(4)]) + >>> tf.io.decode_raw(x, tf.uint16, fixed_length=2) + + >>> hex(513) + '0x201' + + If `little_endian` and `fixed_length` are specified, truncation to the fixed + length occurs before endianness conversion: + + >>> x=''.join([chr(1), chr(2), chr(3), chr(4)]) + >>> tf.io.decode_raw(x, tf.uint16, fixed_length=2, little_endian=False) + + >>> hex(258) + '0x102' + + If input values all have the same length, then specifying `fixed_length` + equal to the size of the strings should not change output: + + >>> x = [""12345678"", ""87654321""] + >>> tf.io.decode_raw(x, tf.int16) + + >>> tf.io.decode_raw(x, tf.int16, fixed_length=len(x[0])) + Args: input_bytes: ",1,train e07e1c3d26492c06f078c7e5bf2d138043e199c1,tensorflow/tensorflow,"Prevent memory overflow in ParseAttrValue from nested tensors. PiperOrigin-RevId: 370108442 Change-Id: I84d64a5e8895a6aeffbf4749841b4c54d51b5889",attr_value_util.cc,"@@ -38,6 +38,9 @@ namespace { // Do not construct large tensors to compute their hash or compare for equality. constexpr int kMaxAttrValueTensorByteSize = 32 * 1024 * 1024; // 32mb +// Limit nesting of tensors to 100 deep to prevent memory overflow. +constexpr int kMaxTensorNestDepth = 100; + // Return the size of the tensor represented by this TensorProto. If shape is // not fully defined return -1. 
int64 TensorByteSize(const TensorProto& t) { @@ -224,6 +227,54 @@ string SummarizeFunc(const NameAttrList& func) { return strings::StrCat(func.name(), ""["", absl::StrJoin(entries, "", ""), ""]""); } +bool ParseAttrValueHelper_TensorNestsUnderLimit(int limit, string to_parse) { + int nests = 0; + int maxed_out = to_parse.length(); + int open_curly = to_parse.find('{'); + int open_bracket = to_parse.find('<'); + int close_curly = to_parse.find('}'); + int close_bracket = to_parse.find('>'); + if (open_curly == -1) { + open_curly = maxed_out; + } + if (open_bracket == -1) { + open_bracket = maxed_out; + } + int min = std::min(open_curly, open_bracket); + do { + if (open_curly == maxed_out && open_bracket == maxed_out) { + return true; + } + if (min == open_curly) { + nests += 1; + open_curly = to_parse.find('{', open_curly + 1); + if (open_curly == -1) { + open_curly = maxed_out; + } + } else if (min == open_bracket) { + nests += 1; + open_bracket = to_parse.find('<', open_bracket + 1); + if (open_bracket == -1) { + open_bracket = maxed_out; + } + } else if (min == close_curly) { + nests -= 1; + close_curly = to_parse.find('}', close_curly + 1); + if (close_curly == -1) { + close_curly = maxed_out; + } + } else if (min == close_bracket) { + nests -= 1; + close_bracket = to_parse.find('>', close_bracket + 1); + if (close_bracket == -1) { + close_bracket = maxed_out; + } + } + min = std::min({open_curly, open_bracket, close_curly, close_bracket}); + } while (nests < 100); + return false; +} + } // namespace string SummarizeAttrValue(const AttrValue& attr_value) { @@ -448,7 +499,12 @@ bool ParseAttrValue(StringPiece type, StringPiece text, AttrValue* out) { } else { to_parse = strings::StrCat(field_name, "": "", text); } - + if (field_name == ""tensor"") { + if (!ParseAttrValueHelper_TensorNestsUnderLimit(kMaxTensorNestDepth, + to_parse)) { + return false; + } + } return ProtoParseFromString(to_parse, out); } ",1,train e6340f0665d53716ef3197ada88936c2a5f7a2d3,tensorflow/tensorflow,"Handle a special grappler case resulting in crash. It might happen that a malformed input could be used to trick Grappler into trying to optimize a node with no inputs. This, in turn, would produce a null pointer dereference and a segfault. PiperOrigin-RevId: 369242852 Change-Id: I2e5cbe7aec243d34a6d60220ac8ac9b16f136f6b",arithmetic_optimizer.cc,"@@ -2047,6 +2047,12 @@ class ReorderCastLikeAndValuePreserving : public ArithmeticOptimizerStage { Status TrySimplify(NodeDef* consumer, string* simplified_node_name) override { NodeDef* producer; + + if (consumer->input_size() < 1) { + return errors::FailedPrecondition(""Node "", simplified_node_name, + "" lacks inputs""); + } + TF_RETURN_IF_ERROR(GetInputNode(consumer->input(0), &producer)); const bool producer_is_cast = IsCastLike(*producer); const bool can_optimize = @@ -2538,6 +2544,11 @@ class ReplaceMulWithSquare : public ArithmeticOptimizerStage { ~ReplaceMulWithSquare() override = default; bool IsSupported(const NodeDef* node) const override { + if (!node || node->input_size() < 2) { + // Invalid node + return false; + } + return IsAnyMul(*node) && node->input(0) == node->input(1); } ",1,train e6340f0665d53716ef3197ada88936c2a5f7a2d3,tensorflow/tensorflow,"Handle a special grappler case resulting in crash. It might happen that a malformed input could be used to trick Grappler into trying to optimize a node with no inputs. This, in turn, would produce a null pointer dereference and a segfault. 
PiperOrigin-RevId: 369242852 Change-Id: I2e5cbe7aec243d34a6d60220ac8ac9b16f136f6b",dependency_optimizer.cc,"@@ -68,6 +68,12 @@ bool DependencyOptimizer::SafeToRemoveIdentity(const NodeDef& node) const { // The output values of this node may be needed. return false; } + + if (node.input_size() < 1) { + // Node lacks input, is invalid + return false; + } + const NodeDef* input = node_map_->GetNode(NodeName(node.input(0))); CHECK(input != nullptr) << ""node = "" << node.name() << "" input = "" << node.input(0); ",1,train 82e6203221865de4008445b13c69b6826d2b28d9,tensorflow/tensorflow,"Fix segfaults in `tf.raw_ops.SparseCountSparseOutput`. PiperOrigin-RevId: 360547563 Change-Id: I781c7af4b54a63d867c6e18d43a44d64a5c4e7c9",count_ops.cc,"@@ -192,6 +192,10 @@ class SparseCount : public OpKernel { ""; values shape: "", values.shape().DebugString())); } + OP_REQUIRES(context, shape.NumElements() != 0, + errors::InvalidArgument( + ""The shape argument requires at least one element."")); + bool is_1d = shape.NumElements() == 1; int num_batches = is_1d ? 1 : shape.flat()(0); int num_values = values.NumElements(); @@ -212,6 +216,14 @@ class SparseCount : public OpKernel { for (int idx = 0; idx < num_values; ++idx) { int batch = is_1d ? 0 : indices_values(idx, 0); + if (batch >= num_batches) { + OP_REQUIRES(context, batch < num_batches, + errors::InvalidArgument( + ""Indices value along the first dimension must be "", + ""lower than the first index of the shape."", ""Got "", + batch, "" as batch and "", num_batches, + "" as the first dimension of the shape."")); + } const auto& value = values_values(idx); if (value >= 0 && (maxlength_ <= 0 || value < maxlength_)) { if (binary_output_) { ",1,train 87158f43f05f2720a374f3e6d22a7aaa3a33f750,tensorflow/tensorflow,"Prevent heap OOB in sparse reduction ops. PiperOrigin-RevId: 387934524 Change-Id: I894aa30f1e454f09b471d565b4a325da49322c1a",sparse_reduce_op.cc,"@@ -219,7 +219,20 @@ class SparseReduceOp : public OpKernel { sp.Reorder(reduction.reorder_dims); for (const auto &g : sp.group(reduction.group_by_dims)) { Op::template Run(ctx, reduced_val, g.template values()); + OP_REQUIRES(ctx, + output_strides.empty() || + (g.group().size() == output_strides.size()), + errors::Internal( + ""Expected group size and output_strides size to match"", + "", but got "", g.group().size(), "" and "", + output_strides.size())); const int64_t idx = CoordinatesToFlatIndex(g.group(), output_strides); + OP_REQUIRES(ctx, + idx >= 0 && idx < out_flat.size(), + errors::Internal( + ""Obtained a write index of "", idx, + "" which is outside of bounds of [0, "", + out_flat.size(), "")"")); out_flat(idx) = reduced_val(); VLOG(2) << ""coords: "" << absl::StrJoin(g.group(), "","") << ""; idx: "" << idx << ""; group "" << Op::Name() << "": "" ",1,train d9204be9f49520cdaaeb2541d1dc5187b23f31d9,tensorflow/tensorflow,"Disallow division by zero FPE in tf.raw_ops.SparseDenseCwiseDiv PiperOrigin-RevId: 383959809 Change-Id: Ibe88458bdf66a686c93e354b8255dec94285c560",sparse_dense_binary_op_shared.cc,"@@ -114,7 +114,10 @@ class SparseDenseBinaryOpShared : public OpKernel { OP_REQUIRES_OK( ctx, ctx->allocate_temp(DataTypeToEnum::value, TensorShape({nnz}), &dense_gathered)); - + bool op_is_div = false; + if (absl::StrContains(ctx->op_kernel().type_string_view(), ""Div"")) { + op_is_div = true; + } // Pulls relevant entries from the dense side, with reshape and broadcasting // *of the dense side* taken into account. Use a TensorRef to avoid blowing // up memory. 
@@ -143,6 +146,12 @@ class SparseDenseBinaryOpShared : public OpKernel { errors::InvalidArgument(""Provided indices are out-of-bounds w.r.t. "" \ ""dense side with broadcasted shape"")); \ dense_gathered_flat(i) = rhs_ref.coeff(idx); \ + if (op_is_div) { \ + OP_REQUIRES(ctx, dense_gathered_flat(i) != 0, \ + errors::InvalidArgument( \ + ""SparseDenseCwiseDiv cannot divide by zero,"" \ + ""but input dense tensor contains zero "")); \ + } \ } \ break; \ } ",1,train 5dc7f6981fdaf74c8c5be41f393df705841fb7c5,tensorflow/tensorflow,"Fix accessing possible nullptr in tensorflow::data::CompressElement and UncompressElement which are used in tf.data.service. PiperOrigin-RevId: 373920841 Change-Id: Ia88d78aee09fa19bb53a0f163fd19620d0c68743",compression_utils.cc,"@@ -29,9 +29,10 @@ Status CompressElement(const std::vector& element, int64 total_size = 0; for (auto& component : element) { if (DataTypeCanUseMemcpy(component.dtype())) { - // Some datatypes can be memcopied, allowing us to save two copies - // (AsProtoTensorContent and SerializeToArray). - total_size += DMAHelper::buffer(&component)->size(); + const TensorBuffer* buffer = DMAHelper::buffer(&component); + if (buffer) { + total_size += buffer->size(); + } } else { non_memcpy_components.emplace_back(); component.AsProtoTensorContent(&non_memcpy_components.back()); @@ -53,8 +54,10 @@ Status CompressElement(const std::vector& element, component.shape().AsProto(metadata->mutable_tensor_shape()); if (DataTypeCanUseMemcpy(component.dtype())) { const TensorBuffer* buffer = DMAHelper::buffer(&component); - memcpy(position, buffer->data(), buffer->size()); - metadata->set_tensor_size_bytes(buffer->size()); + if (buffer) { + memcpy(position, buffer->data(), buffer->size()); + metadata->set_tensor_size_bytes(buffer->size()); + } } else { TensorProto& proto = non_memcpy_components[non_memcpy_component_index++]; proto.SerializeToArray(position, proto.ByteSizeLong()); @@ -94,8 +97,13 @@ Status UncompressElement(const CompressedElement& compressed, if (DataTypeCanUseMemcpy(metadata.dtype())) { out->emplace_back(metadata.dtype(), metadata.tensor_shape()); TensorBuffer* buffer = DMAHelper::buffer(&out->back()); - iov[i].iov_base = buffer->data(); - iov[i].iov_len = buffer->size(); + if (buffer) { + iov[i].iov_base = buffer->data(); + iov[i].iov_len = buffer->size(); + } else { + iov[i].iov_base = nullptr; + iov[i].iov_len = 0; + } } else { // Allocate an empty Tensor. We will fill it out later after // uncompressing into the tensor_proto_str. ",1,train 301ae88b331d37a2a16159b65b255f4f9eb39314,tensorflow/tensorflow,"Fix null ptr deref in tf.raw_ops.RaggedTensorToTensor PiperOrigin-RevId: 384257511 Change-Id: I0484ad285039d132d6c41b284a7fcdd2b774a38e",ragged_tensor_to_tensor_op.cc,"@@ -348,6 +348,9 @@ class RaggedTensorToTensorBaseOp : public OpKernel { Status GetFirstDimensionSize(OpKernelContext* context, INDEX_TYPE* result) { const Tensor first_partition_tensor = context->input(kFirstPartitionInputIndex); + if (row_partition_types_.empty()) { + return errors::InvalidArgument(""No row_partition_types given.""); + } const RowPartitionType first_partition_type = row_partition_types_[0]; switch (first_partition_type) { case RowPartitionType::FIRST_DIM_SIZE: ",1,train 9e82dce6e6bd1f36a57e08fa85af213e2b2f2622,tensorflow/tensorflow,"Fix NPE in restoring code. 
PiperOrigin-RevId: 388303253 Change-Id: Ia8c68568cb854bca538909a182b31a618d68ce55",save_restore_tensor.cc,"@@ -151,11 +151,18 @@ void RestoreTensor(OpKernelContext* context, context, size == 1, errors::InvalidArgument( ""Input 0 (file_pattern) must be a string scalar; got a tensor of "", - size, ""elements"")); + size, "" elements"")); } const string& file_pattern = file_pattern_t.flat()(0); const Tensor& tensor_name_t = context->input(1); + { + const int64_t size = tensor_name_t.NumElements(); + OP_REQUIRES(context, size > restore_index, + errors::InvalidArgument( + ""Input 1 (file_pattern) must be a have at least "", + restore_index + 1, "" elements"")); + } const string& tensor_name = tensor_name_t.flat()(restore_index); // If we cannot find a cached reader we will allocate our own. ",1,train 4923de56ec94fff7770df259ab7f2288a74feb41,tensorflow/tensorflow,"Don't do any work when reshaping 0 elements sparse tensor. If reshaping to 0 elements tensor, check that input has no elements. If reshaping no elements input, check that output has no elements. PiperOrigin-RevId: 388296986 Change-Id: Iadc9fe7252e14313ca987e69bf0d7042fd10232a",reshape_util.cc,"@@ -174,6 +174,12 @@ void ReshapeSparseTensor(OpKernelContext *context, TensorShape({nnz, output_rank}), &result_indices)); if (nnz > 0) { + OP_REQUIRES( + context, dense_size > 0 && product > 0, + errors::InvalidArgument( + ""Input tensor has "", nnz, "" non zero elements but input shape ("", + input_shape.DebugString(), "") or output shape ("", + output_shape.DebugString(), "") is empty"")); OP_REQUIRES_OK(context, functor::ReshapeSparseTensorFunctor()( context, input_shape, output_shape, input_indices_in.matrix(), ",1,test a2b743f6017d7b97af1fe49087ae15f0ac634373,tensorflow/tensorflow,"Fix heap OOB in `tf.raw_ops.RaggedGather` PiperOrigin-RevId: 388355464 Change-Id: If14d96231d1cd7aad7c4d1c22c1bab1576b75717",ragged_gather_op.cc,"@@ -58,15 +58,21 @@ class RaggedGatherOpBase : public OpKernel { void Compute(OpKernelContext* context) override { // Get the input Tensors. + OpInputList params_nested_splits_in; OP_REQUIRES_OK(context, context->input_list(""params_nested_splits"", ¶ms_nested_splits_in)); + OP_REQUIRES( + context, params_nested_splits_in.size() > 0, + errors::InvalidArgument(""params_nested_splits must be non empty"")); + const Tensor& params_dense_values_in = context->input(params_nested_splits_in.size()); const Tensor& indices_in = context->input(params_nested_splits_in.size() + 1); - DCHECK_GT(params_nested_splits_in.size(), 0); // Enforced by REGISTER_OP. + OP_REQUIRES(context, params_nested_splits_in[0].dims() > 0, + errors::InvalidArgument(""Split tensors must not be scalars"")); SPLITS_TYPE num_params = params_nested_splits_in[0].dim_size(0) - 1; OP_REQUIRES_OK(context, ValidateIndices(indices_in, num_params)); ",1,train 4aacb30888638da75023e6601149415b39763d76,tensorflow/tensorflow,"Disallow division by zero FPE in `tf.raw_ops.ResourceScatterDiv` Had to update a test that was broken. 
PiperOrigin-RevId: 388516976 Change-Id: Ic358e6bf0559e011539974d453fc7aa18b427e9c",resource_variable_ops.cc,"@@ -873,6 +873,35 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_ND_GPU); #undef REGISTER_GATHER_ND_ALL_INDICES #undef REGISTER_GATHER_ND_FULL +namespace { + +template +bool isCPUDevice() { + return false; +} + +template <> +bool isCPUDevice() { + return true; +} + +template +bool ValidateInput(const Tensor& updates) { + const auto updates_flat = updates.flat(); + const T zero(0); + for (int i = 0; i < updates.NumElements(); i++) { + if (updates_flat(i) == zero) return false; + } + return true; +} + +template <> +bool ValidateInput(const Tensor& updates) { + return true; +} + +} // namespace + template class ResourceScatterUpdateOp : public OpKernel { public: @@ -939,6 +968,12 @@ class ResourceScatterUpdateOp : public OpKernel { "" indexing: "", params->dim_size(0), "" > "", std::numeric_limits::max())); + // Prevent division by 0 + if (isCPUDevice() && op == tensorflow::scatter_op::UpdateOp::DIV) { + OP_REQUIRES(c, ValidateInput(updates), + errors::InvalidArgument(""updates must not contain 0"")); + } + if (N > 0) { auto indices_flat = indices.flat(); auto params_flat = params->flat_outer_dims(); ",1,train 4aacb30888638da75023e6601149415b39763d76,tensorflow/tensorflow,"Disallow division by zero FPE in `tf.raw_ops.ResourceScatterDiv` Had to update a test that was broken. PiperOrigin-RevId: 388516976 Change-Id: Ic358e6bf0559e011539974d453fc7aa18b427e9c",sharded_variable_test.py,"@@ -175,8 +175,9 @@ class ShardedVariableTest(test.TestCase, parameterized.TestCase): 'scatter_update') def test_scatter_ops_even_partition(self, op): v = variables_lib.Variable(array_ops.zeros((30, 1))) + # Make sure values does not contain 0 due to testing `scatter_div`! 
sparse_delta = ops.IndexedSlices( - values=constant_op.constant([[0.], [1.], [2.], [3.], [4.]]), + values=constant_op.constant([[1.], [2.], [3.], [4.], [5.]]), indices=constant_op.constant([0, 10, 12, 21, 22])) v0 = variables_lib.Variable(array_ops.zeros((10, 1))) ",1,train 482da92095c4d48f8784b1f00dda4f81c28d2988,tensorflow/tensorflow,"Ensure non-empty padding_value input to tf.raw_ops.MatrixDiagPartV2, if a padding_value is input PiperOrigin-RevId: 388314614 Change-Id: If0b51ad58d5d8543a6be6ce8f42ae4755c80d55f",matrix_diag_op.cc,"@@ -89,7 +89,10 @@ class MatrixDiagPartOp : public OpKernel { upper_diag_index = diag_index.flat()(1); } } - padding_value = context->input(2).flat()(0); + const Tensor& padding_in = context->input(2); + OP_REQUIRES(context, padding_in.NumElements() == 1, + errors::InvalidArgument(""Padding must be scalar."")); + padding_value = padding_in.flat()(0); } const TensorShape& input_shape = input.shape(); ",1,train 8a6e874437670045e6c7dc6154c7412b4a2135e2,tensorflow/tensorflow,"Validate num_elements input in tf.raw_ops.TensorListReserve PiperOrigin-RevId: 383954564 Change-Id: I454bd78eff85bc4f16ddb7e608596971cca47f8f",list_kernels.cc,"@@ -302,6 +302,10 @@ class TensorListReserve : public OpKernel { PartialTensorShape element_shape; OP_REQUIRES_OK(c, TensorShapeFromTensor(c->input(0), &element_shape)); int32 num_elements = c->input(1).scalar()(); + OP_REQUIRES(c, num_elements >= 0, + errors::InvalidArgument(""The num_elements to reserve must be a "" + ""non negative number, but got "", + num_elements)); TensorList output; output.element_shape = element_shape; output.element_dtype = element_dtype_; ",1,train 96f364a1ca3009f98980021c4b32be5fdcca33a1,tensorflow/tensorflow,"Validate axis input in tf.raw_ops.QuantizeAndDequantizeV4Grad PiperOrigin-RevId: 388291385 Change-Id: I3bab68dc61d935afa96c0da021a7b722c6dc8dc8",quantize_and_dequantize_op.cc,"@@ -158,6 +158,13 @@ class QuantizeAndDequantizeV4GradientOp : public OpKernel { Tensor* input_backprop = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &input_backprop)); + OP_REQUIRES( + ctx, axis_ >= -1, + errors::InvalidArgument(""Axis must be at least -1. Found "", axis_)); + OP_REQUIRES(ctx, (axis_ == -1 || axis_ < input.shape().dims()), + errors::InvalidArgument( + ""Axis should be -1 or 0 or a positive value less than "", + input.shape().dims(), ""but given axis value was "", axis_)); OP_REQUIRES( ctx, input.IsSameSize(gradient), ",1,train c283e542a3f422420cfdb332414543b62fc4e4a5,tensorflow/tensorflow,"Disallow negative ngram_widths values in tf.raw_ops.StringNGrams PiperOrigin-RevId: 387148179 Change-Id: I641395a09a208be72ef9b3ceb128cf8a83a0775b",string_ngrams_op.cc,"@@ -53,6 +53,12 @@ class StringNGramsOp : public tensorflow::OpKernel { } void Compute(tensorflow::OpKernelContext* context) override { + for (int ngram_width : ngram_widths_) { + OP_REQUIRES( + context, ngram_width > 0, + errors::InvalidArgument(""ngram_widths must contain positive values"")); + } + const tensorflow::Tensor* data; OP_REQUIRES_OK(context, context->input(""data"", &data)); const auto& input_data = data->flat().data(); ",1,train 02cc160e29d20631de3859c6653184e3f876b9d7,tensorflow/tensorflow,"Prevent nullptr deref in SparseTensorSliceDataset The arguments must determine a valid sparse tensor. This means that when indices are empty then the values must be empty too (and the reverse). Also added test, by modifying existing test with empty sparse tensor to now run with an invalid sparse tensor input. 
PiperOrigin-RevId: 388562757 Change-Id: Id8b54cd7c2316025b4f9a77292c8fb5344d17609",sparse_tensor_slice_dataset_op.cc,"@@ -241,6 +241,17 @@ class SparseTensorSliceDatasetOp : public DatasetOpKernel { errors::InvalidArgument( ""Input indices should be a matrix but received shape "", indices->shape().DebugString())); + + const auto num_indices = indices->NumElements(); + const auto num_values = values->NumElements(); + if (num_indices == 0 || num_values == 0) { + OP_REQUIRES(ctx, num_indices == num_values, + errors::InvalidArgument( + ""If indices or values are empty, the other one must also "" + ""be. Got indices of shape "", + indices->shape().DebugString(), "" and values of shape "", + values->shape().DebugString())); + } OP_REQUIRES(ctx, TensorShapeUtils::IsVector(values->shape()), errors::InvalidArgument( ""Input values should be a vector but received shape "", ",1,train 02cc160e29d20631de3859c6653184e3f876b9d7,tensorflow/tensorflow,"Prevent nullptr deref in SparseTensorSliceDataset The arguments must determine a valid sparse tensor. This means that when indices are empty then the values must be empty too (and the reverse). Also added test, by modifying existing test with empty sparse tensor to now run with an invalid sparse tensor input. PiperOrigin-RevId: 388562757 Change-Id: Id8b54cd7c2316025b4f9a77292c8fb5344d17609",from_sparse_tensor_slices_test.py,"@@ -118,6 +118,26 @@ class FromSparseTensorSlicesTest(test_base.DatasetTestBase, with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + @combinations.generate(combinations.combine(tf_api_version=1, mode=[""graph""])) + def testEmptySparseTensorSlicesInvalid(self): + """"""Test a dataset based on invalid `tf.sparse.SparseTensor`."""""" + st = array_ops.sparse_placeholder(dtypes.float64) + iterator = dataset_ops.make_initializable_iterator( + dataset_ops.Dataset.from_sparse_tensor_slices(st)) + init_op = iterator.initializer + + with self.cached_session() as sess: + # Test with an empty sparse tensor but with non empty values. + empty_indices = np.empty((0, 4), dtype=np.int64) + non_empty_values = [1, 2, 3, 4] + empty_dense_shape = [0, 4, 37, 9] + sparse_feed = sparse_tensor.SparseTensorValue(empty_indices, + non_empty_values, + empty_dense_shape) + # Here, we expect the test to fail when running the feed. + with self.assertRaises(errors.InvalidArgumentError): + sess.run(init_op, feed_dict={st: sparse_feed}) + @combinations.generate(combinations.combine(tf_api_version=2, mode=[""eager""])) def testFromSparseTensorSlicesError(self): with self.assertRaises(AttributeError): ",1,train 9728c60e136912a12d99ca56e106b7cce7af5986,tensorflow/tensorflow,"Ensure validation sticks in `save_restore_v2_ops.cc` PiperOrigin-RevId: 387924206 Change-Id: I6156842eb3230076b5812c0815f3e66bd5241454",save_restore_v2_ops.cc,"@@ -98,6 +98,7 @@ class SaveV2 : public OpKernel { const Tensor& shape_and_slices = context->input(2); ValidateInputs(true /* is save op */, context, prefix, tensor_names, shape_and_slices); + if (!context->status().ok()) return; const int kFixedInputs = 3; // Prefix, tensor names, shape_and_slices. 
const int num_tensors = static_cast(tensor_names.NumElements()); @@ -177,6 +178,7 @@ class RestoreV2 : public OpKernel { "" expected dtypes."")); ValidateInputs(false /* not save op */, context, prefix, tensor_names, shape_and_slices); + if (!context->status().ok()) return; const string& prefix_string = prefix.scalar()(); ",1,test 7bdf50bb4f5c54a4997c379092888546c97c3ebd,tensorflow/tensorflow,"Ensure non-empty compressed input in tf.raw_ops.UncompressElement PiperOrigin-RevId: 383955815 Change-Id: I072a84fd02738dd2f51b3f42836ed80067dba4a8",compression_ops.cc,"@@ -48,6 +48,11 @@ void UncompressElementOp::Compute(OpKernelContext* ctx) { Tensor tensor = ctx->input(0); const Variant& variant = tensor.scalar()(); const CompressedElement* compressed = variant.get(); + OP_REQUIRES( + ctx, compressed != nullptr, + errors::InvalidArgument( + ""Input does not contain a compressed element. Instead got tensor "", + tensor.DebugString())); std::vector components; OP_REQUIRES_OK(ctx, UncompressElement(*compressed, &components)); ",1,train e0b6e58c328059829c3eb968136f17aa72b6c876,tensorflow/tensorflow,"Fix segfault/heap buffer overflow in `{Experimental,}DatasetToTFRecord` where dataset is numeric. Code assumes only strings inputs and then interprets numbers as valid `tstring`s. Then, when trying to compute the CRC of the record this results in heap buffer overflow. PiperOrigin-RevId: 387675909 Change-Id: I7396b9b8afc1ac744112af7c0b1cd7bb41e0f556",to_tf_record_op.cc,"@@ -18,6 +18,7 @@ limitations under the License. #include ""tensorflow/core/framework/function_handle_cache.h"" #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/resource_mgr.h"" +#include ""tensorflow/core/framework/types.h"" #include ""tensorflow/core/kernels/ops_util.h"" #include ""tensorflow/core/lib/core/threadpool.h"" #include ""tensorflow/core/lib/io/record_writer.h"" @@ -91,8 +92,20 @@ class ToTFRecordOp : public AsyncOpKernel { TF_RETURN_IF_ERROR(finalized_dataset->MakeIterator( &iter_ctx, /*parent=*/nullptr, ""ToTFRecordOpIterator"", &iterator)); + const int num_output_dtypes = finalized_dataset->output_dtypes().size(); + if (num_output_dtypes != 1) { + return errors::InvalidArgument( + ""ToTFRecordOp currently only support datasets of 1 single column, "", + ""but got "", num_output_dtypes); + } + const DataType dt = finalized_dataset->output_dtypes()[0]; + if (dt != DT_STRING) { + return errors::InvalidArgument( + ""ToTFRecordOp currently only supports DT_STRING dataypes, but got "", + DataTypeString(dt)); + } std::vector components; - components.reserve(finalized_dataset->output_dtypes().size()); + components.reserve(num_output_dtypes); bool end_of_sequence; do { TF_RETURN_IF_ERROR( ",1,train 0f931751fb20f565c4e94aa6df58d54a003cdb30,tensorflow/tensorflow,"Validate dimensions of input tensor in `FractionalAvgPoolGrad` PiperOrigin-RevId: 388286227 Change-Id: Ieb7566155e92acc8993a2212c76deacadc0edc8a",fractional_avg_pool_op.cc,"@@ -271,6 +271,18 @@ class FractionalAvgPoolGradOp : public OpKernel { const int64_t in_rows = orig_input_tensor_shape_flat(1); const int64_t in_cols = orig_input_tensor_shape_flat(2); const int64_t in_depth = orig_input_tensor_shape_flat(3); + OP_REQUIRES( + context, in_batch != 0, + errors::InvalidArgument(""Batch dimension of input must not be 0"")); + OP_REQUIRES( + context, in_rows != 0, + errors::InvalidArgument(""Rows dimension of input must not be 0"")); + OP_REQUIRES( + context, in_cols != 0, + errors::InvalidArgument(""Columns dimension of input must not be 
0"")); + OP_REQUIRES( + context, in_depth != 0, + errors::InvalidArgument(""Depth dimension of input must not be 0"")); constexpr int tensor_in_and_out_dims = 4; // Transform orig_input_tensor_shape into TensorShape ",1,test 5ecec9c6fbdbc6be03295685190a45e7eee726ab,tensorflow/tensorflow,"Prevent use after free. A very old version of the code used `result` as a simple pointer to a resource. Two years later, the pointer got changed to a `unique_ptr` but author forgot to remove the call to `Unref`. Three years after that, we finally uncover the UAF. PiperOrigin-RevId: 387924872 Change-Id: I70fb6f199164de49fac20c168132a07b84903f9b",resource_ops.cc,"@@ -53,6 +53,7 @@ class BoostedTreesCreateEnsembleOp : public OpKernel { if (!result->InitFromSerialized( tree_ensemble_serialized_t->scalar()(), stamp_token)) { result->Unref(); + result.release(); // Needed due to the `->Unref` above, to prevent UAF OP_REQUIRES( context, false, errors::InvalidArgument(""Unable to parse tree ensemble proto."")); ",1,train ac117ee8a8ea57b73d34665cdf00ef3303bc0b11,tensorflow/tensorflow,"Prevent division by 0 in `resource_variable_ops.cc` PiperOrigin-RevId: 387939939 Change-Id: Ib04902d63756633999959a70613f2eaa30c2c151",resource_variable_ops.cc,"@@ -710,7 +710,8 @@ class ResourceGatherOp : public OpKernel { copy_functor(c->eigen_device(), tmp_indices.flat(), indices.flat()); - AddBatchOffsets(&tmp_indices, params); + AddBatchOffsets(c, &tmp_indices, params); + if (!c->status().ok()) return; op_indices = &tmp_indices; } @@ -742,11 +743,17 @@ class ResourceGatherOp : public OpKernel { // Example: batch_dims = 1, indices = [[0, 1, 2], [0, 1, 2]] // If indexing into a params dimension of size 4, then the indices will become // [0, 1, 2, 4, 5, 6] - void AddBatchOffsets(Tensor* indices, const Tensor& params) { + void AddBatchOffsets(OpKernelContext* ctx, Tensor* indices, + const Tensor& params) { int64_t batch_size = 1; // The size of all batch dimensions. for (int idx = 0; idx < batch_dims_; ++idx) { batch_size *= params.dim_size(idx); } + OP_REQUIRES( + ctx, batch_size != 0, + errors::InvalidArgument( + ""Inner size of indices would result in batch_size of 0 and a "", + ""division by 0 in the implementation. 
This is illegal"")); auto indices_flat = indices->flat(); int64_t const index_inner_size = indices->NumElements() / batch_size; ",1,train bc9c546ce7015c57c2f15c168b3d9201de679a1d,tensorflow/tensorflow,"Prevent heap oob access in `resource_variable_ops.cc` PiperOrigin-RevId: 387936433 Change-Id: I9e71ddaa8dbd51ec6afbf163a6b3b591f193b4f6",resource_variable_ops.cc,"@@ -660,6 +660,11 @@ class ResourceGatherOp : public OpKernel { OP_REQUIRES( c, TensorShapeUtils::IsVectorOrHigher(params.shape()), errors::InvalidArgument(""params must be at least 1 dimensional"")); + OP_REQUIRES( + c, params.shape().dims() >= batch_dims_, + errors::InvalidArgument(""params must have at least "", batch_dims_, + "" (batch_dims) dimensions but it has shape "", + params.shape().DebugString())); // Check that we have enough index space const int64_t N = indices.NumElements(); ",1,train 01cff3f986259d661103412a20745928c727326f,tensorflow/tensorflow,"Fix heap OOB due to dimension mismatch in `ResourceScatterUpdate` PiperOrigin-RevId: 388292801 Change-Id: Id9bd7244d98d41b1517d4771850b32782c0cc949",resource_variable_ops.cc,"@@ -955,11 +955,12 @@ class ResourceScatterUpdateOp : public OpKernel { params->dim_size(0), "")"")); } else { int64_t num_updates = updates.NumElements(); - OP_REQUIRES(c, num_updates % N == 0, - errors::InvalidArgument( - ""shape of indices ("", indices.shape().DebugString(), - "") is not compatible with the shape of updates ("", - updates.shape().DebugString(), "")"")); + OP_REQUIRES( + c, TensorShapeUtils::StartsWith(updates.shape(), indices.shape()), + errors::InvalidArgument( + ""The shape of indices ("", indices.shape().DebugString(), + "") must be a prefix of the shape of updates ("", + updates.shape().DebugString(), "")"")); auto updates_flat = updates.shaped({N, num_updates / N}); functor::ScatterFunctor functor; ",1,test 1071f554dbd09f7e101324d366eec5f4fe5a3ece,tensorflow/tensorflow,"Add missing validation to `RaggedTensorToSparse`. There needs to be a check that the splits allow for valid ragged tensors. PiperOrigin-RevId: 387712169 Change-Id: I2499175324b82b65d159a260c7f83b98ceb5cc7d",ragged_tensor_to_sparse_kernel.cc,"@@ -21,6 +21,7 @@ limitations under the License. #include ""tensorflow/core/framework/register_types.h"" #include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/tensor_shape.h"" +#include ""tensorflow/core/platform/errors.h"" namespace tensorflow { @@ -38,7 +39,8 @@ class RaggedTensorToSparseOp : public OpKernel { OP_REQUIRES_OK( context, context->input_list(""rt_nested_splits"", &rt_nested_splits_in)); const int rt_nested_splits_len = rt_nested_splits_in.size(); - DCHECK_GT(rt_nested_splits_len, 0); // Enforced by REGISTER_OP. 
+ OP_REQUIRES(context, rt_nested_splits_len > 0, + errors::InvalidArgument(""rt_nested_splits must be non empty"")); std::vector rt_nested_splits; rt_nested_splits.reserve(rt_nested_splits_len); for (int i = 0; i < rt_nested_splits_len; ++i) { @@ -162,6 +164,14 @@ class RaggedTensorToSparseOp : public OpKernel { if (rt_nested_splits[i](0) != 0) { return InvalidArgument(""First value of ragged splits must be 0.""); } + for (int j = 1; j < rt_nested_splits[i].size(); ++j) { + if (rt_nested_splits[i](j) < rt_nested_splits[i](j - 1)) { + return InvalidArgument( + ""Ragged splits should be non decreasing, but we got "", + rt_nested_splits[i](j - 1), "" followed by "", + rt_nested_splits[i](j)); + } + } if (i > 0) { SPLITS_TYPE last_split = rt_nested_splits[i - 1](rt_nested_splits[i - 1].size() - 1); ",1,test f2a673bd34f0d64b8e40a551ac78989d16daad09,tensorflow/tensorflow,"Add missing validation to `matrix_diag_op.cc` PiperOrigin-RevId: 387923533 Change-Id: Idfffeb328d5f9c6748d992d28a56d6e9e45103a0",matrix_diag_op.cc,"@@ -73,6 +73,9 @@ class MatrixDiagPartOp : public OpKernel { errors::InvalidArgument( ""diag_index must be a scalar or vector, received shape: "", diag_index.shape().DebugString())); + OP_REQUIRES(context, diag_index.NumElements() > 0, + errors::InvalidArgument( + ""Expected diag_index to have at least 1 element"")); lower_diag_index = diag_index.flat()(0); upper_diag_index = lower_diag_index; if (TensorShapeUtils::IsVector(diag_index.shape())) { @@ -179,6 +182,9 @@ class MatrixDiagOp : public OpKernel { errors::InvalidArgument( ""diag_index must be a scalar or vector, received shape: "", diag_index.shape().DebugString())); + OP_REQUIRES(context, diag_index.NumElements() > 0, + errors::InvalidArgument( + ""Expected diag_index to have at least 1 element"")); lower_diag_index = diag_index.flat()(0); upper_diag_index = lower_diag_index; if (TensorShapeUtils::IsVector(diag_index.shape())) { ",1,test ff8894044dfae5568ecbf2ed514c1a37dc394f1b,tensorflow/tensorflow,"Add one missing valdiation to `matrix_set_diag_op.cc` PiperOrigin-RevId: 387923408 Change-Id: If6a97b9098c13879400f56c22f91555cdf0ce5d7",matrix_set_diag_op.cc,"@@ -70,6 +70,9 @@ class MatrixSetDiagOp : public OpKernel { errors::InvalidArgument( ""diag_index must be a scalar or vector, received shape: "", diag_index.shape().DebugString())); + OP_REQUIRES( + context, diag_index.NumElements() > 0, + errors::InvalidArgument(""diag_index must have at least one element"")); lower_diag_index = diag_index.flat()(0); upper_diag_index = lower_diag_index; if (TensorShapeUtils::IsVector(diag_index.shape())) { ",1,train 93f428fd1768df147171ed674fee1fc5ab8309ec,tensorflow/tensorflow,"Fix nullptr deref and heap OOB access in binary cwise ops. PiperOrigin-RevId: 387936777 Change-Id: I608b8074cec36a982cca622b7144cb2c43e6e19f",cwise_ops_common.h,"@@ -265,6 +265,11 @@ class SimpleBinaryOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& in0 = ctx->input(0); const Tensor& in1 = ctx->input(1); + OP_REQUIRES( + ctx, in0.NumElements() == in1.NumElements(), + errors::InvalidArgument(""The two arguments to a cwise op must have "" + ""same number of elements, got "", + in0.NumElements(), "" and "", in1.NumElements())); auto in0_flat = in0.flat(); auto in1_flat = in1.flat(); const Device& eigen_device = ctx->eigen_device(); ",1,train e86605c0a336c088b638da02135ea6f9f6753618,tensorflow/tensorflow,"Fix FPE in inpace update ops. 
PiperOrigin-RevId: 388303197 Change-Id: Ib48309b6213ffe53eba81004b00e889d653e4b83",inplace_ops.cc,"@@ -225,7 +225,7 @@ class InplaceOpBase : public OpKernel { Tensor y = x; // This creates an alias intentionally. // Skip processing if tensors are empty. - if (x.NumElements() > 0 || v.NumElements() > 0) { + if (x.NumElements() > 0 && v.NumElements() > 0) { OP_REQUIRES_OK(ctx, DoCompute(ctx, i, v, &y)); } ctx->set_output(0, y); ",1,train 8a84f7a2b5a2b27ecf88d25bad9ac777cd2f7992,tensorflow/tensorflow,"Ensure num_streams >= 0 in tf.raw_ops.BoostedTreesCreateQuantileStreamResource PiperOrigin-RevId: 387452765 Change-Id: I9990c760e177fabca6a3b9b4612ceeaeeba51495",quantile_ops.cc,"@@ -116,6 +116,9 @@ class BoostedTreesCreateQuantileStreamResourceOp : public OpKernel { const Tensor* num_streams_t; OP_REQUIRES_OK(context, context->input(kNumStreamsName, &num_streams_t)); int64_t num_streams = num_streams_t->scalar()(); + OP_REQUIRES(context, num_streams >= 0, + errors::InvalidArgument( + ""Num_streams input cannot be a negative integer"")); auto result = new QuantileStreamResource(epsilon, max_elements_, num_streams); ",1,train 429f009d2b2c09028647dd4bb7b3f6f414bbaad7,tensorflow/tensorflow,"Add remaining missing validation to `BoostedTreesCalculateBestFeatureSplit` PiperOrigin-RevId: 387423006 Change-Id: I8eaf30efb223011519e60707bfa751b275d3a443",stats_ops.cc,"@@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include +#include #include #include ""third_party/eigen3/Eigen/Core"" @@ -22,6 +23,7 @@ limitations under the License. #include ""tensorflow/core/framework/tensor_shape.h"" #include ""tensorflow/core/kernels/boosted_trees/boosted_trees.pb.h"" #include ""tensorflow/core/kernels/boosted_trees/tree_helper.h"" +#include ""tensorflow/core/platform/errors.h"" #include ""tensorflow/core/platform/logging.h"" namespace tensorflow { @@ -254,12 +256,18 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel { // node_id_range const Tensor* node_id_range_t; OP_REQUIRES_OK(context, context->input(""node_id_range"", &node_id_range_t)); + OP_REQUIRES( + context, node_id_range_t->NumElements() == 2, + errors::InvalidArgument(""node_id_range argument must have shape [2]"")); const auto node_id_range = node_id_range_t->vec(); const int32_t node_id_first = node_id_range(0); // inclusive const int32_t node_id_last = node_id_range(1); // exclusive const Tensor* stats_summary_t; OP_REQUIRES_OK(context, context->input(""stats_summary"", &stats_summary_t)); + OP_REQUIRES( + context, stats_summary_t->shape().dims() == 4, + errors::InvalidArgument(""stats_summary argument must have rank 4"")); TTypes::ConstTensor stats_summary = stats_summary_t->tensor(); const int32_t feature_dims = stats_summary_t->dim_size(1); @@ -272,6 +280,8 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel { const Tensor* l1_t; OP_REQUIRES_OK(context, context->input(""l1"", &l1_t)); + OP_REQUIRES(context, l1_t->NumElements() == 1, + errors::InvalidArgument(""l1 argument must be a scalar"")); const auto l1 = l1_t->scalar()(); DCHECK_GE(l1, 0); if (logits_dim_ > 1) { @@ -281,17 +291,25 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel { const Tensor* l2_t; OP_REQUIRES_OK(context, context->input(""l2"", &l2_t)); + OP_REQUIRES(context, l2_t->NumElements() == 1, + errors::InvalidArgument(""l2 argument must be a scalar"")); const auto l2 = l2_t->scalar()(); DCHECK_GE(l2, 0); const Tensor* tree_complexity_t; 
OP_REQUIRES_OK(context, context->input(""tree_complexity"", &tree_complexity_t)); + OP_REQUIRES( + context, tree_complexity_t->NumElements() == 1, + errors::InvalidArgument(""tree_complexity argument must be a scalar"")); const auto tree_complexity = tree_complexity_t->scalar()(); const Tensor* min_node_weight_t; OP_REQUIRES_OK(context, context->input(""min_node_weight"", &min_node_weight_t)); + OP_REQUIRES( + context, min_node_weight_t->NumElements() == 1, + errors::InvalidArgument(""min_node_weight argument must be a scalar"")); const auto min_node_weight = min_node_weight_t->scalar()(); std::vector output_node_ids; @@ -300,7 +318,7 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel { std::vector output_thresholds; std::vector output_left_node_contribs; std::vector output_right_node_contribs; - std::vector output_split_types; + std::vector output_split_types; // TODO(tanzheny) parallelize the computation. // Iterate each node and find the best gain per node. ",1,train 9c87c32c710d0b5b53dc6fd3bfde4046e1f7a5ad,tensorflow/tensorflow,"Disallow empty node_id_range in tf.raw_ops.BoostedTreesCalculateBestFeatureSplitV2 and tf.raw_ops.BoostedTreesCalculateBestGainsPerFeature PiperOrigin-RevId: 387165936 Change-Id: I2f70341af96236b2776c2a592c917d549c1fc1e2",stats_ops.cc,"@@ -51,6 +51,16 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { // node_id_range const Tensor* node_id_range_t; OP_REQUIRES_OK(context, context->input(""node_id_range"", &node_id_range_t)); + OP_REQUIRES( + context, node_id_range_t->dims() == 1, + errors::InvalidArgument(""node_id_range must be a rank 1 tensor, but "" + ""given node_id_range has dims of "", + node_id_range_t->dims())); + OP_REQUIRES(context, node_id_range_t->dim_size(0) == 2, + errors::InvalidArgument( + ""node_id_range must be a rank 1 tensor with shape=[2], but "" + ""given node_id_range has shape "", + node_id_range_t->dim_size(0), "" on its first dim"")); const auto node_id_range = node_id_range_t->vec(); const int32_t node_id_first = node_id_range(0); // inclusive const int32_t node_id_last = node_id_range(1); // exclusive @@ -570,6 +580,16 @@ class BoostedTreesCalculateBestFeatureSplitV2 : public OpKernel { const Tensor* node_id_range_t; OP_REQUIRES_OK(context, context->input(""node_id_range"", &node_id_range_t)); const auto node_id_range = node_id_range_t->vec(); + OP_REQUIRES( + context, node_id_range_t->dims() == 1, + errors::InvalidArgument(""node_id_range must be a rank 1 tensor, but "" + ""given node_id_range has dims of "", + node_id_range_t->dims())); + OP_REQUIRES(context, node_id_range_t->dim_size(0) == 2, + errors::InvalidArgument( + ""node_id_range must be a rank 1 tensor with shape=[2], but "" + ""given node_id_range has shape "", + node_id_range_t->dim_size(0), "" on its first dim"")); const int32_t node_id_first = node_id_range(0); // Inclusive. const int32_t node_id_last = node_id_range(1); // Exclusive. 
",1,train 6da6620efad397c85493b8f8667b821403516708,tensorflow/tensorflow,"Secure tf.raw_ops.QuantizeV2 Validate size and shape of min_range and max_range Ensure axis is within input dims limits PiperOrigin-RevId: 387232799 Change-Id: I36975281f7b5758e9e31a8dcc73fe610ef456318",quantize_op.cc,"@@ -113,7 +113,50 @@ class QuantizeV2Op : public OpKernel { int num_slices = 1; if (axis_ > -1) { + OP_REQUIRES( + ctx, input.dims() > axis_, + errors::InvalidArgument( + ""Axis is on a zero-based index, so its value must always be less "" + ""than number of input's dims, but given axis value was "", + axis_, "" and input's dims was "", input.dims())); num_slices = input.dim_size(axis_); + OP_REQUIRES(ctx, input_min_range.dims() == 1, + errors::InvalidArgument( + ""If axis is specified, min_range must be a 1-D tensor "" + ""whose size matches the axis dimension of the input and "" + ""output tensors, but min_range dims are "", + input_min_range.dims())); + OP_REQUIRES(ctx, input_min_range.dim_size(0) == num_slices, + errors::InvalidArgument( + ""If axis is specified, min_range must be a 1-D tensor "" + ""whose size matches the axis dimension of the input and "" + ""output tensors, but min_range is a 1-D tensor of size "", + input_min_range.dim_size(0), + "" and input's axis dimension is of size "", num_slices)); + OP_REQUIRES(ctx, input_max_range.dims() == 1, + errors::InvalidArgument( + ""If axis is specified, max_range must be a 1-D tensor "" + ""whose size matches the axis dimension of the input and "" + ""output tensors, but max_range dims are "", + input_max_range.dims())); + OP_REQUIRES(ctx, input_max_range.dim_size(0) == num_slices, + errors::InvalidArgument( + ""If axis is specified, max_range must be a 1-D tensor "" + ""whose size matches the axis dimension of the input and "" + ""output tensors, but max_range is a 1-D tensor of size "", + input_max_range.dim_size(0), + "" and input's axis dimension is of size "", num_slices)); + } else { + OP_REQUIRES(ctx, input_min_range.NumElements() == 1, + errors::InvalidArgument( + ""If axis is not specified, min_range must contain a "" + ""single float element, but it contains "", + input_min_range.NumElements(), "" elements"")); + OP_REQUIRES(ctx, input_max_range.NumElements() == 1, + errors::InvalidArgument( + ""If axis is not specified, max_range must contain a "" + ""single float element, but it contains "", + input_max_range.NumElements(), "" elements"")); } const TensorShape& minmax_shape = ctx->input(1).shape(); ",1,test e84c975313e8e8e38bb2ea118196369c45c51378,tensorflow/tensorflow,"In tf.raw_ops.BoostedTreesSparseCalculateBestFeatureSplit, limit stat_dim in stats_summary_indices to under stats_dims in stats_summary_shape PiperOrigin-RevId: 387171191 Change-Id: I83ca8a75b22aa78c037e8b98779da6cced16bfaa",stats_ops.cc,"@@ -1050,6 +1050,13 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel { const int32_t feature_dim = stats_summary_indices(idx, 1); const int32_t bucket_id = stats_summary_indices(idx, 2); const int32_t stat_dim = stats_summary_indices(idx, 3); + OP_REQUIRES(context, stat_dim < stats_dims, + errors::InvalidArgument( + ""Stat dim, the sum of logits dim and hessian dim in "" + ""stats_summary_indices, cannot be greater than stats "" + ""dims, the last value in stats_summary_shape, which was "", + stats_dims, "". 
At index ("", idx, + "", 4), stats_summary_indices contains value "", stat_dim)); std::pair const& f_insert_result = f_map.insert( FeatureMapIterator::value_type(feature_dim, BucketMap())); auto& b_map = f_insert_result.first->second; ",1,test 203214568f5bc237603dbab6e1fd389f1572f5c9,tensorflow/tensorflow,"Reorganize and add more validation to MKL requantization PiperOrigin-RevId: 387901341 Change-Id: I2515b9034c64e113db0bcec8337d30643ab0a0f1",mkl_requantize_per_channel_op.cc,"@@ -49,35 +49,45 @@ class MklRequantizePerChannelOp : public OpKernel { void Compute(OpKernelContext* ctx) override { try { const Tensor& input = ctx->input(kInputTensorIndex); + OP_REQUIRES( + ctx, input.dims() == 4, + errors::InvalidArgument(""Current RequantizePerChannel operator"" + ""supports 4D tensors only."")); + const Tensor& input_min_vec = ctx->input(kInputMinVecIndex); + size_t depth = input_min_vec.NumElements(); float* input_min_vec_data = (float*)const_cast( static_cast(input_min_vec.flat().data())); + const Tensor& input_max_vec = ctx->input(kInputMaxVecIndex); + OP_REQUIRES( + ctx, input_max_vec.NumElements() == depth, + errors::InvalidArgument(""input_max has incorrect size, expected "", + depth, "" was "", input_max_vec.NumElements())); float* input_max_vec_data = (float*)const_cast( static_cast(input_max_vec.flat().data())); const Tensor& input_requested_min = ctx->input(this->kRequestMinIndex); + OP_REQUIRES( + ctx, input_requested_min.NumElements() == 1, + errors::InvalidArgument(""requested_output_min must be a scalar"")); const float input_requested_min_float = input_requested_min.flat()(0); + const Tensor& input_requested_max = ctx->input(this->kRequestMaxIndex); + OP_REQUIRES( + ctx, input_requested_min.NumElements() == 1, + errors::InvalidArgument(""requested_output_max must be a scalar"")); const float input_requested_max_float = input_requested_max.flat()(0); - size_t depth = input_min_vec.NumElements(); - OP_REQUIRES( - ctx, input.dims() == 4, - errors::InvalidArgument(""Current RequantizePerChannel operator"" - ""supports 4D tensors only."")); - OP_REQUIRES( - ctx, input_min_vec.dim_size(0) == depth, - errors::InvalidArgument(""input_min has incorrect size, expected "", - depth, "" was "", input_min_vec.dim_size(0))); - OP_REQUIRES( - ctx, input_max_vec.dim_size(0) == depth, - errors::InvalidArgument(""input_max has incorrect size, expected "", - depth, "" was "", input_max_vec.dim_size(0))); - - if (out_type_ == DT_QINT8) DCHECK(input_requested_min_float < 0.0f); + if (out_type_ == DT_QINT8) { + OP_REQUIRES(ctx, input_requested_min_float < 0.0f, + errors::InvalidArgument( + ""If out_type is QINT8, requested_output_max must be "" + ""non negative, got "", + input_requested_min_float)); + } const float factor = (out_type_ == DT_QINT8) ? 127.0f : 255.0f; const float requested_min_max = ",1,train 9e62869465573cb2d9b5053f1fa02a81fce21d69,tensorflow/tensorflow,"Add more validation to `RequantizationRangePerChannel`. 
PiperOrigin-RevId: 387693946 Change-Id: Ife8dcbdb021bec4787eef6a4361dd08f17c14bd6",mkl_requantization_range_per_channel_op.cc,"@@ -57,6 +57,20 @@ class MklRequantizationRangePerChannelOp : public OpKernel { ctx, input_max.dim_size(0) == depth, errors::InvalidArgument(""input_max has incorrect size, expected "", depth, "" was "", input_max.dim_size(0))); + OP_REQUIRES( + ctx, input_min.NumElements() == depth, + errors::InvalidArgument(""input_min must have the same number of "" + ""elements as input_max, got "", + input_min.NumElements(), "" and "", depth)); + OP_REQUIRES(ctx, input.NumElements() > 0, + errors::InvalidArgument(""input must not be empty"")); + OP_REQUIRES(ctx, input.dims() == 4, + errors::InvalidArgument(""input must be in NHWC format"")); + OP_REQUIRES( + ctx, input.dim_size(3) == depth, + errors::InvalidArgument( + ""input must have same number of channels as length of input_min: "", + input.dim_size(3), "" vs "", depth)); const float* input_min_data = input_min.flat().data(); const float* input_max_data = input_max.flat().data(); ",1,test be7a4de6adfbd303ce08be4332554dff70362612,tensorflow/tensorflow,"Ensure non-empty rt_nested_splits in tf.raw_ops.RaggedTensorToVariant PiperOrigin-RevId: 387664237 Change-Id: Ia1700c34b5610873d63561abc86e23b46ead93b3",ragged_tensor_to_variant_op.cc,"@@ -157,6 +157,12 @@ class RaggedTensorToVariantOp : public OpKernel { return; } + // Checked here instead of at input in case batched_input_ is false + OP_REQUIRES(context, ragged_nested_splits_len > 0, + errors::InvalidArgument( + ""rt_nested_splits must be a list of one or more, but "" + ""received rt_nested_splits of length 0."")); + // Unbatch the Ragged Tensor and encode the components. std::vector unbatched_ragged_input; auto batched_splits_top_vec = ",1,train 2e0ee46f1a47675152d3d865797a18358881d7a6,tensorflow/tensorflow,"Ensure non-empty input_splits in tf.raw_ops.UnicodeEncode PiperOrigin-RevId: 387170080 Change-Id: I3b489acc51c5cb4124c535b9df7cc6e62ef21766",unicode_ops.cc,"@@ -533,6 +533,10 @@ class UnicodeEncodeOp : public OpKernel { const Tensor& input_splits = context->input(1); const auto input_splits_flat = input_splits.flat(); + OP_REQUIRES( + context, input_splits.NumElements() > 0, + errors::InvalidArgument(""Input_splits should contain elements, but "" + ""given input_values has 0 elements"")); // Operation will treat first argument in input_splits as if it were zero // regardless of its actual value since splits should begin with zero and // end with the length of the input values vector. 
",1,test a776040a5e7ebf76eeb7eb923bf1ae417dd4d233,tensorflow/tensorflow,"Disallow dims input of 0 in tf.raw_ops.UnravelIndex PiperOrigin-RevId: 384284198 Change-Id: Ia1804ef1aec57b4d857ea507e6891bcccde18e9b",unravel_index_op.cc,"@@ -53,6 +53,14 @@ class UnravelIndexOp : public OpKernel { dims_tensor.shape().DebugString(), ""\"""")); auto dims = dims_tensor.vec(); + // Make sure dims does not contain a zero + for (int i = 0; i < dims.size(); i++) { + OP_REQUIRES( + ctx, dims(i) != 0, + errors::InvalidArgument(""Input dims cannot contain a dim of zero, "" + ""but dims contains zero at index "", + i)); + } // Chek to make sure indices is not out of boundary Eigen::Tensor dims_prod_eigen = dims.prod(); ",1,train a776040a5e7ebf76eeb7eb923bf1ae417dd4d233,tensorflow/tensorflow,"Disallow dims input of 0 in tf.raw_ops.UnravelIndex PiperOrigin-RevId: 384284198 Change-Id: Ia1804ef1aec57b4d857ea507e6891bcccde18e9b",array_ops_test.py,"@@ -1575,7 +1575,7 @@ class UnravelIndexTest(test_util.TensorFlowTestCase): with self.cached_session(): for dtype in [dtypes.int32, dtypes.int64]: with self.assertRaisesRegex(errors.InvalidArgumentError, - ""index is out of bound as with dims""): + ""dims cannot contain a dim of zero""): indices = constant_op.constant([2, 5, 7], dtype=dtype) dims = constant_op.constant([3, 0], dtype=dtype) self.evaluate(array_ops.unravel_index(indices=indices, dims=dims)) ",1,train 3a7362750d5c372420aa8f0caf7bf5b5c3d0f52d,tensorflow/tensorflow,"Prevent crash/heap OOB due to integer conversion to unsigned in NMS kernels PiperOrigin-RevId: 387938262 Change-Id: Id361a715307e7179977cf5c64391c199a966f2ad",non_max_suppression_op.cc,"@@ -169,6 +169,8 @@ void DoNonMaxSuppressionOp(OpKernelContext* context, const Tensor& scores, bool pad_to_max_output_size = false, int* ptr_num_valid_outputs = nullptr) { const int output_size = max_output_size.scalar()(); + OP_REQUIRES(context, output_size >= 0, + errors::InvalidArgument(""output size must be non-negative"")); std::vector scores_data(num_boxes); std::copy_n(scores.flat().data(), num_boxes, scores_data.begin()); @@ -768,6 +770,9 @@ class NonMaxSuppressionV4Op : public OpKernel { context, scores, num_boxes, max_output_size, iou_threshold_val, score_threshold_val, dummy_soft_nms_sigma, similarity_fn, return_scores_tensor_, pad_to_max_output_size_, &num_valid_outputs); + if (!context->status().ok()) { + return; + } // Allocate scalar output tensor for number of indices computed. Tensor* num_outputs_t = nullptr; @@ -845,6 +850,9 @@ class NonMaxSuppressionV5Op : public OpKernel { context, scores, num_boxes, max_output_size, iou_threshold_val, score_threshold_val, soft_nms_sigma_val, similarity_fn, return_scores_tensor_, pad_to_max_output_size_, &num_valid_outputs); + if (!context->status().ok()) { + return; + } // Allocate scalar output tensor for number of indices computed. Tensor* num_outputs_t = nullptr; ",1,train b5cdbf12ffcaaffecf98f22a6be5a64bb96e4f58,tensorflow/tensorflow,"Prevent overflow due to integer conversion to unsigned. 
PiperOrigin-RevId: 387738045 Change-Id: Id7e95bc07e02df1c66b72bd09f389608c87bdebe",non_max_suppression_op.cc,"@@ -930,6 +930,8 @@ class CombinedNonMaxSuppressionOp : public OpKernel { errors::InvalidArgument(""max_size_per_class must be 0-D, got shape "", max_output_size.shape().DebugString())); const int max_size_per_class = max_output_size.scalar()(); + OP_REQUIRES(context, max_size_per_class > 0, + errors::InvalidArgument(""max_size_per_class must be positive"")); // max_total_size: scalar const Tensor& max_total_size = context->input(3); OP_REQUIRES( ",1,train 42459e4273c2e47a3232cc16c4f4fff3b3a35c38,tensorflow/tensorflow,"Prevent CHECK-fail/heap OOB in UpperBound and LowerBound PiperOrigin-RevId: 387738073 Change-Id: Iee74de95ddad18440d052a75a5a1cb67544f490a",searchsorted_op.cc,"@@ -86,6 +86,10 @@ class UpperBoundOp : public OpKernel { const Tensor& sorted_inputs_t = ctx->input(0); const Tensor& values_t = ctx->input(1); + // inputs must be at least a matrix + OP_REQUIRES( + ctx, sorted_inputs_t.shape().dims() >= 2, + errors::InvalidArgument(""sorted input argument must be a matrix"")); // must have same batch dim_size for both OP_REQUIRES(ctx, sorted_inputs_t.dim_size(0) == values_t.dim_size(0), Status(error::INVALID_ARGUMENT, @@ -127,6 +131,10 @@ class LowerBoundOp : public OpKernel { const Tensor& sorted_inputs_t = ctx->input(0); const Tensor& values_t = ctx->input(1); + // inputs must be at least a matrix + OP_REQUIRES( + ctx, sorted_inputs_t.shape().dims() >= 2, + errors::InvalidArgument(""sorted input argument must be a matrix"")); // must have same batch dim_size for both OP_REQUIRES(ctx, sorted_inputs_t.dim_size(0) == values_t.dim_size(0), Status(error::INVALID_ARGUMENT, ",1,train 532f5c5a547126c634fefd43bbad1dc6417678ac,tensorflow/tensorflow,"Prevent nullptr deref in validation of indexes in map ops. 
PiperOrigin-RevId: 387738023 Change-Id: I83d18d36a7b82ffd2a40b5124a4e5b4c72238f27",map_stage_op.cc,"@@ -210,9 +210,9 @@ class StagingMap : public ResourceBase { const OptionalTuple& tuple) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) { if (tuple[index].has_value()) { - return Status(errors::InvalidArgument( + return errors::InvalidArgument( ""The tensor for index '"", index, ""' for key '"", key.scalar()(), - ""' was already initialized '"", dtypes_.size(), ""'."")); + ""' was already initialized '"", dtypes_.size(), ""'.""); } return Status::OK(); @@ -220,6 +220,10 @@ class StagingMap : public ResourceBase { // Check that the indices are strictly ordered Status check_index_ordering(const Tensor& indices) { + if (indices.NumElements() == 0) { + return errors::InvalidArgument(""Indices are empty""); + } + auto findices = indices.flat(); for (std::size_t i = 0; i < findices.dimension(0) - 1; ++i) { @@ -227,8 +231,7 @@ class StagingMap : public ResourceBase { continue; } - return Status( - errors::InvalidArgument(""Indices are not strictly ordered"")); + return errors::InvalidArgument(""Indices are not strictly ordered""); } return Status::OK(); @@ -238,10 +241,10 @@ class StagingMap : public ResourceBase { Status check_memory_limit(std::size_t bytes) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) { if (has_memory_limit() && bytes > memory_limit_) { - return Status(errors::ResourceExhausted( + return errors::ResourceExhausted( ""Attempted to insert tensors with combined size of '"", bytes, ""' bytes into Staging Area with a memory limit of '"", memory_limit_, - ""'."")); + ""'.""); } return Status::OK(); ",1,train a4e138660270e7599793fa438cd7b2fc2ce215a6,tensorflow/tensorflow,"Add remaining validation to `sdca_internal.cc` PiperOrigin-RevId: 387738010 Change-Id: I28eedcfd87a53aaf34deb075acea1f8c95470808",sdca_internal.cc,"@@ -380,6 +380,11 @@ Status Examples::Initialize(OpKernelContext* const context, const Tensor* example_labels_t; TF_RETURN_IF_ERROR(context->input(""example_labels"", &example_labels_t)); auto example_labels = example_labels_t->flat(); + if (example_labels.size() != num_examples) { + return errors::InvalidArgument(""Expected "", num_examples, + "" example labels but got "", + example_labels.size()); + } OpInputList dense_features_inputs; TF_RETURN_IF_ERROR( ",1,test d7de67733925de196ec8863a33445b73f9562d1d,tensorflow/tensorflow,"Prevent a CHECK-fail due to empty tensor input in `map_stage_op.cc` PiperOrigin-RevId: 387737906 Change-Id: Idc52df0c71c7ed6e2dd633b651a581932f277c8a",map_stage_op.cc,"@@ -527,6 +527,8 @@ class MapStageOp : public OpKernel { OP_REQUIRES_OK(ctx, ctx->input(""key"", &key_tensor)); OP_REQUIRES_OK(ctx, ctx->input(""indices"", &indices_tensor)); OP_REQUIRES_OK(ctx, ctx->input_list(""values"", &values_tensor)); + OP_REQUIRES(ctx, key_tensor->NumElements() > 0, + errors::InvalidArgument(""key must not be empty"")); // Create copy for insertion into Staging Area Tensor key(*key_tensor); ",1,train 136b51f10903e044308cf77117c0ed9871350475,tensorflow/tensorflow,"Add missing validation to `maxpooling_op.cc` PiperOrigin-RevId: 387932441 Change-Id: I43a0b24e6a12cc965611144ba035accd384594b9",maxpooling_op.cc,"@@ -74,6 +74,7 @@ static void SpatialMaxPoolWithArgMaxHelper( errors::Internal(""SpatialMaxPoolWithArgMaxHelper requires Targmax "" ""to be int64 when input_backprop != nullptr"")); } + if (tensor_in.NumElements() == 0 || output->NumElements() == 0) return; typedef Eigen::Map> ConstEigenMatrixMap; @@ -949,6 +950,10 @@ class MaxPoolingWithArgmaxOp : public OpKernel { void 
Compute(OpKernelContext* context) override { const Tensor& tensor_in = context->input(0); + OP_REQUIRES(context, tensor_in.dims() == 4, + errors::InvalidArgument(""tensor_in must be 4-dimensional (2)"")); + OP_REQUIRES(context, tensor_in.NumElements() > 0, + errors::InvalidArgument(""tensor_in must not be empty (2)"")); PoolParameters params{context, ksize_, ",1,train 136b51f10903e044308cf77117c0ed9871350475,tensorflow/tensorflow,"Add missing validation to `maxpooling_op.cc` PiperOrigin-RevId: 387932441 Change-Id: I43a0b24e6a12cc965611144ba035accd384594b9",pooling_ops_common.cc,"@@ -171,6 +171,8 @@ PoolParameters::PoolParameters(OpKernelContext* context, pad_depth = 0; out_depth = depth; } else { + OP_REQUIRES(context, depth_window > 0, + errors::InvalidArgument(""depth_window must not be 0"")); // Our current version of depthwise max pooling does not support // any padding, and expects the depth_window to equal the // depth_stride (no overlapping). ",1,train 8a793b5d7f59e37ac7f3cd0954a750a2fe76bad4,tensorflow/tensorflow,"Prevent division by 0 in common shape functions. PiperOrigin-RevId: 387712197 Change-Id: Id25c7460e35b68aeeeac23b9a88e455b443ee149",common_shape_fns.cc,"@@ -672,6 +672,8 @@ Status Conv2DShapeImpl(shape_inference::InferenceContext* c, if (c->ValueKnown(input_depth_dim) && c->ValueKnown(filter_input_depth_dim)) { int64_t input_depth_value = c->Value(input_depth_dim), filter_input_depth_value = c->Value(filter_input_depth_dim); + if (filter_input_depth_value == 0) + return errors::InvalidArgument(""Depth of filter must not be 0""); if (input_depth_value % filter_input_depth_value != 0) return errors::InvalidArgument( ""Depth of input ("", input_depth_value, @@ -681,6 +683,8 @@ Status Conv2DShapeImpl(shape_inference::InferenceContext* c, int64_t num_groups = input_depth_value / filter_input_depth_value; if (c->ValueKnown(output_depth_dim)) { int64_t output_depth_value = c->Value(output_depth_dim); + if (num_groups == 0) + return errors::InvalidArgument(""Number of groups must not be 0""); if (output_depth_value % num_groups != 0) return errors::InvalidArgument( ""Depth of output ("", output_depth_value, @@ -816,6 +820,8 @@ Status Conv3DShape(shape_inference::InferenceContext* c) { if (c->ValueKnown(input_depth_dim) && c->ValueKnown(filter_input_depth_dim)) { int64_t input_depth_value = c->Value(input_depth_dim), filter_input_depth_value = c->Value(filter_input_depth_dim); + if (filter_input_depth_value == 0) + return errors::InvalidArgument(""Depth of filter must not be 0""); if (input_depth_value % filter_input_depth_value != 0) return errors::InvalidArgument( ""Depth of input ("", input_depth_value, @@ -825,6 +831,8 @@ Status Conv3DShape(shape_inference::InferenceContext* c) { int64_t num_groups = input_depth_value / filter_input_depth_value; if (c->ValueKnown(output_depth_dim)) { int64_t output_depth_value = c->Value(output_depth_dim); + if (num_groups == 0) + return errors::InvalidArgument(""Number of groups must not be 0""); if (output_depth_value % num_groups != 0) return errors::InvalidArgument( ""Depth of output ("", output_depth_value, @@ -2456,6 +2464,9 @@ Status SparseReduceShapeFn(InferenceContext* c) { int64_t ndims = shape_vec.size(); absl::flat_hash_set axes; + if (ndims == 0) + return errors::InvalidArgument( + ""Number of dims in shape tensor must not be 0""); for (int i = 0; i < axes_vec.size(); i++) { axes.insert((axes_vec(i) + ndims) % ndims); } ",1,test 578e634b4f1c1c684d4b4294f9e5281b2133b3ed,tensorflow/tensorflow,"Prevent a segfault in shape inference 
due to bad inputs. PiperOrigin-RevId: 387737970 Change-Id: Ibd1cf3dbdce1dd2ab47fd633d5c5a57f7d8fb6e9",sparse_ops.cc,"@@ -16,6 +16,7 @@ limitations under the License. #include ""tensorflow/core/framework/common_shape_fns.h"" #include ""tensorflow/core/framework/op.h"" #include ""tensorflow/core/framework/shape_inference.h"" +#include ""tensorflow/core/platform/errors.h"" namespace tensorflow { @@ -619,6 +620,8 @@ REGISTER_OP(""SparseFillEmptyRows"") DimensionHandle unused_dim; TF_RETURN_IF_ERROR(c->Merge(c->Dim(input_indices, 1), c->Dim(input_shape, 0), &unused_dim)); + if (c->Value(c->NumElements(input_shape)) == 0) + return errors::InvalidArgument(""dense_shape must not be empty""); ShapeHandle output_indices = c->Matrix(InferenceContext::kUnknownDim, c->NumElements(input_shape)); ShapeHandle output_values = c->Vector(InferenceContext::kUnknownDim); ",1,train da857cfa0fde8f79ad0afdbc94e88b5d4bbec764,tensorflow/tensorflow,"Fix a shape inference issue leading to nullptr deref. PiperOrigin-RevId: 387712259 Change-Id: I7e670772b259c068a501a187cd89f18773bb95a1",array_ops.cc,"@@ -2990,6 +2990,10 @@ REGISTER_OP(""Dequantize"") if (!s.ok() && s.code() != error::NOT_FOUND) { return s; } + if (axis < -1) { + return errors::InvalidArgument(""axis should be at least -1, got "", + axis); + } const int minmax_rank = (axis == -1) ? 0 : 1; TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); ShapeHandle minmax; ",1,train 23d6383eb6c14084a8fc3bdf164043b974818012,tensorflow/tensorflow,"Use the safer `safe_load` function instead of `unsafe_load` when possible There is no need to open ourselves up to arbitrary code execution, especially since this is not in a performance critical loop, so we can take the slowdown due to safety. PiperOrigin-RevId: 388501098 Change-Id: I3434318a5e07a798490533b554f46752397837e5",functional.py,"@@ -53,7 +53,7 @@ class Functional(training_lib.Model): than with subclassed `Model`s, specifically: - Model cloning (`keras.models.clone`) - - Serialization (`model.get_config()/from_config`, `model.to_json()/to_yaml()` + - Serialization (`model.get_config()/from_config`, `model.to_json()` - Whole-model saving (`model.save()`) A `Functional` model can be instantiated by passing two arguments to ",1,train 23d6383eb6c14084a8fc3bdf164043b974818012,tensorflow/tensorflow,"Use the safer `safe_load` function instead of `unsafe_load` when possible There is no need to open ourselves up to arbitrary code execution, especially since this is not in a performance critical loop, so we can take the slowdown due to safety. 
PiperOrigin-RevId: 388501098 Change-Id: I3434318a5e07a798490533b554f46752397837e5",functional_test.py,"@@ -47,11 +47,6 @@ from tensorflow.python.ops.ragged import ragged_factory_ops from tensorflow.python.platform import test from tensorflow.python.training.tracking.util import Checkpoint -try: - import yaml # pylint:disable=g-import-not-at-top -except ImportError: - yaml = None - class NetworkConstructionTest(keras_parameterized.TestCase): @@ -627,10 +622,6 @@ class NetworkConstructionTest(keras_parameterized.TestCase): json_str = model.to_json() models.model_from_json(json_str) - if yaml is not None: - yaml_str = model.to_yaml() - models.model_from_yaml(yaml_str) - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_invalid_graphs(self): a = layers.Input(shape=(32,), name='input_a') @@ -1361,10 +1352,6 @@ class NetworkConstructionTest(keras_parameterized.TestCase): json_str = model.to_json() models.model_from_json(json_str) - if yaml is not None: - yaml_str = model.to_yaml() - models.model_from_yaml(yaml_str) - def test_subclassed_error_if_init_not_called(self): class MyNetwork(training_lib.Model): ",1,train 23d6383eb6c14084a8fc3bdf164043b974818012,tensorflow/tensorflow,"Use the safer `safe_load` function instead of `unsafe_load` when possible There is no need to open ourselves up to arbitrary code execution, especially since this is not in a performance critical loop, so we can take the slowdown due to safety. PiperOrigin-RevId: 388501098 Change-Id: I3434318a5e07a798490533b554f46752397837e5",training.py,"@@ -87,11 +87,6 @@ try: import h5py except ImportError: h5py = None - -try: - import yaml -except ImportError: - yaml = None # pylint: enable=g-import-not-at-top @@ -2416,6 +2411,9 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): def to_yaml(self, **kwargs): """"""Returns a yaml string containing the network configuration. + Note: Since TF 2.6, this method is no longer supported and will raise a + RuntimeError. + To load a network from a yaml save file, use `keras.models.model_from_yaml(yaml_string, custom_objects={})`. @@ -2431,12 +2429,12 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): A YAML string. Raises: - ImportError: if yaml module is not found. + RuntimeError: announces that the method poses a security risk """""" - if yaml is None: - raise ImportError( - 'Requires yaml module installed (`pip install pyyaml`).') - return yaml.dump(self._updated_config(), **kwargs) + raise RuntimeError( + 'Method `model.to_yaml()` has been removed due to security risk of ' + 'arbitrary code execution. Please use `model.to_json()` instead.' + ) def reset_states(self): for layer in self.layers: ",1,train 23d6383eb6c14084a8fc3bdf164043b974818012,tensorflow/tensorflow,"Use the safer `safe_load` function instead of `unsafe_load` when possible There is no need to open ourselves up to arbitrary code execution, especially since this is not in a performance critical loop, so we can take the slowdown due to safety. PiperOrigin-RevId: 388501098 Change-Id: I3434318a5e07a798490533b554f46752397837e5",model_config.py,"@@ -18,18 +18,11 @@ from tensorflow.python.keras.saving.saved_model import json_utils from tensorflow.python.util.tf_export import keras_export -# pylint: disable=g-import-not-at-top -try: - import yaml -except ImportError: - yaml = None -# pylint: enable=g-import-not-at-top - @keras_export('keras.models.model_from_config') def model_from_config(config, custom_objects=None): """"""Instantiates a Keras model from its config. 
- + Usage: ``` # for a Functional API model @@ -63,17 +56,8 @@ def model_from_config(config, custom_objects=None): def model_from_yaml(yaml_string, custom_objects=None): """"""Parses a yaml model configuration file and returns a model instance. - Usage: - - >>> model = tf.keras.Sequential([ - ... tf.keras.layers.Dense(5, input_shape=(3,)), - ... tf.keras.layers.Softmax()]) - >>> try: - ... import yaml - ... config = model.to_yaml() - ... loaded_model = tf.keras.models.model_from_yaml(config) - ... except ImportError: - ... pass + Note: Since TF 2.6, this method is no longer supported and will raise a + RuntimeError. Args: yaml_string: YAML string or open file encoding a model configuration. @@ -85,19 +69,13 @@ def model_from_yaml(yaml_string, custom_objects=None): A Keras model instance (uncompiled). Raises: - ImportError: if yaml module is not found. + RuntimeError: announces that the method poses a security risk """""" - if yaml is None: - raise ImportError('Requires yaml module installed (`pip install pyyaml`).') - # The method unsafe_load only exists in PyYAML 5.x+, so which branch of the - # try block is covered by tests depends on the installed version of PyYAML. - try: - # PyYAML 5.x+ - config = yaml.unsafe_load(yaml_string) - except AttributeError: - config = yaml.load(yaml_string) - from tensorflow.python.keras.layers import deserialize # pylint: disable=g-import-not-at-top - return deserialize(config, custom_objects=custom_objects) + raise RuntimeError( + 'Method `model_from_yaml()` has been removed due to security risk of ' + 'arbitrary code execution. Please use `Model.to_json()` and ' + '`model_from_json()` instead.' + ) @keras_export('keras.models.model_from_json') ",1,train 4e2565483d0ffcadc719bd44893fb7f609bb5f12,tensorflow/tensorflow,"Fix bug that could cause map_fn to produce incorrect results (rather than an error) when mapping over a ragged tensor with an inappropriate fn_output_signature. (Note: there are cases where the default value for fn_output_signature is not appropriate, so the user needs to explicitly specify the correct output signature.) PiperOrigin-RevId: 387606546 Change-Id: Ib4ea27b9634e6ab413f211cfe809a69a90f0e2cd",ragged_tensor_from_variant_op.cc,"@@ -174,7 +174,23 @@ Status NestedStackRaggedTensors( auto output_values_flat = output_ragged->mutable_values()->flat_outer_dims(); int values_index = 0; + + TensorShape expected_value_shape = component_values_shape; + expected_value_shape.RemoveDim(0); + for (int i = 0; i < ragged_components.size(); i++) { + // Check that the flat_values tensor shape is compatible. + TensorShape value_shape = ragged_components[i].values().shape(); + value_shape.RemoveDim(0); + if (value_shape != expected_value_shape) { + return errors::InvalidArgument( + ""All flat_values must have compatible shapes. Shape at index 0: "", + expected_value_shape, "". Shape at index "", i, "": "", value_shape, + "". If you are using tf.map_fn, then you may need to specify an "" + ""explicit fn_output_signature with appropriate ragged_rank, and/or "" + ""convert output tensors to RaggedTensors.""); + } + auto component_values_flat = ragged_components[i].values().flat_outer_dims(); int num_inner_elements = ragged_components[i].values().NumElements(); ",1,train 4e2565483d0ffcadc719bd44893fb7f609bb5f12,tensorflow/tensorflow,"Fix bug that could cause map_fn to produce incorrect results (rather than an error) when mapping over a ragged tensor with an inappropriate fn_output_signature. 
(Note: there are cases where the default value for fn_output_signature is not appropriate, so the user needs to explicitly specify the correct output signature.) PiperOrigin-RevId: 387606546 Change-Id: Ib4ea27b9634e6ab413f211cfe809a69a90f0e2cd",ragged_map_fn_op_test.py,"@@ -21,9 +21,11 @@ from absl.testing import parameterized import numpy as np from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import map_fn as map_fn_lib from tensorflow.python.ops import math_ops as mo from tensorflow.python.ops import string_ops from tensorflow.python.ops.ragged import ragged_factory_ops @@ -309,6 +311,27 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, ) self.assertAllEqual(id_t2, [[0, 5], [0, 4]]) + def testRaggedMapWithIncorrectFnOutputSignature(self): + x = ragged_factory_ops.constant([[1, 2, 3, 4], [1]]) + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'All flat_values must have compatible shapes'): + y = map_fn_lib.map_fn(lambda r: map_fn_lib.map_fn(lambda y: r, r), x) + self.evaluate(y) + + def testNestedRaggedMapWithFnOutputSignature(self): + ragged1d = ragged_tensor.RaggedTensorSpec([None], dtypes.int32) + ragged2d = ragged_tensor.RaggedTensorSpec([None, None], dtypes.int32) + + x = ragged_factory_ops.constant([[1, 2, 3, 4], [1]]) + # pylint: disable=g-long-lambda + y = map_fn_lib.map_fn( + lambda r: map_fn_lib.map_fn( + lambda y: r, r, fn_output_signature=ragged1d), + x, + fn_output_signature=ragged2d) + expected = [[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], [[1]]] + self.assertAllEqual(y, expected) + if __name__ == '__main__': googletest.main() ",1,train 718721986aa137691ee23f03638867151f74935f,tensorflow/tensorflow,"Prevent division by 0 in `fully_connected.cc` PiperOrigin-RevId: 385137282 Change-Id: If201e69b6e0048f0be001330b4b977e2b46db2cb",fully_connected.cc,"@@ -223,6 +223,7 @@ TfLiteStatus PrepareImpl(TfLiteContext* context, TfLiteNode* node) { } TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 2); + TF_LITE_ENSURE(context, filter->dims->data[1] != 0); const int batch_size = input_size / filter->dims->data[1]; const int num_units = filter->dims->data[0]; ",1,train 5b048e87e4e55990dae6b547add4dae59f4e1c76,tensorflow/tensorflow,"Fix a null pointer exception in SVDF This is due to not checking that `GetVariableInput` returns non-null tensor. Also fix a potential null pointer exception in `GetVariableInput`. PiperOrigin-RevId: 385160147 Change-Id: Iadf3f0705b036a9014d27caa5a8bbd91f4c4c401",kernel_util.cc,"@@ -119,6 +119,7 @@ TfLiteStatus GetInputSafe(const TfLiteContext* context, const TfLiteNode* node, TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node, int index) { TfLiteTensor* tensor = GetMutableInput(context, node, index); + if (tensor == nullptr) return nullptr; return tensor->is_variable ? tensor : nullptr; } ",1,train 5b048e87e4e55990dae6b547add4dae59f4e1c76,tensorflow/tensorflow,"Fix a null pointer exception in SVDF This is due to not checking that `GetVariableInput` returns non-null tensor. Also fix a potential null pointer exception in `GetVariableInput`. 
PiperOrigin-RevId: 385160147 Change-Id: Iadf3f0705b036a9014d27caa5a8bbd91f4c4c401",svdf.cc,"@@ -299,6 +299,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { GetTemporarySafe(context, node, /*index=*/0, &scratch)); TfLiteTensor* state = GetVariableInput(context, node, kStateTensor); + TF_LITE_ENSURE(context, state != nullptr); TfLiteTensor* output; TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, kOutputTensor, &output)); ",1,train 4a91f2069f7145aab6ba2d8cfe41be8a110c18a5,tensorflow/tensorflow,"Fix a null pointer exception caused by branching on uninitialized data. This is due to not checking that the params for the quantization exists. If there is no quantization, we should not access the `.params` field. PiperOrigin-RevId: 385168337 Change-Id: I28661e4f12ba1c92cfeae23d22a3fb2df2a2c6a4",unidirectional_sequence_lstm.cc,"@@ -62,8 +62,12 @@ TfLiteStatus PopulateQuantizedLstmParams8x8_16( context, GetOutputSafe(context, node, lstm::full::kOutputTensor, &output_tensor)); + TF_LITE_ENSURE(context, + cell_state->quantization.type != kTfLiteNoQuantization); auto* cell_state_params = static_cast(cell_state->quantization.params); + TF_LITE_ENSURE(context, + output_tensor->quantization.type != kTfLiteNoQuantization); auto* proj_params = static_cast( output_tensor->quantization.params); if (cell_clip > 0.0) { @@ -160,6 +164,8 @@ TfLiteStatus PopulateQuantizedLstmParams8x8_16( TfLiteTensor* intermediate; TF_LITE_ENSURE_OK(context, GetIntermediatesSafe(context, node, i, &intermediate)); + TF_LITE_ENSURE(context, + intermediate->quantization.type != kTfLiteNoQuantization); auto* params = static_cast( intermediate->quantization.params); intermediate_scale.push_back(params->scale->data[0]); @@ -174,6 +180,7 @@ TfLiteStatus PopulateQuantizedLstmParams8x8_16( // is ignored. TfLiteTensor* hidden; TF_LITE_ENSURE_OK(context, GetIntermediatesSafe(context, node, 4, &hidden)); + TF_LITE_ENSURE(context, hidden->quantization.type != kTfLiteNoQuantization); auto* hidden_params = static_cast(hidden->quantization.params); intermediate_scale.push_back(hidden_params->scale->data[0]); @@ -760,6 +767,8 @@ TfLiteStatus PopulatePrecomputedZPTimesWeightsWithBias(TfLiteContext* context, const TfLiteTensor* intermediate = &context->tensors[node->intermediates->data[4]]; + TF_LITE_ENSURE(context, + intermediate->quantization.type != kTfLiteNoQuantization); const auto* params = static_cast(intermediate->quantization.params); const int32_t hidden_zp = params->zero_point->data[0]; ",1,train 537bc7c723439b9194a358f64d871dd326c18887,tensorflow/tensorflow,"Fix a null pointer exception caused by branching on uninitialized data. This is due to not checking that the params for the quantization exists. If there is no quantization, we should not access the `.params` field. PiperOrigin-RevId: 385163909 Change-Id: I2beb8d50649b6542db224c163033fbcbaa49314f",svdf.cc,"@@ -256,14 +256,21 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_temp_size_array)); // Calculate effective scales. 
+ TF_LITE_ENSURE(context, input->quantization.type != kTfLiteNoQuantization); auto* input_params = reinterpret_cast(input->quantization.params); + TF_LITE_ENSURE(context, + weights_feature->quantization.type != kTfLiteNoQuantization); auto* weights_feature_params = reinterpret_cast( weights_feature->quantization.params); + TF_LITE_ENSURE(context, state->quantization.type != kTfLiteNoQuantization); auto* state_params = reinterpret_cast(state->quantization.params); + TF_LITE_ENSURE(context, + weights_time->quantization.type != kTfLiteNoQuantization); auto* weight_time_params = reinterpret_cast( weights_time->quantization.params); + TF_LITE_ENSURE(context, output->quantization.type != kTfLiteNoQuantization); auto* output_params = reinterpret_cast( output->quantization.params); const double effective_scale_1 = input_params->scale->data[0] * ",1,train 8933b8a21280696ab119b63263babdb54c298538,tensorflow/tensorflow,"Fix a null pointer exception caused by branching on uninitialized data. This is due to not checking that the params for the quantization exists. If there is no quantization, we should not access the `.params` field. PiperOrigin-RevId: 385173491 Change-Id: I8fc476c4b274fdb21ba741caa0fbc6d1b8840663",depthwise_conv.cc,"@@ -176,6 +176,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { if (data_type != kTfLiteFloat32) { TF_LITE_ENSURE_EQ(context, filter->quantization.type, kTfLiteAffineQuantization); + TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization); const auto* affine_quantization = reinterpret_cast( filter->quantization.params); @@ -195,6 +196,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } if (is_hybrid) { + TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization); const auto* affine_quantization = reinterpret_cast( filter->quantization.params); @@ -495,6 +497,7 @@ TfLiteStatus EvalHybridPerChannel(TfLiteContext* context, TfLiteNode* node, op_params.weights_offset = 0; op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; + TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization); const auto* affine_quantization = reinterpret_cast(filter->quantization.params); if (kernel_type == kReference) { ",1,test 1e206baedf8bef0334cca3eb92bab134ef525a28,tensorflow/tensorflow,"Prevent a division by 0 in division ops. PiperOrigin-RevId: 385223169 Change-Id: Ia4228960b5d2aa44480385f74bdd70d21a3613c3",div.cc,"@@ -216,9 +216,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, kOutputTensor, &output)); - if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) { + // TODO(b/193904910): This can written with C++ templates +#define TF_LITE_CHECK_DIV_NON_ZERO(data_type) \ + const auto* input2_data = GetTensorData(input2); \ + const size_t input2_elements = input2->bytes / sizeof(data_type); \ + for (size_t i = 0; i < input2_elements; i++) { \ + TF_LITE_ENSURE(context, input2_data[i] != 0); \ + } + + if (output->type == kTfLiteFloat32) { + // Div by zero seems ok in this case, just like in TF case infinities are + // returned. So we don't do a check at this point. 
+ EvalDiv(context, node, params, data, input1, input2, output); + } else if (output->type == kTfLiteInt32) { + TF_LITE_CHECK_DIV_NON_ZERO(int32_t); EvalDiv(context, node, params, data, input1, input2, output); } else if (output->type == kTfLiteUInt8) { + TF_LITE_CHECK_DIV_NON_ZERO(uint8_t); TF_LITE_ENSURE_OK( context, EvalQuantized(context, node, params, data, input1, input2, output)); @@ -229,6 +243,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { output->type); return kTfLiteError; } +#undef TF_LITE_CHECK_DIV_NON_ZERO return kTfLiteOk; } ",1,test d94ffe08a65400f898241c0374e9edc6fa8ed257,tensorflow/tensorflow,"Prevent an OOB read in `expand_dims.cc` The for loop that follows this check assumes that `axis` is between `0` and `input_dims.size`. If user supplied `axis` is negative, the if code before this check is supposed to bring it back to positive (similar to how in Python one can do `l[-3]` to mean `l[-3 + len(l)]`). PiperOrigin-RevId: 387200206 Change-Id: I162f4feba12d547c3a4340833ae682016a2ebfab",expand_dims.cc,"@@ -37,6 +37,7 @@ TfLiteStatus ExpandTensorDim(TfLiteContext* context, const TfLiteTensor& input, axis = input_dims.size + 1 + axis; } TF_LITE_ENSURE(context, axis <= input_dims.size); + TF_LITE_ENSURE(context, axis >= 0); TfLiteIntArray* output_dims = TfLiteIntArrayCreate(input_dims.size + 1); for (int i = 0; i < output_dims->size; ++i) { ",1,train dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops. PiperOrigin-RevId: 385184660 Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",averagepool_quantized_test.cc,"@@ -40,12 +40,14 @@ void RunOneAveragePoolTest(const PoolParams& params, std::vector optimized_averagePool_output(buffer_size); std::vector reference_averagePool_output(buffer_size); - reference_integer_ops::AveragePool(params, input_shape, input_data, - output_shape, - reference_averagePool_output.data()); - optimized_integer_ops::AveragePool(params, input_shape, input_data, - output_shape, - optimized_averagePool_output.data()); + bool reference_success = reference_integer_ops::AveragePool( + params, input_shape, input_data, output_shape, + reference_averagePool_output.data()); + bool optimized_success = optimized_integer_ops::AveragePool( + params, input_shape, input_data, output_shape, + optimized_averagePool_output.data()); + EXPECT_TRUE(reference_success); + EXPECT_TRUE(optimized_success); for (int i = 0; i < buffer_size; i++) { EXPECT_TRUE(reference_averagePool_output[i] == ",1,train dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops. 
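The division-op fix earlier in this group guards the integer paths by scanning every element of the divisor tensor and rejecting the op if any element is zero (the float path is deliberately left alone, since IEEE infinities match TensorFlow's behaviour). A standalone sketch of that element-wise scan, over a plain vector instead of a TfLiteTensor:

#include <cstdint>
#include <cstdio>
#include <vector>

// Returns false if any divisor element is zero, mirroring the element-wise
// check added before the integer division path is evaluated.
bool AllDivisorsNonZero(const std::vector<int32_t>& divisor) {
  for (int32_t v : divisor) {
    if (v == 0) return false;
  }
  return true;
}

int main() {
  std::vector<int32_t> ok{1, 2, 3};
  std::vector<int32_t> bad{4, 0, 5};
  std::printf("ok: %d, bad: %d\n", AllDivisorsNonZero(ok), AllDivisorsNonZero(bad));
  return 0;
}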
PiperOrigin-RevId: 385184660 Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",pooling.h,"@@ -144,7 +144,7 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, } } -inline void AveragePool(const PoolParams& params, +inline bool AveragePool(const PoolParams& params, const RuntimeShape& input_shape, const int8* input_data, const RuntimeShape& output_shape, int8* output_data) { ruy::profiler::ScopeLabel label(""AveragePool/8bitWith32bitAccumulator""); @@ -192,6 +192,7 @@ inline void AveragePool(const PoolParams& params, std::min(params.filter_height, input_height - in_y_origin); const int filter_count = (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start); + if (filter_count == 0) return false; memset(acc, 0, tranche_depth * sizeof(acc[0])); const int8* input_ptr = input_data + depth_base + @@ -267,6 +268,7 @@ inline void AveragePool(const PoolParams& params, } } } + return true; } } // namespace optimized_integer_ops ",1,train dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops. PiperOrigin-RevId: 385184660 Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",legacy_optimized_ops.h,"@@ -3761,7 +3761,7 @@ inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims, output_data, output_dims); } -inline void AveragePool(const float* input_data, const Dims<4>& input_dims, +inline bool AveragePool(const float* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int kwidth, int kheight, float output_activation_min, @@ -3776,35 +3776,37 @@ inline void AveragePool(const float* input_data, const Dims<4>& input_dims, params.padding_values.width = pad_width; params.float_activation_min = output_activation_min; params.float_activation_max = output_activation_max; - AveragePool(params, DimsToShape(input_dims), input_data, - DimsToShape(output_dims), output_data); + return AveragePool(params, DimsToShape(input_dims), input_data, + DimsToShape(output_dims), output_data); } // legacy, for compatibility with old checked-in code template -void AveragePool(const float* input_data, const Dims<4>& input_dims, +bool AveragePool(const float* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int kwidth, int kheight, float* output_data, const Dims<4>& output_dims) { float output_activation_min, output_activation_max; GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, kwidth, kheight, output_activation_min, - output_activation_max, output_data, output_dims); + return AveragePool(input_data, input_dims, stride_width, stride_height, + pad_width, pad_height, kwidth, kheight, + output_activation_min, output_activation_max, output_data, + output_dims); } // legacy, for compatibility with old checked-in code template -void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride, +bool AveragePool(const float* input_data, const Dims<4>& input_dims, int stride, int pad_width, int pad_height, int filter_width, int filter_height, float* output_data, const Dims<4>& output_dims) { - AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_data, output_dims); + return AveragePool(input_data, input_dims, stride, stride, pad_width, + pad_height, filter_width, filter_height, output_data, + output_dims); } -inline void 
AveragePool(const uint8* input_data, const Dims<4>& input_dims, +inline bool AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int filter_width, int filter_height, int32 output_activation_min, @@ -3819,13 +3821,13 @@ inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims, params.padding_values.width = pad_width; params.quantized_activation_min = output_activation_min; params.quantized_activation_max = output_activation_max; - AveragePool(params, DimsToShape(input_dims), input_data, - DimsToShape(output_dims), output_data); + return AveragePool(params, DimsToShape(input_dims), input_data, + DimsToShape(output_dims), output_data); } // legacy, for compatibility with old checked-in code template -void AveragePool(const uint8* input_data, const Dims<4>& input_dims, +bool AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int filter_width, int filter_height, int32 output_activation_min, int32 output_activation_max, @@ -3839,21 +3841,23 @@ void AveragePool(const uint8* input_data, const Dims<4>& input_dims, TFLITE_DCHECK_EQ(output_activation_min, 0); TFLITE_DCHECK_EQ(output_activation_max, 255); } - AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); + return AveragePool(input_data, input_dims, stride_width, stride_height, + pad_width, pad_height, filter_width, filter_height, + output_activation_min, output_activation_max, output_data, + output_dims); } // legacy, for compatibility with old checked-in code template -void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride, +bool AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride, int pad_width, int pad_height, int filter_width, int filter_height, int32 output_activation_min, int32 output_activation_max, uint8* output_data, const Dims<4>& output_dims) { - AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); + return AveragePool(input_data, input_dims, stride, stride, pad_width, + pad_height, filter_width, filter_height, + output_activation_min, output_activation_max, + output_data, output_dims); } inline void MaxPool(const float* input_data, const Dims<4>& input_dims, ",1,train dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops. PiperOrigin-RevId: 385184660 Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",optimized_ops.h,"@@ -3172,7 +3172,7 @@ inline int NodeOffset(int b, int h, int w, int height, int width) { return (b * height + h) * width + w; } -inline void AveragePool(const PoolParams& params, +inline bool AveragePool(const PoolParams& params, const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& output_shape, float* output_data) { @@ -3187,6 +3187,9 @@ inline void AveragePool(const PoolParams& params, const int stride_height = params.stride_height; const int stride_width = params.stride_width; + if (stride_height == 0) return false; + if (stride_width == 0) return false; + // TODO(benoitjacob) make this a proper reference impl without Eigen! 
const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape); auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape); @@ -3232,9 +3235,11 @@ inline void AveragePool(const PoolParams& params, params.float_activation_min, params.float_activation_max); } + + return true; } -inline void AveragePool(const PoolParams& params, +inline bool AveragePool(const PoolParams& params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& output_shape, uint8* output_data) { @@ -3283,6 +3288,7 @@ inline void AveragePool(const PoolParams& params, std::min(params.filter_height, input_height - in_y_origin); const int filter_count = (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start); + if (filter_count == 0) return false; memset(acc, 0, tranche_depth * sizeof(acc[0])); const uint8* input_ptr = input_data + depth_base + @@ -3369,6 +3375,7 @@ inline void AveragePool(const PoolParams& params, } } } + return true; } inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, ",1,train dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops. PiperOrigin-RevId: 385184660 Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",pooling.h,"@@ -21,7 +21,7 @@ limitations under the License. namespace tflite { namespace reference_integer_ops { -inline void AveragePool(const PoolParams& params, +inline bool AveragePool(const PoolParams& params, const RuntimeShape& input_shape, const int8_t* input_data, const RuntimeShape& output_shape, int8_t* output_data) { @@ -66,6 +66,7 @@ inline void AveragePool(const PoolParams& params, filter_count++; } } + if (filter_count == 0) return false; // Round to the closest integer value. acc = acc > 0 ? (acc + filter_count / 2) / filter_count : (acc - filter_count / 2) / filter_count; @@ -77,6 +78,7 @@ inline void AveragePool(const PoolParams& params, } } } + return true; } inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, @@ -136,7 +138,7 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, } } -inline void AveragePool(const PoolParams& params, +inline bool AveragePool(const PoolParams& params, const RuntimeShape& input_shape, const int16_t* input_data, const RuntimeShape& output_shape, @@ -182,6 +184,7 @@ inline void AveragePool(const PoolParams& params, filter_count++; } } + if (filter_count == 0) return false; // Round to the closest integer value. acc = acc > 0 ? (acc + filter_count / 2) / filter_count : (acc - filter_count / 2) / filter_count; @@ -193,6 +196,7 @@ inline void AveragePool(const PoolParams& params, } } } + return true; } inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, ",1,train dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops. 
PiperOrigin-RevId: 385184660 Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",legacy_reference_ops.h,"@@ -1487,7 +1487,7 @@ void Sub(const T* input1_data, const Dims<4>& input1_dims, const T* input2_data, output_data); } -inline void AveragePool(const float* input_data, const Dims<4>& input_dims, +inline bool AveragePool(const float* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int kwidth, int kheight, float output_activation_min, @@ -1502,8 +1502,8 @@ inline void AveragePool(const float* input_data, const Dims<4>& input_dims, params.padding_values.width = pad_width; params.float_activation_min = output_activation_min; params.float_activation_max = output_activation_max; - AveragePool(params, DimsToShape(input_dims), input_data, - DimsToShape(output_dims), output_data); + return AveragePool(params, DimsToShape(input_dims), input_data, + DimsToShape(output_dims), output_data); } // Transitional version that will be moved shortly to legacy_reference_ops, as @@ -1562,29 +1562,31 @@ inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims, // legacy, for compatibility with old checked-in code template -void AveragePool(const float* input_data, const Dims<4>& input_dims, +bool AveragePool(const float* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int kwidth, int kheight, float* output_data, const Dims<4>& output_dims) { float output_activation_min, output_activation_max; GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, kwidth, kheight, output_activation_min, - output_activation_max, output_data, output_dims); + return AveragePool(input_data, input_dims, stride_width, stride_height, + pad_width, pad_height, kwidth, kheight, + output_activation_min, output_activation_max, output_data, + output_dims); } // legacy, for compatibility with old checked-in code template -void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride, +bool AveragePool(const float* input_data, const Dims<4>& input_dims, int stride, int pad_width, int pad_height, int filter_width, int filter_height, float* output_data, const Dims<4>& output_dims) { - AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_data, output_dims); + return AveragePool(input_data, input_dims, stride, stride, pad_width, + pad_height, filter_width, filter_height, output_data, + output_dims); } -inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims, +inline bool AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int filter_width, int filter_height, int32 output_activation_min, @@ -1599,13 +1601,13 @@ inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims, params.padding_values.width = pad_width; params.quantized_activation_min = output_activation_min; params.quantized_activation_max = output_activation_max; - AveragePool(params, DimsToShape(input_dims), input_data, - DimsToShape(output_dims), output_data); + return AveragePool(params, DimsToShape(input_dims), input_data, + DimsToShape(output_dims), output_data); } // legacy, for compatibility with old checked-in code template -void AveragePool(const uint8* input_data, const Dims<4>& input_dims, +bool AveragePool(const uint8* input_data, const 
Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int filter_width, int filter_height, int32 output_activation_min, int32 output_activation_max, @@ -1619,21 +1621,23 @@ void AveragePool(const uint8* input_data, const Dims<4>& input_dims, TFLITE_DCHECK_EQ(output_activation_min, 0); TFLITE_DCHECK_EQ(output_activation_max, 255); } - AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); + return AveragePool(input_data, input_dims, stride_width, stride_height, + pad_width, pad_height, filter_width, filter_height, + output_activation_min, output_activation_max, output_data, + output_dims); } // legacy, for compatibility with old checked-in code template -void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride, +bool AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride, int pad_width, int pad_height, int filter_width, int filter_height, int32 output_activation_min, int32 output_activation_max, uint8* output_data, const Dims<4>& output_dims) { - AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); + return AveragePool(input_data, input_dims, stride, stride, pad_width, + pad_height, filter_width, filter_height, + output_activation_min, output_activation_max, + output_data, output_dims); } inline void MaxPool(const float* input_data, const Dims<4>& input_dims, ",1,train dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops. PiperOrigin-RevId: 385184660 Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",pooling.h,"@@ -23,7 +23,7 @@ limitations under the License. namespace tflite { namespace reference_ops { -inline void AveragePool(const PoolParams& params, +inline bool AveragePool(const PoolParams& params, const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& output_shape, float* output_data) { @@ -66,6 +66,7 @@ inline void AveragePool(const PoolParams& params, filter_count++; } } + if (filter_count == 0) return false; const float average = total / filter_count; output_data[Offset(output_shape, batch, out_y, out_x, channel)] = ActivationFunctionWithMinMax(average, params.float_activation_min, @@ -74,9 +75,10 @@ inline void AveragePool(const PoolParams& params, } } } + return true; } -inline void AveragePool(const PoolParams& params, +inline bool AveragePool(const PoolParams& params, const RuntimeShape& input_shape, const uint8_t* input_data, const RuntimeShape& output_shape, @@ -122,6 +124,7 @@ inline void AveragePool(const PoolParams& params, filter_count++; } } + if (filter_count == 0) return false; acc = (acc + filter_count / 2) / filter_count; acc = std::max(acc, params.quantized_activation_min); acc = std::min(acc, params.quantized_activation_max); @@ -131,6 +134,7 @@ inline void AveragePool(const PoolParams& params, } } } + return true; } inline void L2Pool(const PoolParams& params, const RuntimeShape& input_shape, ",1,train dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops. 
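In the AveragePool kernels patched above, the pooling window at a given output position can fail to overlap the input at all when it falls entirely inside padding, which makes the averaging denominator (filter_count) zero; the fix has the kernels return false in that case instead of dividing. A standalone sketch of the guard with simplified one-dimensional window arithmetic and made-up names:

#include <algorithm>
#include <cstdio>

// Averages the elements covered by a window anchored at `origin`, clipped to
// the valid input range. Returns false instead of dividing by zero when the
// window does not overlap the input at all (the guard added by the fix).
bool WindowAverage(const float* data, int input_size, int origin,
                   int filter_size, float* out) {
  const int start = std::max(0, -origin);
  const int end = std::min(filter_size, input_size - origin);
  const int count = std::max(0, end - start);
  if (count == 0) return false;
  float total = 0.f;
  for (int i = start; i < end; ++i) total += data[origin + i];
  *out = total / count;
  return true;
}

int main() {
  const float data[4] = {1.f, 2.f, 3.f, 4.f};
  float avg = 0.f;
  std::printf("in range: %d (avg %.2f)\n", WindowAverage(data, 4, 1, 2, &avg), avg);
  std::printf("all padding: %d\n", WindowAverage(data, 4, -5, 2, &avg));
  return 0;
}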
PiperOrigin-RevId: 385184660 Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",pooling.cc,"@@ -117,117 +117,126 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) { } template -void AverageEvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, OpData* data, - const TfLiteTensor* input, TfLiteTensor* output) { +TfLiteStatus AverageEvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, + const TfLiteTensor* input, TfLiteTensor* output) { float activation_min, activation_max; CalculateActivationRange(params->activation, &activation_min, &activation_max); -#define TF_LITE_AVERAGE_POOL(type) \ - tflite::PoolParams op_params; \ - op_params.stride_height = params->stride_height; \ - op_params.stride_width = params->stride_width; \ - op_params.filter_height = params->filter_height; \ - op_params.filter_width = params->filter_width; \ - op_params.padding_values.height = data->padding.height; \ - op_params.padding_values.width = data->padding.width; \ - op_params.float_activation_min = activation_min; \ - op_params.float_activation_max = activation_max; \ - type::AveragePool(op_params, GetTensorShape(input), \ - GetTensorData(input), GetTensorShape(output), \ - GetTensorData(output)) +#define TF_LITE_AVERAGE_POOL(type) \ + tflite::PoolParams op_params; \ + op_params.stride_height = params->stride_height; \ + op_params.stride_width = params->stride_width; \ + op_params.filter_height = params->filter_height; \ + op_params.filter_width = params->filter_width; \ + op_params.padding_values.height = data->padding.height; \ + op_params.padding_values.width = data->padding.width; \ + op_params.float_activation_min = activation_min; \ + op_params.float_activation_max = activation_max; \ + TF_LITE_ENSURE(context, type::AveragePool(op_params, GetTensorShape(input), \ + GetTensorData(input), \ + GetTensorShape(output), \ + GetTensorData(output))) if (kernel_type == kReference) { TF_LITE_AVERAGE_POOL(reference_ops); } else { TF_LITE_AVERAGE_POOL(optimized_ops); } #undef TF_LITE_AVERAGE_POOL + return kTfLiteOk; } template -void AverageEvalQuantizedUint8(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, OpData* data, - const TfLiteTensor* input, - TfLiteTensor* output) { +TfLiteStatus AverageEvalQuantizedUint8(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, + const TfLiteTensor* input, + TfLiteTensor* output) { int32_t activation_min; int32_t activation_max; (void)CalculateActivationRangeQuantized(context, params->activation, output, &activation_min, &activation_max); -#define TF_LITE_AVERAGE_POOL(type) \ - tflite::PoolParams op_params; \ - op_params.stride_height = params->stride_height; \ - op_params.stride_width = params->stride_width; \ - op_params.filter_height = params->filter_height; \ - op_params.filter_width = params->filter_width; \ - op_params.padding_values.height = data->padding.height; \ - op_params.padding_values.width = data->padding.width; \ - op_params.quantized_activation_min = activation_min; \ - op_params.quantized_activation_max = activation_max; \ - type::AveragePool(op_params, GetTensorShape(input), \ - GetTensorData(input), GetTensorShape(output), \ - GetTensorData(output)) +#define TF_LITE_AVERAGE_POOL(type) \ + tflite::PoolParams op_params; \ + op_params.stride_height = params->stride_height; \ + op_params.stride_width = params->stride_width; \ + op_params.filter_height = params->filter_height; \ + op_params.filter_width = params->filter_width; 
\ + op_params.padding_values.height = data->padding.height; \ + op_params.padding_values.width = data->padding.width; \ + op_params.quantized_activation_min = activation_min; \ + op_params.quantized_activation_max = activation_max; \ + TF_LITE_ENSURE(context, type::AveragePool(op_params, GetTensorShape(input), \ + GetTensorData(input), \ + GetTensorShape(output), \ + GetTensorData(output))) if (kernel_type == kReference) { TF_LITE_AVERAGE_POOL(reference_ops); } else { TF_LITE_AVERAGE_POOL(optimized_ops); } #undef TF_LITE_AVERAGE_POOL + return kTfLiteOk; } template -void AverageEvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, OpData* data, - const TfLiteTensor* input, TfLiteTensor* output) { +TfLiteStatus AverageEvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, + const TfLiteTensor* input, + TfLiteTensor* output) { int32_t activation_min; int32_t activation_max; (void)CalculateActivationRangeQuantized(context, params->activation, output, &activation_min, &activation_max); -#define TF_LITE_AVERAGE_POOL(type) \ - tflite::PoolParams op_params; \ - op_params.stride_height = params->stride_height; \ - op_params.stride_width = params->stride_width; \ - op_params.filter_height = params->filter_height; \ - op_params.filter_width = params->filter_width; \ - op_params.padding_values.height = data->padding.height; \ - op_params.padding_values.width = data->padding.width; \ - op_params.quantized_activation_min = activation_min; \ - op_params.quantized_activation_max = activation_max; \ - type::AveragePool(op_params, GetTensorShape(input), \ - GetTensorData(input), GetTensorShape(output), \ - GetTensorData(output)) +#define TF_LITE_AVERAGE_POOL(type) \ + tflite::PoolParams op_params; \ + op_params.stride_height = params->stride_height; \ + op_params.stride_width = params->stride_width; \ + op_params.filter_height = params->filter_height; \ + op_params.filter_width = params->filter_width; \ + op_params.padding_values.height = data->padding.height; \ + op_params.padding_values.width = data->padding.width; \ + op_params.quantized_activation_min = activation_min; \ + op_params.quantized_activation_max = activation_max; \ + TF_LITE_ENSURE(context, type::AveragePool(op_params, GetTensorShape(input), \ + GetTensorData(input), \ + GetTensorShape(output), \ + GetTensorData(output))) if (kernel_type == kReference) { TF_LITE_AVERAGE_POOL(reference_integer_ops); } else { TF_LITE_AVERAGE_POOL(optimized_integer_ops); } #undef TF_LITE_AVERAGE_POOL + return kTfLiteOk; } template -void AverageEvalQuantizedInt16(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, OpData* data, - const TfLiteTensor* input, - TfLiteTensor* output) { +TfLiteStatus AverageEvalQuantizedInt16(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, + const TfLiteTensor* input, + TfLiteTensor* output) { int32_t activation_min; int32_t activation_max; CalculateActivationRangeQuantized(context, params->activation, output, &activation_min, &activation_max); -#define TF_LITE_AVERAGE_POOL(type) \ - tflite::PoolParams op_params; \ - op_params.stride_height = params->stride_height; \ - op_params.stride_width = params->stride_width; \ - op_params.filter_height = params->filter_height; \ - op_params.filter_width = params->filter_width; \ - op_params.padding_values.height = data->padding.height; \ - op_params.padding_values.width = data->padding.width; \ - op_params.quantized_activation_min = activation_min; \ - 
op_params.quantized_activation_max = activation_max; \ - type::AveragePool(op_params, GetTensorShape(input), \ - GetTensorData(input), GetTensorShape(output), \ - GetTensorData(output)) +#define TF_LITE_AVERAGE_POOL(type) \ + tflite::PoolParams op_params; \ + op_params.stride_height = params->stride_height; \ + op_params.stride_width = params->stride_width; \ + op_params.filter_height = params->filter_height; \ + op_params.filter_width = params->filter_width; \ + op_params.padding_values.height = data->padding.height; \ + op_params.padding_values.width = data->padding.width; \ + op_params.quantized_activation_min = activation_min; \ + op_params.quantized_activation_max = activation_max; \ + TF_LITE_ENSURE(context, type::AveragePool(op_params, GetTensorShape(input), \ + GetTensorData(input), \ + GetTensorShape(output), \ + GetTensorData(output))) TF_LITE_AVERAGE_POOL(reference_integer_ops); #undef TF_LITE_AVERAGE_POOL + return kTfLiteOk; } template @@ -380,20 +389,17 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input)); switch (input->type) { // Already know in/out types are same. case kTfLiteFloat32: - AverageEvalFloat(context, node, params, data, input, output); - break; + return AverageEvalFloat(context, node, params, data, input, + output); case kTfLiteUInt8: - AverageEvalQuantizedUint8(context, node, params, data, input, - output); - break; + return AverageEvalQuantizedUint8(context, node, params, data, + input, output); case kTfLiteInt8: - AverageEvalQuantizedInt8(context, node, params, data, input, - output); - break; + return AverageEvalQuantizedInt8(context, node, params, data, + input, output); case kTfLiteInt16: - AverageEvalQuantizedInt16(context, node, params, data, input, - output); - break; + return AverageEvalQuantizedInt16(context, node, params, data, + input, output); default: TF_LITE_KERNEL_LOG(context, ""Type %s not currently supported."", TfLiteTypeGetName(input->type)); ",1,train bb6a0383ed553c286f87ca88c207f6774d5c4a8f,tensorflow/tensorflow,"Prevent heap OOB read in TFLite's `gather_nd.cc`. Passing negative indices is illegal but there was a missing check so that resulted in OOB accesses. PiperOrigin-RevId: 387208551 Change-Id: I6b7a8a62d3e7c13a16d81619e5bc23ae2cdbc7fd",gather_nd.cc,"@@ -123,6 +123,17 @@ TfLiteStatus GatherNdString(const TfLiteTensor* params, template TfLiteStatus EvalGatherNd(TfLiteContext* context, const TfLiteTensor* params, const TfLiteTensor* indices, TfLiteTensor* output) { + bool indices_has_only_positive_elements = true; + const auto* indices_values = GetTensorData(indices); + const size_t num_indices = indices->bytes / sizeof(IndicesT); + for (size_t i = 0; i < num_indices; i++) { + if (indices_values[i] < 0) { + indices_has_only_positive_elements = false; + break; + } + } + TF_LITE_ENSURE(context, indices_has_only_positive_elements); + switch (params->type) { case kTfLiteFloat32: return GatherNd(params, indices, output); ",1,train eb921122119a6b6e470ee98b89e65d721663179d,tensorflow/tensorflow,"Prevent heap OOB read in TFLite's `gather.cc`. Passing negative indices is illegal but there was a missing check so that resulted in OOB accesses. 
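The gather and gather_nd fixes above apply the same precondition: every caller-supplied index must be non-negative before it is used to address the params buffer, otherwise the read lands outside the allocation. A minimal standalone version of that validation pass, over a plain array rather than a TfLiteTensor:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Scans the index buffer once and rejects the whole op if any entry is
// negative, mirroring the check added before the gather kernels run.
bool IndicesAreNonNegative(const int32_t* indices, size_t count) {
  for (size_t i = 0; i < count; ++i) {
    if (indices[i] < 0) return false;
  }
  return true;
}

int main() {
  const int32_t good[] = {0, 2, 1};
  const int32_t bad[] = {0, -3, 1};
  std::printf("good: %d, bad: %d\n",
              IndicesAreNonNegative(good, 3), IndicesAreNonNegative(bad, 3));
  return 0;
}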
PiperOrigin-RevId: 387231300 Change-Id: I3111b54b2f232638d795be17efc46abe4ede6bf8",gather.cc,"@@ -117,8 +117,20 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } template -TfLiteStatus Gather(const TfLiteGatherParams& params, const TfLiteTensor* input, - const TfLiteTensor* positions, TfLiteTensor* output) { +TfLiteStatus Gather(TfLiteContext* context, const TfLiteGatherParams& params, + const TfLiteTensor* input, const TfLiteTensor* positions, + TfLiteTensor* output) { + const PositionsT* indexes = GetTensorData(positions); + bool indices_has_only_positive_elements = true; + const size_t num_indices = positions->bytes / sizeof(PositionsT); + for (size_t i = 0; i < num_indices; i++) { + if (indexes[i] < 0) { + indices_has_only_positive_elements = false; + break; + } + } + TF_LITE_ENSURE(context, indices_has_only_positive_elements); + tflite::GatherParams op_params; op_params.axis = params.axis; op_params.batch_dims = params.batch_dims; @@ -134,7 +146,18 @@ TfLiteStatus GatherStrings(TfLiteContext* context, const TfLiteTensor* input, const TfLiteTensor* positions, TfLiteTensor* output) { DynamicBuffer buffer; + const PositionT* indexes = GetTensorData(positions); + bool indices_has_only_positive_elements = true; + const size_t num_indices = positions->bytes / sizeof(PositionT); + for (size_t i = 0; i < num_indices; i++) { + if (indexes[i] < 0) { + indices_has_only_positive_elements = false; + break; + } + } + TF_LITE_ENSURE(context, indices_has_only_positive_elements); + const PositionT num_strings = GetStringCount(input); const int num_indexes = NumElements(positions); @@ -163,19 +186,26 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { if (positions->type == kTfLiteInt32) { switch (input->type) { case kTfLiteFloat32: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteUInt8: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteInt8: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteInt16: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteInt32: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteInt64: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteBool: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteString: return GatherStrings(context, input, positions, output); default: @@ -187,19 +217,26 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { if (positions->type == kTfLiteInt64) { switch (input->type) { case kTfLiteFloat32: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteUInt8: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteInt8: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteInt16: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteInt32: - return Gather(*params, input, positions, output); + return 
Gather(context, *params, input, positions, + output); case kTfLiteInt64: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteBool: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteString: return GatherStrings(context, input, positions, output); default: ",1,train 15691e456c7dc9bd6be203b09765b063bf4a380c,tensorflow/tensorflow,"Prevent dereferencing of null pointers in TFLite's `add.cc`. PiperOrigin-RevId: 387244946 Change-Id: I56094233327fbd8439b92e1dbb1262176e00eeb9",optimized_ops.h,"@@ -265,7 +265,7 @@ inline void BinaryBroadcastFiveFold(const ArithmeticParams& unswitched_params, // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on. input2_data_reset = input2_data_ptr; } - } else { + } else if (input1_data_ptr != nullptr) { // Special case of y4 == 1, in which the innermost loop is a single // element and can be combined with the next (y3) as an inner broadcast. // ",1,train d6b57f461b39fd1aa8c1b870f1b974aac3554955,tensorflow/tensorflow,"Prevent nullptr dereference in MLIR TFLite dialect/optimizer. PiperOrigin-RevId: 387220762 Change-Id: Id136ef04bb3d36123b4685d316ae81a9ec924d6b",optimize.cc,"@@ -68,6 +68,9 @@ constexpr char kRelu6[] = ""RELU6""; constexpr char kRelu1[] = ""RELU_N1_TO_1""; bool L2NormalizeReduceAxis(Value sq_op, DenseElementsAttr axis) { + if (axis.getNumElements() == 0) { + return false; + } if (sq_op.getType().cast().getRank() - 1 == *axis.getValues().begin() || *axis.getValues().begin() == -1) { ",1,train ee119d4a498979525046fba1c3dd3f13a039fbb1,tensorflow/tensorflow,"Fix segmentation fault in shape inference logic. When running shape functions, some functions (such as `MutableHashTableShape`) produce extra output information in the form of a `ShapeAndType` struct. The shapes embedded in this struct are owned by an inference context that is cleaned up almost immediately; if the upstream code attempts to access this shape information, it can trigger a segfault. `ShapeRefiner` is mitigating this for normal output shapes by cloning them (and thus putting the newly created shape under ownership of an inference context that will not die), but we were not doing the same for shapes and types. This commit fixes that by doing similar logic on output shapes and types. PiperOrigin-RevId: 384761124 Change-Id: I07c0c42d29dfbb55bfa13ec1f09ef825fb0a1a1d",shape_refiner.cc,"@@ -120,9 +120,26 @@ Status ShapeRefiner::InferShapesForFunctionSubNode( TF_RETURN_IF_ERROR(outer_context->MakeShapeFromShapeProto(proto, &handle)); outer_context->set_output(index, handle); - auto* resource = node_context->input_handle_shapes_and_types(0); + const std::vector* resource = + node_context->input_handle_shapes_and_types(0); if (resource) { - outer_context->set_output_handle_shapes_and_types(index, *resource); + // `ShapesAndType`s contain `ShapeHandle`s. These `ShapeHandle`s point + // to `Shape`s that are owned by a different inference context too. We + // need to copy them to the outer context to prevent them from being + // destroyed before they are used. 
+ std::vector copied_shapes_and_types; + for (auto& shape_and_type : *resource) { + ShapeHandle handle; + TensorShapeProto proto; + node_context->ShapeHandleToProto(shape_and_type.shape, &proto); + TF_RETURN_IF_ERROR( + outer_context->MakeShapeFromShapeProto(proto, &handle)); + copied_shapes_and_types.push_back( + ShapeAndType(handle, shape_and_type.dtype, shape_and_type.type)); + } + + outer_context->set_output_handle_shapes_and_types( + index, copied_shapes_and_types); } } ",1,test 0575b640091680cfb70f4dd93e70658de43b94f9,tensorflow/tensorflow,"Prevent division by 0 in LSH projection. PiperOrigin-RevId: 387225857 Change-Id: Iaeb572a763618c64f503e0026f6dd9fd769bf50c",lsh_projection.cc,"@@ -28,7 +28,7 @@ limitations under the License. // // Input: // Tensor[0]: Hash functions. Dim.size == 2, DataType: Float. -// Tensor[0].Dim[0]: Num of hash functions. +// Tensor[0].Dim[0]: Num of hash functions. Must be at least 1. // Tensor[0].Dim[1]: Num of projected output bits generated by // each hash function. // In sparse case, Tensor[0].Dim[1] + ceil( log2(Tensor[0].Dim[0] )) <= 32. @@ -82,6 +82,7 @@ TfLiteStatus Resize(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input; TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &input)); TF_LITE_ENSURE(context, NumDimensions(input) >= 1); + TF_LITE_ENSURE(context, SizeOfDimension(input, 0) >= 1); if (NumInputs(node) == 3) { const TfLiteTensor* weight; ",1,test 7c1692bd417eb4f9b33ead749a41166d6080af85,tensorflow/tensorflow,"PR #51732: Fix crash of tf.image.crop_and_resize when input is large number Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/51732 This PR is part of the effort in #46890 where tf.image.crop_and_resize will crash if shape consists of large number. Signed-off-by: Yong Tang Copybara import of the project: -- c8d87055a56d8740d27ad8bdc74a7459ede6900e by Yong Tang : Fix crash of tf.image.crop_and_resize when input is large number This PR is part of the effort in 46890 where tf.image.crop_and_resize will crash if shape consists of large number. Signed-off-by: Yong Tang COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/tensorflow/pull/51732 from yongtang:46890-tf.image.crop_and_resize c8d87055a56d8740d27ad8bdc74a7459ede6900e PiperOrigin-RevId: 394109830 Change-Id: If049dad0844df9353722029ee95bc76819eda1f4",crop_and_resize_op.cc,"@@ -170,14 +170,15 @@ class CropAndResizeOp : public AsyncOpKernel { context, crop_height > 0 && crop_width > 0, errors::InvalidArgument(""crop dimensions must be positive""), done); + TensorShape shape; + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(num_boxes), done); + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(crop_height), done); + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(crop_width), done); + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(depth), done); // Allocate output tensor. 
Tensor* output = nullptr; - OP_REQUIRES_OK_ASYNC( - context, - context->allocate_output( - 0, TensorShape({num_boxes, crop_height, crop_width, depth}), - &output), - done); + OP_REQUIRES_OK_ASYNC(context, context->allocate_output(0, shape, &output), + done); auto compute_callback = [this, context, output]() { const Tensor& image = context->input(0); @@ -417,14 +418,15 @@ class CropAndResizeGradImageOp : public AsyncOpKernel { done); } + TensorShape shape; + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(batch_size), done); + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(image_height), done); + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(image_width), done); + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(depth), done); // Allocate output tensor. Tensor* output = nullptr; - OP_REQUIRES_OK_ASYNC( - context, - context->allocate_output( - 0, TensorShape({batch_size, image_height, image_width, depth}), - &output), - done); + OP_REQUIRES_OK_ASYNC(context, context->allocate_output(0, shape, &output), + done); auto compute_callback = [this, context, output]() { const Tensor& grads = context->input(0); ",1,train 7c1692bd417eb4f9b33ead749a41166d6080af85,tensorflow/tensorflow,"PR #51732: Fix crash of tf.image.crop_and_resize when input is large number Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/51732 This PR is part of the effort in #46890 where tf.image.crop_and_resize will crash if shape consists of large number. Signed-off-by: Yong Tang Copybara import of the project: -- c8d87055a56d8740d27ad8bdc74a7459ede6900e by Yong Tang : Fix crash of tf.image.crop_and_resize when input is large number This PR is part of the effort in 46890 where tf.image.crop_and_resize will crash if shape consists of large number. Signed-off-by: Yong Tang COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/tensorflow/pull/51732 from yongtang:46890-tf.image.crop_and_resize c8d87055a56d8740d27ad8bdc74a7459ede6900e PiperOrigin-RevId: 394109830 Change-Id: If049dad0844df9353722029ee95bc76819eda1f4",image_ops_test.py,"@@ -6075,6 +6075,16 @@ class DecodeImageTest(test_util.TensorFlowTestCase, parameterized.TestCase): crop_size=[1, 1]) self.evaluate(op) + def testImageCropAndResizeWithInvalidInput(self): + with self.session(): + with self.assertRaises((errors.InternalError, ValueError)): + op = image_ops_impl.crop_and_resize_v2( + image=np.ones((1, 1, 1, 1)), + boxes=np.ones((11, 4)), + box_indices=np.ones((11)), + crop_size=[2065374891, 1145309325]) + self.evaluate(op) + @parameterized.named_parameters( (""_jpeg"", ""JPEG"", ""jpeg_merge_test1.jpg""), (""_png"", ""PNG"", ""lena_rgba.png""), ",1,train f09caa532b6e1ac8d2aa61b7832c78c5b79300c6,tensorflow/tensorflow,"Fix EinsumHelper::ParseEquation to avoid uninitialized accesses. EinsumHelper::ParseEquation is supposed to return true or false in input_has_ellipsis and output_has_ellipsis to indicate whether there is ellipsis in the inputs and output. Previously, when there is no ellipsis in the inputs or output, the routine doesn't assign false to the variables. This change initializes the two variables with false to fix the problem. 
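The einsum fix above targets a classic hazard: an out-parameter that is only assigned on some control-flow paths, leaving callers to branch on garbage when no ellipsis label is present. A tiny standalone illustration of the corrected shape of such a routine; the label scanning is simplified and the value of kEllipsisLabel here is made up:

#include <cstdio>
#include <vector>

constexpr int kEllipsisLabel = -1;  // illustrative sentinel value

// Corrected shape: the out-parameter is unconditionally initialized, so it is
// well defined even when no ellipsis label appears in the input.
void ScanForEllipsis(const std::vector<int>& labels, bool* has_ellipsis) {
  *has_ellipsis = false;  // the initialization added by the fix
  for (int label : labels) {
    if (label == kEllipsisLabel) {
      *has_ellipsis = true;
      return;
    }
  }
}

int main() {
  bool flag = true;  // deliberately "dirty" to show it gets overwritten
  ScanForEllipsis({0, 1, 2}, &flag);
  std::printf("no ellipsis -> %d\n", flag);
  ScanForEllipsis({0, kEllipsisLabel}, &flag);
  std::printf("with ellipsis -> %d\n", flag);
  return 0;
}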
PiperOrigin-RevId: 391772004 Change-Id: I17b6c88aadef4131470378e48cced054bf252e86",einsum_op_impl.h,"@@ -153,6 +153,7 @@ struct EinsumHelper { input_has_ellipsis->resize(num_inputs); for (int i = 0; i < num_inputs; ++i) { input_label_counts->at(i).resize(num_labels); + input_has_ellipsis->at(i) = false; for (const int label : input_labels->at(i)) { if (label != kEllipsisLabel) input_label_counts->at(i)[label] += 1; @@ -161,6 +162,7 @@ struct EinsumHelper { } } output_label_counts->resize(num_labels); + *output_has_ellipsis = false; for (const int label : *output_labels) { if (label != kEllipsisLabel) output_label_counts->at(label) += 1; ",1,test 368af875869a204b4ac552b9ddda59f6a46a56ec,tensorflow/tensorflow,"Avoid buffer overflow when loading tensors with insufficient data from checkpoints. `CopyDataFromTensorSliceToTensorSlice` does not (and cannot conveniently) provide any bounds checking on its own, so the size is instead checked prior to passing unvalidated data to that function. PiperOrigin-RevId: 392971286 Change-Id: If2073b36d4d5eedd386329f56729395fd7effee1",saved_tensor_slice_util.h,"@@ -59,6 +59,9 @@ Status ParseShapeAndSlice(const string& shape_and_slice, TensorShape* shape, template struct SaveTypeTraits; +template +int TensorProtoDataSize(const TensorProto& t); + template const typename SaveTypeTraits::SavedType* TensorProtoData( const TensorProto& t); @@ -95,6 +98,10 @@ void Fill(T* data, size_t n, TensorProto* t); #define TENSOR_PROTO_EXTRACT_TYPE(TYPE, FIELD, FTYPE) \ TENSOR_PROTO_EXTRACT_TYPE_HELPER(TYPE, FIELD, FTYPE, FTYPE) \ template <> \ + inline int TensorProtoDataSize(const TensorProto& t) { \ + return t.FIELD##_val_size(); \ + } \ + template <> \ inline void Fill(const TYPE* data, size_t n, TensorProto* t) { \ typename protobuf::RepeatedField copy(data, data + n); \ t->mutable_##FIELD##_val()->Swap(©); \ @@ -104,6 +111,10 @@ void Fill(T* data, size_t n, TensorProto* t); #define TENSOR_PROTO_EXTRACT_TYPE_COMPLEX(TYPE, FIELD, FTYPE) \ TENSOR_PROTO_EXTRACT_TYPE_HELPER(TYPE, FIELD, FTYPE, TYPE) \ template <> \ + inline int TensorProtoDataSize(const TensorProto& t) { \ + return t.FIELD##_val_size() / 2; \ + } \ + template <> \ inline void Fill(const TYPE* data, size_t n, TensorProto* t) { \ const FTYPE* sub = reinterpret_cast(data); \ typename protobuf::RepeatedField copy(sub, sub + 2 * n); \ @@ -136,6 +147,11 @@ TENSOR_PROTO_EXTRACT_TYPE(quint16, int, int32); template <> struct SaveTypeTraits : SaveTypeTraits {}; +template <> +inline int TensorProtoDataSize(const TensorProto& t) { + return t.int_val_size(); +} + template <> inline const int32* TensorProtoData(const TensorProto& t) { static_assert(SaveTypeTraits::supported, @@ -158,6 +174,11 @@ struct SaveTypeTraits { typedef protobuf::RepeatedField RepeatedField; }; +template <> +inline int TensorProtoDataSize(const TensorProto& t) { + return t.half_val_size(); +} + template <> inline const int* TensorProtoData(const TensorProto& t) { return t.half_val().data(); @@ -187,6 +208,11 @@ struct SaveTypeTraits { typedef protobuf::RepeatedPtrField RepeatedField; }; +template <> +inline int TensorProtoDataSize(const TensorProto& t) { + return t.string_val_size(); +} + template <> inline const string* const* TensorProtoData(const TensorProto& t) { static_assert(SaveTypeTraits::supported, ",1,train 368af875869a204b4ac552b9ddda59f6a46a56ec,tensorflow/tensorflow,"Avoid buffer overflow when loading tensors with insufficient data from checkpoints. 
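The checkpoint-loading fix described here compares the number of elements actually present in the serialized tensor data against the number implied by the requested slice shape, and refuses to copy when they disagree, since the copy routine itself cannot bounds-check. A standalone sketch of that precondition, with plain vectors standing in for TensorProto/TensorSlice:

#include <cstdio>
#include <cstring>
#include <vector>

// Copies `expected_elements` floats only if the serialized buffer really
// contains that many, mirroring the size check added before the unchecked
// slice-copy routine is called.
bool CopySliceChecked(const std::vector<float>& serialized,
                      size_t expected_elements, float* dst) {
  if (serialized.size() != expected_elements) return false;  // reject short data
  std::memcpy(dst, serialized.data(), expected_elements * sizeof(float));
  return true;
}

int main() {
  std::vector<float> four(4, 1.0f);
  float dst[10] = {};
  std::printf("complete: %d\n", CopySliceChecked(four, 4, dst));
  std::printf("truncated: %d\n", CopySliceChecked(four, 10, dst));  // would overflow without the check
  return 0;
}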
`CopyDataFromTensorSliceToTensorSlice` does not (and cannot conveniently) provide any bounds checking on its own, so the size is instead checked prior to passing unvalidated data to that function. PiperOrigin-RevId: 392971286 Change-Id: If2073b36d4d5eedd386329f56729395fd7effee1",tensor_slice_reader.h,"@@ -181,6 +181,22 @@ bool TensorSliceReader::CopySliceData(const string& name, << slice_s.DebugString() << "": computed key = "" << key; return false; } + // Ensure the TensorSlice contains the expected amount of data. + TensorShape shp_s; + Status s = slice_s.SliceTensorShape(tss->shape(), &shp_s); + if (!s.ok()) { + VLOG(1) << ""Failed to slice tensor "" << name << "", slice "" + << slice_s.DebugString() << "": "" << s; + return false; + } + if (checkpoint::TensorProtoDataSize(sts.data().data()) != + shp_s.num_elements()) { + VLOG(1) << ""Tensor "" << name << "", slice "" << slice_s.DebugString() + << "" had an unexpected amount of data: expected = "" + << shp_s.num_elements() << "", got = "" + << checkpoint::TensorProtoDataSize(sts.data().data()); + return false; + } CopyDataFromTensorSliceToTensorSlice( tss->shape(), slice_s, slice, checkpoint::TensorProtoData(sts.data().data()), data); ",1,train 368af875869a204b4ac552b9ddda59f6a46a56ec,tensorflow/tensorflow,"Avoid buffer overflow when loading tensors with insufficient data from checkpoints. `CopyDataFromTensorSliceToTensorSlice` does not (and cannot conveniently) provide any bounds checking on its own, so the size is instead checked prior to passing unvalidated data to that function. PiperOrigin-RevId: 392971286 Change-Id: If2073b36d4d5eedd386329f56729395fd7effee1",tensor_slice_reader_test.cc,"@@ -459,6 +459,33 @@ TEST(TensorSliceReaderTest, InvalidTensorSlice) { EXPECT_FALSE(reader.status().ok()); } +TEST(TensorSliceReaderTest, MissingTensorData) { + const string fname = + io::JoinPath(testing::TmpDir(), ""missing_data_checkpoint""); + TensorSliceWriter writer(fname, CreateTableTensorSliceBuilder); + const int32 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + TF_ASSERT_OK(writer.Add(""test"", TensorShape({4, 5}), + TensorSlice::ParseOrDie(""0,2:-""), data)); + TF_ASSERT_OK(writer.Finish()); + + MutateSavedTensorSlices(fname, [&](SavedTensorSlices sts) { + if (sts.has_data()) { + // Replace the data with only 4 elements. + Fill(data, 4, sts.mutable_data()->mutable_data()); + } + return sts.SerializeAsString(); + }); + + TensorSliceReader reader(fname, OpenTableTensorSliceReader); + TF_ASSERT_OK(reader.status()); + + // The tensor should be present, but loading it should fail due to the missing + // data. + EXPECT_TRUE(reader.HasTensor(""test"", nullptr, nullptr)); + std::unique_ptr tensor; + EXPECT_FALSE(reader.GetTensor(""test"", &tensor).ok()); +} + void CachedTensorSliceReaderTesterHelper( const TensorSliceWriter::CreateBuilderFunction& create_function, const TensorSliceReader::OpenTableFunction& open_function) { ",1,train abcced051cb1bd8fb05046ac3b6023a7ebcc4578,tensorflow/tensorflow,"Prevent crashes when loading tensor slices with unsupported types. Also fix the `Tensor(const TensorShape&)` constructor swapping the LOG(FATAL) messages for the unset and unsupported types. PiperOrigin-RevId: 392695027 Change-Id: I4beda7db950db951d273e3259a7c8534ece49354",tensor.cc,"@@ -52,6 +52,7 @@ limitations under the License. 
#include ""tensorflow/core/lib/gtl/inlined_vector.h"" #include ""tensorflow/core/lib/strings/str_util.h"" #include ""tensorflow/core/lib/strings/strcat.h"" +#include ""tensorflow/core/platform/errors.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/macros.h"" #include ""tensorflow/core/platform/protobuf.h"" @@ -723,11 +724,11 @@ bool Tensor::RefCountIsOne() const { // The macro CASES() expands to a switch statement conditioned on // TYPE_ENUM. Each case expands the STMTS after a typedef for T. #define SINGLE_ARG(...) __VA_ARGS__ -#define CASE(TYPE, STMTS) \ - case DataTypeToEnum::value: { \ - typedef TYPE T; \ - STMTS; \ - break; \ +#define CASE(TYPE, STMTS) \ + case DataTypeToEnum::value: { \ + typedef TF_ATTRIBUTE_UNUSED TYPE T; \ + STMTS; \ + break; \ } #define CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, INVALID, DEFAULT) \ switch (TYPE_ENUM) { \ @@ -763,9 +764,8 @@ bool Tensor::RefCountIsOne() const { } #define CASES(TYPE_ENUM, STMTS) \ - CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, \ - LOG(FATAL) << ""Unexpected type: "" << TYPE_ENUM; \ - , LOG(FATAL) << ""Type not set"";) + CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, LOG(FATAL) << ""Type not set""; \ + , LOG(FATAL) << ""Unexpected type: "" << TYPE_ENUM;) Tensor::Tensor(Allocator* a, DataType type, const TensorShape& shape) : shape_(shape), buf_(nullptr) { @@ -795,6 +795,16 @@ Tensor::Tensor(Allocator* a, DataType type, const TensorShape& shape, } } +Status Tensor::BuildTensor(DataType type, const TensorShape& shape, + Tensor* out_tensor) { + // Avoid crashes due to invalid or unsupported types. + CASES_WITH_DEFAULT( + type, {}, return errors::InvalidArgument(""Type not set""), + return errors::InvalidArgument(""Unexpected type: "", DataType_Name(type))); + *out_tensor = Tensor(type, shape); + return Status::OK(); +} + // NOTE(mrry): The default allocator for a Tensor (when none is specified) is // the default CPU allocator for NUMA zone 0. Accessing that currently involves // acquiring a lock, which guards initialization of the per-NUMA zone ",1,test abcced051cb1bd8fb05046ac3b6023a7ebcc4578,tensorflow/tensorflow,"Prevent crashes when loading tensor slices with unsupported types. Also fix the `Tensor(const TensorShape&)` constructor swapping the LOG(FATAL) messages for the unset and unsupported types. PiperOrigin-RevId: 392695027 Change-Id: I4beda7db950db951d273e3259a7c8534ece49354",tensor.h,"@@ -170,6 +170,15 @@ class Tensor { /// for details. explicit Tensor(DataType type); + /// \brief Initializes a tensor with the input `type` and `shape`, or returns + /// an error and leaves `out_tensor` unmodified. This factory method should be + /// used instead of the corresponding constructor if calling code cannot + /// validate that the `DataType` is valid and supported. + /// + /// The underlying buffer is allocated using a `CPUAllocator`. + static Status BuildTensor(DataType type, const TensorShape& shape, + Tensor* out_tensor); + private: // A tag type for selecting the `Tensor` constructor overload that creates a // scalar tensor in host memory. ",1,test abcced051cb1bd8fb05046ac3b6023a7ebcc4578,tensorflow/tensorflow,"Prevent crashes when loading tensor slices with unsupported types. Also fix the `Tensor(const TensorShape&)` constructor swapping the LOG(FATAL) messages for the unset and unsupported types. 
PiperOrigin-RevId: 392695027 Change-Id: I4beda7db950db951d273e3259a7c8534ece49354",tensor_slice_reader.cc,"@@ -248,7 +248,9 @@ Status TensorSliceReader::GetTensor( slice = tss->Slices().begin()->second.slice; } - std::unique_ptr t(new tensorflow::Tensor(type, shape)); + std::unique_ptr t(new tensorflow::Tensor); + Status s = tensorflow::Tensor::BuildTensor(type, shape, t.get()); + if (!s.ok()) return s; bool success = false; #define READER_COPY(dt) \ ",1,test abcced051cb1bd8fb05046ac3b6023a7ebcc4578,tensorflow/tensorflow,"Prevent crashes when loading tensor slices with unsupported types. Also fix the `Tensor(const TensorShape&)` constructor swapping the LOG(FATAL) messages for the unset and unsupported types. PiperOrigin-RevId: 392695027 Change-Id: I4beda7db950db951d273e3259a7c8534ece49354",tensor_slice_reader_test.cc,"@@ -13,15 +13,19 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include - #include ""tensorflow/core/util/tensor_slice_reader.h"" +#include +#include + #include ""tensorflow/core/framework/types.h"" #include ""tensorflow/core/framework/versions.pb.h"" #include ""tensorflow/core/lib/core/status_test_util.h"" #include ""tensorflow/core/lib/core/stringpiece.h"" +#include ""tensorflow/core/lib/io/iterator.h"" #include ""tensorflow/core/lib/io/path.h"" +#include ""tensorflow/core/lib/io/table.h"" +#include ""tensorflow/core/lib/io/table_builder.h"" #include ""tensorflow/core/lib/strings/str_util.h"" #include ""tensorflow/core/lib/strings/strcat.h"" #include ""tensorflow/core/platform/env.h"" @@ -30,6 +34,7 @@ limitations under the License. #include ""tensorflow/core/platform/test.h"" #include ""tensorflow/core/platform/types.h"" #include ""tensorflow/core/public/version.h"" +#include ""tensorflow/core/util/saved_tensor_slice.pb.h"" #include ""tensorflow/core/util/saved_tensor_slice_util.h"" #include ""tensorflow/core/util/tensor_slice_reader_cache.h"" #include ""tensorflow/core/util/tensor_slice_writer.h"" @@ -309,6 +314,102 @@ TEST_SIMPLE_INT(int16, int32) TEST_SIMPLE_INT(int8, int32) TEST_SIMPLE_INT(uint8, int32) +// Modifies the SavedTensorSlices messages in a checkpoint to allow creating +// malformed or unsupported checkpoints. +void MutateSavedTensorSlices( + const std::string& fname, + const std::function& mutator) { + table::Options options; + options.compression = table::kNoCompression; + + // Read all entres from the table. + std::vector> entries; + { + std::unique_ptr file; + TF_CHECK_OK(Env::Default()->NewRandomAccessFile(fname, &file)); + uint64 file_size; + TF_CHECK_OK(Env::Default()->GetFileSize(fname, &file_size)); + table::Table* t; + TF_CHECK_OK(table::Table::Open(options, file.get(), file_size, &t)); + std::unique_ptr table(t); + std::unique_ptr it(table->NewIterator()); + for (it->Seek(""""); it->Valid(); it->Next()) { + entries.emplace_back(it->key(), it->value()); + } + TF_CHECK_OK(it->status()); + } + + // Rewrite the table, mutating each value. 
+ { + std::unique_ptr file; + TF_CHECK_OK(Env::Default()->NewWritableFile(fname, &file)); + table::TableBuilder builder(options, file.get()); + for (const auto& entry : entries) { + SavedTensorSlices sts; + CHECK(sts.ParseFromString(entry.second)); + builder.Add(entry.first, mutator(std::move(sts))); + } + TF_CHECK_OK(builder.Finish()); + TF_CHECK_OK(file->Close()); + } +} + +TEST(TensorSliceReaderTest, MissingTensorType) { + const string fname = io::JoinPath(testing::TmpDir(), ""invalid_checkpoint""); + TensorSliceWriter writer(fname, CreateTableTensorSliceBuilder); + const int32 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + TensorShape shape({4, 5}); + TensorSlice slice = TensorSlice::ParseOrDie(""0,2:-""); + TF_CHECK_OK(writer.Add(""test"", shape, slice, data)); + TF_CHECK_OK(writer.Finish()); + + MutateSavedTensorSlices(fname, [](SavedTensorSlices sts) { + if (sts.has_meta()) { + for (auto& tensor : *sts.mutable_meta()->mutable_tensor()) { + tensor.clear_type(); + } + } + return sts.SerializeAsString(); + }); + + TensorSliceReader reader(fname, OpenTableTensorSliceReader); + TF_CHECK_OK(reader.status()); + + // The tensor should be present, but loading it should fail due to the + // unset (invalid) type. + EXPECT_TRUE(reader.HasTensor(""test"", nullptr, nullptr)); + std::unique_ptr tensor; + EXPECT_FALSE(reader.GetTensor(""test"", &tensor).ok()); +} + +TEST(TensorSliceReaderTest, UnsupportedTensorType) { + const string fname = io::JoinPath(testing::TmpDir(), ""int32_ref_checkpoint""); + TensorSliceWriter writer(fname, CreateTableTensorSliceBuilder); + const int32 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + TensorShape shape({4, 5}); + TensorSlice slice = TensorSlice::ParseOrDie(""0,2:-""); + TF_CHECK_OK(writer.Add(""test"", shape, slice, data)); + TF_CHECK_OK(writer.Finish()); + + MutateSavedTensorSlices(fname, [](SavedTensorSlices sts) { + if (sts.has_meta()) { + for (auto& tensor : *sts.mutable_meta()->mutable_tensor()) { + tensor.set_type(DT_INT32_REF); + } + } + return sts.SerializeAsString(); + }); + + TensorSliceReader reader(fname, OpenTableTensorSliceReader); + TF_CHECK_OK(reader.status()); + + // The tensor should be present, but loading it should fail due to the + // unsupported type. + EXPECT_TRUE(reader.HasTensor(""test"", nullptr, nullptr)); + std::unique_ptr tensor; + EXPECT_FALSE(reader.GetTensor(""test"", &tensor).ok()); +} + void CachedTensorSliceReaderTesterHelper( const TensorSliceWriter::CreateBuilderFunction& create_function, const TensorSliceReader::OpenTableFunction& open_function) { ",1,test b619c6f865715ca3b15ef1842b5b95edbaa710ad,tensorflow/tensorflow,"Use BuildTensorShapeBase when parsing unverified TensorShapes during checkpoint loading. This avoids crashing when the TensorShape has negative dimensions. PiperOrigin-RevId: 392769882 Change-Id: Id1f7ae7fcf8142193556af47abfda81b13d3cce4",tensor_slice_reader.cc,"@@ -168,7 +168,9 @@ void TensorSliceReader::LoadShard(int shard) const { ""checkpoint""); if (!status_.ok()) return; for (const SavedSliceMeta& ssm : sts.meta().tensor()) { - TensorShape ssm_shape(ssm.shape()); + TensorShape ssm_shape; + status_ = TensorShape::BuildTensorShapeBase(ssm.shape(), &ssm_shape); + if (!status_.ok()) return; for (const TensorSliceProto& tsp : ssm.slice()) { TensorSlice ss_slice(tsp); status_ = RegisterTensorSlice(ssm.name(), ssm_shape, ssm.type(), fname, ",1,train b619c6f865715ca3b15ef1842b5b95edbaa710ad,tensorflow/tensorflow,"Use BuildTensorShapeBase when parsing unverified TensorShapes during checkpoint loading. 
This avoids crashing when the TensorShape has negative dimensions. PiperOrigin-RevId: 392769882 Change-Id: Id1f7ae7fcf8142193556af47abfda81b13d3cce4",tensor_slice_reader_test.cc,"@@ -18,6 +18,7 @@ limitations under the License. #include #include +#include ""tensorflow/core/framework/tensor_shape.pb.h"" #include ""tensorflow/core/framework/types.h"" #include ""tensorflow/core/framework/versions.pb.h"" #include ""tensorflow/core/lib/core/status_test_util.h"" @@ -410,6 +411,31 @@ TEST(TensorSliceReaderTest, UnsupportedTensorType) { EXPECT_FALSE(reader.GetTensor(""test"", &tensor).ok()); } +TEST(TensorSliceReaderTest, NegativeTensorShapeDimension) { + const string fname = + io::JoinPath(testing::TmpDir(), ""negative_dim_checkpoint""); + TensorSliceWriter writer(fname, CreateTableTensorSliceBuilder); + const int32 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + TF_CHECK_OK(writer.Add(""test"", TensorShape({4, 5}), + TensorSlice::ParseOrDie(""0,2:-""), data)); + TF_CHECK_OK(writer.Finish()); + + MutateSavedTensorSlices(fname, [](SavedTensorSlices sts) { + if (sts.has_meta()) { + for (auto& tensor : *sts.mutable_meta()->mutable_tensor()) { + for (auto& dim : *tensor.mutable_shape()->mutable_dim()) { + dim.set_size(-dim.size()); + } + } + } + return sts.SerializeAsString(); + }); + + TensorSliceReader reader(fname, OpenTableTensorSliceReader); + // The negative dimension should cause loading to fail. + EXPECT_FALSE(reader.status().ok()); +} + void CachedTensorSliceReaderTesterHelper( const TensorSliceWriter::CreateBuilderFunction& create_function, const TensorSliceReader::OpenTableFunction& open_function) { ",1,train e8dc63704c88007ee4713076605c90188d66f3d2,tensorflow/tensorflow,"Add BuildTensorSlice for building from unvalidated TensorSliceProtos. This avoids several sources of crashes and undefined behavior when loading invalid checkpoints. PiperOrigin-RevId: 392785704 Change-Id: Icd9713c768b882f3b58b427eddac376060696833",tensor_slice.cc,"@@ -14,7 +14,10 @@ limitations under the License. ==============================================================================*/ #include ""tensorflow/core/framework/tensor_slice.h"" + +#include #include + #include ""tensorflow/core/lib/core/errors.h"" #include ""tensorflow/core/lib/strings/numbers.h"" #include ""tensorflow/core/lib/strings/str_util.h"" @@ -44,6 +47,34 @@ TensorSlice::TensorSlice( } } +Status TensorSlice::BuildTensorSlice(const TensorSliceProto& proto, + TensorSlice* output) { + output->Clear(); + output->starts_.reserve(proto.extent_size()); + output->lengths_.reserve(proto.extent_size()); + for (const auto& e : proto.extent()) { + int64_t l = GetExtentLength(e); + if (e.start() != 0 || l != kFullExtent) { + if (e.start() < 0 || l <= 0) { + return errors::InvalidArgument( + ""Expected non-negative start and positive length but got start = "", + e.start(), "", length = "", l, "": extent = "", e.ShortDebugString()); + } + // Calculating the extent end must not cause signed integer overflow. 
+ if (static_cast(e.start()) + static_cast(e.length()) > + std::numeric_limits::max()) { + return errors::InvalidArgument( + ""Extent end exceeds the maximum possible size: extent = "", + e.ShortDebugString()); + } + } + output->starts_.push_back(e.start()); + output->lengths_.push_back(l); + } + + return Status::OK(); +} + Status TensorSlice::Parse(const string& str, TensorSlice* slice) { std::vector items = str_util::Split(str, ':', str_util::SkipEmpty()); slice->starts_.reserve(items.size()); ",1,train e8dc63704c88007ee4713076605c90188d66f3d2,tensorflow/tensorflow,"Add BuildTensorSlice for building from unvalidated TensorSliceProtos. This avoids several sources of crashes and undefined behavior when loading invalid checkpoints. PiperOrigin-RevId: 392785704 Change-Id: Icd9713c768b882f3b58b427eddac376060696833",tensor_slice.h,"@@ -48,6 +48,12 @@ class TensorSlice { explicit TensorSlice( std::initializer_list> extents); + // This factory methods should be used instead of the constructor that takes a + // `TensorSliceProto` if calling code cannot validate that the sizes specify a + // valid `TensorSlice`. + static Status BuildTensorSlice(const TensorSliceProto& proto, + TensorSlice* output); + static Status Parse(const string& str, TensorSlice* output); static TensorSlice ParseOrDie(const string& str) { TensorSlice ret; ",1,train e8dc63704c88007ee4713076605c90188d66f3d2,tensorflow/tensorflow,"Add BuildTensorSlice for building from unvalidated TensorSliceProtos. This avoids several sources of crashes and undefined behavior when loading invalid checkpoints. PiperOrigin-RevId: 392785704 Change-Id: Icd9713c768b882f3b58b427eddac376060696833",tensor_slice_test.cc,"@@ -15,6 +15,8 @@ limitations under the License. #include ""tensorflow/core/framework/tensor_slice.h"" +#include + #include ""tensorflow/core/lib/core/status_test_util.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/protobuf.h"" @@ -125,6 +127,48 @@ TEST(TensorSliceTest, Serialization) { } } +// Testing `BuildTensorSlice` with valid and invalid input protos. +TEST(TensorSliceTest, BuildTensorSlice) { + TensorSliceProto proto; + TensorSlice({{0, -1}, {0, 10}, {14, 1}}).AsProto(&proto); + TensorSlice s; + + // Successful building. + { + TF_ASSERT_OK(TensorSlice::BuildTensorSlice(proto, &s)); + EXPECT_EQ(""-:0,10:14,1"", s.DebugString()); + } + + // Failed building due to negative extent start. + { + TensorSliceProto invalid_proto = proto; + invalid_proto.mutable_extent(0)->set_start(-1); + EXPECT_FALSE(TensorSlice::BuildTensorSlice(invalid_proto, &s).ok()); + } + + // Failed building due to negative extent length. + { + TensorSliceProto invalid_proto = proto; + invalid_proto.mutable_extent(2)->set_length(-1); + EXPECT_FALSE(TensorSlice::BuildTensorSlice(invalid_proto, &s).ok()); + } + + // Failed building due to missing extent length. + { + TensorSliceProto invalid_proto = proto; + invalid_proto.mutable_extent(2)->clear_length(); + EXPECT_FALSE(TensorSlice::BuildTensorSlice(invalid_proto, &s).ok()); + } + + // Failed building due to extent end overflowing. 
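A standalone sketch of the overflow guard exercised by the checks above, under the same assumption as the recorded change (start and length have already been verified non-negative): widening both 64-bit values to an unsigned type before adding makes the comparison against the signed maximum safe, because the sum of two non-negative int64 values always fits in uint64.

#include <cstdint>
#include <iostream>
#include <limits>

// Returns true iff start + length stays within the signed 64-bit range.
// Both values are assumed to have been checked as non-negative already.
bool ExtentEndFits(int64_t start, int64_t length) {
  const uint64_t end =
      static_cast<uint64_t>(start) + static_cast<uint64_t>(length);
  return end <= static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
}

int main() {
  std::cout << ExtentEndFits(14, 1) << "\n";                                   // 1: fits
  std::cout << ExtentEndFits(1, std::numeric_limits<int64_t>::max()) << "\n";  // 0: would overflow
  return 0;
}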
+ { + TensorSliceProto invalid_proto = proto; + invalid_proto.mutable_extent(2)->set_length( + std::numeric_limits::max()); + EXPECT_FALSE(TensorSlice::BuildTensorSlice(invalid_proto, &s).ok()); + } +} + // Testing the slice intersection TEST(TensorSliceTest, Intersection) { // ""EVERYTHING"" intersects with everything ",1,train e8dc63704c88007ee4713076605c90188d66f3d2,tensorflow/tensorflow,"Add BuildTensorSlice for building from unvalidated TensorSliceProtos. This avoids several sources of crashes and undefined behavior when loading invalid checkpoints. PiperOrigin-RevId: 392785704 Change-Id: Icd9713c768b882f3b58b427eddac376060696833",tensor_slice_reader.cc,"@@ -172,7 +172,9 @@ void TensorSliceReader::LoadShard(int shard) const { status_ = TensorShape::BuildTensorShapeBase(ssm.shape(), &ssm_shape); if (!status_.ok()) return; for (const TensorSliceProto& tsp : ssm.slice()) { - TensorSlice ss_slice(tsp); + TensorSlice ss_slice; + status_ = TensorSlice::BuildTensorSlice(tsp, &ss_slice); + if (!status_.ok()) return; status_ = RegisterTensorSlice(ssm.name(), ssm_shape, ssm.type(), fname, ss_slice, &tensors_); if (!status_.ok()) return; ",1,train e8dc63704c88007ee4713076605c90188d66f3d2,tensorflow/tensorflow,"Add BuildTensorSlice for building from unvalidated TensorSliceProtos. This avoids several sources of crashes and undefined behavior when loading invalid checkpoints. PiperOrigin-RevId: 392785704 Change-Id: Icd9713c768b882f3b58b427eddac376060696833",tensor_slice_reader_test.cc,"@@ -436,6 +436,29 @@ TEST(TensorSliceReaderTest, NegativeTensorShapeDimension) { EXPECT_FALSE(reader.status().ok()); } +TEST(TensorSliceReaderTest, InvalidTensorSlice) { + const string fname = + io::JoinPath(testing::TmpDir(), ""invalid_slice_checkpoint""); + TensorSliceWriter writer(fname, CreateTableTensorSliceBuilder); + const int32 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + TF_CHECK_OK(writer.Add(""test"", TensorShape({4, 5}), + TensorSlice::ParseOrDie(""0,2:-""), data)); + TF_CHECK_OK(writer.Finish()); + + MutateSavedTensorSlices(fname, [](SavedTensorSlices sts) { + if (sts.has_meta()) { + for (auto& tensor : *sts.mutable_meta()->mutable_tensor()) { + tensor.mutable_slice(0)->mutable_extent(0)->set_length(-10); + } + } + return sts.SerializeAsString(); + }); + + TensorSliceReader reader(fname, OpenTableTensorSliceReader); + // The negative exent length should cause loading to fail. + EXPECT_FALSE(reader.status().ok()); +} + void CachedTensorSliceReaderTesterHelper( const TensorSliceWriter::CreateBuilderFunction& create_function, const TensorSliceReader::OpenTableFunction& open_function) { ",1,train 7731e8dfbe4a56773be5dc94d631611211156659,tensorflow/tensorflow,"Don't constant-fold DT_RESOURCE constants. PiperOrigin-RevId: 391803952 Change-Id: I0ea3ec31d3e7dfda0f03b4027a237f08d00a3091",constant_folding.cc,"@@ -30,6 +30,7 @@ limitations under the License. #include ""tensorflow/core/framework/log_memory.h"" #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/types.h"" +#include ""tensorflow/core/framework/types.pb.h"" #include ""tensorflow/core/graph/algorithm.h"" #include ""tensorflow/core/graph/node_builder.h"" #include ""tensorflow/core/graph/subgraph.h"" @@ -223,7 +224,8 @@ bool IsConstantFoldable( std::unordered_map>* shape_replacement_map) { if (n->IsConstant()) { - return true; + // Skip constant folding resources as they cannot be deep copied. 
+ return n->output_type(0) != DT_RESOURCE; } if (MaybeReplaceShapeOp(n, shape_map, shape_replacement_map)) { return true; ",1,train 7cf73a2274732c9d82af51c2bc2cf90d13cd7e6d,tensorflow/tensorflow,"Address QuantizeAndDequantizeV* heap oob. Added additional checks for the 'axis' attribute. PiperOrigin-RevId: 402446942 Change-Id: Id2f6b82e4e740d0550329be02621c46466b5a5b9",array_ops.cc,"@@ -2863,7 +2863,10 @@ REGISTER_OP(""QuantizeAndDequantizeV2"") ShapeHandle minmax; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), minmax_rank, &minmax)); TF_RETURN_IF_ERROR(c->Merge(c->input(2), minmax, &minmax)); - if (axis != -1) { + if (axis < -1) { + return errors::InvalidArgument(""axis should be at least -1, got "", + axis); + } else if (axis != -1) { ShapeHandle input; TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; @@ -2895,7 +2898,10 @@ REGISTER_OP(""QuantizeAndDequantizeV4"") ShapeHandle minmax; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), minmax_rank, &minmax)); TF_RETURN_IF_ERROR(c->Merge(c->input(2), minmax, &minmax)); - if (axis != -1) { + if (axis < -1) { + return errors::InvalidArgument(""axis should be at least -1, got "", + axis); + } else if (axis != -1) { ShapeHandle input; TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; @@ -2923,7 +2929,10 @@ REGISTER_OP(""QuantizeAndDequantizeV4Grad"") ShapeHandle minmax; TF_RETURN_IF_ERROR(c->WithRank(c->input(2), minmax_rank, &minmax)); TF_RETURN_IF_ERROR(c->Merge(c->input(3), minmax, &minmax)); - if (axis != -1) { + if (axis < -1) { + return errors::InvalidArgument(""axis should be at least -1, got "", + axis); + } else if (axis != -1) { ShapeHandle input; TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; @@ -2956,7 +2965,10 @@ REGISTER_OP(""QuantizeAndDequantizeV3"") ShapeHandle minmax; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), minmax_rank, &minmax)); TF_RETURN_IF_ERROR(c->Merge(c->input(2), minmax, &minmax)); - if (axis != -1) { + if (axis < -1) { + return errors::InvalidArgument(""axis should be at least -1, got "", + axis); + } else if (axis != -1) { ShapeHandle input; TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; ",1,train 7cf73a2274732c9d82af51c2bc2cf90d13cd7e6d,tensorflow/tensorflow,"Address QuantizeAndDequantizeV* heap oob. Added additional checks for the 'axis' attribute. PiperOrigin-RevId: 402446942 Change-Id: Id2f6b82e4e740d0550329be02621c46466b5a5b9",array_ops_test.cc,"@@ -1374,6 +1374,8 @@ TEST(ArrayOpsTest, QuantizeAndDequantizeV2_ShapeFn) { INFER_ERROR(""Shapes must be equal rank, but are 1 and 0"", op, ""[1,2,?,4,5];[];[1]""); INFER_ERROR(""Shape must be rank 0 but is rank 1"", op, ""[1,2,?,4,5];[1];[1]""); + (*op.node_def.mutable_attr())[""axis""].set_i(-2); + INFER_ERROR(""axis should be at least -1, got -2"", op, ""?;?;?""); } TEST(ArrayOpsTest, SpaceToBatch_ShapeFn) { ",1,train 4d74d8a00b07441cba090a02e0dd9ed385145bf4,tensorflow/tensorflow,"Fix crash in softmax-xent when some input dimensions are 1. Before, tf.nn.softmax_cross_entropy_with_logits would fail a CHECK if one input tensor had shape (1, 1) and the other did not. In particular, the call to ToIndexArray<2> here https://github.com/tensorflow/tensorflow/blob/1f3da84a89702d3b4f234ee83762d738caffe098/tensorflow/core/kernels/xent_op.cc#L99 would fail, since the call assumed the array had two dimensions. If both dimensions were 1, BCast would merge the two dimensions into a single dimension. 
Passing fewer_dims_optimization=false stops this optimization PiperOrigin-RevId: 384844496 Change-Id: Ifb02dc74964132c3ed3f3bc98b0858dbe4e258b7",xent_op.cc,"@@ -46,7 +46,8 @@ class SoftmaxXentWithLogitsOp : public OpKernel { TensorShape shape_in = logits_in.shape(); BCast bcast(BCast::FromShape(logits_in.shape()), - BCast::FromShape(labels_in.shape())); + BCast::FromShape(labels_in.shape()), + /*fewer_dims_optimization=*/false); if (!logits_in.IsSameSize(labels_in)) { OP_REQUIRES(context, bcast.IsValid(), errors::InvalidArgument( @@ -88,20 +89,12 @@ class SoftmaxXentWithLogitsOp : public OpKernel { {0}, 1, shape_in, &back_out)); if (shape_in.dim_size(0) > 0) { functor::XentFunctor functor; - if (logits_in.IsSameSize(labels_in)) { - functor(context->eigen_device(), shape_in.AsEigenDSizes<2>(), - Eigen::array{1, 1}, - Eigen::array{1, 1}, logits_in.matrix(), - labels_in.matrix(), scratch.matrix(), loss_out->vec(), - back_out->matrix()); - } else { - functor(context->eigen_device(), shape_in.AsEigenDSizes<2>(), - BCast::ToIndexArray<2>(bcast.x_bcast()), - BCast::ToIndexArray<2>(bcast.y_bcast()), - logits_in.template shaped(bcast.x_reshape()), - labels_in.template shaped(bcast.y_reshape()), - scratch.matrix(), loss_out->vec(), back_out->matrix()); - } + functor(context->eigen_device(), shape_in.AsEigenDSizes<2>(), + BCast::ToIndexArray<2>(bcast.x_bcast()), + BCast::ToIndexArray<2>(bcast.y_bcast()), + logits_in.template shaped(bcast.x_reshape()), + labels_in.template shaped(bcast.y_reshape()), + scratch.matrix(), loss_out->vec(), back_out->matrix()); } } }; ",1,train 4d74d8a00b07441cba090a02e0dd9ed385145bf4,tensorflow/tensorflow,"Fix crash in softmax-xent when some input dimensions are 1. Before, tf.nn.softmax_cross_entropy_with_logits would fail a CHECK if one input tensor had shape (1, 1) and the other did not. In particular, the call to ToIndexArray<2> here https://github.com/tensorflow/tensorflow/blob/1f3da84a89702d3b4f234ee83762d738caffe098/tensorflow/core/kernels/xent_op.cc#L99 would fail, since the call assumed the array had two dimensions. If both dimensions were 1, BCast would merge the two dimensions into a single dimension. Passing fewer_dims_optimization=false stops this optimization PiperOrigin-RevId: 384844496 Change-Id: Ifb02dc74964132c3ed3f3bc98b0858dbe4e258b7",xent_op_test.py,"@@ -63,6 +63,13 @@ class XentOpTest(xent_op_test_base.XentOpTestBase): self.assertAllCloseAccordingToType(np_loss, tf_loss) self.assertAllCloseAccordingToType(np_gradient, tf_gradient) + tf_f = constant_op.constant(np.array([[1.]]).astype(np.float32)) + tf_l = constant_op.constant(np.array([[1.], [1.]]).astype(np.float32)) + tf_loss, tf_gradient = gen_nn_ops.softmax_cross_entropy_with_logits( + tf_f, tf_l) + self.assertAllClose([0, 0], tf_loss) + self.assertAllCloseAccordingToType([[0], [0]], tf_gradient) + @test_util.run_deprecated_v1 def testNotMatrix(self): with self.cached_session(): ",1,train 4d74d8a00b07441cba090a02e0dd9ed385145bf4,tensorflow/tensorflow,"Fix crash in softmax-xent when some input dimensions are 1. Before, tf.nn.softmax_cross_entropy_with_logits would fail a CHECK if one input tensor had shape (1, 1) and the other did not. In particular, the call to ToIndexArray<2> here https://github.com/tensorflow/tensorflow/blob/1f3da84a89702d3b4f234ee83762d738caffe098/tensorflow/core/kernels/xent_op.cc#L99 would fail, since the call assumed the array had two dimensions. If both dimensions were 1, BCast would merge the two dimensions into a single dimension. 
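To illustrate why keeping the original rank matters here, the following is a generic, rank-preserving broadcast check written from scratch (NumPy-style rules; it is not TensorFlow's BCast class): each dimension pair must match or contain a 1, and the result keeps one entry per input dimension instead of collapsing them, so downstream code that assumes a fixed rank, like the 2-D index arrays described above, stays valid.

#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

// Element-wise broadcast of two shapes of equal rank. Returns std::nullopt
// if the shapes are incompatible. The result has the same rank as the inputs;
// no dimensions are merged away.
std::optional<std::vector<int64_t>> BroadcastShape(
    const std::vector<int64_t>& a, const std::vector<int64_t>& b) {
  if (a.size() != b.size()) return std::nullopt;
  std::vector<int64_t> out(a.size());
  for (size_t i = 0; i < a.size(); ++i) {
    if (a[i] == b[i] || b[i] == 1) {
      out[i] = a[i];
    } else if (a[i] == 1) {
      out[i] = b[i];
    } else {
      return std::nullopt;  // incompatible, e.g. 2 vs 3
    }
  }
  return out;
}

int main() {
  // (1, 1) against (2, 1) broadcasts to (2, 1) and keeps rank 2.
  auto shape = BroadcastShape({1, 1}, {2, 1});
  if (shape) std::cout << (*shape)[0] << "x" << (*shape)[1] << "\n";  // prints 2x1
  return 0;
}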
Passing fewer_dims_optimization=false stops this optimization PiperOrigin-RevId: 384844496 Change-Id: Ifb02dc74964132c3ed3f3bc98b0858dbe4e258b7",xent_op_test_base.py,"@@ -151,6 +151,9 @@ class XentOpTestBase(test.TestCase): labels = np.array([[0., 0., 0., 1.]]).astype(np.float16) logits = np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float16) self._testXent2D(labels, logits, with_placeholders=True) + labels = np.array([[1.]]).astype(np.float16) + logits = np.array([[1.], [2.]]).astype(np.float16) + self._testXent2D(labels, logits, with_placeholders=True) labels = np.array([[0.], [2.], [0.25]]).astype(np.float16) logits = np.array([[1., 1., 1., 1.], [1., 2., 3., 4.], [1., 2., 3., 4.]]).astype(np.float16) ",1,train 4dddb2fd0b01cdd196101afbba6518658a2c9e07,tensorflow/tensorflow,"Fix segfault in pools on empty shapes when certain dimension were very large. Pooling ops multiply certain components of the input shape, e.g. by multiplying input.shape[1] * input.shape[2] * input.shape[3]. This multiplication could overflow an int64 value if shape[0] was 0 but shape[1], shape[2], and shape[3] were very large, e.g. by passing an input with shape (0, 2**25, 2**25, 2**25). PiperOrigin-RevId: 404644978 Change-Id: Ic79f89c970357ca2962b1f231449066db9403146",pooling_ops_common.h,"@@ -189,6 +189,9 @@ class MaxPoolingOp : public OpKernel { void SpatialMaxPool(OpKernelContext* context, Tensor* output, const Tensor& tensor_in, const PoolParameters& params, const Padding& padding) { + if (output->NumElements() == 0) { + return; + } // On GPU, use Eigen's Spatial Max Pooling. On CPU, use an // EigenMatrix version that is currently faster than Eigen's // Spatial MaxPooling implementation. @@ -443,6 +446,9 @@ class MaxPoolingV2Op : public OpKernel { void SpatialMaxPool(OpKernelContext* context, Tensor* output, const Tensor& tensor_in, const PoolParameters& params, const Padding& padding) { + if (output->NumElements() == 0) { + return; + } // On GPU, use Eigen's Spatial Max Pooling. On CPU, use an // EigenMatrix version that is currently faster than Eigen's // Spatial MaxPooling implementation. @@ -561,6 +567,9 @@ template void SpatialAvgPool(OpKernelContext* context, Tensor* output, const Tensor& input, const PoolParameters& params, const Padding& padding) { + if (output->NumElements() == 0) { + return; + } typedef Eigen::Map> ConstEigenMatrixMap; typedef Eigen::Map> ",1,train 579261dcd446385831fe4f7457d802a59685121d,tensorflow/tensorflow,"Fix crash in MatrixSolve when inputs have different batch dimensions. Before, the process would crash or certain elements would be silently ignored. Now an InvalidArgument is raised. 
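A small sketch of the batch-dimension check just described, with made-up shape and error types rather than the real OP_REQUIRES_ASYNC machinery: every outer (batch) dimension of the two operands must agree before the per-batch solves run, otherwise the caller gets an error instead of a crash or silently dropped elements.

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Checks that all dimensions except the innermost two (the matrix dimensions)
// match between the coefficient matrix and the right-hand side.
// Returns an empty string on success.
std::string CheckBatchDimsMatch(const std::vector<int64_t>& matrix_shape,
                                const std::vector<int64_t>& rhs_shape) {
  if (matrix_shape.size() != rhs_shape.size() || matrix_shape.size() < 2)
    return "ranks must match and be >= 2";
  for (size_t dim = 0; dim + 2 < matrix_shape.size(); ++dim) {
    if (matrix_shape[dim] != rhs_shape[dim])
      return "All input tensors must have the same outer dimensions.";
  }
  return "";
}

int main() {
  std::cout << CheckBatchDimsMatch({2, 6, 2, 2}, {2, 3, 2, 2}) << "\n";  // mismatch at dim 1
  std::cout << (CheckBatchDimsMatch({2, 3, 2, 2}, {2, 3, 2, 2}).empty() ? "ok" : "bad") << "\n";
  return 0;
}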
PiperOrigin-RevId: 384844020 Change-Id: Iba44417e383bdd0e1abc4012bfca83b2377dd335",matrix_solve_op.cc,"@@ -143,15 +143,22 @@ class MatrixSolveOpGpu : public AsyncOpKernel { done); OP_REQUIRES_ASYNC( context, input.dim_size(ndims - 2) == n, - errors::InvalidArgument(""Input matrices must be squares, got"", + errors::InvalidArgument(""Input matrices must be squares, got "", input.dim_size(ndims - 2), "" != "", n), done); OP_REQUIRES_ASYNC(context, rhs.dim_size(ndims - 2) == n, errors::InvalidArgument( ""Input matrix and right-hand side must have the "" - ""same number of rows, got"", + ""same number of rows, got "", n, "" != "", rhs.dim_size(ndims - 2)), done); + for (int dim = 0; dim < ndims - 2; dim++) { + OP_REQUIRES_ASYNC( + context, input.dim_size(dim) == rhs.dim_size(dim), + errors::InvalidArgument( + ""All input tensors must have the same outer dimensions.""), + done); + } // Allocate output. Tensor* output; ",1,train 579261dcd446385831fe4f7457d802a59685121d,tensorflow/tensorflow,"Fix crash in MatrixSolve when inputs have different batch dimensions. Before, the process would crash or certain elements would be silently ignored. Now an InvalidArgument is raised. PiperOrigin-RevId: 384844020 Change-Id: Iba44417e383bdd0e1abc4012bfca83b2377dd335",matrix_solve_op_test.py,"@@ -112,6 +112,12 @@ class MatrixSolveOpTest(test.TestCase): with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)): self.evaluate(linalg_ops.matrix_solve(matrix, rhs)) + # The matrix and right-hand side should have the same batch dimensions + matrix = np.random.normal(size=(2, 6, 2, 2)) + rhs = np.random.normal(size=(2, 3, 2, 2)) + with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)): + self.evaluate(linalg_ops.matrix_solve(matrix, rhs)) + def testNotInvertible(self): # The input should be invertible. with self.assertRaisesOpError(""Input matrix is not invertible.""): ",1,train 68422b215e618df5ad375bcdc6d2052e9fd3080a,tensorflow/tensorflow,"Add shape checks to GPU TridiagonalMatMul. When given invalid shapes, the GPU TridiagonalMatMul op could read invalid or uninitialized GPU memory. 
PiperOrigin-RevId: 401775483 Change-Id: Ib5500aeb8225e50d4ce790b06d2c34751f544ad8",tridiagonal_matmul_op_gpu.cu.cc,"@@ -66,6 +66,12 @@ class TridiagonalMatMulOpGpu : public OpKernel { const Tensor& rhs = context->input(3); const int ndims = rhs.dims(); + OP_REQUIRES( + context, ndims >= 2, + errors::InvalidArgument(""Input must have rank >= 2, but got "", ndims)); + OP_REQUIRES_OK(context, ValidateInputTensor(superdiag, ""superdiag"", rhs)); + OP_REQUIRES_OK(context, ValidateInputTensor(maindiag, ""maindiag"", rhs)); + OP_REQUIRES_OK(context, ValidateInputTensor(subdiag, ""subdiag"", rhs)); int64 batch_size = 1; for (int i = 0; i < ndims - 2; i++) { batch_size *= rhs.dim_size(i); @@ -85,6 +91,39 @@ class TridiagonalMatMulOpGpu : public OpKernel { maindiag.flat().data(), subdiag.flat().data(), rhs.flat().data(), output->flat().data())); } + + private: + Status ValidateInputTensor(const Tensor& tensor, + const std::string& tensor_name, + const Tensor& rhs) { + const int ndims = rhs.dims(); + if (tensor.dims() != ndims) { + return errors::InvalidArgument(tensor_name, + "" must have same rank as rhs, but got "", + tensor.dims(), "" and "", ndims); + } + for (int i = 0; i < ndims - 2; i++) { + if (tensor.dim_size(i) != rhs.dim_size(i)) { + return errors::InvalidArgument( + tensor_name, + "" must have same outer dimensions as rhs, but for index "", i, + "", got "", tensor.dim_size(i), "" and "", rhs.dim_size(i)); + } + } + if (tensor.dim_size(ndims - 2) != 1) { + return errors::InvalidArgument( + tensor_name, ""'s second-to-last dimension must be 1, but got "", + tensor.dim_size(ndims - 2)); + } + if (tensor.dim_size(ndims - 1) != rhs.dim_size(ndims - 2)) { + return errors::InvalidArgument(tensor_name, + ""'s last dimension size must be rhs's "" + ""second-to-last dimension size, but got "", + tensor.dim_size(ndims - 1), "" and "", + rhs.dim_size(ndims - 2)); + } + return Status::OK(); + } }; REGISTER_LINALG_OP_GPU(""TridiagonalMatMul"", (TridiagonalMatMulOpGpu), ",1,train 68422b215e618df5ad375bcdc6d2052e9fd3080a,tensorflow/tensorflow,"Add shape checks to GPU TridiagonalMatMul. When given invalid shapes, the GPU TridiagonalMatMul op could read invalid or uninitialized GPU memory. 
PiperOrigin-RevId: 401775483 Change-Id: Ib5500aeb8225e50d4ce790b06d2c34751f544ad8",tridiagonal_matmul_op_test.py,"@@ -19,12 +19,15 @@ import itertools import numpy as np from tensorflow.python.client import session +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradient_checker_v2 +from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.ops.linalg import linalg_impl @@ -175,6 +178,37 @@ class TridiagonalMulOpTest(test.TestCase): rhs = self._randomComplexArray((b, m, n)) self._gradientTest(diags, rhs, dtype=dtypes.complex128) + def _testErrorWithShapesEager(self, exception_regex, superdiag_shape, + maindiag_shape, subdiag_shape, rhs_shape): + with context.eager_mode(): + superdiag = array_ops.ones(superdiag_shape) + maindiag = array_ops.ones(maindiag_shape) + subdiag = array_ops.ones(subdiag_shape) + rhs = array_ops.ones(rhs_shape) + with self.assertRaisesRegex(errors_impl.InvalidArgumentError, + exception_regex): + linalg_ops.tridiagonal_mat_mul(superdiag, maindiag, subdiag, rhs) + + def testInvalidShapesEagerGpu(self): + if not test.is_gpu_available(): + self.skipTest('Test requires GPU') + self._testErrorWithShapesEager('Input must have rank >= 2, but got ', + [2], [2], [2], [2]) + self._testErrorWithShapesEager( + 'superdiag must have same rank as rhs, but got 3 and 2', + [2, 1, 2], [2, 1], [2, 1], [2, 2]) + self._testErrorWithShapesEager( + 'maindiag must have same outer dimensions as rhs, but for index 0, got ' + '3 and 2', + [2, 1, 2], [3, 1, 2], [2, 1, 2], [2, 2, 2]) + self._testErrorWithShapesEager( + ""subdiag's second-to-last dimension must be 1, but got 3"", + [2, 1, 2], [2, 1, 2], [2, 3, 2], [2, 2, 2]) + self._testErrorWithShapesEager( + ""subdiag's last dimension size must be rhs's second-to-last dimension "" + ""size, but got 3 and 2"", + [2, 1, 2], [2, 1, 2], [2, 1, 3], [2, 2, 2]) + # Benchmark class TridiagonalMatMulBenchmark(test.Benchmark): sizes = [(100000, 1, 1), (1000000, 1, 1), (10000000, 1, 1), (100000, 10, 1), ",1,train da4aad5946be30e5f049920fa076e1f7ef021261,tensorflow/tensorflow,"Roll forward https://github.com/tensorflow/tensorflow/commit/ab0ca4bbc66a476aea305f81c69e0201b5876d0a. The internal test that it broke has been fixed. 
PiperOrigin-RevId: 401913101 Change-Id: I67f095899187e38101fbb10289c5e444b0a9e8c0",maxpooling_op.cc,"@@ -325,6 +325,14 @@ class MaxPoolingGradOp : public OpKernel { if (!context->status().ok()) { return; } + OP_REQUIRES(context, tensor_out.shape() == params.forward_output_shape(), + errors::InvalidArgument(""Expected orig_output shape to be "", + params.forward_output_shape(), + "", but got "", tensor_out.shape())); + OP_REQUIRES(context, out_backprop.shape() == params.forward_output_shape(), + errors::InvalidArgument(""Expected grad shape to be "", + params.forward_output_shape(), + "", but got "", out_backprop.shape())); Tensor* output = nullptr; OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( @@ -538,6 +546,18 @@ class MaxPoolingGradGradOp : public OpKernel { /*explicit_paddings=*/{}, FORMAT_NHWC, tensor_in.shape()}; + if (!context->status().ok()) { + return; + } + OP_REQUIRES(context, tensor_out.shape() == params.forward_output_shape(), + errors::InvalidArgument(""Expected orig_output shape to be "", + params.forward_output_shape(), + "", but got "", tensor_out.shape())); + OP_REQUIRES( + context, out_grad_backprop.shape() == tensor_in.shape(), + errors::InvalidArgument(""Expected grad shape to be "", tensor_in.shape(), + "", but got "", out_grad_backprop.shape())); + Tensor* output = nullptr; OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( {2}, 0, tensor_out.shape(), &output)); @@ -742,6 +762,17 @@ class MaxPoolingGradGradOp : public OpKernel { /*explicit_paddings=*/{}, data_format_, tensor_in.shape()}; + if (!context->status().ok()) { + return; + } + OP_REQUIRES(context, tensor_out.shape() == params.forward_output_shape(), + errors::InvalidArgument(""Expected orig_output shape to be "", + params.forward_output_shape(), + "", but got "", tensor_out.shape())); + OP_REQUIRES( + context, out_grad_backprop.shape() == tensor_in.shape(), + errors::InvalidArgument(""Expected grad shape to be "", tensor_in.shape(), + "", but got "", out_grad_backprop.shape())); functor::MaxPoolGradBackwardNoMask()( data_format_, tensor_in.flat().data(), tensor_out.flat().data(), @@ -1096,6 +1127,14 @@ class MaxPoolingGradWithArgmaxOp : public OpKernel { if (!context->status().ok()) { return; } + OP_REQUIRES(context, grad_in.shape() == params.forward_output_shape(), + errors::InvalidArgument(""Expected grad shape to be "", + params.forward_output_shape(), + "", but got "", grad_in.shape())); + OP_REQUIRES(context, argmax.shape() == params.forward_output_shape(), + errors::InvalidArgument(""Expected argmax shape to be "", + params.forward_output_shape(), + "", but got "", argmax.shape())); TensorShape out_shape({params.tensor_in_batch, params.tensor_in_rows, params.tensor_in_cols, params.depth}); @@ -1156,6 +1195,14 @@ class MaxPoolingGradGradWithArgmaxOp : public OpKernel { if (!context->status().ok()) { return; } + OP_REQUIRES( + context, grad_in.shape() == tensor_in.shape(), + errors::InvalidArgument(""Expected grad shape to be "", tensor_in.shape(), + "", but got "", grad_in.shape())); + OP_REQUIRES(context, argmax.shape() == params.forward_output_shape(), + errors::InvalidArgument(""Expected argmax shape to be "", + params.forward_output_shape(), + "", but got "", argmax.shape())); TensorShape out_shape({params.tensor_in_batch, params.out_height, params.out_width, params.depth}); ",1,train da4aad5946be30e5f049920fa076e1f7ef021261,tensorflow/tensorflow,"Roll forward https://github.com/tensorflow/tensorflow/commit/ab0ca4bbc66a476aea305f81c69e0201b5876d0a. 
The internal test that it broke has been fixed. PiperOrigin-RevId: 401913101 Change-Id: I67f095899187e38101fbb10289c5e444b0a9e8c0",pooling_ops_3d.cc,"@@ -366,6 +366,19 @@ class MaxPooling3dGradOp : public OpKernel { OP_REQUIRES_OK(context, Get3dOutputSize(input_size, window, stride, padding_, &out, &padding)); + + const int64_t depth = GetTensorDim(tensor_in, data_format_, 'C'); + const int64_t in_batch = GetTensorDim(tensor_in, data_format_, 'N'); + TensorShape out_shape = ShapeFromFormat(data_format_, in_batch, + {{out[2], out[1], out[0]}}, depth); + OP_REQUIRES( + context, tensor_out.shape() == out_shape, + errors::InvalidArgument(""Expected orig_output shape to be "", out_shape, + "", but got "", tensor_out.shape())); + OP_REQUIRES(context, out_backprop.shape() == out_shape, + errors::InvalidArgument(""Expected grad shape to be "", out_shape, + "", but got "", out_backprop.shape())); + LaunchMaxPooling3dGradOp::launch( context, tensor_in, tensor_out, out_backprop, window, stride, out, padding, data_format_, input_backprop); @@ -712,6 +725,14 @@ class MaxPooling3dGradGradOp : public OpKernel { Pool3dParameters params{context, ksize_, stride_, padding_, data_format_, tensor_in.shape()}; if (!context->status().ok()) return; // params is invalid + OP_REQUIRES(context, tensor_out.shape() == params.forward_output_shape(), + errors::InvalidArgument(""Expected orig_output shape to be "", + params.forward_output_shape(), + "", but got "", tensor_out.shape())); + OP_REQUIRES( + context, out_grad_backprop.shape() == tensor_in.shape(), + errors::InvalidArgument(""Expected grad shape to be "", tensor_in.shape(), + "", but got "", out_grad_backprop.shape())); Tensor* output = nullptr; OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( ",1,train da4aad5946be30e5f049920fa076e1f7ef021261,tensorflow/tensorflow,"Roll forward https://github.com/tensorflow/tensorflow/commit/ab0ca4bbc66a476aea305f81c69e0201b5876d0a. The internal test that it broke has been fixed. PiperOrigin-RevId: 401913101 Change-Id: I67f095899187e38101fbb10289c5e444b0a9e8c0",pooling_ops_common.cc,"@@ -465,6 +465,16 @@ void DnnPoolingGradOp::Compute( if (!context->status().ok()) { return; } + if (tensor_out) { + OP_REQUIRES(context, tensor_out->shape() == params.forward_output_shape(), + errors::InvalidArgument(""Expected orig_output shape to be "", + params.forward_output_shape(), + "", but got "", tensor_out->shape())); + } + OP_REQUIRES(context, out_backprop.shape() == params.forward_output_shape(), + errors::InvalidArgument(""Expected grad shape to be "", + params.forward_output_shape(), + "", but got "", out_backprop.shape())); TensorFormat transformed_input_data_format = data_format; ",1,train da4aad5946be30e5f049920fa076e1f7ef021261,tensorflow/tensorflow,"Roll forward https://github.com/tensorflow/tensorflow/commit/ab0ca4bbc66a476aea305f81c69e0201b5876d0a. The internal test that it broke has been fixed. PiperOrigin-RevId: 401913101 Change-Id: I67f095899187e38101fbb10289c5e444b0a9e8c0",pooling_ops_common.h,"@@ -83,11 +83,6 @@ struct PoolParameters { TensorFormat data_format; }; -// Checks if the sizes of the paddings are less than the size of window. -// This is required for MaxPool because it pads with -inf, so the pooling -// window cannot fully cover the padded area. -Status CheckPaddingSize(PoolParameters& params); - // An implementation of MaxPooling (forward). 
// TODO (yongtang): Remove MaxPoolingOp and use MaxPoolingV2Op, // QuantizedMaxPoolingOp depends on MaxPoolingOp so keep intact for now ",1,train da4aad5946be30e5f049920fa076e1f7ef021261,tensorflow/tensorflow,"Roll forward https://github.com/tensorflow/tensorflow/commit/ab0ca4bbc66a476aea305f81c69e0201b5876d0a. The internal test that it broke has been fixed. PiperOrigin-RevId: 401913101 Change-Id: I67f095899187e38101fbb10289c5e444b0a9e8c0",pooling_ops_3d_test.py,"@@ -16,9 +16,13 @@ import numpy as np +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import errors +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import nn_ops @@ -515,6 +519,44 @@ class PoolingTest(test.TestCase): pool_3d = f(input_tensor, ksize=[2, 2, 0], strides=1, padding=""VALID"") self.evaluate(pool_3d) + def testMaxPoolGradEagerShapeErrors(self): + with context.eager_mode(): + orig_in = array_ops.ones((1, 1, 1, 1, 1)) + + # Test invalid orig_out shape + orig_out = array_ops.ones((1, 1, 1, 1, 2)) + grad = array_ops.ones((1, 1, 1, 1, 1)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + r""Expected orig_output shape to be \[1,1,1,1,1\], but got "" + r""\[1,1,1,1,2\]""): + gen_nn_ops.max_pool3d_grad( + orig_in, orig_out, grad, ksize=[1, 1, 1, 1, 1], + strides=[1, 1, 1, 1, 1], padding=""VALID"") + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + r""Expected orig_output shape to be \[1,1,1,1,1\], but got "" + r""\[1,1,1,1,2\]""): + gen_nn_ops.max_pool3d_grad_grad( + orig_in, orig_out, grad, ksize=[1, 1, 1, 1, 1], + strides=[1, 1, 1, 1, 1], padding=""VALID"") + + # Test invalid grad shape + orig_out = array_ops.ones((1, 1, 1, 1, 1)) + grad = array_ops.ones((1, 1, 1, 1, 2)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + r""Expected grad shape to be \[1,1,1,1,1\], but got \[1,1,1,1,2\]""): + gen_nn_ops.max_pool3d_grad( + orig_in, orig_out, grad, ksize=[1, 1, 1, 1, 1], + strides=[1, 1, 1, 1, 1], padding=""VALID"") + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + r""Expected grad shape to be \[1,1,1,1,1\], but got \[1,1,1,1,2\]""): + gen_nn_ops.max_pool3d_grad_grad( + orig_in, orig_out, grad, ksize=[1, 1, 1, 1, 1], + strides=[1, 1, 1, 1, 1], padding=""VALID"") + if __name__ == ""__main__"": test.main() ",1,train da4aad5946be30e5f049920fa076e1f7ef021261,tensorflow/tensorflow,"Roll forward https://github.com/tensorflow/tensorflow/commit/ab0ca4bbc66a476aea305f81c69e0201b5876d0a. The internal test that it broke has been fixed. 
PiperOrigin-RevId: 401913101 Change-Id: I67f095899187e38101fbb10289c5e444b0a9e8c0",pooling_ops_test.py,"@@ -618,6 +618,7 @@ class PoolingTest(test.TestCase, parameterized.TestCase): @parameterized.parameters( GetTestConfigsDicts(nn_ops.max_pool, nn_ops.max_pool_v2)) + @test_util.xla_allow_fallback(""XLA doesn't support explicit padding"") @test_util.run_deprecated_v1 def testMaxPoolNegativeInputExpPaddingAdv(self, **kwargs): expected_output = [-1, -1, -3, -5, -7, -7, -9, -11, -19, -19, -21, -23, -31, @@ -2390,6 +2391,82 @@ class PoolingTest(test.TestCase, parameterized.TestCase): explicit_paddings=[1, 1, 1, 1, 1, 1, 0, 0], data_format=""NHWC"")) + def testMaxPoolGradEagerShapeErrors(self): + with context.eager_mode(): + orig_in = array_ops.ones((1, 1, 1, 1)) + + # Test invalid orig_out shape + orig_out = array_ops.ones((1, 1, 1, 2)) + grad = array_ops.ones((1, 1, 1, 1)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + r""Expected orig_output shape to be \[1,1,1,1\], but got \[1,1,1,2\]""): + gen_nn_ops.max_pool_grad( + orig_in, orig_out, grad, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1], + padding=""VALID"") + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + r""Expected orig_output shape to be \[1,1,1,1\], but got \[1,1,1,2\]""): + gen_nn_ops.max_pool_grad_grad( + orig_in, orig_out, grad, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1], + padding=""VALID"") + + # Test invalid grad shape + orig_out = array_ops.ones((1, 1, 1, 1)) + grad = array_ops.ones((1, 1, 1, 2)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + r""Expected grad shape to be \[1,1,1,1\], but got \[1,1,1,2\]""): + gen_nn_ops.max_pool_grad( + orig_in, orig_out, grad, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1], + padding=""VALID"") + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + r""Expected grad shape to be \[1,1,1,1\], but got \[1,1,1,2\]""): + gen_nn_ops.max_pool_grad_grad( + orig_in, orig_out, grad, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1], + padding=""VALID"") + + def testMaxPoolGradWithArgmaxEagerShapeErrors(self): + with context.eager_mode(): + inp = array_ops.ones((1, 1, 1, 1)) + + # Test invalid grad shape + grad = array_ops.ones((1, 1, 1, 2)) + argmax = array_ops.zeros((1, 1, 1, 1), dtype=dtypes.int64) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + r""Expected grad shape to be \[1,1,1,1\], but got \[1,1,1,2\]""): + gen_nn_ops.max_pool_grad_with_argmax( + inp, grad, argmax, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1], + padding=""VALID"") + # max_pool_grad_grad_with_argmax is only implemented for GPUs + if test.is_gpu_available(): + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + r""Expected grad shape to be \[1,1,1,1\], but got \[1,1,1,2\]""): + gen_nn_ops.max_pool_grad_grad_with_argmax( + inp, grad, argmax, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1], + padding=""VALID"") + + # Test invalid argmax shape + grad = array_ops.ones((1, 1, 1, 1)) + argmax = array_ops.ones((1, 1, 1, 2), dtype=dtypes.int64) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + r""Expected argmax shape to be \[1,1,1,1\], but got \[1,1,1,2\]""): + gen_nn_ops.max_pool_grad_with_argmax( + inp, grad, argmax, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1], + padding=""VALID"") + # max_pool_grad_grad_with_argmax is only implemented for GPUs + if test.is_gpu_available(): + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + r""Expected argmax shape to be \[1,1,1,1\], but got \[1,1,1,2\]""): + 
gen_nn_ops.max_pool_grad_grad_with_argmax( + inp, grad, argmax, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1], + padding=""VALID"") + def GetMaxPoolFwdTest(input_size, filter_size, strides, padding): ",1,train e7f497570abb6b4ae5af4970620cd880e4c0c904,tensorflow/tensorflow,"Fix segfault on OOM in Conv2D. PiperOrigin-RevId: 404655317 Change-Id: I33588dbd3f5d0fef980e3c908bf5515a9ee09ce7",conv_ops.cc,"@@ -183,12 +183,18 @@ struct LaunchGrouped { auto on_shuffled = [&]() { shuffles_completed.DecrementCount(); }; // Shuffle input into temporary tensor. - Tensor input_shuffled(input.dtype(), TensorShape(post_shuffle(input))); + Tensor input_shuffled; + OP_REQUIRES_OK( + ctx, ctx->allocate_temp(input.dtype(), TensorShape(post_shuffle(input)), + &input_shuffled)); input_shuffled.tensor().device(device, on_shuffled) = input.shaped(pre_shuffle(input)).shuffle(shuffle); // Shuffle filter into temporary tensor. - Tensor filter_shuffled(filter.dtype(), TensorShape(post_shuffle(filter))); + Tensor filter_shuffled; + OP_REQUIRES_OK(ctx, ctx->allocate_temp(filter.dtype(), + TensorShape(post_shuffle(filter)), + &filter_shuffled)); filter_shuffled.tensor().device(device, on_shuffled) = filter.shaped(pre_shuffle(filter)).shuffle(shuffle); @@ -196,7 +202,10 @@ struct LaunchGrouped { shuffles_completed.Wait(); // Write group convolution results into temporary output tensor. - Tensor output_shuffled(output->dtype(), TensorShape(post_shuffle(*output))); + Tensor output_shuffled; + OP_REQUIRES_OK(ctx, ctx->allocate_temp(output->dtype(), + TensorShape(post_shuffle(*output)), + &output_shuffled)); for (int64_t i = 0; i < num_groups; ++i) { // TODO(ezhulenev): Run this loop using `parallelFor` (regular parallelFor ",1,train f2c3931113eaafe9ef558faaddd48e00a6606235,tensorflow/tensorflow,"Adding more validation checks to _ParallelConcatUpdate to avoid NPE. PiperOrigin-RevId: 402569467 Change-Id: I2db122dab68be2a5e4e8dd3375f5a70c4d2307ec",inplace_ops.cc,"@@ -71,6 +71,15 @@ class ParallelConcatUpdate : public OpKernel { void Compute(OpKernelContext* ctx) override { auto value = ctx->input(0); + // Value should be at least rank 1. Also the 0th dimension should be + // at least loc_. + OP_REQUIRES(ctx, value.dims() >= 1, + errors::InvalidArgument(""value should be at least rank 1."")); + OP_REQUIRES( + ctx, value.dim_size(0) > loc_, + errors::InvalidArgument(""0th dimension of value = "", value.dim_size(0), + "" is less than loc_="", loc_)); + auto update = ctx->input(1); OP_REQUIRES( ",1,train f2c3931113eaafe9ef558faaddd48e00a6606235,tensorflow/tensorflow,"Adding more validation checks to _ParallelConcatUpdate to avoid NPE. 
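A sketch of the guard added for the in-place row update described above, using a plain container as a simplified, hypothetical stand-in for a Tensor: the destination's first dimension must be larger than the target row index before any row is written, so a bad `loc` is rejected instead of dereferencing an out-of-range row.

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Writes `row` into value[loc] only after validating the destination size.
// Returns an empty string on success.
std::string UpdateRow(std::vector<std::vector<int64_t>>& value, int64_t loc,
                      const std::vector<int64_t>& row) {
  if (value.empty()) return "value must have a non-empty 0th dimension.";
  if (loc < 0 || static_cast<int64_t>(value.size()) <= loc)
    return "0th dimension of value = " + std::to_string(value.size()) +
           " is less than loc_=" + std::to_string(loc);
  value[static_cast<size_t>(loc)] = row;
  return "";
}

int main() {
  std::vector<std::vector<int64_t>> value = {{0, 0}, {0, 0}};
  std::cout << UpdateRow(value, 5, {1, 2}) << "\n";                    // rejected: loc out of range
  std::cout << (UpdateRow(value, 1, {1, 2}).empty() ? "ok" : "bad") << "\n";
  return 0;
}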
PiperOrigin-RevId: 402569467 Change-Id: I2db122dab68be2a5e4e8dd3375f5a70c4d2307ec",stack_op_test.py,"@@ -16,12 +16,16 @@ import numpy as np +from tensorflow.python import tf2 from tensorflow.python.eager import context +from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.platform import test @@ -69,6 +73,19 @@ class StackOpTest(test.TestCase): c = array_ops.parallel_stack(xs) self.assertAllEqual(c, data) + def testParallelConcatShapeZero(self): + if not tf2.enabled(): + self.skipTest(""only fails in TF2"") + + @def_function.function + def f(): + y = gen_array_ops.parallel_concat(values=[[""tf""]], shape=0) + return y + + with self.assertRaisesRegex(errors.InvalidArgumentError, + r""0th dimension of value .* is less than""): + f() + def testSimpleParallelGPU(self): # tf.parallel_stack is only supported in graph mode. with ops.Graph().as_default(): ",1,train 5c8c9a8bfe750f9743d0c859bae112060b216f5c,tensorflow/tensorflow,"Fixing security fixes in boosted trees ops PiperOrigin-RevId: 405669548 Change-Id: Iae224d240d1779bcc02405c2fff99785644fbd0d",stats_ops.cc,"@@ -72,7 +72,10 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { &stats_summary_list)); const int64_t num_buckets = stats_summary_list[0].dim_size(1); // Check for single logit: 1 gradient + 1 hessian value. - DCHECK_EQ(stats_summary_list[0].dim_size(2), 2); + OP_REQUIRES(context, stats_summary_list[0].dim_size(2) == 2, + errors::InvalidArgument(""stats_summary_list[0] must have "" + ""exactly 2 dimensions, obtained: "", + stats_summary_list[0].dim_size(2))); std::vector::ConstTensor> stats_summary; stats_summary.reserve(stats_summary_list.size()); for (const auto& tensor : stats_summary_list) { @@ -275,8 +278,13 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel { const int32_t num_buckets = stats_summary_t->dim_size(2) - 1; const int32_t logits_dim = logits_dim_; const int32_t hessian_dim = stats_summary_t->dim_size(3) - logits_dim; - DCHECK_GT(hessian_dim, 0); - DCHECK_LE(hessian_dim, logits_dim * logits_dim); + OP_REQUIRES(context, hessian_dim > 0, + errors::InvalidArgument(""hessian dim should be < 0, got "", + hessian_dim)); + OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim, + errors::InvalidArgument( + ""hessian dim should be <= "", logits_dim * logits_dim, + "" but got: "", hessian_dim)); const Tensor* l1_t; OP_REQUIRES_OK(context, context->input(""l1"", &l1_t)); @@ -624,8 +632,13 @@ class BoostedTreesCalculateBestFeatureSplitV2 : public OpKernel { const int32_t logits_dim = logits_dim_; const int32_t hessian_dim = stats_summaries_list[0].dim_size(3) - logits_dim; - DCHECK_GT(hessian_dim, 0); - DCHECK_LE(hessian_dim, logits_dim * logits_dim); + OP_REQUIRES(context, hessian_dim > 0, + errors::InvalidArgument(""hessian dim should be < 0, got "", + hessian_dim)); + OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim, + errors::InvalidArgument( + ""hessian dim should be <= "", logits_dim * logits_dim, + "" but got: "", hessian_dim)); // Vector of stats_summaries; each element is stats for feature of shape // [max_splits, feature_dim, num_buckets, logits_dim + hessian_dim]. 
@@ -1002,6 +1015,10 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel { const Tensor* node_id_range_t; OP_REQUIRES_OK(context, context->input(""node_id_range"", &node_id_range_t)); const auto node_id_range = node_id_range_t->vec(); + OP_REQUIRES( + context, node_id_range.size() == 2, + errors::InvalidArgument(""node_id_range should have 2 entries, got: "", + node_id_range.size())); const int32_t node_id_first = node_id_range(0); // inclusive const int32_t node_id_last = node_id_range(1); // exclusive @@ -1075,6 +1092,11 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel { ""dims, the last value in stats_summary_shape, which was "", stats_dims, "". At index ("", idx, "", 4), stats_summary_indices contains value "", stat_dim)); + OP_REQUIRES(context, stat_dim >= 0, + errors::InvalidArgument( + ""Stat dim, the sum of logits dim and hessian dim in "" + ""stats_summary_indices, should be >= 0, which was "", + stat_dim, "" at index "", idx)); std::pair const& f_insert_result = f_map.insert( FeatureMapIterator::value_type(feature_dim, BucketMap())); auto& b_map = f_insert_result.first->second; @@ -1307,6 +1329,12 @@ class BoostedTreesMakeStatsSummaryOp : public OpKernel { const Tensor* gradients_t; OP_REQUIRES_OK(context, context->input(""gradients"", &gradients_t)); const auto gradients = gradients_t->matrix(); + OP_REQUIRES( + context, node_ids.size() == gradients.dimension(0), + errors::InvalidArgument( + ""node_ids size should match 0th dim of gradients. node ids "" + ""size: "", + node_ids.size(), "", gradients dim0: "", gradients.dimension(0))); // hessians const Tensor* hessians_t; OP_REQUIRES_OK(context, context->input(""hessians"", &hessians_t)); @@ -1376,6 +1404,13 @@ class BoostedTreesAggregateStatsOp : public OpKernel { OP_REQUIRES_OK(context, context->input(""gradients"", &gradients_t)); const auto gradients = gradients_t->matrix(); + OP_REQUIRES( + context, node_ids.size() == gradients.dimension(0), + errors::InvalidArgument( + ""node_ids size should match 0th dim of gradients. node ids "" + ""size: "", + node_ids.size(), "", gradients dim0: "", gradients.dimension(0))); + // hessians. const Tensor* hessians_t; OP_REQUIRES_OK(context, context->input(""hessians"", &hessians_t)); @@ -1406,6 +1441,9 @@ class BoostedTreesAggregateStatsOp : public OpKernel { for (int i = 0; i < batch_size; ++i) { const int32_t node = node_ids(i); + OP_REQUIRES(context, node >= 0, + errors::InvalidArgument( + ""node_ids "", i, ""th entry should be >=0, got: "", node)); for (int feature_dim = 0; feature_dim < feature_dims; ++feature_dim) { const int32_t feature_value = feature(i, feature_dim); const int32_t bucket = @@ -1612,7 +1650,12 @@ class BoostedTreesSparseAggregateStatsOp : public OpKernel { const int64_t stats_dims = logits_dims + hessians_dims; const int64_t num_sparse_entries = feature_indices_t->dim_size(0); const int32_t feature_dims = feature_shape(1); - DCHECK_LE(num_sparse_entries, batch_size * feature_dims); + OP_REQUIRES(context, num_sparse_entries <= batch_size * feature_dims, + errors::InvalidArgument( + ""feature_indices dim0 should be <= gradients dim0 * "" + ""feature_shape[1]. features_indices dim0: "", + num_sparse_entries, "" gradients dim0: "", batch_size, + "", feature_shape[1]: "", feature_dims)); // Aggregate statistics info to map. 
StatsPartitionMap stats_map; ",1,train 5c8c9a8bfe750f9743d0c859bae112060b216f5c,tensorflow/tensorflow,"Fixing security fixes in boosted trees ops PiperOrigin-RevId: 405669548 Change-Id: Iae224d240d1779bcc02405c2fff99785644fbd0d",stats_ops_test.py,"@@ -17,9 +17,11 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import boosted_trees_ops +from tensorflow.python.ops import gen_boosted_trees_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import googletest @@ -1665,6 +1667,199 @@ class StatsOpsTest(test_util.TensorFlowTestCase): """"""Tests numeric precision."""""" self._verify_precision(length=50000000) + def testBoostedTreesCalculateBestGainsPerFeatureSecurity(self): + node_id_range = [1, 2] + stats_summary_list = [[[[]]]] + l1 = [1.0] + l2 = [1.0] + tree_complexity = [1.0] + min_node_weight = [1.17] + max_splits = 1 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_calculate_best_gains_per_feature( + node_id_range=node_id_range, + stats_summary_list=stats_summary_list, + l1=l1, + l2=l2, + tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + max_splits=max_splits) + + def testBoostedTreesCalculateBestFeatureSplitSecurity(self): + node_id_range = [1, 2] + stats_summary = [[[[]]]] + split_type = 'equality' + l1 = [1.0] + l2 = [1.0] + tree_complexity = [1.0] + min_node_weight = [1.17] + logits_dimension = 5 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split( + node_id_range=node_id_range, + stats_summary=stats_summary, + l1=l1, + l2=l2, + tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + logits_dimension=logits_dimension, + split_type=split_type) + + def testBoostedTreesCalculateBestFeatureSplitSecurity2(self): + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split( + node_id_range=[0, 8], + stats_summary=[[[[1.0], [2.0], [3.0]]]], + l1=[0.5], + l2=[0.5], + tree_complexity=[0.1], + min_node_weight=[1.0], + logits_dimension=8) + + def testBoostedTreesCalculateBestFeatureSplitV2Security(self): + node_id_range = [1, 2] + stats_summaries_list = [[[[[]]]]] + split_types = ['inequality'] + candidate_feature_ids = [1, 2, 3, 4] + l1 = [1.0] + l2 = [1.0] + tree_complexity = [1.0] + min_node_weight = [1.17] + logits_dimension = 5 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split_v2( + node_id_range=node_id_range, + stats_summaries_list=stats_summaries_list, + split_types=split_types, + candidate_feature_ids=candidate_feature_ids, + l1=l1, + l2=l2, + tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + logits_dimension=logits_dimension) + + def testBoostedTreesSparseCalculateBestFeatureSplitSecurity(self): + node_id_range = [] + stats_summary_indices = [[]] + stats_summary_values = [1.0] + stats_summary_shape = [1, 1, 1, 1] + l1 = [1.0] + l2 = [1.0] + tree_complexity = [0.5] + min_node_weight = [1.0] + logits_dimension = 3 + split_type = 'inequality' + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + 
gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split( + node_id_range=node_id_range, + stats_summary_indices=stats_summary_indices, + stats_summary_values=stats_summary_values, + stats_summary_shape=stats_summary_shape, + l1=l1, + l2=l2, + tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + logits_dimension=logits_dimension, + split_type=split_type) + + def testBoostedTreesSparseCalculateBestFeatureSplitSecurity2(self): + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split( + node_id_range=[0, 1], + stats_summary_indices=[[0, -1, -1, -1], [1, 0, -1, 0], [1, 0, 0, -1]], + stats_summary_values=[0.1, 0.2, 0.3], + stats_summary_shape=[1, 1, 1, 1], + l1=[0.5], + l2=[0.5], + tree_complexity=[0.1], + min_node_weight=[1.0], + logits_dimension=1) + + def testBoostedTreesMakeStatsSummarySecurity(self): + node_ids = [1, 2] + gradients = [[]] + hessians = [[0.2], [0.1]] + bucketized_features_list = [[1], [2]] + max_splits = 3 + num_buckets = 3 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_make_stats_summary( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + bucketized_features_list=bucketized_features_list, + max_splits=max_splits, + num_buckets=num_buckets) + + def testBoostedTreesMakeStatsSummarySecurity2(self): + node_ids = [1, 2, 3] + gradients = [[0.1], [0.2]] + hessians = [[0.2], [0.1]] + bucketized_features_list = [[1], [2]] + max_splits = 3 + num_buckets = 3 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_make_stats_summary( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + bucketized_features_list=bucketized_features_list, + max_splits=max_splits, + num_buckets=num_buckets) + + def testBoostedTreesAggregateStatsSecurity(self): + node_ids = [1, 2] + gradients = [[]] + hessians = [[100.0]] + feature = [[0, 0, 0]] + max_splits = 100 + num_buckets = 100 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_aggregate_stats( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + feature=feature, + max_splits=max_splits, + num_buckets=num_buckets) + + def testBoostedTreesAggregateStatsSecurity2(self): + node_ids = [-10] + gradients = [[0.0, 0.0]] + hessians = [[100.0]] + feature = [[0, 0, 0]] + max_splits = 100 + num_buckets = 100 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + self.evaluate( + gen_boosted_trees_ops.boosted_trees_aggregate_stats( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + feature=feature, + max_splits=max_splits, + num_buckets=num_buckets)) + + def testBoostedTreesSparseAggregateStatsSecurity(self): + node_ids = [] + gradients = [[1.0]] + hessians = [[100.0]] + feature_indices = [[0, 0, 0]] + feature_values = [0, 0, 0] + feature_shape = [0, 0, 0] + max_splits = 100 + num_buckets = 100 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_sparse_aggregate_stats( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + feature_indices=feature_indices, + feature_values=feature_values, + feature_shape=feature_shape, + max_splits=max_splits, + num_buckets=num_buckets) + class BestMultiDimFeatureSplitMultiClassV2Op(StatsOpsTest): """"""Tests multi-class/multi-regression for best splits using V2 op."""""" ",1,train 
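The recurring pattern in the boosted-trees fixes above is replacing debug-only assertions with always-on runtime checks. A generic sketch of that difference, using plain assert and a returned error string (TensorFlow's DCHECK/OP_REQUIRES macros exist only inside its codebase): an assert compiles away in release builds, so only the explicit check protects against malformed, attacker-controlled inputs.

#include <cassert>
#include <iostream>
#include <string>

// Debug-only: in a release build (-DNDEBUG) this check disappears entirely,
// and a bad hessian_dim flows straight into later indexing code.
void ValidateWithAssert(int hessian_dim, int logits_dim) {
  assert(hessian_dim > 0 && hessian_dim <= logits_dim * logits_dim);
}

// Always on: the caller gets an error instead of undefined behavior.
// Returns an empty string on success.
std::string ValidateWithError(int hessian_dim, int logits_dim) {
  if (hessian_dim <= 0)
    return "hessian dim should be > 0, got " + std::to_string(hessian_dim);
  if (hessian_dim > logits_dim * logits_dim)
    return "hessian dim should be <= " + std::to_string(logits_dim * logits_dim) +
           " but got: " + std::to_string(hessian_dim);
  return "";
}

int main() {
  std::cout << ValidateWithError(-3, 2) << "\n";  // reports the bad value in any build
  ValidateWithAssert(1, 2);                       // passes; would vanish under -DNDEBUG
  return 0;
}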
701cfaca222a82afbeeb17496bd718baa65a67d2,tensorflow/tensorflow,"Fix heap out of bounds error in tf.raw_ops.SparseCountSparseOutput shape inference when it is called with invalid inputs, and add a test for it. PiperOrigin-RevId: 405766415 Change-Id: I77d244ef35f351ef7b6f821efd959cac2c66db24",count_ops.cc,"@@ -41,6 +41,8 @@ Status DenseCountSparseOutputShapeFn(InferenceContext *c) { } Status SparseCountSparseOutputShapeFn(InferenceContext *c) { + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &unused)); auto rank = c->Dim(c->input(0), 1); auto nvals = c->UnknownDim(); c->set_output(0, c->Matrix(nvals, rank)); // out.indices ",1,train 701cfaca222a82afbeeb17496bd718baa65a67d2,tensorflow/tensorflow,"Fix heap out of bounds error in tf.raw_ops.SparseCountSparseOutput shape inference when it is called with invalid inputs, and add a test for it. PiperOrigin-RevId: 405766415 Change-Id: I77d244ef35f351ef7b6f821efd959cac2c66db24",bincount_ops_test.py,"@@ -831,6 +831,25 @@ class TestSparseCountFailureModes(test.TestCase): self.evaluate(bincount_ops.sparse_bincount(x, weights=weights, axis=-1)) +class RawOpsHeapOobTest(test.TestCase, parameterized.TestCase): + + @test_util.run_v1_only(""Test security error"") + def testSparseCountSparseOutputBadIndicesShapeTooSmall(self): + indices = [1] + values = [[1]] + weights = [] + dense_shape = [10] + with self.assertRaisesRegex(ValueError, + ""Shape must be rank 2 but is rank 1 for""): + self.evaluate( + gen_count_ops.SparseCountSparseOutput( + indices=indices, + values=values, + dense_shape=dense_shape, + weights=weights, + binary_output=True)) + + @test_util.run_all_in_graph_and_eager_modes @test_util.disable_tfrt class RawOpsTest(test.TestCase, parameterized.TestCase): ",1,train a0d64445116c43cf46a5666bd4eee28e7a82f244,tensorflow/tensorflow,"Prevent OOB access in QuantizeV2 shape inference PiperOrigin-RevId: 400309614 Change-Id: I31412c71b05b4f21b677f7fa715a61499cbee39d",common_shape_fns.cc,"@@ -2559,6 +2559,9 @@ Status QuantizeV2Shape(InferenceContext* c) { if (!s.ok() && s.code() != error::NOT_FOUND) { return s; } + if (axis < -1) { + return errors::InvalidArgument(""axis should be at least -1, got "", axis); + } const int minmax_rank = (axis == -1) ? 0 : 1; TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); ShapeHandle minmax; ",1,test fa6b7782fbb14aa08d767bc799c531f5e1fb3bb8,tensorflow/tensorflow,"Fix null pointer exception in shape inference function when tf.ragged.cross() is called with invalid inputs. PiperOrigin-RevId: 400045848 Change-Id: Ia65501583b85cf1ec14a252d83fbdd716817a516",ragged_array_ops.cc,"@@ -99,6 +99,13 @@ REGISTER_OP(""RaggedCross"") int dense_start = num_ragged * 2 + num_sparse * 3; for (int i = 0; i < dense_types.size(); ++i) { ShapeHandle dense_input = c->input(i + dense_start); + int32 rank = c->Rank(dense_input); + if (rank == InferenceContext::kUnknownRank) { + continue; + } else if (rank != 2) { + return errors::InvalidArgument( + ""tf.ragged.cross only supports inputs with rank=2""); + } int64_t batch_size = c->Value(c->Dim(dense_input, 0)); if (batch_size != InferenceContext::kUnknownDim) { ShapeHandle row_splits = c->Vector(batch_size + 1); ",1,train fa6b7782fbb14aa08d767bc799c531f5e1fb3bb8,tensorflow/tensorflow,"Fix null pointer exception in shape inference function when tf.ragged.cross() is called with invalid inputs. 
PiperOrigin-RevId: 400045848 Change-Id: Ia65501583b85cf1ec14a252d83fbdd716817a516",ragged_cross_op_test.py,"@@ -18,10 +18,12 @@ from absl.testing import parameterized import numpy as np +from tensorflow.python.eager import def_function from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util from tensorflow.python.ops import sparse_ops from tensorflow.python.ops.ragged import ragged_array_ops @@ -358,6 +360,16 @@ class RaggedCrossOpTest(test_util.TensorFlowTestCase, parameterized.TestCase): dense_const([[2], [3]])], exception=(ValueError, errors.InvalidArgumentError), message='inputs must all have the same batch dimension size'), + dict( + testcase_name='3DDenseTensor', + inputs=[dense_const([[[1]]])], + exception=(ValueError, errors.InvalidArgumentError), + message='tf.ragged.cross only supports inputs with rank=2'), + dict( + testcase_name='0DDenseTensor', + inputs=[dense_const(1)], + exception=(ValueError, errors.InvalidArgumentError), + message='tf.ragged.cross only supports inputs with rank=2'), ]) def testStaticError(self, inputs, exception=ValueError, message=None): with self.assertRaisesRegex(exception, message): @@ -368,17 +380,36 @@ class RaggedCrossOpTest(test_util.TensorFlowTestCase, parameterized.TestCase): testcase_name='3DRaggedTensor', inputs=[ragged_const([[[1]]], ragged_rank=1)], message='tf.ragged.cross only supports inputs with rank=2'), + dict( + testcase_name='0DDenseTensor', + inputs=[dense_const(1)], + signature=[[tensor_spec.TensorSpec(None, dtypes.int32)]], + exception=(ValueError, errors.InvalidArgumentError), + message='tf.ragged.cross only supports inputs with rank=2'), + dict( + testcase_name='1DDenseTensor', + inputs=[dense_const([1])], + signature=[[tensor_spec.TensorSpec(None, dtypes.int32)]], + exception=(ValueError, errors.InvalidArgumentError), + message='tf.ragged.cross only supports inputs with rank=2'), dict( testcase_name='3DDenseTensor', inputs=[dense_const([[[1]]])], + signature=[[tensor_spec.TensorSpec(None, dtypes.int32)]], + exception=(ValueError, errors.InvalidArgumentError), message='tf.ragged.cross only supports inputs with rank=2'), ]) def testRuntimeError(self, inputs, exception=errors.InvalidArgumentError, - message=None): + message=None, + signature=None): + @def_function.function(input_signature=signature) + def fn(x): + return ragged_array_ops.cross(x) + with self.assertRaisesRegex(exception, message): - self.evaluate(ragged_array_ops.cross(inputs)) + self.evaluate(fn(inputs)) def _ragged_to_sparse(self, t): if ragged_tensor.is_ragged(t): ",1,train afac8158d43691661ad083f6dd9e56f327c1dcb7,tensorflow/tensorflow,"Fix the deadlock issue of recursive tf.function. Replace threading.Lock with threading.RLock to allow recursive tf.function. PiperOrigin-RevId: 401282729 Change-Id: I3d10416f2eb2c15e2055bb4f4afee3d62bd6c428",def_function.py,"@@ -572,7 +572,7 @@ class Function(core.GenericFunction): ValueError: if `input_signature` is not None and the `python_function`'s argspec has keyword arguments. 
"""""" - self._lock = threading.Lock() + self._lock = threading.RLock() self._python_function = python_function self._function_spec = function_lib.FunctionSpec.from_function_and_signature( python_function, @@ -613,7 +613,7 @@ class Function(core.GenericFunction): def __setstate__(self, state): """"""Restore from pickled state."""""" self.__dict__ = state - self._lock = threading.Lock() + self._lock = threading.RLock() self._descriptor_cache = weakref.WeakKeyDictionary() self._key_for_call_stats = self._get_key_for_call_stats() ",1,train afac8158d43691661ad083f6dd9e56f327c1dcb7,tensorflow/tensorflow,"Fix the deadlock issue of recursive tf.function. Replace threading.Lock with threading.RLock to allow recursive tf.function. PiperOrigin-RevId: 401282729 Change-Id: I3d10416f2eb2c15e2055bb4f4afee3d62bd6c428",def_function_test.py,"@@ -25,6 +25,7 @@ from absl.testing import parameterized from six.moves import range from tensorflow.python.autograph.core import converter +from tensorflow.python.eager import backprop from tensorflow.python.eager import def_function from tensorflow.python.eager import lift_to_graph from tensorflow.python.framework import constant_op @@ -36,6 +37,7 @@ from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util from tensorflow.python.module import module from tensorflow.python.ops import array_ops +from tensorflow.python.ops import cond_v2 from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops @@ -1261,6 +1263,117 @@ class DefFunctionTest(test.TestCase, parameterized.TestCase): self.assertAllEqual(obj2.testDouble.experimental_get_tracing_count(), 3) self.assertAllEqual(obj1.testDouble.experimental_get_tracing_count(), 2) + def test_recursive_tf_function(self): + + @def_function.function + def recursive_fn(n): + if n > 0: + return recursive_fn(n - 1) + return 1 + + self.assertEqual(recursive_fn(5).numpy(), 1) + + def test_recursive_tf_function_with_gradients(self): + + @def_function.function + def recursive_fn(n, x): + if n > 0: + return n * recursive_fn(n - 1, x) + else: + return x + + x = variables.Variable(1.0) + with backprop.GradientTape() as tape: + g = recursive_fn(5, x) + + dg_dx = tape.gradient(g, x) + self.assertEqual(dg_dx.numpy(), 120) + + def test_recursive_python_function(self): + + def recursive_py_fn(n): + if n > 0: + return recursive_py_fn(n - 1) + return 1 + + @def_function.function + def recursive_fn(n): + return recursive_py_fn(n) + + self.assertEqual(recursive_fn(5).numpy(), 1) + + def test_recursive_python_function_with_gradients(self): + + def recursive_py_fn(n, x): + if n > 0: + return n * recursive_py_fn(n - 1, x) + return x + + @def_function.function + def recursive_fn(n, x): + return recursive_py_fn(n, x) + + x = variables.Variable(1.0) + with backprop.GradientTape() as tape: + g = recursive_fn(5, x) + + dg_dx = tape.gradient(g, x) + self.assertEqual(dg_dx.numpy(), 120) + + def test_recursive_tf_function_call_each_other(self): + + @def_function.function + def recursive_fn1(n): + if n <= 1: + return 1 + return recursive_fn2(n - 1) + + @def_function.function + def recursive_fn2(n): + if n <= 1: + return 2 + return recursive_fn1(n - 1) + + self.assertEqual(recursive_fn1(5).numpy(), 1) + self.assertEqual(recursive_fn1(6).numpy(), 2) + self.assertEqual(recursive_fn2(5).numpy(), 2) + self.assertEqual(recursive_fn2(6).numpy(), 1) + + def test_recursive_tf_function_call_each_other_with_gradients(self): + + 
@def_function.function + def recursive_fn1(n, x): + if n <= 1: + return x + return n * recursive_fn2(n - 1, x) + + @def_function.function + def recursive_fn2(n, x): + if n <= 1: + return 2 * x + return n * recursive_fn1(n - 1, x) + + x = variables.Variable(1.0) + with backprop.GradientTape() as tape: + g1 = recursive_fn1(5, x) + + dg1_dx = tape.gradient(g1, x) + self.assertEqual(dg1_dx.numpy(), 120) + + with backprop.GradientTape() as tape: + g2 = recursive_fn2(5, x) + + dg2_dx = tape.gradient(g2, x) + self.assertEqual(dg2_dx.numpy(), 240) + + def test_recursive_tf_function_with_cond(self): + @def_function.function(autograph=False) + def recursive_fn(n): + return cond_v2.cond_v2(n > 0, recursive_fn(n - 1), 1) + + with self.assertRaises(RecursionError): + recursive_fn(constant_op.constant(5)) + if __name__ == '__main__': ops.enable_eager_execution() ",1,train afac8158d43691661ad083f6dd9e56f327c1dcb7,tensorflow/tensorflow,"Fix the deadlock issue of recursive tf.function. Replace threading.Lock with threading.RLock to allow recursive tf.function. PiperOrigin-RevId: 401282729 Change-Id: I3d10416f2eb2c15e2055bb4f4afee3d62bd6c428",function.py,"@@ -3037,7 +3037,7 @@ class Function(object): if self.input_signature is not None: self._hashable_input_signature = hash(self.flat_input_signature) - self._lock = threading.Lock() + self._lock = threading.RLock() # _descriptor_cache is a of instance of a class to an instance-specific # `Function`, used to make sure defun-decorated methods create different # functions for each instance. ",1,train d3738dd70f1c9ceb547258cbb82d853da8771850,tensorflow/tensorflow,"Ensuring that the input to DeserializeSparse is not a scalar. PiperOrigin-RevId: 400554784 Change-Id: Ib658701040d4f707f20b8706e251d5fff46b2671",sparse_ops.cc,"@@ -16,6 +16,7 @@ limitations under the License. #include ""tensorflow/core/framework/common_shape_fns.h"" #include ""tensorflow/core/framework/op.h"" #include ""tensorflow/core/framework/shape_inference.h"" +#include ""tensorflow/core/framework/types.pb.h"" #include ""tensorflow/core/platform/errors.h"" namespace tensorflow { @@ -159,6 +160,8 @@ REGISTER_OP(""DeserializeSparse"") .Attr(""Tserialized: {string, variant} = DT_STRING"") .SetShapeFn([](InferenceContext* c) { // serialized sparse is [?, ..., ?, 3] vector. + ShapeHandle unused_shape; + TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 1, &unused_shape)); DimensionHandle unused; TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), -1), 3, &unused)); c->set_output(0, c->Matrix(InferenceContext::kUnknownDim, ",1,test d3738dd70f1c9ceb547258cbb82d853da8771850,tensorflow/tensorflow,"Ensuring that the input to DeserializeSparse is not a scalar. 
PiperOrigin-RevId: 400554784 Change-Id: Ib658701040d4f707f20b8706e251d5fff46b2671",sparse_serialization_ops_test.py,"@@ -16,10 +16,12 @@ import numpy as np +from tensorflow.python.eager import def_function from tensorflow.python.framework import dtypes from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import test @@ -460,6 +462,18 @@ class SerializeSparseTest(test.TestCase): self._testDeserializeFailsInvalidProtoHelper( sparse_ops.serialize_sparse, sparse_ops.deserialize_many_sparse) + def testDeserializeInvalidVariant(self): + mu = gen_resource_variable_ops.mutex_v2() + mu_lock = gen_resource_variable_ops.mutex_lock(mutex=mu) + + @def_function.function + def f(): + return sparse_ops.deserialize_sparse( + serialized_sparse=mu_lock, dtype=dtypes.int32) + + with self.assertRaisesRegex(ValueError, r""Shape must be at least rank 1""): + f() + if __name__ == ""__main__"": test.main() ",1,test c79ba87153ee343401dbe9d1954d7f79e521eb14,tensorflow/tensorflow,"Make Transpose's shape inference function validate that negative `perm` values are within the tensor's rank. PiperOrigin-RevId: 403252853 Change-Id: Ia6b31b45b237312668bb31c2c3b3c7bbce2d2610",array_ops.cc,"@@ -168,7 +168,7 @@ Status TransposeShapeFn(InferenceContext* c) { for (int32_t i = 0; i < rank; ++i) { int64_t in_idx = data[i]; - if (in_idx >= rank) { + if (in_idx >= rank || in_idx <= -rank) { return errors::InvalidArgument(""perm dim "", in_idx, "" is out of range of input rank "", rank); } ",1,train 05cbebd3c6bb8f517a158b0155debb8df79017ff,tensorflow/tensorflow,"Fix a NPE issue in invalid Exit op. Now it will report an error instead of crash. PiperOrigin-RevId: 404089902 Change-Id: Ia6ec55445ea70ad045a4d339d354959ad0618f2a",immutable_executor_state.cc,"@@ -316,6 +316,10 @@ Status ImmutableExecutorState::BuildControlFlowInfo(const Graph* g, } else if (IsExit(curr_node)) { // Exit to the parent frame. parent = parent_nodes[curr_id]; + if (!parent) { + return errors::InvalidArgument( + ""Invalid Exit op: Cannot find a corresponding Enter op.""); + } frame_name = cf_info->frame_names[parent->id()]; parent = parent_nodes[parent->id()]; } else { ",1,train a8ad3e5e79c75f36edb81e0ba3f3c0c5442aeddc,tensorflow/tensorflow,"Update TPU AllToAll op to avoid divide by 0. 
PiperOrigin-RevId: 400259638 Change-Id: Ic4cfe4fe7159da38caed8044ee005f898e42cd86",tpu_cross_replica_ops.cc,"@@ -32,6 +32,7 @@ REGISTER_OP(""AllToAll"") .Attr(""split_count: int"") .SetShapeFn([](InferenceContext* c) { ShapeHandle input = c->input(0); + ShapeHandle group_assignment = c->input(1); if (!c->RankKnown(input)) { c->set_output(0, c->UnknownShape()); return Status::OK(); @@ -42,6 +43,21 @@ REGISTER_OP(""AllToAll"") int split_dimension; int split_count; TF_RETURN_IF_ERROR(c->GetAttr(""split_count"", &split_count)); + if (split_count < 1) { + return errors::InvalidArgument(""split_count "", split_count, + "" must at least be one.""); + } + if (c->RankKnown(group_assignment) && c->Rank(group_assignment) != 2) { + return errors::InvalidArgument(""group_assignment must have rank 2.""); + } + DimensionHandle num_replicas_per_group = c->Dim(group_assignment, 1); + if (c->ValueKnown(num_replicas_per_group) && + (c->Value(num_replicas_per_group) != split_count)) { + return errors::InvalidArgument( + ""split_count "", split_count, + "" must equal the size of the second dimension of group_assignment "", + c->Value(num_replicas_per_group)); + } TF_RETURN_IF_ERROR(c->GetAttr(""concat_dimension"", &concat_dimension)); @@ -65,6 +81,12 @@ REGISTER_OP(""AllToAll"") dims[i] = c->MakeDim(c->Value(dims[i]) * split_count); } if (i == split_dimension) { + if (c->ValueKnown(dims[i]) && + (c->Value(dims[i]) % split_count != 0)) { + return errors::InvalidArgument( + ""input dimension "", c->Value(dims[i]), + "" not divisible by split_count "", split_count); + } dims[i] = c->MakeDim(c->Value(dims[i]) / split_count); } } ",1,train a8ad3e5e79c75f36edb81e0ba3f3c0c5442aeddc,tensorflow/tensorflow,"Update TPU AllToAll op to avoid divide by 0. PiperOrigin-RevId: 400259638 Change-Id: Ic4cfe4fe7159da38caed8044ee005f898e42cd86",tpu_test.py,"@@ -32,6 +32,7 @@ from tensorflow.python.platform import test from tensorflow.python.tpu import tpu from tensorflow.python.tpu import tpu_feed from tensorflow.python.tpu import training_loop +from tensorflow.python.tpu.ops import tpu_ops class TPUContextTest(test.TestCase): @@ -165,6 +166,51 @@ class TPUGraphPruneTest(test.TestCase): graph.get_operation_by_name(""import/y"").get_attr( tpu._TPU_REPLICATE_ATTR) + +class TPUOpsTest(test.TestCase): + + def test_all_to_all_zero_split_count(self): + with self.assertRaisesRegex( + ValueError, ""split_count 0 must at least be one""): + tpu_ops.all_to_all( + x=[0.0, 0.1652, 0.6543], + group_assignment=[1, -1], + concat_dimension=0, + split_dimension=0, + split_count=0) + + def test_all_to_all_group_assignment_wrong_shape(self): + with self.assertRaisesRegex( + ValueError, ""group_assignment must have rank 2""): + tpu_ops.all_to_all( + x=[0.0, 0.1652, 0.6543], + group_assignment=[1, -1], + concat_dimension=0, + split_dimension=0, + split_count=2) + + def test_all_to_all_split_count_not_equal_to_group_assignment_shape(self): + with self.assertRaisesRegex( + ValueError, ""split_count 1 must equal the size of the second dimension "" + ""of group_assignment 2""): + tpu_ops.all_to_all( + x=[0.0, 0.1652, 0.6543], + group_assignment=[[0, 1], [2, 3]], + concat_dimension=0, + split_dimension=0, + split_count=1) + + def test_all_to_all_split_count_not_divide_input_shape(self): + with self.assertRaisesRegex( + ValueError, ""input dimension 3 not divisible by split_count 2""): + tpu_ops.all_to_all( + x=[[0.0], [0.1652], [0.6543]], + group_assignment=[[0, 1], [2, 3]], + concat_dimension=1, + split_dimension=0, + split_count=2) + + def do_einsum(): a = 
array_ops.placeholder(dtype=dtypes.float32, name=""a"", shape=[2, 3, 4]) b = array_ops.placeholder(dtype=dtypes.float32, name=""b"", shape=[2, 4, 5]) ",1,train e6cf28c72ba2eb949ca950d834dd6d66bb01cfae,tensorflow/tensorflow,"Validate that matrix dimension sizes in SparseMatMul are positive. PiperOrigin-RevId: 401149683 Change-Id: Ib33eafc561a39c8741ece80b2edce6d4aae9a57d",sparse_matmul_op.cc,"@@ -32,6 +32,7 @@ limitations under the License. #include ""tensorflow/core/kernels/fill_functor.h"" #include ""tensorflow/core/lib/core/blocking_counter.h"" #include ""tensorflow/core/lib/core/threadpool.h"" +#include ""tensorflow/core/platform/errors.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/macros.h"" #include ""tensorflow/core/platform/mutex.h"" @@ -980,9 +981,18 @@ class SparseMatMulOp : public OpKernel { errors::InvalidArgument( ""Matrix size incompatible: a: "", a.shape().DebugString(), "", b: "", b.shape().DebugString())); + OP_REQUIRES(ctx, m >= 0 && n >= 0 && k >= 0, + errors::InvalidArgument( + ""Matrix dimensions cannot be negative: a: "", + a.shape().DebugString(), "", b: "", b.shape().DebugString())); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({m, n}), &output)); + // Return early if at least one of the output dimension size is 0. + if (m == 0 || n == 0) { + return; + } + if (k == 0) { // If the inner dimension k in the matrix multiplication is zero, we fill // the output with zeros. ",1,train ca38dab9d3ee66c5de06f11af9a4b1200da5ef75,tensorflow/tensorflow,"Fix undefined behavior in CollectiveReduceV2 and others We should not call done after it's moved. PiperOrigin-RevId: 400838185 Change-Id: Ifc979740054b8f8c6f4d50acc89472fe60c4fdb1",collective_ops.cc,"@@ -494,15 +494,17 @@ class CollectiveOpV2Kernel : public AsyncOpKernel { const Tensor& group_size, const Tensor& group_key, const Tensor& instance_key) { if (group_size.dims() > 0) { - return errors::Internal(""Unexpected dimensions on input group_size, got "", - group_size.shape().DebugString()); + return errors::InvalidArgument( + ""Unexpected dimensions on input group_size, got "", + group_size.shape().DebugString()); } if (group_key.dims() > 0) { - return errors::Internal(""Unexpected dimensions on input group_key, got "", - group_key.shape().DebugString()); + return errors::InvalidArgument( + ""Unexpected dimensions on input group_key, got "", + group_key.shape().DebugString()); } if (instance_key.dims() > 0) { - return errors::Internal( + return errors::InvalidArgument( ""Unexpected dimensions on input instance_key, got "", instance_key.shape().DebugString()); } @@ -625,7 +627,7 @@ class CollectiveReduceV2OpKernel : public CollectiveOpV2Kernel { /*group_size*/ c->input(1), /*group_key*/ c->input(2), /*instance_key*/ c->input(3)), - done); + done_with_cleanup); col_params->instance.shape = c->input(0).shape(); col_params->merge_op = merge_op_.get(); col_params->final_op = final_op_.get(); @@ -855,14 +857,15 @@ class CollectiveInitializeCommunicatorOpKernel : public AsyncOpKernel { Status CheckInputs(Tensor group_size_t, Tensor group_key_t) { if (group_size_t.dims() > 0) { - return errors::Internal( + return errors::InvalidArgument( ""Unexpected dimensions on input group_size. 
"" ""It shoulbe a scalar, got tensor with shape "", group_size_t.shape().DebugString()); } if (group_key_t.dims() > 0) { - return errors::Internal(""Unexpected dimensions on input group_key, got "", - group_key_t.shape().DebugString()); + return errors::InvalidArgument( + ""Unexpected dimensions on input group_key, got "", + group_key_t.shape().DebugString()); } auto group_size = group_size_t.unaligned_flat()(0); @@ -1084,7 +1087,7 @@ class CollectiveReduceV3OpKernel : public CollectiveOpV3Kernel { }; core::RefCountPtr resource; OP_REQUIRES_OK_ASYNC(c, LookupResource(c, HandleFromInput(c, 1), &resource), - done); + done_with_cleanup); Tensor group_assignment = c->input(2); @@ -1134,7 +1137,7 @@ class CollectiveAllToAllV3OpKernel : public CollectiveOpV3Kernel { }; core::RefCountPtr resource; OP_REQUIRES_OK_ASYNC(c, LookupResource(c, HandleFromInput(c, 1), &resource), - done); + done_with_cleanup); Tensor group_assignment = c->input(2); ",1,train ca38dab9d3ee66c5de06f11af9a4b1200da5ef75,tensorflow/tensorflow,"Fix undefined behavior in CollectiveReduceV2 and others We should not call done after it's moved. PiperOrigin-RevId: 400838185 Change-Id: Ifc979740054b8f8c6f4d50acc89472fe60c4fdb1",collective_ops_test.py,"@@ -1182,6 +1182,69 @@ class InputPipelineTest(test.TestCase): self.assertAllEqual(self.evaluate(f()), [[3.], [3.]]) +@combinations.generate( + combinations.times( + combinations.combine(collective_op=[ + combinations.NamedObject('all_reduce_v2', + CollectiveOpsV2.all_reduce), + combinations.NamedObject('all_gather_v2', + CollectiveOpsV2.all_gather) + ]), device_combination)) +class InvalidInputTest(test.TestCase, parameterized.TestCase): + + def setUp(self): + _setup_context() + super().setUp() + + def testInvalidGroupKey(self, collective_op, device, communication): + dev0 = '/device:%s:0' % device + group_size = 2 + group_key = [100] + instance_key = 100 + in_tensor = constant_op.constant([1.]) + + with self.assertRaises(errors.InvalidArgumentError): + with ops.device(dev0): + collective_op( + in_tensor, + group_size, + group_key, + instance_key, + communication_hint=communication) + + def testInvalidGroupSize(self, collective_op, device, communication): + dev0 = '/device:%s:0' % device + group_size = -2 + group_key = 100 + instance_key = 100 + in_tensor = constant_op.constant([1.]) + + with self.assertRaises(errors.InvalidArgumentError): + with ops.device(dev0): + collective_op( + in_tensor, + group_size, + group_key, + instance_key, + communication_hint=communication) + + def testInvalidInstanceKey(self, collective_op, device, communication): + dev0 = '/device:%s:0' % device + group_size = 2 + group_key = 100 + instance_key = [100] + in_tensor = constant_op.constant([1.]) + + with self.assertRaises(errors.InvalidArgumentError): + with ops.device(dev0): + collective_op( + in_tensor, + group_size, + group_key, + instance_key, + communication_hint=communication) + + class CollectiveOpsV3Test(test.TestCase, parameterized.TestCase): def setUp(self): ",1,train af5fcebb37c8b5d71c237f4e59c6477015c78ce6,tensorflow/tensorflow,"Fix access to undefined memory during shape inference of Cudnn*. 
PiperOrigin-RevId: 400324259 Change-Id: Ie3b7859d19ae24ee9ac2adf413bdc1e851bbc604",cudnn_rnn_ops.cc,"@@ -81,11 +81,17 @@ REGISTER_OP(""CudnnRNN"") .Attr(""seed2: int = 0"") .Attr(""is_training: bool = true"") .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; auto input_shape = c->input(0); auto input_h_shape = c->input(1); + TF_RETURN_IF_ERROR(c->WithRank(input_shape, 3, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(input_h_shape, 3, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 1, &unused)); + auto seq_length = c->Dim(input_shape, 0); auto batch_size = c->Dim(input_shape, 1); auto num_units = c->Dim(input_h_shape, 2); + string direction; TF_RETURN_IF_ERROR(c->GetAttr(""direction"", &direction)); string rnn_mode; @@ -124,8 +130,13 @@ REGISTER_OP(""CudnnRNNV2"") .Attr(""seed2: int = 0"") .Attr(""is_training: bool = true"") .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; auto input_shape = c->input(0); auto input_h_shape = c->input(1); + TF_RETURN_IF_ERROR(c->WithRank(input_shape, 3, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(input_h_shape, 3, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 1, &unused)); + auto seq_length = c->Dim(input_shape, 0); auto batch_size = c->Dim(input_shape, 1); auto num_units = c->Dim(input_h_shape, 2); @@ -171,16 +182,26 @@ REGISTER_OP(""CudnnRNNV3"") .Attr(""is_training: bool = true"") .Attr(""time_major: bool = true"") .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; auto input_shape = c->input(0); auto input_h_shape = c->input(1); auto input_c_shape = c->input(2); + TF_RETURN_IF_ERROR(c->WithRank(input_shape, 3, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(input_h_shape, 3, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 1, &unused)); + auto max_seq_length = c->Dim(input_shape, 0); auto batch_size = c->Dim(input_shape, 1); auto num_units = c->Dim(input_h_shape, 2); + string direction; TF_RETURN_IF_ERROR(c->GetAttr(""direction"", &direction)); string rnn_mode; TF_RETURN_IF_ERROR(c->GetAttr(""rnn_mode"", &rnn_mode)); + if (rnn_mode == ""lstm"") { + TF_RETURN_IF_ERROR(c->WithRank(input_c_shape, 3, &unused)); + } int dir_count = (direction == ""bidirectional"") ? 2 : 1; DimensionHandle output_size; TF_RETURN_IF_ERROR(c->Multiply(num_units, dir_count, &output_size)); ",1,train af5fcebb37c8b5d71c237f4e59c6477015c78ce6,tensorflow/tensorflow,"Fix access to undefined memory during shape inference of Cudnn*. PiperOrigin-RevId: 400324259 Change-Id: Ie3b7859d19ae24ee9ac2adf413bdc1e851bbc604",cudnn_rnn_ops_test.cc,"@@ -68,6 +68,11 @@ TEST(CudnnRNNOpsTest, ForwardLstm_ShapeFn) { .Attr(""direction"", ""unidirectional"") .Finalize(&op.node_def)); INFER_OK(op, input_shapes_desc, output_shapes_desc); + INFER_ERROR(""Shape must be rank 3 "", op, ""[];[?,?,?];[?,?,?];[?]""); + INFER_ERROR(""Shape must be rank 3 "", op, ""[?,?,?];[];[?,?,?];[?]""); + // Disabled because the kernel does not check shape of input_c. 
+ // INFER_ERROR(""Shape must be rank 3 "", op, ""[?,?,?];[?,?,?];[?];[?]""); + INFER_ERROR(""Shape must be rank 1 "", op, ""[?,?,?];[?,?,?];[?,?,?];[]""); } TEST(CudnnRNNOpsTest, ForwardV2Lstm_ShapeFn) { @@ -100,6 +105,11 @@ TEST(CudnnRNNOpsTest, ForwardV2Lstm_ShapeFn) { .Attr(""direction"", ""unidirectional"") .Finalize(&op.node_def)); INFER_OK(op, input_shapes_desc, output_shapes_desc); + INFER_ERROR(""Shape must be rank 3 "", op, ""[];[?,?,?];[?,?,?];[?]""); + INFER_ERROR(""Shape must be rank 3 "", op, ""[?,?,?];[];[?,?,?];[?]""); + // Disabled because the kernel does not check shape of input_c. + // INFER_ERROR(""Shape must be rank 3 "", op, ""[?,?,?];[?,?,?];[?];[?]""); + INFER_ERROR(""Shape must be rank 1 "", op, ""[?,?,?];[?,?,?];[?,?,?];[]""); } TEST(CudnnRNNOpsTest, ForwardV3Lstm_ShapeFn) { @@ -137,6 +147,52 @@ TEST(CudnnRNNOpsTest, ForwardV3Lstm_ShapeFn) { .Attr(""direction"", ""unidirectional"") .Finalize(&op.node_def)); INFER_OK(op, input_shapes_desc, output_shapes_desc); + INFER_ERROR(""Shape must be rank 3 "", op, ""[];[?,?,?];[?,?,?];[?];[?]""); + INFER_ERROR(""Shape must be rank 3 "", op, ""[?,?,?];[];[?,?,?];[?];[?]""); + INFER_ERROR(""Shape must be rank 3 "", op, ""[?,?,?];[?,?,?];[];[?];[?]""); + INFER_ERROR(""Shape must be rank 1 "", op, ""[?,?,?];[?,?,?];[?,?,?];[];[?]""); + INFER_ERROR(""Shape must be rank 1 "", op, ""[?,?,?];[?,?,?];[?,?,?];[?];[]""); +} + +TEST(CudnnRNNOpsTest, ForwardV3Gru) { + int max_seq_length = 2; + int batch_size = 3; + int num_units = 4; + int num_layers = 5; + int dir_count = 1; + std::vector input_shape = {max_seq_length, batch_size, num_units}; + std::vector input_h_shape = {num_layers * dir_count, batch_size, + num_units}; + std::vector input_c_shape = {num_layers * dir_count, batch_size, + num_units}; + std::vector output_shape = {max_seq_length, batch_size, + num_units * dir_count}; + std::vector seq_lengths_shape = {batch_size}; + auto shape_to_str = [](const std::vector& v) { + return strings::StrCat(""["", absl::StrJoin(v, "",""), ""]""); + }; + string input_shapes_desc = strings::StrCat( + shape_to_str(input_shape), "";"", shape_to_str(input_h_shape), "";"", + shape_to_str(input_c_shape), "";"", ""[?]"", "";"", + shape_to_str(seq_lengths_shape)); + string output_shapes_desc = ""[d0_0,d0_1,d1_2];in1;[];?;?""; + + ShapeInferenceTestOp op(""CudnnRNNV3""); + TF_ASSERT_OK(NodeDefBuilder(""test"", ""CudnnRNNV3"") + .Input({""input"", 0, DT_FLOAT}) + .Input({""input_h"", 0, DT_FLOAT}) + .Input({""input_c"", 0, DT_FLOAT}) + .Input({""params"", 0, DT_FLOAT}) + .Input({""sequence_lengths"", 0, DT_INT32}) + .Attr(""rnn_mode"", ""gru"") + .Attr(""input_mode"", ""auto_select"") + .Attr(""direction"", ""unidirectional"") + .Finalize(&op.node_def)); + INFER_OK(op, input_shapes_desc, output_shapes_desc); + INFER_ERROR(""Shape must be rank 3 "", op, ""[];[?,?,?];[];[?];[?]""); + INFER_ERROR(""Shape must be rank 3 "", op, ""[?,?,?];[];[];[?];[?]""); + INFER_ERROR(""Shape must be rank 1 "", op, ""[?,?,?];[?,?,?];[];[];[?]""); + INFER_ERROR(""Shape must be rank 1 "", op, ""[?,?,?];[?,?,?];[];[?];[]""); } } // end namespace tensorflow ",1,train 25d622ffc432acc736b14ca3904177579e733cc6,tensorflow/tensorflow,"A negative size in one of the split sizes allowed the computed size of another to exceed the total dimension, leading to a segfault and security vulnerability. Adding a check for negative sizes prevents this. 
PiperOrigin-RevId: 401035665 Change-Id: I79bbe329787dac82aa4bf60397a9129b716aedab",split_v_op.cc,"@@ -138,6 +138,13 @@ class SplitVOpBase : public OpKernel { (*split_sizes_vec)[neg_one_dim] = input_size_split_dim - determined_size; } + for (int i = 0; i < split_sizes_vec->size(); ++i) { + const Tlen& split_size = (*split_sizes_vec)[i]; + OP_REQUIRES(context, split_size >= Tlen(0), + errors::InvalidArgument(""Split size at index "", i, + "" must be >= 0. Got: "", split_size)); + } + // Special case 2: split along the 1st dimension. The requirements are that // either we are splitting the outer dimension of two or more such that // every outer subpart is aligned or that the split sizes mean that they are ",1,test 25d622ffc432acc736b14ca3904177579e733cc6,tensorflow/tensorflow,"A negative size in one of the split sizes allowed the computed size of another to exceed the total dimension, leading to a segfault and security vulnerability. Adding a check for negative sizes prevents this. PiperOrigin-RevId: 401035665 Change-Id: I79bbe329787dac82aa4bf60397a9129b716aedab",array_ops.cc,"@@ -681,6 +681,12 @@ REGISTER_OP(""SplitV"") if (data[i] == -1 && c->ValueKnown(split_dim_size)) { size = split_dim_size - total_size; } + // If we have a negative known size (either explicit, or computed + // via -1), then the split sizes are invalid. + if (size < -1 || (size == -1 && c->ValueKnown(split_dim_size))) { + return errors::InvalidArgument(""Split size at index "", i, + "" must be >= 0. Got: "", size); + } TF_RETURN_IF_ERROR( c->ReplaceDim(input, split_dim, c->MakeDim(size), &output_shape)); c->set_output(i, output_shape); ",1,test 25d622ffc432acc736b14ca3904177579e733cc6,tensorflow/tensorflow,"A negative size in one of the split sizes allowed the computed size of another to exceed the total dimension, leading to a segfault and security vulnerability. Adding a check for negative sizes prevents this. PiperOrigin-RevId: 401035665 Change-Id: I79bbe329787dac82aa4bf60397a9129b716aedab",split_op_test.py,"@@ -384,6 +384,24 @@ class SplitOpTest(test.TestCase): ""must have exactly one element""): sess.run(y, {x: np.array([], dtype=np.int32), splits: [4, 11, 15]}) + @test_util.run_in_graph_and_eager_modes + def testNegativeSizes(self): + x = constant_op.constant([1, 2, 3], dtypes.float32) + # A size of -1 signifies to determine size based on sum of other splits. + with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError), + ""Split size at index 1 must be >= 0. Got: -2""): + splits = [-1, -2] + self.evaluate(array_ops.split(x, splits, axis=0)) + + @test_util.run_in_graph_and_eager_modes + def testBadSplitSizes(self): + x = constant_op.constant([1, 2], dtypes.float32) + with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError), + ""Determined shape must either match input"" + ""|can't split axis""): + splits = [1, 2] + self.evaluate(array_ops.split(x, splits, axis=0)) + if __name__ == ""__main__"": test.main() ",1,test aab9998916c2ffbd8f0592059fad352622f89cda,tensorflow/tensorflow,"Add shape checks to FusedBatchNorm kernels. PiperOrigin-RevId: 399755576 Change-Id: If8049fde109cc33badb5509d174b9b95aee1ea5e",fused_batch_norm_op.cc,"@@ -1340,18 +1340,20 @@ class FusedBatchNormOpBase : public OpKernel { errors::InvalidArgument(""offset must have the same number of elements "" ""as the channels of x, got "", offset.NumElements(), "" and "", num_channels)); - if (estimated_mean.NumElements() != 0) { + if (!is_training_ || exponential_avg_factor_ != 1.) 
{ + std::string prefix_msg = is_training_ ? ""When exponential_avg_factor != 1"" + : ""When is_training=false""; OP_REQUIRES(context, estimated_mean.NumElements() == num_channels, errors::InvalidArgument( - ""mean must be empty or have the same number of "" - ""elements as the channels of x, got "", + prefix_msg, + "", mean must have the same number "" + ""of elements as the channels of x, got "", estimated_mean.NumElements(), "" and "", num_channels)); - } - if (estimated_variance.NumElements() != 0) { OP_REQUIRES(context, estimated_variance.NumElements() == num_channels, errors::InvalidArgument( - ""variance must be empty or have the same number of "" - ""elements as the channels of x, got "", + prefix_msg, + "", variance must have the same "" + ""number of elements as the channels of x, got "", estimated_variance.NumElements(), "" and "", num_channels)); } @@ -1543,6 +1545,11 @@ class FusedBatchNormGradOpBase : public OpKernel { errors::InvalidArgument( ""saved variance must be 1-dimensional"", saved_maybe_inv_var_or_pop_var.shape().DebugString())); + OP_REQUIRES( + context, x.shape() == y_backprop.shape(), + errors::InvalidArgument( + ""x and y_backprop must have same shape, but x has shape "", + x.shape(), "" and y_backprop has shape "", y_backprop.shape())); if (use_activation) { OP_REQUIRES( context, x.dim_size(3) % 4 == 0, @@ -1569,6 +1576,23 @@ class FusedBatchNormGradOpBase : public OpKernel { errors::InvalidArgument(""Error during tensor copy."")); } + const auto num_channels = GetTensorDim(x, tensor_format_, 'C'); + OP_REQUIRES( + context, scale.NumElements() == num_channels, + errors::InvalidArgument(""scale must have the same number of elements "" + ""as the channels of x, got "", + scale.NumElements(), "" and "", num_channels)); + OP_REQUIRES( + context, saved_mean_or_pop_mean.NumElements() == num_channels, + errors::InvalidArgument(""reserve_space_1 must have the same number of "" + ""elements as the channels of x, got "", + scale.NumElements(), "" and "", num_channels)); + OP_REQUIRES( + context, saved_maybe_inv_var_or_pop_var.NumElements() == num_channels, + errors::InvalidArgument(""reserve_space_2 must have the same number of "" + ""elements as the channels of x, got "", + scale.NumElements(), "" and "", num_channels)); + Tensor* x_backprop = nullptr; auto alloc_shape = use_reshape ? dest_shape : x_shape; OP_REQUIRES_OK(context, ",1,train aab9998916c2ffbd8f0592059fad352622f89cda,tensorflow/tensorflow,"Add shape checks to FusedBatchNorm kernels. PiperOrigin-RevId: 399755576 Change-Id: If8049fde109cc33badb5509d174b9b95aee1ea5e",nn_fused_batchnorm_test.py,"@@ -16,10 +16,13 @@ import numpy as np +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops @@ -694,6 +697,126 @@ class BatchNormalizationTest(test.TestCase): y_ref = np.maximum(y_ref, 0.) 
self.assertAllClose(y_ref, y_val, atol=1e-3) + def testEagerShapeErrors(self): + with context.eager_mode(): + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((3,)) + offset = array_ops.ones((2,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'scale must have the same number of elements'): + nn_impl.fused_batch_norm(x, scale, offset) + + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + offset = array_ops.ones((3,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'offset must have the same number of elements'): + nn_impl.fused_batch_norm(x, scale, offset) + + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + offset = array_ops.ones((2,)) + mean = array_ops.ones((0,)) + variance = array_ops.ones((2,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'When is_training=false, mean must have the same number of elements'): + nn_impl.fused_batch_norm( + x, scale, offset, mean=mean, variance=variance, is_training=False) + + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + offset = array_ops.ones((2,)) + mean = array_ops.ones((2,)) + variance = array_ops.ones((0,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'When is_training=false, variance must have the same number of ' + 'elements'): + nn_impl.fused_batch_norm( + x, scale, offset, mean=mean, variance=variance, is_training=False) + + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + offset = array_ops.ones((2,)) + mean = array_ops.ones((0,)) + variance = array_ops.ones((2,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'When exponential_avg_factor != 1, mean must have the same number of ' + 'elements'): + nn_impl.fused_batch_norm( + x, + scale, + offset, + mean=mean, + variance=variance, + exponential_avg_factor=0.5) + + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + offset = array_ops.ones((2,)) + mean = array_ops.ones((2,)) + variance = array_ops.ones((0,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'When exponential_avg_factor != 1, variance must have the same ' + 'number of elements'): + nn_impl.fused_batch_norm( + x, + scale, + offset, + mean=mean, + variance=variance, + exponential_avg_factor=0.5) + + def testEagerShapeGradErrors(self): + with context.eager_mode(): + y_backprop = array_ops.ones((2, 2, 2, 3)) + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + reserve_space_1 = array_ops.ones((2,)) + reserve_space_2 = array_ops.ones((2,)) + with self.assertRaisesRegex(errors_impl.InvalidArgumentError, + 'x and y_backprop must have same shape,'): + gen_nn_ops.fused_batch_norm_grad_v2(y_backprop, x, scale, + reserve_space_1, reserve_space_2) + + y_backprop = array_ops.ones((2, 2, 2, 2)) + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((3,)) + reserve_space_1 = array_ops.ones((2,)) + reserve_space_2 = array_ops.ones((2,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'scale must have the same number of elements'): + gen_nn_ops.fused_batch_norm_grad_v2(y_backprop, x, scale, + reserve_space_1, reserve_space_2) + + y_backprop = array_ops.ones((2, 2, 2, 2)) + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + reserve_space_1 = array_ops.ones((3,)) + reserve_space_2 = array_ops.ones((2,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'reserve_space_1 must have the same number of elements'): + 
gen_nn_ops.fused_batch_norm_grad_v2(y_backprop, x, scale, + reserve_space_1, reserve_space_2) + + y_backprop = array_ops.ones((2, 2, 2, 2)) + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + reserve_space_1 = array_ops.ones((2,)) + reserve_space_2 = array_ops.ones((3,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'reserve_space_2 must have the same number of elements'): + gen_nn_ops.fused_batch_norm_grad_v2(y_backprop, x, scale, + reserve_space_1, reserve_space_2) + if __name__ == '__main__': test.main() ",1,train 67bfd9feeecfb3c61d80f0e46d89c170fbee682b,tensorflow/tensorflow,"Make SparseFillEmptyRows validate that the length of `values` must be equal to the number of index tuples. PiperOrigin-RevId: 399969549 Change-Id: I3c2f2ca1c1d2cc88bb5951c6958b38c16e9436c8",sparse_fill_empty_rows_op.cc,"@@ -24,11 +24,13 @@ limitations under the License. #include #include ""tensorflow/core/framework/op_kernel.h"" +#include ""tensorflow/core/framework/op_requires.h"" #include ""tensorflow/core/framework/register_types.h"" #include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/tensor_util.h"" #include ""tensorflow/core/framework/types.h"" #include ""tensorflow/core/lib/gtl/inlined_vector.h"" +#include ""tensorflow/core/platform/errors.h"" #include ""tensorflow/core/util/sparse/sparse_tensor.h"" namespace tensorflow { @@ -222,6 +224,12 @@ void SparseFillEmptyRowsOpImpl(OpKernelContext* context, errors::InvalidArgument(""values must be a vector, saw: "", values_t.shape().DebugString()), done); + OP_REQUIRES_ASYNC( + context, indices_t.dim_size(0) == values_t.dim_size(0), + errors::InvalidArgument(""The length of `values` ("", values_t.dim_size(0), + "") must match the first dimension of `indices` ("", + indices_t.dim_size(0), "").""), + done); OP_REQUIRES_ASYNC( context, TensorShapeUtils::IsScalar(default_value_t.shape()), errors::InvalidArgument(""default_value must be a scalar, saw: "", ",1,train 68867bf01239d9e1048f98cbad185bf4761bedd3,tensorflow/tensorflow,"Prevent unitialized variable use in grappler. PiperOrigin-RevId: 399702928 Change-Id: Id7e75451fbff297692dfb687f60ea04b25c96b24",auto_parallel.cc,"@@ -152,7 +152,7 @@ Status AutoParallel::Initialize(const GrapplerItem& item) { TF_RETURN_IF_ERROR(ComputeTransitiveFanin(graph_, item.fetch, &train_nodes)); LOG(INFO) << ""Number of training nodes: "" << train_nodes.size(); - const NodeDef* dequeue_node; + const NodeDef* dequeue_node = nullptr; for (const auto& train_node : train_nodes) { if (IsDequeueOp(*train_node)) { dequeue_node = train_node; ",1,train 68867bf01239d9e1048f98cbad185bf4761bedd3,tensorflow/tensorflow,"Prevent unitialized variable use in grappler. 
PiperOrigin-RevId: 399702928 Change-Id: Id7e75451fbff297692dfb687f60ea04b25c96b24",auto_parallel_test.cc,"@@ -126,6 +126,30 @@ TEST_F(AutoParallelTest, SimpleParallel) { EXPECT_EQ(""^AutoParallel-Control-Fetch"", node_gradient.input(0)); } +TEST_F(AutoParallelTest, SimpleParallelNoDequeue) { + tensorflow::Scope s = tensorflow::Scope::DisabledShapeInferenceScope(); + Output constant_a = ops::Const(s.WithOpName(""constant_a""), 1.0f, {1}); + Output constant_c = ops::Const(s.WithOpName(""constant_c""), 1.0f, {1}); + Output constant_b = ops::Const(s.WithOpName(""constant_b""), 1, {1}); + Output var = ops::Variable(s.WithOpName(""var""), {1}, DT_FLOAT); + Output assign = ops::Assign(s.WithOpName(""assign""), {var}, {constant_a}); + Output add = ops::AddN(s.WithOpName(""add""), {constant_a, constant_c}); + Output learning_rate = ops::Const(s.WithOpName(""learning_rate""), 0.01f, {1}); + Output apply_gradient = ops::ApplyGradientDescent( + s.WithOpName(""apply_gradient""), {var}, {learning_rate}, {add}); + + GrapplerItem item; + item.init_ops.push_back(""assign""); + item.fetch.push_back(""apply_gradient""); + item.init_ops.push_back(""assign""); + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + AutoParallel parallel(2); + GraphDef output; + Status status = parallel.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); +} + } // namespace } // namespace grappler } // namespace tensorflow ",1,train f410212e373eb2aec4c9e60bf3702eba99a38aba,tensorflow/tensorflow,"Prevent out-of-bound accesses in SparseBincount. PiperOrigin-RevId: 399918616 Change-Id: I11d154f4444d3fde1f09c5c40628b8671791a30d",bincount_op.cc,"@@ -405,6 +405,16 @@ class SparseBincountOp : public OpKernel { for (int64_t i = 0; i < indices_mat.dimension(0); ++i) { const int64_t batch = indices_mat(i, 0); const Tidx bin = values(i); + OP_REQUIRES( + ctx, batch < out.dimension(0), + errors::InvalidArgument(""Index out of bound. `batch` ("", batch, + "") must be less than the dimension size ("", + out.dimension(0), "")."")); + OP_REQUIRES( + ctx, bin < out.dimension(1), + errors::InvalidArgument(""Index out ouf bound. `bin` ("", bin, + "") must be less then the dimension size ("", + out.dimension(1), "")."")); if (bin < size) { if (binary_output_) { out(batch, bin) = T(1); ",1,train 1cb6bb6c2a6019417c9adaf9e6843ba75ee2580b,tensorflow/tensorflow,"Add error checking to ImmutableConst OP that strings are not yet supported. PiperOrigin-RevId: 401065359 Change-Id: I9dd2bd2a2c36f22f4a05153daf6ebdc4613469d2",immutable_constant_op.cc,"@@ -100,6 +100,9 @@ void ImmutableConstantOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK(ctx, allocator->InitializeFromRegion(region_name_, ctx->env())); + OP_REQUIRES(ctx, dtype_ != DT_STRING, + errors::Unimplemented(""Sorry, DT_STRING is not currently "" + ""supported for ImmutableConstOp."")); ctx->set_output(0, Tensor(allocator.get(), dtype_, shape_)); OP_REQUIRES_OK(ctx, allocator->allocation_status()); // Allocator is owned by the tensor from this point. ",1,train 1cb6bb6c2a6019417c9adaf9e6843ba75ee2580b,tensorflow/tensorflow,"Add error checking to ImmutableConst OP that strings are not yet supported. 
PiperOrigin-RevId: 401065359 Change-Id: I9dd2bd2a2c36f22f4a05153daf6ebdc4613469d2",immutable_constant_op_test.cc,"@@ -146,7 +146,8 @@ TEST(ImmutableConstantOpTest, ExecutionError) { error::INTERNAL); } -Status CreateTempFile(Env* env, float value, uint64 size, string* filename) { +Status CreateTempFileFloat(Env* env, float value, uint64 size, + string* filename) { const string dir = testing::TmpDir(); *filename = io::JoinPath(dir, strings::StrCat(""file_"", value)); std::unique_ptr file; @@ -166,8 +167,8 @@ TEST(ImmutableConstantOpTest, FromFile) { auto root = Scope::NewRootScope().ExitOnError(); string two_file, three_file; - TF_ASSERT_OK(CreateTempFile(env, 2.0f, 1000, &two_file)); - TF_ASSERT_OK(CreateTempFile(env, 3.0f, 1000, &three_file)); + TF_ASSERT_OK(CreateTempFileFloat(env, 2.0f, 1000, &two_file)); + TF_ASSERT_OK(CreateTempFileFloat(env, 3.0f, 1000, &three_file)); auto node1 = ops::ImmutableConst(root, DT_FLOAT, kFileTensorShape, two_file); auto node2 = ops::ImmutableConst(root, DT_FLOAT, kFileTensorShape, three_file); @@ -190,5 +191,39 @@ TEST(ImmutableConstantOpTest, FromFile) { EXPECT_EQ(outputs.front().flat()(2), 2.0f * 3.0f); } +Status CreateTempFileBadString(Env* env, char value, uint64 size, + const string suffix, string* filename) { + const string dir = testing::TmpDir(); + *filename = io::JoinPath(dir, strings::StrCat(""file_"", suffix)); + std::unique_ptr file; + TF_RETURN_IF_ERROR(env->NewWritableFile(*filename, &file)); + TF_RETURN_IF_ERROR(file->Append(std::string(size, value))); + TF_RETURN_IF_ERROR(file->Close()); + return Status::OK(); +} + +TEST(ImmutableConstantOpTest, FromFileStringUnimplmented) { + const TensorShape kFileTensorShape({1}); + Env* env = Env::Default(); + auto root = Scope::NewRootScope().ExitOnError(); + + string bad_file; + TF_ASSERT_OK(CreateTempFileBadString(env, '\xe2', 128, ""bad_e2"", &bad_file)); + auto result = + ops::ImmutableConst(root, DT_STRING, kFileTensorShape, bad_file); + GraphDef graph_def; + TF_ASSERT_OK(root.ToGraphDef(&graph_def)); + SessionOptions session_options; + session_options.env = Env::Default(); + std::unique_ptr session(NewSession(session_options)); + ASSERT_TRUE(session != nullptr) << ""Failed to create session""; + TF_ASSERT_OK(session->Create(graph_def)) << ""Can't create test graph""; + std::vector outputs; + // Check that the run returned error. + EXPECT_EQ( + session->Run({}, {result.node()->name() + "":0""}, {}, &outputs).code(), + error::UNIMPLEMENTED); +} + } // namespace } // namespace tensorflow ",1,train 3712a2d3455e6ccb924daa5724a3652a86f6b585,tensorflow/tensorflow,"Fix macros for converting little endian to host for TF_TSRT_OFFSET GetSize Make the macro that converts little endian data do nothing on little endian hosts, and byte swap otherwise. This only affects getting the size of TStrings of type ""Offset"". Added a test for TStrings of type ""Offset"" that checks if type and size are consistent. 
PiperOrigin-RevId: 400789721 Change-Id: I1398bffd842ab1631614b212b7c3a2af88d99538",ctstring_internal.h,"@@ -63,9 +63,9 @@ static inline uint32_t TF_swap32(uint32_t host_int) { #endif #if TF_TSTRING_LITTLE_ENDIAN -#define TF_le32toh(x) TF_swap32(x) -#else // TF_TSTRING_LITTLE_ENDIAN #define TF_le32toh(x) x +#else // TF_TSTRING_LITTLE_ENDIAN +#define TF_le32toh(x) TF_swap32(x) #endif // TF_TSTRING_LITTLE_ENDIAN static inline size_t TF_align16(size_t i) { return (i + 0xF) & ~0xF; } ",1,train 3712a2d3455e6ccb924daa5724a3652a86f6b585,tensorflow/tensorflow,"Fix macros for converting little endian to host for TF_TSRT_OFFSET GetSize Make the macro that converts little endian data do nothing on little endian hosts, and byte swap otherwise. This only affects getting the size of TStrings of type ""Offset"". Added a test for TStrings of type ""Offset"" that checks if type and size are consistent. PiperOrigin-RevId: 400789721 Change-Id: I1398bffd842ab1631614b212b7c3a2af88d99538",ctstring_test.cc,"@@ -18,6 +18,7 @@ limitations under the License. #include #include +#include ""tensorflow/core/platform/ctstring_internal.h"" #include ""tensorflow/core/platform/test.h"" static const char kLongString[] = @@ -380,3 +381,29 @@ TEST(TF_CTStringTest, ResizeReserve) { TF_TString_Dealloc(&s70); } } + +TEST(TF_CTStringTest, OffsetType) { + { + TF_TString s71; + + TF_TString_Init(&s71); + size_t header_length = 24; + size_t size = 8; + TF_TString_ResizeUninitialized(&s71, header_length + size); + uint32_t save_size = s71.u.offset.size; + uint32_t save_offset = s71.u.offset.offset; + uint32_t save_count = s71.u.offset.count; + + s71.u.offset.size = TF_TString_ToInternalSizeT(size, TF_TSTR_OFFSET); + s71.u.offset.offset = header_length; + s71.u.offset.count = 0; + EXPECT_EQ(size, TF_TString_GetSize(&s71)); + EXPECT_EQ(TF_TSTR_OFFSET, TF_TString_GetType(&s71)); + + // restore state so string can be deallocated + s71.u.offset.size = save_size; + s71.u.offset.offset = save_offset; + s71.u.offset.count = save_count; + TF_TString_Dealloc(&s71); + } +} ",1,train 8b202f08d52e8206af2bdb2112a62fafbc546ec7,tensorflow/tensorflow,"Remove use of `eval` when evaluating the input example. Use `ast.eval_literal` instead which safely evaluates the expression. PiperOrigin-RevId: 400012249 Change-Id: I5ff98608ea2d736d093aa488af723ff4f6707e02",saved_model_cli.py,"@@ -20,6 +20,7 @@ https://www.tensorflow.org/guide/saved_model#cli_to_inspect_and_execute_savedmod """""" import argparse +import ast import os import re import sys @@ -521,7 +522,7 @@ def preprocess_inputs_arg_string(inputs_str): return input_dict -def preprocess_input_exprs_arg_string(input_exprs_str): +def preprocess_input_exprs_arg_string(input_exprs_str, safe=True): """"""Parses input arg into dictionary that maps input key to python expression. Parses input string in the format of 'input_key=' into a @@ -529,8 +530,10 @@ def preprocess_input_exprs_arg_string(input_exprs_str): Args: input_exprs_str: A string that specifies python expression for input keys. - Each input is separated by semicolon. For each input key: + Each input is separated by semicolon. For each input key: 'input_key=' + safe: Whether to evaluate the python expression as literals or allow + arbitrary calls (e.g. numpy usage). Returns: A dictionary that maps input keys to their values. @@ -545,8 +548,15 @@ def preprocess_input_exprs_arg_string(input_exprs_str): raise RuntimeError('--input_exprs ""%s"" format is incorrect. 
Please follow' '""=""' % input_exprs_str) input_key, expr = input_raw.split('=', 1) - # ast.literal_eval does not work with numpy expressions - input_dict[input_key] = eval(expr) # pylint: disable=eval-used + if safe: + try: + input_dict[input_key] = ast.literal_eval(expr) + except: + raise RuntimeError( + f'Expression ""{expr}"" is not a valid python literal.') + else: + # ast.literal_eval does not work with numpy expressions + input_dict[input_key] = eval(expr) # pylint: disable=eval-used return input_dict @@ -659,7 +669,7 @@ def load_inputs_from_input_arg_string(inputs_str, input_exprs_str, tensor_key_feed_dict = {} inputs = preprocess_inputs_arg_string(inputs_str) - input_exprs = preprocess_input_exprs_arg_string(input_exprs_str) + input_exprs = preprocess_input_exprs_arg_string(input_exprs_str, safe=False) input_examples = preprocess_input_examples_arg_string(input_examples_str) for input_tensor_key, (filename, variable_name) in inputs.items(): @@ -923,8 +933,10 @@ def add_run_subparser(subparsers): parser_run.add_argument('--inputs', type=str, default='', help=msg) msg = ('Specifying inputs by python expressions, in the format of' ' ""=\'\'"", separated by \';\'. ' - 'numpy module is available as \'np\'. ' - 'Will override duplicate input keys from --inputs option.') + 'numpy module is available as \'np\'. Please note that the expression ' + 'will be evaluated as-is, and is susceptible to code injection. ' + 'When this is set, the value will override duplicate input keys from ' + '--inputs option.') parser_run.add_argument('--input_exprs', type=str, default='', help=msg) msg = ( 'Specifying tf.Example inputs as list of dictionaries. For example: ' ",1,train 8b202f08d52e8206af2bdb2112a62fafbc546ec7,tensorflow/tensorflow,"Remove use of `eval` when evaluating the input example. Use `ast.eval_literal` instead which safely evaluates the expression. PiperOrigin-RevId: 400012249 Change-Id: I5ff98608ea2d736d093aa488af723ff4f6707e02",saved_model_cli_test.py,"@@ -382,7 +382,7 @@ Defined Functions: input_expr_str = 'input3=np.zeros([2,2]);input4=[4,5]' input_dict = saved_model_cli.preprocess_inputs_arg_string(input_str) input_expr_dict = saved_model_cli.preprocess_input_exprs_arg_string( - input_expr_str) + input_expr_str, safe=False) self.assertTrue(input_dict['input1'] == ('/path/file.txt', 'ab3')) self.assertTrue(input_dict['input2'] == ('file2', None)) print(input_expr_dict['input3']) @@ -418,6 +418,11 @@ Defined Functions: } """""", feature) + def testInputPreprocessExampleWithCodeInjection(self): + input_examples_str = 'inputs=os.system(""echo hacked"")' + with self.assertRaisesRegex(RuntimeError, 'not a valid python literal.'): + saved_model_cli.preprocess_input_examples_arg_string(input_examples_str) + def testInputPreProcessFileNames(self): input_str = (r'inputx=C:\Program Files\data.npz[v:0];' r'input:0=c:\PROGRA~1\data.npy') @@ -434,8 +439,8 @@ Defined Functions: with self.assertRaises(RuntimeError): saved_model_cli.preprocess_inputs_arg_string(input_str) input_str = 'inputx:np.zeros((5))' - with self.assertRaises(RuntimeError): - saved_model_cli.preprocess_input_exprs_arg_string(input_str) + with self.assertRaisesRegex(RuntimeError, 'format is incorrect'): + saved_model_cli.preprocess_input_exprs_arg_string(input_str, safe=False) def testInputParserNPY(self): x0 = np.array([[1], [2]]) ",1,train da8558533d925694483d2c136a9220d6d49d843c,tensorflow/tensorflow,"Fix undefined behavior in `tf.raw_ops.Switch` in eager mode. 
PiperOrigin-RevId: 332578058 Change-Id: I9727571d2f21476b10d8aa27c1b7176564b76ac9",kernel_and_device.cc,"@@ -308,7 +308,12 @@ Status KernelAndDeviceOp::Run( if (outputs != nullptr) { outputs->clear(); for (int i = 0; i < context.num_outputs(); ++i) { - outputs->push_back(Tensor(*context.mutable_output(i))); + const auto* output_tensor = context.mutable_output(i); + if (output_tensor != nullptr) { + outputs->push_back(Tensor(*output_tensor)); + } else { + outputs->push_back(Tensor()); + } } } return Status::OK(); ",1,test da8558533d925694483d2c136a9220d6d49d843c,tensorflow/tensorflow,"Fix undefined behavior in `tf.raw_ops.Switch` in eager mode. PiperOrigin-RevId: 332578058 Change-Id: I9727571d2f21476b10d8aa27c1b7176564b76ac9",control_flow_ops_py_test.py,"@@ -4579,6 +4579,14 @@ class ControlFlowTest(test.TestCase, parameterized.TestCase): result = control_flow_ops.merge([v_f, v_t]) self.evaluate(result) + def testSwitchEagerMode(self): + if not context.executing_eagerly(): + return + input_data = [1, 2, 3, 4] + vf, vt = control_flow_ops.switch(input_data, False) + self.assertAllEqual(vf, input_data) + self.assertAllEqual(vt, []) + @test_util.run_deprecated_v1 def testQIntArgAndRet(self): ",1,test 22e07fb204386768e5bcbea563641ea11f96ceb8,tensorflow/tensorflow,"Fix multiple vulnerabilities in `tf.experimental.dlpack.to_dlpack`. We have a use after free caused by memory coruption, a segmentation fault caused by memory corruption, several memory leaks and an undefined behavior when taking the reference of a nullptr. PiperOrigin-RevId: 332568894 Change-Id: Ife0fc05e103b35325094ae5d822ee5fdea764572",dlpack.cc,"@@ -249,21 +249,36 @@ void TFE_CallDLManagedTensorDeleter(void* dlm_ptr) { } void* TFE_HandleToDLPack(TFE_TensorHandle* h, TF_Status* status) { + auto tf_dlm_context = GetDlContext(h, status); + if (!status->status.ok()) { + return nullptr; + } + + auto* tf_dlm_data = TFE_TensorHandleDevicePointer(h, status); + if (!status->status.ok()) { + return nullptr; + } + const Tensor* tensor = GetTensorFromHandle(h, status); TF_DataType data_type = static_cast(tensor->dtype()); - TensorReference tensor_ref(*tensor); // This will call buf_->Ref() + auto tf_dlm_type = GetDlDataType(data_type, status); + if (!status->status.ok()) { + return nullptr; + } + + TensorReference tensor_ref(*tensor); // This will call buf_->Ref() auto* tf_dlm_tensor_ctx = new TfDlManagedTensorCtx(tensor_ref); tf_dlm_tensor_ctx->reference = tensor_ref; DLManagedTensor* dlm_tensor = &tf_dlm_tensor_ctx->tensor; dlm_tensor->manager_ctx = tf_dlm_tensor_ctx; dlm_tensor->deleter = &DLManagedTensorDeleter; - dlm_tensor->dl_tensor.ctx = GetDlContext(h, status); + dlm_tensor->dl_tensor.ctx = tf_dlm_context; int ndim = tensor->dims(); dlm_tensor->dl_tensor.ndim = ndim; - dlm_tensor->dl_tensor.data = TFE_TensorHandleDevicePointer(h, status); - dlm_tensor->dl_tensor.dtype = GetDlDataType(data_type, status); + dlm_tensor->dl_tensor.data = tf_dlm_data; + dlm_tensor->dl_tensor.dtype = tf_dlm_type; std::vector* shape_arr = &tf_dlm_tensor_ctx->shape; std::vector* stride_arr = &tf_dlm_tensor_ctx->strides; @@ -276,13 +291,14 @@ void* TFE_HandleToDLPack(TFE_TensorHandle* h, TF_Status* status) { (*stride_arr)[i] = (*shape_arr)[i + 1] * (*stride_arr)[i + 1]; } - dlm_tensor->dl_tensor.shape = &(*shape_arr)[0]; + dlm_tensor->dl_tensor.shape = shape_arr->data(); // There are two ways to represent compact row-major data // 1) nullptr indicates tensor is compact and row-majored. // 2) fill in the strides array as the real case for compact row-major data. 
// Here we choose option 2, since some frameworks didn't handle the strides // argument properly. - dlm_tensor->dl_tensor.strides = &(*stride_arr)[0]; + dlm_tensor->dl_tensor.strides = stride_arr->data(); + dlm_tensor->dl_tensor.byte_offset = 0; // TF doesn't handle the strides and byte_offsets here return static_cast(dlm_tensor); ",1,train 22e07fb204386768e5bcbea563641ea11f96ceb8,tensorflow/tensorflow,"Fix multiple vulnerabilities in `tf.experimental.dlpack.to_dlpack`. We have a use after free caused by memory coruption, a segmentation fault caused by memory corruption, several memory leaks and an undefined behavior when taking the reference of a nullptr. PiperOrigin-RevId: 332568894 Change-Id: Ife0fc05e103b35325094ae5d822ee5fdea764572",dlpack_test.py,"@@ -20,9 +20,11 @@ from __future__ import print_function from absl.testing import parameterized import numpy as np + from tensorflow.python.dlpack import dlpack from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.platform import test from tensorflow.python.ops import array_ops @@ -105,6 +107,12 @@ class DLPackTest(parameterized.TestCase, test.TestCase): self.assertRaisesRegex(Exception, "".* is not supported by dlpack"", UnsupportedComplex64) + def testMustPassTensorArgumentToDLPack(self): + with self.assertRaisesRegex( + errors.InvalidArgumentError, + ""The argument to `to_dlpack` must be a TF tensor, not Python object""): + dlpack.to_dlpack([1]) + if __name__ == ""__main__"": ops.enable_eager_execution() ",1,train 22e07fb204386768e5bcbea563641ea11f96ceb8,tensorflow/tensorflow,"Fix multiple vulnerabilities in `tf.experimental.dlpack.to_dlpack`. We have a use after free caused by memory coruption, a segmentation fault caused by memory corruption, several memory leaks and an undefined behavior when taking the reference of a nullptr. PiperOrigin-RevId: 332568894 Change-Id: Ife0fc05e103b35325094ae5d822ee5fdea764572",tfe_wrapper.cc,"@@ -1358,9 +1358,16 @@ PYBIND11_MODULE(_pywrap_tfe, m) { // DLPack functions m.def(""TFE_ToDlpackCapsule"", [](py::handle& o) { PyObject* eager_tensor_pyobject_ptr = o.ptr(); - TFE_TensorHandle* thandle = EagerTensor_Handle(eager_tensor_pyobject_ptr); tensorflow::Safe_TF_StatusPtr status = tensorflow::make_safe(TF_NewStatus()); + + if (!EagerTensor_CheckExact(eager_tensor_pyobject_ptr)) { + status->status = tensorflow::errors::InvalidArgument( + ""The argument to `to_dlpack` must be a TF tensor, not Python object""); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + } + + TFE_TensorHandle* thandle = EagerTensor_Handle(eager_tensor_pyobject_ptr); void* dlm_ptr = tensorflow::TFE_HandleToDLPack(thandle, status.get()); tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); ",1,train 390611e0d45c5793c7066110af37c8514e6a6c54,tensorflow/tensorflow,"Fix heap buffer overflow in `tf.raw_ops.SparseFillEmptyRowsGrad`. 
Also add tests as they were lacking PiperOrigin-RevId: 332566071 Change-Id: I44277578e26ff5fb3fdb0dcbba6e91b2ec3e7859",sparse_fill_empty_rows_op.cc,"@@ -236,6 +236,9 @@ class SparseFillEmptyRowsGradOp : public OpKernel { context, TensorShapeUtils::IsVector(reverse_index_map_t->shape()), errors::InvalidArgument(""reverse_index_map must be a vector, saw: "", reverse_index_map_t->shape().DebugString())); + OP_REQUIRES(context, TensorShapeUtils::IsVector(grad_values_t->shape()), + errors::InvalidArgument(""grad_values must be a vector, saw: "", + grad_values_t->shape().DebugString())); const auto reverse_index_map = reverse_index_map_t->vec(); const auto grad_values = grad_values_t->vec(); @@ -264,8 +267,13 @@ class SparseFillEmptyRowsGradOp : public OpKernel { // Locate the index of the output of the forward prop associated // with this location in the input of the forward prop. Copy // the gradient into it. Mark it as visited. - d_values(i) = grad_values(reverse_index_map(i)); - visited(reverse_index_map(i)) = true; + int64 reverse_index = reverse_index_map(i); + OP_REQUIRES( + context, 0 <= reverse_index && reverse_index < N_full, + errors::InvalidArgument(""Elements in reverse index must be in [0, "", + N_full, "") but got "", reverse_index)); + d_values(i) = grad_values(reverse_index); + visited(reverse_index) = true; } for (int j = 0; j < N_full; ++j) { // The default value gradient gets the accumulated remainder of ",1,test 390611e0d45c5793c7066110af37c8514e6a6c54,tensorflow/tensorflow,"Fix heap buffer overflow in `tf.raw_ops.SparseFillEmptyRowsGrad`. Also add tests as they were lacking PiperOrigin-RevId: 332566071 Change-Id: I44277578e26ff5fb3fdb0dcbba6e91b2ec3e7859",sparse_ops_test.py,"@@ -21,6 +21,7 @@ from __future__ import print_function from absl.testing import parameterized import numpy as np +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -30,6 +31,7 @@ from tensorflow.python.framework import test_util # Need array_grad to register gradient for Identity. from tensorflow.python.ops import array_grad # pylint: disable=unused-import from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_sparse_ops from tensorflow.python.ops import gradient_checker_v2 as gradient_checker from tensorflow.python.ops import math_ops # Need sparse_grad to register gradient for SparseToDense. 
@@ -234,5 +236,57 @@ class SparseOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): self.assertAllEqual([5], result.dense_shape) +@test_util.run_all_in_graph_and_eager_modes +class RawOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): + + def testSparseFillEmptyRowsGrad(self): + reverse_index_map = [2, 1] + grad_values = [0, 1, 2, 3] + d_values, d_default_value = self.evaluate( + gen_sparse_ops.SparseFillEmptyRowsGrad( + reverse_index_map=reverse_index_map, grad_values=grad_values)) + self.assertAllEqual([2, 1], d_values) + self.assertEqual(3, d_default_value) + + def testSparseFillEmptyRowsGradNegativeIndexMapValue(self): + reverse_index_map = [2, -1] + grad_values = [0, 1, 2, 3] + with self.assertRaisesRegex( + errors.InvalidArgumentError, + r'Elements in reverse index must be in \[0, 4\)'): + self.evaluate( + gen_sparse_ops.SparseFillEmptyRowsGrad( + reverse_index_map=reverse_index_map, grad_values=grad_values)) + + def testSparseFillEmptyRowsGradLargeIndexMapValue(self): + reverse_index_map = [2, 10] + grad_values = [0, 1, 2, 3] + with self.assertRaisesRegex( + errors.InvalidArgumentError, + r'Elements in reverse index must be in \[0, 4\)'): + self.evaluate( + gen_sparse_ops.SparseFillEmptyRowsGrad( + reverse_index_map=reverse_index_map, grad_values=grad_values)) + + def testSparseFillEmptyRowsGradMatrix(self): + reverse_index_map = [0, 1] + grad_values = [[0, 1], [2, 3]] + # Note: Eager mode and graph mode throw different errors here. Graph mode + # will fail with a ValueError from the shape checking logic, while Eager + # will fail with an InvalidArgumentError from the kernel itself. + if context.executing_eagerly(): + with self.assertRaisesRegex(errors.InvalidArgumentError, + r'grad_values must be a vector'): + self.evaluate( + gen_sparse_ops.SparseFillEmptyRowsGrad( + reverse_index_map=reverse_index_map, grad_values=grad_values)) + else: + with self.assertRaisesRegex(ValueError, + r'Shape must be rank 1 but is rank 2'): + self.evaluate( + gen_sparse_ops.SparseFillEmptyRowsGrad( + reverse_index_map=reverse_index_map, grad_values=grad_values)) + + if __name__ == '__main__': googletest.main() ",1,test 3cbb917b4714766030b28eba9fb41bb97ce9ee02,tensorflow/tensorflow,"Fix multiple vulnerabilities in `tf.raw_ops.*CountSparseOutput`. Also add tests for these API points, both for the happy paths and for the vulnerable ones. PiperOrigin-RevId: 332563222 Change-Id: Ib3b52116a83a134c2e742a7c66e5e956db8fba05",count_ops.cc,"@@ -178,10 +178,30 @@ class SparseCount : public OpKernel { const Tensor& weights = context->input(3); bool use_weights = weights.NumElements() > 0; + OP_REQUIRES(context, TensorShapeUtils::IsMatrix(indices.shape()), + errors::InvalidArgument( + ""Input indices must be a 2-dimensional tensor. Got: "", + indices.shape().DebugString())); + + if (use_weights) { + OP_REQUIRES( + context, weights.shape() == values.shape(), + errors::InvalidArgument( + ""Weights and values must have the same shape. Weight shape: "", + weights.shape().DebugString(), + ""; values shape: "", values.shape().DebugString())); + } + bool is_1d = shape.NumElements() == 1; int num_batches = is_1d ? 
1 : shape.flat()(0); int num_values = values.NumElements(); + OP_REQUIRES(context, num_values == indices.shape().dim_size(0), + errors::InvalidArgument( + ""Number of values must match first dimension of indices."", + ""Got "", num_values, + "" values, indices shape: "", indices.shape().DebugString())); + const auto indices_values = indices.matrix(); const auto values_values = values.flat(); const auto weight_values = weights.flat(); @@ -235,12 +255,33 @@ class RaggedCount : public OpKernel { bool use_weights = weights.NumElements() > 0; bool is_1d = false; + if (use_weights) { + OP_REQUIRES( + context, weights.shape() == values.shape(), + errors::InvalidArgument( + ""Weights and values must have the same shape. Weight shape: "", + weights.shape().DebugString(), + ""; values shape: "", values.shape().DebugString())); + } + const auto splits_values = splits.flat(); const auto values_values = values.flat(); const auto weight_values = weights.flat(); int num_batches = splits.NumElements() - 1; int num_values = values.NumElements(); + OP_REQUIRES( + context, num_batches > 0, + errors::InvalidArgument( + ""Must provide at least 2 elements for the splits argument"")); + OP_REQUIRES(context, splits_values(0) == 0, + errors::InvalidArgument(""Splits must start with 0, not with "", + splits_values(0))); + OP_REQUIRES(context, splits_values(num_batches) == num_values, + errors::InvalidArgument( + ""Splits must end with the number of values, got "", + splits_values(num_batches), "" instead of "", num_values)); + auto per_batch_counts = BatchedMap(num_batches); T max_value = 0; int batch_idx = 0; ",1,test 3cbb917b4714766030b28eba9fb41bb97ce9ee02,tensorflow/tensorflow,"Fix multiple vulnerabilities in `tf.raw_ops.*CountSparseOutput`. Also add tests for these API points, both for the happy paths and for the vulnerable ones. 
PiperOrigin-RevId: 332563222 Change-Id: Ib3b52116a83a134c2e742a7c66e5e956db8fba05",bincount_ops_test.py,"@@ -25,7 +25,9 @@ from tensorflow.python.eager import context from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import test_util from tensorflow.python.ops import bincount_ops +from tensorflow.python.ops import gen_count_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops.ragged import ragged_factory_ops from tensorflow.python.ops.ragged import ragged_tensor @@ -834,5 +836,121 @@ class TestSparseCountFailureModes(test.TestCase): self.evaluate(bincount_ops.sparse_bincount(x, weights=weights, axis=-1)) +@test_util.run_all_in_graph_and_eager_modes +@test_util.disable_tfrt +class RawOpsTest(test.TestCase, parameterized.TestCase): + + def testSparseCountSparseOutputBadIndicesShape(self): + indices = [[[0], [0]], [[0], [1]], [[1], [0]], [[1], [2]]] + values = [1, 1, 1, 10] + weights = [1, 2, 4, 6] + dense_shape = [2, 3] + with self.assertRaisesRegex(errors.InvalidArgumentError, + ""Input indices must be a 2-dimensional tensor""): + self.evaluate( + gen_count_ops.SparseCountSparseOutput( + indices=indices, + values=values, + dense_shape=dense_shape, + weights=weights, + binary_output=False)) + + def testSparseCountSparseOutputBadWeightsShape(self): + indices = [[0, 0], [0, 1], [1, 0], [1, 2]] + values = [1, 1, 1, 10] + weights = [1, 2, 4] + dense_shape = [2, 3] + with self.assertRaisesRegex(errors.InvalidArgumentError, + ""Weights and values must have the same shape""): + self.evaluate( + gen_count_ops.SparseCountSparseOutput( + indices=indices, + values=values, + dense_shape=dense_shape, + weights=weights, + binary_output=False)) + + def testSparseCountSparseOutputBadNumberOfValues(self): + indices = [[0, 0], [0, 1], [1, 0]] + values = [1, 1, 1, 10] + weights = [1, 2, 4, 6] + dense_shape = [2, 3] + with self.assertRaisesRegex( + errors.InvalidArgumentError, + ""Number of values must match first dimension of indices""): + self.evaluate( + gen_count_ops.SparseCountSparseOutput( + indices=indices, + values=values, + dense_shape=dense_shape, + weights=weights, + binary_output=False)) + + def testRaggedCountSparseOutput(self): + splits = [0, 4, 7] + values = [1, 1, 2, 1, 2, 10, 5] + weights = [1, 2, 3, 4, 5, 6, 7] + output_indices, output_values, output_shape = self.evaluate( + gen_count_ops.RaggedCountSparseOutput( + splits=splits, values=values, weights=weights, binary_output=False)) + self.assertAllEqual([[0, 1], [0, 2], [1, 2], [1, 5], [1, 10]], + output_indices) + self.assertAllEqual([7, 3, 5, 7, 6], output_values) + self.assertAllEqual([2, 11], output_shape) + + def testRaggedCountSparseOutputBadWeightsShape(self): + splits = [0, 4, 7] + values = [1, 1, 2, 1, 2, 10, 5] + weights = [1, 2, 3, 4, 5, 6] + with self.assertRaisesRegex(errors.InvalidArgumentError, + ""Weights and values must have the same shape""): + self.evaluate( + gen_count_ops.RaggedCountSparseOutput( + splits=splits, + values=values, + weights=weights, + binary_output=False)) + + def testRaggedCountSparseOutputEmptySplits(self): + splits = [] + values = [1, 1, 2, 1, 2, 10, 5] + weights = [1, 2, 3, 4, 5, 6, 7] + with self.assertRaisesRegex( + errors.InvalidArgumentError, + ""Must provide at least 2 elements for the splits argument""): + self.evaluate( + gen_count_ops.RaggedCountSparseOutput( + splits=splits, + values=values, + weights=weights, + binary_output=False)) + + def 
testRaggedCountSparseOutputBadSplitsStart(self): + splits = [1, 7] + values = [1, 1, 2, 1, 2, 10, 5] + weights = [1, 2, 3, 4, 5, 6, 7] + with self.assertRaisesRegex(errors.InvalidArgumentError, + ""Splits must start with 0""): + self.evaluate( + gen_count_ops.RaggedCountSparseOutput( + splits=splits, + values=values, + weights=weights, + binary_output=False)) + + def testRaggedCountSparseOutputBadSplitsEnd(self): + splits = [0, 5] + values = [1, 1, 2, 1, 2, 10, 5] + weights = [1, 2, 3, 4, 5, 6, 7] + with self.assertRaisesRegex(errors.InvalidArgumentError, + ""Splits must end with the number of values""): + self.evaluate( + gen_count_ops.RaggedCountSparseOutput( + splits=splits, + values=values, + weights=weights, + binary_output=False)) + + if __name__ == ""__main__"": test.main() ",1,test 27b417360cbd671ef55915e4bb6bb06af8b8a832,tensorflow/tensorflow,"Prevent `int64` to `int` truncation in `Shard` API usage. The function argument in `Shard` must be a function of two `int64` arguments. However, we are passing in a function with two `int` arguments. Thus, for large workloads, these arguments get truncated from positive `int64` values to negative `int` ones, resulting in a buffer out of bounds write. PiperOrigin-RevId: 332557334 Change-Id: I236c9a2e7f53580e520571da8ba941a3aa9fa0b5",random_op.cc,"@@ -202,7 +202,7 @@ class RandomGammaOp : public OpKernel { // avoid a couple flops which can be done on a per-alpha basis. auto DoWork = [samples_per_alpha, num_alphas, &rng, samples_flat, - alpha_flat](int start_output, int limit_output) { + alpha_flat](int64 start_output, int64 limit_output) { using Eigen::numext::exp; using Eigen::numext::log; using Eigen::numext::log1p; ",1,train ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits. The `tensorflow::Shard` functions last argument must be a 2 argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where code passes in a function where arguments are `int` or `int32` (32 bits). In these cases, it is possible that the integer truncation would later cause a segfault or other unexpected behavior. PiperOrigin-RevId: 332560414 Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",prediction_ops.cc,"@@ -121,7 +121,7 @@ class BoostedTreesTrainingPredictOp : public OpKernel { auto do_work = [&resource, &bucketized_features, &cached_tree_ids, &cached_node_ids, &output_partial_logits, &output_node_ids, latest_tree, - this](int32 start, int32 end) { + this](int64 start, int64 end) { for (int32 i = start; i < end; ++i) { int32 tree_id = cached_tree_ids(i); int32 node_id = cached_node_ids(i); @@ -237,7 +237,7 @@ class BoostedTreesPredictOp : public OpKernel { const int32 last_tree = resource->num_trees() - 1; auto do_work = [&resource, &bucketized_features, &output_logits, last_tree, - this](int32 start, int32 end) { + this](int64 start, int64 end) { for (int32 i = start; i < end; ++i) { std::vector tree_logits(logits_dimension_, 0.0); int32 tree_id = 0; @@ -340,7 +340,7 @@ class BoostedTreesExampleDebugOutputsOp : public OpKernel { // path. Note: feature_ids has one less value than logits_path because the // first value of each logit path will be the bias. auto do_work = [&resource, &bucketized_features, &output_debug_info, - last_tree](int32 start, int32 end) { + last_tree](int64 start, int64 end) { for (int32 i = start; i < end; ++i) { // Proto to store debug outputs, per example. 
boosted_trees::DebugOutput example_debug_info; ",1,test ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits. The `tensorflow::Shard` functions last argument must be a 2 argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where code passes in a function where arguments are `int` or `int32` (32 bits). In these cases, it is possible that the integer truncation would later cause a segfault or other unexpected behavior. PiperOrigin-RevId: 332560414 Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",crop_and_resize_op.cc,"@@ -223,7 +223,7 @@ struct CropAndResize { const int depth = crops.dimension(3); // Sharding across boxes. - auto CropAndResizePerBox = [&](int start_box, int limit_box) { + auto CropAndResizePerBox = [&](int64 start_box, int64 limit_box) { for (int b = start_box; b < limit_box; ++b) { const float y1 = boxes(b, 0); const float x1 = boxes(b, 1); @@ -449,7 +449,7 @@ struct CropAndResizeBackpropImage { grads_image.setZero(); - auto CropAndResizeBackImgPerBox = [&](int start_box, int limit_box) { + auto CropAndResizeBackImgPerBox = [&](int64 start_box, int64 limit_box) { for (int b = start_box; b < limit_box; ++b) { const float y1 = boxes(b, 0); const float x1 = boxes(b, 1); ",1,test ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits. The `tensorflow::Shard` functions last argument must be a 2 argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where code passes in a function where arguments are `int` or `int32` (32 bits). In these cases, it is possible that the integer truncation would later cause a segfault or other unexpected behavior. PiperOrigin-RevId: 332560414 Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",banded_triangular_solve_op.cc,"@@ -193,7 +193,8 @@ struct LaunchBatchBandedTriangularSolve { Shard(worker_threads.num_threads, worker_threads.workers, batch_size, cost_per_unit, - [&in_x, &in_y, adjoint, lower, &bcast, out](int start, int limit) { + [&in_x, &in_y, adjoint, lower, &bcast, out](int64 start, + int64 limit) { SequentialBandedTriangularSolveKernel::Run( in_x, in_y, lower, adjoint, bcast, out, start, limit); }); ",1,test ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits. The `tensorflow::Shard` functions last argument must be a 2 argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where code passes in a function where arguments are `int` or `int32` (32 bits). In these cases, it is possible that the integer truncation would later cause a segfault or other unexpected behavior. PiperOrigin-RevId: 332560414 Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",nth_element_op.cc,"@@ -95,7 +95,8 @@ struct NthElementFunctor { const int last_dim = input_tensor.dim_size(input_tensor.dims() - 1); // Allocate each row to different shard. - auto SubNthElement = [&, input, output, last_dim, n](int start, int limit) { + auto SubNthElement = [&, input, output, last_dim, n](int64 start, + int64 limit) { // std::nth_element would rearrange the array, so we need a new buffer. std::vector buf(last_dim); ",1,test ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits. The `tensorflow::Shard` functions last argument must be a 2 argument function where both arguments are `int64` (`long long`, 64 bits). 
However, there are usages where code passes in a function where arguments are `int` or `int32` (32 bits). In these cases, it is possible that the integer truncation would later cause a segfault or other unexpected behavior. PiperOrigin-RevId: 332560414 Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",parameterized_truncated_normal_op.cc,"@@ -70,8 +70,8 @@ struct TruncatedNormalFunctor { auto do_work = [samples_per_batch, num_elements, &ctx, &means, &stddevs, &minvals, &maxvals, &gen, &output, - kStdDevsInsideBoundsToUseRandnSampler](int start_batch, - int limit_batch) { + kStdDevsInsideBoundsToUseRandnSampler](int64 start_batch, + int64 limit_batch) { // Capturing ""gen"" by-value would only make a copy for the _shared_ // lambda. Since we want to let each worker have its own copy, we pass // ""gen"" by reference and explicitly do a copy assignment here. @@ -333,8 +333,8 @@ struct TruncatedNormalFunctorV2 { auto do_work = [num_batches, samples_per_batch, &ctx, &bcast, &means, &stddevs, &minvals, &maxvals, &gen, &output, - kStdDevsInsideBoundsToUseRandnSampler](int start_output, - int limit_output) { + kStdDevsInsideBoundsToUseRandnSampler](int64 start_output, + int64 limit_output) { // Capturing ""gen"" by-value would only make a copy for the _shared_ // lambda. Since we want to let each worker have its own copy, we pass // ""gen"" by reference and explicitly do a copy assignment here. ",1,test ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits. The `tensorflow::Shard` functions last argument must be a 2 argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where code passes in a function where arguments are `int` or `int32` (32 bits). In these cases, it is possible that the integer truncation would later cause a segfault or other unexpected behavior. PiperOrigin-RevId: 332560414 Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",random_binomial_op.cc,"@@ -184,7 +184,7 @@ struct RandomBinomialFunctor { // the sample shape and [H1, ... Hm] for the batch shape of the samples. // We have B1 * ... * Bk samples per batch member we need. auto DoWork = [num_batches, samples_per_batch, &bcast, &counts, &probs, - &gen, &output](int start_output, int limit_output) { + &gen, &output](int64 start_output, int64 limit_output) { // Vectorized intermediate calculations for uniform rejection sampling. // We always generate at most 4 samples. Eigen::array z; ",1,test ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits. The `tensorflow::Shard` functions last argument must be a 2 argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where code passes in a function where arguments are `int` or `int32` (32 bits). In these cases, it is possible that the integer truncation would later cause a segfault or other unexpected behavior. PiperOrigin-RevId: 332560414 Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",random_poisson_op.cc,"@@ -97,7 +97,7 @@ struct PoissonFunctor { typedef random::UniformDistribution Uniform; auto DoWork = [num_samples, num_rate, &rng, samples_flat, rate_flat]( - int start_output, int limit_output) { + int64 start_output, int64 limit_output) { // Capturing ""rng"" by value would only make a copy for the _shared_ // lambda. Since we want to let each worker have its own copy, we pass // ""rng"" by reference and explicitly do a copy assignment. 
",1,test ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits. The `tensorflow::Shard` functions last argument must be a 2 argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where code passes in a function where arguments are `int` or `int32` (32 bits). In these cases, it is possible that the integer truncation would later cause a segfault or other unexpected behavior. PiperOrigin-RevId: 332560414 Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",stateless_random_ops.cc,"@@ -252,7 +252,7 @@ class StatelessRandomGammaOp : public StatelessRandomOpBase { // avoid a couple flops which can be done on a per-alpha basis. auto DoWork = [samples_per_alpha, num_alphas, &random, samples_flat, - alpha_flat](int start_output, int limit_output) { + alpha_flat](int64 start_output, int64 limit_output) { // Capturing ""random"" by-value would only make a copy for the _shared_ // lambda. Since we want to let each worker have its own copy, we pass // ""random"" by reference and explicitly do a copy assignment. ",1,test ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits. The `tensorflow::Shard` functions last argument must be a 2 argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where code passes in a function where arguments are `int` or `int32` (32 bits). In these cases, it is possible that the integer truncation would later cause a segfault or other unexpected behavior. PiperOrigin-RevId: 332560414 Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",topk_op.cc,"@@ -136,7 +136,7 @@ struct TopKFunctor { return Status::OK(); } - auto SortIndices = [&](int start_batch, int limit_batch) { + auto SortIndices = [&](int64 start_batch, int64 limit_batch) { for (int32 b = start_batch; b < limit_batch; ++b) { const T* input_data = &input(b, 0); const auto stable_comp = [input_data](const int32 a, const int32 b) { ",1,test 33be22c65d86256e6826666662e40dbdfe70ee83,tensorflow/tensorflow,"Prevent format string vulnerability in `tf.strings.as_string`. The `printf` format specifier only allows `#`, `0`, `-`, `+` and space as flag characters. Others are interpreted as width/precision/length modifier or conversion specifiers. If a character does not fit into any of these sets `printf` just displays it. Also add a test suite for `tf.strings.as_string`. Also fix the issue where the flag character was used only if width was specified. 
PiperOrigin-RevId: 332553548 Change-Id: Ie57cf2a7c14d1a36097642794c14329db669bbba",as_string_op.cc,"@@ -65,9 +65,26 @@ class AsStringOp : public OpKernel { OP_REQUIRES(ctx, !(scientific && shortest), errors::InvalidArgument( ""Cannot select both scientific and shortest notation"")); + format_ = ""%""; + if (!fill_string.empty()) { + switch (fill_string[0]) { + case ' ': + case '+': + case '-': + case '0': + case '#': + strings::Appendf(&format_, ""%s"", fill_string.c_str()); + break; + default: + bool fill_not_supported = true; + OP_REQUIRES(ctx, !fill_not_supported, + errors::InvalidArgument(""Fill argument not supported: \"""", + fill_string, ""\"""")); + } + } if (width > -1) { - strings::Appendf(&format_, ""%s%d"", fill_string.c_str(), width); + strings::Appendf(&format_, ""%d"", width); } if (precision > -1) { strings::Appendf(&format_, "".%d"", precision); ",1,train 33be22c65d86256e6826666662e40dbdfe70ee83,tensorflow/tensorflow,"Prevent format string vulnerability in `tf.strings.as_string`. The `printf` format specifier only allows `#`, `0`, `-`, `+` and space as flag characters. Others are interpreted as width/precision/length modifier or conversion specifiers. If a character does not fit into any of these sets `printf` just displays it. Also add a test suite for `tf.strings.as_string`. Also fix the issue where the flag character was used only if width was specified. PiperOrigin-RevId: 332553548 Change-Id: Ie57cf2a7c14d1a36097642794c14329db669bbba",as_string_op_test.cc,"@@ -0,0 +1,245 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include ""tensorflow/core/framework/fake_input.h"" +#include ""tensorflow/core/framework/node_def_builder.h"" +#include ""tensorflow/core/framework/tensor.h"" +#include ""tensorflow/core/framework/tensor_testutil.h"" +#include ""tensorflow/core/framework/types.h"" +#include ""tensorflow/core/kernels/ops_testutil.h"" +#include ""tensorflow/core/kernels/ops_util.h"" +#include ""tensorflow/core/lib/core/status_test_util.h"" + +namespace tensorflow { +namespace { + +class AsStringGraphTest : public OpsTestBase { + protected: + Status Init(DataType input_type, const string& fill = """", int width = -1, + int precision = -1, bool scientific = false, + bool shortest = false) { + TF_CHECK_OK(NodeDefBuilder(""op"", ""AsString"") + .Input(FakeInput(input_type)) + .Attr(""fill"", fill) + .Attr(""precision"", precision) + .Attr(""scientific"", scientific) + .Attr(""shortest"", shortest) + .Attr(""width"", width) + .Finalize(node_def())); + return InitOp(); + } +}; + +TEST_F(AsStringGraphTest, Int8) { + TF_ASSERT_OK(Init(DT_INT8)); + + AddInputFromArray(TensorShape({3}), {-42, 0, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({3})); + test::FillValues(&expected, {""-42"", ""0"", ""42""}); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, Int64) { + TF_ASSERT_OK(Init(DT_INT64)); + + AddInputFromArray(TensorShape({3}), {-42, 0, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({3})); + test::FillValues(&expected, {""-42"", ""0"", ""42""}); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FloatDefault) { + TF_ASSERT_OK(Init(DT_FLOAT)); + + AddInputFromArray(TensorShape({4}), {-42, 0, 3.14159, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({4})); + test::FillValues( + &expected, {""-42.000000"", ""0.000000"", ""3.141590"", ""42.000000""}); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FloatScientific) { + TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"""", /*width=*/-1, /*precision=*/-1, + /*scientific=*/true)); + + AddInputFromArray(TensorShape({4}), {-42, 0, 3.14159, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({4})); + test::FillValues(&expected, {""-4.200000e+01"", ""0.000000e+00"", + ""3.141590e+00"", ""4.200000e+01""}); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FloatShortest) { + TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"""", /*width=*/-1, /*precision=*/-1, + /*scientific=*/false, /*shortest=*/true)); + + AddInputFromArray(TensorShape({4}), {-42, 0, 3.14159, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({4})); + test::FillValues(&expected, {""-42"", ""0"", ""3.14159"", ""42""}); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FloatPrecisionOnly) { + TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"""", /*width=*/-1, /*precision=*/2)); + + AddInputFromArray(TensorShape({4}), {-42, 0, 3.14159, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({4})); + test::FillValues(&expected, {""-42.00"", ""0.00"", ""3.14"", ""42.00""}); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FloatWidthOnly) { + TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"""", /*width=*/5)); + + 
AddInputFromArray(TensorShape({4}), {-42, 0, 3.14159, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({4})); + test::FillValues( + &expected, {""-42.000000"", ""0.000000"", ""3.141590"", ""42.000000""}); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, Float_5_2_Format) { + TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"""", /*width=*/5, /*precision=*/2)); + + AddInputFromArray(TensorShape({4}), {-42, 0, 3.14159, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({4})); + test::FillValues(&expected, {""-42.00"", "" 0.00"", "" 3.14"", ""42.00""}); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, Complex) { + TF_ASSERT_OK(Init(DT_COMPLEX64, /*fill=*/"""", /*width=*/5, /*precision=*/2)); + + AddInputFromArray(TensorShape({3}), {{-4, 2}, {0}, {3.14159, -1}}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({3})); + test::FillValues( + &expected, {""(-4.00, 2.00)"", ""( 0.00, 0.00)"", ""( 3.14,-1.00)""}); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, Bool) { + TF_ASSERT_OK(Init(DT_BOOL)); + + AddInputFromArray(TensorShape({2}), {true, false}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({2})); + test::FillValues(&expected, {""true"", ""false""}); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, String) { + Status s = Init(DT_STRING); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE(absl::StrContains( + s.error_message(), + ""Value for attr 'T' of string is not in the list of allowed values"")); +} + +TEST_F(AsStringGraphTest, OnlyOneOfScientificAndShortest) { + Status s = Init(DT_FLOAT, /*fill=*/"""", /*width=*/-1, /*precision=*/-1, + /*scientific=*/true, /*shortest=*/true); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE( + absl::StrContains(s.error_message(), + ""Cannot select both scientific and shortest notation"")); +} + +TEST_F(AsStringGraphTest, NoShortestForNonFloat) { + Status s = Init(DT_INT32, /*fill=*/"""", /*width=*/-1, /*precision=*/-1, + /*scientific=*/false, /*shortest=*/true); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE(absl::StrContains( + s.error_message(), + ""scientific and shortest format not supported for datatype"")); +} + +TEST_F(AsStringGraphTest, NoScientificForNonFloat) { + Status s = Init(DT_INT32, /*fill=*/"""", /*width=*/-1, /*precision=*/-1, + /*scientific=*/true); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE(absl::StrContains( + s.error_message(), + ""scientific and shortest format not supported for datatype"")); +} + +TEST_F(AsStringGraphTest, NoPrecisionForNonFloat) { + Status s = Init(DT_INT32, /*fill=*/"""", /*width=*/-1, /*precision=*/5); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE(absl::StrContains(s.error_message(), + ""precision not supported for datatype"")); +} + +TEST_F(AsStringGraphTest, LongFill) { + Status s = Init(DT_INT32, /*fill=*/""asdf""); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE(absl::StrContains(s.error_message(), + ""Fill string must be one or fewer characters"")); +} + +TEST_F(AsStringGraphTest, FillWithZero) { + TF_ASSERT_OK(Init(DT_INT64, /*fill=*/""0"", /*width=*/4)); + + AddInputFromArray(TensorShape({3}), {-42, 0, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({3})); + test::FillValues(&expected, 
{""-042"", ""0000"", ""0042""}); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FillWithSpace) { + TF_ASSERT_OK(Init(DT_INT64, /*fill=*/"" "", /*width=*/4)); + + AddInputFromArray(TensorShape({3}), {-42, 0, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({3})); + test::FillValues(&expected, {"" -42"", "" 0"", "" 42""}); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FillWithChar1) { + TF_ASSERT_OK(Init(DT_INT64, /*fill=*/""-"", /*width=*/4)); + + AddInputFromArray(TensorShape({3}), {-42, 0, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({3})); + test::FillValues(&expected, {""-42 "", ""0 "", ""42 ""}); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FillWithChar3) { + Status s = Init(DT_INT32, /*fill=*/""s""); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE( + absl::StrContains(s.error_message(), ""Fill argument not supported"")); +} + +TEST_F(AsStringGraphTest, FillWithChar4) { + Status s = Init(DT_INT32, /*fill=*/""n""); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE( + absl::StrContains(s.error_message(), ""Fill argument not supported"")); +} + +} // end namespace +} // end namespace tensorflow ",1,train 9a133d73ae4b4664d22bd1aa6d654fec13c52ee1,tensorflow/tensorflow,"Prevent segfault in `GetSessionHandle{,V2}`. In eager mode, session state is null. PiperOrigin-RevId: 332548597 Change-Id: If094812c2e094044220b9ba28f7d7601be042f38",session_ops.cc,"@@ -16,6 +16,7 @@ limitations under the License. // See docs in ../ops/data_flow_ops.cc. #include + #include #include ""tensorflow/core/common_runtime/device.h"" @@ -27,6 +28,7 @@ limitations under the License. #include ""tensorflow/core/framework/types.h"" #include ""tensorflow/core/lib/core/errors.h"" #include ""tensorflow/core/lib/gtl/map_util.h"" +#include ""tensorflow/core/platform/errors.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/macros.h"" #include ""tensorflow/core/platform/mutex.h"" @@ -42,7 +44,11 @@ class GetSessionHandleOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& val = ctx->input(0); - int64 id = ctx->session_state()->GetNewId(); + auto session_state = ctx->session_state(); + OP_REQUIRES(ctx, session_state != nullptr, + errors::FailedPrecondition( + ""GetSessionHandle called on null session state"")); + int64 id = session_state->GetNewId(); TensorStore::TensorAndKey tk{val, id, requested_device()}; OP_REQUIRES_OK(ctx, ctx->tensor_store()->AddTensor(name(), tk)); ",1,train 9a133d73ae4b4664d22bd1aa6d654fec13c52ee1,tensorflow/tensorflow,"Prevent segfault in `GetSessionHandle{,V2}`. In eager mode, session state is null. 
PiperOrigin-RevId: 332548597 Change-Id: If094812c2e094044220b9ba28f7d7601be042f38",raw_ops_test.py,"@@ -25,6 +25,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.ops import gen_data_flow_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import gen_string_ops from tensorflow.python.platform import test @@ -79,6 +80,13 @@ class RawOpsTest(test.TestCase, parameterized.TestCase): pad_width=0, preserve_short_sequences=False)) + def testGetSessionHandle(self): + if context.executing_eagerly(): + with self.assertRaisesRegex( + errors.FailedPreconditionError, + ""GetSessionHandle called on null session state""): + gen_data_flow_ops.GetSessionHandle(value=[1]) + if __name__ == ""__main__"": ops.enable_eager_execution() ",1,train 0462de5b544ed4731aa2fb23946ac22c01856b80,tensorflow/tensorflow,"Validate `data_splits` for `tf.StringNGrams`. Without validation, we can cause a heap buffer overflow which results in data leakage and/or segfaults. PiperOrigin-RevId: 332543478 Change-Id: Iee5bda24497a195d09d122355502480830b1b317",string_ngrams_op.cc,"@@ -19,6 +19,7 @@ limitations under the License. #include ""absl/strings/ascii.h"" #include ""absl/strings/str_cat.h"" #include ""tensorflow/core/framework/op_kernel.h"" +#include ""tensorflow/core/platform/errors.h"" namespace tensorflow { namespace text { @@ -60,6 +61,18 @@ class StringNGramsOp : public tensorflow::OpKernel { OP_REQUIRES_OK(context, context->input(""data_splits"", &splits)); const auto& splits_vec = splits->flat(); + // Validate that the splits are valid indices into data + const int input_data_size = data->flat().size(); + const int splits_vec_size = splits_vec.size(); + for (int i = 0; i < splits_vec_size; ++i) { + bool valid_splits = splits_vec(i) >= 0; + valid_splits = valid_splits && (splits_vec(i) <= input_data_size); + OP_REQUIRES( + context, valid_splits, + errors::InvalidArgument(""Invalid split value "", splits_vec(i), + "", must be in [0,"", input_data_size, ""]"")); + } + int num_batch_items = splits_vec.size() - 1; tensorflow::Tensor* ngrams_splits; OP_REQUIRES_OK( ",1,test 0462de5b544ed4731aa2fb23946ac22c01856b80,tensorflow/tensorflow,"Validate `data_splits` for `tf.StringNGrams`. Without validation, we can cause a heap buffer overflow which results in data leakage and/or segfaults. 
PiperOrigin-RevId: 332543478 Change-Id: Iee5bda24497a195d09d122355502480830b1b317",raw_ops_test.py,"@@ -18,16 +18,21 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from absl.testing import parameterized + from tensorflow.python.eager import context from tensorflow.python.framework import constant_op +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import gen_string_ops from tensorflow.python.platform import test @test_util.run_all_in_graph_and_eager_modes -class RawOpsTest(test.TestCase): +@test_util.disable_tfrt +class RawOpsTest(test.TestCase, parameterized.TestCase): def testSimple(self): x = constant_op.constant(1) @@ -58,6 +63,22 @@ class RawOpsTest(test.TestCase): gen_math_ops.Any(input=x, axis=0), gen_math_ops.Any(input=x, axis=0, keep_dims=False)) + @parameterized.parameters([[0, 8]], [[-1, 6]]) + def testStringNGramsBadDataSplits(self, splits): + data = [""aa"", ""bb"", ""cc"", ""dd"", ""ee"", ""ff""] + with self.assertRaisesRegex(errors.InvalidArgumentError, + ""Invalid split value""): + self.evaluate( + gen_string_ops.string_n_grams( + data=data, + data_splits=splits, + separator="""", + ngram_widths=[2], + left_pad="""", + right_pad="""", + pad_width=0, + preserve_short_sequences=False)) + if __name__ == ""__main__"": ops.enable_eager_execution() ",1,test adf095206f25471e864a8e63a0f1caef53a0e3a6,tensorflow/tensorflow,"Validate `NodeDef`s from `FunctionDefLibrary` of a `GraphDef`. We already validated `NodeDef`s from a `GraphDef` but missed validating those from the `FunctionDefLibrary`. Thus, some maliciously crafted models could evade detection and cause denial of service due to a `CHECK`-fail. PiperOrigin-RevId: 332536309 Change-Id: I052efe919ff1fe2f90815e286a1aa4c54c7b94ff",loader.cc,"@@ -21,6 +21,7 @@ limitations under the License. #include ""tensorflow/cc/saved_model/loader_util.h"" #include ""tensorflow/cc/saved_model/reader.h"" #include ""tensorflow/core/framework/attr_value.pb.h"" +#include ""tensorflow/core/framework/function.pb.h"" #include ""tensorflow/core/framework/node_def.pb.h"" #include ""tensorflow/core/framework/tensor.pb.h"" #include ""tensorflow/core/lib/io/path.h"" @@ -73,26 +74,41 @@ uint64 GetLatencyMicroseconds(const uint64 start_microseconds) { // Ensure that constant tensors loaded from the saved model have valid shape. // Also ensure that constant nodes have a value assigned to them. 
// TODO(b/154763635): this is temporary and will be replaced with a better audit +static Status ValidateNode(const NodeDef& node) { + const auto node_iterator = node.attr().find(""value""); + if (node_iterator != node.attr().end()) { + AttrValue node_value = node_iterator->second; + if (node_value.has_tensor()) { + const PartialTensorShape node_shape(node_value.tensor().tensor_shape()); + if (node_shape.num_elements() < 0) { + return errors::FailedPrecondition( + ""Saved model contains node \"""", node.name(), ""\"" (op \"""", node.op(), + ""\"") which initializes from a tensor with "", + node_shape.num_elements(), "" elements""); + } + } + } else if (node.op() == ""Const"") { + return errors::FailedPrecondition( + ""Saved model contains node \"""", node.name(), + ""\"" which is a constant tensor but no value has been provided""); + } + return Status::OK(); +} + static Status ValidateSavedTensors(const GraphDef& graph_def) { for (const auto& node : graph_def.node()) { - const auto node_iterator = node.attr().find(""value""); - if (node_iterator != node.attr().end()) { - AttrValue node_value = node_iterator->second; - if (node_value.has_tensor()) { - const PartialTensorShape node_shape(node_value.tensor().tensor_shape()); - if (node_shape.num_elements() < 0) { - return errors::FailedPrecondition( - ""Saved model contains node \"""", node.name(), ""\"" (op \"""", - node.op(), ""\"") which initializes from a tensor with "", - node_shape.num_elements(), "" elements""); - } + TF_RETURN_IF_ERROR(ValidateNode(node)); + } + + if (graph_def.has_library()) { + const FunctionDefLibrary& library = graph_def.library(); + for (const auto& function : library.function()) { + for (const auto& node : function.node_def()) { + TF_RETURN_IF_ERROR(ValidateNode(node)); } - } else if (node.op() == ""Const"") { - return errors::FailedPrecondition( - ""Saved model contains node \"""", node.name(), - ""\"" which is a constant tensor but no value has been provided""); } } + return Status::OK(); } ",1,train adf095206f25471e864a8e63a0f1caef53a0e3a6,tensorflow/tensorflow,"Validate `NodeDef`s from `FunctionDefLibrary` of a `GraphDef`. We already validated `NodeDef`s from a `GraphDef` but missed validating those from the `FunctionDefLibrary`. Thus, some maliciously crafted models could evade detection and cause denial of service due to a `CHECK`-fail. 
PiperOrigin-RevId: 332536309 Change-Id: I052efe919ff1fe2f90815e286a1aa4c54c7b94ff",saved_model_bundle_test.cc,"@@ -45,6 +45,8 @@ constexpr char kTestFuzzGeneratedNegativeShape[] = ""cc/saved_model/testdata/fuzz_generated/negative_shape""; constexpr char kTestFuzzGeneratedConstWithNoValue[] = ""cc/saved_model/testdata/fuzz_generated/const_with_no_value""; +constexpr char kTestFuzzGeneratedBadNodeAttr[] = + ""cc/saved_model/testdata/fuzz_generated/bad_node_attr""; class LoaderTest : public ::testing::Test { protected: @@ -328,5 +330,20 @@ TEST_F(LoaderTest, ConstNoValue) { std::string::npos); } +TEST_F(LoaderTest, BadNodeAttr) { + SavedModelBundle bundle; + RunOptions run_options; + SessionOptions session_options; + + const string export_dir = + io::JoinPath(testing::TensorFlowSrcRoot(), kTestFuzzGeneratedBadNodeAttr); + Status st = LoadSavedModel(session_options, run_options, export_dir, + {kSavedModelTagServe}, &bundle); + EXPECT_FALSE(st.ok()); + EXPECT_NE( + st.error_message().find(""constant tensor but no value has been provided""), + std::string::npos); +} + } // namespace } // namespace tensorflow ",1,train 2d88f470dea2671b430884260f3626b1fe99830a,tensorflow/tensorflow,"[tflite] Ensure `ResolveAxis` properly handles negative inputs. In Python, a list `l` of length `n` allows indexing with negative indices, `l[i]`. The only constraint is that `n + i` becomes positive. Code in `ResolveAxis` assumes the constraints and only checks it using a `DCHECK`. But the macro is a no-op in non-debug builds and that can result in reading from negative offsets (buffer underflows). PiperOrigin-RevId: 332530683 Change-Id: I464e073fee618054ae3719a3679739007bb3f3bc",reduce.h,"@@ -70,6 +70,9 @@ inline bool ResolveAxis(const int num_dims, const int* axis, // eg: For num_dims=3, [0, 1, 2] is the same as [-3, -2, -1] */ int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx]; TFLITE_DCHECK(current >= 0 && current < num_dims); + if (current < 0 || current >= num_dims) { + return false; + } bool is_dup = false; for (int j = 0; j < *out_num_axis; ++j) { if (out_axis[j] == current) { ",1,train 8ee24e7949a203d234489f9da2c5bf45a7d5157d,tensorflow/tensorflow,"[tflite] Ensure `MatchingDim` does not allow buffer overflow. We check in `MatchingDim` that both arguments have the same dimensionality, however that is a `DCHECK` only enabled if building in debug mode. Hence, it could be possible to cause buffer overflows by passing in a tensor with larger dimensions as the second argument. To fix, we now make `MatchingDim` return the minimum of the two sizes. A much better fix would be to return a status object but that requires refactoring a large part of the codebase for minor benefits. PiperOrigin-RevId: 332526127 Change-Id: If627d0d2c80a685217b6e0d1e64b0872dbf1c5e4",types.h,"@@ -438,7 +438,7 @@ int MatchingArraySize(const ArrayType1& array1, int index1, inline int MatchingDim(const RuntimeShape& shape1, int index1, const RuntimeShape& shape2, int index2) { TFLITE_DCHECK_EQ(shape1.Dims(index1), shape2.Dims(index2)); - return shape1.Dims(index1); + return std::min(shape1.Dims(index1), shape2.Dims(index2)); } template ",1,train 0b5662bc2be13a8c8f044d925d87fb6e56247cd8,tensorflow/tensorflow,"[tflite] Ensure input tensors don't have `nullptr` buffers. A crafted TFLite model can force a node to have as input a tensor backed by a `nullptr` buffer. 
That is, by carefully changing the buffer index in the flatbuffer serialization, we can force the TFLite interpreter to consider a read-only tensor to be a read-write one and assume that there is an operator that has this tensor as output, writing to it and allocating memory before the tensor is used as input. If this does not happen, we get memory corruption. PiperOrigin-RevId: 332524692 Change-Id: I57ef175152a29020af9ab041dc959e5631dce40f",subgraph.cc,"@@ -19,6 +19,7 @@ limitations under the License. #include #include ""tensorflow/lite/arena_planner.h"" +#include ""tensorflow/lite/builtin_ops.h"" #include ""tensorflow/lite/c/common.h"" #include ""tensorflow/lite/context_util.h"" #include ""tensorflow/lite/core/api/tensor_utils.h"" @@ -1030,6 +1031,19 @@ TfLiteStatus Subgraph::Invoke() { tensor->data_is_stale) { TF_LITE_ENSURE_STATUS(EnsureTensorDataIsReadable(tensor_index)); } + if (tensor->data.raw == nullptr && tensor->bytes > 0) { + if (registration.builtin_code == kTfLiteBuiltinReshape && i == 1) { + // In general, having a tensor here with no buffer will be an error. + // However, for the reshape operator, the second input tensor is only + // used for the shape, not for the data. Thus, null buffer is ok. + continue; + } else { + // In all other cases, we need to return an error as otherwise we will + // trigger a null pointer dereference (likely). + ReportError(""Input tensor %d lacks data"", tensor_index); + return kTfLiteError; + } + } } if (check_cancelled_func_ != nullptr && ",1,train 0b5662bc2be13a8c8f044d925d87fb6e56247cd8,tensorflow/tensorflow,"[tflite] Ensure input tensors don't have `nullptr` buffers. A crafted TFLite model can force a node to have as input a tensor backed by a `nullptr` buffer. That is, by carefully changing the buffer index in the flatbuffer serialization, we can force the TFLite interpreter to consider a read-only tensor to be a read-write one and assume that there is an operator that has this tensor as output, writing to it and allocating memory before the tensor is used as input. If this does not happen, we get memory corruption. PiperOrigin-RevId: 332524692 Change-Id: I57ef175152a29020af9ab041dc959e5631dce40f",model_test.cc,"@@ -438,24 +438,48 @@ TEST(BasicFlatBufferModel, TestParseModelWithSparseTensor) { } // TODO(b/150072943): Add malformed model with sparse tensor tests. -TEST(BasicFlatBufferModel, TestHandleMalformedModel) { - const auto model_paths = { - // These models use the same tensor as both input and ouput of a node - ""tensorflow/lite/testdata/add_shared_tensors.bin"", - }; - - for (const auto& model_path : model_paths) { - std::unique_ptr model = - FlatBufferModel::BuildFromFile(model_path); - ASSERT_NE(model, nullptr); - - tflite::ops::builtin::BuiltinOpResolver resolver; - InterpreterBuilder builder(*model, resolver); - std::unique_ptr interpreter; - ASSERT_EQ(builder(&interpreter), kTfLiteOk); - ASSERT_NE(interpreter, nullptr); - ASSERT_NE(interpreter->AllocateTensors(), kTfLiteOk); - } + +// The models here have at least a node that uses the same tensor as input and +// output. This causes segfaults when trying to eval the operator, hence we try +// to prevent this scenario. The earliest place we can check this is in +// `AllocateTensors`, hence the test checks that `interpreter->AllocateTensors` +// detects these bad models. 
+TEST(BasicFlatBufferModel, TestHandleMalformedModelReuseTensor) { + const auto model_path = + ""tensorflow/lite/testdata/add_shared_tensors.bin""; + + std::unique_ptr model = + FlatBufferModel::BuildFromFile(model_path); + ASSERT_NE(model, nullptr); + + tflite::ops::builtin::BuiltinOpResolver resolver; + InterpreterBuilder builder(*model, resolver); + std::unique_ptr interpreter; + ASSERT_EQ(builder(&interpreter), kTfLiteOk); + ASSERT_NE(interpreter, nullptr); + ASSERT_NE(interpreter->AllocateTensors(), kTfLiteOk); +} + +// The models here have a buffer index for a tensor pointing to a null buffer. +// This results in the tensor being interpreted as read-write, but the model +// assumes the tensor is read-only. As such, `interpreter->Invoke()` would +// segfault if no precondition check is added. The test checks that the +// precondition check exists. +TEST(BasicFlatBufferModel, TestHandleMalformedModelInvalidBuffer) { + const auto model_path = + ""tensorflow/lite/testdata/segment_sum_invalid_buffer.bin""; + + std::unique_ptr model = + FlatBufferModel::BuildFromFile(model_path); + ASSERT_NE(model, nullptr); + + tflite::ops::builtin::BuiltinOpResolver resolver; + InterpreterBuilder builder(*model, resolver); + std::unique_ptr interpreter; + ASSERT_EQ(builder(&interpreter), kTfLiteOk); + ASSERT_NE(interpreter, nullptr); + ASSERT_EQ(interpreter->AllocateTensors(), kTfLiteOk); + ASSERT_NE(interpreter->Invoke(), kTfLiteOk); } // TODO(aselle): Add tests for serialization of builtin op data types. ",1,train d58c96946b2880991d63d1dacacb32f0a4dfa453,tensorflow/tensorflow,"[tflite] Ensure inputs and outputs don't overlap. If a model uses the same tensor for both an input and an output then this can result in data loss and memory corruption. This should not happen. PiperOrigin-RevId: 332522916 Change-Id: If0905b142415a9dfceaf2d181872f2a8fb88f48a",subgraph.cc,"@@ -581,6 +581,33 @@ TfLiteStatus Subgraph::CheckTensorIndices(const char* label, const int* indices, return kTfLiteOk; } +// We have two arrays and we need to check that elements from one array don't +// show up in the other. We could sort both arrays and then iterate with two +// pointers from start to finish always increasing the smaller one but since +// these arrays are usually short (<25 elements for inputs, usually <3 for +// outputs), this might be slower than the naive approach (if arrays have size n +// and m, with n >> m ~ O(1), first approach is O(nlogn) whereas the other is +// O(n)). Plus, sorting the input and output arrays might not be something we +// want as it destroys ordering of elements. +// +// If it turns out that this is an issue, we can switch to the other algorithm. +TfLiteStatus Subgraph::CheckInputAndOutputForOverlap(const int* input_indices, + int num_inputs, + const int* output_indices, + int num_outputs) { + for (int i = 0; i < num_inputs; i++) { + for (int j = 0; j < num_outputs; j++) { + if (input_indices[i] == output_indices[j]) { + ReportError(""Tensor %d is both input %d and output %d\n"", + input_indices[i], i, j); + consistent_ = false; + return kTfLiteError; + } + } + } + return kTfLiteOk; +} + namespace { // Multiply two sizes and return true if overflow occurred; // This is based off tensorflow/overflow.h but is simpler as we already @@ -707,6 +734,16 @@ TfLiteStatus Subgraph::AddNodeWithParameters( &context_, CheckTensorIndices(""node outputs"", outputs.data(), outputs.size())); + // For builtin ops, inputs and outputs must not overlap. 
Custom ops must do + // this check by themselves if they don't support overlapping tensors. This + // distinction is to allow custom ops to just forward a tensor, reusing it as + // both input and output. + if (builtin_data != nullptr) { + TF_LITE_ENSURE_OK(&context_, CheckInputAndOutputForOverlap( + inputs.data(), inputs.size(), + outputs.data(), outputs.size())); + } + int new_node_index = nodes_and_registration_.size(); if (node_index) *node_index = new_node_index; nodes_and_registration_.resize(nodes_and_registration_.size() + 1); ",1,test d58c96946b2880991d63d1dacacb32f0a4dfa453,tensorflow/tensorflow,"[tflite] Ensure inputs and outputs don't overlap. If a model uses the same tensor for both an input and an output then this can result in data loss and memory corruption. This should not happen. PiperOrigin-RevId: 332522916 Change-Id: If0905b142415a9dfceaf2d181872f2a8fb88f48a",subgraph.h,"@@ -451,6 +451,15 @@ class Subgraph { TfLiteStatus CheckTensorIndices(const char* label, const int* indices, int length); + // Check that the input indices and the output indices don't overlap. + // This is needed because same tensor must not be used both as input and + // output for an operator. + // NOTE: this changes consistent_ to be false if indices are out of bounds. + TfLiteStatus CheckInputAndOutputForOverlap(const int* input_indices, + int num_inputs, + const int* output_indices, + int num_outputs); + // Compute the number of bytes required to represent a tensor with dimensions // specified by the array dims (of length dims_size). Returns the status code // and bytes. ",1,test d58c96946b2880991d63d1dacacb32f0a4dfa453,tensorflow/tensorflow,"[tflite] Ensure inputs and outputs don't overlap. If a model uses the same tensor for both an input and an output then this can result in data loss and memory corruption. This should not happen. PiperOrigin-RevId: 332522916 Change-Id: If0905b142415a9dfceaf2d181872f2a8fb88f48a",model_test.cc,"@@ -438,6 +438,25 @@ TEST(BasicFlatBufferModel, TestParseModelWithSparseTensor) { } // TODO(b/150072943): Add malformed model with sparse tensor tests. +TEST(BasicFlatBufferModel, TestHandleMalformedModel) { + const auto model_paths = { + // These models use the same tensor as both input and ouput of a node + ""tensorflow/lite/testdata/add_shared_tensors.bin"", + }; + + for (const auto& model_path : model_paths) { + std::unique_ptr model = + FlatBufferModel::BuildFromFile(model_path); + ASSERT_NE(model, nullptr); + + tflite::ops::builtin::BuiltinOpResolver resolver; + InterpreterBuilder builder(*model, resolver); + std::unique_ptr interpreter; + ASSERT_EQ(builder(&interpreter), kTfLiteOk); + ASSERT_NE(interpreter, nullptr); + ASSERT_NE(interpreter->AllocateTensors(), kTfLiteOk); + } +} // TODO(aselle): Add tests for serialization of builtin op data types. // These tests will occur with the evaluation tests of individual operators, ",1,test 00302787b788c5ff04cb6f62aed5a74d936e86c0,tensorflow/tensorflow,"[tflite] Make `GetOptionalInputTensor` the same as `GetInput`. With the previous change, there is no more need for two separate APIs. We would deprecate `GetOptionalInputTensor` in the future. 
PiperOrigin-RevId: 332513386 Change-Id: Id7110271c25ebd6126ad8c82a493e37e0e0756b3",kernel_util.cc,"@@ -75,12 +75,7 @@ TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node, const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context, const TfLiteNode* node, int index) { - const bool use_tensor = index < node->inputs->size && - node->inputs->data[index] != kTfLiteOptionalTensor; - if (use_tensor) { - return GetMutableInput(context, node, index); - } - return nullptr; + return GetInput(context, node, index); } // Per-axis ",1,test 46d5b0852528ddfd614ded79bccc75589f801bd9,tensorflow/tensorflow,"[tflite] Test for `kTfLiteOptionalTensor` in `GetInput`. `GetInput`, `GetVariableInput` and `GetOutput` all fail to check for the case where `node->inputs->data[index]` is the special `kTfLiteOptionalTensor` value (-1) which then causes `context->tensors[node->inputs->data[index]]` to read from invalid memory location. This fix makes `GetInput` and related return `nullptr` in those cases, asking the caller to check for `nullptr`. This is better than having `GetOptionalInputTensor` and `GetOptionalOutputTensor` (does not exist but could be added) as using the patched `GetInput` in error would be caught by a sanitizer test in the default optimized build (due to the `-fsanitize=null` option). PiperOrigin-RevId: 332512190 Change-Id: Iabca54da2f2de02b6ece3c38b54f76d4277d689e",kernel_util.cc,"@@ -32,11 +32,17 @@ namespace { inline TfLiteTensor* GetMutableInput(const TfLiteContext* context, const TfLiteNode* node, int index) { - if (context->tensors != nullptr) { - return &context->tensors[node->inputs->data[index]]; - } else { - return context->GetTensor(context, node->inputs->data[index]); + if (index >= 0 && index < node->inputs->size) { + const int tensor_index = node->inputs->data[index]; + if (tensor_index != kTfLiteOptionalTensor) { + if (context->tensors != nullptr) { + return &context->tensors[tensor_index]; + } else { + return context->GetTensor(context, tensor_index); + } + } } + return nullptr; } } // anonymous namespace. @@ -54,11 +60,17 @@ TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node, TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node, int index) { - if (context->tensors != nullptr) { - return &context->tensors[node->outputs->data[index]]; - } else { - return context->GetTensor(context, node->outputs->data[index]); + if (index >= 0 && index < node->outputs->size) { + const int tensor_index = node->outputs->data[index]; + if (tensor_index != kTfLiteOptionalTensor) { + if (context->tensors != nullptr) { + return &context->tensors[tensor_index]; + } else { + return context->GetTensor(context, tensor_index); + } + } } + return nullptr; } const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context, ",1,train 46d5b0852528ddfd614ded79bccc75589f801bd9,tensorflow/tensorflow,"[tflite] Test for `kTfLiteOptionalTensor` in `GetInput`. `GetInput`, `GetVariableInput` and `GetOutput` all fail to check for the case where `node->inputs->data[index]` is the special `kTfLiteOptionalTensor` value (-1) which then causes `context->tensors[node->inputs->data[index]]` to read from invalid memory location. This fix makes `GetInput` and related return `nullptr` in those cases, asking the caller to check for `nullptr`. 
This is better than having `GetOptionalInputTensor` and `GetOptionalOutputTensor` (does not exist but could be added) as using the patched `GetInput` in error would be caught by a sanitizer test in the default optimized build (due to the `-fsanitize=null` option). PiperOrigin-RevId: 332512190 Change-Id: Iabca54da2f2de02b6ece3c38b54f76d4277d689e",kernel_util.h,"@@ -29,18 +29,46 @@ namespace tflite { // benchmark_model for MobileNet + MobileBERT is unaffected. If such a change is // made, move the newly non-inlined function declarations to the top of this // header file. + +// Note: You must check if result is not null: +// +// TfLiteTensor* my_tensor = GetInput(context, node, kMyTensorIdx); +// TF_LITE_ENSURE(context, my_tensor != nullptr); +// +// This is because the index might point to the optional tensor constant +// (kTfLiteOptionalTensor) in which case there is no tensor to return. const TfLiteTensor* GetInput(const TfLiteContext* context, const TfLiteNode* node, int index); // Note: You must check if result is not null: -// TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx); -// TF_LITE_ENSURE(context, my_tensor != nullptr); +// +// TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx); +// TF_LITE_ENSURE(context, my_tensor != nullptr); +// +// This is because the index might point to the optional tensor constant +// (kTfLiteOptionalTensor) in which case there is no tensor to return. TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node, int index); +// Note: You must check if result is not null: +// +// TfLiteTensor* my_tensor = GetOutput(context, node, kMyTensorIdx); +// TF_LITE_ENSURE(context, my_tensor != nullptr); +// +// This is because the index might point to the optional tensor constant +// (kTfLiteOptionalTensor) in which case there is no tensor to return. TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node, int index); +// Note: You must check if result is not null: +// +// TfLiteTensor* my_tensor = GetOptionalInputTensor(context, node, kIdx); +// TF_LITE_ENSURE(context, my_tensor != nullptr); +// +// This is because the index might point to the optional tensor constant +// (kTfLiteOptionalTensor) in which case there is no tensor to return. +// +// Deprecated. GetInput has the same functionality. const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context, const TfLiteNode* node, int index); @@ -50,14 +78,46 @@ inline int SizeOfDimension(const TfLiteTensor* t, int dim) { } #ifndef TF_LITE_STATIC_MEMORY +// Note: You must check if result is not null: +// +// TfLiteTensor* my_tensor = GetTemporary(context, node, kMyTensorIdx); +// TF_LITE_ENSURE(context, my_tensor != nullptr); +// +// This is because the index might point to the optional tensor constant +// (kTfLiteOptionalTensor) in which case there is no tensor to return. 
inline TfLiteTensor* GetTemporary(TfLiteContext* context, const TfLiteNode* node, int index) { - return &context->tensors[node->temporaries->data[index]]; + if (index >= 0 && index < node->temporaries->size) { + const int tensor_index = node->temporaries->data[index]; + if (tensor_index != kTfLiteOptionalTensor) { + if (context->tensors != nullptr) { + return &context->tensors[tensor_index]; + } + } + } + return nullptr; } + +// Note: You must check if result is not null: +// +// TfLiteTensor* my_tensor = GetIntermediates(context, node, kMyTensorIdx); +// TF_LITE_ENSURE(context, my_tensor != nullptr); +// +// This is because the index might point to the optional tensor constant +// (kTfLiteOptionalTensor) in which case there is no tensor to return. inline const TfLiteTensor* GetIntermediates(TfLiteContext* context, const TfLiteNode* node, int index) { - return &context->tensors[node->intermediates->data[index]]; + if (index >= 0 && index < node->intermediates->size) { + const int tensor_index = node->intermediates->data[index]; + if (tensor_index != kTfLiteOptionalTensor) { + if (context->tensors != nullptr) { + return &context->tensors[tensor_index]; + } + } + } + return nullptr; } + inline int NumIntermediates(const TfLiteNode* node) { return node->intermediates->size; } ",1,train cd31fd0ce0449a9e0f83dcad08d6ed7f1d6bef3f,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332518902 Change-Id: I92eb164a6101ac3cca66090061a9b56a97288236",test_helpers.cc,"@@ -601,7 +601,8 @@ TfLiteStatus SimpleStatefulOp::Prepare(TfLiteContext* context, OpData* data = reinterpret_cast(node->user_data); // Make sure that the input is in uint8_t with at least 1 data entry. 
- const TfLiteTensor* input = tflite::GetInput(context, node, kInputTensor); + const TfLiteTensor* input; + TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input)); if (input->type != kTfLiteUInt8) return kTfLiteError; if (NumElements(input->dims) == 0) return kTfLiteError; @@ -622,7 +623,8 @@ TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context, OpData* data = reinterpret_cast(node->user_data); *data->invoke_count += 1; - const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const TfLiteTensor* input; + TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input)); const uint8_t* input_data = GetTensorData(input); int size = NumElements(input->dims); @@ -641,9 +643,13 @@ TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context, } } - TfLiteTensor* median = GetOutput(context, node, kMedianTensor); + TfLiteTensor* median; + TF_LITE_ENSURE_OK(context, + GetOutputSafe(context, node, kMedianTensor, &median)); uint8_t* median_data = GetTensorData(median); - TfLiteTensor* invoke_count = GetOutput(context, node, kInvokeCount); + TfLiteTensor* invoke_count; + TF_LITE_ENSURE_OK(context, + GetOutputSafe(context, node, kInvokeCount, &invoke_count)); int32_t* invoke_count_data = GetTensorData(invoke_count); median_data[0] = sorting_buffer[size / 2]; @@ -681,11 +687,14 @@ TfLiteStatus MockCustom::Prepare(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus MockCustom::Invoke(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = tflite::GetInput(context, node, 0); + const TfLiteTensor* input; + TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input)); const int32_t* input_data = input->data.i32; - const TfLiteTensor* weight = tflite::GetInput(context, node, 1); + const TfLiteTensor* weight; + TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &weight)); const uint8_t* weight_data = weight->data.uint8; - TfLiteTensor* output = GetOutput(context, node, 0); + TfLiteTensor* output; + TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output)); int32_t* output_data = output->data.i32; output_data[0] = 0; // Catch output tensor sharing memory with an input tensor ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). 
PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",activations.cc,"@@ -139,7 +139,9 @@ TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) { ReluOpData* data = static_cast(node->user_data); const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); if (input->type == kTfLiteInt8) { CalculateReluOpData(input, output, data); @@ -200,6 +202,7 @@ TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) { Relu6OpData* data = static_cast(node->user_data); const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); if (input->type == kTfLiteInt8) { data->six_int8 = FloatToAsymmetricQuantizedInt8(6.0f, input->params.scale, ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",add.cc,"@@ -201,8 +201,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->builtin_data != nullptr); const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); + TF_LITE_ENSURE(context, input1 != nullptr); const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); + TF_LITE_ENSURE(context, input2 != nullptr); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); OpData* data = static_cast(node->user_data); auto* params = reinterpret_cast(node->builtin_data); ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). 
PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",ceil.cc,"@@ -30,7 +30,9 @@ constexpr int kOutputTensor = 0; TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32); ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",circular_buffer.cc,"@@ -77,7 +77,9 @@ void Free(TfLiteContext* context, void* buffer) { op_data_counter = 0; } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE(context, input != nullptr); TF_LITE_ENSURE(context, output != nullptr); ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",comparisons.cc,"@@ -619,7 +619,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { OpData* data = static_cast(node->user_data); const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); + TF_LITE_ENSURE(context, input1 != nullptr); const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); + TF_LITE_ENSURE(context, input2 != nullptr); if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) { auto input1_offset = -input1->params.zero_point; ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. 
We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",concatenation.cc,"@@ -136,8 +136,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteConcatenationParams* params = reinterpret_cast(node->builtin_data); - TfLiteType input_type = GetInput(context, node, 0)->type; - TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type; + const TfLiteTensor* input_tensor = GetInput(context, node, 0); + TF_LITE_ENSURE(context, input_tensor != nullptr); + TfLiteType input_type = input_tensor->type; + const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output_tensor != nullptr); + TfLiteType output_type = output_tensor->type; // Check activation and input type TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone); @@ -156,6 +160,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Shapes with dimensions >4 are not yet supported with static allocation. for (int i = 0; i < num_inputs; ++i) { const TfLiteTensor* input = GetInput(context, node, i); + TF_LITE_ENSURE(context, input != nullptr); int num_dimensions = NumDimensions(input); if (num_dimensions > 4) { @@ -173,6 +178,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { OpData* data = static_cast(node->user_data); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); switch (output_type) { // Already know in/outtypes are same. case kTfLiteFloat32: @@ -199,6 +205,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Store input scale and zero point values in OpParams: for (int i = 0; i < node->inputs->size; ++i) { const TfLiteTensor* t = GetInput(context, node, i); + TF_LITE_ENSURE(context, t != nullptr); input_scales[i] = t->params.scale; input_zero_points[i] = t->params.zero_point; } @@ -220,7 +227,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type; + const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output_tensor != nullptr); + TfLiteType output_type = output_tensor->type; switch (output_type) { // Already know in/outtypes are same. case kTfLiteFloat32: ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). 
PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",conv.cc,"@@ -97,10 +97,13 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, // parameters set. This is usually done during quantized training. if (data_type != kTfLiteFloat32) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); + TF_LITE_ENSURE(context, filter != nullptr); const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); int output_channels = filter->dims->data[kConvQuantizedDimension]; TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams( @@ -127,8 +130,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const auto params = static_cast(node->builtin_data); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); + TF_LITE_ENSURE(context, filter != nullptr); int input_width = input->dims->data[2]; int input_height = input->dims->data[1]; ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",depthwise_conv.cc,"@@ -82,10 +82,13 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, // parameters set. This is usually done during quantized training. 
if (data_type != kTfLiteFloat32) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); + TF_LITE_ENSURE(context, filter != nullptr); const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; return tflite::PopulateConvolutionQuantizationParams( @@ -114,8 +117,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { OpData* data = static_cast(node->user_data); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); + TF_LITE_ENSURE(context, filter != nullptr); const TfLiteType data_type = input->type; int width = SizeOfDimension(input, 2); ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",dequantize.cc,"@@ -52,7 +52,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // TODO(b/140515557): Add cached dequant to improve hybrid model performance. const TfLiteTensor* input = GetInput(context, node, 0); + TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* output = GetOutput(context, node, 0); + TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE(context, input->type == kTfLiteUInt8 || input->type == kTfLiteInt8 || ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). 
PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",elementwise.cc,"@@ -41,7 +41,9 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); const TfLiteTensor* input = GetInput(context, node, 0); + TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* output = GetOutput(context, node, 0); + TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); if (!IsSupportedType(input->type)) { TF_LITE_KERNEL_LOG(context, ""Input data type %s (%d) is not supported."", ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",fully_connected.cc,"@@ -93,9 +93,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { static_cast(node->builtin_data); const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor); + TF_LITE_ENSURE(context, filter != nullptr); const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); TF_LITE_ENSURE_MSG(context, input->type == filter->type, ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",hard_swish.cc,"@@ -45,7 +45,9 @@ TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) { HardSwishParams* params = static_cast(node->user_data); ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. 
As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",l2norm.cc,"@@ -50,7 +50,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE(context, NumDimensions(input) <= 4); ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",logistic.cc,"@@ -43,7 +43,9 @@ struct OpData { TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node, OpData* data) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); if (input->type == kTfLiteInt8) { ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). 
PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",mul.cc,"@@ -51,8 +51,11 @@ struct OpData { TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, TfLiteMulParams* params, OpData* data) { const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor); + TF_LITE_ENSURE(context, input1 != nullptr); const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor); + TF_LITE_ENSURE(context, input2 != nullptr); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",pad.cc,"@@ -50,10 +50,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); const TfLiteTensor* input = GetInput(context, node, /*index=*/0); + TF_LITE_ENSURE(context, input != nullptr); const TfLiteTensor* paddings = GetInput(context, node, /*index=*/1); + TF_LITE_ENSURE(context, paddings != nullptr); const TfLiteTensor* constant_values = NumInputs(node) == 3 ? GetInput(context, node, /*index=*/2) : nullptr; TfLiteTensor* output = GetOutput(context, node, /*index=*/0); + TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE_EQ(context, input->type, output->type); ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",pooling.cc,"@@ -222,7 +222,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { OpData* data = static_cast(node->user_data); const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data)); ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. 
As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",prelu.cc,"@@ -95,8 +95,11 @@ TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) { PreluParams* params = static_cast(node->user_data); const TfLiteTensor* input = GetInput(context, node, 0); + TF_LITE_ENSURE(context, input != nullptr); const TfLiteTensor* alpha = GetInput(context, node, 1); + TF_LITE_ENSURE(context, alpha != nullptr); TfLiteTensor* output = GetOutput(context, node, 0); + TF_LITE_ENSURE(context, output != nullptr); return CalculatePreluParams(input, alpha, output, params); } ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",quantize.cc,"@@ -50,7 +50,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); const TfLiteTensor* input = GetInput(context, node, 0); + TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* output = GetOutput(context, node, 0); + TF_LITE_ENSURE(context, output != nullptr); // TODO(b/128934713): Add support for fixed-point per-channel quantization. // Currently this only support affine per-layer quantization. ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). 
PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",reduce.cc,"@@ -64,6 +64,7 @@ TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) { // Validate axis type const TfLiteTensor* axis = GetInput(context, node, 1); + TF_LITE_ENSURE(context, axis != nullptr); TF_LITE_ENSURE_TYPES_EQ(context, axis->type, kTfLiteInt32); if (input->type == kTfLiteInt8) { ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",reshape.cc,"@@ -32,7 +32,9 @@ constexpr int kOutputTensor = 0; TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); // Tensorflow's Reshape allows one of the shape components to have the // special -1 value, meaning it will be calculated automatically based on the // input. Here we calculate what that dimension should be so that the number ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",round.cc,"@@ -30,7 +30,9 @@ constexpr int kOutputTensor = 0; TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32); ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. 
We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",softmax.cc,"@@ -119,9 +119,11 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); const TfLiteTensor* input = GetInput(context, node, 0); + TF_LITE_ENSURE(context, input != nullptr); TF_LITE_ENSURE(context, NumDimensions(input) >= 1); TfLiteTensor* output = GetOutput(context, node, 0); + TF_LITE_ENSURE(context, output != nullptr); TFLITE_DCHECK(node->user_data != nullptr); SoftmaxParams* data = static_cast(node->user_data); ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",split.cc,"@@ -69,6 +69,7 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node, TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* axis = GetInput(context, node, 0); + TF_LITE_ENSURE(context, axis != nullptr); // Dynamic output tensors are needed if axis tensor is not constant. // But Micro doesn't support dynamic memory allocation, so we only support ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). 
PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",sub.cc,"@@ -108,8 +108,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); + TF_LITE_ENSURE(context, input1 != nullptr); const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); + TF_LITE_ENSURE(context, input2 != nullptr); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE_STATUS( CalculateOpData(context, params, input1, input2, output, data)); ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",svdf.cc,"@@ -366,13 +366,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // [4] = Activation State (variable), // {2, batch_size, memory_size * num_filters} const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); const TfLiteTensor* weights_feature = GetInput(context, node, kWeightsFeatureTensor); + TF_LITE_ENSURE(context, weights_feature != nullptr); const TfLiteTensor* weights_time = GetInput(context, node, kWeightsTimeTensor); + TF_LITE_ENSURE(context, weights_time != nullptr); const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); const TfLiteTensor* activation_state = GetInput(context, node, kInputActivationStateTensor); + TF_LITE_ENSURE(context, activation_state != nullptr); // Define input constants based on input tensor definition above: const int rank = params->rank; @@ -392,6 +396,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // [0] = float/int8_t, {2, batch_size, num_units} TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2); TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size); TF_LITE_ENSURE_EQ(context, output->dims->data[1], num_units); ",1,train fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors. As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages. We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`). 
PiperOrigin-RevId: 332520146 Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",tanh.cc,"@@ -51,7 +51,9 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node, TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); @@ -76,6 +78,7 @@ TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) { OpData* data = static_cast(node->user_data); const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); data->input_zero_point = input->params.zero_point; return CalculateArithmeticOpData(context, node, data); } ",1,train 204945b19e44b57906c9344c0d00120eeeae178a,tensorflow/tensorflow,"[tflite] Validate segment ids for segment_sum. Segment identifiers in segment_sum should be in a 1-D tensor of same size as the first dimension of the input. The values of the tensor should be integers from {0, 1, 2, ... k-1}, where k is the first dimension of the input. The segment identifiers must not contain jumps and must be increasing. See https://www.tensorflow.org/api_docs/python/tf/math#Segmentation as the source for these constraints. PiperOrigin-RevId: 332510942 Change-Id: I898beaba00642c918bcd4b4d4ce893ebb190d869",segment_sum.cc,"@@ -34,11 +34,24 @@ TfLiteStatus ResizeOutputTensor(TfLiteContext* context, const TfLiteTensor* data, const TfLiteTensor* segment_ids, TfLiteTensor* output) { - int max_index = -1; + // Segment ids should be of same cardinality as first input dimension and they + // should be increasing by at most 1, from 0 (e.g., [0, 0, 1, 2, 3] is valid) const int segment_id_size = segment_ids->dims->data[0]; - if (segment_id_size > 0) { - max_index = segment_ids->data.i32[segment_id_size - 1]; + TF_LITE_ENSURE_EQ(context, segment_id_size, data->dims->data[0]); + int previous_segment_id = -1; + for (int i = 0; i < segment_id_size; i++) { + const int current_segment_id = GetTensorData(segment_ids)[i]; + if (i == 0) { + TF_LITE_ENSURE_EQ(context, current_segment_id, 0); + } else { + int delta = current_segment_id - previous_segment_id; + TF_LITE_ENSURE(context, delta == 0 || delta == 1); + } + previous_segment_id = current_segment_id; } + + const int max_index = previous_segment_id; + const int data_rank = NumDimensions(data); TfLiteIntArray* output_shape = TfLiteIntArrayCreate(NumDimensions(data)); output_shape->data[0] = max_index + 1; ",1,train 204945b19e44b57906c9344c0d00120eeeae178a,tensorflow/tensorflow,"[tflite] Validate segment ids for segment_sum. Segment identifiers in segment_sum should be in a 1-D tensor of same size as the first dimension of the input. The values of the tensor should be integers from {0, 1, 2, ... k-1}, where k is the first dimension of the input. The segment identifiers must not contain jumps and must be increasing. See https://www.tensorflow.org/api_docs/python/tf/math#Segmentation as the source for these constraints. 
PiperOrigin-RevId: 332510942 Change-Id: I898beaba00642c918bcd4b4d4ce893ebb190d869",segment_sum_test.cc,"@@ -110,5 +110,37 @@ TEST(SegmentSumOpModelTest, Float32Test_ThreeDimensions) { EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 2, 1})); } +TEST(SegmentSumOpModelTest, TestFailIfSegmentsAreNotSorted) { + SegmentSumOpModel model({TensorType_INT32, {3, 2}}, + {TensorType_INT32, {3}}); + model.PopulateTensor(model.data(), {1, 2, 3, 4, 5, 6}); + model.PopulateTensor(model.segment_ids(), {0, 3, 1}); + ASSERT_EQ(model.InvokeUnchecked(), kTfLiteError); +} + +TEST(SegmentSumOpModelTest, TestFailIfSegmentsAreNotConsecutive) { + SegmentSumOpModel model({TensorType_INT32, {3, 2}}, + {TensorType_INT32, {3}}); + model.PopulateTensor(model.data(), {1, 2, 3, 4, 5, 6}); + model.PopulateTensor(model.segment_ids(), {0, 3, 5}); + ASSERT_EQ(model.InvokeUnchecked(), kTfLiteError); +} + +TEST(SegmentSumOpModelTest, TestFailIfSegmentsAreNegative) { + SegmentSumOpModel model({TensorType_INT32, {3, 2}}, + {TensorType_INT32, {3}}); + model.PopulateTensor(model.data(), {1, 2, 3, 4, 5, 6}); + model.PopulateTensor(model.segment_ids(), {-1, 0, 1}); + ASSERT_EQ(model.InvokeUnchecked(), kTfLiteError); +} + +TEST(SegmentSumOpModelTest, TestFailIfSegmentsAreNotTheRightCardinality) { + SegmentSumOpModel model({TensorType_INT32, {3, 2}}, + {TensorType_INT32, {2}}); + model.PopulateTensor(model.data(), {1, 2, 3, 4, 5, 6}); + model.PopulateTensor(model.segment_ids(), {0, 1}); + ASSERT_EQ(model.InvokeUnchecked(), kTfLiteError); +} + } // namespace } // namespace tflite ",1,train eccb7ec454e6617738554a255d77f08e60ee0808,tensorflow/tensorflow,"Prevent segfault in `quantize_and_dequantize` Fixes #42105. If `tf.quantization.quantize_and_dequantize` is called with `axis` argument pointing to outside of the input tensor, we obtain a `CHECK` fail which then aborts the application/interpreter. This change adds a condition check and returns a `Status` instead of crashing. PiperOrigin-RevId: 337972243 Change-Id: I71ec32c00a87266e364fb017f0ad5dfd3e23542f",quantize_and_dequantize_op.cc,"@@ -71,6 +71,10 @@ class QuantizeAndDequantizeV2Op : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); + OP_REQUIRES( + ctx, (axis_ == -1 || axis_ < input.shape().dims()), + errors::InvalidArgument(""Shape must be at least rank "", axis_ + 1, + "" but is rank "", input.shape().dims())); const int depth = (axis_ == -1) ? 1 : input.dim_size(axis_); Tensor input_min_tensor; Tensor input_max_tensor; ",1,train eccb7ec454e6617738554a255d77f08e60ee0808,tensorflow/tensorflow,"Prevent segfault in `quantize_and_dequantize` Fixes #42105. If `tf.quantization.quantize_and_dequantize` is called with `axis` argument pointing to outside of the input tensor, we obtain a `CHECK` fail which then aborts the application/interpreter. This change adds a condition check and returns a `Status` instead of crashing. 
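A minimal Python sketch of that validation rule, mirroring the loop added above (ids must have one entry per input row, start at 0, and step by 0 or 1):

def validate_segment_ids(segment_ids, first_dim):
    # One segment id per row of the input.
    if len(segment_ids) != first_dim:
        raise ValueError("segment_ids must have one entry per input row")
    previous = -1
    for i, current in enumerate(segment_ids):
        if i == 0 and current != 0:
            raise ValueError("segment ids must start at 0")
        if i > 0 and current - previous not in (0, 1):
            raise ValueError("segment ids must be non-decreasing without jumps")
        previous = current
    return previous + 1  # number of segments, i.e. rows in the output

validate_segment_ids([0, 0, 1, 2, 3], first_dim=5)  # ok -> 4 segments
# validate_segment_ids([0, 3, 1], first_dim=3)      # raises: jump from 0 to 3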
PiperOrigin-RevId: 337972243 Change-Id: I71ec32c00a87266e364fb017f0ad5dfd3e23542f",array_ops_test.py,"@@ -1628,6 +1628,22 @@ class QuantizeAndDequantizeTest(test_util.TensorFlowTestCase): axis=(axis - 4))) self.assertAllClose(fake_quantized, expected) + def testBadAxis(self): + input_tensor = [2.5, 2.5] + input_min = [0, 0] + input_max = [1, 1] + error_message_pattern = ""Shape must be at least rank 11 but is rank 1"" + # TODO(b/171260356): Eager mode and graph mode throw different error types + error = errors.InvalidArgumentError if context.executing_eagerly( + ) else ValueError + with self.assertRaisesRegex(error, error_message_pattern): + self.evaluate( + array_ops.quantize_and_dequantize_v2( + input=input_tensor, + input_min=input_min, + input_max=input_max, + axis=10)) + def testQuantizeDequantizeGrad(self): shape = (2, 2) max_threshold = 0 ",1,train ace0c15a22f7f054abcc1f53eabbcb0a1239a9e2,tensorflow/tensorflow,"Default initialize fixed point Eigen types. In certain cases, tensors are filled with default values of the type. But, for these fixed point types, these values were uninitialized. Thus, we would have uninitialized memory access bugs, some of which were caught by MSAN. PiperOrigin-RevId: 344101137 Change-Id: I14555fda74dca3b5f1582da9008901937e3f14e2",FixedPointTypes.h,"@@ -49,7 +49,7 @@ struct scalar_product_traits { // the compiler from silently type cast the mantissa into a bigger or a smaller // representation. struct QInt8 { - QInt8() {} + QInt8() : value(0) {} QInt8(const int8_t v) : value(v) {} QInt8(const QInt32 v); @@ -59,7 +59,7 @@ struct QInt8 { }; struct QUInt8 { - QUInt8() {} + QUInt8() : value(0) {} QUInt8(const uint8_t v) : value(v) {} QUInt8(const QInt32 v); @@ -69,7 +69,7 @@ struct QUInt8 { }; struct QInt16 { - QInt16() {} + QInt16() : value(0) {} QInt16(const int16_t v) : value(v) {} QInt16(const QInt32 v); operator int() const { return static_cast(value); } @@ -78,7 +78,7 @@ struct QInt16 { }; struct QUInt16 { - QUInt16() {} + QUInt16() : value(0) {} QUInt16(const uint16_t v) : value(v) {} QUInt16(const QInt32 v); operator int() const { return static_cast(value); } @@ -87,7 +87,7 @@ struct QUInt16 { }; struct QInt32 { - QInt32() {} + QInt32() : value(0) {} QInt32(const int8_t v) : value(v) {} QInt32(const int32_t v) : value(v) {} QInt32(const uint32_t v) : value(static_cast(v)) {} ",1,train ebc70b7a592420d3d2f359e4b1694c236b82c7ae,tensorflow/tensorflow,"Validate that `DataFormat*` attributes form a permutation. The `src_format` and `dst_format` attributes for the `DataFormatDimMap` and `DataFormatVecPermute` raw ops are supposed to determine a permutation. However, this was not validated and could result in unitialized memory accesses as well as writes outside of bounds and potential crashes. While here, we also test that the format attributes have the needed length, add tests for all validation failure cases, remove unnecessary calls to `strings::StrCat`, and fix a few grammar errors. This will be cherry-picked on the supported release branches. PiperOrigin-RevId: 346135579 Change-Id: I1c76392382c89ad8f072d5bc93d70669851eb404",data_format_ops.cc,"@@ -18,16 +18,52 @@ limitations under the License. 
#define EIGEN_USE_THREADS #include ""tensorflow/core/kernels/data_format_ops.h"" + +#include + #include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/register_types.h"" #include ""tensorflow/core/framework/tensor.h"" +#include ""tensorflow/core/platform/errors.h"" namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; +// Ensure that `src` and `dst` define a valid permutation. +// Ops defined in this file assume that user specifies a permutation via two +// string attributes. This check validates that these attributes properly define +// it to prevent security vulnerabilities. +static bool IsValidPermutation(const std::string& src, const std::string& dst) { + if (src.size() != dst.size()) { + return false; + } + + std::map characters; + + // Every character in `src` must be present only once + for (const auto c : src) { + if (characters[c]) { + return false; + } + characters[c] = true; + } + + // Every character in `dst` must show up in `src` exactly once + for (const auto c : dst) { + if (!characters[c]) { + return false; + } + characters[c] = false; + } + + // At this point, characters[] has been switched to true and false exactly + // once for all character in `src` (and `dst`) so we have a valid permutation + return true; +} + template class DataFormatDimMapOp : public OpKernel { public: @@ -38,15 +74,19 @@ class DataFormatDimMapOp : public OpKernel { string dst_format; OP_REQUIRES_OK(context, context->GetAttr(""dst_format"", &dst_format)); OP_REQUIRES(context, src_format.size() == 4 || src_format.size() == 5, - errors::InvalidArgument(strings::StrCat( - ""Source format must of length 4 or 5, received "" + errors::InvalidArgument( + ""Source format must be of length 4 or 5, received "" ""src_format = "", - src_format))); + src_format)); + OP_REQUIRES(context, dst_format.size() == 4 || dst_format.size() == 5, + errors::InvalidArgument(""Destination format must be of length "" + ""4 or 5, received dst_format = "", + dst_format)); OP_REQUIRES( - context, dst_format.size() == 4 || dst_format.size() == 5, - errors::InvalidArgument(strings::StrCat( - ""Destination format must of length 4 or 5, received dst_format = "", - dst_format))); + context, IsValidPermutation(src_format, dst_format), + errors::InvalidArgument( + ""Destination and source format must determine a permutation, got "", + src_format, "" and "", dst_format)); dst_idx_ = Tensor(DT_INT32, {static_cast(src_format.size())}); for (int i = 0; i < src_format.size(); ++i) { for (int j = 0; j < dst_format.size(); ++j) { @@ -78,8 +118,22 @@ class DataFormatVecPermuteOp : public OpKernel { : OpKernel(context) { string src_format; OP_REQUIRES_OK(context, context->GetAttr(""src_format"", &src_format)); + OP_REQUIRES(context, src_format.size() == 4 || src_format.size() == 5, + errors::InvalidArgument( + ""Source format must be of length 4 or 5, received "" + ""src_format = "", + src_format)); string dst_format; OP_REQUIRES_OK(context, context->GetAttr(""dst_format"", &dst_format)); + OP_REQUIRES(context, dst_format.size() == 4 || dst_format.size() == 5, + errors::InvalidArgument(""Destination format must be of length "" + ""4 or 5, received dst_format = "", + dst_format)); + OP_REQUIRES( + context, IsValidPermutation(src_format, dst_format), + errors::InvalidArgument( + ""Destination and source format must determine a permutation, got "", + src_format, "" and "", dst_format)); src_format_ = 
src_format; dst_format_ = dst_format; } @@ -127,6 +181,10 @@ class DataFormatVecPermuteOp : public OpKernel { }; keep_only_spatial_dimensions(&src_format_str); keep_only_spatial_dimensions(&dst_format_str); + OP_REQUIRES(context, + src_format_str.size() == 2 && dst_format_str.size() == 2, + errors::InvalidArgument( + ""Format specifier must contain H and W for 2D case"")); } ComputeDstIndex(src_format_str, dst_format_str, input.dims(), &dst_idx); ",1,train ebc70b7a592420d3d2f359e4b1694c236b82c7ae,tensorflow/tensorflow,"Validate that `DataFormat*` attributes form a permutation. The `src_format` and `dst_format` attributes for the `DataFormatDimMap` and `DataFormatVecPermute` raw ops are supposed to determine a permutation. However, this was not validated and could result in unitialized memory accesses as well as writes outside of bounds and potential crashes. While here, we also test that the format attributes have the needed length, add tests for all validation failure cases, remove unnecessary calls to `strings::StrCat`, and fix a few grammar errors. This will be cherry-picked on the supported release branches. PiperOrigin-RevId: 346135579 Change-Id: I1c76392382c89ad8f072d5bc93d70669851eb404",nn_test.py,"@@ -27,6 +27,7 @@ from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util @@ -1260,6 +1261,7 @@ class DataFormatDimMapTest(test_lib.TestCase): y_val = self.evaluate(y) self.assertAllEqual(y_val, y_val_expected) + @test_util.disable_xla(""XLA catches the error and rethrows as different one"") def testArbitraryASCII(self): x_val = [-4, -3, -2, -1, 0, 1, 2, 3] y_val_expected = [3, 2, 1, 0, 3, 2, 1, 0] @@ -1269,6 +1271,46 @@ class DataFormatDimMapTest(test_lib.TestCase): y_val = self.evaluate(y) self.assertAllEqual(y_val, y_val_expected) + @test_util.disable_xla(""XLA catches the error and rethrows as different one"") + def testInvalidLength(self): + x = [-4, -3, -2, -1, 0, 1, 2, 3] + with self.assertRaisesRegex(errors.InvalidArgumentError, + ""Source format must be of length 4 or 5""): + op = nn_ops.data_format_dim_map( + x, src_format=""12345678"", dst_format=""87654321"") + with test_util.use_gpu(): + self.evaluate(op) + + @test_util.disable_xla(""XLA catches the error and rethrows as different one"") + def testDuplicateSrc(self): + x = [-4, -3, -2, -1, 0, 1, 2, 3] + with self.assertRaisesRegex( + errors.InvalidArgumentError, + ""Destination and source format must determine a permutation""): + op = nn_ops.data_format_dim_map(x, src_format=""1233"", dst_format=""4321"") + with test_util.use_gpu(): + self.evaluate(op) + + @test_util.disable_xla(""XLA catches the error and rethrows as different one"") + def testDuplicateDst(self): + x = [-4, -3, -2, -1, 0, 1, 2, 3] + with self.assertRaisesRegex( + errors.InvalidArgumentError, + ""Destination and source format must determine a permutation""): + op = nn_ops.data_format_dim_map(x, src_format=""1234"", dst_format=""3321"") + with test_util.use_gpu(): + self.evaluate(op) + + @test_util.disable_xla(""XLA catches the error and rethrows as different one"") + def testExtraSpecifiers(self): + x = [-4, -3, -2, -1, 0, 1, 2, 3] + with self.assertRaisesRegex( + errors.InvalidArgumentError, + ""Destination and 
source format must determine a permutation""): + op = nn_ops.data_format_dim_map(x, src_format=""1234"", dst_format=""5321"") + with test_util.use_gpu(): + self.evaluate(op) + class DataFormatVectorPermuteTest(test_lib.TestCase): @@ -1370,6 +1412,60 @@ class DataFormatVectorPermuteTest(test_lib.TestCase): y_val = self.evaluate(y) self.assertAllEqual(y_val, [[7, 4], [4, 5], [5, 1], [9, 3]]) + @test_util.disable_xla(""XLA catches the error and rethrows as different one"") + def testInvalidLength(self): + x = [0, 1, 2, 3] + with self.assertRaisesRegex(errors.InvalidArgumentError, + ""Source format must be of length 4 or 5""): + op = nn_ops.data_format_vec_permute( + x, src_format=""12345678"", dst_format=""87654321"") + with test_util.use_gpu(): + self.evaluate(op) + + @test_util.disable_xla(""XLA catches the error and rethrows as different one"") + def testDuplicateSrc(self): + x = [0, 1, 2, 3] + with self.assertRaisesRegex( + errors.InvalidArgumentError, + ""Destination and source format must determine a permutation""): + op = nn_ops.data_format_vec_permute( + x, src_format=""1233"", dst_format=""4321"") + with test_util.use_gpu(): + self.evaluate(op) + + @test_util.disable_xla(""XLA catches the error and rethrows as different one"") + def testDuplicateDst(self): + x = [0, 1, 2, 3] + with self.assertRaisesRegex( + errors.InvalidArgumentError, + ""Destination and source format must determine a permutation""): + op = nn_ops.data_format_vec_permute( + x, src_format=""1234"", dst_format=""3321"") + with test_util.use_gpu(): + self.evaluate(op) + + @test_util.disable_xla(""XLA catches the error and rethrows as different one"") + def testExtraSpecifiers(self): + x = [0, 1, 2, 3] + with self.assertRaisesRegex( + errors.InvalidArgumentError, + ""Destination and source format must determine a permutation""): + op = nn_ops.data_format_vec_permute( + x, src_format=""1234"", dst_format=""5321"") + with test_util.use_gpu(): + self.evaluate(op) + + @test_util.disable_xla(""XLA catches the error and rethrows as different one"") + def test2DNoWH(self): + x = [[0, 1], [2, 3]] + with self.assertRaisesRegex( + errors.InvalidArgumentError, + ""Format specifier must contain H and W for 2D case""): + op = nn_ops.data_format_vec_permute( + x, src_format=""1234"", dst_format=""4321"") + with test_util.use_gpu(): + self.evaluate(op) + @test_util.run_all_in_graph_and_eager_modes class AvgPoolTest(test_lib.TestCase): ",1,train c1e1fc899ad5f8c725dcbb6470069890b5060bc7,tensorflow/tensorflow,"Mark `MemmappedTensorAllocator` as returning opaque handle. This allocator is used for `ImmutableConstantOp` and it returns a handle to the contents of a memory mapped file which is supposed to represent a tensor. For tensors of complex types (resources, variables and strings), allocators which are not marked as returning opaque handles will call placement new to initialize each element. This means writing to the buffer. However, in our case, the buffer is immutable and already contains the tensor data. Hence, writing to it is both destructive and causes a crash. PiperOrigin-RevId: 345786451 Change-Id: I46369c50fa60b3431709ffe068a728d3061f49c4",immutable_constant_op.cc,"@@ -62,6 +62,12 @@ class MemmappedTensorAllocator : public Allocator { void set_delete_on_deallocate() { delete_on_deallocate_ = true; } + // Make sure tensors or complex types (strings, variants, resources) don't get + // their constructor called via a placement new since that would require + // writing to immutable data. 
+ // See also: tensorflow/core/framework/typed_allocator.h + bool AllocatesOpaqueHandle() const override { return true; } + private: std::unique_ptr memory_region_; // If there is an error during allocation we keep it in this status. ",1,train 8b5b9dc96666a3a5d27fad7179ff215e3b74b67c,tensorflow/tensorflow,"Completely rewrite `GetMatchingPaths`. The current parallel implementation is too complex (lambda inside lambda, two levels of parallelism) and has a read outside of bounds issue. The new implementation cleans up artifacts from the previous implementations that were left in the code as it evolves. We add multiple helper functions, and document invariants and preconditions as well as every major step. This way, we fix the security issue and a potential new one which was not caught before PiperOrigin-RevId: 346146220 Change-Id: Iec0f44673f43349797bf9944dffe9b2f779137d8",file_system_helper.cc,"@@ -52,115 +52,217 @@ void ForEach(int first, int last, const std::function& f) { #endif } +// A globbing pattern can only start with these characters: +static const char kGlobbingChars[] = ""*?[\\""; + +static inline bool IsGlobbingPattern(const std::string& pattern) { + return (pattern.find_first_of(kGlobbingChars) != std::string::npos); +} + +// Make sure that the first entry in `dirs` during glob expansion does not +// contain a glob pattern. This is to prevent a corner-case bug where +// `` would be treated differently than `./`. +static std::string PatchPattern(const std::string& pattern) { + const std::string fixed_prefix = + pattern.substr(0, pattern.find_first_of(kGlobbingChars)); + + // Patching is needed when there is no directory part in `prefix` + if (io::Dirname(fixed_prefix).empty()) { + return io::JoinPath(""."", pattern); + } + + // No patching needed + return pattern; +} + +static std::vector AllDirectoryPrefixes(const std::string& d) { + std::vector dirs; + const std::string patched = PatchPattern(d); + StringPiece dir(patched); + + // If the pattern ends with a `/` (or `\\` on Windows), we need to strip it + // otherwise we would have one additional matching step and the result set + // would be empty. + bool is_directory = d[d.size() - 1] == '/'; +#ifdef PLATFORM_WINDOWS + is_directory = is_directory || (d[d.size() - 1] == '\\'); +#endif + if (is_directory) { + dir = io::Dirname(dir); + } + + while (!dir.empty()) { + dirs.emplace_back(dir); + StringPiece new_dir(io::Dirname(dir)); + // io::Dirname(""/"") returns ""/"" so we need to break the loop. + // On Windows, io::Dirname(""C:\\"") would return ""C:\\"", so we check for + // identity of the result instead of checking for dir[0] == `/`. + if (dir == new_dir) break; + dir = new_dir; + } + + // Order the array from parent to ancestor (reverse order). + std::reverse(dirs.begin(), dirs.end()); + + return dirs; +} + +static inline int GetFirstGlobbingEntry(const std::vector& dirs) { + int i = 0; + for (const auto& d : dirs) { + if (IsGlobbingPattern(d)) { + break; + } + i++; + } + return i; +} + } // namespace Status GetMatchingPaths(FileSystem* fs, Env* env, const string& pattern, std::vector* results) { + // Check that `fs`, `env` and `results` are non-null. 
+ if (fs == nullptr || env == nullptr || results == nullptr) { + return Status(tensorflow::error::INVALID_ARGUMENT, + ""Filesystem calls GetMatchingPaths with nullptr arguments""); + } + + // By design, we don't match anything on empty pattern results->clear(); if (pattern.empty()) { return Status::OK(); } - string fixed_prefix = pattern.substr(0, pattern.find_first_of(""*?[\\"")); - string eval_pattern = pattern; - string dir(io::Dirname(fixed_prefix)); - // If dir is empty then we need to fix up fixed_prefix and eval_pattern to - // include . as the top level directory. - if (dir.empty()) { - dir = "".""; - fixed_prefix = io::JoinPath(dir, fixed_prefix); - eval_pattern = io::JoinPath(dir, eval_pattern); - } - bool is_directory = pattern[pattern.size() - 1] == '/'; -#ifdef PLATFORM_WINDOWS - is_directory = is_directory || pattern[pattern.size() - 1] == '\\'; -#endif - std::vector dirs; - if (!is_directory) { - dirs.emplace_back(eval_pattern); - } - StringPiece tmp_dir(io::Dirname(eval_pattern)); - while (tmp_dir.size() > dir.size()) { - dirs.emplace_back(string(tmp_dir)); - tmp_dir = io::Dirname(tmp_dir); + // The pattern can contain globbing characters at multiple levels, e.g.: + // + // foo/ba?/baz/f*r + // + // To match the full pattern, we must match every prefix subpattern and then + // operate on the children for each match. Thus, we separate all subpatterns + // in the `dirs` vector below. + std::vector dirs = AllDirectoryPrefixes(pattern); + + // We can have patterns that have several parents where no globbing is being + // done, for example, `foo/bar/baz/*`. We don't need to expand the directories + // which don't contain the globbing characters. + int matching_index = GetFirstGlobbingEntry(dirs); + + // If we don't have globbing characters in the pattern then it specifies a + // path in the filesystem. We add it to the result set if it exists. + if (matching_index == dirs.size()) { + if (fs->FileExists(pattern).ok()) { + results->emplace_back(pattern); + } + return Status::OK(); } - dirs.emplace_back(dir); - std::reverse(dirs.begin(), dirs.end()); - // Setup a parallel BFS to explore everything under dir. - std::deque> dir_q; - std::deque> next_dir_q; - dir_q.emplace_back(std::make_pair(dirs[0], 0)); - Status ret; // Status to return. - mutex results_mutex; - condition_variable results_cond; - mutex next_que_mutex; - condition_variable next_que_cond; - while (!dir_q.empty()) { - next_dir_q.clear(); - std::vector new_rets(dir_q.size()); - auto handle_level = [fs, &results, &dir_q, &next_dir_q, &new_rets, - &is_directory, &dirs, &results_mutex, &results_cond, - &next_que_mutex, &next_que_cond](int i) { - string current_dir = dir_q.at(i).first; - int dir_index = dir_q.at(i).second; - dir_index++; - std::vector children; - Status s = fs->GetChildren(current_dir, &children); - // In case PERMISSION_DENIED is encountered, we bail here. + + // To expand the globbing, we do a BFS from `dirs[matching_index-1]`. + // At every step, we work on a pair `{dir, ix}` such that `dir` is a real + // directory, `ix < dirs.size() - 1` and `dirs[ix+1]` is a globbing pattern. + // To expand the pattern, we select from all the children of `dir` only those + // that match against `dirs[ix+1]`. + // If there are more entries in `dirs` after `dirs[ix+1]` this mean we have + // more patterns to match. So, we add to the queue only those children that + // are also directories, paired with `ix+1`. + // If there are no more entries in `dirs`, we return all children as part of + // the answer. 
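The comments above lay out the whole strategy: break the pattern into directory prefixes, skip the leading components with no globbing characters, then walk breadth-first, matching the children of each confirmed directory against the next component. A compact Python sketch of that idea using only the standard library (an illustration of the algorithm, not the TensorFlow filesystem code):

import fnmatch
import os

GLOB_CHARS = set("*?[\\")

def matching_paths(pattern):
    parts = pattern.split(os.sep)
    # Index of the first component that actually needs globbing.
    first_glob = next((i for i, p in enumerate(parts)
                       if any(c in GLOB_CHARS for c in p)), len(parts))
    if first_glob == len(parts):
        # No globbing at all: the pattern names a concrete path.
        return [pattern] if os.path.exists(pattern) else []
    # BFS queue of (real directory, index of the next component to match).
    queue = [(os.sep.join(parts[:first_glob]) or ".", first_glob)]
    results = []
    while queue:
        next_queue = []
        for parent, index in queue:
            try:
                children = os.listdir(parent)
            except OSError:
                continue  # analogue of bailing on PERMISSION_DENIED
            for child in children:
                if not fnmatch.fnmatch(child, parts[index]):
                    continue
                path = os.path.join(parent, child)
                if index == len(parts) - 1:
                    results.append(path)       # matched the last component
                elif os.path.isdir(path):
                    next_queue.append((path, index + 1))
        queue = next_queue                     # "double buffering" swap
    return results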
+ // Since we can get into a combinatorial explosion issue (e.g., pattern + // `/*/*/*`), we process the queue in parallel. Each parallel processing takes + // elements from `expand_queue` and adds them to `next_expand_queue`, after + // which we swap these two queues (similar to double buffering algorithms). + // PRECONDITION: `IsGlobbingPattern(dirs[0]) == false` + // PRECONDITION: `matching_index > 0` + // INVARIANT: If `{d, ix}` is in queue, then `d` and `dirs[ix]` are at the + // same level in the filesystem tree. + // INVARIANT: If `{d, _}` is in queue, then `IsGlobbingPattern(d) == false`. + // INVARIANT: If `{d, _}` is in queue, then `d` is a real directory. + // INVARIANT: If `{_, ix}` is in queue, then `ix < dirs.size() - 1`. + // INVARIANT: If `{_, ix}` is in queue, `IsGlobbingPattern(dirs[ix + 1])`. + std::deque> expand_queue; + std::deque> next_expand_queue; + expand_queue.emplace_back(dirs[matching_index - 1], matching_index - 1); + + // Adding to `result` or `new_expand_queue` need to be protected by mutexes + // since there are multiple threads writing to these. + mutex result_mutex; + mutex queue_mutex; + + while (!expand_queue.empty()) { + next_expand_queue.clear(); + + // The work item for every item in `expand_queue`. + // pattern, we process them in parallel. + auto handle_level = [&fs, &results, &dirs, &expand_queue, + &next_expand_queue, &result_mutex, + &queue_mutex](int i) { + // See invariants above, all of these are valid accesses. + const auto& queue_item = expand_queue.at(i); + const std::string& parent = queue_item.first; + const int index = queue_item.second + 1; + const std::string& match_pattern = dirs[index]; + + // Get all children of `parent`. If this fails, return early. + std::vector children; + Status s = fs->GetChildren(parent, &children); if (s.code() == tensorflow::error::PERMISSION_DENIED) { return; } - new_rets[i] = s; - if (children.empty()) return; - - // children_dir_status holds is_dir status for children. It can have three - // possible values: OK for true; FAILED_PRECONDITION for false; CANCELLED - // if we don't calculate IsDirectory (we might do that because there isn't - // any point in exploring that child path). - std::vector children_dir_status; - - // This IsDirectory call can be expensive for some FS. Parallelizing it. - children_dir_status.resize(children.size()); - auto handle_children = [fs, ¤t_dir, &children, &dirs, dir_index, - is_directory, &children_dir_status](int j) { - const string child_path = io::JoinPath(current_dir, children[j]); - if (!fs->Match(child_path, dirs[dir_index])) { - children_dir_status[j] = + + // Also return early if we don't have any children + if (children.empty()) { + return; + } + + // Since we can get extremely many children here and on some filesystems + // `IsDirectory` is expensive, we process the children in parallel. + // We also check that children match the pattern in parallel, for speedup. + // We store the status of the match and `IsDirectory` in + // `children_status` array, one element for each children. + std::vector children_status(children.size()); + auto handle_children = [&fs, &match_pattern, &parent, &children, + &children_status](int j) { + const std::string path = io::JoinPath(parent, children[j]); + if (!fs->Match(path, match_pattern)) { + children_status[j] = Status(tensorflow::error::CANCELLED, ""Operation not needed""); - } else if (dir_index != dirs.size() - 1) { - children_dir_status[j] = fs->IsDirectory(child_path); } else { - children_dir_status[j] = - is_directory ? 
fs->IsDirectory(child_path) : Status::OK(); + children_status[j] = fs->IsDirectory(path); } }; ForEach(0, children.size(), handle_children); - for (size_t j = 0; j < children.size(); ++j) { - const string child_path = io::JoinPath(current_dir, children[j]); - // If the IsDirectory call was cancelled we bail. - if (children_dir_status[j].code() == tensorflow::error::CANCELLED) { + // At this point, pairing `children` with `children_status` will tell us + // if a children: + // * does not match the pattern + // * matches the pattern and is a directory + // * matches the pattern and is not a directory + // We fully ignore the first case. + // If we matched the last pattern (`index == dirs.size() - 1`) then all + // remaining children get added to the result. + // Otherwise, only the directories get added to the next queue. + for (size_t j = 0; j < children.size(); j++) { + if (children_status[j].code() == tensorflow::error::CANCELLED) { continue; } - if (children_dir_status[j].ok()) { - if (dir_index != dirs.size() - 1) { - mutex_lock lk(next_que_mutex); - next_dir_q.emplace_back(std::make_pair(child_path, dir_index)); - next_que_cond.notify_one(); - } else { - mutex_lock lk(results_mutex); - results->emplace_back(child_path); - results_cond.notify_one(); - } + + const std::string path = io::JoinPath(parent, children[j]); + if (index == dirs.size() - 1) { + mutex_lock l(result_mutex); + results->emplace_back(path); + } else if (children_status[j].ok()) { + mutex_lock l(queue_mutex); + next_expand_queue.emplace_back(path, index); } } }; - ForEach(0, dir_q.size(), handle_level); + ForEach(0, expand_queue.size(), handle_level); - ret.Update(new_rets[dir_q.size() - 1]); - std::swap(dir_q, next_dir_q); + // After evaluating one level, swap the ""buffers"" + std::swap(expand_queue, next_expand_queue); } - return ret; + + return Status::OK(); } } // namespace internal ",1,train 14755416e364f17fb1870882fa778c7fec7f16e3,tensorflow/tensorflow,"Prevent CHECK-fail in LSTM/GRU with zero-length input. PiperOrigin-RevId: 346239181 Change-Id: I5f233dbc076aab7bb4e31ba24f5abd4eaf99ea4f",cuda_dnn.cc,"@@ -1468,7 +1468,9 @@ class CudnnRnnSequenceTensorDescriptor static port::StatusOr Create( GpuExecutor* parent, int max_seq_length, int batch_size, int data_size, cudnnDataType_t data_type) { - CHECK_GT(max_seq_length, 0); + if (max_seq_length <= 0) { + return port::Status(port::error::INVALID_ARGUMENT, ""max_seq_length <= 0""); + } int dims[] = {batch_size, data_size, 1}; int strides[] = {dims[1] * dims[2], dims[2], 1}; TensorDescriptor tensor_desc = CreateTensorDescriptor(); @@ -1486,7 +1488,9 @@ class CudnnRnnSequenceTensorDescriptor GpuExecutor* parent, int max_seq_length, int batch_size, int data_size, const absl::Span& seq_lengths, bool time_major, cudnnDataType_t data_type) { - CHECK_GT(max_seq_length, 0); + if (max_seq_length <= 0) { + return port::Status(port::error::INVALID_ARGUMENT, ""max_seq_length <= 0""); + } int dims[] = {batch_size, data_size, 1}; int strides[] = {dims[1] * dims[2], dims[2], 1}; TensorDescriptor tensor_desc = CreateTensorDescriptor(); ",1,train 0cc38aaa4064fd9e79101994ce9872c6d91f816b,tensorflow/tensorflow,"Prevent unitialized memory access in `GraphConstructor::MakeEdge` The `MakeEdge` implementation assumes that there exists an output at `output_index` of `src` node and an input at `input_index` of `dst` node. However, if this is not the case this results in accessing data out of bounds. 
Because we are accessing an array that is a private member of a class and only in read only mode, this usually results only in unitialized memory access. However, it is reasonable to think that malicious users could manipulate these indexes to actually read data outside the class, thus resulting in information leakage and further exploits. PiperOrigin-RevId: 346343288 Change-Id: I2127da27c2023d27f26efd39afa6c853385cab6f",graph_constructor.cc,"@@ -44,6 +44,7 @@ limitations under the License. #include ""tensorflow/core/lib/gtl/inlined_vector.h"" #include ""tensorflow/core/lib/strings/scanner.h"" #include ""tensorflow/core/lib/strings/str_util.h"" +#include ""tensorflow/core/platform/errors.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/macros.h"" #include ""tensorflow/core/public/version.h"" @@ -1425,6 +1426,17 @@ void GraphConstructor::Undo() { Status GraphConstructor::MakeEdge(Node* src, int output_index, Node* dst, int input_index) { + if (output_index >= src->num_outputs()) { + return errors::InvalidArgument( + ""Output "", output_index, "" of node "", src->name(), + "" does not exist. Node only has "", src->num_outputs(), "" outputs.""); + } + if (input_index >= dst->num_inputs()) { + return errors::InvalidArgument( + ""Input "", input_index, "" of node "", dst->name(), + "" does not exist. Node only has "", dst->num_inputs(), "" inputs.""); + } + DataType src_out = src->output_type(output_index); DataType dst_in = dst->input_type(input_index); if (!TypesCompatible(dst_in, src_out)) { ",1,train 5ac1b9e24ff6afc465756edf845d2e9660bd34bf,tensorflow/tensorflow,"Fix segfault when attempting to convert string to float16. To make sure this gets fixed, add test for converting string to any numeric type. PiperOrigin-RevId: 286650886 Change-Id: I81f770ec2bbd33a863e8057ce198c679912fa8e0",constant_op_test.py,"@@ -0,0 +1,61 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +""""""Tests for tensorflow.python.framework.constant_op."""""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.platform import test + + +class ConstantOpTest(test.TestCase, parameterized.TestCase): + + @parameterized.parameters( + dtypes.bfloat16, + dtypes.complex128, + dtypes.complex64, + dtypes.double, + dtypes.float16, + dtypes.float32, + dtypes.float64, + dtypes.half, + dtypes.int16, + dtypes.int32, + dtypes.int64, + dtypes.int8, + dtypes.qint16, + dtypes.qint32, + dtypes.qint8, + dtypes.quint16, + dtypes.quint8, + dtypes.uint16, + dtypes.uint32, + dtypes.uint64, + dtypes.uint8, + ) + def test_convert_string_to_number(self, dtype): + with self.assertRaises(TypeError): + constant_op.constant(""hello"", dtype) + + +if __name__ == ""__main__"": + ops.enable_eager_execution() + test.main() ",1,train 5ac1b9e24ff6afc465756edf845d2e9660bd34bf,tensorflow/tensorflow,"Fix segfault when attempting to convert string to float16. To make sure this gets fixed, add test for converting string to any numeric type. PiperOrigin-RevId: 286650886 Change-Id: I81f770ec2bbd33a863e8057ce198c679912fa8e0",py_seq_tensor.cc,"@@ -21,6 +21,7 @@ limitations under the License. #include ""tensorflow/core/lib/core/errors.h"" #include ""tensorflow/core/lib/core/stringpiece.h"" #include ""tensorflow/core/lib/strings/str_util.h"" +#include ""tensorflow/core/platform/macros.h"" #include ""tensorflow/core/platform/types.h"" #include ""tensorflow/python/lib/core/numpy.h"" #include ""tensorflow/python/lib/core/py_util.h"" @@ -396,6 +397,21 @@ typedef Converter Int32Converter; // Floating-point support +// Returns `true` if `out` overflows when converted from `as_double`. +template +static inline bool CheckForOverflow(double as_double, T* out) { + return (sizeof(T) < sizeof(double) && std::isinf(*out) && + std::isfinite(as_double)); +} + +// There is no `std::isinf` that takes `Eigen::half` as argument but Eigen +// provides `Eigen::half_impl::isinf` instead. 
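The user-visible contract the new test pins down can be reproduced directly from Python; a small sketch assuming TensorFlow's public API:

import tensorflow as tf

try:
    # Converting a string to any numeric dtype must raise cleanly rather
    # than segfault inside the float16 conversion path.
    tf.constant("hello", dtype=tf.float16)
except TypeError as e:
    print(e)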
+template <> +inline bool CheckForOverflow(double as_double, Eigen::half* out) { + return (sizeof(Eigen::half) < sizeof(double) && + Eigen::half_impl::isinf(*out) && std::isfinite(as_double)); +} + template static const char* ConvertOneFloat(PyObject* v, T* out) { if (PyErr_Occurred()) { @@ -405,20 +421,19 @@ static const char* ConvertOneFloat(PyObject* v, T* out) { const double as_double = PyFloat_AS_DOUBLE(v); *out = static_cast(as_double); // Check for overflow - if (TF_PREDICT_FALSE(sizeof(T) < sizeof(double) && std::isinf(*out) && - std::isfinite(as_double))) { + if (TF_PREDICT_FALSE(CheckForOverflow(as_double, out))) { return ErrorOutOfRangeDouble; } return nullptr; } #if PY_MAJOR_VERSION < 3 if (PyInt_Check(v)) { - *out = PyInt_AS_LONG(v); + *out = static_cast(PyInt_AS_LONG(v)); return nullptr; } #endif if (PyLong_Check(v)) { - *out = PyLong_AsDouble(v); + *out = static_cast(PyLong_AsDouble(v)); if (PyErr_Occurred()) return ErrorOutOfRangeDouble; return nullptr; } @@ -467,13 +482,7 @@ struct ConverterTraits { static const tensorflow::DataType kTypeEnum = DT_HALF; static const char* ConvertScalar(PyObject* v, Eigen::half* out) { - // NOTE(nareshmodi): Is there a way to convert to C double without the - // intermediate Python double? This will help with ConvertOneFloat as well. - Safe_PyObjectPtr as_float = make_safe(PyNumber_Float(v)); - double v_double = PyFloat_AS_DOUBLE(as_float.get()); - *out = Eigen::half(v_double); - - return nullptr; + return ConvertOneFloat(v, out); } }; @@ -613,7 +622,9 @@ Status PySeqToTensor(PyObject* obj, DataType dtype, Tensor* ret) { break; case DT_HALF: - RETURN_STRING_AS_STATUS(NumpyHalfConverter::Convert(obj, &state, ret)); + if (NumpyHalfConverter::Convert(obj, &state, ret) == nullptr) + return Status::OK(); + break; case DT_INT64: if (Int64Converter::Convert(obj, &state, ret) == nullptr) ",1,train db4f9717c41bccc3ce10099ab61996b246099892,tensorflow/tensorflow,"Fix heap buffer overflow in UnsortedSegmentSum. When Index=int32, data_size and num_segments were truncated from int64 to int32. This truncation can produce negative numbers, which causes UnsortedSegmentFunctor to access out of bounds memory. Also: - Switches some indexing calculations to int64 to avoid signed integer overflow when either the input or output tensors have more than 2**31 - 1 elements. - Fixes a range check error in the GPU kernel. The segment ID was checked against an upper bound measured in elements, not segments. 
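The narrowing cast described above is easy to demonstrate in isolation; a minimal numpy sketch of the failure mode (not the kernel code itself):

import numpy as np

# A tensor size that fits in int64 but not in int32: 3 * 2**30 elements.
data_size = np.int64(3 * 2**30)
truncated = data_size.astype(np.int32)
print(int(truncated))  # -1073741824: the narrowing cast wraps to a negative
                       # value, which is what let the kernel index out of bounds.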
PiperOrigin-RevId: 256451663",segment_reduction_ops.cc,"@@ -376,18 +376,17 @@ namespace functor { template struct UnsortedSegmentFunctor { - void operator()(OpKernelContext* ctx, const Index num_segments, - const TensorShape& segment_ids_shape, + void operator()(OpKernelContext* ctx, const TensorShape& segment_ids_shape, typename TTypes::ConstFlat segment_ids, - const Index data_size, const T* data, + typename TTypes::ConstTensor data, typename TTypes::Tensor output) { output.setConstant(InitialValueF()()); - if (data_size == 0) { + if (data.size() == 0) { return; } const int64 N = segment_ids.dimension(0); + const int64 num_segments = output.dimension(0); ReductionF reduction; - auto data_flat = typename TTypes::ConstTensor(data, N, data_size / N); for (int64 i = 0; i < N; ++i) { Index j = internal::SubtleMustCopy(segment_ids(i)); if (j < 0) { @@ -397,7 +396,7 @@ struct UnsortedSegmentFunctor { errors::InvalidArgument( ""segment_ids"", SliceDebugString(segment_ids_shape, i), "" = "", j, "" is out of range [0, "", num_segments, "")"")); - reduction(data_flat.template chip<0>(i), output.template chip<0>(j)); + reduction(data.template chip<0>(i), output.template chip<0>(j)); } } }; @@ -485,7 +484,7 @@ class UnsortedSegmentReductionOp : public OpKernel { return; } const auto segment_flat = segment_ids.flat(); - const Index output_rows = internal::SubtleMustCopy(static_cast( + const int64 output_rows = internal::SubtleMustCopy(static_cast( num_segments.dtype() == DT_INT32 ? num_segments.scalar()() : num_segments.scalar()())); OP_REQUIRES(context, output_rows >= 0, @@ -499,9 +498,9 @@ class UnsortedSegmentReductionOp : public OpKernel { Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); auto output_flat = output->flat_outer_dims(); - auto data_ptr = data.template flat().data(); - reduction_functor_(context, output_rows, segment_ids.shape(), segment_flat, - data.NumElements(), data_ptr, output_flat); + auto data_flat = data.flat_inner_outer_dims(segment_ids.dims() - 1); + reduction_functor_(context, segment_ids.shape(), segment_flat, data_flat, + output_flat); } protected: ",1,train db4f9717c41bccc3ce10099ab61996b246099892,tensorflow/tensorflow,"Fix heap buffer overflow in UnsortedSegmentSum. When Index=int32, data_size and num_segments were truncated from int64 to int32. This truncation can produce negative numbers, which causes UnsortedSegmentFunctor to access out of bounds memory. Also: - Switches some indexing calculations to int64 to avoid signed integer overflow when either the input or output tensors have more than 2**31 - 1 elements. - Fixes a range check error in the GPU kernel. The segment ID was checked against an upper bound measured in elements, not segments. PiperOrigin-RevId: 256451663",segment_reduction_ops.h,"@@ -59,10 +59,9 @@ struct SegmentSumFunctor { template struct UnsortedSegmentFunctor { - void operator()(OpKernelContext* ctx, const Index num_segments, - const TensorShape& segment_ids_shape, + void operator()(OpKernelContext* ctx, const TensorShape& segment_ids_shape, typename TTypes::ConstFlat segment_ids, - const Index data_size, const T* data, + typename TTypes::ConstTensor data, typename TTypes::Tensor output); }; ",1,train db4f9717c41bccc3ce10099ab61996b246099892,tensorflow/tensorflow,"Fix heap buffer overflow in UnsortedSegmentSum. When Index=int32, data_size and num_segments were truncated from int64 to int32. 
This truncation can produce negative numbers, which causes UnsortedSegmentFunctor to access out of bounds memory. Also: - Switches some indexing calculations to int64 to avoid signed integer overflow when either the input or output tensors have more than 2**31 - 1 elements. - Fixes a range check error in the GPU kernel. The segment ID was checked against an upper bound measured in elements, not segments. PiperOrigin-RevId: 256451663",segment_reduction_ops_gpu.cu.cc,"@@ -106,21 +106,21 @@ __global__ void SortedSegmentSumCustomKernel(const Index input_outer_dim_size, // Each element is mapped from input to output by a combination of its // 'segment_ids' mapping and 'inner_dim_size'. template -__global__ void UnsortedSegmentCustomKernel(const Index input_outer_dim_size, - const Index inner_dim_size, - const Index output_outer_dim_size, +__global__ void UnsortedSegmentCustomKernel(const int64 input_outer_dim_size, + const int64 inner_dim_size, + const int64 output_outer_dim_size, const Index* segment_ids, const T* input, T* output) { - const Index input_total_size = input_outer_dim_size * inner_dim_size; - const Index output_total_size = output_outer_dim_size * inner_dim_size; - for (int input_index : GpuGridRangeX(input_total_size)) { - const Index input_segment_index = input_index / inner_dim_size; - const Index segment_offset = input_index % inner_dim_size; + const int64 input_total_size = input_outer_dim_size * inner_dim_size; + for (int64 input_index : GpuGridRangeX(input_total_size)) { + const int64 input_segment_index = input_index / inner_dim_size; + const int64 segment_offset = input_index % inner_dim_size; const Index output_segment_index = segment_ids[input_segment_index]; - if (output_segment_index < 0 || output_segment_index >= output_total_size) { + if (output_segment_index < 0 || + output_segment_index >= output_outer_dim_size) { continue; } - const Index output_index = + const int64 output_index = output_segment_index * inner_dim_size + segment_offset; KernelReductionFunctor()(output + output_index, ldg(input + input_index)); } @@ -174,10 +174,9 @@ void SegmentSumFunctor::operator()( template struct UnsortedSegmentFunctor { - void operator()(OpKernelContext* ctx, const Index num_segments, - const TensorShape& segment_ids_shape, + void operator()(OpKernelContext* ctx, const TensorShape& segment_ids_shape, typename TTypes::ConstFlat segment_ids, - const Index data_size, const T* data, + typename TTypes::ConstTensor data, typename TTypes::Tensor output) { if (output.size() == 0) { return; @@ -188,6 +187,7 @@ struct UnsortedSegmentFunctor { TF_CHECK_OK(GpuLaunchKernel( SetToValue, config.block_count, config.thread_per_block, 0, d.stream(), output.size(), output.data(), InitialValueF()())); + const int64 data_size = data.size(); if (data_size == 0 || segment_ids_shape.num_elements() == 0) { return; } @@ -196,15 +196,16 @@ struct UnsortedSegmentFunctor { // *) 'data_size' is the total number of elements to process. // *) 'segment_ids.shape' is a prefix of data's shape. // *) 'input_outer_dim_size' is the total number of segments to process. 
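The indexing convention spelled out in the comment block above, together with the corrected range check, can be mirrored in a few lines of Python; the shapes and segment ids here are made-up example values:

import numpy as np

data = np.arange(4 * 2 * 3, dtype=np.int64).reshape(4, 2, 3)
segment_ids = [0, 0, 2, 7]   # 7 is outside [0, num_segments) and is skipped
num_segments = 5             # output_outer_dim_size

inner = data.shape[1] * data.shape[2]        # input_inner_dim_size = 6
flat_in = data.reshape(-1)
out = np.zeros((num_segments, inner), dtype=np.int64)
for flat_index in range(flat_in.size):       # outer * inner elements
    row, offset = divmod(flat_index, inner)  # which input row, which value in it
    seg = segment_ids[row]
    if 0 <= seg < num_segments:              # compare against segments, not elements
        out[seg, offset] += flat_in[flat_index]
print(out.reshape(num_segments, 2, 3))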
- const Index input_outer_dim_size = segment_ids.dimension(0); - const Index input_inner_dim_size = data_size / input_outer_dim_size; + const int64 input_outer_dim_size = segment_ids.dimension(0); + const int64 input_inner_dim_size = data.dimension(1); + const int64 output_outer_dim_size = output.dimension(0); config = GetGpuLaunchConfig(data_size, d); - TF_CHECK_OK( - GpuLaunchKernel(UnsortedSegmentCustomKernel, - config.block_count, config.thread_per_block, 0, - d.stream(), input_outer_dim_size, input_inner_dim_size, - num_segments, segment_ids.data(), data, output.data())); + TF_CHECK_OK(GpuLaunchKernel( + UnsortedSegmentCustomKernel, config.block_count, + config.thread_per_block, 0, d.stream(), input_outer_dim_size, + input_inner_dim_size, output_outer_dim_size, segment_ids.data(), + data.data(), output.data())); } }; ",1,train 49f73c55d56edffebde4bca4a407ad69c1cae433,tensorflow/tensorflow,"Fix integer overflow in BMP decoder by making the checks in DecodeBmp more stringent. Add fuzzer to improve the robustness of the decoder in the future. PiperOrigin-RevId: 185780111",decode_bmp_op.cc,"@@ -91,15 +91,32 @@ class DecodeBmpOp : public OpKernel { errors::InvalidArgument( ""Number of channels must be 1, 3 or 4, was "", channels_)); + OP_REQUIRES(context, width > 0 && header_size >= 0, + errors::InvalidArgument(""Width must be positive"")); + OP_REQUIRES(context, header_size >= 0, + errors::InvalidArgument(""header size must be nonnegative"")); + + // The real requirement is < 2^31 minus some headers and channel data, + // so rounding down to something that's still ridiculously big. + OP_REQUIRES( + context, + (static_cast(width) * std::abs(static_cast(height))) < + static_cast(std::numeric_limits::max() / 8), + errors::InvalidArgument( + ""Total possible pixel bytes must be less than 2^30"")); + + const int32 abs_height = abs(height); + // there may be padding bytes when the width is not a multiple of 4 bytes // 8 * channels == bits per pixel const int row_size = (8 * channels_ * width + 31) / 32 * 4; - const int last_pixel_offset = - header_size + (abs(height) - 1) * row_size + (width - 1) * channels_; + const int64 last_pixel_offset = static_cast(header_size) + + (abs_height - 1) * row_size + + (width - 1) * channels_; // [expected file size] = [last pixel offset] + [last pixel size=channels] - const int expected_file_size = last_pixel_offset + channels_; + const int64 expected_file_size = last_pixel_offset + channels_; OP_REQUIRES( context, (expected_file_size <= input.size()), @@ -115,12 +132,12 @@ class DecodeBmpOp : public OpKernel { Tensor* output = nullptr; OP_REQUIRES_OK( context, context->allocate_output( - 0, TensorShape({abs(height), width, channels_}), &output)); + 0, TensorShape({abs_height, width, channels_}), &output)); const uint8* bmp_pixels = &img_bytes[header_size]; Decode(bmp_pixels, row_size, output->flat().data(), width, - abs(height), channels_, top_down); + abs_height, channels_, top_down); } uint8* Decode(const uint8* input, const int row_size, uint8* const output, ",1,test 49f73c55d56edffebde4bca4a407ad69c1cae433,tensorflow/tensorflow,"Fix integer overflow in BMP decoder by making the checks in DecodeBmp more stringent. Add fuzzer to improve the robustness of the decoder in the future. PiperOrigin-RevId: 185780111",decode_bmp_fuzz.cc,"@@ -0,0 +1,29 @@ +/* Copyright 2018 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""tensorflow/cc/ops/standard_ops.h"" +#include ""tensorflow/core/kernels/fuzzing/fuzz_session.h"" + +namespace tensorflow { +namespace fuzzing { + +class FuzzDecodeBmp : public FuzzStringInputOp { + SINGLE_INPUT_OP_BUILDER(DT_STRING, DecodeBmp); +}; + +STANDARD_TF_FUZZ_FUNCTION(FuzzDecodeBmp); + +} // end namespace fuzzing +} // end namespace tensorflow ",1,test c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter PiperOrigin-RevId: 377454790 Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",tf_tfl_flatbuffer_helpers.cc,"@@ -25,6 +25,7 @@ limitations under the License. #include ""mlir/Support/FileUtilities.h"" // from @llvm-project #include ""mlir/Transforms/ViewOpGraph.h"" // from @llvm-project #include ""tensorflow/compiler/mlir/lite/common/tfl_pass_config.h"" +#include ""tensorflow/compiler/mlir/lite/metrics/error_collector.h"" #include ""tensorflow/compiler/mlir/lite/tf_tfl_passes.h"" #include ""tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h"" #include ""tensorflow/compiler/mlir/lite/transforms/passes.h"" @@ -316,6 +317,9 @@ Status ConvertMLIRToTFLiteFlatBuffer( mlir::PassManager pm(module->getContext(), mlir::OpPassManager::Nesting::Implicit); ::tensorflow::SetCrashReproducer(pm); + pm.addInstrumentation( + std::make_unique( + module->getContext())); tensorflow::AddTFToTFLConversionPasses(model_flags, toco_flags, pass_config, &pm, session); ",0,train c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter PiperOrigin-RevId: 377454790 Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",convert.py,"@@ -36,6 +36,7 @@ from tensorflow.lite.python.convert_phase import Component from tensorflow.lite.python.convert_phase import convert_phase from tensorflow.lite.python.convert_phase import ConverterError from tensorflow.lite.python.convert_phase import SubComponent +from tensorflow.lite.python.metrics_wrapper import metrics_wrapper as _metrics_wrapper from tensorflow.lite.toco import model_flags_pb2 as _model_flags_pb2 from tensorflow.lite.toco import toco_flags_pb2 as _toco_flags_pb2 from tensorflow.lite.toco import types_pb2 as _types_pb2 @@ -295,7 +296,10 @@ def toco_convert_protos(model_flags_str, enable_mlir_converter) return model_str except Exception as e: - raise ConverterError(str(e)) + converter_error = ConverterError(str(e)) + for error_data in _metrics_wrapper.get_collected_errors(): + converter_error.append_error(error_data) + raise converter_error return _run_toco_binary(model_flags_str, toco_flags_str, input_data_str, debug_info_str) ",0,train c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter PiperOrigin-RevId: 377454790 Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",metrics_nonportable.py,"@@ -18,8 +18,8 @@ from typing import Optional, Text import uuid from tensorflow.lite.python import metrics_interface -from tensorflow.lite.python.metrics_wrapper import _pywrap_tensorflow_lite_metrics_wrapper as 
_metrics_wrapper from tensorflow.lite.python.metrics_wrapper import converter_error_data_pb2 +from tensorflow.lite.python.metrics_wrapper import metrics_wrapper from tensorflow.python.eager import monitoring _counter_debugger_creation = monitoring.Counter( @@ -116,7 +116,7 @@ class TFLiteConverterMetrics(TFLiteMetrics): def __init__(self) -> None: super(TFLiteConverterMetrics, self).__init__() session_id = uuid.uuid4().hex - self._metrics_exporter = _metrics_wrapper.MetricsWrapper(session_id) + self._metrics_exporter = metrics_wrapper.MetricsWrapper(session_id) self._exported = False def __del__(self): ",0,train c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter PiperOrigin-RevId: 377454790 Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",metrics_nonportable_test.py,"@@ -25,13 +25,17 @@ import tensorflow as tf from tensorflow.lite.python import lite from tensorflow.lite.python import metrics_nonportable as metrics from tensorflow.lite.python.convert import ConverterError +from tensorflow.lite.python.convert import register_custom_opdefs from tensorflow.python.client import session +from tensorflow.python.eager import monitoring from tensorflow.python.framework import convert_to_constants from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import string_ops +from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.platform import test from tensorflow.python.saved_model import saved_model from tensorflow.python.training.tracking import tracking @@ -317,5 +321,93 @@ class ConverterMetricsTest(test_util.TensorFlowTestCase): mock_exporter.ExportMetrics.assert_called_once() +def mock_ngrams(data, width, axis=-1, string_separator=' ', name=None): + """"""This mock Ngrams lack the width attr, causing conversion to fail."""""" + + experimental_implements = [ + 'name: ""tftext:Ngrams""', + 'attr { key: ""axis"" value { i: %d } }' % axis, + 'attr { key: ""reduction_type"" value { s: ""STRING_JOIN"" } }', + 'attr { key: ""string_separator"" value { s: ""%s"" } }' % string_separator, + ] + experimental_implements = ' '.join(experimental_implements) + + @tf.function(experimental_implements=experimental_implements) + def func(data): + with ops.name_scope(name, 'NGrams', [data, width]): + data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data') + slices = [] + for start in range(width): + stop = None if start - width + 1 == 0 else start - width + 1 + if axis >= 0: + idx = [slice(None)] * axis + [slice(start, stop)] + else: + idx = [Ellipsis, slice(start, stop)] + [slice(None)] * (-axis - 1) + slices.append(data[idx]) + + # Stack the slices. + stack_axis = axis + 1 if axis >= 0 else axis + windowed_data = array_ops.stack(slices, stack_axis) + + return string_ops.reduce_join( + windowed_data, axis=axis, separator=string_separator) + + return func(data) + + +class ConverterErrorMetricTest(test_util.TensorFlowTestCase): + """"""Testing conversion error metric."""""" + + def setUp(self): + super(ConverterErrorMetricTest, self).setUp() + + # Mock metrics instance except errors so other test cases are not affected. 
+ mock_attempt = mock.create_autospec(monitoring.Counter, instance=True) + self._counter_conversion_attempt = metrics._counter_conversion_attempt + metrics._counter_conversion_attempt = mock_attempt + + mock_success = mock.create_autospec(monitoring.Counter, instance=True) + self._counter_conversion_success = metrics._counter_conversion_success + metrics._counter_conversion_success = mock_success + + mock_params = mock.create_autospec(monitoring.StringGauge, instance=True) + self._gauge_conversion_params = metrics._gauge_conversion_params + metrics._gauge_conversion_params = mock_params + + def tearDown(self): + super(ConverterErrorMetricTest, self).tearDown() + # # Restore metrics instances. + metrics._counter_conversion_attempt = self._counter_conversion_attempt + metrics._counter_conversion_success = self._counter_conversion_success + metrics._gauge_conversion_params = self._gauge_conversion_params + + def test_failure_at_PrepareCompositeFunctionsPass(self): + + class NgramsLayer(tf.keras.layers.Layer): + + def call(self, input_tensor, **kwargs): + return mock_ngrams(input_tensor, width=2, axis=-1, string_separator=' ') + + # Registers a fake WhitespaceTokenizeWithOffsets so the TFText fusing logic + # is enable in MLIR side. + custom_opdefs_str = ( + 'name: \'WhitespaceTokenizeWithOffsets\' input_arg: {name: \'Input1\' ' + 'type: DT_FLOAT} input_arg: {name: \'Input2\' type: DT_FLOAT} ' + 'output_arg: {name: \'Output\' type: DT_FLOAT}') + register_custom_opdefs([custom_opdefs_str]) + + model = tf.keras.models.Sequential([NgramsLayer()]) + model.predict(tf.constant(['test'])) + converter = tf.lite.TFLiteConverter.from_keras_model(model) + converter.allow_custom_ops = True + with self.assertRaises(ConverterError): + converter.convert() + exported_error = metrics._gauge_conversion_errors.get_cell( + 'CONVERT_TF_TO_TFLITE_MODEL', 'PrepareCompositeFunctionsPass', '', + 'UNKNOWN').value() + self.assertEqual(exported_error, + ""\'width\' attribute is not set or not an integer\n"") + + if __name__ == '__main__': test.main() ",0,train c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter PiperOrigin-RevId: 377454790 Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",metrics_wrapper.h,"@@ -52,6 +52,9 @@ class MetricsWrapper { const std::unique_ptr exporter_; }; +// Returns a vector of serialized ConverterErrorData from ErrorCollector. +std::vector GetCollectedErrors(); + } // namespace metrics_wrapper } // namespace tflite ",0,train c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter PiperOrigin-RevId: 377454790 Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",metrics_wrapper.py,"@@ -0,0 +1,39 @@ +# Lint as: python2, python3 +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +""""""Stub to make pywrap metrics wrapper accessible."""""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.lite.python.metrics_wrapper import converter_error_data_pb2 +from tensorflow.lite.python.metrics_wrapper._pywrap_tensorflow_lite_metrics_wrapper import GetCollectedErrors as _get_collected_errors +from tensorflow.lite.python.metrics_wrapper._pywrap_tensorflow_lite_metrics_wrapper import MetricsWrapper # pylint: disable=unused-import + + +def get_collected_errors(): + """"""Returns a list of collected errors in ErrorCollector. + + The GetCollectedErrors function in C++ returns a list of serialized proto + messages. This function will convert them to ConverterErrorData instances. + + Returns: + A list of ConverterErrorData. + """""" + serialized_message_list = _get_collected_errors() + return list( + map(converter_error_data_pb2.ConverterErrorData.FromString, + serialized_message_list)) ",0,train c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter PiperOrigin-RevId: 377454790 Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",metrics_wrapper_nonportable.cc,"@@ -17,6 +17,7 @@ limitations under the License. #include #include ""learning/brain/google/monitoring/metrics_exporter.h"" +#include ""tensorflow/compiler/mlir/lite/metrics/error_collector.h"" #include ""tensorflow/lite/python/metrics_wrapper/metrics_wrapper.h"" namespace tflite { @@ -58,5 +59,14 @@ PyObject* MetricsWrapper::ExportMetrics() { Py_RETURN_NONE; } +std::vector GetCollectedErrors() { + mlir::TFL::ErrorCollector* collector = mlir::TFL::GetErrorCollector(); + std::vector result; + for (const auto& error_data : collector->CollectedErrors()) { + result.push_back(error_data.SerializeAsString()); + } + return result; +} + } // namespace metrics_wrapper } // namespace tflite ",0,train c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter PiperOrigin-RevId: 377454790 Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",metrics_wrapper_portable.cc,"@@ -55,5 +55,9 @@ PyObject* MetricsWrapper::ExportMetrics() { Py_RETURN_NONE; } +std::vector GetCollectedErrors() { + return std::vector(); +} + } // namespace metrics_wrapper } // namespace tflite ",0,train c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter PiperOrigin-RevId: 377454790 Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",metrics_wrapper_pybind11.cc,"@@ -39,4 +39,12 @@ PYBIND11_MODULE(_pywrap_tensorflow_lite_metrics_wrapper, m) { .def(""ExportMetrics"", [](MetricsWrapper& self) { return tensorflow::PyoOrThrow(self.ExportMetrics()); }); + m.def(""GetCollectedErrors"", []() { + py::list serialized_message_list; + for (const auto& error_data : + tflite::metrics_wrapper::GetCollectedErrors()) { + serialized_message_list.append(py::bytes(error_data)); + } + return serialized_message_list; + }); } ",0,train a9f8a9b1c1995dc4a7c0b6d53df8af6082325360,tensorflow/tensorflow,"Do not call Unprotect on remote inputs Unprotect should only be called on local handles. In order to test the triggering of forwarding for remote inputs to a function we add an optimization whereby EagerExecute releases the inputs of the eager operation. This enforces that a TFE_Op cannot be reused since the inputs would have been removed. 
This was technically already true since if the inputs were ever forwarded we should not be re-using the TFE_Op. PiperOrigin-RevId: 306564949 Change-Id: I94bd3a243658277891867802b792a4492ec0a039",c_api_remote_test.cc,"@@ -129,7 +129,45 @@ void TestRemoteExecute(bool async) { TEST(CAPI, RemoteExecute) { TestRemoteExecute(false); } TEST(CAPI, RemoteExecuteAsync) { TestRemoteExecute(true); } -void TestRemoteExecuteSilentCopies(bool async, bool remote) { +string MatMulFunction() { + tensorflow::FunctionDef def; + CHECK(tensorflow::protobuf::TextFormat::ParseFromString( + "" signature {"" + "" name: 'MatMulFunction'"" + "" input_arg {"" + "" name: 'a'"" + "" type: DT_FLOAT"" + "" }"" + "" input_arg {"" + "" name: 'b'"" + "" type: DT_FLOAT"" + "" }"" + "" output_arg {"" + "" name: 'm'"" + "" type: DT_FLOAT"" + "" }"" + "" }"" + "" node_def {"" + "" name: 'matmul'"" + "" op: 'MatMul'"" + "" input: 'a'"" + "" input: 'b'"" + "" attr {"" + "" key: 'T'"" + "" value {"" + "" type: DT_FLOAT"" + "" }"" + "" }"" + "" }"" + "" ret {"" + "" key: 'm'"" + "" value: 'matmul:product'"" + "" }"", + &def)); + return def.SerializeAsString(); +} + +void TestRemoteExecuteSilentCopies(bool async, bool remote, bool func) { tensorflow::ServerDef server_def = GetServerDef(3); // This server def has the task index set to 0. @@ -169,10 +207,29 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote) { TFE_TensorHandleCopyToDevice(h1_task0, ctx, task2_name, status); ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - // Handles are on task0 (local), and task2, but op is on task1. - TFE_Op* matmul = MatMulOp(ctx, h0_task0, h1_task2); + TFE_Op* matmul = nullptr; + if (func) { + string function_def = MatMulFunction(); + TFE_ContextAddFunctionDef(ctx, function_def.data(), function_def.size(), + status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + + matmul = TFE_NewOp(ctx, ""MatMulFunction"", status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpAddInput(matmul, h0_task0, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpAddInput(matmul, h1_task2, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + } else { + // Handles are on task0 (local), and task2, but op is on task1. + matmul = MatMulOp(ctx, h0_task0, h1_task2); + } if (remote) { TFE_OpSetDevice(matmul, task1_name, status); + } else if (!async) { + auto remote_arg = tensorflow::TensorHandleFromInterface(h1_task2->handle); + // The input handles should never change since they have been mirrored. + ASSERT_FALSE(remote_arg->HasLocalMirror(nullptr)); } EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); @@ -182,12 +239,10 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote) { EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); // TODO(gjn): Add support for waiting on async local mirrors - if (!async) { + if (!remote && !async) { auto remote_arg = tensorflow::TensorHandleFromInterface(h1_task2->handle); - tensorflow::EagerOperation* op = - tensorflow::OperationFromInterface(matmul->operation); // The input handles should never change since they have been mirrored. 
- ASSERT_EQ(op->Inputs()[1], remote_arg); + ASSERT_TRUE(remote_arg->HasLocalMirror(nullptr)); } auto* retval_task0 = TFE_TensorHandleCopyToDevice( @@ -217,6 +272,9 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote) { TFE_ExecutorWaitForAllPendingNodes(executor, status); ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteExecutor(executor); + if (func) { + TFE_ContextRemoveFunction(ctx, ""MatMulFunction"", status); + } TFE_DeleteContext(ctx); TF_DeleteStatus(status); @@ -227,16 +285,22 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote) { } TEST(CAPI, RemoteExecuteSilentCopies) { - TestRemoteExecuteSilentCopies(false, true); + TestRemoteExecuteSilentCopies(false, true, false); } TEST(CAPI, RemoteExecuteSilentCopiesAsync) { - TestRemoteExecuteSilentCopies(true, true); + TestRemoteExecuteSilentCopies(true, true, false); +} +TEST(CAPI, RemoteExecuteSilentCopiesAsyncFunc) { + TestRemoteExecuteSilentCopies(true, true, true); } TEST(CAPI, RemoteExecuteSilentCopiesLocal) { - TestRemoteExecuteSilentCopies(false, false); + TestRemoteExecuteSilentCopies(false, false, false); } TEST(CAPI, RemoteExecuteSilentCopiesLocalAsync) { - TestRemoteExecuteSilentCopies(true, false); + TestRemoteExecuteSilentCopies(true, false, false); +} +TEST(CAPI, RemoteExecuteSilentCopiesLocalAsyncFunc) { + TestRemoteExecuteSilentCopies(true, false, true); } void TestRemoteExecuteDeleteContextWithOutstandingRPC(bool async) { ",0,train a9f8a9b1c1995dc4a7c0b6d53df8af6082325360,tensorflow/tensorflow,"Do not call Unprotect on remote inputs Unprotect should only be called on local handles. In order to test the triggering of forwarding for remote inputs to a function we add an optimization whereby EagerExecute releases the inputs of the eager operation. This enforces that a TFE_Op cannot be reused since the inputs would have been removed. This was technically already true since if the inputs were ever forwarded we should not be re-using the TFE_Op. 
PiperOrigin-RevId: 306564949 Change-Id: I94bd3a243658277891867802b792a4492ec0a039",c_api_test.cc,"@@ -78,11 +78,18 @@ void BM_Execute(int iters, int async) { TFE_DeleteContextOptions(opts); TFE_TensorHandle* m = TestMatrixTensorHandle(ctx); - TFE_Op* matmul = MatMulOp(ctx, m, m); + TFE_Op* matmul = TFE_NewOp(ctx, ""MatMul"", status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_TensorHandle* retvals[1]; int num_retvals = 1; tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { + TFE_OpReset(matmul, ""MatMul"", nullptr, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpAddInput(matmul, m, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpAddInput(matmul, m, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_Execute(matmul, &retvals[0], &num_retvals, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); } @@ -113,11 +120,15 @@ void BM_Execute_Identity(int iters, int async) { TFE_DeleteContextOptions(opts); TFE_TensorHandle* m = TestMatrixTensorHandle(ctx); - TFE_Op* identity = IdentityOp(ctx, m); + TFE_Op* identity = TFE_NewOp(ctx, ""Identity"", status); TFE_TensorHandle* retvals[1]; int num_retvals = 1; tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { + TFE_OpReset(identity, ""Identity"", nullptr, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_OpAddInput(identity, m, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_Execute(identity, &retvals[0], &num_retvals, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); } @@ -405,6 +416,11 @@ void TensorHandleSilentCopy(bool async, hcpu, ctx, gpu_device_name.c_str(), status.get()); ASSERT_EQ(TF_GetCode(status.get()), TF_OK) << TF_Message(status.get()); + auto cpu_arg = tensorflow::TensorHandleFromInterface(hcpu->handle); + auto gpu_arg = tensorflow::TensorHandleFromInterface(hgpu->handle); + auto gpu_device = absl::get(gpu_arg->device()); + ASSERT_FALSE(cpu_arg->HasLocalMirror(gpu_device)); + TFE_Op* matmul = MatMulOp(ctx, hcpu, hgpu); if (cpu_op) { string cpu_device_name; @@ -420,15 +436,8 @@ void TensorHandleSilentCopy(bool async, TFE_Execute(matmul, &retvals[0], &num_retvals, status.get()); ASSERT_EQ(TF_GetCode(status.get()), TF_OK) << TF_Message(status.get()); - // Validate if the input was replaced with a different TensorHandle - auto arg0 = tensorflow::TensorHandleFromInterface(hcpu->handle); - auto arg1 = tensorflow::TensorHandleFromInterface(hgpu->handle); - tensorflow::EagerOperation* op = - tensorflow::OperationFromInterface(matmul->operation); - - // The input handles should never change since they have been mirrored. - EXPECT_EQ(op->Inputs()[0], arg0); - EXPECT_EQ(op->Inputs()[1], arg1); + // The CPU handle should have been copied and have a mirror on the GPU + ASSERT_TRUE(cpu_arg->HasLocalMirror(gpu_device)); TFE_DeleteOp(matmul); TFE_DeleteTensorHandle(retvals[0]); @@ -626,17 +635,6 @@ void ExecuteAdd(bool async, bool forward_input, bool tfrt) { } int num_retvals = 1; - - if (async) { - // Enqueue dummy ops so we backlog async execution & actually test async. 
- for (int i = 0; i < 10000; ++i) { - TFE_TensorHandle* dummy = nullptr; - TFE_Execute(add_op, &dummy, &num_retvals, status); - ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - TFE_DeleteTensorHandle(dummy); - } - } - TFE_TensorHandle* retval = nullptr; TFE_Execute(add_op, &retval, &num_retvals, status); EXPECT_EQ(1, num_retvals); ",0,train a9f8a9b1c1995dc4a7c0b6d53df8af6082325360,tensorflow/tensorflow,"Do not call Unprotect on remote inputs Unprotect should only be called on local handles. In order to test the triggering of forwarding for remote inputs to a function we add an optimization whereby EagerExecute releases the inputs of the eager operation. This enforces that a TFE_Op cannot be reused since the inputs would have been removed. This was technically already true since if the inputs were ever forwarded we should not be re-using the TFE_Op. PiperOrigin-RevId: 306564949 Change-Id: I94bd3a243658277891867802b792a4492ec0a039",execute.cc,"@@ -596,6 +596,10 @@ Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals, &ctx, op->Inputs(), op->remote_func_params(), std::move(kernel), graph_collector, op->GetCancellationManager(), absl::Span(retvals, num_outputs)); + // Release the inputs from the eager operation since the AsyncExecuteNode + // would have taken ownership. This allows the inputs to be forwarded if + // possible. + op->Clear(); // For async mode, execution order will make sure that all // input handles are ready before executing them. // TODO(b/137118203): Consider executing ""cheap"" kernels inline for @@ -609,6 +613,10 @@ Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals, graph_collector, op->GetCancellationManager(), {retvals, static_cast(num_outputs)}); s = executor.SyncExecute(&node); + // We release the inputs AFTER executing the operation in sync mode since + // ExecuteNode does not increment the reference count and thus does not have + // ownership of the inputs while executing. + op->Clear(); } // Since the operation failed, we need to Unref any outputs if they were // allocated. ",0,train a9f8a9b1c1995dc4a7c0b6d53df8af6082325360,tensorflow/tensorflow,"Do not call Unprotect on remote inputs Unprotect should only be called on local handles. In order to test the triggering of forwarding for remote inputs to a function we add an optimization whereby EagerExecute releases the inputs of the eager operation. This enforces that a TFE_Op cannot be reused since the inputs would have been removed. This was technically already true since if the inputs were ever forwarded we should not be re-using the TFE_Op. PiperOrigin-RevId: 306564949 Change-Id: I94bd3a243658277891867802b792a4492ec0a039",tensor_handle.cc,"@@ -449,7 +449,7 @@ Status TensorHandle::NumElements(int64* num_elements) const { Status TensorHandle::Unprotect(const Device* d) { DVLOG(3) << ""Unprotect on TensorHandle: "" << this << "" device: "" << d; - if (d == absl::get(device_)) { + if (!IsRemote() && (d == absl::get(device_))) { auto& data = absl::get(data_); return data.Unprotect(); } ",0,train 835ac7291dd62277e27d1a66e241608b98790bb3,tensorflow/tensorflow,"Internal change PiperOrigin-RevId: 296339357 Change-Id: Ife4d6cc532586e15b94c049786977c4a7acf597d",network.py,"@@ -1063,7 +1063,28 @@ class Network(base_layer.Layer): ValueError: For invalid/unknown format arguments. 
"""""" self._assert_weights_created() - save_format = validate_save_format(filepath, save_format) + filepath_is_h5 = _is_hdf5_filepath(filepath) + if save_format is None: + if filepath_is_h5: + save_format = 'h5' + else: + save_format = 'tf' + else: + user_format = save_format.lower().strip() + if user_format in ('tensorflow', 'tf'): + save_format = 'tf' + elif user_format in ('hdf5', 'h5', 'keras'): + save_format = 'h5' + else: + raise ValueError( + 'Unknown format ""%s"". Was expecting one of {""tf"", ""h5""}.' % ( + save_format,)) + if save_format == 'tf' and filepath_is_h5: + raise ValueError( + ('save_weights got save_format=""tf""/""tensorflow"", but the ' + 'filepath (""%s"") looks like an HDF5 file. Omit the "".h5""/"".keras"" ' + 'when saving in TensorFlow format.') + % filepath) if save_format == 'h5' and h5py is None: raise ImportError( @@ -2086,67 +2107,3 @@ def get_network_config(network, serialize_layer_fn=None): model_outputs = tf_utils.convert_inner_node_data(model_outputs) config['output_layers'] = model_outputs return config - - -def validate_save_format(filepath, save_format, default='tf'): - """"""Validates `save_format` argument passed to methods used for saving. - - Returns either 'tf' or 'h5', indicating whether to save the model - to Tensorflow SavedModel or HDF5. Output will default to 'tf' in TF2.X and - 'h5' in TF1.X. - - Defaults to 'h5' if `filepath` is a path to a hdf5 file (having suffix '.h5' - or '.hdf5' or '.keras') or is an h5py.File object. - - Args: - filepath: Value of the `filepath` argument passed to the method. - Can be: - String - h5py.File object - save_format: String, value of the 'save_format' argument as passed. - default: Default format if save_format isn't specified and the filepath - doesn't indicate that the format is 'h5'. - - Returns: - save_format: String, 'h5' or 'tf'. The processed - value of the `save_format` argument. - - Raises: - ValueError: If - - `filepath` is not a String or an h5py.File object. - - `save_format` is not valid. Valid values are ""tensorflow"", ""tf"" for - saving in SavedModel format, and ""hdf5"", ""keras"" or ""h5"" for saving in - h5 format. - - `save_format` is ""tf"" but `filepath` is a path to a h5 file. - - `save_format` is ""tf"" but `filepath` is an h5py.File object. - """""" - if not isinstance(filepath, (str, h5py.File)): - raise ValueError( - 'Expected `filepath` to be a String or h5py.File object. Got ' - 'unsupported value %s of type %s' % (filepath, type(filepath))) - - filepath_is_h5py_file = h5py is not None and isinstance(filepath, h5py.File) - filepath_is_h5 = isinstance(filepath, str) and _is_hdf5_filepath(filepath) - if save_format is None: - if filepath_is_h5 or filepath_is_h5py_file: - save_format = 'h5' - else: - save_format = default - else: - user_format = save_format.lower().strip() - if user_format in ('tensorflow', 'tf'): - save_format = 'tf' - elif user_format in ('hdf5', 'h5', 'keras'): - save_format = 'h5' - else: - raise ValueError( - 'Unknown format ""%s"". Was expecting one of {""tf"", ""h5""}.' % - (save_format)) - if save_format == 'tf' and filepath_is_h5: - raise ValueError( - ('Got save_format=""tf""/""tensorflow"", but the filepath (""%s"") looks ' - 'like an HDF5 file. 
Omit the "".h5""/"".keras"" when saving in ' - 'TensorFlow format.') % filepath) - if save_format == 'tf' and filepath_is_h5py_file: - raise ValueError( - 'Got save_format=""tf""/""tensorflow"", but the given `filepath`' - 'is an h5py.File object.') - return save_format ",0,train 835ac7291dd62277e27d1a66e241608b98790bb3,tensorflow/tensorflow,"Internal change PiperOrigin-RevId: 296339357 Change-Id: Ife4d6cc532586e15b94c049786977c4a7acf597d",network_test.py,"@@ -1880,43 +1880,5 @@ class CacheCorrectnessTest(keras_parameterized.TestCase): self.assertEqual(network.compute_output_shape((1, i, 32)), (1, i, 2)) -class SaveFormatValidationTest(keras_parameterized.TestCase): - - def test_save_format_validation(self): - filepath = 'file/path' - h5_filepath = 'h5_filepath.h5' - h5_filepath_2 = 'h5_filepath.hdf5' - h5_filepath_3 = 'h5_filepath.keras' - - self.assertEqual( - network_lib.validate_save_format(filepath, None, 'h5'), 'h5') - self.assertEqual( - network_lib.validate_save_format(filepath, None, 'tf'), 'tf') - - self.assertEqual(network_lib.validate_save_format(filepath, 'h5'), 'h5') - self.assertEqual(network_lib.validate_save_format(h5_filepath, None), 'h5') - self.assertEqual( - network_lib.validate_save_format(h5_filepath_2, None), 'h5') - self.assertEqual( - network_lib.validate_save_format(h5_filepath_3, None), 'h5') - self.assertEqual( - network_lib.validate_save_format(h5_filepath, 'hdf5'), 'h5') - self.assertEqual( - network_lib.validate_save_format(h5_filepath, 'keras'), 'h5') - - self.assertEqual(network_lib.validate_save_format(filepath, 'tf'), 'tf') - self.assertEqual( - network_lib.validate_save_format(filepath, 'tensorflow'), 'tf') - - with self.assertRaises(ValueError): - network_lib.validate_save_format(42, 'h5') - - with self.assertRaises(ValueError): - network_lib.validate_save_format(filepath, 'unknown_format') - - with self.assertRaises(ValueError): - network_lib.validate_save_format(h5_filepath, 'tf') - - if __name__ == '__main__': test.main() ",0,train 835ac7291dd62277e27d1a66e241608b98790bb3,tensorflow/tensorflow,"Internal change PiperOrigin-RevId: 296339357 Change-Id: Ife4d6cc532586e15b94c049786977c4a7acf597d",save.py,"@@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os import sys import six @@ -28,15 +29,8 @@ from tensorflow.python.keras.saving.saved_model import load as saved_model_load from tensorflow.python.keras.saving.saved_model import save as saved_model_save from tensorflow.python.keras.utils import generic_utils from tensorflow.python.saved_model import loader_impl -from tensorflow.python.util.lazy_loader import LazyLoader from tensorflow.python.util.tf_export import keras_export -# pylint: disable=g-inconsistent-quotes -network = LazyLoader( - ""network"", globals(), - ""tensorflow.python.keras.engine.network"") -# pylint: enable=g-inconsistent-quotes - # pylint: disable=g-import-not-at-top if sys.version_info >= (3, 4): import pathlib @@ -46,6 +40,9 @@ except ImportError: h5py = None # pylint: enable=g-import-not-at-top +_HDF5_EXTENSIONS = ['.h5', '.hdf5', '.keras'] + + # TODO(kathywu): Remove this when Keras SavedModel is not experimental. 
_KERAS_SAVED_MODEL_STILL_EXPERIMENTAL = True @@ -115,14 +112,15 @@ def save_model(model, """""" from tensorflow.python.keras.engine import sequential # pylint: disable=g-import-not-at-top + default_format = 'tf' if tf2.enabled() else 'h5' + save_format = save_format or default_format + if sys.version_info >= (3, 4) and isinstance(filepath, pathlib.Path): filepath = str(filepath) - default_format = 'tf' if tf2.enabled() else 'h5' - save_format = network.validate_save_format(filepath, save_format, - default_format) - - if save_format == 'h5': + if (save_format == 'h5' or + (h5py is not None and isinstance(filepath, h5py.File)) or + os.path.splitext(filepath)[1] in _HDF5_EXTENSIONS): # TODO(b/130258301): add utility method for detecting model type. if (not model._is_graph_network and # pylint:disable=protected-access not isinstance(model, sequential.Sequential)): ",0,train cebfa65df5451af59f88115d509b4a7830d34d26,tensorflow/tensorflow,"Enable the mlir quantizer for all the zip tests This also enabled the tests for all the tests including the floating-point conversions with the _experimental_new_quantizer is on. PiperOrigin-RevId: 348900650 Change-Id: Iea3cd528d4cf105fb218fb856b4c7fda27db2bdb",zip_test_utils.py,"@@ -368,11 +368,6 @@ def make_zip_of_tests(options, ""fully_quantize"", False) or param_dict.get(""quant_16x8"", False)): continue - # Skips the new quantizer tests when `fully_quantize` is set to false - # or it is not set. - if options.mlir_quantizer and not param_dict.get(""fully_quantize"", False): - continue - def generate_inputs_outputs(tflite_model_binary, min_value=0, max_value=255): ",0,train 880d16bc8fcf8160037abc05c2585baed6a35cd5,tensorflow/tensorflow,"Change max clones to simple heuristic based on original number of functions Less arbitrary than fixed number but degenerate behavior is now defined in terms of the number of functions in the original module. PiperOrigin-RevId: 381109197 Change-Id: I68f66b303044648c8410e0a16d3926dc0a5570ea",guarantee_all_funcs_one_use.cc,"@@ -67,15 +67,11 @@ class GuaranteeAllFuncsOneUse SymbolTable &symbol_table = symbol_table_collection.getSymbolTable(module); bool made_changes = false; - // The maximum number of clones value needs to be low enough to actually - // stop compilation in a reasonable time, but not too low that it blocks - // real programs. This number was chosen semi-randomly. - int number_of_functions = [&]() -> int { - auto fn_range = module.getOps(); - return std::distance(fn_range.begin(), fn_range.end()); - }(); - const int kMaxClones = 4 * number_of_functions; - + // This value needs to be low enough to actually stop compilation in a + // reasonable time, but not too low that it blocks real programs. + // This number was chosen semi-randomly. + // TODO(jpienaar): Switch to a more context aware heuristic. + const int kMaxClones = 10000; int num_clones = 0; do { SymbolUserMap symbol_users(symbol_table_collection, module); ",0,train 2b703011163454ae15ba07ec89dd6bd2d8633a00,tensorflow/tensorflow,"add type info described in issue #5236 (#5278) * add type info when assign * add INFO * add spaces * wrap to 80 lines",simple_placer.cc,"@@ -815,9 +815,11 @@ void SimplePlacer::AssignAndLog(const string& assigned_device, node->set_assigned_device_name(assigned_device); // Log placement if log_device_placement is set. 
if (options_ && options_->config.log_device_placement()) { - printf(""%s: %s\n"", node->name().c_str(), + printf(""%s: (%s): %s\n"", node->name().c_str(), + node->type_string().c_str(), node->assigned_device_name().c_str()); - LOG(INFO) << node->name() << "": "" << node->assigned_device_name(); + LOG(INFO) << node->name() << "": "" << ""("" << node->type_string() << "")"" + << node->assigned_device_name(); } } ",0,train e5379bb6c053c1d1af913cd1c8c14663191e58b3,tensorflow/tensorflow,pylint: whitespace changes,mirrored_strategy.py,"@@ -87,7 +87,7 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): self._devices = [device_util.resolve(d) for d in devices] self._canonical_device_set = set(self._devices) self._device_index = values.PerDevice( - {d: i for i, d in enumerate(devices)}) + {d: i for i, d in enumerate(devices)}) self._cross_tower_ops = cross_tower_ops self._prefetch_on_device = prefetch_on_device # TODO(yuefengz): consider setting the default device. ",0,train e5379bb6c053c1d1af913cd1c8c14663191e58b3,tensorflow/tensorflow,pylint: whitespace changes,errors_impl.py,"@@ -476,7 +476,7 @@ _CODE_TO_EXCEPTION_CLASS = { c_api.PyExceptionRegistry_Init(_CODE_TO_EXCEPTION_CLASS) _EXCEPTION_CLASS_TO_CODE = { - class_: code for code, class_ in _CODE_TO_EXCEPTION_CLASS.items()} + class_: code for code, class_ in _CODE_TO_EXCEPTION_CLASS.items()} @tf_export(""errors.exception_type_from_error_code"") ",0,train 49e59a8cad98ff3cfaa38247108bad2f8d23e70f,tensorflow/tensorflow,"Add a launch id field in run options and hlo module config. PiperOrigin-RevId: 307922589 Change-Id: Ie1ea0b389e5228f827d570086799227983035f81",executable_run_options.h,"@@ -127,6 +127,13 @@ class ExecutableRunOptions { ExecutableRunOptions& set_rng_seed(int rng_seed); int rng_seed() const; + ExecutableRunOptions& set_launch_id(int32 launch_id) { + launch_id_ = launch_id; + return *this; + } + + int32 launch_id() const { return launch_id_; } + ExecutableRunOptions& set_run_id(RunId id); RunId run_id() const; @@ -153,6 +160,7 @@ class ExecutableRunOptions { const Eigen::ThreadPoolDevice* intra_op_thread_pool_ = nullptr; ExecutionProfile* execution_profile_ = nullptr; int rng_seed_ = 0; + int32 launch_id_ = 0; stream_executor::Stream* host_to_device_stream_ = nullptr; ThenExecuteFunction* then_execute_function_ = nullptr; RunId run_id_; ",0,train 49e59a8cad98ff3cfaa38247108bad2f8d23e70f,tensorflow/tensorflow,"Add a launch id field in run options and hlo module config. PiperOrigin-RevId: 307922589 Change-Id: Ie1ea0b389e5228f827d570086799227983035f81",hlo_module_config.h,"@@ -108,6 +108,12 @@ class HloModuleConfig { void set_seed(uint64 seed) { seed_ = seed; } uint64 seed() const { return seed_; } + // Set the launch id of the program. Launch id identifies a set of programs + // that should be launched together. + void set_launch_id(uint64 launch_id) { launch_id_ = launch_id; } + + int32 launch_id() const { return launch_id_; } + void set_replica_count(int64 replica_count) { replica_count_ = replica_count; } @@ -197,6 +203,9 @@ class HloModuleConfig { // Module/graph-level seed handle. uint64 seed_ = 0; + // Program id that identifies a set of program to be launched together. + int32 launch_id_ = 0; + // The number of replicas (data parallelism) to compile this binary for. int64 replica_count_ = 1; ",0,train 49e59a8cad98ff3cfaa38247108bad2f8d23e70f,tensorflow/tensorflow,"Add a launch id field in run options and hlo module config. 
PiperOrigin-RevId: 307922589 Change-Id: Ie1ea0b389e5228f827d570086799227983035f81",service.cc,"@@ -314,6 +314,7 @@ StatusOr> Service::CreateModuleConfig( config->set_num_partitions(execution_options->num_partitions()); } config->set_seed(execution_options->seed()); + config->set_launch_id(execution_options->launch_id()); config->set_debug_options(execution_options->debug_options()); } else { config->set_replica_count(options_.number_of_replicas()); ",0,train 9b1fe8f31ee1788208d8d6b7385382e436c5e1d7,tensorflow/tensorflow,"Use `tempfile.mkdtemp` instead of `tempfile.mktemp` to create directories. The `tempfile.mktemp` function is [deprecated](https://docs.python.org/3/library/tempfile.html#tempfile.mktemp) due to [security issues](https://cwe.mitre.org/data/definitions/377.html). The switch is easy to do: just a name change PiperOrigin-RevId: 420370858 Change-Id: I44a0849d161132eacd4f3881fdb615e09c0f02a2",debug_data_test.py,"@@ -147,8 +147,7 @@ class DebugTensorDatumTest(test_util.TensorFlowTestCase): class DebugDumpDirTest(test_util.TensorFlowTestCase): def setUp(self): - self._dump_root = tempfile.mktemp() - os.mkdir(self._dump_root) + self._dump_root = tempfile.mkdtemp() def tearDown(self): # Tear down temporary dump directory. @@ -179,7 +178,7 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase): def testDebugDumpDir_nonexistentDumpRoot(self): with self.assertRaisesRegex(IOError, ""does not exist""): - debug_data.DebugDumpDir(tempfile.mktemp() + ""_foo"") + debug_data.DebugDumpDir(tempfile.mkdtemp() + ""_foo"") def testDebugDumpDir_invalidFileNamingPattern(self): # File name with too few underscores should lead to an exception. ",0,test 4475cc8744b1c6b2f61052a5da0810ecc34ee642,tensorflow/tensorflow,"Internal change PiperOrigin-RevId: 397208029 Change-Id: I16d248be52ccdef0e88995842780c6dc93a20ba8",session_test.py,"@@ -76,7 +76,6 @@ except ImportError: defaultdict = collections.defaultdict # pylint:disable=invalid-name -@test_util.with_eager_op_as_function class SessionTest(test_util.TensorFlowTestCase): def setUp(self): @@ -1963,9 +1962,7 @@ class SessionTest(test_util.TensorFlowTestCase): self.assertEqual(c, 3) self.assertEqual(d, 3) # Ensure that we did log device placement. - add_executions = [ - l for l in str(log).splitlines() if 'Executing op AddV2' in l - ] + add_executions = [l for l in str(log).splitlines() if 'AddV2' in l] self.assertEqual(len(add_executions), 2) @def_function.function ",0,train 5c438dfc7a0d47b18f0064c6ad6172df6eee4325,tensorflow/tensorflow,"...Setting shapes of placeholders used in tf.compat.v2.test.compute_gradient... PiperOrigin-RevId: 293212323 Change-Id: I07e7620965bff0872f83d4619941509d97bc499f",gradient_checker_v2.py,"@@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - """"""Gradient checker for functions. The gradient checker verifies numerically that an function properly @@ -94,7 +93,7 @@ def _to_numpy(a): return a -def _prepare(f, xs_dtypes): +def _prepare(f, xs_dtypes, xs_shapes): """"""Return a function that executes 'f'. In TF 2.x, this is the same as `f`. @@ -104,9 +103,9 @@ def _prepare(f, xs_dtypes): Args: f: the function. xs_dtypes: dtypes of f's arguments. + xs_shapes: shapes of f's arguments. 
Returns: - a function that will be evaluated in both graph and eager mode """""" if context.executing_eagerly(): @@ -114,12 +113,17 @@ def _prepare(f, xs_dtypes): return f(*map(ops.convert_to_tensor, xs_data)) return decorated_eager - xs = [array_ops.placeholder(x_dtype) for x_dtype in xs_dtypes] + xs = [ + array_ops.placeholder(x_dtype, shape=x_shape) + for x_dtype, x_shape in zip(xs_dtypes, xs_shapes) + ] y = f(*xs) sess = ops.get_default_session() + def decorated_graph(*xs_data): xs_data = [_to_numpy(a) for a in xs_data] return sess.run(y, feed_dict=dict(zip(xs, xs_data))) + return decorated_graph @@ -159,12 +163,13 @@ def _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param): # For each of the entry of dy, we set this to be 1 and # everything else to be 0 and compute the gradients -- this will give us one - # one row of the Jacobian matrix. + # row of the Jacobian matrix. dy_data = np.zeros(y_shape, dtype=y_dtype.as_numpy_dtype) dy_data_flat = dy_data.ravel().view(y_dtype.real_dtype.as_numpy_dtype) grad_fn_unprep = backprop.gradients_function(f, [param]) grad_fn = _prepare(lambda dy, *xs: grad_fn_unprep(*xs, dy=dy), - [y_dtype] + [z.dtype for z in xs]) + [y_dtype] + [z.dtype for z in xs], + [None] + [z.shape for z in xs]) for row in range(y_size): dy_data_flat[row] = 1 grad = _to_numpy(grad_fn(dy_data, *xs)[0]) @@ -192,8 +197,7 @@ def _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param): return jacobian -def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, - delta): +def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta): """"""Computes the numeric Jacobian for f regarding xs[param]. One can think of the relation among f, xs and y as y = f(xs). @@ -227,6 +231,7 @@ def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, y_dtype = y_dtype.real_dtype.as_numpy_dtype xs_dtypes = [x.dtype for x in xs] + xs_shapes = [x.shape for x in xs] # Converts xs to numpy arrays to do in-place perturbation. # Calls asarray() to avoid copying in ravel() later. xs = [np.asarray(_to_numpy(x)) for x in xs] @@ -240,7 +245,7 @@ def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, # For each of the entry of x, we slightly perturbs this by adding and # subtracting a delta and then compute difference between the outputs. This # will give us one column of the Jacobian matrix. 
- f = _prepare(f, xs_dtypes) + f = _prepare(f, xs_dtypes, xs_shapes) for col in range(x_size): original = x.ravel().view(x_dtype)[col] x.ravel().view(x_dtype)[col] += delta @@ -256,17 +261,14 @@ def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, return jacobian -def _compute_gradient(f, - y_shape, - y_dtype, - xs, - param, - delta): +def _compute_gradient(f, y_shape, y_dtype, xs, param, delta): """"""Computes the theoretical and numerical jacobian."""""" x = xs[param] t = x.dtype - allowed_types = [dtypes.float16, dtypes.bfloat16, dtypes.float32, - dtypes.float64, dtypes.complex64, dtypes.complex128] + allowed_types = [ + dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64, + dtypes.complex64, dtypes.complex128 + ] assert t.base_dtype in allowed_types, (""Cannot compute gradient for "" ""unsupported type %s of argument %s"" % (t.name, param)) @@ -274,10 +276,8 @@ def _compute_gradient(f, assert t2.base_dtype in allowed_types, (""Cannot compute gradient for "" ""unsupported type %s of y"" % t2.name) y_size = _product(y_shape) - jacob_t = _compute_theoretical_jacobian(f, y_shape, y_dtype, - xs, param) - jacob_n = _compute_numeric_jacobian(f, y_size, y_dtype, xs, - param, delta) + jacob_t = _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param) + jacob_n = _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta) return jacob_t, jacob_n @@ -287,10 +287,13 @@ def _compute_gradient_list(f, xs, delta): xs = list(map(ops.convert_to_tensor, xs)) # run the function to get info of the result xs_dtypes = [x.dtype for x in xs] - f_temp = _prepare(f, xs_dtypes) + xs_shapes = [x.shape for x in xs] + f_temp = _prepare(f, xs_dtypes, xs_shapes) y = f_temp(*xs) - return zip(*[_compute_gradient(f, y.shape, dtypes.as_dtype(y.dtype), - xs, i, delta) for i in range(len(xs))]) + return zip(*[ + _compute_gradient(f, y.shape, dtypes.as_dtype(y.dtype), xs, i, delta) + for i in range(len(xs)) + ]) @tf_export(""test.compute_gradient"", v1=[]) ",0,train 5c438dfc7a0d47b18f0064c6ad6172df6eee4325,tensorflow/tensorflow,"...Setting shapes of placeholders used in tf.compat.v2.test.compute_gradient... PiperOrigin-RevId: 293212323 Change-Id: I07e7620965bff0872f83d4619941509d97bc499f",gradient_checker_v2_test.py,"@@ -12,8 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -""""""Tests for compute_gradient. 
-"""""" +""""""Tests for compute_gradient."""""" from __future__ import absolute_import from __future__ import division @@ -47,6 +46,21 @@ def _random_complex(shape, dtype): @test_util.run_all_in_graph_and_eager_modes class GradientCheckerTest(test.TestCase): + def testWithStaticShape(self): + size = (2, 3) + constant = constant_op.constant(2.0, shape=size, name=""const"") + + def add_constant_with_static_shape_check(x): + self.assertAllEqual(x.shape.as_list(), constant.shape.as_list()) + return x + constant + + x = constant_op.constant(3.0, shape=size, name=""x"") + + error = gradient_checker.max_error(*gradient_checker.compute_gradient( + add_constant_with_static_shape_check, [x])) + + self.assertLess(error, 1e-4) + def testAddSimple(self): size = (2, 3) x1 = constant_op.constant(2.0, shape=size, name=""x1"") @@ -58,31 +72,32 @@ class GradientCheckerTest(test.TestCase): def testAddCustomized(self): size = (2, 3) - x1 = constant_op.constant( - 2.0, shape=size, dtype=dtypes.float64, name=""x1"") + x1 = constant_op.constant(2.0, shape=size, dtype=dtypes.float64, name=""x1"") x2 = np.asarray(np.arange(6, dtype=np.float64).reshape(2, 3)) # checkint gradients for x2 using a special delta error = gradient_checker.max_error(*gradient_checker.compute_gradient( - lambda x2: math_ops.add(x1, x2), - [x2], delta=1e-2)) + lambda x2: math_ops.add(x1, x2), [x2], delta=1e-2)) tf_logging.info(""x2 error = %f"", error) self.assertLess(error, 1e-10) def testGather(self): + def f(params): index_values = [1, 3] indices = constant_op.constant(index_values, name=""i"") return array_ops.gather(params, indices, name=""y"") + p_shape = (4, 2) p_size = 8 params = constant_op.constant( np.arange(p_size).astype(np.float), shape=p_shape, name=""p"") - error = gradient_checker.max_error(*gradient_checker.compute_gradient( - f, [params])) + error = gradient_checker.max_error( + *gradient_checker.compute_gradient(f, [params])) tf_logging.info(""gather error = %f"", error) self.assertLess(error, 1e-4) def testNestedGather(self): + def f(params): index_values = [1, 3, 5, 6] indices = constant_op.constant(index_values, name=""i"") @@ -90,57 +105,62 @@ class GradientCheckerTest(test.TestCase): index_values2 = [0, 2] indices2 = constant_op.constant(index_values2, name=""i2"") return array_ops.gather(y, indices2, name=""y2"") + p_shape = (8, 2) p_size = 16 params = constant_op.constant( np.arange(p_size).astype(np.float), shape=p_shape, name=""p"") - error = gradient_checker.max_error(*gradient_checker.compute_gradient( - f, [params])) + error = gradient_checker.max_error( + *gradient_checker.compute_gradient(f, [params])) tf_logging.info(""nested gather error = %f"", error) self.assertLess(error, 1e-4) def testComplexMul(self): c = constant_op.constant(5 + 7j, dtype=dtypes.complex64) + def f(x): return c * x + x_shape = c.shape x_dtype = c.dtype x = constant_op.constant(_random_complex(x_shape, x_dtype)) - analytical, numerical = gradient_checker.compute_gradient( - f, [x]) + analytical, numerical = gradient_checker.compute_gradient(f, [x]) correct = np.array([[5, -7], [7, 5]]) self.assertAllEqual(correct, analytical[0]) self.assertAllClose(correct, numerical[0], rtol=1e-4) x = constant_op.constant(_random_complex(x_shape, x_dtype)) self.assertLess( - gradient_checker.max_error(*gradient_checker.compute_gradient( - f, [x])), 3e-4) + gradient_checker.max_error(*gradient_checker.compute_gradient(f, [x])), + 3e-4) def testComplexConj(self): + def f(x): return math_ops.conj(x) + x_shape = () x_dtype = dtypes.complex64 x = 
constant_op.constant(_random_complex(x_shape, x_dtype)) - analytical, numerical = gradient_checker.compute_gradient( - f, [x]) + analytical, numerical = gradient_checker.compute_gradient(f, [x]) correct = np.array([[1, 0], [0, -1]]) self.assertAllEqual(correct, analytical[0]) self.assertAllClose(correct, numerical[0], rtol=2e-5) x = constant_op.constant(_random_complex(x_shape, x_dtype)) self.assertLess( - gradient_checker.max_error(*gradient_checker.compute_gradient( - f, [x])), 2e-5) + gradient_checker.max_error(*gradient_checker.compute_gradient(f, [x])), + 2e-5) def testEmptySucceeds(self): + def f(x): return array_ops.identity(x) - x = constant_op.constant(np.random.random_sample((0, 3)), - dtype=dtypes.float32) + + x = constant_op.constant( + np.random.random_sample((0, 3)), dtype=dtypes.float32) for grad in gradient_checker.compute_gradient(f, [x]): self.assertEqual(grad[0].shape, (0, 0)) - error = gradient_checker.max_error(*gradient_checker.compute_gradient( - f, [x])) + error = gradient_checker.max_error( + *gradient_checker.compute_gradient(f, [x])) self.assertEqual(error, 0) def testEmptyMatMul(self): @@ -160,37 +180,47 @@ class GradientCheckerTest(test.TestCase): self.assertEqual(error, 0) def testEmptyFails(self): + @custom_gradient.custom_gradient def id_bad_grad(x): y = array_ops.identity(x) + def grad_fn(dy): # dx = constant_op.constant(np.zeros((1, 4)), dtype=dtypes.float32) dx = array_ops.transpose(dy) return dx + return y, grad_fn + def f(x): return id_bad_grad(x) - x = constant_op.constant(np.random.random_sample((0, 3)), - dtype=dtypes.float32) + + x = constant_op.constant( + np.random.random_sample((0, 3)), dtype=dtypes.float32) bad = r""Empty gradient has wrong shape: expected \(0, 3\), got \(3, 0\)"" with self.assertRaisesRegexp(ValueError, bad): gradient_checker.compute_gradient(f, [x]) def testNaNGradFails(self): + @custom_gradient.custom_gradient def id_nan_grad(x): y = array_ops.identity(x) + def grad_fn(dy): dx = np.nan * dy # dx = dy return dx + return y, grad_fn + def f(x): return id_nan_grad(x) - x = constant_op.constant(np.random.random_sample((1, 1)), - dtype=dtypes.float32) - error = gradient_checker.max_error(*gradient_checker.compute_gradient( - f, [x])) + + x = constant_op.constant( + np.random.random_sample((1, 1)), dtype=dtypes.float32) + error = gradient_checker.max_error( + *gradient_checker.compute_gradient(f, [x])) # Typical test would assert error < max_err, so assert this test would # raise AssertionError, since NaN is not < 1.0. with self.assertRaisesRegexp(AssertionError, ""nan not less than 1.0""): @@ -264,9 +294,7 @@ class MiniMNISTTest(test.TestCase): name=""softmax_bias"") # List all the parameter so that we can test them one at a time - all_params = [ - inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias - ] + all_params = [inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias] # Now, Building MNIST def f(inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias): @@ -287,8 +315,9 @@ class MiniMNISTTest(test.TestCase): xs = all_params i = param_index # use x for the i-th parameter - xs = xs[0:i]+[x]+xs[i+1:] + xs = xs[0:i] + [x] + xs[i + 1:] return f(*xs) + # Test the gradients. 
err = gradient_checker.max_error(*gradient_checker.compute_gradient( f_restricted, [all_params[param_index]], delta=1e-5)) ",0,train 72aa2aea58513bb00f3d9cf24bc79ebe880258a7,tensorflow/tensorflow,"[tpu_driver] Add missing namespace to fix OS build PiperOrigin-RevId: 376286743 Change-Id: I7aaa700d2a86dc608c2775a62916f08d3e595715",tpu_client.cc,"@@ -95,7 +95,7 @@ PyTpuClient::PyTpuClient(std::string platform_name, devices_(std::move(devices)), process_index_(process_index) { for (const std::shared_ptr& device : devices_) { - down_cast(device.get())->set_tpu_client(this); + tensorflow::down_cast(device.get())->set_tpu_client(this); CHECK(id_to_device_.insert({device->id(), device}).second) << ""Duplicate device id: "" << device->id(); ",0,train 547daed2591ff84b4c9d27ae26336ab4b6d5bf06,tensorflow/tensorflow,"Fix HandleCopies so it no longer invokes UB when the `params` or `out` TensorMaps are empty with no backing data. PiperOrigin-RevId: 313686113 Change-Id: I4b38d7e7e8cebb40d8b7f2390f841f0b541a01e5",gather_functor.h,"@@ -44,8 +44,8 @@ SliceIndex HandleCopies(OpKernelContext* ctx, const SliceIndex indices_size = static_cast(indices.dimension(0)); const SliceIndex batch_size = static_cast(params.dimension(0)); const Index limit = static_cast(params.dimension(1)); - T* out_base = &out(0, 0, 0); - const T* params_base = ¶ms(0, 0, 0); + T* out_base = out.data(); + const T* params_base = params.data(); if (static_slice_elems >= 0) { // Give compiler static knowledge of the number of elements/bytes slice_elems = static_slice_elems; ",0,train 716fea7be71d03ba486dde6c1adba245d18e805f,tensorflow/tensorflow,"Update all tf.to_float to tf.cast(..,dtype=tf.float32) in losses_impl PiperOrigin-RevId: 226041616",losses_impl.py,"@@ -139,7 +139,7 @@ def _num_present(losses, weights, per_batch=False): and not math_ops.equal(weights, 0.0))): return _num_elements(losses) with ops.name_scope(None, ""num_present"", (losses, weights)) as scope: - weights = math_ops.to_float(weights) + weights = math_ops.cast(weights, dtype=dtypes.float32) present = array_ops.where( math_ops.equal(weights, 0.0), array_ops.zeros_like(weights), @@ -207,8 +207,8 @@ def compute_weighted_loss( weights_broadcast_ops.assert_broadcastable(weights, losses),)): losses = ops.convert_to_tensor(losses) input_dtype = losses.dtype - losses = math_ops.to_float(losses) - weights = math_ops.to_float(weights) + losses = math_ops.cast(losses, dtype=dtypes.float32) + weights = math_ops.cast(weights, dtype=dtypes.float32) weighted_losses = math_ops.multiply(losses, weights) if reduction == Reduction.NONE: loss = weighted_losses @@ -275,8 +275,8 @@ def absolute_difference( raise ValueError(""predictions must not be None."") with ops.name_scope(scope, ""absolute_difference"", (predictions, labels, weights)) as scope: - predictions = math_ops.to_float(predictions) - labels = math_ops.to_float(labels) + predictions = math_ops.cast(predictions, dtype=dtypes.float32) + labels = math_ops.cast(labels, dtype=dtypes.float32) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) losses = math_ops.abs(math_ops.subtract(predictions, labels)) return compute_weighted_loss( @@ -329,8 +329,8 @@ def cosine_distance( raise ValueError(""predictions must not be None."") with ops.name_scope(scope, ""cosine_distance_loss"", (predictions, labels, weights)) as scope: - predictions = math_ops.to_float(predictions) - labels = math_ops.to_float(labels) + predictions = math_ops.cast(predictions, dtype=dtypes.float32) + labels = math_ops.cast(labels, 
dtype=dtypes.float32) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) radial_diffs = math_ops.multiply(predictions, labels) @@ -377,8 +377,8 @@ def hinge_loss(labels, logits, weights=1.0, scope=None, if logits is None: raise ValueError(""logits must not be None."") with ops.name_scope(scope, ""hinge_loss"", (logits, labels, weights)) as scope: - logits = math_ops.to_float(logits) - labels = math_ops.to_float(labels) + logits = math_ops.cast(logits, dtype=dtypes.float32) + labels = math_ops.cast(labels, dtype=dtypes.float32) logits.get_shape().assert_is_compatible_with(labels.get_shape()) # We first need to convert binary labels to -1/1 labels (as floats). all_ones = array_ops.ones_like(labels) @@ -446,8 +446,8 @@ def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None, raise ValueError(""predictions must not be None."") with ops.name_scope(scope, ""huber_loss"", (predictions, labels, weights)) as scope: - predictions = math_ops.to_float(predictions) - labels = math_ops.to_float(labels) + predictions = math_ops.cast(predictions, dtype=dtypes.float32) + labels = math_ops.cast(labels, dtype=dtypes.float32) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) error = math_ops.subtract(predictions, labels) abs_error = math_ops.abs(error) @@ -512,8 +512,8 @@ def log_loss(labels, predictions, weights=1.0, epsilon=1e-7, scope=None, raise ValueError(""predictions must not be None."") with ops.name_scope(scope, ""log_loss"", (predictions, labels, weights)) as scope: - predictions = math_ops.to_float(predictions) - labels = math_ops.to_float(labels) + predictions = math_ops.cast(predictions, dtype=dtypes.float32) + labels = math_ops.cast(labels, dtype=dtypes.float32) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) losses = -math_ops.multiply( labels, @@ -580,11 +580,11 @@ def mean_pairwise_squared_error( raise ValueError(""predictions must not be None."") with ops.name_scope(scope, ""mean_pairwise_squared_error"", (predictions, labels, weights)) as scope: - weights = math_ops.to_float(weights) - labels = math_ops.to_float(labels) + weights = math_ops.cast(weights, dtype=dtypes.float32) + labels = math_ops.cast(labels, dtype=dtypes.float32) with ops.control_dependencies(( weights_broadcast_ops.assert_broadcastable(weights, labels),)): - predictions = math_ops.to_float(predictions) + predictions = math_ops.cast(predictions, dtype=dtypes.float32) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) diffs = math_ops.subtract(predictions, labels) ",0,train 898c7319013fede56e08370f6aa9998aaad9df35,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-02-25 PiperOrigin-RevId: 359466367 Change-Id: Ia3f896f0a1fbd4d029f21a93f193b6e726cd1cfa",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 2, 24) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 2, 25) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train 8f457cef03c03dbd646824739f152d103c4239e8,tensorflow/tensorflow,Add None check to restorer,saved_model_aot_compile.py,"@@ -321,7 +321,8 @@ def aot_compile_cpu_meta_graph_def(checkpoint_path, # Load the Variables so that we can freeze the graph. with session.Session(graph=ops_lib.Graph()) as sess: restorer = saver_lib.import_meta_graph(meta_graph_def, clear_devices=True) - restorer.restore(sess, checkpoint_path) + if restorer is not None: + restorer.restore(sess, checkpoint_path) graph_def.CopyFrom( graph_util.convert_variables_to_constants( sess, ",0,train 4ab86d026ff419a5d35ee41493e29611f29a555d,tensorflow/tensorflow,"[TF:MLIR] Add promote resources to arguments pass when converting MLIR to XLA computation. Enable IR printing in ConvertMLIRToXlaComputation when vlog level is 1. PiperOrigin-RevId: 290674378 Change-Id: I90739f8bde085e1f92b54c2f3c7e2448b2eb9bc1",compile_mlir_util.cc,"@@ -28,6 +28,7 @@ limitations under the License. #include ""mlir/Transforms/Passes.h"" // TF:llvm-project #include ""tensorflow/compiler/mlir/tensorflow/transforms/passes.h"" #include ""tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h"" +#include ""tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.h"" #include ""tensorflow/compiler/mlir/tensorflow/utils/convert_type.h"" #include ""tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.h"" #include ""tensorflow/compiler/mlir/tensorflow/utils/error_util.h"" @@ -211,6 +212,7 @@ Status ConvertMLIRToXlaComputation(mlir::ModuleOp module_op, mlir::PassManager tf2xla(module_op.getContext()); tf2xla.addNestedPass(mlir::createCanonicalizerPass()); tf2xla.addPass(mlir::xla_hlo::createLegalizeTFControlFlowPass()); + tf2xla.addPass(mlir::TF::CreatePromoteResourcesToArgsPass()); // We need to run LegalizeTFPass 2 times because first // LegalizeTFPass(allow_partial_conversion=true) can expose more graph pruning // and canonicalization opportunities that are necessary for the second @@ -221,17 +223,17 @@ Status ConvertMLIRToXlaComputation(mlir::ModuleOp module_op, tf2xla.addNestedPass( mlir::xla_hlo::createLegalizeTFPass(false)); - { - // Make sure we catch any error reported by MLIR and forward it to the TF - // error reporting system. Report a generic error if pass manager failed - // without emitting a diagnostic. - mlir::StatusScopedDiagnosticHandler error_handler(module_op.getContext()); - - mlir::LogicalResult result = tf2xla.run(module_op); - if (failed(result)) { - return error_handler.Combine( - errors::Internal(""MLIR TF to XLA legalization failed"")); - } + if (VLOG_IS_ON(1)) + tf2xla.enableIRPrinting(std::make_unique()); + + // Make sure we catch any error reported by MLIR and forward it to the TF + // error reporting system. Report a generic error if pass manager failed + // without emitting a diagnostic. 
+ mlir::StatusScopedDiagnosticHandler error_handler(module_op.getContext()); + + if (failed(tf2xla.run(module_op))) { + return error_handler.Combine( + errors::Internal(""MLIR TF to XLA legalization failed"")); } if (VLOG_IS_ON(1)) ",0,train 23a07f2c1444509986eece54e486cdcf0b8e32e4,tensorflow/tensorflow,"[tf.data] Adding serialization support for `StatsAggregatorDatasets` to make it possible to apply static optimizations to input pipelines whose prefix contains the `set_stats_aggregator` transformation. PiperOrigin-RevId: 214619583",latency_all_edges_test.py,"@@ -34,8 +34,8 @@ class OptimizeStatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase): optimization.assert_next( [""LatencyStats"", ""Map"", ""LatencyStats"", ""Prefetch"", ""LatencyStats""])).map(lambda x: x * x).prefetch(1).apply( - optimization.optimize([""latency_all_edges""])).apply( - stats_ops.set_stats_aggregator(stats_aggregator)) + stats_ops.set_stats_aggregator(stats_aggregator)).apply( + optimization.optimize([""latency_all_edges""])) iterator = dataset.make_initializable_iterator() get_next = iterator.get_next() summary_t = stats_aggregator.get_summary() ",0,train 23a07f2c1444509986eece54e486cdcf0b8e32e4,tensorflow/tensorflow,"[tf.data] Adding serialization support for `StatsAggregatorDatasets` to make it possible to apply static optimizations to input pipelines whose prefix contains the `set_stats_aggregator` transformation. PiperOrigin-RevId: 214619583",stats_aggregator_dataset_op.cc,"@@ -34,16 +34,18 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel { &stats_aggregator_resource)); core::ScopedUnref unref_stats_aggregator(stats_aggregator_resource); - *output = new Dataset(ctx, input, stats_aggregator_resource); + *output = new Dataset(ctx, input, ctx->input(1), stats_aggregator_resource); } private: class Dataset : public DatasetBase { public: explicit Dataset(OpKernelContext* ctx, const DatasetBase* input, + const Tensor& resource_handle, StatsAggregatorResource* stats_aggregator_resource) : DatasetBase(DatasetContext(ctx)), input_(input), + resource_handle_(resource_handle), stats_aggregator_resource_(stats_aggregator_resource) { input_->Ref(); stats_aggregator_resource_->Ref(); @@ -75,8 +77,13 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel { Status AsGraphDefInternal(SerializationContext* ctx, DatasetGraphDefBuilder* b, Node** output) const override { - return errors::Unimplemented(""%s does not support serialization"", - DebugString()); + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node)); + Node* resource_handle_node = nullptr; + TF_RETURN_IF_ERROR(b->AddTensor(resource_handle_, &resource_handle_node)); + TF_RETURN_IF_ERROR(b->AddDataset( + this, {input_graph_node, resource_handle_node}, output)); + return Status::OK(); } private: @@ -129,6 +136,7 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel { }; const DatasetBase* const input_; + const Tensor resource_handle_; StatsAggregatorResource* stats_aggregator_resource_; }; }; ",0,train d329c289b9381137ca849466e79b11e91048d9e0,tensorflow/tensorflow,"Update GraphDef version to 744. PiperOrigin-RevId: 369826495 Change-Id: Iab9ed9151647604517fd60834be883905548e29a",version.h,"@@ -108,7 +108,7 @@ limitations under the License. 
#define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 743 // Updated: 2021/4/21 +#define TF_GRAPH_DEF_VERSION 744 // Updated: 2021/4/22 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // ",0,train 26cd260fac5fa98ade11ff2a5ec38ede65631cc0,tensorflow/tensorflow,"Add additional data validation while saving and restoring iterators. PiperOrigin-RevId: 319078544 Change-Id: I4a439934e1ba35d5eab38513cae735372d62c8d6",iterator_ops.cc,"@@ -331,6 +331,12 @@ class IteratorVariantSerializer { data.reserve(num_tensors); for (int i = 0; i < num_tensors; ++i) { auto* w = serialized_vec(i).get(); + if (!w) { + return errors::Internal( + ""Cannot initialize an iterator from tensor "", + serialized_vec(i).DebugString(), + "". Expected a variant tensor of type IteratorStateVariant""); + } data.push_back(w->GetData()); } reader_ = absl::make_unique(data); @@ -349,6 +355,10 @@ class IteratorVariantSerializer { } int64 size = variants_.size(); for (int64 i = 0; i < size; ++i) { + if (variants_[i].GetData() == nullptr) { + return errors::Internal( + ""Cannot serialize an empty IteratorStateVariant""); + } serialized->vec()(i) = variants_[i]; } return Status::OK(); ",0,train 1ee4fe2db09c29fba0631e580dee941c7c2b2beb,tensorflow/tensorflow,"Untrack eager tensors from GC during dealloc. We were seeing an issue where the item would be deleted, make a call to ClearWeakRefs which seemingly dealloc'd the object (bypassing the call to ClearWeakRefs?), and then would fail since we would try to re-delete things. The base type dealloc mentions that clearing weakrefs with GC enabled for the object might be problematic. See https://github.com/python/cpython/blob/f78a5e9ce8f32a195f5f788aade79578437f30a6/Objects/typeobject.c#L1206-L1209 PiperOrigin-RevId: 240381495",pywrap_tensor.cc,"@@ -499,6 +499,10 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) { // tp_dealloc for EagerTensor. void EagerTensor_dealloc(EagerTensor* self) { + // Unhook the object from python's GC so that the weakref deleter doesn't + // try to re-delete this. + PyObject_GC_UnTrack((PyObject*)self); + // Clear weak references to self. // Needs to happen before any actual destruction. PyObject_ClearWeakRefs((PyObject*)self); ",0,test d1fd406e41dc8220d1b26b6164776e529396007e,tensorflow/tensorflow,"[MLIR:TF/XLA] Enable variable runtime reformatting pass. PiperOrigin-RevId: 293846291 Change-Id: I30774c15524830612d84feaa59cc78fab6a8bfb0",bridge.cc,"@@ -54,8 +54,7 @@ void CreateTPUBridge(OpPassManager &pm) { pm.addNestedPass(TFDevice::CreateReplicateInvariantOpHoistingPass()); pm.addNestedPass(CreateTPUDynamicLayoutPass()); pm.addNestedPass(CreateTPUMergeVariablesWithExecutePass()); - // TODO(b/147020076): Enable this pass. - // pm.addPass(CreateTPUVariableReformattingPass()); + pm.addPass(CreateTPUVariableReformattingPass()); pm.addNestedPass(CreateFunctionalToExecutorDialectConversionPass()); pm.addNestedPass(CreateBreakUpIslandsPass()); pm.addNestedPass(TFDevice::CreateReplicateToIslandPass()); ",0,train 5dc0ab7565afc0f707adc628c2865233a9702cfa,tensorflow/tensorflow,"Inline read/write ops of Stack and TensorArray. 
Change: 113373813",stack_ops.cc,"@@ -136,6 +136,8 @@ class StackPushOp : public OpKernel { stack->Push(PersistentTensor(ctx->input(1))); ctx->set_output(0, ctx->input(1)); } + + bool IsExpensive() override { return false; } }; REGISTER_KERNEL_BUILDER(Name(""StackPush"").Device(DEVICE_CPU), StackPushOp); @@ -165,6 +167,8 @@ class StackPopOp : public OpKernel { ""Calling Pop() when the stack is empty."")); ctx->set_output(0, *value.AccessTensor(ctx)); } + + bool IsExpensive() override { return false; } }; REGISTER_KERNEL_BUILDER(Name(""StackPop"").Device(DEVICE_CPU), StackPopOp); ",0,test 5dc0ab7565afc0f707adc628c2865233a9702cfa,tensorflow/tensorflow,"Inline read/write ops of Stack and TensorArray. Change: 113373813",tensor_array_ops.cc,"@@ -273,6 +273,8 @@ class TensorArrayWriteOp : public OpKernel { PersistentTensor persistent_tensor(*tensor_value); OP_REQUIRES_OK(ctx, tensor_array->Write(ctx, index, &persistent_tensor)); } + + bool IsExpensive() override { return false; } }; #define REGISTER_WRITE(type) \ @@ -332,6 +334,8 @@ class TensorArrayReadOp : public OpKernel { ctx->set_output(0, *value.AccessTensor(ctx)); } + bool IsExpensive() override { return false; } + private: DataType dtype_; }; ",0,test 6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716,tensorflow/tensorflow,"[XLA] Hlo parser: support rng and reduce-precision. Also simplify the lexer by regarding several things as identifier. PiperOrigin-RevId: 177548483",hlo_instruction.cc,"@@ -2060,6 +2060,14 @@ std::vector HloInstruction::ExtraAttributesToString() const { extra.push_back( StrCat(""outfeed_config=\"""", CEscape(outfeed_config_), ""\"""")); } + if (opcode() == HloOpcode::kRng) { + extra.push_back( + StrCat(""distribution="", RandomDistributionToString(distribution_))); + } + if (opcode() == HloOpcode::kReducePrecision) { + extra.push_back(StrCat(""exponent_bits="", exponent_bits_)); + extra.push_back(StrCat(""mantissa_bits="", mantissa_bits_)); + } return extra; } @@ -3029,6 +3037,28 @@ string OpMetadataToString(const OpMetadata& metadata) { return Join(result, "" ""); } +string RandomDistributionToString(const RandomDistribution& distribution) { + return tensorflow::str_util::Lowercase(RandomDistribution_Name(distribution)); +} + +StatusOr StringToRandomDistribution(const string& name) { + static std::unordered_map* map = [] { + static auto* map = new std::unordered_map; + for (int i = 0; i < RandomDistribution_ARRAYSIZE; i++) { + if (RandomDistribution_IsValid(i)) { + auto value = static_cast(i); + (*map)[RandomDistributionToString(value)] = value; + } + } + return map; + }(); + auto found = map->find(tensorflow::str_util::Lowercase(name)); + if (found == map->end()) { + return InvalidArgument(""Unknown distribution""); + } + return found->second; +} + std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind) { return os << ToString(kind); } ",0,train 6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716,tensorflow/tensorflow,"[XLA] Hlo parser: support rng and reduce-precision. Also simplify the lexer by regarding several things as identifier. PiperOrigin-RevId: 177548483",hlo_instruction.h,"@@ -1285,9 +1285,12 @@ string ToString(HloInstruction::FusionKind kind); StatusOr StringToFusionKind( const string& kind_name); -// Custom stringification functions for protos that live inside HloInstruction. +// Custom (de)stringification functions for protos that live inside +// HloInstruction. 
string PaddingConfigToString(const PaddingConfig& padding); string OpMetadataToString(const OpMetadata& metadata); +string RandomDistributionToString(const RandomDistribution& distribution); +StatusOr StringToRandomDistribution(const string& name); std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind); ",0,train 6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716,tensorflow/tensorflow,"[XLA] Hlo parser: support rng and reduce-precision. Also simplify the lexer by regarding several things as identifier. PiperOrigin-RevId: 177548483",hlo_lexer.cc,"@@ -17,7 +17,6 @@ limitations under the License. #include -#include ""tensorflow/compiler/xla/service/hlo_instruction.h"" #include ""tensorflow/compiler/xla/shape_util.h"" #include ""tensorflow/compiler/xla/statusor.h"" #include ""tensorflow/compiler/xla/util.h"" @@ -153,15 +152,15 @@ TokKind HloLexer::LexToken() { } } -// Lex a shape, name, keyword, opcode, attribute name, or the dim labels -// pattern. +// Lex a shape, name, keyword, attribute name, the dim labels pattern, and +// other identifiers. // // shape ::= ([a-zA-Z0-9_]*[0-9]*)\[([0-9,]*)\](?:\s*{([0-9,]*)})? // name ::= [a-zA-Z_][a-zA-Z0-9_.-]*: // keyword ::= HloModule, ENTRY, ... -// opcode ::= add, greater-than, ... // attribute_name ::= condition, body, dimensions, ... // dim_labels_pattern ::= [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,} +// identifiers ::= other cases that match [a-zA-Z_][a-zA-Z0-9_.-]* TokKind HloLexer::LexIdentifier() { { auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); @@ -220,20 +219,6 @@ TokKind HloLexer::LexIdentifier() { #undef KEYWORD - // See if this is an opcode. - auto opcode = StringToHloOpcode(identifier.ToString()); - if (opcode.ok()) { - opcode_val_ = opcode.ValueOrDie(); - return TokKind::kOpcode; - } - - // See if this is an fusion kind. - auto kind = xla::StringToFusionKind(identifier.ToString()); - if (kind.ok()) { - fusion_kind_val_ = kind.ValueOrDie(); - return TokKind::kFusionKind; - } - { auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); static LazyRE2 dim_labels_pattern = { @@ -244,8 +229,9 @@ TokKind HloLexer::LexIdentifier() { return TokKind::kDimLabels; } } - current_ptr_ = token_start_ + 1; - return TokKind::kError; + + str_val_ = identifier.ToString(); + return TokKind::kIdent; } // Lex names after a % character. @@ -428,14 +414,12 @@ string TokKindToString(TokKind kind) { return ""kDxD""; case TokKind::kPad: return ""kPad""; + case TokKind::kIdent: + return ""kIdent""; case TokKind::kString: return ""kString""; case TokKind::kShape: return ""kShape""; - case TokKind::kOpcode: - return ""kOpcode""; - case TokKind::kFusionKind: - return ""kFusionKind""; case TokKind::kInt: return ""kInt""; case TokKind::kDecimal: ",0,train 6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716,tensorflow/tensorflow,"[XLA] Hlo parser: support rng and reduce-precision. Also simplify the lexer by regarding several things as identifier. PiperOrigin-RevId: 177548483",hlo_lexer.h,"@@ -18,9 +18,8 @@ limitations under the License. 
#include -#include ""tensorflow/compiler/xla/service/hlo_instruction.h"" -#include ""tensorflow/compiler/xla/service/hlo_opcode.h"" #include ""tensorflow/compiler/xla/tools/parser/hlo_token.h"" +#include ""tensorflow/compiler/xla/types.h"" #include ""tensorflow/compiler/xla/xla_data.pb.h"" #include ""tensorflow/core/lib/core/stringpiece.h"" #include ""tensorflow/core/platform/logging.h"" @@ -48,6 +47,7 @@ class HloLexer { case TokKind::kDxD: case TokKind::kPad: case TokKind::kString: + case TokKind::kIdent: return str_val_; default: LOG(FATAL) << ""This token does not have string value""; @@ -57,14 +57,6 @@ class HloLexer { CHECK(GetKind() == TokKind::kShape); return shape_val_; } - HloOpcode GetOpcodeVal() const { - CHECK(GetKind() == TokKind::kOpcode); - return opcode_val_; - } - HloInstruction::FusionKind GetFusionKindVal() const { - CHECK(GetKind() == TokKind::kFusionKind); - return fusion_kind_val_; - } int64 GetInt64Val() const { CHECK(GetKind() == TokKind::kInt); return int64_val_; @@ -114,8 +106,6 @@ class HloLexer { TokKind current_kind_; string str_val_; Shape shape_val_; - HloOpcode opcode_val_; - HloInstruction::FusionKind fusion_kind_val_; int64 int64_val_; double decimal_val_; }; ",0,train 6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716,tensorflow/tensorflow,"[XLA] Hlo parser: support rng and reduce-precision. Also simplify the lexer by regarding several things as identifier. PiperOrigin-RevId: 177548483",hlo_parser.cc,"@@ -16,6 +16,7 @@ limitations under the License. #include ""tensorflow/compiler/xla/tools/parser/hlo_parser.h"" #include ""tensorflow/compiler/xla/literal_util.h"" +#include ""tensorflow/compiler/xla/service/hlo_opcode.h"" #include ""tensorflow/compiler/xla/shape_util.h"" #include ""tensorflow/compiler/xla/util.h"" #include ""tensorflow/core/lib/gtl/map_util.h"" @@ -104,6 +105,7 @@ class HloParser { kPaddingConfig, kMetadata, kFusionKind, + kDistribution, }; struct AttrConfig { @@ -174,6 +176,7 @@ class HloParser { bool ParseShape(Shape* result); bool ParseOpcode(HloOpcode* result); bool ParseFusionKind(HloInstruction::FusionKind* result); + bool ParseRandomDistribution(RandomDistribution* result); bool ParseInt64(int64* result); bool ParseDouble(double* result); bool ParseBool(bool* result); @@ -816,10 +819,36 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, shape, operands[0], config ? 
*config : """")); break; } + case HloOpcode::kRng: { + optional distribution; + attrs[""distribution""] = {/*required=*/true, AttrTy::kDistribution, + &distribution}; + if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { + return false; + } + instruction = builder->AddInstruction( + HloInstruction::CreateRng(shape, *distribution, operands)); + break; + } + case HloOpcode::kReducePrecision: { + optional exponent_bits; + optional mantissa_bits; + attrs[""exponent_bits""] = {/*required=*/true, AttrTy::kInt64, + &exponent_bits}; + attrs[""mantissa_bits""] = {/*required=*/true, AttrTy::kInt64, + &mantissa_bits}; + if (!ParseOperands(&operands, /*expected_size=*/1) || + !ParseAttributes(attrs)) { + return false; + } + instruction = + builder->AddInstruction(HloInstruction::CreateReducePrecision( + shape, operands[0], static_cast(*exponent_bits), + static_cast(*mantissa_bits))); + break; + } case HloOpcode::kConditional: case HloOpcode::kCustomCall: - case HloOpcode::kReducePrecision: - case HloOpcode::kRng: case HloOpcode::kTrace: return TokenError(StrCat(""parsing not yet implemented for op: "", HloOpcodeString(opcode))); @@ -1548,6 +1577,15 @@ bool HloParser::ParseAttributeHelper( static_cast*>(attr_out_ptr)->emplace(result); return true; } + case AttrTy::kDistribution: { + RandomDistribution result; + if (!ParseRandomDistribution(&result)) { + return false; + } + static_cast*>(attr_out_ptr) + ->emplace(result); + return true; + } } }(); if (!success) { @@ -2024,20 +2062,51 @@ bool HloParser::ParseMetadata(OpMetadata* metadata) { bool HloParser::ParseOpcode(HloOpcode* result) { VLOG(1) << ""ParseOpcode""; - if (lexer_.GetKind() != TokKind::kOpcode) { + if (lexer_.GetKind() != TokKind::kIdent) { return TokenError(""expects opcode""); } - *result = lexer_.GetOpcodeVal(); + string val = lexer_.GetStrVal(); + auto status_or_result = StringToHloOpcode(val); + if (!status_or_result.ok()) { + return TokenError( + Printf(""expects opcode but sees: %s, error: %s"", val.c_str(), + status_or_result.status().error_message().c_str())); + } + *result = status_or_result.ValueOrDie(); lexer_.Lex(); return true; } bool HloParser::ParseFusionKind(HloInstruction::FusionKind* result) { VLOG(1) << ""ParseFusionKind""; - if (lexer_.GetKind() != TokKind::kFusionKind) { + if (lexer_.GetKind() != TokKind::kIdent) { return TokenError(""expects fusion kind""); } - *result = lexer_.GetFusionKindVal(); + string val = lexer_.GetStrVal(); + auto status_or_result = StringToFusionKind(val); + if (!status_or_result.ok()) { + return TokenError( + Printf(""expects fusion kind but sees: %s, error: %s"", val.c_str(), + status_or_result.status().error_message().c_str())); + } + *result = status_or_result.ValueOrDie(); + lexer_.Lex(); + return true; +} + +bool HloParser::ParseRandomDistribution(RandomDistribution* result) { + VLOG(1) << ""ParseRandomDistribution""; + if (lexer_.GetKind() != TokKind::kIdent) { + return TokenError(""expects random distribution""); + } + string val = lexer_.GetStrVal(); + auto status_or_result = StringToRandomDistribution(val); + if (!status_or_result.ok()) { + return TokenError( + Printf(""expects random distribution but sees: %s, error: %s"", + val.c_str(), status_or_result.status().error_message().c_str())); + } + *result = status_or_result.ValueOrDie(); lexer_.Lex(); return true; } ",0,train 6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716,tensorflow/tensorflow,"[XLA] Hlo parser: support rng and reduce-precision. Also simplify the lexer by regarding several things as identifier. 
PiperOrigin-RevId: 177548483",hlo_parser_test.cc,"@@ -654,6 +654,31 @@ ENTRY %InfeedToOutfeed () -> (u32[3], pred[]) { %outfeed.1 = () outfeed((u32[3]{0}, pred[]) %infeed.1) } +)"" +}, +// Rng +{ +""Rng"", +R""(HloModule rng_module: + +ENTRY %Rng () -> f32[8] { + %constant = f32[] constant(0) + %constant.1 = f32[] constant(1) + ROOT %rng = f32[8]{0} rng(f32[] %constant, f32[] %constant.1), distribution=rng_uniform +} + +)"" +}, +// Reduce precision +{ +""ReducePrevison"", +R""(HloModule reduce_precision: + +ENTRY %ReducePrecision () -> f32[1] { + %constant = f32[1]{0} constant({3.14159}) + ROOT %reduce-precision = f32[1]{0} reduce-precision(f32[1]{0} %constant), exponent_bits=8, mantissa_bits=10 +} + )"" } }); ",0,train 6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716,tensorflow/tensorflow,"[XLA] Hlo parser: support rng and reduce-precision. Also simplify the lexer by regarding several things as identifier. PiperOrigin-RevId: 177548483",hlo_token.h,"@@ -18,6 +18,9 @@ limitations under the License. #include +#include ""tensorflow/compiler/xla/types.h"" +#include ""tensorflow/core/platform/types.h"" + namespace xla { namespace tools { @@ -60,10 +63,9 @@ enum class TokKind { kDimLabels, // [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,} kDxD, // [0-9]+(x[0-9]+)+ kPad, // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)* + kIdent, // other identifiers kString, // ""abcd\""\n"" kShape, // f32[2,3]{1,0} - kOpcode, // add - kFusionKind, // kLoop, kOutput, ... kInt, // 42 kDecimal, // 4.2 }; ",0,train 3e7aab52a7f7f0dc797753f2aae9e4c2c2ba0356,tensorflow/tensorflow,"Unbreak the build. Change: 128425319",estimator_utils_test.py,"@@ -107,7 +107,7 @@ class EstimatorUtilsTest(tf.test.TestCase): } self.assertEqual(expected_base_features, base_features) - expected_targets = {""g"": mocks.MockTensor(""Out iue"", tf.int32)} + expected_targets = mocks.MockTensor(""Out iue"", tf.int32) self.assertEqual(expected_targets, targets) self.assertEqual(3, len(feature_columns)) ",0,test 631fd6fbdb6a8286c1030cf317a6d1eaca334100,tensorflow/tensorflow,"Implementing MultiHead's create_loss(). PiperOrigin-RevId: 174894493",head.py,"@@ -172,7 +172,8 @@ def multi_label_head(n_classes, weight_column: A string or a `_NumericColumn` created by `tf.feature_column.numeric_column` defining feature column representing weights. It is used to down weight or boost examples during training. It - will be multiplied by the loss of the example. + will be multiplied by the loss of the example. Per-class weighting is + not supported. thresholds: Iterable of floats in the range `(0, 1)`. Accuracy, precision and recall metrics are evaluated for each threshold value. The threshold is applied to the predicted probabilities, i.e. above the threshold is ",0,test 631fd6fbdb6a8286c1030cf317a6d1eaca334100,tensorflow/tensorflow,"Implementing MultiHead's create_loss(). 
PiperOrigin-RevId: 174894493",head_test.py,"@@ -226,7 +226,7 @@ class MultiLabelHead(test.TestCase): def test_weight_should_not_impact_prediction(self): n_classes = 4 - head = head_lib.multi_label_head(n_classes, weight_column='label_weights') + head = head_lib.multi_label_head(n_classes, weight_column='example_weights') self.assertEqual(n_classes, head.logits_dimension) logits = np.array( @@ -237,7 +237,7 @@ class MultiLabelHead(test.TestCase): spec = head.create_estimator_spec( features={ 'x': np.array(((42,),), dtype=np.int32), - 'label_weights': weights_2x1, + 'example_weights': weights_2x1, }, mode=model_fn.ModeKeys.PREDICT, logits=logits) @@ -549,7 +549,7 @@ class MultiLabelHead(test.TestCase): def test_eval_with_weights(self): n_classes = 2 - head = head_lib.multi_label_head(n_classes, weight_column='label_weights') + head = head_lib.multi_label_head(n_classes, weight_column='example_weights') logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) labels = np.array([[1, 0], [1, 1]], dtype=np.int64) @@ -563,7 +563,7 @@ class MultiLabelHead(test.TestCase): spec = head.create_estimator_spec( features={ 'x': np.array([[41], [42]], dtype=np.int32), - 'label_weights': np.array([[1.], [2.]], dtype=np.float32), + 'example_weights': np.array([[1.], [2.]], dtype=np.float32), }, mode=model_fn.ModeKeys.EVAL, logits=logits, @@ -605,7 +605,7 @@ class MultiLabelHead(test.TestCase): def test_train_create_loss_large_logits(self): """"""Tests head.create_loss for train mode and large logits."""""" n_classes = 2 - head = head_lib.multi_label_head(n_classes, weight_column='label_weights') + head = head_lib.multi_label_head(n_classes, weight_column='example_weights') logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) labels = np.array([[1, 0], [1, 1]], dtype=np.int64) @@ -623,7 +623,7 @@ class MultiLabelHead(test.TestCase): actual_weighted_sum_loss, actual_example_weight_sum, _ = head.create_loss( features={ 'x': np.array(((42,),), dtype=np.int32), - 'label_weights': weights + 'example_weights': weights }, mode=model_fn.ModeKeys.TRAIN, logits=logits, @@ -742,7 +742,7 @@ class MultiLabelHead(test.TestCase): def test_train_with_weights(self): n_classes = 2 - head = head_lib.multi_label_head(n_classes, weight_column='label_weights') + head = head_lib.multi_label_head(n_classes, weight_column='example_weights') logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) labels = np.array([[1, 0], [1, 1]], dtype=np.int64) @@ -761,7 +761,7 @@ class MultiLabelHead(test.TestCase): spec = head.create_estimator_spec( features={ 'x': np.array([[41], [42]], dtype=np.int32), - 'label_weights': np.array([[1.], [2.]], dtype=np.float32), + 'example_weights': np.array([[1.], [2.]], dtype=np.float32), }, mode=model_fn.ModeKeys.TRAIN, logits=logits, ",0,test 631fd6fbdb6a8286c1030cf317a6d1eaca334100,tensorflow/tensorflow,"Implementing MultiHead's create_loss(). PiperOrigin-RevId: 174894493",multi_head.py,"@@ -161,12 +161,52 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access def create_loss(self, features, mode, logits, labels): """"""See `Head`."""""" - # TODO(roumposg): Implement it. - raise NotImplementedError('create_loss not yet implemented for MultiHead.') + # TODO(roumposg): Add support for logits as single Tensor (with + # _split_logits utility). + if not isinstance(logits, dict): + raise ValueError('logits must be a dict. 
Single Tensor support coming ' + 'soon.') + weighted_sum_losses = [] + example_weight_sums = [] + labels_by_head = {} + for head in self._heads: + (weighted_sum_loss, + example_weight_sum, processed_labels) = head.create_loss( + features, mode, logits[head.name], labels[head.name]) + weighted_sum_losses.append(weighted_sum_loss) + example_weight_sums.append(example_weight_sum) + labels_by_head[head.name] = processed_labels + + weighted_sum_losses = tuple(weighted_sum_losses) + with ops.name_scope('merge_losses', + values=weighted_sum_losses + (self._head_weights or + tuple())): + if self._head_weights: + head_weighted_losses = [] + head_weighted_example_weight_sums = [] + for loss, example_weight_sum, weight in zip(weighted_sum_losses, + example_weight_sums, + self._head_weights): + head_weighted_losses.append(math_ops.multiply(loss, weight)) + head_weighted_example_weight_sums.append(math_ops.multiply( + example_weight_sum, weight)) + merged_weighted_sum_loss = math_ops.add_n(head_weighted_losses) + merged_example_weight_sum = math_ops.add_n( + head_weighted_example_weight_sums) + else: + merged_weighted_sum_loss = math_ops.add_n(weighted_sum_losses) + merged_example_weight_sum = math_ops.add_n(example_weight_sums) + + return head_lib.LossSpec( + weighted_sum_loss=merged_weighted_sum_loss, + example_weight_sum=merged_example_weight_sum, + processed_labels=labels_by_head) def create_estimator_spec( self, features, mode, logits, labels=None, train_op_fn=None): """"""See `_Head`."""""" + # TODO(roumposg): Add support for logits as single Tensor (with + # _split_logits utility). if not isinstance(logits, dict): raise ValueError('logits must be a dict. Given: {}'.format(logits)) if labels and not isinstance(labels, dict): @@ -183,6 +223,8 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access labels=labels[head_name] if labels else None, train_op_fn=_no_op_train_fn)) + # TODO(roumposg): Add LOSS and LOSS_MEAN summaries for the total head- + # combined loss. if mode == model_fn.ModeKeys.TRAIN: if train_op_fn is None: raise ValueError('train_op_fn can not be None in TRAIN mode.') ",0,test 631fd6fbdb6a8286c1030cf317a6d1eaca334100,tensorflow/tensorflow,"Implementing MultiHead's create_loss(). PiperOrigin-RevId: 174894493",multi_head_test.py,"@@ -178,7 +178,7 @@ class MultiHeadTest(test.TestCase): # (1 - labels) * (logits > 0) * logits => # head1: expected_unweighted_loss = [[10., 10.], [15., 0.]] # head2: expected_unweighted_loss = [[20., 20., 20.], [30., 0., 0]] - # Average over classes, weighted sum ober batch and heads. + # Average over classes, weighted sum over batch and heads. expected_loss_head1 = 17.5 expected_loss_head2 = 30.0 expected_loss = 1. * expected_loss_head1 + 2. * expected_loss_head2 @@ -231,18 +231,25 @@ class MultiHeadTest(test.TestCase): logits = {'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)} labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)} - with self.assertRaisesRegexp( - NotImplementedError, - r'create_loss not yet implemented for MultiHead\.'): - multi_head.create_loss( - features={'x': np.array(((42,),), dtype=np.int32)}, - mode=model_fn.ModeKeys.TRAIN, - logits=logits, - labels=labels) + loss = multi_head.create_loss( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels)[0] + tol = 1e-3 + with self.test_session(): + # Unreduced loss of the head is [[(10 + 10) / 2], (15 + 0) / 2] + # (averaged over classes, sum-reduced over examples). 
+ self.assertAllClose(17.5, loss.eval(), rtol=tol, atol=tol) def test_train_create_loss_two_heads_with_weights(self): - head1 = head_lib.multi_label_head(n_classes=2, name='head1') - head2 = head_lib.multi_label_head(n_classes=3, name='head2') + # Use different example weighting for each head weighting. + weights1 = np.array([[1.], [2.]], dtype=np.float32) + weights2 = np.array([[2.], [3.]]) + head1 = head_lib.multi_label_head(n_classes=2, name='head1', + weight_column='weights1') + head2 = head_lib.multi_label_head(n_classes=3, name='head2', + weight_column='weights2') multi_head = multi_head_lib.multi_head( [head1, head2], head_weights=[1., 2.]) @@ -255,14 +262,27 @@ class MultiHeadTest(test.TestCase): 'head1': np.array([[1, 0], [1, 1]], dtype=np.int64), 'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64), } - with self.assertRaisesRegexp( - NotImplementedError, - r'create_loss not yet implemented for MultiHead\.'): - multi_head.create_loss( - features={'x': np.array(((42,),), dtype=np.int32)}, - mode=model_fn.ModeKeys.TRAIN, - logits=logits, - labels=labels) + weighted_sum_loss, example_weight_sum, _ = multi_head.create_loss( + features={ + 'x': np.array(((42,),), dtype=np.int32), + 'weights1': weights1, + 'weights2': weights2 + }, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels) + tol = 1e-3 + with self.test_session(): + # loss of the first head is [[(10 + 10) / 2], [(15 + 0) / 2]] + # = [10, 7.5] + # weighted_sum_loss = 1 * 10 + 2 * 7.5 = 25 + # loss of the second head is [[(20 + 20 + 20) / 3], [(30 + 0 + 0) / 3]] + # = [20, 10] + # weighted_sum_loss = 2 * 20 + 3 * 10 = 70 + # head-weighted merge = 1 * 25 + 2 * 70 = 165 + self.assertAllClose(165, weighted_sum_loss.eval(), rtol=tol, atol=tol) + # example_weight_sum = 1 * (1 + 2) + 2 * (2 + 3) = 13 + self.assertAllClose(13., example_weight_sum.eval(), rtol=tol, atol=tol) def test_train_one_head(self): head1 = head_lib.multi_label_head(n_classes=2, name='head1') @@ -332,7 +352,7 @@ class MultiHeadTest(test.TestCase): # (1 - labels) * (logits > 0) * logits => # head1: expected_unweighted_loss = [[10., 10.], [15., 0.]] # head2: expected_unweighted_loss = [[20., 20., 20.], [30., 0., 0]] - # Average over classes, weighted sum ober batch and heads. + # Average over classes, weighted sum over batch and heads. expected_loss_head1 = 17.5 expected_loss_head2 = 30.0 expected_loss = 1. * expected_loss_head1 + 2. * expected_loss_head2 ",0,test e0e7cbbc55af32041b1721bb4600a38f352d8242,tensorflow/tensorflow,"Disable TF whitelisting in Keras Layers. PiperOrigin-RevId: 230763811",base_layer.py,"@@ -524,13 +524,6 @@ class Layer(checkpointable.Checkpointable): # models using the functional API). build_graph = tf_utils.are_all_symbolic_tensors(input_list) - if build_graph: - # Only create Keras history if at least one tensor originates from a - # `keras.Input`. Otherwise this Layer may be being used outside the Keras - # framework. - if base_layer_utils.uses_keras_input_layers(inputs): - base_layer_utils.create_keras_history(inputs) - # Handle Keras mask propagation from previous layer to current layer. previous_mask = None if build_graph and (not hasattr(self, '_compute_previous_mask') or ",0,train e0e7cbbc55af32041b1721bb4600a38f352d8242,tensorflow/tensorflow,"Disable TF whitelisting in Keras Layers. PiperOrigin-RevId: 230763811",tensorflow_op_layer_test.py,"@@ -1,131 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the ""License""); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an ""AS IS"" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -""""""Test for allowing TF ops to work with Keras Functional API."""""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized -import numpy as np - -from tensorflow.python import keras -from tensorflow.python.framework import ops -from tensorflow.python.keras import keras_parameterized -from tensorflow.python.keras import testing_utils -from tensorflow.python.keras.optimizer_v2 import adam -from tensorflow.python.ops import gen_nn_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.platform import test -from tensorflow.python.util import nest - - -def _single_op_at_end(): - inputs = keras.Input(shape=(10,)) - x = keras.layers.Dense(10)(inputs) - outputs = gen_nn_ops.relu(x, name='hey') - return inputs, outputs - - -def _multiple_ops_at_end(): - inputs = keras.Input(shape=(10,)) - inputs = keras.Input(shape=(10,)) - x = keras.layers.Dense(10)(inputs) - x = gen_nn_ops.relu(x, name='hey') - outputs = gen_nn_ops.relu(x, name='hey2') - return inputs, outputs - - -def _single_op_in_middle(): - inputs = keras.Input(shape=(10,)) - x = keras.layers.Dense(10)(inputs) - x = gen_nn_ops.relu(x, name='hey') - outputs = keras.layers.Dense(10)(x) - return inputs, outputs - - -def _multiple_ops_in_middle(): - inputs = keras.Input(shape=(10,)) - x = keras.layers.Dense(10)(inputs) - x = gen_nn_ops.relu(x, name='hey') - x = gen_nn_ops.relu(x, name='hey2') - outputs = keras.layers.Dense(10)(x) - return inputs, outputs - - -def _single_standalone_branch(): - inputs = keras.Input(shape=(10,)) - x = keras.layers.Dense(10)(inputs) - outputs = x * 2 - return inputs, outputs - - -def _single_op_with_attrs(): - inputs = keras.Input(shape=(10,)) - x = math_ops.reduce_mean(inputs, axis=1, keepdims=True) - outputs = keras.layers.Dense(10)(x) - return inputs, outputs - - -@keras_parameterized.run_all_keras_modes -class AutoLambdaTest(keras_parameterized.TestCase): - - @parameterized.named_parameters( - ('single_op_at_end', _single_op_at_end), - ('multiple_ops_at_end', _multiple_ops_at_end), - ('single_op_in_middle', _single_op_in_middle), - ('multiple_ops_in_middle', _multiple_ops_in_middle), - ('single_standalone_branch', _single_standalone_branch), - ('single_op_with_attrs', _single_op_with_attrs)) - def test_autolambda(self, model_fn): - inputs, outputs = model_fn() - model = keras.Model(inputs, outputs) - model.compile( - adam.Adam(0.001), 'mse', run_eagerly=testing_utils.should_run_eagerly()) - - np_inputs = nest.map_structure(lambda x: np.ones((10, 10), 'float32'), - inputs) - np_outputs = nest.map_structure(lambda x: np.ones((10, 10), 'float32'), - outputs) - model.fit(np_inputs, np_outputs, batch_size=2) - - def test_numerical_correctness_simple(self): - x = ops.convert_to_tensor([[-1., 0., -2., 1.]]) - inputs = keras.Input(shape=(4,)) - outputs = 
gen_nn_ops.relu(inputs) - model = keras.Model(inputs, outputs) - y = self.evaluate(model(x)) - self.assertAllClose(y, [[0., 0., 0., 1.]]) - - def test_numerical_correctness_with_attrs(self): - x = ops.convert_to_tensor([[1.5, 1.5], [2.5, 3.5]]) - inputs = keras.Input(shape=(10,)) - outputs = math_ops.reduce_mean(inputs, axis=1) - model = keras.Model(inputs, outputs) - y = self.evaluate(model(x)) - self.assertAllClose(y, [1.5, 3.]) - - def test_serialization(self): - x = ops.convert_to_tensor([-1., 0., -2., 1.]) - inputs = keras.Input(shape=(4,)) - outputs = gen_nn_ops.relu(inputs) - model1 = keras.Model(inputs, outputs) - y1 = self.evaluate(model1(x)) - model2 = model1.from_config(model1.get_config()) - y2 = self.evaluate(model2(x)) - self.assertAllClose(y1, y2) - - -if __name__ == '__main__': - test.main() ",0,train 7b180f700755b0a3fc0eb9de349f7caacc422d2d,tensorflow/tensorflow,"Whitelist ExtractImagePatches op Enable use of ExtractImagePatches with TFLite when using select TF ops. See also #21526. PiperOrigin-RevId: 304676488 Change-Id: I3b3aafdc16bf04d3204b2a903274f8a990800e82",whitelisted_flex_ops.cc,"@@ -117,6 +117,7 @@ bool IsWhitelistedFlexOp(const std::string& tensorflow_op_name) { ""Exit"", ""Exp"", ""ExpandDims"", + ""ExtractImagePatches"", ""FakeQuantWithMinMaxArgs"", ""FakeQuantWithMinMaxArgsGradient"", ""FakeQuantWithMinMaxVars"", ",0,test a8712e5c8e47a7e03efa45dd7e900b866309c3b2,tensorflow/tensorflow,"Fix parallel_for converter code to record gradient when explicitly creating an op in the graph. PiperOrigin-RevId: 241435115",backprop_test.py,"@@ -1339,7 +1339,6 @@ class BackpropTest(test.TestCase): self.assertAllEqual(da[0], tf_da[0].eval()) -@test_util.run_all_in_graph_and_eager_modes class JacobianTest(test.TestCase): def _jacobian(self, experimental_use_pfor): @@ -1430,6 +1429,22 @@ class JacobianTest(test.TestCase): self.assertAllClose(g.jacobian(y, x, parallel_iterations=2), g.jacobian(y, x, parallel_iterations=3)) + @test_util.run_in_graph_and_eager_modes + def test_nested_jacobian(self): + if context.executing_eagerly(): + # TODO(agarwal): b/128842926 + self.skipTest('Conversion of function calls not implemented yet.') + x = array_ops.ones((10, 2)) + with backprop.GradientTape(persistent=False) as g: + g.watch(x) + with backprop.GradientTape(persistent=False) as gg: + gg.watch(x) + y = math_ops.reduce_sum(math_ops.square(x)) + dy_x = gg.jacobian(y, x) + dy_xx = g.batch_jacobian(dy_x, x) + dy_xx_answer = [[[2., 0], [0, 2.]]] * 10 + self.assertAllClose(dy_xx_answer, self.evaluate(dy_xx)) + @test_util.run_all_in_graph_and_eager_modes class BatchJacobianTest(test.TestCase): ",0,train a8712e5c8e47a7e03efa45dd7e900b866309c3b2,tensorflow/tensorflow,"Fix parallel_for converter code to record gradient when explicitly creating an op in the graph. PiperOrigin-RevId: 241435115",pfor.py,"@@ -22,6 +22,7 @@ from __future__ import print_function import collections from tensorflow.python.eager import context +from tensorflow.python.eager import execute from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -838,10 +839,15 @@ class RegisterPForWithArgs(RegisterPFor): return converter +# TODO(agarwal): call raw_ops instead of calling these low level routines. 
def _create_op(op_type, inputs, op_dtypes, attrs=None): """"""Utility to create an op."""""" - return ops.get_default_graph().create_op( + op = ops.get_default_graph().create_op( op_type, inputs, op_dtypes, attrs=attrs, compute_device=True) + flat_attrs = nest.flatten([(a, op.get_attr(a)) for a in attrs]) + execute.record_gradient( + op_type, op.inputs, tuple(flat_attrs), op.outputs[:], """") + return op WrappedTensor = collections.namedtuple(""WrappedTensor"", ",0,train 83903a2e993c5456a2038de88b9cf3b9f0e1436f,tensorflow/tensorflow,"Clarify the softmax op documentation. Change: 125961539",nn_ops.cc,"@@ -1013,7 +1013,7 @@ Computes softmax activations. For each batch `i` and class `j` we have - softmax[i, j] = exp(logits[i, j]) / sum(exp(logits[i])) + softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j])) logits: 2-D with shape `[batch_size, num_classes]`. softmax: Same shape as `logits`. ",0,train 5de1fdaf8aef6d7b219f9e7a2e54027869a08912,tensorflow/tensorflow,"Remove TODO: Adding a clamp operation makes no performance difference. PiperOrigin-RevId: 333536789 Change-Id: Ia19f7aad7bc13ddeac0f226c04ecb14c5c67445e",lstm_parser.cc,"@@ -288,7 +288,6 @@ absl::Status BuildCellStateUpdate(GraphFloat32* graph, ObjectReader* reader, return absl::OkStatus(); } - // TODO(b/157166356): Maybe add OperationType::CLAMP ? Value* max_clipped_state = CreateNewSimilarValue(graph, new_cell_state); { // #4 elementwise minimum: min(#3, clip) @@ -398,7 +397,6 @@ absl::Status BuildOutputStateUpdate(GraphFloat32* graph, ObjectReader* reader, return absl::OkStatus(); } - // TODO(b/157166356): Maybe add OperationType::CLAMP ? Value* max_clipped_state = CreateNewSimilarValue(graph, projected_output_state); { ",0,train 264e07271385838da8f1ec9e58dddb80ee13800a,tensorflow/tensorflow,"Moves delegate initialization to a common lib for both benchmark tool & eval framework, and utilizes the helpers in TfliteInferenceStage. PiperOrigin-RevId: 244869850",benchmark_tflite_model.cc,"@@ -23,52 +23,23 @@ limitations under the License. #include #include -#include ""tensorflow/lite/delegates/nnapi/nnapi_delegate.h"" #include ""tensorflow/lite/kernels/register.h"" #include ""tensorflow/lite/model.h"" #include ""tensorflow/lite/op_resolver.h"" #include ""tensorflow/lite/string_util.h"" #include ""tensorflow/lite/tools/benchmark/logging.h"" +#include ""tensorflow/lite/tools/evaluation/utils.h"" #ifdef GEMMLOWP_PROFILING #include ""profiling/profiler.h"" #endif -#if defined(__ANDROID__) -#include ""tensorflow/lite/delegates/gpu/gl_delegate.h"" -#endif - #ifdef TFLITE_CUSTOM_OPS_HEADER void RegisterSelectedOps(::tflite::MutableOpResolver* resolver); #endif namespace tflite { namespace benchmark { -namespace { - -#if defined(__ANDROID__) -Interpreter::TfLiteDelegatePtr CreateGPUDelegate( - tflite::FlatBufferModel* model) { - TfLiteGpuDelegateOptions options; - options.metadata = TfLiteGpuDelegateGetModelMetadata(model->GetModel()); - options.compile_options.precision_loss_allowed = 1; - options.compile_options.preferred_gl_object_type = - TFLITE_GL_OBJECT_TYPE_FASTEST; - options.compile_options.dynamic_batch_enabled = 0; - return Interpreter::TfLiteDelegatePtr(TfLiteGpuDelegateCreate(&options), - &TfLiteGpuDelegateDelete); -} - -Interpreter::TfLiteDelegatePtr CreateNNAPIDelegate() { - return Interpreter::TfLiteDelegatePtr( - NnApiDelegate(), - // NnApiDelegate() returns a singleton, so provide a no-op deleter. 
- [](TfLiteDelegate*) {}); -} - -#endif // defined(__ANDROID__) - -} // namespace void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) { TFLITE_BENCHMARK_CHECK(interpreter); @@ -469,18 +440,21 @@ BenchmarkTfLiteModel::TfLiteDelegatePtrMap BenchmarkTfLiteModel::GetDelegates() const { TfLiteDelegatePtrMap delegates; if (params_.Get(""use_gpu"")) { -#if defined(__ANDROID__) - delegates.emplace(""GPU"", CreateGPUDelegate(model.get())); -#else - TFLITE_LOG(WARN) << ""GPU acceleration is unsupported on this platform.""; -#endif + Interpreter::TfLiteDelegatePtr delegate = + evaluation::CreateGPUDelegate(model.get()); + if (!delegate) { + TFLITE_LOG(WARN) << ""GPU acceleration is unsupported on this platform.""; + } else { + delegates.emplace(""GPU"", std::move(delegate)); + } } if (params_.Get(""use_nnapi"")) { -#if defined(__ANDROID__) - delegates.emplace(""NNAPI"", CreateNNAPIDelegate()); -#else - TFLITE_LOG(WARN) << ""NNAPI acceleration is unsupported on this platform.""; -#endif + Interpreter::TfLiteDelegatePtr delegate = evaluation::CreateNNAPIDelegate(); + if (!delegate) { + TFLITE_LOG(WARN) << ""NNAPI acceleration is unsupported on this platform.""; + } else { + delegates.emplace(""NNAPI"", std::move(delegate)); + } } return delegates; } ",0,test 264e07271385838da8f1ec9e58dddb80ee13800a,tensorflow/tensorflow,"Moves delegate initialization to a common lib for both benchmark tool & eval framework, and utilizes the helpers in TfliteInferenceStage. PiperOrigin-RevId: 244869850",tflite_inference_stage.cc,"@@ -20,6 +20,7 @@ limitations under the License. #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/lite/profiling/time.h"" #include ""tensorflow/lite/tools/evaluation/proto/evaluation_stages.pb.h"" +#include ""tensorflow/lite/tools/evaluation/utils.h"" namespace tflite { namespace evaluation { @@ -57,14 +58,36 @@ TfLiteStatus TfliteInferenceStage::Init() { model_ = FlatBufferModel::BuildFromFile(params.model_file_path().c_str()); resolver_.reset(new ops::builtin::BuiltinOpResolver); InterpreterBuilder(*model_, *resolver_)(&interpreter_); - if (params.delegate() == TfliteInferenceParams::NNAPI) { - interpreter_->UseNNAPI(true); - } if (!interpreter_) { LOG(ERROR) << ""Could not build interpreter""; return kTfLiteError; } interpreter_->SetNumThreads(params.num_threads()); + + // TODO(b/122482115): Add support for multiple delegates in + // TfLiteInferenceParams. + if (params.delegate() == TfliteInferenceParams::NNAPI) { + Interpreter::TfLiteDelegatePtr delegate = CreateNNAPIDelegate(); + if (delegate) { + delegates_.push_back(std::move(delegate)); + } else { + LOG(WARNING) << ""NNAPI not supported""; + } + } else if (params.delegate() == TfliteInferenceParams::GPU) { + Interpreter::TfLiteDelegatePtr delegate = CreateGPUDelegate(model_.get()); + if (!delegate) { + delegates_.push_back(std::move(delegate)); + } else { + LOG(WARNING) << ""GPU not supported""; + } + } + for (int i = 0; i < delegates_.size(); ++i) { + if (interpreter_->ModifyGraphWithDelegate(delegates_[i].get()) != + kTfLiteOk) { + LOG(FATAL) << ""Failed to apply delegate %d"" << i; + } + } + interpreter_->AllocateTensors(); model_info_ = GetTfliteModelInfo(*interpreter_); ",0,test 264e07271385838da8f1ec9e58dddb80ee13800a,tensorflow/tensorflow,"Moves delegate initialization to a common lib for both benchmark tool & eval framework, and utilizes the helpers in TfliteInferenceStage. 
PiperOrigin-RevId: 244869850",tflite_inference_stage.h,"@@ -67,6 +67,7 @@ class TfliteInferenceStage : public EvaluationStage { std::unique_ptr model_; std::unique_ptr resolver_; std::unique_ptr interpreter_; + std::vector delegates_; TfLiteModelInfo model_info_; std::vector* inputs_ = nullptr; ",0,test 264e07271385838da8f1ec9e58dddb80ee13800a,tensorflow/tensorflow,"Moves delegate initialization to a common lib for both benchmark tool & eval framework, and utilizes the helpers in TfliteInferenceStage. PiperOrigin-RevId: 244869850",utils.cc,"@@ -22,6 +22,11 @@ limitations under the License. #include #include ""tensorflow/core/platform/logging.h"" +#include ""tensorflow/lite/delegates/nnapi/nnapi_delegate.h"" + +#if defined(__ANDROID__) +#include ""tensorflow/lite/delegates/gpu/gl_delegate.h"" +#endif namespace tflite { namespace evaluation { @@ -44,5 +49,32 @@ bool ReadFileLines(const std::string& file_path, return true; } +Interpreter::TfLiteDelegatePtr CreateNNAPIDelegate() { +#if defined(__ANDROID__) + return Interpreter::TfLiteDelegatePtr( + NnApiDelegate(), + // NnApiDelegate() returns a singleton, so provide a no-op deleter. + [](TfLiteDelegate*) {}); +#else + return Interpreter::TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {}); +#endif // defined(__ANDROID__) +} + +Interpreter::TfLiteDelegatePtr CreateGPUDelegate( + tflite::FlatBufferModel* model) { +#if defined(__ANDROID__) + TfLiteGpuDelegateOptions options; + options.metadata = TfLiteGpuDelegateGetModelMetadata(model->GetModel()); + options.compile_options.precision_loss_allowed = 1; + options.compile_options.preferred_gl_object_type = + TFLITE_GL_OBJECT_TYPE_FASTEST; + options.compile_options.dynamic_batch_enabled = 0; + return Interpreter::TfLiteDelegatePtr(TfLiteGpuDelegateCreate(&options), + &TfLiteGpuDelegateDelete); +#else + return Interpreter::TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {}); +#endif // defined(__ANDROID__) +} + } // namespace evaluation } // namespace tflite ",0,test 264e07271385838da8f1ec9e58dddb80ee13800a,tensorflow/tensorflow,"Moves delegate initialization to a common lib for both benchmark tool & eval framework, and utilizes the helpers in TfliteInferenceStage. PiperOrigin-RevId: 244869850",utils.h,"@@ -19,10 +19,17 @@ limitations under the License. #include #include +#include ""tensorflow/lite/model.h"" + namespace tflite { namespace evaluation { bool ReadFileLines(const std::string& file_path, std::vector* lines_output); + +Interpreter::TfLiteDelegatePtr CreateNNAPIDelegate(); + +Interpreter::TfLiteDelegatePtr CreateGPUDelegate(FlatBufferModel* model); + } // namespace evaluation } // namespace tflite ",0,test 7dd20b844ced19610f8fa67be61d93948563ac43,tensorflow/tensorflow,"Convert Python Variable objects to tensors in custom_gradient, which allows nested custom_gradient functions. This allows a custom_gradient wrapped function to call through to another custom_gradient wrapped function. PiperOrigin-RevId: 237295007",custom_gradient.py,"@@ -238,6 +238,9 @@ def _graph_mode_decorator(f, *args, **kwargs): original_tensors = all_tensors with ops.get_default_graph().gradient_override_map({""IdentityN"": name}): all_tensors = array_ops.identity_n(all_tensors) + + original_tensors = [ops.convert_to_tensor(x) for x in original_tensors] + # Propagate handle data for happier shape inference for resource variables. 
for i, t in enumerate(original_tensors): if t.dtype == dtypes.resource and hasattr(t, ""_handle_data""): ",0,train 7dd20b844ced19610f8fa67be61d93948563ac43,tensorflow/tensorflow,"Convert Python Variable objects to tensors in custom_gradient, which allows nested custom_gradient functions. This allows a custom_gradient wrapped function to call through to another custom_gradient wrapped function. PiperOrigin-RevId: 237295007",gradients_test.py,"@@ -1033,6 +1033,42 @@ class CustomGradientTest(test_util.TensorFlowTestCase): self.assertAllEqual(g.eval(), [2.0]) self.assertAllEqual(g.eval(feed_dict={conditional: False}), [3.0]) + def testRecursiveCustomGradient(self): + @custom_gradient.custom_gradient + def F(x): + out = core_layers.dense(x, 3, use_bias=False) + + def Grad(out_grad, variables=None): # pylint: disable=redefined-outer-name + self.assertEqual(1, len(variables)) + grads = gradients.gradients(out, [x, variables[0]], grad_ys=out_grad) + return grads[0], [array_ops.ones((4, 3))] + + return out, Grad + + @custom_gradient.custom_gradient + def DoubleF(x): + out = F(x) + + def Grad(out_grad, variables=None): # pylint: disable=redefined-outer-name + self.assertEqual(1, len(variables)) + grads = gradients.gradients(out, [x, variables[0]], grad_ys=out_grad) + return grads[0], [array_ops.ones((4, 3))] + + return out, Grad + with ops.Graph().as_default(): + x = array_ops.ones((2, 4)) + with variable_scope.variable_scope(""f"", use_resource=True) as vs: + y = DoubleF(x) + all_vars = vs.global_variables() + assert len(all_vars) == 1 + grads = gradients.gradients(y, [x, all_vars[0]]) + for g in grads: + self.assertIsNotNone(g) + with session.Session() as sess: + self.evaluate(variables.global_variables_initializer()) + dw = sess.run(math_ops.reduce_sum(grads[1])) + self.assertEqual(12., dw) + class AggregateIndexedSlicesGradientsTest(test_util.TensorFlowTestCase): ",0,train bc84ebd3b0d49debca3a886d62b76a95698f1a1f,tensorflow/tensorflow,"Make sure the buffers in NNAPI shared memory pool are 16 bytes aligned. PiperOrigin-RevId: 245247027",nnapi_delegate.cc,"@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include ""tensorflow/lite/delegates/nnapi/nnapi_delegate.h"" + #include #include #include @@ -23,7 +25,6 @@ limitations under the License. 
#include ""tensorflow/lite/builtin_ops.h"" #include ""tensorflow/lite/c/c_api_internal.h"" #include ""tensorflow/lite/context_util.h"" -#include ""tensorflow/lite/delegates/nnapi/nnapi_delegate.h"" #include ""tensorflow/lite/kernels/kernel_util.h"" #include ""tensorflow/lite/nnapi/nnapi_implementation.h"" @@ -116,7 +117,16 @@ bool IsRestrictedScalesCompliant(const TfLiteContext* context, constexpr int32_t kMinSdkVersionForNNAPI = 27; constexpr int32_t kMinSdkVersionForNNAPI11 = 28; constexpr int32_t kMinSdkVersionForNNAPI12 = 29; +constexpr size_t kDefaultByteAlignmentForNNAPI = 16; +static size_t getNumPaddingBytes(size_t byte_size) { + size_t num_padding_bytes = 0; + if (byte_size % kDefaultByteAlignmentForNNAPI) { + num_padding_bytes = kDefaultByteAlignmentForNNAPI - + (byte_size % kDefaultByteAlignmentForNNAPI); + } + return num_padding_bytes; +} } // namespace // RAII NN API Model Destructor for use with std::unique_ptr @@ -1156,6 +1166,7 @@ class NNAPIDelegateKernel { execution, relative_input_index, nullptr, nn_input_memory_->get_handle(), input_offset, tensor->bytes)); input_offset += tensor->bytes; + input_offset += getNumPaddingBytes(tensor->bytes); relative_input_index++; } } @@ -1171,6 +1182,7 @@ class NNAPIDelegateKernel { execution, relative_output_index, nullptr, nn_output_memory_->get_handle(), output_offset, tensor->bytes)); output_offset += tensor->bytes; + output_offset += getNumPaddingBytes(tensor->bytes); relative_output_index++; } @@ -1210,6 +1222,7 @@ class NNAPIDelegateKernel { memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset, tensor->bytes); output_offset += tensor->bytes; + output_offset += getNumPaddingBytes(tensor->bytes); } return kTfLiteOk; @@ -1376,6 +1389,7 @@ class NNAPIDelegateKernel { context->tensors[i].allocation_type != kTfLiteMmapRo) { inputs.push_back(operand_mapping_.lite_index_to_ann(i)); total_input_byte_size += context->tensors[i].bytes; + total_input_byte_size += getNumPaddingBytes(context->tensors[i].bytes); } } @@ -1383,6 +1397,7 @@ class NNAPIDelegateKernel { for (int i : TfLiteIntArrayView(output_tensors)) { outputs.push_back(operand_mapping_.lite_index_to_ann(i)); total_output_byte_size += context->tensors[i].bytes; + total_output_byte_size += getNumPaddingBytes(context->tensors[i].bytes); } // Add state output tensors as model outputs. ",0,train 9d93a11431f62a82eda1f314c6c8b2acee1bc1c1,tensorflow/tensorflow,"Bugfix: tf.contrib.distributions.Affine incorrectly computed log-det-jacobian when using `event_ndims=0` and `scale_identity_multiplier` flag. PiperOrigin-RevId: 170887218",affine_test.py,"@@ -829,6 +829,15 @@ class AffineBijectorTest(test.TestCase): x=np.array( [1., 2], dtype=np.float32)) + def testScalarEventIdentityScale(self): + with self.test_session() as sess: + doubler = Affine( + scale_identity_multiplier=2., + event_ndims=0) + doubler2 = doubler.inverse_log_det_jacobian(2.) + doubler2_ildj_ = sess.run([doubler2]) + self.assertAllClose([-np.log(2.)], doubler2_ildj_) + if __name__ == ""__main__"": test.main() ",0,train 9d93a11431f62a82eda1f314c6c8b2acee1bc1c1,tensorflow/tensorflow,"Bugfix: tf.contrib.distributions.Affine incorrectly computed log-det-jacobian when using `event_ndims=0` and `scale_identity_multiplier` flag. 
PiperOrigin-RevId: 170887218",transformed_distribution_test.py,"@@ -172,6 +172,19 @@ class TransformedDistributionTest(test.TestCase): self.assertAllClose(actual_mvn_entropy, fake_mvn.entropy().eval()) + def testScalarBatchScalarEventIdentityScale(self): + with self.test_session() as sess: + exp2 = self._cls()( + ds.Exponential(rate=0.25), + bijector=ds.bijectors.Affine( + scale_identity_multiplier=2., + event_ndims=0)) + log_prob = exp2.log_prob(1.) + log_prob_ = sess.run(log_prob) + base_log_prob = -0.5 * 0.25 + np.log(0.25) + ildj = np.log(2.) + self.assertAllClose(base_log_prob - ildj, log_prob_, rtol=1e-6, atol=0.) + class ScalarToMultiTest(test.TestCase): ",0,train 9d93a11431f62a82eda1f314c6c8b2acee1bc1c1,tensorflow/tensorflow,"Bugfix: tf.contrib.distributions.Affine incorrectly computed log-det-jacobian when using `event_ndims=0` and `scale_identity_multiplier` flag. PiperOrigin-RevId: 170887218",affine_impl.py,"@@ -388,10 +388,11 @@ class Affine(bijector.Bijector): if self._is_only_identity_multiplier: # We don't pad in this case and instead let the fldj be applied # via broadcast. - d = math_ops.cast(array_ops.shape(x)[-1], dtype=self._scale.dtype) - one = ops.convert_to_tensor(1., self._scale.dtype) - return math_ops.log(math_ops.abs(self._scale)) * array_ops.where( - math_ops.equal(self._shaper.event_ndims, 0), one, d) + event_size = distribution_util.pick_vector( + math_ops.equal(self._shaper.event_ndims, 0), + [1], array_ops.shape(x))[-1] + event_size = math_ops.cast(event_size, dtype=self._scale.dtype) + return math_ops.log(math_ops.abs(self._scale)) * event_size return self.scale.log_abs_determinant() def _maybe_check_scale(self): ",0,train 26c01423d9a4c4b17993f636764e9c1ee5ea1c4f,tensorflow/tensorflow,"Fix windows test. The problem was that LocalTempFilename returns a full path, not just a basename, so we were joining two full paths, which doesn't work on Windows. PiperOrigin-RevId: 324094697 Change-Id: I563f692ba6525097c95fda2a45ee9d565ac9c8a1",journal_test.cc,"@@ -29,11 +29,11 @@ namespace { using ::testing::HasSubstr; bool NewJournalDir(std::string* journal_dir) { - std::string filename; - if (!Env::Default()->LocalTempFilename(&filename)) { + std::string filename = testing::TmpDir(); + if (!Env::Default()->CreateUniqueFileName(&filename, ""journal_dir"")) { return false; } - *journal_dir = io::JoinPath(testing::TmpDir(), filename); + *journal_dir = filename; return true; } ",0,train f3afacffc3c6b431677a69754bb69a7791261318,tensorflow/tensorflow,Fix bug in hadoop_file_system.cc when reading big variable of hdfs,hadoop_file_system.cc,"@@ -209,8 +209,9 @@ class HDFSRandomAccessFile : public RandomAccessFile { // We lock inside the loop rather than outside so we don't block other // concurrent readers. mutex_lock lock(mu_); + size_t read_n = std::min(n,static_cast(std::numeric_limits::max()-2)); tSize r = hdfs_->hdfsPread(fs_, file_, static_cast(offset), dst, - static_cast(n)); + static_cast(read_n)); if (r > 0) { dst += r; n -= r; ",0,test 7c1123eac5fb2409d76001d0113a704aa3e824da,tensorflow/tensorflow,Address review comments,xla_gpu_device.cc,"@@ -16,14 +16,16 @@ limitations under the License. // Registers the XLA_GPU device, which is an XlaDevice instantiation that runs // operators using XLA via the XLA ""CUDA"" (GPU) backend. 
+#include #include ""absl/memory/memory.h"" +#include ""absl/strings/numbers.h"" +#include ""absl/strings/str_split.h"" #include ""tensorflow/compiler/jit/kernels/xla_ops.h"" #include ""tensorflow/compiler/jit/xla_device.h"" #include ""tensorflow/compiler/jit/xla_device_ops.h"" #include ""tensorflow/compiler/tf2xla/xla_op_registry.h"" #include ""tensorflow/core/common_runtime/device_factory.h"" #include ""tensorflow/core/lib/core/status.h"" -#include ""tensorflow/core/lib/strings/str_util.h"" namespace tensorflow { @@ -53,20 +55,21 @@ Status XlaGpuDeviceFactory::CreateDevices( VLOG(1) << ""Failed to create XLA_GPU device: "" << platform.status(); return Status::OK(); } - const auto& allowed_gpus = + string allowed_gpus = session_options.config.gpu_options().visible_device_list(); - std::unordered_set gpu_ids; + std::set gpu_ids; int num_visible_devices = platform.ValueOrDie()->VisibleDeviceCount(); if (allowed_gpus.empty()) { for (int i = 0; i < num_visible_devices; ++i) gpu_ids.insert(i); } else { + // For loop below is copied from gpu/gpu_device.cc. It validates + // configuration string. It should be redundant since code would fail there + // before it gets to here. const std::vector visible_devices = - str_util::Split(allowed_gpus, ','); - // copied from gpu/gpu_device.cc Should be redundant since code would fail - // there before it gets to here. + absl::StrSplit(allowed_gpus, ','); for (const string& platform_gpu_id_str : visible_devices) { int32 platform_gpu_id; - if (!strings::safe_strto32(platform_gpu_id_str, &platform_gpu_id)) { + if (!absl::SimpleAtoi(platform_gpu_id_str, &platform_gpu_id)) { return errors::InvalidArgument( ""Could not parse entry in 'visible_device_list': '"", platform_gpu_id_str, ""'. visible_device_list = "", allowed_gpus); @@ -79,8 +82,8 @@ Status XlaGpuDeviceFactory::CreateDevices( gpu_ids.insert(platform_gpu_id); } } - for (int i = 0; i < num_visible_devices; ++i) { - if (gpu_ids.count(i) == 0) continue; + for (const auto i : gpu_ids) { + // Skip devices that are not in the set. XlaDevice::Options options; options.platform = platform.ValueOrDie(); options.device_name_prefix = name_prefix; ",0,train 4ab315314078b043240908209086fb64f5260cc5,tensorflow/tensorflow,"Deprecate random_binomial in favor of random_bernoulli. PiperOrigin-RevId: 299205387 Change-Id: Id347f893f8e8fd6bf573c62827e96ec4d1de3343",backend.py,"@@ -79,6 +79,7 @@ from tensorflow.python.util import nest from tensorflow.python.util import object_identity from tensorflow.python.util import tf_contextlib from tensorflow.python.util import tf_inspect +from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.tf_export import keras_export py_all = all @@ -5703,10 +5704,13 @@ def random_uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None): shape, minval=minval, maxval=maxval, dtype=dtype, seed=seed) +@deprecated(None, 'Use `tf.keras.backend.random_bernoulli` instead.') @keras_export('keras.backend.random_binomial') def random_binomial(shape, p=0.0, dtype=None, seed=None): """"""Returns a tensor with random binomial distribution of values. + DEPRECATED, use `tf.keras.backend.random_bernoulli` instead. + The binomial distribution with parameters `n` and `p` is the probability distribution of the number of successful Bernoulli process. Only supports `n` = 1 for now. 
@@ -5729,6 +5733,22 @@ def random_binomial(shape, p=0.0, dtype=None, seed=None): array_ops.ones(shape, dtype=dtype), array_ops.zeros(shape, dtype=dtype)) +@keras_export('keras.backend.random_bernoulli') +def random_bernoulli(shape, p=0.0, dtype=None, seed=None): + """"""Returns a tensor with random bernoulli distribution of values. + + Arguments: + shape: A tuple of integers, the shape of tensor to create. + p: A float, `0. <= p <= 1`, probability of bernoulli distribution. + dtype: String, dtype of returned tensor. + seed: Integer, random seed. + + Returns: + A tensor. + """""" + return random_binomial(shape, p, dtype, seed) + + @keras_export('keras.backend.truncated_normal') def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None): """"""Returns a tensor with truncated random normal distribution of values. ",0,train f5d9e2c9d7a23ffb92e94032b1b39b82795c129a,tensorflow/tensorflow,Fix space,ragged_constant_value_op_test.py,"@@ -73,7 +73,7 @@ class RaggedConstantValueOpTest(test_util.TensorFlowTestCase, np.array([]), [[5, 6], [7, 8], [9, 0]]], ragged_rank=1, expected_shape=(3, None, 2)), - dict( + dict( pylist=[np.array([3, np.array(4)]), [[1, 2]], np.array([]), [[5, 6], [7, 8], [9, 0]]], ragged_rank=1, ",0,test 821116f72752cb9545565cb57e276e14e45e4973,tensorflow/tensorflow,Tensor boolean strictness fix for skflow.,losses_ops.py,"@@ -46,7 +46,7 @@ def softmax_classifier(tensor_in, labels, weights, biases, class_weight=None, na """""" with tf.op_scope([tensor_in, labels], name, ""softmax_classifier""): logits = tf.nn.xw_plus_b(tensor_in, weights, biases) - if class_weight: + if class_weight is not None: logits = tf.mul(logits, class_weight) xent = tf.nn.softmax_cross_entropy_with_logits(logits, labels, @@ -54,4 +54,3 @@ def softmax_classifier(tensor_in, labels, weights, biases, class_weight=None, na loss = tf.reduce_mean(xent, name=""xent"") predictions = tf.nn.softmax(logits, name=name) return predictions, loss - ",0,train 821116f72752cb9545565cb57e276e14e45e4973,tensorflow/tensorflow,Tensor boolean strictness fix for skflow.,trainer.py,"@@ -121,7 +121,7 @@ class TensorFlowTrainer(object): """""" for step in xrange(steps): feed_dict = feed_dict_fn() - if summaries: + if summaries is not None: global_step, loss, summ, _ = sess.run( [self.global_step, self.loss, summaries, self.trainer], feed_dict=feed_dict) @@ -131,7 +131,7 @@ class TensorFlowTrainer(object): feed_dict=feed_dict) monitor.update(step, global_step, loss, sess, feed_params_fn, loss_expression_tensor=self.loss) - if summaries and summary_writer and summ is not None: + if summaries is not None and summary_writer and summ is not None: summary_writer.add_summary(summ, global_step) if monitor.monitor_inducing_stop(): break ",0,train 2050596162cddc19a21ba1e880bf7a5959c80841,tensorflow/tensorflow,"iOS Metal GPU delegate: reshape tests added. PiperOrigin-RevId: 269413768",model.h,"@@ -579,6 +579,17 @@ Status ConnectTwoNodes(Graph* graph, const Node* from_node, using GraphFloat32 = Model>; +// @return true if all tensors have same batch value. +inline bool IsBatchMatchesForAllValues(const GraphFloat32& model) { + const int32_t b = model.values()[0]->tensor.shape.b; + for (auto value : model.values()) { + if (value->tensor.shape.b != b) { + return false; + } + } + return true; +} + } // namespace gpu } // namespace tflite ",0,train 2050596162cddc19a21ba1e880bf7a5959c80841,tensorflow/tensorflow,"iOS Metal GPU delegate: reshape tests added. 
PiperOrigin-RevId: 269413768",api.cc,"@@ -369,18 +369,6 @@ class CompiledModelImpl std::unordered_map object_sizes_; CompilerStats stats_; }; - -// @return true if all tensors have same batch value. -bool IsBatchMatchesForAllValues(const GraphFloat32& model) { - const int32_t b = model.values()[0]->tensor.shape.b; - for (auto value : model.values()) { - if (value->tensor.shape.b != b) { - return false; - } - } - return true; -} - } // namespace Status Compile(const CompilationOptions& options, const GraphFloat32& model, ",0,train 2050596162cddc19a21ba1e880bf7a5959c80841,tensorflow/tensorflow,"iOS Metal GPU delegate: reshape tests added. PiperOrigin-RevId: 269413768",api.cc,"@@ -122,6 +122,9 @@ Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, const std::vector& outputs, const RuntimeOptions& options, std::vector* tasks) { + if (!IsBatchMatchesForAllValues(graph)) { + return InvalidArgumentError(""Only identical batch dimension is supported""); + } int node_id = static_cast(node->id); auto op_type = OperationTypeFromString(node->operation.type); switch (op_type) { ",0,train 96ba1c3609b8a0210bdc72c2ba339cf81831f998,tensorflow/tensorflow,"[XLA] BufferValue::Color now type aliases int64. PiperOrigin-RevId: 313404227 Change-Id: I2d393d426865c61ff210f10e3d9b8402a1813cf1",buffer_assignment.cc,"@@ -261,7 +261,7 @@ void BufferAllocation::AddAssignment(const HloValue& buffer, int64 offset, Shape* shape = ShapeUtil::GetMutableSubshape( position.instruction->mutable_shape(), position.index); if (shape->has_layout()) { - shape->mutable_layout()->set_memory_space(buffer.color().value()); + shape->mutable_layout()->set_memory_space(buffer.color()); } } } @@ -272,7 +272,7 @@ BufferAllocationProto BufferAllocation::ToProto() const { proto.set_size(size_); proto.set_is_thread_local(is_thread_local_); proto.set_is_tuple(is_tuple_); - proto.set_color(color_.value()); + proto.set_color(color_); if (is_entry_computation_parameter_) { proto.set_is_entry_computation_parameter(true); for (int64 idx : param_shape_index()) { @@ -336,8 +336,8 @@ static const HloInstruction* GetOutputInstruction( string BufferAllocation::ToString() const { string output; StrAppendFormat(&output, ""allocation %d: %p, size %d"", index_, this, size()); - if (color().value() != 0) { - StrAppend(&output, "", color "", color().value()); + if (color() != 0) { + StrAppend(&output, "", color "", color()); } if (is_entry_computation_parameter()) { const HloInstruction* param = GetEntryParameterInstruction(*this); @@ -607,9 +607,7 @@ void BufferAssignment::AddAssignment(BufferAllocation* allocation, // BufferAllocation. void BufferAssignment::CombineTempAllocations() { VLOG(1) << ""CombineTempAllocations()""; - flat_hash_map - combined_allocation_map; + flat_hash_map combined_allocation_map; // Move all temp allocations into a single run at the end of the allocations // vector. @@ -1059,8 +1057,8 @@ Status BufferAssigner::MergeInplaceOpBuffers(BufferAssignment* assignment) { // The instruction or operand color is excluded because it was assigned by // memory_space_assignment. 
- if (excluded_colors.contains(instruction_buffer.color().value()) || - excluded_colors.contains(operand_buffer.color().value())) { + if (excluded_colors.contains(instruction_buffer.color()) || + excluded_colors.contains(operand_buffer.color())) { continue; } @@ -1353,13 +1351,10 @@ Status BufferAssigner::AssignBuffersForComputations( return Status::OK(); } -flat_hash_map, - LogicalBuffer::Color::Hasher> +flat_hash_map> BufferAssigner::SplitBuffersByColor( const flat_hash_set& buffers) { - flat_hash_map, - LogicalBuffer::Color::Hasher> - color_map; + flat_hash_map> color_map; for (auto buffer : buffers) { color_map[buffer->color()].insert(buffer); } @@ -1374,8 +1369,7 @@ Status BufferAssigner::AssignPresetBuffers( } // Create an allocation for each preset color. - absl::flat_hash_map + absl::flat_hash_map preset_allocations; for (auto& color_and_info : preset_assignments_->assignment_informations()) { LogicalBuffer::Color color(color_and_info.first); ",0,train 96ba1c3609b8a0210bdc72c2ba339cf81831f998,tensorflow/tensorflow,"[XLA] BufferValue::Color now type aliases int64. PiperOrigin-RevId: 313404227 Change-Id: I2d393d426865c61ff210f10e3d9b8402a1813cf1",buffer_assignment.h,"@@ -673,8 +673,7 @@ class BufferAssigner { // Split a set of buffers into several sets, each of which contains buffers // colored with the same color. absl::flat_hash_map, - LogicalBuffer::Color::Hasher> + absl::flat_hash_set> SplitBuffersByColor(const absl::flat_hash_set& buffers); // If true, allocate buffers for constant instructions. ",0,train 96ba1c3609b8a0210bdc72c2ba339cf81831f998,tensorflow/tensorflow,"[XLA] BufferValue::Color now type aliases int64. PiperOrigin-RevId: 313404227 Change-Id: I2d393d426865c61ff210f10e3d9b8402a1813cf1",buffer_value.cc,"@@ -59,7 +59,7 @@ LogicalBufferProto BufferValue::ToProto(const SizeFunction& size_fn) const { ToLocationProto(*instruction(), index()); proto.mutable_defined_at()->Swap(&proto_location); if (has_color()) { - proto.set_color(color().value()); + proto.set_color(color()); } return proto; } ",0,train 96ba1c3609b8a0210bdc72c2ba339cf81831f998,tensorflow/tensorflow,"[XLA] BufferValue::Color now type aliases int64. PiperOrigin-RevId: 313404227 Change-Id: I2d393d426865c61ff210f10e3d9b8402a1813cf1",buffer_value.h,"@@ -25,7 +25,6 @@ limitations under the License. #include ""tensorflow/compiler/xla/shape_util.h"" #include ""tensorflow/compiler/xla/types.h"" #include ""tensorflow/compiler/xla/xla_data.pb.h"" -#include ""tensorflow/core/lib/gtl/int_type.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/macros.h"" #include ""tensorflow/core/platform/types.h"" @@ -86,7 +85,7 @@ namespace xla { class BufferValue { public: - TF_LIB_GTL_DEFINE_INT_TYPE(Color, int64); + using Color = int64; // Id is a unique identifier for the BufferValue to facilitate efficient // collections of BufferValues with stable iteration order. @@ -154,7 +153,7 @@ class BufferValue { static LogicalBufferProto::Location ToLocationProto( const HloInstruction& instruction, const ShapeIndex& index); - const Color kInvalidColor = Color(-1); + const Color kInvalidColor = -1; protected: BufferValue(HloInstruction* instruction, const ShapeIndex& index, Id id); ",0,train 96ba1c3609b8a0210bdc72c2ba339cf81831f998,tensorflow/tensorflow,"[XLA] BufferValue::Color now type aliases int64. 
PiperOrigin-RevId: 313404227 Change-Id: I2d393d426865c61ff210f10e3d9b8402a1813cf1",hlo_value.cc,"@@ -91,8 +91,7 @@ string HloValue::ToShortString() const { return absl::StrFormat( ""<%d %s%s%s%s>"", id(), instruction()->name(), instruction()->shape().IsTuple() ? index().ToString() : """", - is_phi() ? "" (phi)"" : """", - has_color() ? StrCat("" @"", color().value()) : """"); + is_phi() ? "" (phi)"" : """", has_color() ? StrCat("" @"", color()) : """"); } string HloValue::ToString(int indent) const { ",0,train 96ba1c3609b8a0210bdc72c2ba339cf81831f998,tensorflow/tensorflow,"[XLA] BufferValue::Color now type aliases int64. PiperOrigin-RevId: 313404227 Change-Id: I2d393d426865c61ff210f10e3d9b8402a1813cf1",logical_buffer.cc,"@@ -34,7 +34,7 @@ LogicalBuffer::~LogicalBuffer() {} string LogicalBuffer::ToString() const { string color_string; if (has_color()) { - color_string = absl::StrCat("" @"", color().value()); + color_string = absl::StrCat("" @"", color()); } return absl::StrCat(instruction_->name(), ""["", absl::StrJoin(index_, "",""), ""](#"", id(), color_string, "")""); ",0,train 4727d0180fb693fb7cfd70b372b606752f8efa45,tensorflow/tensorflow,"Split tensorflow.python.tpu module doctests into different target. These TPU tests do not yet run in OSS. PiperOrigin-RevId: 310959419 Change-Id: I2a1662e52f25da9c4c58c018c83729dc6da9008d",tpu_embedding.py,"@@ -828,7 +828,7 @@ class TPUEmbedding(object): ... end_learning_rate=0.0) >>> wordpiece_table_config = TableConfig( ... vocabulary_size=119547, - ... dimension=768, + ... dimension=256, ... learning_rate_fn=learning_rate_fn) >>> wordpiece_feature_config = FeatureConfig( ... table_id='bert/embeddings/word_embeddings', @@ -846,11 +846,11 @@ class TPUEmbedding(object): ... batch_size=128, ... mode=TRAINING, ... optimization_parameters=optimization_parameters, - ... device_config=DeviceConfig( - ... num_cores=64, num_hosts=4, job_name='tpu_worker')) + ... master='') >>> with tf.Graph().as_default(): ... init_tpu_op = tf.compat.v1.tpu.initialize_system( - ... embedding_config=tpu_embedding.config_proto, job='tpu_worker') + ... embedding_config=tpu_embedding.config_proto) + ... tf.compat.v1.Session().run(init_tpu_op) """""" # TODO(shizhiw): Consider adding a field to FeatureConfig that indicates that ",0,train 4727d0180fb693fb7cfd70b372b606752f8efa45,tensorflow/tensorflow,"Split tensorflow.python.tpu module doctests into different target. These TPU tests do not yet run in OSS. PiperOrigin-RevId: 310959419 Change-Id: I2a1662e52f25da9c4c58c018c83729dc6da9008d",tf_doctest.py,"@@ -43,6 +43,8 @@ tf.keras.preprocessing = preprocessing FLAGS = flags.FLAGS flags.DEFINE_string('module', None, 'A specific module to run doctest on.') +flags.DEFINE_list('module_prefix_skip', [], + 'A list of modules to ignore when resolving modules.') flags.DEFINE_boolean('list', None, 'List all the modules in the core package imported.') flags.DEFINE_string('file', None, 'A specific file to run doctest on.') @@ -50,6 +52,7 @@ flags.DEFINE_string('file', None, 'A specific file to run doctest on.') flags.mark_flags_as_mutual_exclusive(['module', 'file']) flags.mark_flags_as_mutual_exclusive(['list', 'file']) +# Both --module and --module_prefix_skip are relative to PACKAGE. PACKAGE = 'tensorflow.python.' 
@@ -140,6 +143,9 @@ def load_tests(unused_loader, tests, unused_ignore): tf_modules = get_module_and_inject_docstring(FLAGS.file) for module in tf_modules: + if any(module.__name__.startswith(PACKAGE + prefix) + for prefix in FLAGS.module_prefix_skip): + continue testcase = TfTestCase() tests.addTests( doctest.DocTestSuite( ",0,train 71f86a96994e66280ff6a862594ebfd9ee1dc6d7,tensorflow/tensorflow,"Remove an old TODO in benchmark_tflite_model.cc. For string tensors, don't try populate its content. PiperOrigin-RevId: 369907367 Change-Id: Id926d787ce5ccf0b511d017826d1345b71e511d3",benchmark_tflite_model.cc,"@@ -562,7 +562,8 @@ BenchmarkTfLiteModel::CreateRandomTensorData(const TfLiteTensor& t, num_elements, std::uniform_int_distribution(low, high)); } case kTfLiteString: { - // TODO(haoliang): No need to cache string tensors right now. + // Don't populate input for string. Instead, return a default-initialized + // `InputTensorData` object directly. break; } case kTfLiteBool: { ",0,train 50316308851f9e6049167dc6b475e0f9a9a4274d,tensorflow/tensorflow,"Fixup output shape for IntegerLookup/StringLookup layers This makes the following fixes for BINARY and COUNT output - Fixes compute_output_shape and compute_output_signature - Properly propogates batch shape for dense inputs - Adds test coverage PiperOrigin-RevId: 355071871 Change-Id: I7820763100b643b8cd12908caf416aae1c4a1f14",category_encoding.py,"@@ -534,5 +534,6 @@ def dense_bincount(inputs, out_depth, binary_output, count_weights=None): dtype=K.floatx(), axis=-1, binary_output=binary_output) - result.set_shape(tensor_shape.TensorShape((None, out_depth))) + batch_size = inputs.shape.as_list()[0] + result.set_shape(tensor_shape.TensorShape((batch_size, out_depth))) return result ",0,test 50316308851f9e6049167dc6b475e0f9a9a4274d,tensorflow/tensorflow,"Fixup output shape for IntegerLookup/StringLookup layers This makes the following fixes for BINARY and COUNT output - Fixes compute_output_shape and compute_output_signature - Properly propogates batch shape for dense inputs - Adds test coverage PiperOrigin-RevId: 355071871 Change-Id: I7820763100b643b8cd12908caf416aae1c4a1f14",index_lookup.py,"@@ -27,6 +27,7 @@ import numpy as np from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec +from tensorflow.python.keras import backend as K from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.keras.layers.preprocessing import category_encoding from tensorflow.python.keras.layers.preprocessing import table_utils @@ -160,22 +161,20 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer): super(IndexLookup, self).__init__( combiner=_IndexLookupCombiner(vocab_size, self.mask_token), **kwargs) - self._output_dtype = dtypes.int64 - # We need to save the key dtype so that we know if we're expecting int64 # keys. If we are, we will cast int32 inputs to int64 as well. 
if invert: - self._key_dtype = self._output_dtype - value_dtype = self.dtype + self._key_dtype = dtypes.int64 + self._value_dtype = self.dtype oov_value = self.oov_token else: self._key_dtype = self.dtype - value_dtype = self._output_dtype + self._value_dtype = dtypes.int64 oov_value = self._oov_value self._table = lookup_ops.MutableHashTable( key_dtype=self._key_dtype, - value_dtype=value_dtype, + value_dtype=self._value_dtype, default_value=oov_value, name=(self._name + ""_index_table"")) tracked_table = self._add_trackable(self._table, trainable=False) @@ -201,11 +200,14 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer): self.set_vocabulary(vocabulary) def compute_output_shape(self, input_shape): + if self.output_mode != INT: + return tensor_shape.TensorShape([input_shape[0], self.max_tokens]) + return input_shape def compute_output_signature(self, input_spec): output_shape = self.compute_output_shape(input_spec.shape.as_list()) - output_dtype = self.dtype if self.invert else self._output_dtype + output_dtype = self._value_dtype if self.output_mode == INT else K.floatx() return tensor_spec.TensorSpec(shape=output_shape, dtype=output_dtype) def adapt(self, data, reset_state=True): ",0,test 50316308851f9e6049167dc6b475e0f9a9a4274d,tensorflow/tensorflow,"Fixup output shape for IntegerLookup/StringLookup layers This makes the following fixes for BINARY and COUNT output - Fixes compute_output_shape and compute_output_signature - Properly propogates batch shape for dense inputs - Adds test coverage PiperOrigin-RevId: 355071871 Change-Id: I7820763100b643b8cd12908caf416aae1c4a1f14",index_lookup_test.py,"@@ -618,8 +618,8 @@ class IndexLookupOutputTest(keras_parameterized.TestCase, output_dataset = model.predict(input_array) self.assertAllEqual(expected_output, output_dataset) - def test_output_shape(self): - input_data = keras.Input(shape=(4,), dtype=dtypes.string) + def test_int_output_shape(self): + input_data = keras.Input(batch_size=16, shape=(4,), dtype=dtypes.string) layer = get_layer_class()( max_tokens=2, num_oov_indices=1, @@ -627,7 +627,7 @@ class IndexLookupOutputTest(keras_parameterized.TestCase, oov_token=""[OOV]"", dtype=dtypes.string) int_data = layer(input_data) - self.assertAllEqual(int_data.shape[1:], input_data.shape[1:]) + self.assertAllEqual(int_data.shape.as_list(), [16, 4]) def test_int_output_no_reserved_zero(self): vocab_data = [""earth"", ""wind"", ""and"", ""fire""] @@ -667,6 +667,70 @@ class IndexLookupOutputTest(keras_parameterized.TestCase, output_dataset = model.predict(input_array) self.assertAllEqual(expected_output, output_dataset) + def test_binary_output(self): + vocab_data = [""earth"", ""wind"", ""and"", ""fire""] + input_array = np.array([[""earth"", ""wind"", ""and"", ""fire""], + [""fire"", ""and"", ""earth"", ""michigan""]]) + expected_output = [[0, 0, 1, 1, 1, 1], [0, 1, 1, 0, 1, 1]] + + input_data = keras.Input(shape=(None,), dtype=dtypes.string) + layer = get_layer_class()( + max_tokens=None, + num_oov_indices=1, + mask_token="""", + oov_token=""[OOV]"", + output_mode=index_lookup.BINARY, + dtype=dtypes.string) + layer.set_vocabulary(vocab_data) + binary_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=binary_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_binary_output_shape(self): + input_data = keras.Input(batch_size=16, shape=(4,), dtype=dtypes.string) + layer = get_layer_class()( + max_tokens=2, + num_oov_indices=1, + 
mask_token="""", + oov_token=""[OOV]"", + output_mode=index_lookup.BINARY, + dtype=dtypes.string) + binary_data = layer(input_data) + self.assertAllEqual(binary_data.shape.as_list(), [16, 2]) + + def test_count_output(self): + vocab_data = [""earth"", ""wind"", ""and"", ""fire""] + input_array = np.array([[""earth"", ""wind"", ""and"", ""wind""], + [""fire"", ""fire"", ""fire"", ""michigan""]]) + expected_output = [[0, 0, 1, 2, 1, 0], [0, 1, 0, 0, 0, 3]] + + input_data = keras.Input(shape=(None,), dtype=dtypes.string) + layer = get_layer_class()( + max_tokens=None, + num_oov_indices=1, + mask_token="""", + oov_token=""[OOV]"", + output_mode=index_lookup.COUNT, + dtype=dtypes.string) + layer.set_vocabulary(vocab_data) + count_data = layer(input_data) + model = keras.Model(inputs=input_data, outputs=count_data) + output_dataset = model.predict(input_array) + self.assertAllEqual(expected_output, output_dataset) + + def test_count_output_shape(self): + input_data = keras.Input(batch_size=16, shape=(4,), dtype=dtypes.string) + layer = get_layer_class()( + max_tokens=2, + num_oov_indices=1, + mask_token="""", + oov_token=""[OOV]"", + output_mode=index_lookup.COUNT, + dtype=dtypes.string) + count_data = layer(input_data) + self.assertAllEqual(count_data.shape.as_list(), [16, 2]) + @keras_parameterized.run_all_keras_modes class IndexLookupVocabularyTest(keras_parameterized.TestCase, ",0,test db0ec387400032d14e16932dea77a59cbdd5755d,tensorflow/tensorflow,"Added flops calculation for most computational intensive kernels. PiperOrigin-RevId: 414808724 Change-Id: I95185d1f5c92368c91bac641dd6c4e89d9d16528",flops_util.cc,"@@ -0,0 +1,54 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include ""tensorflow/lite/delegates/gpu/common/flops_util.h"" + +namespace tflite { +namespace gpu { + +uint64_t GetConvolutionFlops(const BHWC& dst_shape, const OHWI& weights_shape) { + uint64_t dst_elements = dst_shape.b * dst_shape.h * dst_shape.w * dst_shape.c; + // 2 flops per operation( s = a * b + s); + return dst_elements * weights_shape.i * weights_shape.w * weights_shape.h * 2; +} + +uint64_t GetConvolutionWinograd4x4To6x6Flops(const BHWC& dst_shape, + const OHWI& weights_shape) { + return GetConvolutionFlops(dst_shape, weights_shape) / 4u; +} + +uint64_t GetConvolutionTransposedFlops(const BHWC& src_shape, + const OHWI& weights_shape) { + uint64_t elements = src_shape.b * src_shape.h * src_shape.w * weights_shape.o; + // 2 flops per operation( s = a * b + s); + return elements * weights_shape.i * weights_shape.w * weights_shape.h * 2; +} + +uint64_t GetDepthwiseConvolutionFlops(const BHWC& dst_shape, + const OHWI& weights_shape) { + uint64_t dst_elements = dst_shape.b * dst_shape.h * dst_shape.w * dst_shape.c; + // 2 flops per operation( s = a * b + s); + return dst_elements * weights_shape.w * weights_shape.h * 2; +} + +uint64_t GetFullyConnectedFlops(const BHWC& dst_shape, + const OHWI& weights_shape) { + uint64_t dst_elements = dst_shape.b * dst_shape.h * dst_shape.w * dst_shape.c; + // 2 flops per operation( s = a * b + s); + return dst_elements * weights_shape.i * 2; +} + +} // namespace gpu +} // namespace tflite ",0,train db0ec387400032d14e16932dea77a59cbdd5755d,tensorflow/tensorflow,"Added flops calculation for most computational intensive kernels. PiperOrigin-RevId: 414808724 Change-Id: I95185d1f5c92368c91bac641dd6c4e89d9d16528",flops_util.h,"@@ -0,0 +1,42 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_FLOPS_UTIL_H_ +#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_FLOPS_UTIL_H_ + +#include + +#include ""tensorflow/lite/delegates/gpu/common/shape.h"" + +namespace tflite { +namespace gpu { + +uint64_t GetConvolutionFlops(const BHWC& dst_shape, const OHWI& weights_shape); +uint64_t GetConvolutionWinograd4x4To6x6Flops(const BHWC& dst_shape, + const OHWI& weights_shape); + +uint64_t GetConvolutionTransposedFlops(const BHWC& src_shape, + const OHWI& weights_shape); + +uint64_t GetDepthwiseConvolutionFlops(const BHWC& dst_shape, + const OHWI& weights_shape); + +uint64_t GetFullyConnectedFlops(const BHWC& dst_shape, + const OHWI& weights_shape); + +} // namespace gpu +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_FLOPS_UTIL_H_ ",0,train db0ec387400032d14e16932dea77a59cbdd5755d,tensorflow/tensorflow,"Added flops calculation for most computational intensive kernels. 
PiperOrigin-RevId: 414808724 Change-Id: I95185d1f5c92368c91bac641dd6c4e89d9d16528",operation_selector.cc,"@@ -15,9 +15,13 @@ limitations under the License. #include ""tensorflow/lite/delegates/gpu/common/selectors/operation_selector.h"" +#include +#include + #include ""absl/strings/str_cat.h"" #include ""absl/types/any.h"" #include ""tensorflow/lite/delegates/gpu/common/data_type.h"" +#include ""tensorflow/lite/delegates/gpu/common/flops_util.h"" #include ""tensorflow/lite/delegates/gpu/common/gpu_info.h"" #include ""tensorflow/lite/delegates/gpu/common/operations.h"" #include ""tensorflow/lite/delegates/gpu/common/selectors/convolution_selector.h"" @@ -126,6 +130,8 @@ absl::Status WinogradFromNode(const GpuInfo& gpu_info, conv.operation = SelectConvolutionForWinograd(attr, input_shape, gpu_info, conv_def, hints); conv.name = ""convolution_winograd_4x4_6x6""; + conv.operation->flops_ = + GetConvolutionWinograd4x4To6x6Flops(output_shape, attr.weights.shape); OperationDef winograd_down_def; winograd_down_def.precision = op_def.precision; @@ -226,6 +232,9 @@ absl::Status GPUOperationFromNodePart0( attr, weights_shape, dst_shape, gpu_info, conv_def, hints, &conv_weights_desc); conv_op.name = ""mat_mul_as_convolution""; + conv_op.operation->flops_ = GetConvolutionFlops( + outputs[0]->tensor.shape, OHWI(weights_shape.b, weights_shape.h, + weights_shape.w, weights_shape.c)); int aligned_output = AlignByN(weights_shape.b, conv_weights_desc.GetOutputGroupSize() * 4); @@ -330,11 +339,8 @@ absl::Status GPUOperationFromNodePart0( gpu_op = InitSingleOpSubgraph(inputs, outputs, gpu_subgraph); *gpu_op = SelectConvolution(attr, output_shape, gpu_info, op_def, hints); - uint64_t dst_elements = - output_shape.b * output_shape.h * output_shape.w * output_shape.c; - // 2 flops per element, we have for every element multiply and add - (*gpu_op)->flops_ = dst_elements * attr.weights.shape.i * - attr.weights.shape.w * attr.weights.shape.h * 2; + (*gpu_op)->flops_ = + GetConvolutionFlops(output_shape, attr.weights.shape); return absl::OkStatus(); } } else { @@ -359,6 +365,9 @@ absl::Status GPUOperationFromNodePart0( attr, weights_shape, output_shape, gpu_info, conv_def, hints, &conv_weights_desc); conv_op.name = ""convolution_dynamic""; + conv_op.operation->flops_ = GetConvolutionFlops( + outputs[0]->tensor.shape, OHWI(weights_shape.b, weights_shape.h, + weights_shape.w, weights_shape.c)); int aligned_output = AlignByN( weights_shape.b, conv_weights_desc.GetOutputGroupSize() * 4); @@ -386,6 +395,8 @@ absl::Status GPUOperationFromNodePart0( node.operation.attributes); if (inputs.size() == 1) { *gpu_op = SelectConvolutionTransposed(attr, gpu_info, op_def); + (*gpu_op)->flops_ = GetConvolutionTransposedFlops( + inputs[0]->tensor.shape, attr.weights.shape); return absl::OkStatus(); } else { // CONVOLUTION_TRANSPOSED with runtime weights @@ -405,6 +416,8 @@ absl::Status GPUOperationFromNodePart0( attr, gpu_info, op_def, &weights_desc); conv_op.output_ids = {static_cast(outputs[0]->id)}; conv_op.name = ""conv_transposed_dynamic""; + conv_op.operation->flops_ = GetConvolutionTransposedFlops( + inputs[0]->tensor.shape, weights_shape); const int dst_depth = AlignByN(DivideRoundUp(weights_shape.o, 4), weights_desc.GetOutputGroupSize()); @@ -456,6 +469,8 @@ absl::Status GPUOperationFromNodePart0( node.operation.attributes); if (inputs.size() == 1) { *gpu_op = SelectDWConvolution(attr, gpu_info, op_def); + (*gpu_op)->flops_ = GetDepthwiseConvolutionFlops( + outputs[0]->tensor.shape, attr.weights.shape); } else { if 
(inputs[1]->tensor.shape.b != 1) { return absl::UnimplementedError( @@ -463,6 +478,10 @@ absl::Status GPUOperationFromNodePart0( ""!= 1""); } *gpu_op = SelectDWConvolutionDynamicWeights(attr, gpu_info, op_def); + (*gpu_op)->flops_ = GetDepthwiseConvolutionFlops( + outputs[0]->tensor.shape, + OHWI(inputs[1]->tensor.shape.b, inputs[1]->tensor.shape.h, + inputs[1]->tensor.shape.w, inputs[1]->tensor.shape.c)); } return absl::OkStatus(); } @@ -477,6 +496,8 @@ absl::Status GPUOperationFromNodePart0( absl::any_cast(node.operation.attributes); *gpu_op = SelectFullyConnected(attr, gpu_info, op_def, inputs[0]->tensor.shape.b); + (*gpu_op)->flops_ = + GetFullyConnectedFlops(outputs[0]->tensor.shape, attr.weights.shape); return absl::OkStatus(); } case OperationType::FULLY_CONNECTED_INT8: { ",0,train db0ec387400032d14e16932dea77a59cbdd5755d,tensorflow/tensorflow,"Added flops calculation for most computational intensive kernels. PiperOrigin-RevId: 414808724 Change-Id: I95185d1f5c92368c91bac641dd6c4e89d9d16528",special_selector.cc,"@@ -15,8 +15,12 @@ limitations under the License. #include ""tensorflow/lite/delegates/gpu/common/selectors/special_selector.h"" +#include +#include + #include ""absl/types/any.h"" #include ""tensorflow/lite/delegates/gpu/common/data_type.h"" +#include ""tensorflow/lite/delegates/gpu/common/flops_util.h"" #include ""tensorflow/lite/delegates/gpu/common/operations.h"" #include ""tensorflow/lite/delegates/gpu/common/shape.h"" #include ""tensorflow/lite/delegates/gpu/common/status.h"" @@ -108,6 +112,10 @@ absl::Status TryDepthwiseConvPlus1x1Conv( auto operation = CreateDepthwiseConvPlus1x1Conv(op_def, gpu_info, dw_attr, conv_attr, relu_attr_ptr); *gpu_op = absl::make_unique(std::move(operation)); + (*gpu_op)->flops_ = GetDepthwiseConvolutionFlops(dw_outputs[0]->tensor.shape, + dw_attr.weights.shape) + + GetConvolutionFlops(conv_outputs[0]->tensor.shape, + conv_attr.weights.shape); std::string fused_nodes = std::to_string(dw_node->id); if (relu_node) { fused_nodes += "" "" + std::to_string(relu_node->id); ",0,train 5fa4e1ac928b0512b28e955c588c5a7eab2ea046,tensorflow/tensorflow,"Parallel_for: fix converters for some ops that don't support broadcasting. 
PiperOrigin-RevId: 215133508",pfor.py,"@@ -1987,14 +1987,12 @@ def _convert_cast(pfor_input): @RegisterPForWithArgs(""Pow"", math_ops.pow) @RegisterPForWithArgs(""RealDiv"", math_ops.divide) @RegisterPForWithArgs(""Real"", math_ops.real) -@RegisterPForWithArgs(""ReciprocalGrad"", math_ops.reciprocal_grad) @RegisterPForWithArgs(""Reciprocal"", math_ops.reciprocal) @RegisterPForWithArgs(""Relu6"", nn_ops.relu6) @RegisterPForWithArgs(""Relu"", nn_ops.relu) @RegisterPForWithArgs(""RightShift"", bitwise_ops.right_shift) @RegisterPForWithArgs(""Rint"", math_ops.rint) @RegisterPForWithArgs(""Round"", math_ops.round) -@RegisterPForWithArgs(""RsqrtGrad"", math_ops.rsqrt_grad) @RegisterPForWithArgs(""Rsqrt"", math_ops.rsqrt) @RegisterPForWithArgs(""Selu"", nn_ops.selu) @RegisterPForWithArgs(""Sigmoid"", math_ops.sigmoid) @@ -2003,7 +2001,6 @@ def _convert_cast(pfor_input): @RegisterPForWithArgs(""Sin"", math_ops.sin) @RegisterPForWithArgs(""Softplus"", nn_ops.softplus) @RegisterPForWithArgs(""Softsign"", nn_ops.softsign) -@RegisterPForWithArgs(""SqrtGrad"", math_ops.sqrt_grad) @RegisterPForWithArgs(""Sqrt"", math_ops.sqrt) @RegisterPForWithArgs(""SquaredDifference"", math_ops.squared_difference) @RegisterPForWithArgs(""Square"", math_ops.square) @@ -2095,6 +2092,9 @@ def _convert_biasaddgrad(pfor_input): @RegisterPForWithArgs(""SoftplusGrad"") @RegisterPForWithArgs(""SoftsignGrad"") @RegisterPForWithArgs(""TanhGrad"") +@RegisterPForWithArgs(""SqrtGrad"") +@RegisterPForWithArgs(""RsqrtGrad"") +@RegisterPForWithArgs(""ReciprocalGrad"") def _convert_grads(pfor_input, op_type, *args, **kw_args): del args del kw_args ",0,train 9eb453a230590d49478c716b6bb5ace09d33087c,tensorflow/tensorflow,"Additional test for function conversions. PiperOrigin-RevId: 239717899",call_trees_test.py,"@@ -36,7 +36,7 @@ class CallTreesTest(converter_testing.TestCase): converter_testing.RESULT_OF_MOCK_CONVERTED_CALL + 3) self.assertListEqual(self.dynamic_calls, [((), {})]) - def test_function_with_call_in_argument(self): + def test_function_with_expression_in_argument(self): def test_fn(f, g): return f(g() + 7) + 3 @@ -50,6 +50,20 @@ class CallTreesTest(converter_testing.TestCase): ((converter_testing.RESULT_OF_MOCK_CONVERTED_CALL + 7,), {}), ]) + def test_function_with_call_in_argument(self): + + def test_fn(f, g): + return f(g()) + 3 + + with self.converted(test_fn, call_trees, {}) as result: + self.assertEqual( + result.test_fn(None, None), + converter_testing.RESULT_OF_MOCK_CONVERTED_CALL + 3) + self.assertListEqual(self.dynamic_calls, [ + ((), {}), + ((converter_testing.RESULT_OF_MOCK_CONVERTED_CALL,), {}), + ]) + def test_function_with_kwarg(self): def test_fn(f, a, b): ",0,train da4f92ed3635ace5c1e9275b2f899755dbfdb64f,tensorflow/tensorflow,"[XLA:CPU] Remove injected functions from llvm.compiler.used LLVM's InjectTLIMappings adds these so they don't get discarded early, but we actually manually inline them so there is no reason to keep them around. This is a bit of a hack, but avoids dangling references to functions that are otherwise unreferenced. PiperOrigin-RevId: 291900762 Change-Id: I9da177b5b5912dcb856c1700125332f368fab28f",llvm_ir_runtime.cc,"@@ -40,6 +40,38 @@ const char* const kLogV16F32SymbolName = ""__xla_cpu_runtime_LogV16F32AVX""; namespace { +// Removes 'fn' from the list of symbols to keep in 'module'. 
+void RemoveFunctionFromUsedList(llvm::Module* module, llvm::Function* fn) { + llvm::GlobalVariable* used = module->getGlobalVariable(""llvm.compiler.used""); + if (!used) { + return; + } + + llvm::Type* int8_ptr_type = llvm::Type::getInt8PtrTy(module->getContext()); + llvm::Constant* casted_fn = llvm::ConstantExpr::getBitCast(fn, int8_ptr_type); + auto* initializer = llvm::cast(used->getInitializer()); + llvm::SmallVector new_initializer; + for (auto& op : initializer->operands()) { + if (op != casted_fn) { + new_initializer.push_back(llvm::cast(op)); + } + } + + if (new_initializer.size() == initializer->getNumOperands()) { + return; + } + + used->eraseFromParent(); + llvm::ArrayType* array_type = + llvm::ArrayType::get(int8_ptr_type, new_initializer.size()); + used = new llvm::GlobalVariable( + *module, array_type, /*isConstant=*/false, + llvm::GlobalValue::AppendingLinkage, + llvm::ConstantArray::get(array_type, new_initializer), + ""llvm.compiler.used""); + used->setSection(""llvm.metadata""); +} + // Replaces calls to the function `fn_name` with the code generated by // fn_body_generator. // @@ -71,10 +103,6 @@ void RewriteCalls( fn = new_fn; } - // Other libraries using tfcompile could also have generated a function with - // the same name and body. Tell the linker to discard all but one instance. - fn->setLinkage(llvm::GlobalVariable::LinkOnceODRLinkage); - llvm::LLVMContext* context = &module->getContext(); llvm::BasicBlock* fn_body = llvm::BasicBlock::Create(*context, ""body"", fn); @@ -115,10 +143,11 @@ void RewriteCalls( CHECK( llvm::InlineFunction(call_to_inline, inline_function_info).isSuccess()); } - // Delete the function if all uses have been inlined. - if (fn->use_empty()) { - fn->eraseFromParent(); - } + // LLVM's InjectTLIMappings adds functions that might be used for + // vectorization to 'llvm.compiler.used'. Remove it before deleting the + // function. + RemoveFunctionFromUsedList(module, fn); + fn->eraseFromParent(); } llvm::Value* GenerateVF32Tanh(llvm::IRBuilder<>* b, llvm::Value* input, ",0,train 2b95bfb6d812d40c3ef9001c61068571b7c059c2,tensorflow/tensorflow,"Add MakeUnaryHlo() and MakeReverseHlo() to hlo_creation_utils.h/.cc PiperOrigin-RevId: 296080049 Change-Id: I81d020a76da6820086a1a50379c77efc6c43918c",hlo_creation_utils.cc,"@@ -33,6 +33,15 @@ limitations under the License. 
namespace xla { using absl::StrCat; +StatusOr MakeUnaryHlo(HloOpcode opcode, + HloInstruction* operand) { + HloComputation* computation = operand->parent(); + TF_ASSIGN_OR_RETURN(Shape unary_op_shape, + ShapeInference::InferUnaryOpShape(opcode, operand)); + return computation->AddInstruction( + HloInstruction::CreateUnary(unary_op_shape, opcode, operand)); +} + StatusOr MakeBinaryHlo(HloOpcode opcode, HloInstruction* lhs, HloInstruction* rhs) { HloComputation* computation = lhs->parent(); @@ -344,6 +353,15 @@ StatusOr MakeReduceHlo(HloInstruction* operand, scalar_shape, operand, init_value, all_dims, reduce_computation)); } +StatusOr MakeReverseHlo(HloInstruction* operand, + absl::Span dimensions) { + HloComputation* computation = operand->parent(); + TF_ASSIGN_OR_RETURN(Shape reverse_shape, ShapeInference::InferReverseShape( + operand->shape(), dimensions)); + return computation->AddInstruction( + HloInstruction::CreateReverse(reverse_shape, operand, dimensions)); +} + StatusOr MakeSelectHlo(HloInstruction* pred, HloInstruction* on_true, HloInstruction* on_false, ",0,train 2b95bfb6d812d40c3ef9001c61068571b7c059c2,tensorflow/tensorflow,"Add MakeUnaryHlo() and MakeReverseHlo() to hlo_creation_utils.h/.cc PiperOrigin-RevId: 296080049 Change-Id: I81d020a76da6820086a1a50379c77efc6c43918c",hlo_creation_utils.h,"@@ -27,6 +27,11 @@ namespace xla { // ergonomic. We don't have a complete set of helpers yet -- I expect we'll // expand this interface as needed on an ad-hoc basis. +// Creates a unary HLO instruction and adds it to the computation containing +// `operand`. +StatusOr MakeUnaryHlo(HloOpcode opcode, + HloInstruction* operand); + // Creates a binary HLO instruction and adds it to the computation containing // `lhs` and `rhs` (`lhs` and `rhs` must be in the same computation). StatusOr MakeBinaryHlo(HloOpcode opcode, HloInstruction* lhs, @@ -145,6 +150,11 @@ StatusOr MakeReduceHlo(HloInstruction* operand, HloOpcode binary_opcode, HloModule* module); +// Creates a Reverse HLO instruction and adds it to the computation containing +// `operand`. +StatusOr MakeReverseHlo(HloInstruction* operand, + absl::Span dimensions); + // Creates a Select HLO instruction and adds it to the computation containing // the predicate. The on_true and on_false instructions must also be contained // in the same computation. If on_true and on_false are tuples, create a tuple ",0,train 2f6edb4f0cf63f9f7dfaf8e50e593a96d9bf2779,tensorflow/tensorflow,"Fix custom initializer and regularizer loading. PiperOrigin-RevId: 242022733",initializers.py,"@@ -193,8 +193,14 @@ def get(identifier): if isinstance(identifier, dict): return deserialize(identifier) elif isinstance(identifier, six.string_types): - config = {'class_name': str(identifier), 'config': {}} - return deserialize(config) + identifier = str(identifier) + # We have to special-case functions that return classes. + # TODO(omalleyt): Turn these into classes or class aliases. + special_cases = ['he_normal', 'he_uniform', 'lecun_normal', 'lecun_uniform'] + if identifier in special_cases: + # Treat like a class. + return deserialize({'class_name': identifier, 'config': {}}) + return deserialize(identifier) elif callable(identifier): return identifier else: ",0,train 2f6edb4f0cf63f9f7dfaf8e50e593a96d9bf2779,tensorflow/tensorflow,"Fix custom initializer and regularizer loading. 
PiperOrigin-RevId: 242022733",initializers_test.py,"@@ -23,6 +23,7 @@ import numpy as np from tensorflow.python import keras from tensorflow.python import tf2 from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.platform import test @@ -213,6 +214,18 @@ class KerasInitializersTest(test.TestCase): finally: tf2._force_enable = tf2_force_enabled # pylint: disable=protected-access + def test_custom_initializer_saving(self): + + def my_initializer(shape, dtype=None): + return array_ops.ones(shape, dtype=dtype) + + inputs = keras.Input((10,)) + outputs = keras.layers.Dense(1, kernel_initializer=my_initializer)(inputs) + model = keras.Model(inputs, outputs) + model2 = model.from_config( + model.get_config(), custom_objects={'my_initializer': my_initializer}) + self.assertEqual(model2.layers[1].kernel_initializer, my_initializer) + if __name__ == '__main__': test.main() ",0,train 2f6edb4f0cf63f9f7dfaf8e50e593a96d9bf2779,tensorflow/tensorflow,"Fix custom initializer and regularizer loading. PiperOrigin-RevId: 242022733",regularizers.py,"@@ -106,8 +106,14 @@ def get(identifier): if isinstance(identifier, dict): return deserialize(identifier) elif isinstance(identifier, six.string_types): - config = {'class_name': str(identifier), 'config': {}} - return deserialize(config) + identifier = str(identifier) + # We have to special-case functions that return classes. + # TODO(omalleyt): Turn these into classes or class aliases. + special_cases = ['l1', 'l2', 'l1_l2'] + if identifier in special_cases: + # Treat like a class. + return deserialize({'class_name': identifier, 'config': {}}) + return deserialize(str(identifier)) elif callable(identifier): return identifier else: ",0,train 2f6edb4f0cf63f9f7dfaf8e50e593a96d9bf2779,tensorflow/tensorflow,"Fix custom initializer and regularizer loading. PiperOrigin-RevId: 242022733",regularizers_test.py,"@@ -25,6 +25,7 @@ from tensorflow.python import keras from tensorflow.python.framework import test_util from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import testing_utils +from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -92,6 +93,18 @@ class KerasRegularizersTest(keras_parameterized.TestCase): model.compile('sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly()) model.fit(x, y, batch_size=5, epochs=1) + def test_custom_regularizer_saving(self): + + def my_regularizer(weights): + return math_ops.reduce_sum(math_ops.abs(weights)) + + inputs = keras.Input((10,)) + outputs = keras.layers.Dense(1, kernel_regularizer=my_regularizer)(inputs) + model = keras.Model(inputs, outputs) + model2 = model.from_config( + model.get_config(), custom_objects={'my_regularizer': my_regularizer}) + self.assertEqual(model2.layers[1].kernel_regularizer, my_regularizer) + if __name__ == '__main__': test.main() ",0,train 2f6edb4f0cf63f9f7dfaf8e50e593a96d9bf2779,tensorflow/tensorflow,"Fix custom initializer and regularizer loading. 
PiperOrigin-RevId: 242022733",generic_utils.py,"@@ -200,17 +200,20 @@ def deserialize_keras_object(identifier, with CustomObjectScope(custom_objects): return cls(**cls_config) elif isinstance(identifier, six.string_types): - function_name = identifier - if custom_objects and function_name in custom_objects: - fn = custom_objects.get(function_name) - elif function_name in _GLOBAL_CUSTOM_OBJECTS: - fn = _GLOBAL_CUSTOM_OBJECTS[function_name] + object_name = identifier + if custom_objects and object_name in custom_objects: + obj = custom_objects.get(object_name) + elif object_name in _GLOBAL_CUSTOM_OBJECTS: + obj = _GLOBAL_CUSTOM_OBJECTS[object_name] else: - fn = module_objects.get(function_name) - if fn is None: - raise ValueError('Unknown ' + printable_module_name + ':' + - function_name) - return fn + obj = module_objects.get(object_name) + if obj is None: + raise ValueError('Unknown ' + printable_module_name + ':' + object_name) + # Classes passed by name are instantiated with no args, functions are + # returned as-is. + if tf_inspect.isclass(obj): + return obj() + return obj else: raise ValueError('Could not interpret serialized ' + printable_module_name + ': ' + identifier) ",0,train 835c5d2948527ffb4af2b5ab8d453ba834cc364a,tensorflow/tensorflow,"Replace calls to tf_inspect.getargspec with tf_inspect.getfullargspec. PiperOrigin-RevId: 209511641",sequential.py,"@@ -239,9 +239,9 @@ class Sequential(Model): x = inputs for layer in self.layers: kwargs = {} - if 'mask' in tf_inspect.getargspec(layer.call).args: + if 'mask' in tf_inspect.getfullargspec(layer.call).args: kwargs['mask'] = mask - if 'training' in tf_inspect.getargspec(layer.call).args: + if 'training' in tf_inspect.getfullargspec(layer.call).args: kwargs['training'] = training if isinstance(layer, Network) and layer._compute_output_and_mask_jointly: ",0,test 2171a012c117664d4f9928b7c1f75cbcc15be416,tensorflow/tensorflow,"Enable JAX->MLIR lowering by default. Before this change, JAX produces HLO using the XLA:Python builder APIs. After this change JAX produces MHLO using MLIR:Python APIs, and converts the MHLO to HLO for compilation with XLA. This is a lateral shift that should have little immediate impact, but unlocks a number of interesting opportunities in the future (e.g., mixing MLIR dialects within a JAX program). [XLA:Python] Pass MLIR input as a std::string to work around https://github.com/pybind/pybind11/issues/2765. A better fix would be to update pybind11 but that is hitting Windows-related hurdles; for now, just avoid relying on reference lifetime extension. Brax: update test seeds to avoid test failures. Additional constant folding (canonicalization) in the MHLO lowering path seems to cause small numerical differences. PiperOrigin-RevId: 420755696 Change-Id: I5e2626ea1e82c046a847300bf6bbe94208007802",py_client.cc,"@@ -258,7 +258,7 @@ StatusOr> PyClient::Compile( } StatusOr> PyClient::CompileMlir( - absl::string_view mlir_module, CompileOptions options) { + std::string mlir_module, CompileOptions options) { std::unique_ptr executable; absl::optional fingerprint; { ",0,train 2171a012c117664d4f9928b7c1f75cbcc15be416,tensorflow/tensorflow,"Enable JAX->MLIR lowering by default. Before this change, JAX produces HLO using the XLA:Python builder APIs. After this change JAX produces MHLO using MLIR:Python APIs, and converts the MHLO to HLO for compilation with XLA. 
This is a lateral shift that should have little immediate impact, but unlocks a number of interesting opportunities in the future (e.g., mixing MLIR dialects within a JAX program). [XLA:Python] Pass MLIR input as a std::string to work around https://github.com/pybind/pybind11/issues/2765. A better fix would be to update pybind11 but that is hitting Windows-related hurdles; for now, just avoid relying on reference lifetime extension. Brax: update test seeds to avoid test failures. Additional constant folding (canonicalization) in the MHLO lowering path seems to cause small numerical differences. PiperOrigin-RevId: 420755696 Change-Id: I5e2626ea1e82c046a847300bf6bbe94208007802",py_client.h,"@@ -151,8 +151,8 @@ class PyClient : public std::enable_shared_from_this { StatusOr> Compile( const XlaComputation& computation, CompileOptions options); - StatusOr> CompileMlir( - absl::string_view mlir_module, CompileOptions options); + StatusOr> CompileMlir(std::string mlir_module, + CompileOptions options); StatusOr SerializeExecutable( const PyExecutable& executable) const; ",0,train 2171a012c117664d4f9928b7c1f75cbcc15be416,tensorflow/tensorflow,"Enable JAX->MLIR lowering by default. Before this change, JAX produces HLO using the XLA:Python builder APIs. After this change JAX produces MHLO using MLIR:Python APIs, and converts the MHLO to HLO for compilation with XLA. This is a lateral shift that should have little immediate impact, but unlocks a number of interesting opportunities in the future (e.g., mixing MLIR dialects within a JAX program). [XLA:Python] Pass MLIR input as a std::string to work around https://github.com/pybind/pybind11/issues/2765. A better fix would be to update pybind11 but that is hitting Windows-related hurdles; for now, just avoid relying on reference lifetime extension. Brax: update test seeds to avoid test failures. Additional constant folding (canonicalization) in the MHLO lowering path seems to cause small numerical differences. PiperOrigin-RevId: 420755696 Change-Id: I5e2626ea1e82c046a847300bf6bbe94208007802",xla_client.py,"@@ -46,6 +46,9 @@ profiler = _xla.profiler # changes. _version = 51 +# Version number for MLIR:Python components. +mlir_api_version = 1 + xla_platform_names = { 'cpu': 'Host', 'gpu': 'CUDA', ",0,train 2fd76cbef3b30bd10807d6d660fedfa7f5451bf5,tensorflow/tensorflow,"batch_matmul_op_test.py: Updated to pass in TF2, by using gradient_checker_v2 (second try). 
PiperOrigin-RevId: 224190234",batch_matmul_op_test.py,"@@ -20,9 +20,9 @@ from __future__ import print_function import numpy as np -from tensorflow.python.framework import constant_op +from tensorflow.python import tf2 from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -105,32 +105,32 @@ class BatchMatmulOpTest(test.TestCase): def _testNonEmpty(self, dtype, adjoint_a, adjoint_b, use_static_shape): - def compareNonEmpty(self, a_shape, b_shape): + def CompareNonEmpty(self, a_shape, b_shape): self._compare( self._rand(a_shape, dtype), self._rand(b_shape, dtype), adjoint_a, adjoint_b, use_static_shape) - compareNonEmpty(self, [1, 2, 3], [1, 3, 5]) - compareNonEmpty(self, [1, 2, 3], [1, 3, 1]) - compareNonEmpty(self, [1, 1, 3], [1, 3, 5]) - compareNonEmpty(self, [1, 2, 3], [1, 3, 5]) - compareNonEmpty(self, [7, 1, 3], [7, 3, 5]) - compareNonEmpty(self, [7, 2, 3], [7, 3, 1]) - compareNonEmpty(self, [7, 2, 3], [7, 3, 5]) - compareNonEmpty(self, [10, 64, 75], [10, 75, 30]) - compareNonEmpty(self, [5, 7, 2, 3], [5, 7, 3, 5]) + CompareNonEmpty(self, [1, 2, 3], [1, 3, 5]) + CompareNonEmpty(self, [1, 2, 3], [1, 3, 1]) + CompareNonEmpty(self, [1, 1, 3], [1, 3, 5]) + CompareNonEmpty(self, [1, 2, 3], [1, 3, 5]) + CompareNonEmpty(self, [7, 1, 3], [7, 3, 5]) + CompareNonEmpty(self, [7, 2, 3], [7, 3, 1]) + CompareNonEmpty(self, [7, 2, 3], [7, 3, 5]) + CompareNonEmpty(self, [10, 64, 75], [10, 75, 30]) + CompareNonEmpty(self, [5, 7, 2, 3], [5, 7, 3, 5]) def _testEmpty(self, dtype, adjoint_a, adjoint_b, use_static_shape): - def compareEmpty(self, a_shape, b_shape): + def CompareEmpty(self, a_shape, b_shape): self._compare( np.zeros(a_shape).astype(dtype), np.zeros(b_shape).astype(dtype), adjoint_a, adjoint_b, use_static_shape) - compareEmpty(self, [0, 3, 2], [0, 2, 4]) - compareEmpty(self, [3, 0, 2], [3, 2, 5]) - compareEmpty(self, [3, 3, 2], [3, 2, 0]) + CompareEmpty(self, [0, 3, 2], [0, 2, 4]) + CompareEmpty(self, [3, 0, 2], [3, 2, 5]) + CompareEmpty(self, [3, 3, 2], [3, 2, 0]) def _GetBatchMatmulOpTest(dtype, adjoint_a, adjoint_b, use_static_shape): @@ -154,17 +154,13 @@ class BatchMatmulGradientTest(test.TestCase): y = y_in if not adjoint_b else y_in.reshape(y_t_shape) epsilon = np.finfo(x.dtype).eps delta = epsilon**(1.0 / 3.0) + def Loss(x, y): + z = math_ops.matmul(x, y, adjoint_a, adjoint_b) + return math_ops.reduce_sum(z) with self.cached_session(use_gpu=True): - inx = constant_op.constant(x) - iny = constant_op.constant(y) - z = math_ops.matmul(inx, iny, adjoint_a, adjoint_b) - loss = math_ops.reduce_sum(z) - ((x_jacob_t, x_jacob_n), - (y_jacob_t, y_jacob_n)) = gradient_checker.compute_gradient( - [inx, iny], [x.shape, y.shape], - loss, [1], - x_init_value=[x, y], - delta=delta) + ((x_jacob_t, y_jacob_t), + (x_jacob_n, y_jacob_n)) = gradient_checker_v2.compute_gradient( + Loss, [x, y], delta=delta) tol = 20 * delta self.assertAllClose(x_jacob_t, x_jacob_n, rtol=tol, atol=tol) self.assertAllClose(y_jacob_t, y_jacob_n, rtol=tol, atol=tol) @@ -202,11 +198,11 @@ if __name__ == ""__main__"": for adjoint_a_ in False, True: for adjoint_b_ in False, True: name = ""%s_%s_%s"" % (dtype_.__name__, adjoint_a_, adjoint_b_) - for use_static_shape in True, False: + for use_static_shape_ in set([True, tf2.enabled()]): setattr(BatchMatmulOpTest, - ""testBatchMatmulOp_"" + name + (""_%s"" % use_static_shape), + 
""testBatchMatmulOp_"" + name + (""_%s"" % use_static_shape_), _GetBatchMatmulOpTest(dtype_, adjoint_a_, adjoint_b_, - use_static_shape)) + use_static_shape_)) if dtype_ is not np.int32: setattr(BatchMatmulGradientTest, ""testBatchMatmulGradient_"" + name, _GetBatchMatmulGradientTest(dtype_, adjoint_a_, adjoint_b_)) ",0,train fd3adc51e4112572c07db15c9548cf7e70586b50,tensorflow/tensorflow,"Make lite_test.py pass with MLIR converter. PiperOrigin-RevId: 290998140 Change-Id: I3c6164c96610e10ec170829cacfab8b976ccf297",lite_test.py,"@@ -1491,33 +1491,6 @@ class FromFrozenGraphObjectDetection(LiteTest): output_details[3]['name']) self.assertTrue(([1] == output_details[3]['shape']).all()) - def testTFLiteGraphDefMissingShape(self): - # Tests invalid cases for the model that cannot be loaded in TensorFlow. - self._initObjectDetectionArgs() - - # Missing `input_shapes`. - with self.assertRaises(ValueError) as error: - lite.TFLiteConverter.from_frozen_graph(self._graph_def_file, - self._input_arrays, - self._output_arrays) - self.assertEqual('input_shapes must be defined for this model.', - str(error.exception)) - - def testTFLiteGraphDefInvalidShape(self): - # Tests invalid cases for the model that cannot be loaded in TensorFlow. - self._initObjectDetectionArgs() - - # `input_shapes` does not contain the names in `input_arrays`. - with self.assertRaises(ValueError) as error: - lite.TFLiteConverter.from_frozen_graph( - self._graph_def_file, - self._input_arrays, - self._output_arrays, - input_shapes={'invalid-value': [1, 19]}) - self.assertEqual( - 'input_shapes must contain a value for each item in input_array.', - str(error.exception)) - class FromSavedModelTest(TestModels): ",0,test 5707e2e2a2ddabf218a56a950a27358a7222bc97,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-08-03 PiperOrigin-RevId: 388406270 Change-Id: Ifc2c2ff6a7156c25fba8b3ba85b5cf1819123c51",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 8, 2) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 8, 3) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train 9cfcec41235ef6426d1fae9a7b44cb02c1e19274,tensorflow/tensorflow,"Automated rollback of commit a7e7582e9a2b698054bf93aa27e53ebbc081d1a6. Revert #31106. PiperOrigin-RevId: 263621326",math_grad.py,"@@ -192,26 +192,22 @@ def _SumGrad(op, grad): return [array_ops.tile(grad, tile_scaling), None] input_shape = array_ops.shape(op.inputs[0]) - - if not op.get_attr(""keep_dims""): - # TODO(apassos) remove this once device placement for eager ops makes more - # sense. - with ops.colocate_with(input_shape): - output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) - grad = array_ops.reshape(grad, output_shape_kept_dims) - return [array_ops.broadcast_to(grad, input_shape), None] + # TODO(apassos) remove this once device placement for eager ops makes more + # sense. 
+ with ops.colocate_with(input_shape): + output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) + tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims) + grad = array_ops.reshape(grad, output_shape_kept_dims) + return [array_ops.tile(grad, tile_scaling), None] def _MinOrMaxGrad(op, grad): """"""Gradient for Min or Max. Amazingly it's precisely the same code."""""" input_shape = array_ops.shape(op.inputs[0]) + output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) y = op.outputs[0] - if not op.get_attr(""keep_dims""): - output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) - y = array_ops.reshape(y, output_shape_kept_dims) - grad = array_ops.reshape(grad, output_shape_kept_dims) - else: - output_shape_kept_dims = array_ops.shape(y) + y = array_ops.reshape(y, output_shape_kept_dims) + grad = array_ops.reshape(grad, output_shape_kept_dims) # Compute the number of selected (maximum or minimum) elements in each # reduction dimension. If there are multiple minimum or maximum elements @@ -267,11 +263,10 @@ def _ProdGrad(op, grad): reduction_indices = array_ops.reshape(op.inputs[1], [-1]) # Expand grad to full input shape - if not op.get_attr(""keep_dims""): - output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) - grad = array_ops.reshape(grad, output_shape_kept_dims) - - grad = array_ops.broadcast_to(grad, input_shape) + output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) + tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims) + grad = array_ops.reshape(grad, output_shape_kept_dims) + grad = array_ops.tile(grad, tile_scaling) # Pack all reduced dimensions into a single one, so we can perform the # cumprod ops. If the reduction dims list is empty, it defaults to float32, ",0,train 90842ada9e7ef534eb77ff2890047bf75070fb56,tensorflow/tensorflow,Cosmetic changes,mmap_allocation.cc,"@@ -60,11 +60,11 @@ MMAPAllocation::MMAPAllocation(const char* filename, MMAPAllocation::~MMAPAllocation() { if (valid()) { - UnmapViewOfFile( mmapped_buffer_ ); + UnmapViewOfFile(mmapped_buffer_); } if (file_mapping_ != nullptr) { - CloseHandle( file_mapping_ ); + CloseHandle(file_mapping_); } if (file_handle_ != nullptr){ ",0,train 6062f26f626555ca980c716d52c6204e17745503,tensorflow/tensorflow,"Adapt the shape function for `tf.fill()` to handle partial shapes. Fixes #3102. Change: 126324523",constant_op_test.py,"@@ -516,6 +516,9 @@ class FillTest(tf.test.TestCase): tf.placeholder(tf.int32, shape=(4,)), 3.0) self.assertEqual([None, None, None, None], f.get_shape().as_list()) + f = tf.fill([tf.placeholder(tf.int32, shape=()), 17], 1.0) + self.assertEqual([None, 17], f.get_shape().as_list()) + def testGradient(self): with self.test_session(): in_v = tf.constant(5.0) ",0,train 6062f26f626555ca980c716d52c6204e17745503,tensorflow/tensorflow,"Adapt the shape function for `tf.fill()` to handle partial shapes. Fixes #3102. Change: 126324523",array_ops.py,"@@ -1835,16 +1835,16 @@ def _FillShape(op): Returns: A single-element list containing the shape of the output. + + Raises: + ValueError: If the shapes or arguments are known to be invalid. 
"""""" - dimensions_shape = op.inputs[0].get_shape().with_rank(1) - op.inputs[1].get_shape().assert_is_compatible_with(tensor_shape.scalar()) + op.inputs[0].get_shape().assert_has_rank(1) + op.inputs[1].get_shape().assert_has_rank(0) fill_dims = tensor_util.constant_value(op.inputs[0]) - if fill_dims is None: - # Attempt to infer the rank of the output from the length of - # dimensions. - return [tensor_shape.unknown_shape(ndims=dimensions_shape[0].value)] - else: - return [tensor_shape.TensorShape(fill_dims.tolist())] + if fill_dims is not None and any(d < 0 for d in fill_dims): + raise ValueError(""Fill dimensions must be >= 0"") + return [tensor_util.constant_value_as_shape(op.inputs[0])] @ops.RegisterShape(""InvertPermutation"") ",0,train cc70f17486c0b5416bc2c5d5d6e9014d2f48004f,tensorflow/tensorflow,"Add reallocation capability to bfc_allocator. This commit mitigates external fragmentation in bfc_allocator by reallocation. That is, although the sum of regions and unallocated bytes is larger than the requested bytes but the bfc_allocator still fails to allocate a large enough contiguous region to fulfill the request due to fragmentation. To avoid this case, a relocation feature is implemented to deallocate free regions so that a larger region can be formed.",bfc_allocator.cc,"@@ -16,6 +16,7 @@ limitations under the License. #include ""tensorflow/core/common_runtime/bfc_allocator.h"" #include +#include ""absl/container/flat_hash_set.h"" #include ""tensorflow/core/common_runtime/allocator_retry.h"" #include ""tensorflow/core/lib/core/bits.h"" @@ -260,6 +261,76 @@ size_t BFCAllocator::RoundedBytes(size_t bytes) { return rounded_bytes; } +bool BFCAllocator::DeallocateFreeRegions(size_t rounded_bytes) { + // Searching for free regions. + absl::flat_hash_set free_region_ptrs; + size_t total_free_bytes = 0; + for (const auto& region : region_manager_.regions()) { + ChunkHandle h = region_manager_.get_handle(region.ptr()); + bool any_use = false; + while (h != kInvalidChunkHandle) { + const Chunk* c = ChunkFromHandle(h); + if (c->in_use()) { + any_use = true; + break; + } + h = c->next; + } + + if (!any_use) { + VLOG(2) << ""Found free region with ptr = "" << region.ptr(); + free_region_ptrs.insert(region.ptr()); + total_free_bytes += region.memory_size(); + } + } + + if (total_free_bytes == 0) { + return false; + } + + // Rough estimation to check whether deallocation can help. + size_t available_bytes = + memory_limit_ - total_region_allocated_bytes_ + total_free_bytes; + if (rounded_bytes > available_bytes) { + return false; + } + + VLOG(INFO) << ""Re-allocate memory regions to avoid OOM due to memory"" + << "" fragmentation. If you see this message frequently, note"" + << "" that the re-allocation may incur performance overhead despite"" + << "" better memory utilization. You may try smaller batch sizes"" + << "" to see if it can give you better performance.""; + + // Deallocate free regions. + auto it = region_manager_.regions().begin(); + while (it != region_manager_.regions().end()) { + if (!free_region_ptrs.contains(it->ptr())) { + ++it; + continue; + } + + VLOG(2) << ""Deallocate region with ptr = "" << it->ptr(); + // Remove all chunk registrations from Bins. + ChunkHandle h = region_manager_.get_handle(it->ptr()); + while (h != kInvalidChunkHandle) { + const Chunk* c = ChunkFromHandle(h); + if (c->bin_num != kInvalidBinNum) { + RemoveFreeChunkFromBin(h); + } + auto h_to_delete = h; + h = c->next; + DeleteChunk(h_to_delete); + } + + // Deallocate the memory. 
+ sub_allocator_->Free(it->ptr(), it->memory_size()); + total_region_allocated_bytes_ -= it->memory_size(); + it = region_manager_.RemoveAllocationRegion(it); + } + + return true; +} + void* BFCAllocator::AllocateRawInternal(size_t unused_alignment, size_t num_bytes, bool dump_log_on_failure, @@ -307,6 +378,18 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment, } } + // Reaching this point means that no chunks can satisfy the request. Also, + // the unallocated bytes cannot satisfy the request. Before giving up, let's + // try deallocating free regions so that suballocator can combine them with + // the unallocated bytes and form a larger region. + if (DeallocateFreeRegions(rounded_bytes) && + Extend(unused_alignment, rounded_bytes)) { + ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before); + if (ptr != nullptr) { + return ptr; + } + } + // We searched all bins for an existing free chunk to use and // couldn't find one. This means we must have run out of memory, // Dump the memory log for analysis. ",0,test cc70f17486c0b5416bc2c5d5d6e9014d2f48004f,tensorflow/tensorflow,"Add reallocation capability to bfc_allocator. This commit mitigates external fragmentation in bfc_allocator by reallocation. That is, although the sum of regions and unallocated bytes is larger than the requested bytes but the bfc_allocator still fails to allocate a large enough contiguous region to fulfill the request due to fragmentation. To avoid this case, a relocation feature is implemented to deallocate free regions so that a larger region can be formed.",bfc_allocator.h,"@@ -309,6 +309,11 @@ class BFCAllocator : public Allocator { regions_.insert(entry, AllocationRegion(ptr, memory_size)); } + std::vector::const_iterator RemoveAllocationRegion( + std::vector::const_iterator it) { + return regions_.erase(it); + } + ChunkHandle get_handle(const void* p) const { return RegionFor(p)->get_handle(p); } @@ -354,6 +359,14 @@ class BFCAllocator : public Allocator { bool Extend(size_t alignment, size_t rounded_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_); + // Deallocate free regions to give back the memory to suballocator, so that + // we can re-allocate a larger region. The main use scenario of this function + // is when OOM happens but we have free regions and the sum of sizes of free + // regions and unallocated bytes is larger than the requested size, implying + // (external) memory fragmentation. Returns true if deallocating any free + // regions; false otherwise. + bool DeallocateFreeRegions(size_t rounded_bytes); + // Returns a pointer to an underlying allocated chunk of size // 'rounded_bytes'. void* FindChunkPtr(BinNum bin_num, size_t rounded_bytes, size_t num_bytes, ",0,test cc70f17486c0b5416bc2c5d5d6e9014d2f48004f,tensorflow/tensorflow,"Add reallocation capability to bfc_allocator. This commit mitigates external fragmentation in bfc_allocator by reallocation. That is, although the sum of regions and unallocated bytes is larger than the requested bytes but the bfc_allocator still fails to allocate a large enough contiguous region to fulfill the request due to fragmentation. 
To avoid this case, a relocation feature is implemented to deallocate free regions so that a larger region can be formed.",gpu_bfc_allocator_test.cc,"@@ -568,6 +568,47 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test { EXPECT_EQ(GPUBFCAllocator::RoundedBytes(1LL << 31), force_no_allow_growth_allocator.curr_region_allocation_bytes_); } + + void TestRegionDeallocation() { + setenv(""TF_FORCE_GPU_ALLOW_GROWTH"", ""unparseable"", 1); + GPUOptions options; + options.set_allow_growth(true); + + // Max of 2GiB, but starts out small. + PlatformGpuId platform_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(), + platform_gpu_id, /*use_unified_memory=*/false, {}, {}); + GPUBFCAllocator a(sub_allocator, 1LL << 31, options, ""GPU_0_bfc""); + + // Allocate 128 raw pointers of 4 megs. + const size_t size = 1LL << 22; + std::vector initial_ptrs; + for (size_t s = 0; s < 128; s++) { + void* raw = a.AllocateRaw(1, size); + initial_ptrs.push_back(raw); + } + + // Make sure there are more than 1 regions in preparation for the test. + EXPECT_LT(1, a.region_manager_.regions().size()); + + // Deallocate all the memories except the last one. + for (size_t i = 0; i < initial_ptrs.size() - 1; i++) { + a.DeallocateRaw(initial_ptrs[i]); + } + + // Deallocate free regions and there shall be only one region left. + EXPECT_EQ(true, a.DeallocateFreeRegions(/*rounded_bytes=*/0)); + EXPECT_EQ(1, a.region_manager_.regions().size()); + + // There should be only one chunk left in bins. + size_t num_chunks_in_bins = 0; + for (int i = 0; i < BFCAllocator::kNumBins; i++) { + BFCAllocator::Bin* bin = a.BinFromIndex(i); + num_chunks_in_bins += bin->free_chunks.size(); + } + EXPECT_EQ(1, num_chunks_in_bins); + } }; TEST_F(GPUBFCAllocatorPrivateMethodsTest, BinDebugInfo) { TestBinDebugInfo(); } @@ -580,6 +621,10 @@ TEST_F(GPUBFCAllocatorPrivateMethodsTest, ForceAllowGrowth) { TestForceAllowGrowth(); } +TEST_F(GPUBFCAllocatorPrivateMethodsTest, TestRegionDeallocation) { + TestRegionDeallocation(); +} + } // namespace tensorflow #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM ",0,test 95ea3404528afcb1a74dd5f0946ea8d17beda28b,tensorflow/tensorflow,"Handle next-with-default. Fixes #37983. 
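The bfc_allocator change above frees allocation regions that contain no in-use chunks so the sub-allocator can hand back one larger contiguous region. A minimal sketch of that idea, assuming invented Region/Chunk stand-ins rather than the real BFCAllocator structures (the actual code also removes freed chunks from bins, then re-extends the arena and retries the bin search):

    # Illustrative model of the "deallocate fully-free regions" step: drop
    # regions whose chunks are all unused when the freed bytes plus the
    # unallocated budget could cover the request. Region/Chunk are
    # hypothetical stand-ins, not TensorFlow types.
    class Chunk:
        def __init__(self, size, in_use):
            self.size = size
            self.in_use = in_use

    class Region:
        def __init__(self, chunks):
            self.chunks = chunks
        def memory_size(self):
            return sum(c.size for c in self.chunks)
        def fully_free(self):
            return not any(c.in_use for c in self.chunks)

    def deallocate_free_regions(regions, rounded_bytes, memory_limit, allocated_bytes):
        total_free = sum(r.memory_size() for r in regions if r.fully_free())
        if total_free == 0:
            return regions, False
        # Rough feasibility check, mirroring the commit: only free regions
        # if the request could then be satisfied.
        if rounded_bytes > memory_limit - allocated_bytes + total_free:
            return regions, False
        return [r for r in regions if not r.fully_free()], True

    # One fully-free 1 MiB region and one in-use region; a 3 MiB request
    # becomes feasible once the free region is returned to the sub-allocator.
    regions = [Region([Chunk(1 << 20, False)]), Region([Chunk(1 << 20, True)])]
    survivors, freed = deallocate_free_regions(regions, 3 << 20, 8 << 20, 2 << 20)
    print(freed, len(survivors))  # True 1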
PiperOrigin-RevId: 306065738 Change-Id: I0964d7c8ceee1b859b8bb5033e1473654c0719bf",py_builtins.py,"@@ -21,25 +21,29 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import functools import inspect +import numpy as np import six from tensorflow.python.autograph.utils import py_func from tensorflow.python.autograph.utils import tensors from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import iterator_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_parsing_ops from tensorflow.python.ops import gen_string_ops from tensorflow.python.ops import list_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import sort_ops -from tensorflow.python.ops import check_ops from tensorflow.python.util import lazy_loader from tensorflow.python.util import nest @@ -397,6 +401,96 @@ def _py_map(fn, *iterables): return map(fn, *iterables) +def next_(iterator, default=UNSPECIFIED): + if isinstance(iterator, iterator_ops.OwnedIterator): + return next_tf_iterator(iterator, default) + return next_py(iterator, default) + + +# TODO(mdan): These checks should be easier. Fix the nest API. +def _verify_spec_compatible(input_name, spec_name, input_, spec): + """"""Verifies that a symbol has a type compatible vith a given spec. + + Here, compatibility is viewed in the general TensorFlow sense: that the dtypes + are the same after implicit conversion, if both are tensors. + + This verifier ensures consistent treatment of types across AutoGraph. + + Args: + input_name: A name to use for `input_` in error messages. + spec_name: A name to use for `spec` in error messages. + input_: Any, value to verify. + spec: TypeSpec that `input_` must be compatible with. + + Raises: + ValueError if the two types have been determined not to be compatible. + """""" + assert isinstance(spec, tensor_spec.TensorSpec) + if input is None: + # TODO(mdan): raise from None when switching to Py3. + raise ValueError('{} cannot be None'.format(input_name)) + + # TODO(mdan): Use TensorCompatible when ready. + if isinstance(input_, (bool, int, float, str, np.ndarray)): + input_ = ops.convert_to_tensor_v2(input_) + + input_dtype = getattr(input_, 'dtype', None) + + if input_dtype != spec.dtype: + input_dtype_str = 'no dtype' if input_dtype is None else str(input_dtype) + + raise TypeError( + '{} must have the same dtype as {}. Expected {}, got {}'.format( + input_name, spec_name, spec.dtype, input_dtype_str)) + + +def _verify_structure_compatible(input_name, spec_name, input_, spec): + """"""Verifies that possibly-structured symbol has types compatible vith another. + + See _verify_spec_compatible for a more concrete meaning of ""compatible"". + Unspec _verify_spec_compatible, which handles singular Tensor-spec objects, + verify_structures_compatible can process structures recognized by tf.nest. + + Args: + input_name: A name to use for `input_` in error messages. + spec_name: A name to use for `spec` in error messages. + input_: Any, value to verify. May, but doesn't need to, be a structure. + spec: Any, value that `input_` must be compatible with. 
May, but doesn't + need to, be a structure. + + Raises: + ValueError if the two types have been determined not to be compatible. + """""" + try: + nest.assert_same_structure(input_, spec, expand_composites=True) + except (ValueError, TypeError) as e: + raise TypeError( + '{} must have the same element structure as {}.\n\n{}'.format( + input_name, spec_name, str(e))) + + nest.map_structure( + functools.partial(_verify_spec_compatible, input_name, spec_name), input_, + spec) + + +def next_tf_iterator(iterator, default=UNSPECIFIED): + if default is UNSPECIFIED: + # Without a default, fall back to the ""normal"" behavior which raises + # a runtime exception. + return next(iterator) + opt_iterate = iterator_ops.get_next_as_optional(iterator) + _verify_structure_compatible( + 'the default argument', 'the iterate', default, iterator.element_spec) + return control_flow_ops.cond( + opt_iterate.has_value(), opt_iterate.get_value, lambda: default) + + +def next_py(iterator, default=UNSPECIFIED): + if default is UNSPECIFIED: + return next(iterator) + return next(iterator, default) + + def filter_(function, iterable): if isinstance(iterable, dataset_ops.DatasetV2): return _tf_dataset_filter(function, iterable) @@ -515,18 +609,18 @@ if six.PY2: BUILTIN_FUNCTIONS_MAP = { 'abs': abs_, + 'any': any_, + 'all': all_, + 'enumerate': enumerate_, + 'filter': filter_, 'float': float_, 'int': int_, 'len': len_, + 'map': map_, + 'next': next_, 'print': print_, 'range': range_, - # TODO(mdan): This might make more sense as tf.data.range. + 'sorted': sorted_, 'xrange': range_, - 'enumerate': enumerate_, 'zip': zip_, - 'map': map_, - 'filter': filter_, - 'any': any_, - 'all': all_, - 'sorted': sorted_, } ",0,test 95ea3404528afcb1a74dd5f0946ea8d17beda28b,tensorflow/tensorflow,"Handle next-with-default. Fixes #37983. PiperOrigin-RevId: 306065738 Change-Id: I0964d7c8ceee1b859b8bb5033e1473654c0719bf",py_builtins_test.py,"@@ -27,13 +27,14 @@ from tensorflow.python.autograph.core import function_wrappers from tensorflow.python.autograph.operators import data_structures from tensorflow.python.autograph.operators import py_builtins from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops -from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import tensor_array_ops from tensorflow.python.platform import test @@ -248,6 +249,86 @@ class PyBuiltinsTest(test.TestCase): self.assertAllEqual(self.evaluate(iterator.get_next()), -34) self.assertAllEqual(self.evaluate(iterator.get_next()), 9) + def test_next_normal(self): + iterator = iter([1, 2, 3]) + self.assertEqual(py_builtins.next_(iterator), 1) + self.assertEqual(py_builtins.next_(iterator), 2) + self.assertEqual(py_builtins.next_(iterator), 3) + with self.assertRaises(StopIteration): + py_builtins.next_(iterator) + self.assertEqual(py_builtins.next_(iterator, 4), 4) + + def test_next_tf_iterator(self): + # graph-mode iterators are only supported inside tf.function. 
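The next-with-default support added in py_builtins above mirrors the semantics of Python's two-argument next(); a quick reminder of the behavior being emulated for TF iterators (plain Python, no TensorFlow required):

    # Without a default, exhausting the iterator raises StopIteration; with
    # a default, the default is returned instead.
    it = iter([1, 2])
    print(next(it))      # 1
    print(next(it))      # 2
    print(next(it, -1))  # -1: iterator exhausted, default returned
    try:
        next(it)         # no default: raises StopIteration
    except StopIteration:
        print('exhausted')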
+ @def_function.function(autograph=False) + def test_fn(go_out_of_range, with_default): + iterator = iter(dataset_ops.Dataset.range(3)) + retval = ( + py_builtins.next_(iterator), + py_builtins.next_(iterator), + py_builtins.next_(iterator), + ) + if go_out_of_range: + if with_default: + retval += ( + py_builtins.next_(iterator, + constant_op.constant(-3, dtype=dtypes.int64)), + py_builtins.next_(iterator, + constant_op.constant(-4, dtype=dtypes.int64)), + ) + else: + py_builtins.next_(iterator) + return retval + + self.assertAllEqual( + self.evaluate(test_fn(go_out_of_range=False, with_default=None)), + (0, 1, 2)) + self.assertAllEqual( + self.evaluate(test_fn(go_out_of_range=True, with_default=True)), + (0, 1, 2, -3, -4)) + with self.assertRaises(errors_impl.OutOfRangeError): + self.evaluate(test_fn(go_out_of_range=True, with_default=False)) + + def test_next_tf_iterator_error_checking(self): + # graph-mode iterators are only supported inside tf.function. + @def_function.function(autograph=False) + def test_fn(): + iterator = iter(dataset_ops.Dataset.range(1)) + py_builtins.next_(iterator) + py_builtins.next_(iterator, constant_op.constant(-3)) + + # Dataset.range defaults to int64, + with self.assertRaisesRegex(TypeError, 'default.*int64'): + self.evaluate(test_fn()) + + def test_next_tf_iterator_error_checking_structures(self): + # graph-mode iterators are only supported inside tf.function. + @def_function.function(autograph=False) + def test_fn(default_val): + ds = dataset_ops.Dataset.range(1) + ds = ds.map(lambda i: {'a': i + 1, 'b': i + 10}) + iterator = iter(ds) + py_builtins.next_(iterator) + py_builtins.next_(iterator, default_val) + + default = { + 'a': constant_op.constant(3, dtype=dtypes.int64), + } + with self.assertRaisesRegex(TypeError, 'same element structure'): + test_fn(default) + default = { + 'a': constant_op.constant(3.0), + 'b': [constant_op.constant(30), constant_op.constant(300)] + } + with self.assertRaisesRegex(TypeError, 'same element structure'): + test_fn(default) + default = { + 'a': constant_op.constant(3.0), + 'b': constant_op.constant(30, dtype=dtypes.int64), + } + with self.assertRaisesRegex(TypeError, 'float32'): + test_fn(default) + def _basic_function_scope(self): return function_wrappers.FunctionScope( 'test_function_name', ",0,test 6e7e5836af7bd09bed2c271938c4373a891e0d0a,tensorflow/tensorflow,"Preserve context info when entering merge_call. PiperOrigin-RevId: 433130849",mirrored_run.py,"@@ -120,6 +120,15 @@ def _enter_graph(g, eager, creator_stack=None): yield +@contextlib.contextmanager +def _maybe_enter_eager_mode(eager): + if eager: + with context.eager_mode(): + yield + else: + yield + + def _cpu_device(device): cpu_device = tf_device.DeviceSpec.from_string(device) cpu_device = cpu_device.replace(device_type=""CPU"", device_index=0) @@ -241,9 +250,29 @@ def _call_for_each_replica(distribution, fn, args, kwargs): mtt_captured_control_deps = set() for t in threads: mtt_captured_control_deps.update(t.captured_control_deps) - with ops.name_scope(mtt_captured_name_scope),\ - ops.control_dependencies(mtt_captured_control_deps), \ - variable_scope.variable_scope(mtt_captured_var_scope): + + # Control is transfered from _MirroredReplicaThread (MRT) to the main + # thread, i.e., here, to perform `merge_fn`, and thus we preserve the + # name scope, control dependencies, etc. from MRT at the time + # `merge_call` is made. + # One special case is that the `merge_call` is made under an + # `tf.init_scope` in the MRT. 
`tf.init_scope` will clear control + # dependencies, pause gradient tape, and enter the lowest context on + # the `context_stack` that is not building a graph function. Entering + # the lowest context could be one of the two things: installation of a + # graph as the default graph or switch into eager mode. If the former + # is done and causes `merge_call` to be called in a different graph + # from the one in which `call_for_each_replica` is called, we do not + # allow this case (see comment in `_merge_call`) and we would not have + # arrived here due to the assertion in `_merge_call`. However, if the + # latter is done, we want to make sure the main thread enter an eager + # mode scope as well so that `merge_fn` does not have trouble + # accessing resources defined in MRT under the same context. + with ops.name_scope( + mtt_captured_name_scope), ops.control_dependencies( + mtt_captured_control_deps), variable_scope.variable_scope( + mtt_captured_var_scope), _maybe_enter_eager_mode( + threads[0].merge_call_entered_in_eager): merge_result = threads[0].merge_fn(distribution, *merge_args, **merge_kwargs) for r, t in enumerate(threads): @@ -438,6 +467,8 @@ class _MirroredReplicaContext(distribute_lib.ReplicaContext): t.captured_var_scope = variable_scope.get_variable_scope() t.captured_control_deps = t.graph._current_control_dependencies() # pylint: disable=protected-access + t.merge_call_entered_in_eager = context.context().executing_eagerly() + # It is problematic if `merge_call` is called under a different graph other # than the one that `_call_for_each_replica` is called under, there are # 3 cases this can happen: @@ -488,6 +519,7 @@ class _MirroredReplicaContext(distribute_lib.ReplicaContext): t.should_run.clear() if t.coord.should_stop(): raise _RequestedStop() + t.merge_call_entered_in_eager = None return t.merge_result @property ",0,train 6e7e5836af7bd09bed2c271938c4373a891e0d0a,tensorflow/tensorflow,"Preserve context info when entering merge_call. 
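The `_maybe_enter_eager_mode` helper in the mirrored_run diff above is an instance of a general pattern: conditionally entering a context manager. A generic sketch of that pattern with contextlib, using a stand-in tracing context instead of TensorFlow's eager context:

    # Conditionally enter a context manager, as _maybe_enter_eager_mode does
    # with the eager context. The tracing() context below is a stand-in.
    import contextlib

    @contextlib.contextmanager
    def maybe_enter(condition, ctx_factory):
        if condition:
            with ctx_factory():
                yield
        else:
            yield

    @contextlib.contextmanager
    def tracing(label):
        print('enter', label)
        try:
            yield
        finally:
            print('exit', label)

    with maybe_enter(True, lambda: tracing('eager')):
        print('body')  # prints: enter eager / body / exit eager
    with maybe_enter(False, lambda: tracing('eager')):
        print('body')  # prints: body only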
PiperOrigin-RevId: 433130849",strategy_common_test.py,"@@ -63,6 +63,29 @@ class StrategyTest(test.TestCase, parameterized.TestCase): g() + def testMergeCallInitScope(self, strategy): + with strategy.scope(): + + @def_function.function + def fn(): + + def merge_fn(unused_strat): + + y = constant_op.constant(11) + return y + + def replica_fn(): + + with ops.init_scope(): + y = ds_context.get_replica_context().merge_call(merge_fn) + z = y + 1 + return z + + return strategy.run(replica_fn) + + result = strategy.experimental_local_results(fn()) + self.assertAllClose(result, [12] * _get_num_replicas_per_client(strategy)) + @combinations.generate( combinations.combine( ",0,train e25c7a82285f22e9a99153f094222ea41fae8fe6,tensorflow/tensorflow,TST: check num of fearues and targets,numpy_io_test.py,"@@ -294,20 +294,22 @@ class NumpyIoTest(test.TestCase): with self.test_session() as session: input_fn = numpy_io.numpy_input_fn( x, y, batch_size=2, shuffle=False, num_epochs=1) - features, target = input_fn() + features_tensor, targets_tensor = input_fn() coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(session, coord=coord) - res = session.run([features, target]) - self.assertAllEqual(res[0]['a'], [0, 1]) - self.assertAllEqual(res[0]['b'], [32, 33]) - self.assertAllEqual(res[1]['y1'], [-32, -31]) - self.assertAllEqual(res[1]['y2'], [32, 31]) + features, targets = session.run([features_tensor, targets_tensor]) + self.assertEqual(len(features), 2) + self.assertAllEqual(features['a'], [0, 1]) + self.assertAllEqual(features['b'], [32, 33]) + self.assertEqual(len(targets), 2) + self.assertAllEqual(targets['y1'], [-32, -31]) + self.assertAllEqual(targets['y2'], [32, 31]) - session.run([features, target]) + session.run([features_tensor, targets_tensor]) with self.assertRaises(errors.OutOfRangeError): - session.run([features, target]) + session.run([features_tensor, targets_tensor]) coord.request_stop() coord.join(threads) ",0,train c28b38d452ec2b89c330d5c2fabea956a029f348,tensorflow/tensorflow,"[kernel_gen] Add generation of tf.SquaredDifference Disabled benchmark for now as it is very slow without fusion. PiperOrigin-RevId: 354496217 Change-Id: I72216cf8c2599afecd61bbac9dc43dda6cd345f4",cwise_op_gpu_squared_difference.cu.cc,"@@ -19,7 +19,10 @@ limitations under the License. namespace tensorflow { namespace functor { +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED) DEFINE_BINARY4(squared_difference, float, Eigen::half, double, int64); +#endif } // namespace functor } // namespace tensorflow ",0,train c28b38d452ec2b89c330d5c2fabea956a029f348,tensorflow/tensorflow,"[kernel_gen] Add generation of tf.SquaredDifference Disabled benchmark for now as it is very slow without fusion. PiperOrigin-RevId: 354496217 Change-Id: I72216cf8c2599afecd61bbac9dc43dda6cd345f4",cwise_op_squared_difference.cc,"@@ -20,9 +20,12 @@ REGISTER8(BinaryOp, CPU, ""SquaredDifference"", functor::squared_difference, float, Eigen::half, double, bfloat16, int32, int64, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED) REGISTER4(BinaryOp, GPU, ""SquaredDifference"", functor::squared_difference, float, Eigen::half, double, int64); #endif +#endif // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. 
This kernel @@ -36,5 +39,4 @@ REGISTER_KERNEL_BUILDER( .TypeConstraint(""T""), BinaryOp>); - } // namespace tensorflow ",0,train c28b38d452ec2b89c330d5c2fabea956a029f348,tensorflow/tensorflow,"[kernel_gen] Add generation of tf.SquaredDifference Disabled benchmark for now as it is very slow without fusion. PiperOrigin-RevId: 354496217 Change-Id: I72216cf8c2599afecd61bbac9dc43dda6cd345f4",gpu_binary_ops_test.cc,"@@ -843,6 +843,22 @@ GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES( /*test_name=*/Int64, int64, int64, test::DefaultInput(), test::DefaultInputLessThanBitwidth(), baseline_right_shift) +/// Test `tf.SquaredDifference`. + +template +T baseline_squared_difference(T lhs, T rhs) { + return (lhs - rhs) * (lhs - rhs); +} + +GENERATE_DEFAULT_TESTS(SquaredDifference, /*test_name=*/Half, Eigen::half, + Eigen::half, baseline_squared_difference) +GENERATE_DEFAULT_TESTS(SquaredDifference, /*test_name=*/Float, float, float, + baseline_squared_difference) +GENERATE_DEFAULT_TESTS(SquaredDifference, /*test_name=*/Double, double, double, + baseline_squared_difference) +GENERATE_DEFAULT_TESTS(SquaredDifference, /*test_name=*/Int64, int64, int64, + baseline_squared_difference) + /// Test `tf.Sub`. template ",0,train c28b38d452ec2b89c330d5c2fabea956a029f348,tensorflow/tensorflow,"[kernel_gen] Add generation of tf.SquaredDifference Disabled benchmark for now as it is very slow without fusion. PiperOrigin-RevId: 354496217 Change-Id: I72216cf8c2599afecd61bbac9dc43dda6cd345f4",gpu_op_squared_difference.cc,"@@ -0,0 +1,26 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" +#include ""tensorflow/core/kernels/mlir_generated/gpu_ops_base.h"" + +namespace tensorflow { + +GENERATE_AND_REGISTER_BINARY_KERNEL(SquaredDifference, f16, DT_HALF, + Eigen::half); +GENERATE_AND_REGISTER_BINARY_KERNEL(SquaredDifference, f32, DT_FLOAT, float); +GENERATE_AND_REGISTER_BINARY_KERNEL(SquaredDifference, f64, DT_DOUBLE, double); +GENERATE_AND_REGISTER_BINARY_KERNEL(SquaredDifference, i64, DT_INT64, int64); + +} // namespace tensorflow ",0,train 6c1f11a557add7f836751361f26caf2e0062d509,tensorflow/tensorflow,"Minor fix to include order. 
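The SquaredDifference kernels generated above compute the elementwise value (x - y)^2, which is also what the baseline_squared_difference helper in the test diff encodes. A small numpy check of that definition, with illustrative inputs:

    # Elementwise semantics of the squared-difference op, matching the
    # baseline helper in the test above: (x - y) * (x - y).
    import numpy as np

    def squared_difference(x, y):
        d = np.asarray(x) - np.asarray(y)
        return d * d

    print(squared_difference([1.0, 2.0, 5.0], [4.0, 2.0, 3.0]))  # [9. 0. 4.]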
PiperOrigin-RevId: 312298890 Change-Id: I3ae60f2d4c5f6c92aa165c7fa1263445c4a98a6d",text_vectorization_test.py,"@@ -37,9 +37,9 @@ from tensorflow.python.keras import testing_utils from tensorflow.python.keras.layers import convolutional from tensorflow.python.keras.layers import core from tensorflow.python.keras.layers import embeddings +from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils from tensorflow.python.keras.layers.preprocessing import text_vectorization from tensorflow.python.keras.layers.preprocessing import text_vectorization_v1 -from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils from tensorflow.python.keras.saving import saved_model_experimental as saving from tensorflow.python.keras.utils import generic_utils from tensorflow.python.keras.utils.generic_utils import CustomObjectScope ",0,test 4c01f5d82f0eb3f8f585d87676d759c78a7fa5cf,tensorflow/tensorflow,"[tf.data] Fix the bug when restoring the iterator, the buffered elements are not correctly recorded for ParallelInterleaveDataset. PiperOrigin-RevId: 356627548 Change-Id: I8bb0ea293d468453790bfdb996fbf428ba7cf71d",parallel_interleave_dataset_op.cc,"@@ -1278,6 +1278,7 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { absl::StrCat(kResultsSuffix, ""["", i, ""]["", j, ""]""), &result->return_values.back())); } + RecordBufferEnqueue(ctx, result->return_values); element->results[i] = std::move(result); } if (!reader->Contains(iterator_name, @@ -1339,6 +1340,9 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { TF_RETURN_IF_ERROR( ReadElementsParallel(ctx, reader, size, kCurrentElements, &elements)); mutex_lock l(*mu_); + for (auto& element : current_elements_) { + DCHECK(element == nullptr); + } for (int idx = 0; idx < size; ++idx) { current_elements_[idx] = std::move(elements[idx]); } @@ -1361,6 +1365,9 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { TF_RETURN_IF_ERROR( ReadElementsParallel(ctx, reader, size, kFutureElements, &elements)); mutex_lock l(*mu_); + for (auto& element : future_elements_) { + DCHECK(element == nullptr); + } for (int idx = 0; idx < size; ++idx) { future_elements_[idx] = std::move(elements[idx]); } ",0,test ed408b579eba3844dda9a96ae57fffb0f2c4d10d,tensorflow/tensorflow,"Generate the C++ header files. PiperOrigin-RevId: 337399875 Change-Id: I251463719569aa261f5c2768a7a5c045d192bfbc",build_cc_api_headers.py,"@@ -0,0 +1,63 @@ +# Lint as: python3 +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""""""Generate Java reference docs for TensorFlow.org."""""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import pathlib +import subprocess + +from absl import app +from absl import flags + +FLAGS = flags.FLAGS + +# These flags are required by infrastructure, not all of them are used. 
+flags.DEFINE_string('output_dir', None, + (""Use this branch as the root version and don't"" + ' create in version directory')) + +# __file__ is the path to this file +DOCS_TOOLS_DIR = pathlib.Path(__file__).resolve().parent +TENSORFLOW_ROOT = DOCS_TOOLS_DIR.parents[2] + + +def build_headers(output_dir): + """"""Builds the headers files for TF."""""" + + # `$ yes | configure` + yes = subprocess.Popen(['yes', ''], stdout=subprocess.PIPE) + configure = subprocess.Popen([TENSORFLOW_ROOT / 'configure'], + stdin=yes.stdout, + cwd=TENSORFLOW_ROOT) + configure.communicate() + + subprocess.check_call(['bazel', 'build', 'tensorflow/cc:cc_ops'], + cwd=TENSORFLOW_ROOT) + subprocess.check_call( + ['cp', '--dereference', '-r', 'bazel-bin', output_dir / 'bazel-genfiles'], + cwd=TENSORFLOW_ROOT) + + +def main(argv): + del argv + build_headers(pathlib.Path(FLAGS.output_dir)) + + +if __name__ == '__main__': + flags.mark_flags_as_required(['output_dir']) + app.run(main) ",0,train 9c4dd646b711ae9c65543a2d14e53f12dc4bcafc,tensorflow/tensorflow,"Replace llvm::sys::fs::F_{None,Text} with llvm::sys::fs::OF_{None,Text} The former are deprecated aliases which will be removed by https://reviews.llvm.org/D101506 PiperOrigin-RevId: 371371581 Change-Id: I860fac466c3415655bea048f9ee7042ad882999a",dump_ir_pass.cc,"@@ -48,7 +48,7 @@ class DumpIrPass : public llvm::FunctionPass { bool doInitialization(llvm::Module &M) override { out_.reset(new llvm::raw_fd_ostream(llvm::StringRef(output_filename_), ec_, - llvm::sys::fs::F_None)); + llvm::sys::fs::OF_None)); if (ec_) { LOG(FATAL) << ""Unable to open "" << output_filename_ << "" to dump LLVM IR: "" << ec_.message(); ",0,train 9c4dd646b711ae9c65543a2d14e53f12dc4bcafc,tensorflow/tensorflow,"Replace llvm::sys::fs::F_{None,Text} with llvm::sys::fs::OF_{None,Text} The former are deprecated aliases which will be removed by https://reviews.llvm.org/D101506 PiperOrigin-RevId: 371371581 Change-Id: I860fac466c3415655bea048f9ee7042ad882999a",gpu_backend_lib.cc,"@@ -216,7 +216,7 @@ void AddOptimizationPasses(unsigned opt_level, unsigned size_level, void EmitBitcodeToFile(const llvm::Module& module, absl::string_view filename) { std::error_code error_code; llvm::ToolOutputFile outfile(string(filename).c_str(), error_code, - llvm::sys::fs::F_None); + llvm::sys::fs::OF_None); if (error_code) { LOG(FATAL) << ""opening bitcode file for writing: "" << error_code.message(); } @@ -696,7 +696,7 @@ StatusOr> EmitModuleToHsaco( // Dump LLVM IR. std::unique_ptr ir_fs( - new llvm::raw_fd_ostream(ir_path, ec, llvm::sys::fs::F_None)); + new llvm::raw_fd_ostream(ir_path, ec, llvm::sys::fs::OF_None)); module->print(*ir_fs, nullptr); ir_fs->flush(); @@ -713,7 +713,7 @@ StatusOr> EmitModuleToHsaco( llvm::SmallVector stream; llvm::raw_svector_ostream pstream(stream); std::unique_ptr isabin_fs( - new llvm::raw_fd_ostream(isabin_path, ec, llvm::sys::fs::F_Text)); + new llvm::raw_fd_ostream(isabin_path, ec, llvm::sys::fs::OF_Text)); module->setDataLayout(target_machine->createDataLayout()); target_machine->addPassesToEmitFile(codegen_passes, *isabin_fs, nullptr, llvm::CGFT_ObjectFile); @@ -722,7 +722,7 @@ StatusOr> EmitModuleToHsaco( if (keep_tempfiles) { std::unique_ptr ir_fs( - new llvm::raw_fd_ostream(ir_opt_path, ec, llvm::sys::fs::F_None)); + new llvm::raw_fd_ostream(ir_opt_path, ec, llvm::sys::fs::OF_None)); module->print(*ir_fs, nullptr); ir_fs->flush(); } ",0,train 05bef43400c83ca6d27617e27f1661e14f5f05e8,tensorflow/tensorflow,"Do not constant fold nodes with DT_VARIANT type outputs in XLA. 
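The build_cc_api_headers.py script above answers configure's interactive prompts by piping the output of `yes` into it before invoking bazel. The same stdin-piping pattern in isolation, with placeholder commands rather than TensorFlow's actual configure/bazel invocation:

    # Feed one process's stdout into another process's stdin, as the header
    # generation script does with `yes | configure`. Commands are placeholders.
    import subprocess

    producer = subprocess.Popen(['yes', ''], stdout=subprocess.PIPE)
    consumer = subprocess.Popen(['head', '-n', '3'], stdin=producer.stdout)
    producer.stdout.close()   # let `yes` receive SIGPIPE once `head` exits
    consumer.communicate()
    producer.terminate()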
XLA does not support Const nodes of type Variant. It needs to see the source ops for the Variant operations to be able to build its own representation. PiperOrigin-RevId: 229801068",encapsulate_subgraphs_pass.cc,"@@ -2535,7 +2535,33 @@ Status EncapsulateSubgraphsPass::Run( std::vector* input_permutation, std::vector* output_permutation, NodeDef* node) { // Optimize the subgraph. - OptimizeGraph(flr, subgraph); + // Do not constant fold nodes that output DT_VARIANT type tensors. + // XLA does not support Const nodes of Variant type since it needs + // to know the original ops to be able to compile them to the relevant + // XLA form. + // TODO(srbs): This filter is a little conservative. E.g. a subgraph of + // the form: + // Const + // | + // EmptyTensorList -> TensorListPushBack -> TensorListPopBack -> Op + // | + // (Discard popped list) + // + // Would have been reduced to ""Const -> Op"" without this filter. + // However since we are only allowed to specify the filter at the ""Node"" + // level there is no good way to allow the above behavior. So we + // disallow any sort of constant folding on Variant nodes for now. + auto cf_consider_fn = [](const Node* n) { + for (const auto& output_arg : n->op_def().output_arg()) { + if (output_arg.type() == DT_VARIANT) { + return false; + } + } + return true; + }; + GraphOptimizer::Options graph_optimizer_options; + graph_optimizer_options.cf_consider_fn = cf_consider_fn; + OptimizeGraph(flr, subgraph, graph_optimizer_options); const int num_args = input_permutation->size(); std::vector const_args(num_args); ",0,train 05bef43400c83ca6d27617e27f1661e14f5f05e8,tensorflow/tensorflow,"Do not constant fold nodes with DT_VARIANT type outputs in XLA. XLA does not support Const nodes of type Variant. It needs to see the source ops for the Variant operations to be able to build its own representation. PiperOrigin-RevId: 229801068",tensor_list_ops_test.py,"@@ -48,24 +48,39 @@ class ListOpsTest(xla_test.XLATestCase): def testPushPop(self): with self.cached_session() as sess, self.test_scope(): - num = array_ops.placeholder(dtypes.int32) l = list_ops.tensor_list_reserve( - element_shape=(7, 15), num_elements=num, element_dtype=dtypes.float32) + element_shape=(7, 15), num_elements=10, element_dtype=dtypes.float32) l = list_ops.tensor_list_push_back( l, constant_op.constant(1.0, shape=(7, 15))) l = list_ops.tensor_list_push_back( l, constant_op.constant(2.0, shape=(7, 15))) l, e2 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) _, e1 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) - self.assertAllEqual(sess.run(e2, {num: 10}), 2.0 * np.ones((7, 15))) - self.assertAllEqual(sess.run(e1, {num: 10}), 1.0 * np.ones((7, 15))) + self.assertAllEqual(sess.run(e2), 2.0 * np.ones((7, 15))) + self.assertAllEqual(sess.run(e1), 1.0 * np.ones((7, 15))) + + def testDoNotConstantFoldVariants(self): + with self.cached_session() as sess, self.test_scope(): + val = array_ops.placeholder(dtype=dtypes.float32) + l = list_ops.tensor_list_reserve( + element_shape=(7, 15), num_elements=10, element_dtype=dtypes.float32) + # Note: Pushing a Placeholder will force the constant folding code + # to build a Const node with a DT_VARIANT output. This tests that XLA + # passes a cf_consider_fn which prevent folding such nodes. 
+ l = list_ops.tensor_list_push_back( + l, array_ops.fill(value=val, dims=(7, 15))) + l = list_ops.tensor_list_push_back( + l, constant_op.constant(2.0, shape=(7, 15))) + l, e2 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) + _, e1 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) + self.assertAllEqual(sess.run(e2, {val: 1.0}), 2.0 * np.ones((7, 15))) + self.assertAllEqual(sess.run(e1, {val: 1.0}), 1.0 * np.ones((7, 15))) def testPushPopSeparateLists(self): with self.cached_session() as sess, self.test_scope(): - num = array_ops.placeholder(dtypes.int32) l = list_ops.tensor_list_reserve( element_shape=scalar_shape(), - num_elements=num, + num_elements=20, element_dtype=dtypes.float32) l = list_ops.tensor_list_push_back(l, constant_op.constant(1.0)) l2 = list_ops.tensor_list_push_back(l, constant_op.constant(2.0)) @@ -75,7 +90,7 @@ class ListOpsTest(xla_test.XLATestCase): l2, e22 = list_ops.tensor_list_pop_back(l2, element_dtype=dtypes.float32) l3, e31 = list_ops.tensor_list_pop_back(l3, element_dtype=dtypes.float32) l3, e32 = list_ops.tensor_list_pop_back(l3, element_dtype=dtypes.float32) - result = sess.run([e11, [e21, e22], [e31, e32]], {num: 20}) + result = sess.run([e11, [e21, e22], [e31, e32]]) self.assertEqual(result, [1.0, [2.0, 1.0], [3.0, 1.0]]) def testEmptyTensorList(self): ",0,train 05bef43400c83ca6d27617e27f1661e14f5f05e8,tensorflow/tensorflow,"Do not constant fold nodes with DT_VARIANT type outputs in XLA. XLA does not support Const nodes of type Variant. It needs to see the source ops for the Variant operations to be able to build its own representation. PiperOrigin-RevId: 229801068",xla_compiler.cc,"@@ -462,8 +462,34 @@ std::unique_ptr XlaCompiler::GetGraph(const FunctionBody* fbody) { opts.set_do_function_inlining(true); opts.set_do_constant_folding(true); GraphOptimizer optimizer(opts); + // Do not constant fold nodes that output DT_VARIANT type tensors. + // XLA does not support Const nodes of Variant type since it needs + // to know the original ops to be able to compile them to the relevant + // XLA form. + // TODO(srbs): This filter is a little conservative. E.g. a subgraph of + // the form: + // Const + // | + // EmptyTensorList -> TensorListPushBack -> TensorListPopBack -> Op + // | + // (Discard popped list) + // + // Would have been reduced to ""Const -> Op"" without this filter. + // However since we are only allowed to specify the filter at the ""Node"" + // level there is no good way to allow the above behavior. So we + // disallow any sort of constant folding on Variant nodes for now. + auto cf_consider_fn = [](const Node* n) { + for (const auto& output_arg : n->op_def().output_arg()) { + if (output_arg.type() == DT_VARIANT) { + return false; + } + } + return true; + }; + GraphOptimizer::Options graph_optimizer_options; + graph_optimizer_options.cf_consider_fn = cf_consider_fn; optimizer.Optimize(flib_runtime_, flib_runtime_->env(), - /*device=*/nullptr, &graph, /*shape_map=*/nullptr); + /*device=*/nullptr, &graph, graph_optimizer_options); return graph; } ",0,train 05bef43400c83ca6d27617e27f1661e14f5f05e8,tensorflow/tensorflow,"Do not constant fold nodes with DT_VARIANT type outputs in XLA. XLA does not support Const nodes of type Variant. It needs to see the source ops for the Variant operations to be able to build its own representation. 
PiperOrigin-RevId: 229801068",function.cc,"@@ -786,13 +786,19 @@ void DumpGraph(StringPiece label, const Graph* g) { } } -void OptimizeGraph(FunctionLibraryRuntime* lib, std::unique_ptr* g) { +void OptimizeGraph(FunctionLibraryRuntime* lib, std::unique_ptr* g, + const GraphOptimizer::Options& graph_optimizer_options) { OptimizerOptions opts; opts.set_do_common_subexpression_elimination(true); opts.set_do_function_inlining(true); opts.set_do_constant_folding(true); GraphOptimizer optimizer(opts); - optimizer.Optimize(lib, lib->env(), lib->device(), g, /*shape_map=*/nullptr); + optimizer.Optimize(lib, lib->env(), lib->device(), g, + graph_optimizer_options); +} + +void OptimizeGraph(FunctionLibraryRuntime* lib, std::unique_ptr* g) { + OptimizeGraph(lib, g, GraphOptimizer::Options()); } namespace { ",0,train 05bef43400c83ca6d27617e27f1661e14f5f05e8,tensorflow/tensorflow,"Do not constant fold nodes with DT_VARIANT type outputs in XLA. XLA does not support Const nodes of type Variant. It needs to see the source ops for the Variant operations to be able to build its own representation. PiperOrigin-RevId: 229801068",function.h,"@@ -21,6 +21,7 @@ limitations under the License. #include ""tensorflow/core/common_runtime/device.h"" #include ""tensorflow/core/common_runtime/device_mgr.h"" +#include ""tensorflow/core/common_runtime/graph_optimizer.h"" #include ""tensorflow/core/common_runtime/process_function_library_runtime.h"" #include ""tensorflow/core/framework/function.h"" #include ""tensorflow/core/graph/graph.h"" @@ -133,6 +134,8 @@ void DumpGraph(StringPiece label, const Graph* g); // OptimizeGraph mutates **g extensively and replaces '*g' with a // complete copy. Therefore, the caller should not keep any references // to nodes *g. +void OptimizeGraph(FunctionLibraryRuntime* lib, std::unique_ptr* g, + const GraphOptimizer::Options& graph_optimizer_options); void OptimizeGraph(FunctionLibraryRuntime* lib, std::unique_ptr* g); // Convert the Graph of a function to a GraphDef. ",0,train 05bef43400c83ca6d27617e27f1661e14f5f05e8,tensorflow/tensorflow,"Do not constant fold nodes with DT_VARIANT type outputs in XLA. XLA does not support Const nodes of type Variant. It needs to see the source ops for the Variant operations to be able to build its own representation. PiperOrigin-RevId: 229801068",graph_optimizer.cc,"@@ -38,8 +38,7 @@ void GraphOptimizer::Optimize( std::unique_ptr* graph, const std::unordered_map>* shape_map, - const std::function& cse_consider_fn, - const std::function& cf_consider_fn) { + const NodePredicate& cse_consider_fn, const NodePredicate& cf_consider_fn) { Graph* g = graph->get(); DumpGraph(""Initial"", g); @@ -103,4 +102,11 @@ void GraphOptimizer::Optimize( DumpGraph(""ReCopy"", graph->get()); } +void GraphOptimizer::Optimize(FunctionLibraryRuntime* runtime, Env* env, + Device* device, std::unique_ptr* graph, + const Options& options) { + Optimize(runtime, env, device, graph, options.shape_map, + options.cse_consider_fn, options.cf_consider_fn); +} + } // end namespace tensorflow ",0,train 05bef43400c83ca6d27617e27f1661e14f5f05e8,tensorflow/tensorflow,"Do not constant fold nodes with DT_VARIANT type outputs in XLA. XLA does not support Const nodes of type Variant. It needs to see the source ops for the Variant operations to be able to build its own representation. 
PiperOrigin-RevId: 229801068",graph_optimizer.h,"@@ -26,6 +26,28 @@ namespace tensorflow { class GraphOptimizer { public: + using NodePredicate = std::function; + + struct Options { + // If not null it maps from nodes in graph to partially-known + // shapes of their outputs, and may be used, e.g., in the constant folding + // pass. The use of shape_map implies that the mapping from node name to the + // vector of partial shapes of its outputs is stable, i.e., no optimization + // pass may replace a node with a different node of the same name that has a + // different number of outputs, or outputs with different known shapes. + // TODO(b/65453533) introduce a unique way to name nodes in a graph. + std::unordered_map>* shape_map = + nullptr; + + // If not null then only nodes for which cse_consider_fn returns true will + // be considered for CSE. + NodePredicate cse_consider_fn = nullptr; + + // If not null then only nodes for which cf_consider_fn returns true will be + // considered for CF. + NodePredicate cf_consider_fn = nullptr; + }; + GraphOptimizer(const OptimizerOptions& opts); ~GraphOptimizer(); @@ -34,26 +56,17 @@ class GraphOptimizer { // on which the 'graph' will execute. It's passed to the optimizers // so that they can respect constraints if any, that should be // respected. - // - // If shape_map is not null it maps from nodes in graph to partially-known - // shapes of their outputs, and may be used, e.g., in the constant folding - // pass. The use of shape_map implies that the mapping from node name to the - // vector of partial shapes of its outputs is stable, i.e., no optimization - // pass may replace a node with a different node of the same name that has a - // different number of outputs, or outputs with different known shapes. - // TODO(b/65453533) introduce a unique way to name nodes in a graph. - // - // If cse_consider_fn is not null then only nodes for which cse_consider_fn - // returns true will be considered for CSE. - // If cf_consider_fn is not null then only nodes for which cf_consider_fn - // returns true will be considered for CF. + void Optimize(FunctionLibraryRuntime* runtime, Env* env, Device* device, + std::unique_ptr* graph, + const Options& graph_optimizer_options); + // DEPRECATED: Consider passing a GraphOptimizer::Options object instead. void Optimize( FunctionLibraryRuntime* runtime, Env* env, Device* device, std::unique_ptr* graph, const std::unordered_map>* shape_map, - const std::function& cse_consider_fn = nullptr, - const std::function& cf_consider_fn = nullptr); + const NodePredicate& cse_consider_fn = nullptr, + const NodePredicate& cf_consider_fn = nullptr); const OptimizerOptions& options() { return opts_; } ",0,train ccee426384468b152aba22e1a9f9a3fd2f92bf00,tensorflow/tensorflow,"Fix use of uninitialized memory in BFCAllocator. PiperOrigin-RevId: 367554433 Change-Id: I5688685303e4b845a477feac371d7c62b7d0c8c8",bfc_allocator.cc,"@@ -45,7 +45,8 @@ BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory, sub_allocator_(sub_allocator), name_(name), free_chunks_list_(kInvalidChunkHandle), - next_allocation_id_(1) { + next_allocation_id_(1), + action_counter_(0) { if (allow_growth) { // 2MiB smallest initial allocation, unless total memory available // is less. ",0,train ab0cbb3cc082a1b4dde989552ccd73986566f57a,tensorflow/tensorflow,"Refine tf.const in TF shape inference. 
PiperOrigin-RevId: 307726788 Change-Id: I7bb1ede57d9c27b191078f7533fad5975f1e713d",shape_inference.cc,"@@ -274,6 +274,15 @@ bool InferShapeForCall(Operation* op) { return changed; } +bool RefineTfConst(TF::ConstOp const_op) { + Type old_type = const_op.getType(); + if (const_op.valueAttr().getType() == old_type) return false; + const_op.getResult().setType(const_op.valueAttr().getType()); + AddCastBackForUnsupportedNonTFUses(const_op, const_op.getResult(), + const_op.getDialect(), old_type); + return true; +} + } // namespace bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, @@ -622,6 +631,13 @@ LogicalResult InferShapeUntilFixPoint(Region* region, int64_t graph_version, return; } + if (auto tf_const = dyn_cast(op)) { + changed |= RefineTfConst(tf_const); + // TODO(jpienaar): Debug why we can't just return here. We end up with + // additional constant due to the propagation of constant into attached + // function if we return already. + } + // Before attempting inference, just try to fold the operation. if (succeeded(folder.tryToFold(op))) return; ",0,train f13063ee503653e0693a1eee461b55e22c14a7a0,tensorflow/tensorflow,"Lower mlir.gpu.memcpy to tfrt_gpu.mem.copy. PiperOrigin-RevId: 392411612 Change-Id: I979a86fe7294e7155d51efd47802a6dd20acdf53",gpu_passes.cc,"@@ -37,6 +37,7 @@ limitations under the License. #include ""tensorflow/compiler/mlir/tfrt/transforms/lhlo_gpu_to_tfrt_gpu/PassDetail.h"" #include ""tensorflow/compiler/mlir/tfrt/transforms/lhlo_gpu_to_tfrt_gpu/ccl_pattern.h"" #include ""tensorflow/compiler/mlir/tfrt/transforms/lhlo_gpu_to_tfrt_gpu/gemm_pattern.h"" +#include ""tensorflow/compiler/mlir/tfrt/transforms/lhlo_gpu_to_tfrt_gpu/memcpy_pattern.h"" #include ""tensorflow/compiler/xla/service/gpu/xlir_ops.h"" #include ""tfrt/gpu/kernels/gpu_ops.h"" // from @tf_runtime #include ""tfrt/gpu/pass/pass.h"" // from @tf_runtime @@ -57,7 +58,8 @@ struct LmhloGpuAsyncConversionPass converter.addConversion([&](BaseMemRefType) { return buffer_type; }); ConversionTarget target(*context); - target.addIllegalDialect(); + target + .addIllegalDialect(); target.addLegalDialect(); target.addDynamicallyLegalOp([&](FuncOp op) { @@ -72,10 +74,12 @@ struct LmhloGpuAsyncConversionPass RewritePatternSet patterns(context); populateCclConversionPattern(patterns); populateGemmConversionPattern(patterns); + populateMemcpyConversionPattern(patterns); populateFuncOpTypeConversionPattern(patterns, converter); ConversionTarget wrap_target(*context); - wrap_target.addLegalDialect(); + wrap_target + .addLegalDialect(); wrap_target.addLegalOp(); tfrt::gpu::populateGpuAsyncConversionPatterns(patterns, converter, ",0,train f13063ee503653e0693a1eee461b55e22c14a7a0,tensorflow/tensorflow,"Lower mlir.gpu.memcpy to tfrt_gpu.mem.copy. PiperOrigin-RevId: 392411612 Change-Id: I979a86fe7294e7155d51efd47802a6dd20acdf53",memcpy_pattern.cc,"@@ -0,0 +1,88 @@ +// Copyright 2020 The TensorFlow Runtime Authors +// +// Licensed under the Apache License, Version 2.0 (the ""License""); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an ""AS IS"" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//===- memcpy_pattern.cc +//---------------------------------------------------------===// +// +// Pattern to lower mlir::gpu::memcpy Ops to tfrt cuda dialect. +// +//===----------------------------------------------------------------------===// +#include ""tensorflow/compiler/mlir/tfrt/transforms/lhlo_gpu_to_tfrt_gpu/memcpy_pattern.h"" + +#include +#include + +#include +#include + +#include ""mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.h"" +#include ""mlir/IR/BuiltinAttributes.h"" +#include ""mlir/IR/Location.h"" +#include ""mlir/IR/Types.h"" +#include ""mlir/Transforms/DialectConversion.h"" +#include ""llvm/ADT/ArrayRef.h"" +#include ""llvm/ADT/StringRef.h"" +#include ""mlir/Dialect/StandardOps/IR/Ops.h"" // from @llvm-project +#include ""mlir/IR/BlockAndValueMapping.h"" // from @llvm-project +#include ""mlir/IR/Value.h"" // from @llvm-project +#include ""mlir/Support/LogicalResult.h"" // from @llvm-project +#include ""tensorflow/compiler/mlir/xla/type_to_shape.h"" +#include ""tensorflow/compiler/xla/layout_util.h"" +#include ""tensorflow/compiler/xla/shape.h"" +#include ""tfrt/gpu/kernels/gpu_ops.h"" // from @tf_runtime +#include ""tfrt/gpu/pass/pass.h"" // from @tf_runtime +#include ""tfrt/gpu/wrapper/cublas_wrapper.h"" // from @tf_runtime +#include ""tfrt/basic_kernels/opdefs/basic_kernels.h"" // from @tf_runtime +#include ""tfrt/basic_kernels/opdefs/types.h"" // from @tf_runtime + +namespace tensorflow { +namespace { + +using llvm::ArrayRef; + +// Creates tfrt::gpu::MemCopyOp from mlir::gpu::MemcpyOp. +struct MemcpyRewritePattern + : tfrt::gpu::GpuAsyncOpConversionPattern { + using tfrt::gpu::GpuAsyncOpConversionPattern< + mlir::gpu::MemcpyOp>::GpuAsyncOpConversionPattern; + + FailureOr matchAndRewriteOp( + mlir::gpu::MemcpyOp op, Value chain, Value stream, + ArrayRef operands, + ConversionPatternRewriter& rewriter) const override { + if (!all_of(operands, [](Value operand) { + return operand.getType().isa(); + })) + return rewriter.notifyMatchFailure(op, ""expected buffer operands""); + + BlockAndValueMapping mapping; + for (auto pair : llvm::zip_first(op->getOperands(), operands)) + mapping.map(std::get<0>(pair), std::get<1>(pair)); + + rewriter.eraseOp(op); + + return rewriter + .create(op.getLoc(), mapping.lookup(op.dst()), + mapping.lookup(op.src()), stream, chain) + .getResult(); + } +}; + +} // namespace + +void populateMemcpyConversionPattern(RewritePatternSet& patterns) { + patterns.add(patterns.getContext()); +} + +} // namespace tensorflow ",0,train f13063ee503653e0693a1eee461b55e22c14a7a0,tensorflow/tensorflow,"Lower mlir.gpu.memcpy to tfrt_gpu.mem.copy. PiperOrigin-RevId: 392411612 Change-Id: I979a86fe7294e7155d51efd47802a6dd20acdf53",memcpy_pattern.h,"@@ -0,0 +1,33 @@ +// Copyright 2020 The TensorFlow Runtime Authors +// +// Licensed under the Apache License, Version 2.0 (the ""License""); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an ""AS IS"" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef TENSORFLOW_COMPILER_MLIR_TFRT_TRANSFORMS_LHLO_GPU_TO_TFRT_GPU_MEMCPY_PATTERN_H_ +#define TENSORFLOW_COMPILER_MLIR_TFRT_TRANSFORMS_LHLO_GPU_TO_TFRT_GPU_MEMCPY_PATTERN_H_ + +#include ""mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.h"" +#include ""mlir/IR/BlockAndValueMapping.h"" +#include ""mlir/Transforms/DialectConversion.h"" +#include ""llvm/ADT/ArrayRef.h"" +#include ""llvm/ADT/StringRef.h"" +#include ""mlir/Dialect/GPU/GPUDialect.h"" // from @llvm-project + +namespace tensorflow { + +// Add a pattern to the given pattern list to convert from mlir::gpu::MemcpyOp +// to tfrt::gpu::MemCopyOp. +void populateMemcpyConversionPattern(mlir::RewritePatternSet& patterns); + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_MLIR_TFRT_TRANSFORMS_LHLO_GPU_TO_TFRT_GPU_MEMCPY_PATTERN_H_ ",0,train f13063ee503653e0693a1eee461b55e22c14a7a0,tensorflow/tensorflow,"Lower mlir.gpu.memcpy to tfrt_gpu.mem.copy. PiperOrigin-RevId: 392411612 Change-Id: I979a86fe7294e7155d51efd47802a6dd20acdf53",bef_thunk.cc,"@@ -213,6 +213,9 @@ static StatusOr GetThunkKind(mlir::Operation* op) { if (mlir::isa(op)) { return Thunk::Kind::kGemm; } + if (mlir::isa(op)) { + return Thunk::Kind::kMemcpy; + } if (mlir::isa(op)) { return Thunk::Kind::kNcclAllGather; } ",0,train f13063ee503653e0693a1eee461b55e22c14a7a0,tensorflow/tensorflow,"Lower mlir.gpu.memcpy to tfrt_gpu.mem.copy. PiperOrigin-RevId: 392411612 Change-Id: I979a86fe7294e7155d51efd47802a6dd20acdf53",mlir_gpu_test_base.cc,"@@ -135,7 +135,7 @@ MlirGpuTestBase::RunMlirModuleWithHostBuffers( StatusOr MlirGpuTestBase::ParseMlirModule( absl::string_view module_text, mlir::MLIRContext& context) { context.loadDialect(); llvm::SourceMgr source_mgr; std::string diagnostic_str; ",0,train f13063ee503653e0693a1eee461b55e22c14a7a0,tensorflow/tensorflow,"Lower mlir.gpu.memcpy to tfrt_gpu.mem.copy. PiperOrigin-RevId: 392411612 Change-Id: I979a86fe7294e7155d51efd47802a6dd20acdf53",thunk.cc,"@@ -70,6 +70,8 @@ StatusOr Thunk::ExecuteParams::GetGlobalDeviceId() const { return ""kInfeed""; case Thunk::kKernel: return ""kKernel""; + case Thunk::kMemcpy: + return ""kMemcpy""; case Thunk::kMemset32BitValue: return ""kMemset32BitValue""; case Thunk::kMemzero: ",0,train f13063ee503653e0693a1eee461b55e22c14a7a0,tensorflow/tensorflow,"Lower mlir.gpu.memcpy to tfrt_gpu.mem.copy. PiperOrigin-RevId: 392411612 Change-Id: I979a86fe7294e7155d51efd47802a6dd20acdf53",thunk.h,"@@ -56,6 +56,7 @@ class Thunk { kGemm, kInfeed, kKernel, + kMemcpy, kMemset32BitValue, kMemzero, kNcclAllGather, ",0,train 3e15ac3dd22d58e45b7d6db17dedbb189d789891,tensorflow/tensorflow,"[TF:XLA] Copy elision does not need to know about existing copies. It already detects layout-changing copies and those are already left unchanged by copy elision. 
Special case copies are also skipped because they are tagged separately (SetCopyElisionAllowed) PiperOrigin-RevId: 202574858",copy_insertion.cc,"@@ -1093,8 +1093,7 @@ void MaybeDumpModule(const string& message, const HloModule& module) { } // namespace Status RemoveUnnecessaryCopies( - const HloOrdering& ordering, - const tensorflow::gtl::FlatSet& copies_to_exclude, HloModule* module, + const HloOrdering& ordering, HloModule* module, const HloDataflowAnalysis::FusionCanShareBufferFunction& fusion_can_share_buffer) { MaybeDumpModule(""after adding copies to resolve interference"", *module); @@ -1108,7 +1107,6 @@ Status RemoveUnnecessaryCopies( for (HloComputation* computation : module->computations()) { for (HloInstruction* instruction : computation->instructions()) { if (instruction->opcode() == HloOpcode::kCopy && - !ContainsKey(copies_to_exclude, instruction->unique_id()) && instruction->CopyElisionAllowed()) { TF_RETURN_IF_ERROR(copy_remover.TryElideCopy(instruction).status()); } @@ -1152,16 +1150,13 @@ StatusOr CopyInsertion::Run(HloModule* module) { ""Call graph must be flattened before copy insertion.""); } - // Gather Ids of existing kCopy instructions in the module. We avoid removing - // these copies (except via DCE in TupleSimplifier) because they may have been - // added for reasons not considered by copy insertion (eg, layout assignment). - // Instruction id is used instead of HloInstruction* because the pointer - // values may be recycled. - tensorflow::gtl::FlatSet existing_copies; - for (HloComputation* computation : module->computations()) { - for (HloInstruction* instruction : computation->instructions()) { - if (instruction->opcode() == HloOpcode::kCopy) { - existing_copies.insert(instruction->unique_id()); + int64 num_existing_copies = 0; + if (VLOG_IS_ON(1)) { + for (HloComputation* computation : module->computations()) { + for (HloInstruction* instruction : computation->instructions()) { + if (instruction->opcode() == HloOpcode::kCopy) { + ++num_existing_copies; + } } } } @@ -1181,8 +1176,7 @@ StatusOr CopyInsertion::Run(HloModule* module) { TF_DCHECK_OK(VerifyNoLiveRangeInterference(module)); DependencyHloOrdering ordering(module); - TF_RETURN_IF_ERROR( - RemoveUnnecessaryCopies(ordering, existing_copies, module)); + TF_RETURN_IF_ERROR(RemoveUnnecessaryCopies(ordering, module)); TF_RETURN_IF_ERROR(AddSpecialCaseCopies(*call_graph, module)); @@ -1203,7 +1197,7 @@ StatusOr CopyInsertion::Run(HloModule* module) { } } } - VLOG(1) << ""Num copies before copy-insertion: "" << existing_copies.size(); + VLOG(1) << ""Num copies before copy-insertion: "" << num_existing_copies; VLOG(1) << ""Num copies after copy-insertion: "" << num_total_copies; } ",0,train 3e15ac3dd22d58e45b7d6db17dedbb189d789891,tensorflow/tensorflow,"[TF:XLA] Copy elision does not need to know about existing copies. It already detects layout-changing copies and those are already left unchanged by copy elision. Special case copies are also skipped because they are tagged separately (SetCopyElisionAllowed) PiperOrigin-RevId: 202574858",copy_insertion.h,"@@ -21,7 +21,6 @@ limitations under the License. 
#include ""tensorflow/compiler/xla/service/hlo_instruction.h"" #include ""tensorflow/compiler/xla/service/hlo_module.h"" #include ""tensorflow/compiler/xla/service/hlo_pass_interface.h"" -#include ""tensorflow/core/lib/gtl/flatmap.h"" namespace xla { @@ -79,11 +78,10 @@ class CopyInsertion : public HloPassInterface { }; // Try to remove as many copies from the module as possible without introducing -// live range interference. Copy instructions (identified by their unique id) in -// the set copies_to_exclude are not considered for removal. +// live range interference. Only copy instructions that are eligible for +// copy elision are considered for removal. Status RemoveUnnecessaryCopies( - const HloOrdering& ordering, - const tensorflow::gtl::FlatSet& copies_to_exclude, HloModule* module, + const HloOrdering& ordering, HloModule* module, const HloDataflowAnalysis::FusionCanShareBufferFunction& fusion_can_share_buffer = nullptr); ",0,train 3e15ac3dd22d58e45b7d6db17dedbb189d789891,tensorflow/tensorflow,"[TF:XLA] Copy elision does not need to know about existing copies. It already detects layout-changing copies and those are already left unchanged by copy elision. Special case copies are also skipped because they are tagged separately (SetCopyElisionAllowed) PiperOrigin-RevId: 202574858",copy_insertion_test.cc,"@@ -125,21 +125,27 @@ TEST_F(CopyInsertionTest, SingleConstant) { } TEST_F(CopyInsertionTest, ExistingCopiesNotRemoved) { - // Verify that an kCopy instructions which exist in the pass before + // Verify that kCopy instructions which change layout and exist before // copy-insertion remain in the graph after copy-insertion. auto module = CreateNewModule(); auto builder = HloComputation::Builder(TestName()); - HloInstruction* constant = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(1.0))); - HloInstruction* copy_1 = builder.AddInstruction(HloInstruction::CreateUnary( - constant->shape(), HloOpcode::kCopy, constant)); - HloInstruction* copy_2 = builder.AddInstruction(HloInstruction::CreateUnary( - constant->shape(), HloOpcode::kCopy, constant)); + HloInstruction* constant = + builder.AddInstruction(HloInstruction::CreateConstant( + Literal::CreateR2({{0.f, 2.f}, {2.f, 4.f}}))); + auto minor_to_major = LayoutUtil::MinorToMajor(constant->shape()); + Layout reversed_layout = + LayoutUtil::MakeLayoutFromMajorToMinor(minor_to_major); + Shape copy_shape = constant->shape(); + *copy_shape.mutable_layout() = reversed_layout; + HloInstruction* copy_1 = builder.AddInstruction( + HloInstruction::CreateUnary(copy_shape, HloOpcode::kCopy, constant)); + HloInstruction* copy_2 = builder.AddInstruction( + HloInstruction::CreateUnary(copy_shape, HloOpcode::kCopy, constant)); HloInstruction* add = builder.AddInstruction(HloInstruction::CreateBinary( constant->shape(), HloOpcode::kAdd, copy_1, copy_2)); - HloInstruction* add_copy = builder.AddInstruction( - HloInstruction::CreateUnary(constant->shape(), HloOpcode::kCopy, add)); + builder.AddInstruction( + HloInstruction::CreateUnary(add->shape(), HloOpcode::kCopy, add)); module->AddEntryComputation(builder.Build()); @@ -147,12 +153,11 @@ TEST_F(CopyInsertionTest, ExistingCopiesNotRemoved) { InsertCopies(module.get()); - EXPECT_EQ(CountCopies(*module), 3); + EXPECT_EQ(CountCopies(*module), 2); - EXPECT_EQ(module->entry_computation()->root_instruction(), add_copy); - EXPECT_THAT( - module->entry_computation()->root_instruction(), - op::Copy(op::Add(op::Copy(op::Constant()), op::Copy(op::Constant())))); + 
EXPECT_EQ(module->entry_computation()->root_instruction(), add); + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Add(op::Copy(op::Constant()), op::Copy(op::Constant()))); } TEST_F(CopyInsertionTest, MultipleConstantsAndParameters) { ",0,train 3e15ac3dd22d58e45b7d6db17dedbb189d789891,tensorflow/tensorflow,"[TF:XLA] Copy elision does not need to know about existing copies. It already detects layout-changing copies and those are already left unchanged by copy elision. Special case copies are also skipped because they are tagged separately (SetCopyElisionAllowed) PiperOrigin-RevId: 202574858",hlo_rematerialization.cc,"@@ -1244,7 +1244,7 @@ StatusOr HloRematerialization::Run( // TODO(b/80249101): Instead of a separate copy elision pass, use the // ordering from the HLO schedule directly for copy insertion. SequentialHloOrdering ordering(module, *sequence); - TF_RETURN_IF_ERROR(RemoveUnnecessaryCopies(ordering, {}, module)); + TF_RETURN_IF_ERROR(RemoveUnnecessaryCopies(ordering, module)); } // Compute peak memory usage of all computations in the module called in a ",0,train ee85e6d230278e763a2784ba86acc747abdb2242,tensorflow/tensorflow,"Use the numerically stable two-pass algorithm to calculate variance in MeanStddevNormalization. Add an extra test case with large mean and large variance. PiperOrigin-RevId: 281179296 Change-Id: Ib2a5c3a0b0870670c3c41afc15b9ea3e13fa1f8c",portable_tensor_utils.cc,"@@ -624,13 +624,16 @@ void PortableMeanStddevNormalization(const float* input_vector, int n_batch) { for (int batch = 0; batch < n_batch; ++batch) { float sum = 0.0f; - float sum_sq = 0.0f; for (int i = 0; i < v_size; ++i) { sum += input_vector[i]; - sum_sq += input_vector[i] * input_vector[i]; } const float mean = sum / v_size; - const float variance = sum_sq / v_size - mean * mean; + float sum_diff_sq = 0.0f; + for (int i = 0; i < v_size; ++i) { + const float diff = input_vector[i] - mean; + sum_diff_sq += diff * diff; + } + const float variance = sum_diff_sq / v_size; constexpr float kNormalizationConstant = 1e-8f; const float stddev_inv = 1.0f / std::sqrt(variance + kNormalizationConstant); ",0,train ee85e6d230278e763a2784ba86acc747abdb2242,tensorflow/tensorflow,"Use the numerically stable two-pass algorithm to calculate variance in MeanStddevNormalization. Add an extra test case with large mean and large variance. PiperOrigin-RevId: 281179296 Change-Id: Ib2a5c3a0b0870670c3c41afc15b9ea3e13fa1f8c",tensor_utils_test.cc,"@@ -1502,13 +1502,13 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple(0.01f, 0.01f, 2.53e-5f), // small mean, small variance std::make_tuple(0.01f, 100.0f, 1.20e-7f), // small mean, large variance std::make_tuple(100.0f, 0.0f, 0.0f), // large mean, zero variance - std::make_tuple(100.0f, 0.01f, 199.0f), // large mean, small variance + std::make_tuple(100.0f, 0.01f, 1.81e-4f), // large mean, small variance std::make_tuple(100.0f, 100.0f, 1.20e-7f) // large mean, large variance )); TEST(uKernels, MeanStddevNormalizationAllBatches) { constexpr int kVectorSize = 4; - constexpr int kBatchSize = 8; // 9, but large mean, small variance fails + constexpr int kBatchSize = 9; // None-zero input. 
static float input[kVectorSize * kBatchSize] = { @@ -1519,6 +1519,7 @@ TEST(uKernels, MeanStddevNormalizationAllBatches) { -0.01f, 0.0f, 0.02f, 0.03f, // small mean, small variance -199.99f, -99.99f, 100.01f, 200.01f, // small mean, large variance 100.0f, 100.0f, 100.0f, 100.0f, // large mean, zero variance + 99.98f, 99.99f, 100.01f, 100.02f, // large mean, small variance -100.0f, 0.0f, 200.0f, 300.0f, // large mean, large variance }; float output[kVectorSize * kBatchSize]; @@ -1533,10 +1534,11 @@ TEST(uKernels, MeanStddevNormalizationAllBatches) { -ksqrt16, -ksqrt04, ksqrt04, ksqrt16, // small mean, small variance -ksqrt16, -ksqrt04, ksqrt04, ksqrt16, // small mean, large variance 0.0f, 0.0f, 0.0f, 0.0f, // large mean, zero variance + -ksqrt16, -ksqrt04, ksqrt04, ksqrt16, // large mean, small variance -ksqrt16, -ksqrt04, ksqrt04, ksqrt16, // large mean, large variance }; EXPECT_THAT(output, testing::ElementsAreArray( - ArrayFloatNear(expected_output, 2.6e-5f))); + ArrayFloatNear(expected_output, 1.81e-4f))); } } // namespace tensor_utils ",0,train 7dd3d091a3346622c366eecc3e7509221d91fad1,tensorflow/tensorflow,"[dataset]: Remove extra `repeat` in the docstring for `shard`. PiperOrigin-RevId: 197185877",dataset_ops.py,"@@ -740,7 +740,6 @@ class Dataset(object): d = d.shard(FLAGS.num_workers, FLAGS.worker_index) d = d.repeat(FLAGS.num_epochs) d = d.shuffle(FLAGS.shuffle_buffer_size) - d = d.repeat() d = d.interleave(tf.data.TFRecordDataset, cycle_length=FLAGS.num_readers, block_length=1) d = d.map(parser_fn, num_parallel_calls=FLAGS.num_map_threads) ",0,train daf85eddacbdacec50c3d67b145cb1ff59928484,tensorflow/tensorflow,"Support gradient multiplier for embeddings in TPUEstimator. PiperOrigin-RevId: 241354959",_tpu_estimator_embedding.py,"@@ -25,11 +25,14 @@ import six from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.feature_column import feature_column as core_fc from tensorflow.python.feature_column import feature_column_lib as core_fc_lib +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.tpu import feature_column as tpu_fc from tensorflow.python.tpu import tpu_embedding from tensorflow.python.tpu.tpu_embedding import AdagradParameters from tensorflow.python.tpu.tpu_embedding import AdamParameters from tensorflow.python.tpu.tpu_embedding import StochasticGradientDescentParameters +from tensorflow.python.training import training # pylint: disable=protected-access _TPU_EMBEDDING_COLUMN_CLASSES = (tpu_fc._TPUEmbeddingColumn, @@ -150,7 +153,8 @@ def get_tpu_embedding_config_from_feature_columns(feature_columns): class EmbeddingConfigSpec( collections.namedtuple('EmbeddingConfigSpec', [ 'feature_columns', 'optimization_parameters', 'clipping_limit', - 'pipeline_execution_with_tensor_core' + 'pipeline_execution_with_tensor_core', + 'experimental_gradient_multiplier_fn' ])): """"""Class to keep track of embedding config specification."""""" @@ -158,7 +162,8 @@ class EmbeddingConfigSpec( feature_columns, optimization_parameters, clipping_limit=None, - pipeline_execution_with_tensor_core=False): + pipeline_execution_with_tensor_core=False, + experimental_gradient_multiplier_fn=None): """"""Creates an EmbeddingConfigSpec instance. Args: @@ -172,6 +177,8 @@ class EmbeddingConfigSpec( faster, but trained model will be different if step N and step N+1 involve the same set of embedding IDs. Please see `tpu_embedding_configuration.proto` for details. 
+ experimental_gradient_multiplier_fn: (Optional) A Fn taking global step as + input returning the current multiplier for all embedding gradients. Returns: An EmbeddingConfigSpec instance. @@ -208,7 +215,8 @@ class EmbeddingConfigSpec( feature_columns=feature_columns, optimization_parameters=optimization_parameters, clipping_limit=clipping_limit, - pipeline_execution_with_tensor_core=pipeline_execution_with_tensor_core) + pipeline_execution_with_tensor_core=pipeline_execution_with_tensor_core, + experimental_gradient_multiplier_fn=experimental_gradient_multiplier_fn) class EmbeddingConfig(object): @@ -221,6 +229,9 @@ class EmbeddingConfig(object): def __init__(self, embedding_config_spec, train_batch_size, eval_batch_size, num_hosts, num_cores, run_config): + if not embedding_config_spec: + raise ValueError('embedding_config_spec cannot be None.') + self._embedding_config_spec = embedding_config_spec self._train_batch_size = train_batch_size self._eval_batch_size = eval_batch_size @@ -234,6 +245,15 @@ class EmbeddingConfig(object): self._mode_to_tpu_embedding_dict = {} self.dummy_table_variables = None + self._grad_multiplier_fn = ( + embedding_config_spec.experimental_gradient_multiplier_fn) + + def get_grad_multiplier(self): + if self._grad_multiplier_fn: + return ops.convert_to_tensor( + self._grad_multiplier_fn(training.get_global_step()), + dtype=dtypes.float32) + def has_embedding_tables(self): return bool(self._table_to_config_dict) ",0,train daf85eddacbdacec50c3d67b145cb1ff59928484,tensorflow/tensorflow,"Support gradient multiplier for embeddings in TPUEstimator. PiperOrigin-RevId: 241354959",tpu_estimator.py,"@@ -1488,8 +1488,14 @@ class _ModelFnWrapper(object): tpu_embedding_gradient.get_gradients_through_dummy_table_variables( tpu_embedding_) ) + grad_multiplier = self._ctx.embedding_config.get_grad_multiplier() + if grad_multiplier is not None: + scaled_gradients = collections.OrderedDict( + (k, v * grad_multiplier) for k, v in six.iteritems(gradients)) + else: + scaled_gradients = gradients apply_sparse_grads = [ - tpu_embedding_.generate_send_gradients_op(gradients) + tpu_embedding_.generate_send_gradients_op(scaled_gradients) ] # We must run train_op to update the variables prior to running the ",0,train b74d6ba60a6fee82f430a8c8ba80cace44050cd9,tensorflow/tensorflow,"Remove run_v1_only from model_coverage Python tests. 
PiperOrigin-RevId: 261199843",model_coverage_lib_test.py,"@@ -38,7 +38,6 @@ from tensorflow.python.saved_model import saved_model from tensorflow.python.training.training_util import write_graph -@test_util.run_v1_only('Incompatible with 2.0.') class EvaluateFrozenGraph(test.TestCase): def _saveFrozenGraph(self, sess): @@ -47,27 +46,29 @@ class EvaluateFrozenGraph(test.TestCase): return graph_def_file def testFloat(self): - with session.Session().as_default() as sess: - in_tensor = array_ops.placeholder( - shape=[1, 16, 16, 3], dtype=dtypes.float32) - _ = in_tensor + in_tensor - filename = self._saveFrozenGraph(sess) + with ops.Graph().as_default(): + with session.Session().as_default() as sess: + in_tensor = array_ops.placeholder( + shape=[1, 16, 16, 3], dtype=dtypes.float32) + _ = in_tensor + in_tensor + filename = self._saveFrozenGraph(sess) model_coverage.test_frozen_graph(filename, ['Placeholder'], ['add']) def testMultipleOutputs(self): - with session.Session().as_default() as sess: - in_tensor_1 = array_ops.placeholder( - shape=[1, 16], dtype=dtypes.float32, name='inputA') - in_tensor_2 = array_ops.placeholder( - shape=[1, 16], dtype=dtypes.float32, name='inputB') - - weight = constant_op.constant(-1.0, shape=[16, 16]) - bias = constant_op.constant(-1.0, shape=[16]) - layer = math_ops.matmul(in_tensor_1, weight) + bias - _ = math_ops.reduce_mean(math_ops.square(layer - in_tensor_2)) - filename = self._saveFrozenGraph(sess) + with ops.Graph().as_default(): + with session.Session().as_default() as sess: + in_tensor_1 = array_ops.placeholder( + shape=[1, 16], dtype=dtypes.float32, name='inputA') + in_tensor_2 = array_ops.placeholder( + shape=[1, 16], dtype=dtypes.float32, name='inputB') + weight = constant_op.constant(-1.0, shape=[16, 16]) + bias = constant_op.constant(-1.0, shape=[16]) + layer = math_ops.matmul(in_tensor_1, weight) + bias + _ = math_ops.reduce_mean(math_ops.square(layer - in_tensor_2)) + + filename = self._saveFrozenGraph(sess) model_coverage.test_frozen_graph(filename, ['inputA', 'inputB'], ['add', 'Mean']) @@ -94,17 +95,18 @@ class EvaluateFrozenGraph(test.TestCase): def _getQuantizedModel(self): np.random.seed(0) - with session.Session().as_default() as sess: - # The tensor needs to have more than 1024 elements for quantize_weights to - # kick in. Thus, the [33, 33] shape. - in_tensor_1 = array_ops.placeholder( - shape=[33, 33], dtype=dtypes.float32, name='inputA') - in_tensor_2 = constant_op.constant( - np.random.uniform(low=-10., high=10., size=(33, 33)), - shape=[33, 33], - dtype=dtypes.float32, - name='inputB') - _ = math_ops.matmul(in_tensor_1, in_tensor_2, name='output') + with ops.Graph().as_default(): + with session.Session().as_default() as sess: + # The tensor needs to have more than 1024 elements for quantize_weights + # to kick in. Thus, the [33, 33] shape. 
+ in_tensor_1 = array_ops.placeholder( + shape=[33, 33], dtype=dtypes.float32, name='inputA') + in_tensor_2 = constant_op.constant( + np.random.uniform(low=-10., high=10., size=(33, 33)), + shape=[33, 33], + dtype=dtypes.float32, + name='inputB') + _ = math_ops.matmul(in_tensor_1, in_tensor_2, name='output') filename = self._saveFrozenGraph(sess) return filename @@ -125,25 +127,24 @@ class EvaluateFrozenGraph(test.TestCase): target_ops=set([lite.OpsSet.SELECT_TF_OPS])) -@test_util.run_v1_only('Incompatible with 2.0.') class EvaluateSavedModel(test.TestCase): def testFloat(self): saved_model_dir = os.path.join(self.get_temp_dir(), 'simple_savedmodel') - with session.Session().as_default() as sess: - in_tensor_1 = array_ops.placeholder( - shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB') - in_tensor_2 = array_ops.placeholder( - shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA') - out_tensor = in_tensor_1 + in_tensor_2 - - inputs = {'x': in_tensor_1, 'y': in_tensor_2} - outputs = {'z': out_tensor} - saved_model.simple_save(sess, saved_model_dir, inputs, outputs) + with ops.Graph().as_default(): + with session.Session().as_default() as sess: + in_tensor_1 = array_ops.placeholder( + shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB') + in_tensor_2 = array_ops.placeholder( + shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA') + out_tensor = in_tensor_1 + in_tensor_2 + + inputs = {'x': in_tensor_1, 'y': in_tensor_2} + outputs = {'z': out_tensor} + saved_model.simple_save(sess, saved_model_dir, inputs, outputs) model_coverage.test_saved_model(saved_model_dir) -@test_util.run_v1_only('Incompatible with 2.0.') class EvaluateKerasModel(test.TestCase): def _getSingleInputKerasModel(self): ",0,train 549dd6fd66a9b176ee3fe5e7093e4a1654bcbdb1,tensorflow/tensorflow,"Add an argument `stop_gradients` to `tf.gradients` in order to hold specific tensors constant wrt `xs`. PiperOrigin-RevId: 167501127",gradients_impl.py,"@@ -278,7 +278,7 @@ def _VerifyGeneratedGradients(grads, op): ""inputs %d"" % (len(grads), op.node_def, len(op.inputs))) -def _StopOps(from_ops, pending_count): +def _StopOps(from_ops, stop_gradient_ops, pending_count): """"""The set of ops that terminate the gradient computation. This computes the frontier of the forward graph *before* which backprop @@ -288,8 +288,11 @@ def _StopOps(from_ops, pending_count): `_PendingCount(g, xs, from_ops)`. An 'op' has predecessors in `from_ops` iff pending_count[op._id] > 0. + In addition, none of `stop_gradient_ops` will be differentiated. + Args: from_ops: list of Operations. + stop_gradient_ops: list of Operations never to backprop through. pending_count: List of integers, indexed by operation id. Returns: @@ -304,6 +307,7 @@ def _StopOps(from_ops, pending_count): break if is_stop_op: stop_ops.add(op._id) + stop_ops.update(op._id for op in stop_gradient_ops) # pylint: disable=protected-access return stop_ops @@ -374,17 +378,17 @@ def gradients(ys, name=""gradients"", colocate_gradients_with_ops=False, gate_gradients=False, - aggregation_method=None): - """"""Constructs symbolic partial derivatives of sum of `ys` w.r.t. x in `xs`. + aggregation_method=None, + stop_gradients=None): + """"""Constructs symbolic derivatives of sum of `ys` w.r.t. x in `xs`. `ys` and `xs` are each a `Tensor` or a list of tensors. `grad_ys` is a list of `Tensor`, holding the gradients received by the `ys`. The list must be the same length as `ys`. 
- `gradients()` adds ops to the graph to output the partial - derivatives of `ys` with respect to `xs`. It returns a list of - `Tensor` of length `len(xs)` where each tensor is the `sum(dy/dx)` - for y in `ys`. + `gradients()` adds ops to the graph to output the derivatives of `ys` with + respect to `xs`. It returns a list of `Tensor` of length `len(xs)` where + each tensor is the `sum(dy/dx)` for y in `ys`. `grad_ys` is a list of tensors of the same length as `ys` that holds the initial gradients for each y in `ys`. When `grad_ys` is None, @@ -394,6 +398,31 @@ def gradients(ys, one wanted to weight the gradient differently for each value in each y). + `stop_gradients` is a `Tensor` or a list of tensors to be considered constant + with respect to all `xs`. These tensors will not be backpropagated through, + as though they had been explicitly disconnected using `stop_gradient`. Among + other things, this allows computation of partial derivatives as opposed to + total derivatives. For example: + + a = tf.constant(0.) + b = 2 * a + g = tf.gradients(a + b, [a, b], stop_gradients=[a, b]) + + Here the partial derivatives `g` evaluate to `[1.0, 1.0]`, compared to the + total derivatives `tf.gradients(a + b, [a, b])`, which take into account the + influence of `a` on `b` and evaluate to `[3.0, 1.0]`. Note that the above is + equivalent to: + + a = tf.stop_gradient(tf.constant(0.)) + b = tf.stop_gradient(2 * a) + g = tf.gradients(a + b, [a, b]) + + `stop_gradients` provides a way of stopping gradient after the graph has + already been constructed, as compared to `tf.stop_gradient` which is used + during graph construction. When the two approaches are combined, + backpropagation stops at both `tf.stop_gradient` nodes and nodes in + `stop_gradients`, whichever is encountered first. + Args: ys: A `Tensor` or list of tensors to be differentiated. xs: A `Tensor` or list of tensors to be used for differentiation. @@ -407,6 +436,8 @@ def gradients(ys, for an operations. This avoids some race conditions. aggregation_method: Specifies the method used to combine gradient terms. Accepted values are constants defined in the class `AggregationMethod`. + stop_gradients: Optional. A `Tensor` or list of tensors not to differentiate + through. Returns: A list of `sum(dy/dx)` for each x in `xs`. @@ -423,12 +454,15 @@ def gradients(ys, ""functions in tf.contrib.eager.backprop instead."") ys = _AsList(ys) xs = _AsList(xs) + stop_gradients = [] if stop_gradients is None else _AsList(stop_gradients) if grad_ys is None: grad_ys = [None] * len(ys) else: grad_ys = _AsList(grad_ys) - with ops.name_scope(name, ""gradients"", ys + xs + grad_ys) as grad_scope: + with ops.name_scope( + name, ""gradients"", + list(ys) + list(xs) + list(stop_gradients) + list(grad_ys)) as grad_scope: ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name=""y"") xs = [x.handle if isinstance(x, resource_variable_ops.ResourceVariable) else x @@ -450,6 +484,7 @@ def gradients(ys, ys = [array_ops.identity(y) if y.consumers() else y for y in ys] to_ops = [t.op for t in ys] from_ops = [t.op for t in xs] + stop_gradient_ops = [t.op for t in stop_gradients] pending_count, loop_state = _PendingCount(ops.get_default_graph(), to_ops, from_ops, colocate_gradients_with_ops) @@ -488,8 +523,7 @@ def gradients(ys, _SetGrad(grads, y, loop_state.ZerosLikeForExit(y)) queue.append(y.op) - # The set of 'from_ops'. 
- stop_ops = _StopOps(from_ops, pending_count) + stop_ops = _StopOps(from_ops, stop_gradient_ops, pending_count) while queue: # generate gradient subgraph for op. op = queue.popleft() ",0,train 549dd6fd66a9b176ee3fe5e7093e4a1654bcbdb1,tensorflow/tensorflow,"Add an argument `stop_gradients` to `tf.gradients` in order to hold specific tensors constant wrt `xs`. PiperOrigin-RevId: 167501127",gradients_test.py,"@@ -349,6 +349,64 @@ class GradientsTest(test_util.TensorFlowTestCase): g = gradients.gradients([z, z2], x) self.assertAllClose(17502.0, g[0].eval()) + def testPartialDerivatives(self): + with self.test_session(): + x = constant_op.constant(1.) + y = 2 * x + z = x + y + totalg = gradients.gradients(z, [x, y]) + self.assertEqual([3.0, 1.0], [g.eval() for g in totalg]) + partialg = gradients.gradients(z, [x, y], stop_gradients=[x, y]) + self.assertEqual([1.0, 1.0], [g.eval() for g in partialg]) + + def testStopGradients(self): + def _MakeGraph(rng, stop_gradients=()): + def _FunctionOf(xs, k=3): + return ops.convert_to_tensor( + sum(math_ops.matmul(rng.rand(k, k), x) for x in xs) + + rng.rand(k, k)) + + a = _FunctionOf([]) + if ""a"" in stop_gradients: a = array_ops.stop_gradient(a) + b = _FunctionOf([a]) + if ""b"" in stop_gradients: b = array_ops.stop_gradient(b) + c = _FunctionOf([a, b]) + if ""c"" in stop_gradients: c = array_ops.stop_gradient(c) + d = _FunctionOf([b, c]) + if ""d"" in stop_gradients: d = array_ops.stop_gradient(d) + return dict(a=a, b=b, c=c, d=d) + + def _Gradients(ys, xs, **kwargs): + dydxs = gradients.gradients(ys, xs, **kwargs) + dydxs = [0. * x if dydx is None else dydx + for x, dydx in zip(xs, dydxs)] + return dydxs + + seed = np.random.randint(1000) + cases = [] + subsets = [""""] + ""a b c d ab ac ad bc bd cd abc abd acd bcd abcd"".split() + graph = _MakeGraph(np.random.RandomState(seed)) + for constants in subsets: + graph_with_stops = _MakeGraph(np.random.RandomState(seed), constants) + for variables_ in subsets: + # compute the gradient when stopped using tf.stop_gradients + grad1 = _Gradients([graph_with_stops[""d""]], + [graph_with_stops[v] for v in variables_]) + # compute the gradient when stopped using the stop_gradients kwarg + grad2 = _Gradients([graph[""d""]], + [graph[v] for v in variables_], + stop_gradients=[graph[v] for v in constants]) + cases.append(dict(grad1=grad1, grad2=grad2, + constants=constants, variables=variables_)) + + # evaluate all tensors in one call to session.run for speed + with self.test_session() as session: + results = session.run([(case[""grad1""], case[""grad2""]) for case in cases]) + + for (npgrad1, npgrad2), case in zip(results, cases): + for a, b in zip(npgrad1, npgrad2): + np.testing.assert_allclose(a, b) + class FunctionGradientsTest(test_util.TensorFlowTestCase): ",0,train 386de03d9d76d934ca99a00c02060e57e8ab86b0,tensorflow/tensorflow,"Move to using stateless image_ops so that the random seed constructor argument works. 
PiperOrigin-RevId: 378938459 Change-Id: I3d6d24a9accc8a52bbf48afeca180eccf8f7d493",image_preprocessing.py,"@@ -412,11 +412,13 @@ class RandomFlip(base_layer.Layer): def random_flipped_inputs(): flipped_outputs = inputs if self.horizontal: - flipped_outputs = image_ops.random_flip_left_right( - flipped_outputs, self.seed) + flipped_outputs = image_ops.stateless_random_flip_left_right( + flipped_outputs, + self._rng.make_seeds()[:, 0]) if self.vertical: - flipped_outputs = image_ops.random_flip_up_down(flipped_outputs, - self.seed) + flipped_outputs = image_ops.stateless_random_flip_up_down( + flipped_outputs, + self._rng.make_seeds()[:, 0]) return flipped_outputs output = control_flow_util.smart_cond(training, random_flipped_inputs, @@ -1083,6 +1085,7 @@ class RandomContrast(base_layer.Layer): raise ValueError('Factor cannot have negative values or greater than 1.0,' ' got {}'.format(factor)) self.seed = seed + self._rng = make_generator(self.seed) self.input_spec = InputSpec(ndim=4) super(RandomContrast, self).__init__(**kwargs) base_preprocessing_layer.keras_kpl_gauge.get_cell('RandomContrast').set( @@ -1093,8 +1096,9 @@ class RandomContrast(base_layer.Layer): training = backend.learning_phase() def random_contrasted_inputs(): - return image_ops.random_contrast(inputs, 1. - self.lower, 1. + self.upper, - self.seed) + return image_ops.stateless_random_contrast(inputs, 1. - self.lower, + 1. + self.upper, + self._rng.make_seeds()[:, 0]) output = control_flow_util.smart_cond(training, random_contrasted_inputs, lambda: inputs) @@ -1314,7 +1318,7 @@ def make_generator(seed=None): Returns: A generator object. """""" - if seed: + if seed is not None: return stateful_random_ops.Generator.from_seed(seed) else: return stateful_random_ops.Generator.from_non_deterministic_state() ",0,test 386de03d9d76d934ca99a00c02060e57e8ab86b0,tensorflow/tensorflow,"Move to using stateless image_ops so that the random seed constructor argument works. 
PiperOrigin-RevId: 378938459 Change-Id: I3d6d24a9accc8a52bbf48afeca180eccf8f7d493",image_preprocessing_test.py,"@@ -14,6 +14,7 @@ # ============================================================================== """"""Tests for image preprocessing layers."""""" +import functools from absl.testing import parameterized import numpy as np @@ -377,7 +378,10 @@ class RandomFlipTest(keras_parameterized.TestCase): if mode == 'vertical' or mode == 'horizontal_and_vertical': expected_output = np.flip(expected_output, axis=1) with test.mock.patch.object( - random_ops, 'random_uniform', return_value=mock_random): + stateless_random_ops, + 'stateless_random_uniform', + return_value=mock_random, + ): with testing_utils.use_gpu(): layer = image_preprocessing.RandomFlip(mode) actual_output = layer(inp, training=1) @@ -427,7 +431,10 @@ class RandomFlipTest(keras_parameterized.TestCase): mock_random = [1, 1] mock_random = np.reshape(mock_random, [2, 1, 1, 1]) with test.mock.patch.object( - random_ops, 'random_uniform', return_value=mock_random): + stateless_random_ops, + 'stateless_random_uniform', + return_value=mock_random, + ): with self.cached_session(): layer = image_preprocessing.RandomFlip() actual_output = layer(input_images, training=1) @@ -460,7 +467,10 @@ class RandomContrastTest(keras_parameterized.TestCase): inp_mean = np.mean(inp_mean, axis=2, keepdims=True) expected_output = (inp - inp_mean) * mock_random + inp_mean with test.mock.patch.object( - random_ops, 'random_uniform', return_value=mock_random): + stateless_random_ops, + 'stateless_random_uniform', + return_value=mock_random, + ): with testing_utils.use_gpu(): layer = image_preprocessing.RandomContrast((lower, upper)) actual_output = layer(inp, training=True) @@ -1449,5 +1459,35 @@ class LearningPhaseTest(keras_parameterized.TestCase): self.assertEqual(tuple(int(i) for i in out.shape[1:]), shape) +@keras_parameterized.run_all_keras_modes(always_skip_v1=True) +class DeterminismTest(keras_parameterized.TestCase): + + @parameterized.named_parameters( + ('random_flip', image_preprocessing.RandomFlip), + ('random_contrast', + functools.partial(image_preprocessing.RandomContrast, factor=1.)), + ('random_crop', + functools.partial(image_preprocessing.RandomCrop, height=2, width=2)), + ('random_translation', + functools.partial(image_preprocessing.RandomTranslation, 0.3, 0.2)), + ('random_rotation', + functools.partial(image_preprocessing.RandomRotation, 0.5)), + ('random_zoom', functools.partial(image_preprocessing.RandomZoom, 0.2)), + ('random_height', functools.partial(image_preprocessing.RandomHeight, + 0.4)), + ('random_width', functools.partial(image_preprocessing.RandomWidth, 0.3)), + ) + def test_seed_constructor_arg(self, layer_cls): + input_image = np.random.random((2, 5, 8, 3)).astype(np.float32) + + layer1 = layer_cls(seed=0.) + layer2 = layer_cls(seed=0.) 
+ layer1_output = layer1(input_image) + layer2_output = layer2(input_image) + + self.assertAllClose(layer1_output.numpy().tolist(), + layer2_output.numpy().tolist()) + + if __name__ == '__main__': test.main() ",0,test bc78f9b060cece8e29a89f7dbcdedcadbc61891d,tensorflow/tensorflow,"internal END_PUBLIC BEGIN_PUBLIC Automated g4 rollback of changelist 193600682 PiperOrigin-RevId: 193723856",rev_block_lib.py,"@@ -45,7 +45,6 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import variable_scope from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import nest -from tensorflow.python.util import tf_inspect __all__ = [""rev_block"", ""RevBlock"", ""recompute_grad""] @@ -430,13 +429,12 @@ def enable_with_args(dec): @enable_with_args -def recompute_grad(fn, use_data_dep=_USE_DEFAULT, tupleize_grads=False, - tensor_arg_names=None): +def recompute_grad(fn, use_data_dep=_USE_DEFAULT, tupleize_grads=False): """"""Decorator that recomputes the function on the backwards pass. Args: - fn: the subgraph-producing function to wrap and recompute when computing - gradients. Provide `tensor_arg_names` if not all arguments are `Tensor`s. + fn: a function that takes Tensors (all as positional arguments) and returns + a tuple of Tensors. use_data_dep: `bool`, if `True` will use a dummy data dependency to force the recompute to happen. If `False` will use a control dependency. By default will be `True` if in an XLA context and `False` otherwise. XLA @@ -445,25 +443,17 @@ def recompute_grad(fn, use_data_dep=_USE_DEFAULT, tupleize_grads=False, that all gradients are produced before any are consumed by downstream ops. If `use_data_dep` is also `True`, will use a data dependency instead of a control dependency. - tensor_arg_names: `list`, names of the `Tensor` arguments to `fn`. If - `None`, assumes all arguments are `Tensor`s. Returns: A wrapped fn that is identical to fn when called, but its activations will be discarded and recomputed on the backwards pass (i.e. on a call to tf.gradients). """""" - if tensor_arg_names: - if not isinstance(tensor_arg_names, (list, tuple)): - raise TypeError(""tensor_arg_names must be a list"") @functools.wraps(fn) - def wrapped(*args, **kwargs): - tensor_only_fn, tensor_args = _make_tensor_only(fn, args, kwargs, - tensor_arg_names) + def wrapped(*args): return _recompute_grad( - tensor_only_fn, tensor_args, use_data_dep=use_data_dep, - tupleize_grads=tupleize_grads) + fn, args, use_data_dep=use_data_dep, tupleize_grads=tupleize_grads) return wrapped @@ -473,59 +463,11 @@ def _is_on_tpu(): return control_flow_util.GetContainingXLAContext(ctxt) is not None -def _make_tensor_only(fn, args, kwargs, tensor_arg_names): - """"""Return fn such that it only takes Tensor args for tensor_arg_names."""""" - argspec = tf_inspect.getargspec(fn) - if argspec.varargs is not None or argspec.keywords is not None: - raise ValueError(""Function decorated with recompute_grad must not use "" - ""*args or **kwargs."") - fn_arg_names = list(argspec.args) - - # name_to_arg is a dict of argument name to argument value, including both - # positional and keyword arguments passed. - name_to_arg = {} - # Populate positional arguments. - for name, arg in zip(fn_arg_names[:len(args)], args): - name_to_arg[name] = arg - # Populate keyword arguments. - name_to_arg.update(kwargs) - - # Separate the Tensor arguments from the non-Tensor arguments. - # The default is that all arguments are Tensor arguments. 
- tensor_arg_names = tensor_arg_names or fn_arg_names - for name in tensor_arg_names: - if name not in name_to_arg: - raise ValueError(""Must provide Tensor argument %s"" % name) - tensor_args = [name_to_arg[name] for name in tensor_arg_names] - non_tensor_kwargs = dict([(name, arg) for name, arg in name_to_arg.items() - if name not in tensor_arg_names]) - - # Check that Tensor arguments are in fact Tensors and that non-Tensor - # arguments are not. - for name, arg in zip(tensor_arg_names, tensor_args): - if not isinstance(arg, framework_ops.Tensor): - raise TypeError(""Fn argument %s must be a Tensor."" % name) - for name, arg in non_tensor_kwargs.items(): - if isinstance(arg, framework_ops.Tensor): - raise TypeError(""Fn argument %s must not be a Tensor."" % name) - - # Construct a Tensor-only wrapper function that will pass the non-Tensor - # arguments as well when called. - def tensor_only_fn(*tensors): - all_kwargs = dict(zip(tensor_arg_names, tensors)) - all_kwargs.update(non_tensor_kwargs) - return fn(**all_kwargs) - - return tensor_only_fn, tensor_args - - -def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, - tupleize_grads=False): +def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, tupleize_grads=False): """"""See recompute_grad."""""" for arg in args: if not isinstance(arg, framework_ops.Tensor): raise ValueError(""All inputs to function must be Tensors"") - use_data_dep_ = use_data_dep if use_data_dep_ == _USE_DEFAULT: use_data_dep_ = _is_on_tpu() @@ -559,11 +501,14 @@ def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, grad_vars = grads[len(inputs):] return grad_inputs, grad_vars - # TODO(rsepassi): Replace with tf.custom_gradient @_fn_with_custom_grad(grad_fn) def fn_with_recompute(*args): cached_vs.append(variable_scope.get_variable_scope()) - cached_arg_scope.append(contrib_framework_ops.current_arg_scope()) + # TODO(rsepassi): Rm conditional in TF 1.4 + if hasattr(contrib_framework_ops, ""current_arg_scope""): + cached_arg_scope.append(contrib_framework_ops.current_arg_scope()) + else: + cached_arg_scope.append({}) return fn(*args) return fn_with_recompute(*args) ",0,train bc78f9b060cece8e29a89f7dbcdedcadbc61891d,tensorflow/tensorflow,"internal END_PUBLIC BEGIN_PUBLIC Automated g4 rollback of changelist 193600682 PiperOrigin-RevId: 193723856",rev_block_lib_test.py,"@@ -318,108 +318,6 @@ class RecomputeTest(test.TestCase): self.assertEqual(1, len(grads)) self.assertTrue(grads[0] is not None) - def testWithNontensorArgs(self): - @rev_block_lib.recompute_grad(tupleize_grads=True, - tensor_arg_names=[""inputs""]) - def layer_with_recompute(inputs, plus=None): - var = variable_scope.get_variable(""var"", ()) - self.assertFalse(plus) # called with False below - if plus: - return var + inputs - else: - return var * inputs - - inputs = array_ops.ones((), dtypes.float32) - outputs = layer_with_recompute(inputs, plus=False) - loss = math_ops.square(outputs) - grads = gradients_impl.gradients(loss, variables.trainable_variables()) - self.assertEqual(1, len(grads)) - self.assertTrue(grads[0] is not None) - - -class MakeTensorOnlyTest(test.TestCase): - - def testMakeTensorOnly(self): - def fn(a, b, c, d=1, e=None, f=7): - return (a, b, c, d, e, f) - - t1 = array_ops.ones(()) - t2 = array_ops.ones(()) - t3 = array_ops.ones(()) - args = [1, t1, 3, t2] - kwargs = {""e"": t3} - tensor_only_fn, tensor_args = rev_block_lib._make_tensor_only( - fn, args, kwargs, [""b"", ""d"", ""e""]) - self.assertAllEqual(tensor_args, [t1, t2, t3]) - out = tensor_only_fn(*tensor_args) - 
self.assertAllEqual(out, (1, t1, 3, t2, t3, 7)) - - def testMakeTensorOnlyPositionalArgsOnly(self): - def fn(a, b, c): - return (a, b, c) - - t1 = array_ops.ones(()) - t2 = array_ops.ones(()) - args = [t1, 3, t2] - tensor_only_fn, tensor_args = rev_block_lib._make_tensor_only( - fn, args, {}, [""a"", ""c""]) - self.assertAllEqual(tensor_args, [t1, t2]) - out = tensor_only_fn(*tensor_args) - self.assertAllEqual(out, (t1, 3, t2)) - - def testMakeTensorOnlyKwargsArgsOnly(self): - def fn(a=1, b=2, c=3): - return (a, b, c) - - t1 = array_ops.ones(()) - t2 = array_ops.ones(()) - args = [t1] - kwargs = {""c"": t2} - tensor_only_fn, tensor_args = rev_block_lib._make_tensor_only( - fn, args, kwargs, [""a"", ""c""]) - self.assertAllEqual(tensor_args, [t1, t2]) - out = tensor_only_fn(*tensor_args) - self.assertAllEqual(out, (t1, 2, t2)) - - def testErrorOnMissingTensorArg(self): - def fn(a, b): - return (a, b) - - with self.assertRaisesWithPredicateMatch( - ValueError, ""provide Tensor argument""): - rev_block_lib._make_tensor_only(fn, [], {""b"": 2}, [""a""]) - - def testErrorOnSignatureSplats(self): - def fn1(a, *args): - return (a, args) - - err_msg = r""must not use \*args or \*\*kwargs"" - with self.assertRaisesWithPredicateMatch(ValueError, err_msg): - rev_block_lib._make_tensor_only(fn1, [1, 2], {}, [""a""]) - - def fn2(a, **kwargs): - return (a, kwargs) - - with self.assertRaisesWithPredicateMatch(ValueError, err_msg): - rev_block_lib._make_tensor_only(fn2, [], {""a"": 1, ""b"": 2}, [""a""]) - - def testErrorOnNonTensorForTensor(self): - def fn(a, b): - return (a, b) - - with self.assertRaisesWithPredicateMatch(TypeError, ""must be a Tensor""): - rev_block_lib._make_tensor_only(fn, [2, 3], {}, [""a""]) - - def testErrorOnTensorForNonTensor(self): - def fn(a, b): - return (a, b) - - with self.assertRaisesWithPredicateMatch( - TypeError, ""must not be a Tensor""): - t1 = array_ops.ones(()) - t2 = array_ops.ones(()) - rev_block_lib._make_tensor_only(fn, [t1, t2], {}, [""a""]) - class FnWithCustomGradTest(test.TestCase): ",0,train ab9a9084d2feb820a1fe94e6d42825209eaed076,tensorflow/tensorflow,"Changing ""GpuVersion"" datatype to include hipDeviceProp_t::gcnArchName Currently the ""GpuVersion"" datatype (for AMDGPU in XLA code) is an `int`, whose value is the same as the `int hipDeviceProp_t::gcnArch;` field. Starting with ROCm 4.?, which introduces targetID support, that value will no longer be sufficient to create a LLVM AMDGPUTarget that accurately represents underlying GPU. We will need to information contained withing the `string hipDeviceProp_t gcnArchName` field for that purpose. 
This commit updates fthe ""GpuVersion"" datatype from being a simple int to a (int, string) pair, and stores the value of the `string hipDeviceProp_t gcnArchName` field in the string.",amdgpu_compiler.cc,"@@ -100,8 +100,14 @@ GpuVersion AMDGPUCompiler::GetGpuVersion(se::StreamExecutor* stream_exec) { << ""Couldn't get AMDGPU ISA version for device; assuming gfx803.""; isa_version = 803; } + std::string gcn_arch_name = + stream_exec->GetDeviceDescription().rocm_amdgpu_gcn_arch_name(); + if (gcn_arch_name == stream_exec->GetDeviceDescription().kUndefinedString) { + LOG(WARNING) << ""Couldn't get AMDGPU GCN Arch for device; assuming gfx803.""; + gcn_arch_name = ""gfx803""; + } - return isa_version; + return std::make_pair(isa_version, gcn_arch_name); } StatusOr>> ",0,train ab9a9084d2feb820a1fe94e6d42825209eaed076,tensorflow/tensorflow,"Changing ""GpuVersion"" datatype to include hipDeviceProp_t::gcnArchName Currently the ""GpuVersion"" datatype (for AMDGPU in XLA code) is an `int`, whose value is the same as the `int hipDeviceProp_t::gcnArch;` field. Starting with ROCm 4.?, which introduces targetID support, that value will no longer be sufficient to create a LLVM AMDGPUTarget that accurately represents underlying GPU. We will need to information contained withing the `string hipDeviceProp_t gcnArchName` field for that purpose. This commit updates fthe ""GpuVersion"" datatype from being a simple int to a (int, string) pair, and stores the value of the `string hipDeviceProp_t gcnArchName` field in the string.",gpu_executable.cc,"@@ -101,10 +101,11 @@ Status GpuExecutable::CheckCompatibilityWithServiceExecutableRunOptions( int stream_isa_version; main_stream->parent()->GetDeviceDescription().rocm_amdgpu_isa_version( &stream_isa_version); - GpuVersion amd_isa_version = stream_isa_version; - TF_RET_CHECK(amd_isa_version == gpu_version_) - << ""AMDGPU GCN ISA version mismatch; expected {"" - << absl::get(gpu_version_) << "", but was "" << stream_isa_version; + int gpu_exec_isa_version = + absl::get>(gpu_version_).first; + TF_RET_CHECK(stream_isa_version == gpu_exec_isa_version) + << ""AMDGPU GCN ISA version mismatch; expected {"" << gpu_exec_isa_version + << "", but was "" << stream_isa_version; } else if (platform_kind == stream_executor::PlatformKind::kCuda) { std::pair stream_compute_compatibility; main_stream->parent()->GetDeviceDescription().cuda_compute_capability( ",0,train ab9a9084d2feb820a1fe94e6d42825209eaed076,tensorflow/tensorflow,"Changing ""GpuVersion"" datatype to include hipDeviceProp_t::gcnArchName Currently the ""GpuVersion"" datatype (for AMDGPU in XLA code) is an `int`, whose value is the same as the `int hipDeviceProp_t::gcnArch;` field. Starting with ROCm 4.?, which introduces targetID support, that value will no longer be sufficient to create a LLVM AMDGPUTarget that accurately represents underlying GPU. We will need to information contained withing the `string hipDeviceProp_t gcnArchName` field for that purpose. This commit updates fthe ""GpuVersion"" datatype from being a simple int to a (int, string) pair, and stores the value of the `string hipDeviceProp_t gcnArchName` field in the string.",gpu_types.h,"@@ -21,10 +21,19 @@ limitations under the License. namespace xla { namespace gpu { -// GpuVersion is used to abstract Gpu hardware version. On Cuda platform, -// it comprises a pair of integers denoting major and minor version. -// On ROCm platform, it comprises one integer for AMD GCN ISA version. 
-using GpuVersion = absl::variant, int>; +// GpuVersion is used to abstract Gpu hardware version. +// +// On Cuda platform, it comprises of an pair +// denoting major and minor version. +// +// On ROCm platform, it comprises of an pair +// the int has the contents of the hipDeviceProp_t::gcnArchValue field. +// the string has the contents of the hipDeviceProp_t::gcnArchName field. +// The string contains all the information needed to create an exact LLVM +// AMDGPUTarget corresopnding the AMDGPU device it represents, the int value +// by itself is not sufficient for this purpose +using GpuVersion = + absl::variant, std::pair>; } // namespace gpu } // namespace xla ",0,train ab9a9084d2feb820a1fe94e6d42825209eaed076,tensorflow/tensorflow,"Changing ""GpuVersion"" datatype to include hipDeviceProp_t::gcnArchName Currently the ""GpuVersion"" datatype (for AMDGPU in XLA code) is an `int`, whose value is the same as the `int hipDeviceProp_t::gcnArch;` field. Starting with ROCm 4.?, which introduces targetID support, that value will no longer be sufficient to create a LLVM AMDGPUTarget that accurately represents underlying GPU. We will need to information contained withing the `string hipDeviceProp_t gcnArchName` field for that purpose. This commit updates fthe ""GpuVersion"" datatype from being a simple int to a (int, string) pair, and stores the value of the `string hipDeviceProp_t gcnArchName` field in the string.",gpu_backend_lib.cc,"@@ -787,13 +787,13 @@ Status AMDGPUTargetModuleLinker(llvm::Module* module, GpuVersion gpu_version, const HloModuleConfig& hlo_module_config, const string& device_bitcode_dir_path) { // Link the input module with ROCDL. - auto amdgpu_version = absl::get_if(&gpu_version); + auto amdgpu_version = absl::get_if>(&gpu_version); if (!amdgpu_version) { return xla::InternalError( ""Incompatible AMD GCN ISA version was specified.""); } - TF_RETURN_IF_ERROR( - LinkROCDLIfNecessary(module, *amdgpu_version, device_bitcode_dir_path)); + TF_RETURN_IF_ERROR(LinkROCDLIfNecessary(module, amdgpu_version->first, + device_bitcode_dir_path)); return Status::OK(); } @@ -861,13 +861,14 @@ StatusOr> CompileToHsaco( tensorflow::profiler::TraceMeLevel::kInfo); XLA_SCOPED_LOGGING_TIMER(""Compile module "" + module->getName().str()); - auto amdgpu_version = absl::get_if(&gpu_version); + auto amdgpu_version = + absl::get_if>(&gpu_version); if (!amdgpu_version) { return xla::InternalError( ""Incompatible AMD GCN ISA version was specified.""); } uint64_t hash; - if (HsacoCache::Find(str, hash, *amdgpu_version, hsaco)) { + if (HsacoCache::Find(str, hash, amdgpu_version->first, hsaco)) { VLOG(1) << ""HSACO cache hit""; return hsaco; } @@ -885,7 +886,7 @@ StatusOr> CompileToHsaco( llvm::Triple default_target_triple(""amdgcn--amdhsa-amdgiz""); // Construct LLVM TargetMachine for AMDGPU. std::unique_ptr target_machine = - AMDGPUGetTargetMachine(default_target_triple, *amdgpu_version, + AMDGPUGetTargetMachine(default_target_triple, amdgpu_version->first, hlo_module_config); // Link with ROCm-Device-Libs, and optimize the LLVM module. @@ -896,7 +897,7 @@ StatusOr> CompileToHsaco( // Lower optimized LLVM module to HSA code object. 
TF_ASSIGN_OR_RETURN(hsaco, EmitModuleToHsaco(module, target_machine.get())); - HsacoCache::Add(str, hash, *amdgpu_version, hsaco); + HsacoCache::Add(str, hash, amdgpu_version->first, hsaco); } return hsaco; } ",0,train ab9a9084d2feb820a1fe94e6d42825209eaed076,tensorflow/tensorflow,"Changing ""GpuVersion"" datatype to include hipDeviceProp_t::gcnArchName Currently the ""GpuVersion"" datatype (for AMDGPU in XLA code) is an `int`, whose value is the same as the `int hipDeviceProp_t::gcnArch;` field. Starting with ROCm 4.?, which introduces targetID support, that value will no longer be sufficient to create a LLVM AMDGPUTarget that accurately represents underlying GPU. We will need to information contained withing the `string hipDeviceProp_t gcnArchName` field for that purpose. This commit updates fthe ""GpuVersion"" datatype from being a simple int to a (int, string) pair, and stores the value of the `string hipDeviceProp_t gcnArchName` field in the string.",llvm_compiler_test.cc,"@@ -53,7 +53,9 @@ class GpuDummyCompiler : public GpuCompiler { return Status::OK(); } - GpuVersion GetGpuVersion(se::StreamExecutor* stream_exec) { return 0; } + GpuVersion GetGpuVersion(se::StreamExecutor* stream_exec) { + return std::make_pair(0, 0); + } StatusOr>> CompileTargetBinary( const HloModuleConfig& module_config, llvm::Module* llvm_module, ",0,train af8ec29dc16c6d1ddae557dbbfd3f6f4e7e88fbd,tensorflow/tensorflow,"Use GesvdjBatched on GPU for batches of square matrices up to size 32x32. This substantially speeds up SVD on small matrices on the GPU. On a Quadro P4000 performing SVD on 1000 16x16 matrices reduces from 0.617s to 0.0135s. PiperOrigin-RevId: 267036566",svd_op_gpu.cu.cc,"@@ -99,40 +99,70 @@ class SvdOpGpu : public AsyncOpKernel { std::unique_ptr solver) { // Compute U S V* = M. // 1. cuSolver works in column-major rather than row-major. - // 2. Gesvd returns V*. - // 3. Hence M should be transposed before input and U (rather than V) should - // be transposed on output. + // 2. Gesvd returns V*. GesvdjBatched returns V. + // 3. Hence M should be transposed before input and + // a) U (rather than V) should be transposed on output with Gesvd. + // b) U and V should be transposed on output with GesvdjBatched. - Tensor u_copy; - if (compute_uv_) { - TensorShape u_shape; - if (full_matrices_) { + // get the pointers to input data + Scalar* input_ptr; + RealScalar* outputS_ptr; + auto input_reshaped = M_copy.template flat_inner_dims(); + input_ptr = input_reshaped.data(); + const int64 batch_size = + M_copy.dims() > 2 ? input_reshaped.dimension(0) : 1; + // Gesvdjbatched handles matrices up to 32x32. + // TODO(jamessspencer): if not full_matrices, compute full U and V matrices + // using Gesvdjbatched and return slices. + const bool batched = m <= 32 && n <= 32 && batch_size > 1 && full_matrices_; + + // Copies of U and V if required so can take transposes after SVD. + Tensor u_copy, v_copy; + Scalar* outputU_ptr = NULL; + Scalar* outputV_ptr = NULL; + if (compute_uv_ || batched) { + TensorShape u_shape, v_shape; + if (batched) { + // Gesvdjbatched seems to require U and V matrices even if the vectors + // aren't computed. 
+ TensorShape shapeRaw = M_copy.shape(); + shapeRaw.RemoveLastDims(2); + u_shape = shapeRaw; + u_shape.AddDim(m); + u_shape.AddDim(m); + v_shape = shapeRaw; + v_shape.AddDim(n); + v_shape.AddDim(n); + } else if (full_matrices_) { u_shape = U->shape(); + v_shape = V->shape(); } else { TensorShape shapeRaw = M_copy.shape(); shapeRaw.RemoveLastDims(2); u_shape = shapeRaw; u_shape.AddDim(p); u_shape.AddDim(m); + v_shape = shapeRaw; + v_shape.AddDim(p); + v_shape.AddDim(n); } OP_REQUIRES_OK_ASYNC( context, solver->allocate_scoped_tensor(U->dtype(), u_shape, &u_copy), done); + if (batched) { + OP_REQUIRES_OK_ASYNC( + context, + solver->allocate_scoped_tensor(V->dtype(), v_shape, &v_copy), done); + } + outputU_ptr = u_copy.template flat_inner_dims().data(); + if (batched) { + outputV_ptr = v_copy.template flat_inner_dims().data(); + } else { + outputV_ptr = V->template flat_inner_dims().data(); + } } - // get the pointers to the data - Scalar* input_ptr; - RealScalar* outputS_ptr; - Scalar* outputU_ptr = NULL; - Scalar* outputV_ptr = NULL; - auto input_reshaped = M_copy.template flat_inner_dims(); - input_ptr = input_reshaped.data(); outputS_ptr = S->template flat_inner_dims().data(); - if (compute_uv_) { - outputU_ptr = u_copy.template flat_inner_dims().data(); - outputV_ptr = V->template flat_inner_dims().data(); - } - const int64 batch_size = input_reshaped.dimension(0); std::vector dev_info; dev_info.push_back(solver->GetDeviceLapackInfo(batch_size, ""gesvd"")); int* dev_info_ptr = dev_info.back().mutable_data(); @@ -151,33 +181,44 @@ class SvdOpGpu : public AsyncOpKernel { batch_size * m * sizeof(Scalar)); } - for (int64 batch = 0; batch < batch_size; ++batch) { - Scalar* input = input_ptr + batch * m * n; - RealScalar* outputS = outputS_ptr + batch * p; - Scalar* outputU = NULL; - Scalar* outputVT = NULL; - char jobu = 'N'; - char jobvt = 'N'; - - if (compute_uv_) { - if (full_matrices_) { - outputU = outputU_ptr + batch * m * m; - outputVT = outputV_ptr + batch * n * n; - jobu = 'A'; - jobvt = 'A'; - } else { - outputU = outputU_ptr + batch * m * p; - outputVT = outputV_ptr + batch * n * p; - jobu = 'S'; - jobvt = 'S'; - } - } - + if (batched) { + cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_NOVECTOR; + if (compute_uv_) jobz = CUSOLVER_EIG_MODE_VECTOR; OP_REQUIRES_OK_ASYNC( context, - solver->Gesvd(jobu, jobvt, m, n, input, m, outputS, outputU, m, - outputVT, n, dev_info_ptr + batch), + solver->GesvdjBatched(jobz, m, n, input_ptr, m, outputS_ptr, + outputU_ptr, m, outputV_ptr, n, dev_info_ptr, + batch_size), done); + } else { + for (int64 batch = 0; batch < batch_size; ++batch) { + Scalar* input = input_ptr + batch * m * n; + RealScalar* outputS = outputS_ptr + batch * p; + Scalar* outputU = NULL; + Scalar* outputVT = NULL; + char jobu = 'N'; + char jobvt = 'N'; + + if (compute_uv_) { + if (full_matrices_) { + outputU = outputU_ptr + batch * m * m; + outputVT = outputV_ptr + batch * n * n; + jobu = 'A'; + jobvt = 'A'; + } else { + outputU = outputU_ptr + batch * m * p; + outputVT = outputV_ptr + batch * n * p; + jobu = 'S'; + jobvt = 'S'; + } + } + + OP_REQUIRES_OK_ASYNC( + context, + solver->Gesvd(jobu, jobvt, m, n, input, m, outputS, outputU, m, + outputVT, n, dev_info_ptr + batch), + done); + } } // This is a bug in cuSolver: @@ -213,6 +254,10 @@ class SvdOpGpu : public AsyncOpKernel { if (compute_uv_) { auto device = context->eigen_device(); OP_REQUIRES_OK_ASYNC(context, DoMatrixTranspose(device, u_copy, U), done); + if (batched) { + OP_REQUIRES_OK_ASYNC(context, 
DoMatrixTranspose(device, v_copy, V), + done); + } } CheckResult(context, std::move(done), dev_info, std::move(solver)); @@ -289,6 +334,7 @@ class SvdOpGpu : public AsyncOpKernel { } // Call the SVD: compute V S U* = M*. + // Note (m, n) and (U, V) are swapped accordingly. RunSVD(context, done, n, m, p, input_copy, S, V, U, std::move(solver)); } ",0,train a4a6bab62151616b54216059919bb2c111a45881,tensorflow/tensorflow,"Expose stream executor namespace in cmake shared object. (#19415) Instead of perftools::gputools expose stream executor namespace in cmake shared object.",create_def_file.py,"@@ -44,7 +44,7 @@ UNDNAME = ""undname.exe"" DUMPBIN = ""dumpbin.exe"" # Exclude if matched -EXCLUDE_RE = re.compile(r""RTTI|deleting destructor|::internal::"") +EXCLUDE_RE = re.compile(r""RTTI|deleting destructor|::internal::|Internal|python_op_gen_internal|grappler"") # Include if matched before exclude INCLUDEPRE_RE = re.compile(r""google::protobuf::internal::ExplicitlyConstructed|"" @@ -56,6 +56,9 @@ INCLUDEPRE_RE = re.compile(r""google::protobuf::internal::ExplicitlyConstructed|"" r""tensorflow::ops::internal::Enter|"" r""tensorflow::strings::internal::AppendPieces|"" r""tensorflow::strings::internal::CatPieces|"" + r""tensorflow::errors::Internal|"" + r""tensorflow::Tensor::CopyFromInternal|"" + r""tensorflow::kernel_factory::OpKernelRegistrar::InitInternal|"" r""tensorflow::io::internal::JoinPathImpl"") # Include if matched after exclude @@ -64,7 +67,7 @@ INCLUDE_RE = re.compile(r""^(TF_\w*)$|"" r""tensorflow::|"" r""functor::|"" r""\?nsync_|"" - r""perftools::gputools"") + r""stream_executor::"") # We want to identify data members explicitly in the DEF file, so that no one # can implicitly link against the DLL if they use one of the variables exported ",0,train fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset. PiperOrigin-RevId: 193437651",minimize_loss_test.py,"@@ -54,21 +54,18 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): def testTrainNetwork(self, distribution, optimizer_fn, use_callable_loss, is_tpu): with distribution.scope(): - model_fn, dataset, layer = minimize_loss_example( - optimizer_fn, - use_bias=True, - use_callable_loss=use_callable_loss) + model_fn, dataset_fn, layer = minimize_loss_example( + optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss) + def tpu_dataset_fn(): + return dataset_fn().batch(2) # TODO(isaprykin): Eliminate `is_tpu`. Probably add a # `DistributionStrategy.create_monitor` so that each DistributionStrategy # could influence its training loop. That method would return an instance # of Monitor. TPUMonitor would execute tpu.initialize_system() and # tpu.shutdown_system(). - if is_tpu: - dataset = dataset.batch(2) - iterator = distribution.distribute_dataset( - dataset).make_one_shot_iterator() + tpu_dataset_fn if is_tpu else dataset_fn).make_one_shot_iterator() def run_step(): # TODO(isaprykin): Make iterator get_next() return a list of sub- @@ -122,14 +119,14 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): # `distribution.scope`. 
with variable_scope.variable_creator_scope( appending_creator), distribution.scope(): - model_fn, dataset, layer = minimize_loss_example( + model_fn, dataset_fn, layer = minimize_loss_example( optimizer_fn, use_bias=True, use_callable_loss=True, create_optimizer_inside_model_fn=True) iterator = distribution.distribute_dataset( - dataset).make_one_shot_iterator() + dataset_fn).make_one_shot_iterator() def run_step(): return distribution.group( @@ -176,7 +173,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): """"""Verifies that moving mean updates are reduced across towers."""""" with distribution.scope(): num_towers = len(distribution.worker_devices) - model_fn, dataset, batchnorm = batchnorm_example( + model_fn, dataset_fn, batchnorm = batchnorm_example( optimizer_fn, batch_per_epoch=num_towers, momentum=momentum, @@ -188,7 +185,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): if isinstance(distribution, mirrored_strategy.MirroredStrategy): distribution._prefetch_on_device = False iterator = distribution.distribute_dataset( - dataset).make_one_shot_iterator() + dataset_fn).make_one_shot_iterator() def run_step(): return control_flow_ops.group( @@ -260,11 +257,13 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): else: return optimizer.minimize(loss_fn()) - features = dataset_ops.Dataset.from_tensors([[2.], [7.]]) - labels = dataset_ops.Dataset.from_tensors([[6.], [21.]]) - dataset = dataset_ops.Dataset.zip((features, labels)).repeat() + def dataset_fn(): + features = dataset_ops.Dataset.from_tensors([[2.], [7.]]) + labels = dataset_ops.Dataset.from_tensors([[6.], [21.]]) + return dataset_ops.Dataset.zip((features, labels)).repeat() + iterator = distribution.distribute_dataset( - dataset).make_one_shot_iterator() + dataset_fn).make_one_shot_iterator() def run_step(): return distribution.group( ",0,train fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset. PiperOrigin-RevId: 193437651",mirrored_strategy.py,"@@ -140,9 +140,10 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): g.add_to_collections(collections, result) return result - def distribute_dataset(self, dataset): + def distribute_dataset(self, dataset_fn): return values.PerDeviceDataset( - dataset, self._devices, self._prefetch_on_device) + self._call_dataset_fn(dataset_fn), self._devices, + self._prefetch_on_device) def _broadcast(self, tensor, destinations): # TODO(josh11b): In eager mode, use one thread per device, or async mode. ",0,train fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset. PiperOrigin-RevId: 193437651",mirrored_strategy_multigpu_test.py,"@@ -247,9 +247,9 @@ class MirroredStrategyVariableCreationTest(test.TestCase): dist = mirrored_strategy.MirroredStrategy( [""/device:GPU:0"", ""/device:CPU:0""]) - features = dataset_ops.Dataset.from_tensors([[1.]]).repeat(10) features = dist.distribute_dataset( - features).make_one_shot_iterator().get_next() + lambda: dataset_ops.Dataset.from_tensors([[1.]]).repeat(10) + ).make_one_shot_iterator().get_next() with dist.scope(): result = dist.call_for_each_tower( ",0,train fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset. 
PiperOrigin-RevId: 193437651",one_device_strategy.py,"@@ -60,8 +60,8 @@ class OneDeviceStrategy(distribute_lib.DistributionStrategy): with ops.colocate_with(colocate_with): return next_creator(*args, **kwargs) - def distribute_dataset(self, dataset): - return dataset + def distribute_dataset(self, dataset_fn): + return self._call_dataset_fn(dataset_fn) def _broadcast(self, tensor, destinations): return tensor ",0,train fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset. PiperOrigin-RevId: 193437651",optimizer_v2_test.py,"@@ -39,11 +39,11 @@ class MinimizeLossOptimizerV2Test(test.TestCase, parameterized.TestCase): def testTrainNetwork(self, distribution, optimizer_fn, use_callable_loss=True): with distribution.scope(): - model_fn, dataset, layer = minimize_loss_example( + model_fn, dataset_fn, layer = minimize_loss_example( optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss) iterator = distribution.distribute_dataset( - dataset).make_one_shot_iterator() + dataset_fn).make_one_shot_iterator() def run_step(): return control_flow_ops.group(distribution.unwrap( ",0,train fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset. PiperOrigin-RevId: 193437651",single_loss_example.py,"@@ -29,7 +29,10 @@ from tensorflow.python.ops import math_ops def single_loss_example(optimizer_fn, distribution, use_bias=False): """"""Build a very simple network to use in tests and examples."""""" - dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat() + + def dataset_fn(): + return dataset_ops.Dataset.from_tensors([[1.]]).repeat() + optimizer = optimizer_fn() layer = core.Dense(1, use_bias=use_bias) @@ -37,8 +40,8 @@ def single_loss_example(optimizer_fn, distribution, use_bias=False): y = array_ops.reshape(layer(x), []) - constant_op.constant(1.) return y * y - single_loss_step = step_fn.StandardSingleLossStep(dataset, loss_fn, optimizer, - distribution) + single_loss_step = step_fn.StandardSingleLossStep(dataset_fn, loss_fn, + optimizer, distribution) # Layer is returned for inspecting the kernels in tests. return single_loss_step, layer @@ -49,7 +52,10 @@ def minimize_loss_example(optimizer_fn, use_callable_loss=True, create_optimizer_inside_model_fn=False): """"""Example of non-distribution-aware legacy code."""""" - dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat() + + def dataset_fn(): + return dataset_ops.Dataset.from_tensors([[1.]]).repeat() + # An Optimizer instance is created either outside or inside model_fn. outer_optimizer = None if not create_optimizer_inside_model_fn: @@ -71,7 +77,7 @@ def minimize_loss_example(optimizer_fn, else: return optimizer.minimize(loss_fn()) - return model_fn, dataset, layer + return model_fn, dataset_fn, layer def batchnorm_example(optimizer_fn, @@ -79,12 +85,15 @@ def batchnorm_example(optimizer_fn, momentum=0.9, renorm=False): """"""Example of non-distribution-aware legacy code with batch normalization."""""" - # input shape is [16, 8], input values are increasing in both dimensions. - dataset = dataset_ops.Dataset.from_tensor_slices( - [[[float(x * 8 + y + z * 100) - for y in range(8)] - for x in range(16)] - for z in range(batch_per_epoch)]).repeat() + + def dataset_fn(): + # input shape is [16, 8], input values are increasing in both dimensions. 
+ return dataset_ops.Dataset.from_tensor_slices( + [[[float(x * 8 + y + z * 100) + for y in range(8)] + for x in range(16)] + for z in range(batch_per_epoch)]).repeat() + optimizer = optimizer_fn() batchnorm = normalization.BatchNormalization( renorm=renorm, momentum=momentum, fused=False) @@ -99,4 +108,4 @@ def batchnorm_example(optimizer_fn, # Callable loss. return optimizer.minimize(loss_fn) - return model_fn, dataset, batchnorm + return model_fn, dataset_fn, batchnorm ",0,train fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset. PiperOrigin-RevId: 193437651",step_fn.py,"@@ -49,13 +49,14 @@ class StandardInputStep(Step): """"""Step with a standard implementation of input handling. Args: - input_dataset: a tf.data Dataset that provides input. + dataset_fn: a function that returns a tf.data Dataset that produces the + input for the model. """""" - def __init__(self, input_dataset, distribution): + def __init__(self, dataset_fn, distribution): Step.__init__(self, distribution) self._distributed_input = distribution.distribute_dataset( - input_dataset).make_one_shot_iterator() + dataset_fn).make_one_shot_iterator() def inputs(self): return self._distributed_input.get_next() @@ -77,14 +78,15 @@ class StandardSingleLossStep(StandardInputStep): ``` Args: - input_dataset: a tf.data Dataset that provides input. + dataset_fn: a function that returns a tf.data Dataset that produces the + input for the model. loss_fn: a function that returns loss. optimizer: an optimizer that implements an update rule. distribution: a `DistributionStrategy` object. """""" - def __init__(self, input_dataset, loss_fn, optimizer, distribution): - StandardInputStep.__init__(self, input_dataset, distribution) + def __init__(self, dataset_fn, loss_fn, optimizer, distribution): + StandardInputStep.__init__(self, dataset_fn, distribution) self._loss_fn = loss_fn self._optimizer = optimizer self._is_run_concurrently = False ",0,train fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset. PiperOrigin-RevId: 193437651",estimator.py,"@@ -688,22 +688,19 @@ class Estimator(object): def _get_features_and_labels_from_input_fn(self, input_fn, mode): """"""Extracts the `features` and labels from return values of `input_fn`."""""" - result = self._call_input_fn(input_fn, mode) - # TODO(anjalisridhar): What about the default DistributionStrategy? Perhaps - # using any input is alright in that case. There is also a - # has_dataset_or_queue_runner function that we may want to extend and use. 
- if (self._distribution is not None and - not isinstance(result, dataset_ops.Dataset) and - mode == model_fn_lib.ModeKeys.TRAIN): - raise ValueError('input_fn() must return a tf.data.Dataset when using a ' - 'DistributionStrategy.') input_hooks = [] - if isinstance(result, dataset_ops.Dataset): - if self._distribution is not None and mode == model_fn_lib.ModeKeys.TRAIN: - result = self._distribution.distribute_dataset(result) + if self._distribution is not None and mode == model_fn_lib.ModeKeys.TRAIN: + result = self._distribution.distribute_dataset( + lambda: self._call_input_fn(input_fn, mode)) iterator = result.make_initializable_iterator() input_hooks.append(_DatasetInitializerHook(iterator)) result = iterator.get_next() + else: + result = self._call_input_fn(input_fn, mode) + if isinstance(result, dataset_ops.Dataset): + iterator = result.make_initializable_iterator() + input_hooks.append(_DatasetInitializerHook(iterator)) + result = iterator.get_next() if isinstance(result, (list, tuple)): if len(result) != 2: raise ValueError( ",0,train fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset. PiperOrigin-RevId: 193437651",distribute.py,"@@ -20,6 +20,7 @@ from __future__ import print_function import threading +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -672,25 +673,35 @@ class DistributionStrategy(object): _require_distribution_strategy_scope(self) return variable_scope.variable_creator_scope(create_colocated_variable) + def _call_dataset_fn(self, dataset_fn): + result = dataset_fn() + if not isinstance(result, dataset_ops.Dataset): + raise ValueError( + ""dataset_fn() must return a tf.data.Dataset when using a "" + ""DistributionStrategy."") + return result + # TODO(josh11b): `PerDeviceDataset` currently only implements a few methods of # Dataset API such as make_one_shot_iterator and make_initializable_iterator. # Extend to implement more functionality of datasets. - def distribute_dataset(self, dataset): + def distribute_dataset(self, dataset_fn): """"""Return a `dataset` split across all towers. Suitable for providing input to for `call_for_each_tower()` by creating an iterator: ``` + def dataset_fn(): + return tf.data.Dataset.from_tensors([[1.]]).repeat() with distribution_strategy.scope(): - distributed_dataset = distribution_strategy.distribute_dataset(dataset) + distributed_dataset = distribution_strategy.distribute_dataset(dataset_fn) iterator = distributed_dataset.make_one_shot_iterator() tower_results = distribution_strategy.call_for_each_tower( tower_fn, iterator.get_next()) ``` Args: - dataset: A `tf.data.Dataset`. + dataset_fn: A function that returns a `tf.data.Dataset`. Returns: A `PerDeviceDataset` that will produce data for each tower. @@ -1135,8 +1146,8 @@ class _DefaultDistributionStrategy(DistributionStrategy): _require_distribution_strategy_scope(self) return ops.colocate_with(colocate_with_variable) - def distribute_dataset(self, dataset): - return dataset + def distribute_dataset(self, dataset_fn): + return self._call_dataset_fn(dataset_fn) def _broadcast(self, tensor, destinations): if destinations is None: ",0,train b0240486be5a8c4286961d1751fe8560e9c6970e,tensorflow/tensorflow,"Exposes the memory limit in the allocator's stats. 
Change: 115036211",gpu_bfc_allocator.cc,"@@ -41,6 +41,7 @@ GPUBFCAllocator::GPUBFCAllocator(int device_id, size_t total_memory) // Allocate the requested amount of memory. gpu_memory_size_ = total_memory; + stats_.bytes_limit = static_cast(total_memory); // Create a bunch of bins of various good sizes. @@ -256,7 +257,7 @@ void* GPUBFCAllocator::AllocateRawInternal(size_t unused_alignment, DumpMemoryLog(rounded_bytes); LOG(WARNING) << ""Ran out of memory trying to allocate "" << strings::HumanReadableNumBytes(num_bytes) - << "". See logs for memory state""; + << "". See logs for memory state.""; } return nullptr; } @@ -544,6 +545,7 @@ void GPUBFCAllocator::DumpMemoryLog(size_t num_bytes) { } LOG(INFO) << ""Sum Total of in-use chunks: "" << strings::HumanReadableNumBytes(total_bytes); + LOG(INFO) << ""Stats: \n"" << stats_.DebugString(); } void GPUBFCAllocator::GetStats(AllocatorStats* stats) { ",0,test b0240486be5a8c4286961d1751fe8560e9c6970e,tensorflow/tensorflow,"Exposes the memory limit in the allocator's stats. Change: 115036211",allocator.cc,"@@ -27,16 +27,18 @@ void AllocatorStats::Clear() { this->bytes_in_use = 0; this->max_bytes_in_use = 0; this->max_alloc_size = 0; + this->bytes_limit = 0; } string AllocatorStats::DebugString() const { return strings::Printf( + ""Limit: %20lld\n"" ""InUse: %20lld\n"" ""MaxInUse: %20lld\n"" ""NumAllocs: %20lld\n"" ""MaxAllocSize: %20lld\n"", - this->bytes_in_use, this->max_bytes_in_use, this->num_allocs, - this->max_alloc_size); + this->bytes_limit, this->bytes_in_use, this->max_bytes_in_use, + this->num_allocs, this->max_alloc_size); } Allocator::~Allocator() {} ",0,test b0240486be5a8c4286961d1751fe8560e9c6970e,tensorflow/tensorflow,"Exposes the memory limit in the allocator's stats. Change: 115036211",allocator.h,"@@ -45,6 +45,11 @@ struct AllocatorStats { int64 max_bytes_in_use; // The maximum bytes in use. int64 max_alloc_size; // The max single allocation seen. + // The upper limit what the allocator can allocate, if such a limit + // is known. Certain allocator may return 0 to indicate the limit is + // unknown. + int64 bytes_limit; + AllocatorStats() { Clear(); } void Clear(); ",0,test 0c9ddf3ffd78196cb579d040c59a72d604152073,tensorflow/tensorflow,TST: add unit test,tensor_util_test.py,"@@ -314,6 +314,16 @@ class TensorUtilTest(test.TestCase): shape=[3, 4], dtype=dtype))) + def testIntMixedWithDimension(self): + dtype = dtypes.int32 + nptype = np.int32 + t = tensor_util.make_tensor_proto([10, tensor_shape.Dimension(20), 30], + dtype=dtype) + self.assertEquals(dtype, t.dtype) + a = tensor_util.MakeNdarray(t) + self.assertEquals(nptype, a.dtype) + self.assertAllClose(np.array([10, 20, 30], dtype=nptype), a) + def testLong(self): t = tensor_util.make_tensor_proto(10, dtype=dtypes.int64) self.assertProtoEquals("""""" ",0,train fc274666282c17e0fdcda350744e07b52dda827d,tensorflow/tensorflow,"Prepare //tensorflow/python/data/kernel_tests:map_test for Tensor equality. 
PiperOrigin-RevId: 263589206",map_test.py,"@@ -522,10 +522,10 @@ class MapTest(test_base.DatasetTestBase, parameterized.TestCase): divide, name=""cond_mult"") - pred_fn_pairs = { - math_ops.logical_or(math_ops.equal(y, 2), math_ops.equal(y, 3)): - defaults_two, - } + pred_fn_pairs = [ + (math_ops.logical_or(math_ops.equal(y, 2), + math_ops.equal(y, 3)), defaults_two), + ] return control_flow_ops.case( pred_fn_pairs, default=multiply, exclusive=True) @@ -555,10 +555,10 @@ class MapTest(test_base.DatasetTestBase, parameterized.TestCase): def divide(): return x // 2 - pred_fn_pairs = { - math_ops.logical_or(math_ops.equal(y, 2), math_ops.equal(y, 3)): - divide, - } + pred_fn_pairs = [ + (math_ops.logical_or(math_ops.equal(y, 2), + math_ops.equal(y, 3)), divide), + ] return control_flow_ops.case( pred_fn_pairs, default=multiply, exclusive=True) @@ -596,10 +596,10 @@ class MapTest(test_base.DatasetTestBase, parameterized.TestCase): divide, name=""cond_mult"") - pred_fn_pairs = { - math_ops.logical_or(math_ops.equal(y, 2), math_ops.equal(y, 3)): - defaults_two, - } + pred_fn_pairs = [ + (math_ops.logical_or(math_ops.equal(y, 2), + math_ops.equal(y, 3)), defaults_two), + ] return control_flow_ops.case( pred_fn_pairs, default=multiply, exclusive=True) ",0,test b8d17793a7aae010754d7776612af5b9b1f5252f,tensorflow/tensorflow,"Don't redundantly prefix metric names for multi-output models. For multi-output models, we prefix metric names with their associated output names to disambiguate / uniqify them. When these models are repeatedly saved and loaded, this prefixing repeats as well, leading to long metrics names such as ""head_0_head_0_head_0_accuracy"", where ""head_0"" is an output name. PiperOrigin-RevId: 359151924 Change-Id: I509ea27a7d91446c3893d13d73818f857705ef5f",compile_utils.py,"@@ -293,7 +293,19 @@ class LossesContainer(Container): class MetricsContainer(Container): """"""A container class for metrics passed to `Model.compile`."""""" - def __init__(self, metrics=None, weighted_metrics=None, output_names=None): + def __init__(self, metrics=None, weighted_metrics=None, output_names=None, + from_serialized=False): + """"""Initializes a container for metrics. + + Arguments: + metrics: see the `metrics` argument from `tf.keras.Model.compile`. + weighted_metrics: see the `weighted_metrics` argument from + `tf.keras.Model.compile`. + output_names: A list of strings of names of outputs for the model. + from_serialized: Whether the model being compiled is from a serialized + model. Used to avoid redundantly applying pre-processing renaming + steps. + """""" super(MetricsContainer, self).__init__(output_names=output_names) # Keep user-supplied values untouched for recompiling and serialization. @@ -304,6 +316,8 @@ class MetricsContainer(Container): self._weighted_metrics = weighted_metrics self._built = False + self._from_serialized = from_serialized + @property def metrics(self): """"""All metrics in this container."""""" @@ -357,7 +371,11 @@ class MetricsContainer(Container): y_pred, self._weighted_metrics, check_types=False) # Assumes metrics, weighted_metrics have been flattened up to outputs. - self._set_metric_names() + # + # If we are loading a model that has been already serialized, we do not + # want to re-apply any pre-processing metric renaming steps. 
+ if not self._from_serialized: + self._set_metric_names() self._create_ordered_metrics() self._built = True ",0,train b8d17793a7aae010754d7776612af5b9b1f5252f,tensorflow/tensorflow,"Don't redundantly prefix metric names for multi-output models. For multi-output models, we prefix metric names with their associated output names to disambiguate / uniqify them. When these models are repeatedly saved and loaded, this prefixing repeats as well, leading to long metrics names such as ""head_0_head_0_head_0_accuracy"", where ""head_0"" is an output name. PiperOrigin-RevId: 359151924 Change-Id: I509ea27a7d91446c3893d13d73818f857705ef5f",training.py,"@@ -569,6 +569,11 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): if not steps_per_execution: steps_per_execution = kwargs.pop('experimental_steps_per_execution') + # When compiling from an already-serialized model, we do not want to + # reapply some processing steps (e.g. metric renaming for multi-output + # models, which have prefixes added for each corresponding output name). + from_serialized = kwargs.pop('from_serialized', False) + self._validate_compile(optimizer, metrics, **kwargs) self._run_eagerly = run_eagerly @@ -576,7 +581,8 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): self.compiled_loss = compile_utils.LossesContainer( loss, loss_weights, output_names=self.output_names) self.compiled_metrics = compile_utils.MetricsContainer( - metrics, weighted_metrics, output_names=self.output_names) + metrics, weighted_metrics, output_names=self.output_names, + from_serialized=from_serialized) self._configure_steps_per_execution(steps_per_execution or 1) ",0,train b8d17793a7aae010754d7776612af5b9b1f5252f,tensorflow/tensorflow,"Don't redundantly prefix metric names for multi-output models. For multi-output models, we prefix metric names with their associated output names to disambiguate / uniqify them. When these models are repeatedly saved and loaded, this prefixing repeats as well, leading to long metrics names such as ""head_0_head_0_head_0_accuracy"", where ""head_0"" is an output name. PiperOrigin-RevId: 359151924 Change-Id: I509ea27a7d91446c3893d13d73818f857705ef5f",training_v1.py,"@@ -308,6 +308,7 @@ class Model(training_lib.Model): # Prepare Session arguments (legacy). kwargs.pop('cloning', None) # Legacy DistStrat argument, never used. + kwargs.pop('from_serialized', None) # Not used in v1. allowed_kwargs = {'feed_dict', 'fetches', 'options', 'run_metadata'} unknown_kwargs = set(kwargs.keys()) - allowed_kwargs if unknown_kwargs: ",0,train b8d17793a7aae010754d7776612af5b9b1f5252f,tensorflow/tensorflow,"Don't redundantly prefix metric names for multi-output models. For multi-output models, we prefix metric names with their associated output names to disambiguate / uniqify them. When these models are repeatedly saved and loaded, this prefixing repeats as well, leading to long metrics names such as ""head_0_head_0_head_0_accuracy"", where ""head_0"" is an output name. PiperOrigin-RevId: 359151924 Change-Id: I509ea27a7d91446c3893d13d73818f857705ef5f",hdf5_format.py,"@@ -201,7 +201,7 @@ def load_model_from_hdf5(filepath, custom_objects=None, compile=True): # pylint # Compile model. model.compile(**saving_utils.compile_args_from_training_config( - training_config, custom_objects)) + training_config, custom_objects), from_serialized=True) saving_utils.try_build_compiled_arguments(model) # Set optimizer weights. 
",0,train b8d17793a7aae010754d7776612af5b9b1f5252f,tensorflow/tensorflow,"Don't redundantly prefix metric names for multi-output models. For multi-output models, we prefix metric names with their associated output names to disambiguate / uniqify them. When these models are repeatedly saved and loaded, this prefixing repeats as well, leading to long metrics names such as ""head_0_head_0_head_0_accuracy"", where ""head_0"" is an output name. PiperOrigin-RevId: 359151924 Change-Id: I509ea27a7d91446c3893d13d73818f857705ef5f",save_test.py,"@@ -1003,6 +1003,39 @@ class TestWholeModelSaving(keras_parameterized.TestCase): loaded = keras.models.load_model(saved_model_dir) self.assertIs(loaded.layers[1], loaded.layers[2].layer) + @combinations.generate(combinations.combine(mode=['eager'])) + def test_multi_output_metrics_name_stay_same(self): + """"""Tests that metric names don't change with each save/load cycle. + + e.g. ""head_0_accuracy"" should not become ""head_0_head_0_accuracy"" after + saving and loading a model. + """""" + input_ = keras.Input((4,)) + model = keras.Model( + input_, + [keras.layers.Softmax(name='head_0')(keras.layers.Dense(3)(input_)), + keras.layers.Softmax(name='head_1')(keras.layers.Dense(5)(input_))]) + metric = keras.metrics.BinaryAccuracy() + model.compile(optimizer='rmsprop', + loss='mse', + metrics={'head_0': [metric, 'accuracy']}) + + # Run one iteration. + x = np.random.rand(2, 4) + y = {'head_0': np.random.randint(2, size=(2, 3)), + 'head_1': np.random.randint(2, size=(2, 5))} + model.fit(x, y, verbose=0) + + # Save and reload. + save_format = testing_utils.get_save_format() + saved_model_dir = self._save_model_dir() + keras.models.save_model(model, saved_model_dir, save_format=save_format) + loaded = keras.models.load_model(saved_model_dir) + + # Make sure the metrics names from the model before saving match the loaded + # model. + self.assertSequenceEqual(model.metrics_names, loaded.metrics_names) + # Factory functions to create models that will be serialized inside a Network. def _make_graph_network(input_size, output_size): ",0,train b8d17793a7aae010754d7776612af5b9b1f5252f,tensorflow/tensorflow,"Don't redundantly prefix metric names for multi-output models. For multi-output models, we prefix metric names with their associated output names to disambiguate / uniqify them. When these models are repeatedly saved and loaded, this prefixing repeats as well, leading to long metrics names such as ""head_0_head_0_head_0_accuracy"", where ""head_0"" is an output name. PiperOrigin-RevId: 359151924 Change-Id: I509ea27a7d91446c3893d13d73818f857705ef5f",load.py,"@@ -168,7 +168,7 @@ def load(path, compile=True, options=None): # pylint: disable=redefined-builtin 'training_config', None) if training_config is not None: model.compile(**saving_utils.compile_args_from_training_config( - training_config)) + training_config), from_serialized=True) saving_utils.try_build_compiled_arguments(model) else: logging.warning('No training configuration found in save file, so the ' ",0,train 7b1ebf50c9686dacf5fb5036168a9110ae0add32,tensorflow/tensorflow,"Do not assume hasattr is available in Metric.__del__ Python does not guarantee that builtins are available by the time __del__ is called, so using hasattr is unsafe. 
PiperOrigin-RevId: 254255638",monitoring.py,"@@ -121,10 +121,14 @@ class Metric(object): self._metric = self._metric_methods[self._label_length].create(*args) def __del__(self): - if hasattr(self, '_metric'): + try: deleter = self._metric_methods[self._label_length].delete - if deleter is not None: - deleter(self._metric) + metric = self._metric + except AttributeError: + return + + if deleter is not None: + deleter(metric) def get_cell(self, *labels): """"""Retrieves the cell."""""" ",0,train d6efc3b8d44c6ac583b4ff529343749cdebbff06,tensorflow/tensorflow,"Remove deprecated overload of CopyRawToHost PiperOrigin-RevId: 438902197",pjrt_client.h,"@@ -667,18 +667,6 @@ class PjRtBuffer { virtual PjRtFuture CopyRawToHost(void* dst, int64_t offset, int64_t transfer_size) = 0; - // Transfers a sub-range of the on-device representation of the buffer. - // offset+transfer_size must be less than GetOnDeviceSizeInBytes. on_ready - // is called if and only if CopyRawToHost returns OK. on_ready will be called - // with a non-OK status if the buffer asynchronously transitions to an error - // state. - ABSL_DEPRECATED(""Use CopyRawToHost(...).OnReady() instead"") - Status CopyRawToHost(void* dst, int64_t offset, int64_t transfer_size, - std::function on_ready) { - CopyRawToHost(dst, offset, transfer_size).OnReady(std::move(on_ready)); - return Status::OK(); - } - // Drops the buffer's reference to its associated device memory, leaving the // buffer in an invalid state. The memory will be freed lazily when all async // operations using the buffer have completed, according to the allocation ",0,train 9085ba6c5e291a27b17ca6e6c5e6e1d3fbda77c7,tensorflow/tensorflow,Change batch_norm default (#1831),batch_norm_ops.py,"@@ -19,7 +19,7 @@ from __future__ import print_function import tensorflow as tf -def batch_normalize(tensor_in, epsilon=1e-5, convnet=True, decay=0.9, +def batch_normalize(tensor_in, epsilon=1e-5, convnet=False, decay=0.9, scale_after_normalization=True): """"""Batch Normalization ",0,train 9085ba6c5e291a27b17ca6e6c5e6e1d3fbda77c7,tensorflow/tensorflow,Change batch_norm default (#1831),conv_ops.py,"@@ -57,7 +57,7 @@ def conv2d(tensor_in, n_filters, filter_shape, strides=None, padding='SAME', tf.float32) output = output + bias_var if batch_norm: - output = batch_normalize(output) + output = batch_normalize(output, convnet=True) if activation: output = activation(output) return output ",0,train f94c8482969c50c07f43063e01fd63747ef3a99f,tensorflow/tensorflow,Reverts erroneous change that removed adj_x/adj_y support in BatchMatMul,convert_nodes.cc,"@@ -4194,9 +4194,8 @@ Status ConvertBatchMatMul(OpConverterParams* params) { } TFAttrs attrs(node_def); - if (attrs.get(""adj_x"") || attrs.get(""adj_y"")) { - return errors::InvalidArgument(""TensorRT cannot adjoint inputs.""); - } + const bool transpose_a = attrs.get(""adj_x""); + const bool transpose_b = attrs.get(""adj_y""); // Removes the batch dimension from weights. 
const auto remove_weights_batch_dim = @@ -4232,8 +4231,8 @@ Status ConvertBatchMatMul(OpConverterParams* params) { return Status::OK(); } - return ConvertMatMulHelper(params, tensor_l, tensor_r, /*transpose_a=*/false, - /*transpose_b=*/false, node_def.name()); + return ConvertMatMulHelper(params, tensor_l, tensor_r, transpose_a, + transpose_b, node_def.name()); } Status ConvertSoftmax(OpConverterParams* params) { ",0,train f94c8482969c50c07f43063e01fd63747ef3a99f,tensorflow/tensorflow,Reverts erroneous change that removed adj_x/adj_y support in BatchMatMul,convert_nodes_test.cc,"@@ -1678,8 +1678,10 @@ void OpConverterTest::TestMatMulHelper( AddTestWeights(""weights"", {2, 2}, {0, 1, 2, 3}); if (is_batch_matmul) { if (transpose_a || transpose_b) { - RunValidationAndConversion(node_def, error::INVALID_ARGUMENT, - ""TensorRT cannot adjoint inputs.""); + RunValidationAndConversion( + node_def, error::INVALID_ARGUMENT, + ""Input weight attempts to broadcast across batch dimension for "" + ""BatchMatMul, at my_matmul""); } else { RunValidationAndConversion( node_def, error::INVALID_ARGUMENT, @@ -1717,8 +1719,10 @@ void OpConverterTest::TestMatMulHelper( AddTestWeights(""weights"", {2, 2}, {0, 1, 2, 3}); if (is_batch_matmul) { if (transpose_b) { - RunValidationAndConversion(node_def, error::INVALID_ARGUMENT, - ""TensorRT cannot adjoint inputs.""); + RunValidationAndConversion( + node_def, error::INVALID_ARGUMENT, + ""Input weight attempts to broadcast across batch dimension for "" + ""BatchMatMul, at my_matmul""); } else { RunValidationAndConversion( node_def, error::INVALID_ARGUMENT, @@ -1822,22 +1826,36 @@ TEST_F(OpConverterTest, ConvertBatchMatMul) { return matmul.operation.node()->def(); }; - { - Reset(); - NodeDef node_def = get_batch_matmul_nodedef(DT_FLOAT, /*transpose_a=*/false, - /*transpose_b=*/false); - AddTestTensor(""input"", {1, 3}, /*batch_size=*/1); - AddTestWeights(""weights"", {1, 3, 1}, {1, 2, 3}); + for (bool transpose_a : {false, true}) { + for (bool transpose_b : {false, true}) { + Reset(); + NodeDef node_def = + get_batch_matmul_nodedef(DT_FLOAT, transpose_a, transpose_b); + AddTestTensor(""input"", {2, 2}, /*batch_size=*/1); + AddTestWeights(""weights"", {1, 2, 2}, {1, 2, 3, 4}); - RunValidationAndConversion(node_def); - TRT_TensorOrWeights output; - TF_EXPECT_OK(GetTensorOrWeights(""my_matmul"", &output)); - ASSERT_TRUE(output.is_tensor()); - ExpectTrtDimsEqualsArray({1, 1}, output.tensor()->getDimensions()); - const DataVec input_data{{""input"", test::AsTensor({0, 1, 2})}}; - DataVec output_data{{""my_matmul"", ConstructTensor(1, 1)}}; - BuildAndRun(input_data, &output_data); - EXPECT_THAT(GetSpanForData(output_data[0]), ElementsAre(8)); + RunValidationAndConversion(node_def); + TRT_TensorOrWeights output; + TF_EXPECT_OK(GetTensorOrWeights(""my_matmul"", &output)); + ASSERT_TRUE(output.is_tensor()); + ExpectTrtDimsEqualsArray({2, 2}, output.tensor()->getDimensions()); + const DataVec input_data{{""input"", test::AsTensor({0, 1, 2, 3})}}; + DataVec output_data{{""my_matmul"", ConstructTensor(4)}}; + BuildAndRun(input_data, &output_data); + if (!transpose_a && !transpose_b) { + EXPECT_THAT(GetSpanForData(output_data[0]), + ElementsAre(3, 4, 11, 16)); + } else if (transpose_a && transpose_b) { + EXPECT_THAT(GetSpanForData(output_data[0]), + ElementsAre(4, 8, 7, 15)); + } else if (transpose_a) { + EXPECT_THAT(GetSpanForData(output_data[0]), + ElementsAre(6, 8, 10, 14)); + } else if (transpose_b) { + EXPECT_THAT(GetSpanForData(output_data[0]), + ElementsAre(2, 4, 8, 18)); + 
} + } } TestMatMulHelper(get_batch_matmul_nodedef, ""BatchMatMul""); ",0,train 8017c247c84c4c80fa11744b1b913aec3ee88f3e,tensorflow/tensorflow,"Mark the `SerializeSparseOp` kernel as inexpensive. Since this op only performs a constant amount of work, and typically executes in a few microseconds, it should be profitable to execute this op inline, rather than scheduling it on a remote thread. PiperOrigin-RevId: 186522885",serialize_sparse_op.cc,"@@ -44,6 +44,8 @@ class SerializeSparseOp : public OpKernel { explicit SerializeSparseOp(OpKernelConstruction* context) : OpKernel(context) {} + bool IsExpensive() override; + Status Initialize(Tensor* result); Status Serialize(const Tensor& input, T* result); @@ -82,6 +84,21 @@ class SerializeSparseOp : public OpKernel { } }; +// NOTE(mrry): We specialize the IsExpensive() method differently for +// the string and variant cases, because (i) the string version +// actually performs memory copies as part of its serialization (and +// is hence potentially expensive), and (ii) the variant version +// performs O(1) shallow copies (and hence is much cheaper than +// dispatching to another thread would be). +template <> +bool SerializeSparseOp::IsExpensive() { + return true; +} +template <> +bool SerializeSparseOp::IsExpensive() { + return false; +} + template <> Status SerializeSparseOp::Initialize(Tensor* result) { *result = Tensor(DT_STRING, TensorShape({3})); ",0,train 29a67eaedd8d95866011bb1c87a9d1739d448686,tensorflow/tensorflow,"Fix typo in error message. PiperOrigin-RevId: 214348730",nvptx_compiler.cc,"@@ -402,7 +402,7 @@ void WarnIfBadPtxasVersion(const string& ptxas_path) { LOG(WARNING) << ""*** WARNING *** You are using ptxas "" << vmaj << ""."" << vmin << ""."" << vdot - << "", which older than 9.2.88. ptxas 9.x before 9.2.88 is known to "" + << "", which is older than 9.2.88. ptxas 9.x before 9.2.88 is known to "" ""miscompile XLA code, leading to incorrect results or "" ""invalid-address errors.\n\nYou do not need to update to CUDA "" ""9.2.88; cherry-picking the ptxas binary is sufficient.""; ",0,train 16a50fcbacb8e46f6c4560a6e58ed26f5fd2d133,tensorflow/tensorflow,"Enable int64 test cases for MatMul Signed-off-by: Yong Tang ",matmul_op_test.py,"@@ -102,7 +102,7 @@ class MatMulGradientTest(test_lib.TestCase): def _GetMatMulGradientTest(a_np_, b_np_, use_static_shape_, **kwargs_): def Test(self): - if not use_static_shape_ or a_np_.dtype in (np.int32, np.float16): + if not use_static_shape_ or a_np_.dtype in (np.int32, np.int64, np.float16): self.skipTest(""Skipping infeasible gradient test."") # Transpose and possibly conjugate a_np_ and b_np_ according to the @@ -214,9 +214,9 @@ if __name__ == ""__main__"": sizes = [1, 3, 5] trans_options = [[False, False], [True, False], [False, True]] for use_static_shape in [False, True]: - for dtype in (np.int32, np.float16, np.float32, np.float64, np.complex64, - np.complex128): - if not use_static_shape and dtype == np.int32: + for dtype in (np.int32, np.int64, np.float16, np.float32, np.float64, + np.complex64, np.complex128): + if not use_static_shape and dtype == np.int32 and dtype == np.int64: # TODO(rmlarsen): Re-enable this test when we have fixed the underlying # bug in Windows (b/35935459). continue ",0,train bbf6bb9920823f2fafe83bcd38ed0b06806b6d5b,tensorflow/tensorflow,"[cleanup] Use scoped counter for measuring GraphOptimizer and TFData latencies. Add accumulate functionality to the ScopedTimer. 
PiperOrigin-RevId: 409447266 Change-Id: I12e377f5425681ce1933571269f00cd23f0f1ac8",graph_optimizer.cc,"@@ -39,9 +39,10 @@ void GraphOptimizer::Optimize(FunctionLibraryRuntime* runtime, Env* env, const Device* device, std::unique_ptr* graph, const Options& options) { + static const char* kGraphOptimizerCategory = ""GraphOptimizerPass""; + Graph* g = graph->get(); DumpGraph(""Initial"", g); - bool changed = true; const int kMaxRounds = 10; for (int rounds = 0; rounds < kMaxRounds; ++rounds) { @@ -51,8 +52,9 @@ void GraphOptimizer::Optimize(FunctionLibraryRuntime* runtime, Env* env, changed = true; } - uint64 inlining_start_us = Env::Default()->NowMicros(); - uint64 inlining_total_us = 0; + tensorflow::metrics::ScopedCounter<2> inlining_timings( + tensorflow::metrics::GetGraphOptimizationCounter(), + {kGraphOptimizerCategory, ""function_inlining""}); if (opts_.do_function_inlining() && RemoveDeadNodes(g)) { DumpGraph(""RemoveDeadNodes"", g); changed = true; @@ -62,11 +64,14 @@ void GraphOptimizer::Optimize(FunctionLibraryRuntime* runtime, Env* env, changed = true; } if (opts_.do_function_inlining()) { - inlining_total_us += Env::Default()->NowMicros() - inlining_start_us; + inlining_timings.AccumulateAndStop(); } if (opts_.do_constant_folding()) { - const uint64 pass_start_us = Env::Default()->NowMicros(); + tensorflow::metrics::ScopedCounter<2> timings( + tensorflow::metrics::GetGraphOptimizationCounter(), + {kGraphOptimizerCategory, ""constant_folding""}); + ConstantFoldingOptions cf_opts; cf_opts.shape_map = options.shape_map; cf_opts.consider = options.cf_consider_fn; @@ -82,32 +87,28 @@ void GraphOptimizer::Optimize(FunctionLibraryRuntime* runtime, Env* env, DumpGraph(""ConstFolding"", g); changed = true; } - const uint64 pass_end_us = Env::Default()->NowMicros(); - metrics::UpdateGraphOptimizerPassTime(""constant_folding"", - pass_end_us - pass_start_us); } - inlining_start_us = Env::Default()->NowMicros(); - if (opts_.do_function_inlining() && FixupSourceAndSinkEdges(g)) { - DumpGraph(""FixupSourceAndSinkEdges"", g); - changed = true; - } if (opts_.do_function_inlining()) { - inlining_total_us += Env::Default()->NowMicros() - inlining_start_us; + inlining_timings.Start(); + if (FixupSourceAndSinkEdges(g)) { + DumpGraph(""FixupSourceAndSinkEdges"", g); + changed = true; + } + inlining_timings.AccumulateAndStop(); } if (opts_.do_common_subexpression_elimination()) { - const uint64 pass_start_us = Env::Default()->NowMicros(); + tensorflow::metrics::ScopedCounter<2> timings( + tensorflow::metrics::GetGraphOptimizationCounter(), + {kGraphOptimizerCategory, ""common_subexpression_elimination""}); if (OptimizeCSE(g, options.cse_consider_fn)) { DumpGraph(""OptimizeCSE"", g); changed = true; } - const uint64 pass_end_us = Env::Default()->NowMicros(); - metrics::UpdateGraphOptimizerPassTime(""common_subexpression_elimination"", - pass_end_us - pass_start_us); } if (opts_.do_function_inlining()) { - inlining_start_us = Env::Default()->NowMicros(); + inlining_timings.Start(); ExpandInlineFunctionsOptions expand_inline_opts; expand_inline_opts.native_options.inlined_function_body_placer = InlinedFunctionBodyPlacer::SingleDevice(); @@ -144,10 +145,7 @@ void GraphOptimizer::Optimize(FunctionLibraryRuntime* runtime, Env* env, changed = true; } - const uint64 inlining_end_us = Env::Default()->NowMicros(); - metrics::UpdateGraphOptimizerPassTime( - ""function_inlining"", - (inlining_end_us - inlining_start_us) + inlining_total_us); + inlining_timings.ReportAndStop(); } if (!changed) break; } ",0,train 
bbf6bb9920823f2fafe83bcd38ed0b06806b6d5b,tensorflow/tensorflow,"[cleanup] Use scoped counter for measuring GraphOptimizer and TFData latencies. Add accumulate functionality to the ScopedTimer. PiperOrigin-RevId: 409447266 Change-Id: I12e377f5425681ce1933571269f00cd23f0f1ac8",metrics.cc,"@@ -408,24 +408,6 @@ void UpdateGrapplerPassTime(const string& pass_name, } } -void UpdateTFDataPassTime(const string& pass_name, - const uint64 running_time_usecs) { - if (running_time_usecs > 0) { - GetGraphOptimizationCounter() - ->GetCell(""TFDataPass"", pass_name) - ->IncrementBy(running_time_usecs); - } -} - -void UpdateGraphOptimizerPassTime(const string& pass_name, - const uint64 running_time_usecs) { - if (running_time_usecs > 0) { - GetGraphOptimizationCounter() - ->GetCell(""GraphOptimizerPass"", pass_name) - ->IncrementBy(running_time_usecs); - } -} - void UpdateGraphBuildTime(const uint64 running_time_usecs) { if (running_time_usecs > 0) { static auto* build_graph_calls_cell = build_graph_calls->GetCell(); ",0,train bbf6bb9920823f2fafe83bcd38ed0b06806b6d5b,tensorflow/tensorflow,"[cleanup] Use scoped counter for measuring GraphOptimizer and TFData latencies. Add accumulate functionality to the ScopedTimer. PiperOrigin-RevId: 409447266 Change-Id: I12e377f5425681ce1933571269f00cd23f0f1ac8",metrics.h,"@@ -203,6 +203,23 @@ class ScopedCounter final { // Start the measurement with the existing set of labels. void Reset() { Init(); } + // Temporarily stop the timer, but keep accumulated time. + void AccumulateAndStop() { + if (started_) { + accumulated_time_ = tensorflow::Env::Default()->NowMicros() - start_time_; + started_ = false; + } + } + + // Start previously stopped timer. + void Start() { + if (started_) return; + + // Keep previously accumulated time if any. + start_time_ = tensorflow::Env::Default()->NowMicros(); + started_ = true; + } + ~ScopedCounter() { ReportAndStop(); } private: @@ -210,6 +227,7 @@ class ScopedCounter final { void ReportInternal(std::index_sequence) { uint64 time_interval = tensorflow::Env::Default()->NowMicros() - start_time_; + time_interval += accumulated_time_; if (time_interval > 0) { counter_->GetCell(labels_[S]...)->IncrementBy(time_interval); } @@ -218,25 +236,22 @@ class ScopedCounter final { void Init() { start_time_ = tensorflow::Env::Default()->NowMicros(); started_ = true; + accumulated_time_ = 0; } monitoring::Counter* counter_; std::array labels_; bool started_{false}; uint64 start_time_; + uint64 accumulated_time_; }; // Returns a counter used to capture timing metrics for graph optimization // passes. monitoring::Counter<2>* GetGraphOptimizationCounter(); -// Updates the metrics stored about graph optimizations. void UpdateGrapplerPassTime(const string& pass_name, const uint64 running_time_usecs); -void UpdateTFDataPassTime(const string& pass_name, - const uint64 running_time_usecs); -void UpdateGraphOptimizerPassTime(const string& pass_name, - const uint64 running_time_usecs); // Updates metrics for time to distribute variables to all TPU hosts. void UpdateTpuVariableDistributionTime(const uint64 distribution_time_usecs); ",0,train bbf6bb9920823f2fafe83bcd38ed0b06806b6d5b,tensorflow/tensorflow,"[cleanup] Use scoped counter for measuring GraphOptimizer and TFData latencies. Add accumulate functionality to the ScopedTimer. 
PiperOrigin-RevId: 409447266 Change-Id: I12e377f5425681ce1933571269f00cd23f0f1ac8",meta_optimizer.cc,"@@ -103,10 +103,11 @@ Status TFDataMetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, // Perform optimizations in a meaningful order. for (const auto& optimization : kTFDataOptimizations) { - const uint64 pass_start_us = Env::Default()->NowMicros(); + tensorflow::metrics::ScopedCounter<2> timings( + tensorflow::metrics::GetGraphOptimizationCounter(), + {""TFData"", optimization}); Status status = ApplyOptimization(optimization, cluster, &optimized_item); - const uint64 pass_end_us = Env::Default()->NowMicros(); - metrics::UpdateTFDataPassTime(optimization, pass_end_us - pass_start_us); + timings.ReportAndStop(); if (!status.ok()) return status; } ",0,train b56ecc8f4bff7576bf48431d3f7b61ab466b6420,tensorflow/tensorflow,Used true_divide to make sure float division,ops_test.py,"@@ -178,9 +178,9 @@ class OpsTest(test_util.TensorFlowTestCase): if all(x >= 0 for x in v2): self.assertAllEqual((a**b), np.power(v1, v2)) - self.assertAllEqual((a / b), np.divide(v1, v2)) + self.assertAllEqual((a / b), np.true_divide(v1, v2)) - self.assertAllEqual((a / a), np.divide(v1, v1)) + self.assertAllEqual((a / a), np.true_divide(v1, v1)) self.assertAllEqual((a % b), np.mod(v1, v2)) self.assertAllEqual((a < b), np.less(v1, v2)) ",0,train a0c56725b12af530a7869de6913826a41448f61f,tensorflow/tensorflow,"Update metrics.py Updated as per https://github.com/tensorflow/tensorflow/pull/47343#issuecomment-793362918",metrics.py,"@@ -3343,7 +3343,7 @@ def categorical_accuracy(y_true, y_pred): Categorical accuracy values. """""" # assert if predicted and true labels Tensors have the same shape - check_ops.assert_equal_v2(array_ops.shape_v2(y_pred), array_ops.shape_v2(y_pred)) + check_ops.assert_equal_v2(array_ops.shape_v2(y_pred), array_ops.shape_v2(y_true)) return math_ops.cast( math_ops.equal( ",0,train 82f4f50f4fbfd74aceb741ff097d6c42688b5023,tensorflow/tensorflow,"Add check for events_ not containing the event we are waiting on because it has already completed PiperOrigin-RevId: 337185599 Change-Id: I9c73388c0a99c2abbc52aef4e7bf2c61656e8199",pod_tpu_driver.cc,"@@ -736,24 +736,38 @@ class PodTpuDriver : public TpuDriver { auto done = [this, event_id]() { mu_.AssertHeld(); - if (events_.count(event_id) == 0) { - LOG(ERROR) << ""Cannot find event id "" << event_id - << "" in WaitForEvent.""; - } - return events_[event_id]->underlying_event != nullptr && - events_[event_id]->underlying_event.use_count() != 0; + // The event was either completed and erased from the map or we have + // an underlying event available to us. + return events_.count(event_id) == 0 || + (events_[event_id]->underlying_event != nullptr && + events_[event_id]->underlying_event.use_count() != 0); }; auto status = mu_.AwaitWithTimeout(absl::Condition(&done), duration); if (!status) { return absl::nullopt; } - underlying_event = events_[event_id]->underlying_event; + + if (events_.count(event_id) > 0) { + underlying_event = events_[event_id]->underlying_event; + } else { + underlying_event = nullptr; + } } // Wait for the underlying event without holding on to the event_lock_, or // else incoming events will not be processed. 
- return underlying_event->AwaitWithTimeout(duration); + if (underlying_event != nullptr) { + return underlying_event->AwaitWithTimeout(duration); + } else { + absl::MutexLock l(&mu_); + auto event_status = abnormal_event_status_.find(event_id); + if (event_status == abnormal_event_status_.end()) { + return Status::OK(); + } else { + return event_status->second; + } + } } void AddCallbackForEvent(int64_t event_id, std::function fn) ",0,train e31e0f7c71d051ae8e7d4ce7b07ad9ea3ec8e508,tensorflow/tensorflow,"Use resource variable with placeholder for KMeans. PiperOrigin-RevId: 236676217",clustering_ops.py,"@@ -286,36 +286,31 @@ class KMeans(object): - update_in_steps: numbers of steps left before we sync cluster_centers_updated back to cluster_centers. """""" - init_value = array_ops.constant([], dtype=dtypes.float32) + init_value = array_ops.placeholder_with_default([], shape=None) cluster_centers = variable_scope.variable( - init_value, name=CLUSTERS_VAR_NAME, validate_shape=False, - use_resource=False) + init_value, name=CLUSTERS_VAR_NAME, validate_shape=False) cluster_centers_initialized = variable_scope.variable( - False, dtype=dtypes.bool, name='initialized', use_resource=False) + False, dtype=dtypes.bool, name='initialized') if self._use_mini_batch and self._mini_batch_steps_per_iteration > 1: # Copy of cluster centers actively updated each step according to # mini-batch update rule. cluster_centers_updated = variable_scope.variable( - init_value, name='clusters_updated', validate_shape=False, - use_resource=False) + init_value, name='clusters_updated', validate_shape=False) # How many steps till we copy the updated clusters to cluster_centers. update_in_steps = variable_scope.variable( self._mini_batch_steps_per_iteration, dtype=dtypes.int64, - name='update_in_steps', - use_resource=False) + name='update_in_steps') # Count of points assigned to cluster_centers_updated. cluster_counts = variable_scope.variable( - array_ops.zeros([num_clusters], dtype=dtypes.int64), - use_resource=False) + array_ops.zeros([num_clusters], dtype=dtypes.int64)) else: cluster_centers_updated = cluster_centers update_in_steps = None cluster_counts = ( - variable_scope.variable( # pylint:disable=g-long-ternary - array_ops.ones([num_clusters], dtype=dtypes.int64), - use_resource=False) + variable_scope.variable( + array_ops.ones([num_clusters], dtype=dtypes.int64)) if self._use_mini_batch else None) return (cluster_centers, cluster_centers_initialized, cluster_counts, cluster_centers_updated, update_in_steps) ",0,train 6ab65bc0c96982c538454ace8f97cc010edc66a0,tensorflow/tensorflow,"Better workaround for MSVC 14.0 limitation related to constexpr array. 
PiperOrigin-RevId: 258793545",memmapped_file_system.cc,"@@ -190,13 +190,8 @@ const void* MemmappedFileSystem::GetMemoryWithOffset(uint64 offset) const { return reinterpret_cast(mapped_memory_->data()) + offset; } -#if defined(_MSC_VER) -constexpr char* MemmappedFileSystem::kMemmappedPackagePrefix; -constexpr char* MemmappedFileSystem::kMemmappedPackageDefaultGraphDef; -#else -constexpr char MemmappedFileSystem::kMemmappedPackagePrefix[]; -constexpr char MemmappedFileSystem::kMemmappedPackageDefaultGraphDef[]; -#endif +constexpr const char MemmappedFileSystem::kMemmappedPackagePrefix[]; +constexpr const char MemmappedFileSystem::kMemmappedPackageDefaultGraphDef[]; Status MemmappedFileSystem::InitializeFromFile(Env* env, const string& filename) { ",0,test 6ab65bc0c96982c538454ace8f97cc010edc66a0,tensorflow/tensorflow,"Better workaround for MSVC 14.0 limitation related to constexpr array. PiperOrigin-RevId: 258793545",memmapped_file_system.h,"@@ -53,19 +53,11 @@ class MemmappedFileSystem : public FileSystem { public: // Memmapped regions use this prefix to distinguish from // the filesystem. -#if defined(_MSC_VER) - static constexpr char* kMemmappedPackagePrefix = -#else - static constexpr char kMemmappedPackagePrefix[] = -#endif + static constexpr const char kMemmappedPackagePrefix[] = ""memmapped_package://""; -// The default graphdef in the package. -#if defined(_MSC_VER) - static constexpr char* kMemmappedPackageDefaultGraphDef = -#else - static constexpr char kMemmappedPackageDefaultGraphDef[] = -#endif + // The default graphdef in the package. + static constexpr const char kMemmappedPackageDefaultGraphDef[] = ""memmapped_package://.""; MemmappedFileSystem(); ",0,test 27ea707dfef54fec24e5b92210142898a6e87dfc,tensorflow/tensorflow,"In the doc example, commas in tensors are missing. Now they are added.",array_ops.cc,"@@ -660,14 +660,14 @@ For example: ```prettyprint # tensor 't' is [1, 2, 3, 4, 5, 6, 7, 8, 9] # tensor 't' has shape [9] -reshape(t, [3, 3]) ==> [[1, 2, 3] - [4, 5, 6] +reshape(t, [3, 3]) ==> [[1, 2, 3], + [4, 5, 6], [7, 8, 9]] -# tensor 't' is [[[1, 1], [2, 2]] +# tensor 't' is [[[1, 1], [2, 2]], # [[3, 3], [4, 4]]] # tensor 't' has shape [2, 2, 2] -reshape(t, [2, 4]) ==> [[1, 1, 2, 2] +reshape(t, [2, 4]) ==> [[1, 1, 2, 2], [3, 3, 4, 4]] # tensor 't' is [[[1, 1, 1], ",0,test 9392ffa09224f0a7735aa7076bee2024c39f1e69,tensorflow/tensorflow,"Improve compatibility of while_v2 with XLA tests Remove assumption where resource variables could not be included as outputs of the body. We instead iterate through the outputs to find the first resource variable index. Also loosen the requirements to specify maximum_iterations for XLA. PiperOrigin-RevId: 226932912",while_op.cc,"@@ -291,20 +291,15 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) { xla::XlaOp while_result = xla::While(cond_wrapper, *body.computation, init); - auto while_shape_or = builder->GetShape(while_result); - OP_REQUIRES_OK(ctx, while_shape_or.status()); - auto count = xla::ShapeUtil::TupleElementCount(while_shape_or.ValueOrDie()); - int max_index = body.outputs.size() + body.resource_updates.size() - 1; - OP_REQUIRES( - ctx, max_index < count, - errors::Internal(""Max tuple element requested ("", max_index, - "") needs to be less than tuple size ("", count, "")"")); - - // Sets non-variable outputs. + // Sets non-variable outputs and determine when resource variables start. 
+ int resource_index = 0; for (int i = 0; i < ctx->num_outputs(); ++i) { if (ctx->input_type(i) != DT_RESOURCE) { ctx->SetOutput(body.input_mapping[i], xla::GetTupleElement(while_result, i)); + ++resource_index; + } else { + break; } } if (has_token_input_output_) { @@ -326,7 +321,7 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) { XlaResource* resource; OP_REQUIRES_OK(ctx, ctx->GetResourceInput(update.input_index, &resource)); if (update.modified) { - int pos = body.outputs.size() + i; + int pos = resource_index + i; OP_REQUIRES_OK(ctx, resource->SetFromPack( arguments[update.input_index].tensor_array_gradients, ",0,train 9392ffa09224f0a7735aa7076bee2024c39f1e69,tensorflow/tensorflow,"Improve compatibility of while_v2 with XLA tests Remove assumption where resource variables could not be included as outputs of the body. We instead iterate through the outputs to find the first resource variable index. Also loosen the requirements to specify maximum_iterations for XLA. PiperOrigin-RevId: 226932912",control_flow_ops_py_test.py,"@@ -1183,6 +1183,8 @@ class ControlFlowTest(test.TestCase): @test_util.run_v1_only(""b/120545219"") def testInvalidMaximumIterationsWhileLoopGradientInXLAContext(self): + if control_flow_util.ENABLE_CONTROL_FLOW_V2: + self.skipTest(""WhileV2 does lazy evaluation of maximum_iterations"") v = constant_op.constant(1.0) def inner_body(i, x): @@ -1203,44 +1205,27 @@ class ControlFlowTest(test.TestCase): gs = gradients_impl.gradients(loop_no_xla, v) self.evaluate(gs) # This should execute without error. - if control_flow_util.ENABLE_CONTROL_FLOW_V2: - xla_context = control_flow_ops.XLAControlFlowContext() - xla_context.Enter() - with self.assertRaisesRegexp( - ValueError, - r""maximum_iterations is None. It is required and must be statically "" - r""known \(e.g. a constant value or known shape dimension\) when "" - r""building while_loop in XLA context.""): - loop_no_maxiter = create_while_loop() - with self.assertRaisesRegexp( - ValueError, - r""maximum_iterations must be statically "" - r""known \(e.g. a constant value or known shape dimension\) when "" - r""building while_loop in XLA context.""): - loop_with_maxiter = create_while_loop(maximum_iterations=2) - xla_context.Exit() - else: - xla_context = control_flow_ops.XLAControlFlowContext() - xla_context.Enter() - loop_no_maxiter = create_while_loop() - loop_with_maxiter = create_while_loop(maximum_iterations=2) - xla_context.Exit() + xla_context = control_flow_ops.XLAControlFlowContext() + xla_context.Enter() + loop_no_maxiter = create_while_loop() + loop_with_maxiter = create_while_loop(maximum_iterations=2) + xla_context.Exit() - with self.assertRaisesRegexp( - ValueError, - r""Cannot create a gradient accumulator for tensor '.+' inside "" - r""XLA while_loop because maximum_iterations was not passed to "" - r""the tf.while_loop call \('.+'\).""): - _ = gradients_impl.gradients(loop_no_maxiter, v) + with self.assertRaisesRegexp( + ValueError, + r""Cannot create a gradient accumulator for tensor '.+' inside "" + r""XLA while_loop because maximum_iterations was not passed to "" + r""the tf.while_loop call \('.+'\).""): + _ = gradients_impl.gradients(loop_no_maxiter, v) - with self.assertRaisesRegexp( - ValueError, - r""Cannot create a gradient accumulator for tensor '.+' inside XLA "" - r""while_loop. maximum_iterations tensor '.+' for while_loop context "" - r""'.+' must be statically known \(e.g. 
a constant value or known "" - r""shape dimension\), or be defined at or outside the while loop "" - r""context '.*' \(currently defined in '.*'\)""): - _ = gradients_impl.gradients(loop_with_maxiter, v) + with self.assertRaisesRegexp( + ValueError, + r""Cannot create a gradient accumulator for tensor '.+' inside XLA "" + r""while_loop. maximum_iterations tensor '.+' for while_loop context "" + r""'.+' must be statically known \(e.g. a constant value or known "" + r""shape dimension\), or be defined at or outside the while loop "" + r""context '.*' \(currently defined in '.*'\)""): + _ = gradients_impl.gradients(loop_with_maxiter, v) @test_util.run_v1_only(""b/120545219"") def testInvalidMaximumIterationsFromSiblingContextWhileLoopInXLAContext(self): @@ -1265,10 +1250,7 @@ class ControlFlowTest(test.TestCase): xla_context = control_flow_ops.XLAControlFlowContext() xla_context.Enter() with self.assertRaisesRegexp( - ValueError, - r""maximum_iterations must be statically known \(e.g. a constant value"" - r"" or known shape dimension\) when building while_loop in XLA "" - r""context.""): + ValueError, r""Tensor.*Placeholder:0.* must be from the same graph.*""): loop = create_while_loop() xla_context.Exit() else: ",0,train 9392ffa09224f0a7735aa7076bee2024c39f1e69,tensorflow/tensorflow,"Improve compatibility of while_v2 with XLA tests Remove assumption where resource variables could not be included as outputs of the body. We instead iterate through the outputs to find the first resource variable index. Also loosen the requirements to specify maximum_iterations for XLA. PiperOrigin-RevId: 226932912",while_v2.py,"@@ -254,6 +254,7 @@ def _WhileGrad(op, *grads): # pylint: disable=invalid-name maximum_iterations = op.get_attr( ""_maximum_iterations"") if _is_in_xla_context() else None assert not _is_in_xla_context() or maximum_iterations is not None + maximum_iterations = _validate_and_convert_to_tensor(maximum_iterations) # Set the incoming gradient of non-trainable inputs to None. It is possible # that we receive non-None gradients for non-trainable types in nested while @@ -376,28 +377,30 @@ def _validate_and_convert_to_tensor(maximum_iterations): Raises: ValueError: If `maximum_iterations` is invalid. """""" - if _is_in_xla_context(): - if maximum_iterations is None: - raise ValueError(""maximum_iterations is None. It is required and must "" - ""be statically known (e.g. a constant value or known "" - ""shape dimension) when building while_loop in XLA "" - ""context."") - if isinstance(maximum_iterations, ops.Tensor): - # Get the constant value from the `maximum_iterations` tensor to avoid - # capturing a Const tensor from outside this graph. - maximum_iterations = tensor_util.constant_value(maximum_iterations) - if maximum_iterations is None: - raise ValueError(""maximum_iterations must be statically known (e.g. a "" - ""constant value or known shape dimension) when "" - ""building while_loop in XLA context."") - - if maximum_iterations is not None: - # EmptyTensorList expects `max_num_elements` to be of type int32. 
- maximum_iterations = ops.convert_to_tensor( - maximum_iterations, dtype=dtypes.int32, name=""maximum_iterations"") - if maximum_iterations.shape.ndims != 0: - raise ValueError(""maximum_iterations must be a scalar, saw shape: %s"" % - maximum_iterations.shape) + if maximum_iterations is None: + return None + + if _is_in_xla_context() and isinstance(maximum_iterations, ops.Tensor): + # Get the constant value from the `maximum_iterations` tensor to avoid + # capturing a Const tensor from outside this graph. + value = tensor_util.constant_value(maximum_iterations) + if value is None: + # XLA requires maximum_iterations to be statically known (e.g. a + # constant value or known shape dimension) when intermediate values + # from the forward pass are needed in the gradients pass. However, + # maximum_iterations may not be required if the gradient isn't built + # or no intermediates are required, thus we return the tensor as is. + return maximum_iterations + + maximum_iterations = value + + # EmptyTensorList expects `max_num_elements` to be of type int32. + maximum_iterations = ops.convert_to_tensor( + maximum_iterations, dtype=dtypes.int32, name=""maximum_iterations"") + if maximum_iterations.shape.ndims != 0: + raise ValueError(""maximum_iterations must be a scalar, saw shape: %s"" % + maximum_iterations.shape) + return maximum_iterations @@ -815,7 +818,7 @@ def _copy_handle_data(src_tensors, tgt_tensors): def _maybe_set_maximum_iterations_attr(op, maximum_iterations): - if control_flow_util.IsInXLAContext(op): + if maximum_iterations is not None and control_flow_util.IsInXLAContext(op): # Store the maximum_iterations to use in the gradient pass. op._set_attr( # pylint: disable=protected-access ""_maximum_iterations"", ",0,train ada0605591911094c142d39cbd87294ed2716e8b,tensorflow/tensorflow,"Update Keras Tracking API 1. move _keras_api_gauge.get_cell('compile').set(True) after line 316 (Reasoning: according to Pavithra, for some use cases, when user first call compile, it will save the info and run it later; move metric after is_compiled set to true to avoid double count) 2. Breakdown tracking for different training/evaluating/predicting methods (Different methods for train/evaluate/predict are useful for engineers to observe which methods is mostly used, the previous implementation covers most of the train/evaluate/predict, except for train_on_batch/test_on_batch/predict_on_batch) 3. add a meta metric in __init__ of model to decide if a borg job uses keras API (should be a combination of 1 + 2 + user self-defined model) Draft doc: go/tensorflow-api-metrics PiperOrigin-RevId: 260419061",training.py,"@@ -143,6 +143,7 @@ class Model(network.Network): def __init__(self, *args, **kwargs): super(Model, self).__init__(*args, **kwargs) + _keras_api_gauge.get_cell('model').set(True) # initializing _distribution_strategy here since it is possible to call # predict on a model without compiling it. self._distribution_strategy = None @@ -242,7 +243,6 @@ class Model(network.Network): ValueError: In case of invalid arguments for `optimizer`, `loss`, `metrics` or `sample_weight_mode`. """""" - _keras_api_gauge.get_cell('compile').set(True) self._run_eagerly = kwargs.pop('run_eagerly', None) self._run_distributed = kwargs.pop('run_distributed', False) @@ -323,6 +323,7 @@ class Model(network.Network): # time the model gets called on training data. return self._is_compiled = True + _keras_api_gauge.get_cell('compile').set(True) # Prepare list of loss functions, same size of model outputs. 
self.loss_functions = training_utils.prepare_loss_functions( @@ -705,7 +706,7 @@ class Model(network.Network): ValueError: In case of mismatch between the provided input data and what the model expects. """""" - _keras_api_gauge.get_cell('train').set(True) + _keras_api_gauge.get_cell('fit').set(True) # Legacy support if 'nb_epoch' in kwargs: logging.warning( @@ -1279,7 +1280,7 @@ class Model(network.Network): if self._distribution_strategy: raise NotImplementedError('`fit_generator` is not supported for ' 'models compiled with tf.distribute.Strategy.') - _keras_api_gauge.get_cell('train').set(True) + _keras_api_gauge.get_cell('fit_generator').set(True) self._check_call_args('fit_generator') return training_generator.fit_generator( self, @@ -1353,8 +1354,9 @@ class Model(network.Network): if self._distribution_strategy: raise NotImplementedError('`evaluate_generator` is not supported for ' 'models compiled with tf.distribute.Strategy.') - _keras_api_gauge.get_cell('evaluate').set(True) + _keras_api_gauge.get_cell('evaluate_generator').set(True) self._check_call_args('evaluate_generator') + return training_generator.evaluate_generator( self, generator, @@ -1411,8 +1413,7 @@ class Model(network.Network): if self._distribution_strategy: raise NotImplementedError('`predict_generator` is not supported for ' 'models compiled with tf.distribute.Strategy.') - _keras_api_gauge.get_cell('predict').set(True) - self._check_call_args('predict_generator') + _keras_api_gauge.get_cell('predict_generator').set(True) return training_generator.predict_generator( self, generator, ",0,train 57dc9f9681b8c4ea88eb0a3ee43c82d67f7707a2,tensorflow/tensorflow,"Forward Dataset._functions() through DatasetV1Adapter We will need to re-register functions in the exported graph when saving SavedModels PiperOrigin-RevId: 243359370",dataset_test.py,"@@ -194,6 +194,10 @@ class DatasetTest(test_base.DatasetTestBase, parameterized.TestCase): nest.flatten(input_datasets), dataset_fn(input_datasets)._inputs()) + def testFunctions(self): + dataset = dataset_ops.Dataset.range(5).map(lambda x: x * 2) + self.assertLen(dataset._functions(), 1) + def testCollectInputs(self): ds1 = dataset_ops.Dataset.range(0) ds2 = ds1.concatenate(ds1) ",0,train 57dc9f9681b8c4ea88eb0a3ee43c82d67f7707a2,tensorflow/tensorflow,"Forward Dataset._functions() through DatasetV1Adapter We will need to re-register functions in the exported graph when saving SavedModels PiperOrigin-RevId: 243359370",dataset_ops.py,"@@ -1781,6 +1781,9 @@ class DatasetV1Adapter(DatasetV1): def _inputs(self): return self._dataset._inputs() # pylint: disable=protected-access + def _functions(self): + return self._dataset._functions() # pylint: disable=protected-access + def options(self): return self._dataset.options() ",0,train ce25634e3ec6c79c89645e4c52b004eabb869cb8,tensorflow/tensorflow,"Fix iterator invalidation when pushing a self-reference on a SmallVector This crashes with https://github.com/llvm/llvm-project/commit/2c196bbc6bd897b3dcc1d87a3baac28e1e88df41 PiperOrigin-RevId: 342653532 Change-Id: I26b32e9674a03a1dfeb74c7760916a9e43a080fc",tf_executor.cc,"@@ -677,7 +677,7 @@ ParseResult ParseMergeOp(OpAsmParser &parser, OperationState &result) { } else { // In case of the short form, use the parsed type for both the operands and // the remaining operands are expected to be control inputs. 
- types.push_back(types.front()); + types.push_back(Type(types.front())); Type control_type = ControlType::get(parser.getBuilder().getContext()); types.append(op_infos.size() - 2, control_type); ",0,train bba4c4a5524522ac24df5bfe8a4c4843a084a990,tensorflow/tensorflow,"[MLIR][KernelGen] Add MLIR-generated `tf.Round` kernel PiperOrigin-RevId: 378725030 Change-Id: I531306aa301fe1b6087fd8e53dcdb240c7a21429",cwise_op_gpu_round.cu.cc,"@@ -19,7 +19,12 @@ limitations under the License. namespace tensorflow { namespace functor { + +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) DEFINE_UNARY5(round, Eigen::half, float, double, int32, int64); +#endif + } // namespace functor } // namespace tensorflow ",0,train bba4c4a5524522ac24df5bfe8a4c4843a084a990,tensorflow/tensorflow,"[MLIR][KernelGen] Add MLIR-generated `tf.Round` kernel PiperOrigin-RevId: 378725030 Change-Id: I531306aa301fe1b6087fd8e53dcdb240c7a21429",cwise_op_round.cc,"@@ -16,12 +16,16 @@ limitations under the License. #include ""tensorflow/core/kernels/cwise_ops_common.h"" namespace tensorflow { + REGISTER5(UnaryOp, CPU, ""Round"", functor::round, Eigen::half, float, double, int32, int64); - #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) REGISTER5(UnaryOp, GPU, ""Round"", functor::round, Eigen::half, float, double, int32, int64); #endif +#endif + } // namespace tensorflow ",0,train bba4c4a5524522ac24df5bfe8a4c4843a084a990,tensorflow/tensorflow,"[MLIR][KernelGen] Add MLIR-generated `tf.Round` kernel PiperOrigin-RevId: 378725030 Change-Id: I531306aa301fe1b6087fd8e53dcdb240c7a21429",gpu_op_round.cc,"@@ -0,0 +1,27 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" +#include ""tensorflow/core/kernels/mlir_generated/base_gpu_op.h"" + +namespace tensorflow { + +GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Round, DT_HALF); +GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Round, DT_FLOAT); +GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Round, DT_DOUBLE); +GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Round, DT_INT32); +GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Round, DT_INT64); + +} // namespace tensorflow ",0,train bba4c4a5524522ac24df5bfe8a4c4843a084a990,tensorflow/tensorflow,"[MLIR][KernelGen] Add MLIR-generated `tf.Round` kernel PiperOrigin-RevId: 378725030 Change-Id: I531306aa301fe1b6087fd8e53dcdb240c7a21429",gpu_unary_ops_test.cc,"@@ -737,10 +737,40 @@ GENERATE_DEFAULT_TEST(Rint, DT_FLOAT, DT_FLOAT, std::rint, GENERATE_DEFAULT_TEST_WITH_SPECIFIC_INPUT_VALUES( Rint, DT_DOUBLE, DT_DOUBLE, - test::InputAsVector({-1.7, -1.5, -0.2, 0.2, 0.5000001, 1.5, 1.7, - 2.0}), + test::InputAsVector({-1.7, -1.5, -0.2, -0.0, 0.0, 0.2, 0.5000001, + 1.5, 1.7, 2.0}), std::rint, test::OpsTestConfig().ExpectStrictlyEqual()) +/// Test `tf.Round`. 
+ +/// `tf.Round` is the same as `std::rint` and different from `std::round`. It +/// rounds to the nearest even integer, not towards zero. + +template +T baseline_round(T x) { + T y = std::rint(x); + return y == T(0) ? T(0) : y; +} + +GENERATE_DEFAULT_TEST_WITH_SPECIFIC_INPUT_VALUES( + Round, DT_DOUBLE, DT_DOUBLE, + test::InputAsVector({-1.7, -1.5, -0.2, -0.0, 0.0, 0.2, 0.5000001, + 1.5, 1.7, 2.0}), + baseline_round, test::OpsTestConfig().ExpectStrictlyEqual()) + +GENERATE_DEFAULT_TEST(Round, DT_FLOAT, DT_FLOAT, baseline_round, + test::OpsTestConfig().ExpectStrictlyEqual()) + +GENERATE_DEFAULT_TEST_2(Round, DT_HALF, DT_FLOAT, DT_HALF, DT_FLOAT, + baseline_round, + test::OpsTestConfig().ExpectStrictlyEqual()) + +GENERATE_DEFAULT_TEST(Round, DT_INT32, DT_INT32, baseline_round, + test::OpsTestConfig().ExpectStrictlyEqual()) + +GENERATE_DEFAULT_TEST(Round, DT_INT64, DT_INT64, baseline_round, + test::OpsTestConfig().ExpectStrictlyEqual()) + /// Test `tf.Rsqrt`. /// Reference implementation. ",0,train f9d0d3205f7d0522dcd6f6d7e2d32896275fc78c,tensorflow/tensorflow,"Remove erroneous comment from resnet_v2.py PiperOrigin-RevId: 155933792",resnet_v2.py,"@@ -25,8 +25,6 @@ introduced by: The key difference of the full preactivation 'v2' variant compared to the 'v1' variant in [1] is the use of batch normalization before every weight layer. -Another difference is that 'v2' ResNets do not include an activation function in -the main pathway. Also see [2; Fig. 4e]. Typical use: ",0,train c444870b2d6f85c3d6936322e74984e0f889acfd,tensorflow/tensorflow,"[MLIR][KernelGen] Add experimental JIT-compiled sign kernels for i8, i16 on GPU PiperOrigin-RevId: 404633033 Change-Id: I8bbfbc2266b2ba3d5cee9dfa1463cca696481a35",gpu_op_sign.cc,"@@ -26,4 +26,10 @@ GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Sign, DT_INT64); GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Sign, DT_COMPLEX64); GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Sign, DT_COMPLEX128); +// These kernels are JIT-compiled. +#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) +GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Sign, DT_INT8); +GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Sign, DT_INT16); +#endif + } // namespace tensorflow ",0,train c444870b2d6f85c3d6936322e74984e0f889acfd,tensorflow/tensorflow,"[MLIR][KernelGen] Add experimental JIT-compiled sign kernels for i8, i16 on GPU PiperOrigin-RevId: 404633033 Change-Id: I8bbfbc2266b2ba3d5cee9dfa1463cca696481a35",gpu_unary_ops_test.cc,"@@ -990,25 +990,29 @@ std::complex baseline_sign(std::complex x) { GENERATE_DEFAULT_TEST(Sign, DT_FLOAT, DT_FLOAT, baseline_sign, test::OpsTestConfig().ExpectStrictlyEqual()) - GENERATE_DEFAULT_TEST(Sign, DT_DOUBLE, DT_DOUBLE, baseline_sign, test::OpsTestConfig().ExpectStrictlyEqual()) - // TODO(b/162577610): We should actually use ExpectStrictlyEqual() // here. This requires returning 0.0 for input -0.0. GENERATE_DEFAULT_TEST_2(Sign, DT_HALF, DT_FLOAT, DT_HALF, DT_FLOAT, baseline_sign, test::OpsTestConfig()) - GENERATE_DEFAULT_TEST(Sign, DT_INT64, DT_INT64, baseline_sign, test::OpsTestConfig().ExpectStrictlyEqual()) - GENERATE_DEFAULT_TEST_2(Sign, DT_COMPLEX64, DT_COMPLEX128, DT_COMPLEX64, DT_COMPLEX128, baseline_sign, test::OpsTestConfig().ExpectStrictlyEqual()) - GENERATE_DEFAULT_TEST(Sign, DT_COMPLEX128, DT_COMPLEX128, baseline_sign, test::OpsTestConfig().ExpectStrictlyEqual()) +// These kernels are JIT-compiled. 
+#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) +GENERATE_DEFAULT_TEST(Sign, DT_INT8, DT_INT8, baseline_sign, + test::OpsTestConfig().ExpectStrictlyEqual()) +GENERATE_DEFAULT_TEST(Sign, DT_INT16, DT_INT16, baseline_sign, + test::OpsTestConfig().ExpectStrictlyEqual()) +#endif + /// Test `tf.Sin`. GENERATE_DEFAULT_TEST(Sin, DT_FLOAT, DT_FLOAT, std::sin, test::OpsTestConfig()) ",0,train 373f458fb66fdb709d7af828fe6200e3137942e6,tensorflow/tensorflow,"Optimize calls to std::string::find() and friends for a single char. The character literal overload is more efficient. PiperOrigin-RevId: 348473483 Change-Id: Ia76efa5ee243f7a92b35f1fb81d4af864fca8372",gpu_backend_lib.cc,"@@ -838,11 +838,11 @@ StatusOr> CompileToHsaco( // Delete the first two lines, since they usually vary even when the rest of // the code is the same (but verify that they are what we expect). if (str.size() >= 13 && str.substr(0, 13) == ""; ModuleID = "") { - auto pos = str.find(""\n""); + auto pos = str.find('\n'); if (pos != std::string::npos) str = str.substr(pos + 1); } if (str.size() >= 18 && str.substr(0, 18) == ""source_filename = "") { - auto pos = str.find(""\n""); + auto pos = str.find('\n'); if (pos != std::string::npos) str = str.substr(pos + 1); } str += hlo_module_config.compilation_cache_key(); ",0,test 373f458fb66fdb709d7af828fe6200e3137942e6,tensorflow/tensorflow,"Optimize calls to std::string::find() and friends for a single char. The character literal overload is more efficient. PiperOrigin-RevId: 348473483 Change-Id: Ia76efa5ee243f7a92b35f1fb81d4af864fca8372",hlo_instruction.cc,"@@ -2538,7 +2538,7 @@ string PrintName(const string& name, bool print_ids) { if (print_ids) { return name; } else { - auto dot_position = name.find_first_of("".""); + auto dot_position = name.find_first_of('.'); return name.substr(0, dot_position); } } ",0,test 373f458fb66fdb709d7af828fe6200e3137942e6,tensorflow/tensorflow,"Optimize calls to std::string::find() and friends for a single char. The character literal overload is more efficient. PiperOrigin-RevId: 348473483 Change-Id: Ia76efa5ee243f7a92b35f1fb81d4af864fca8372",dot_operation_test.cc,"@@ -1210,7 +1210,7 @@ XLA_TEST_P(EinsumTest, SimpleEinsumTest) { .ValueOrDie(), &builder); auto config = std::get<2>(GetParam()); - if (config.find("","") == config.npos) { + if (config.find(',') == config.npos) { Einsum(x, config); } else { Einsum(x, y, config); ",0,test 5cf69dc5d8caa129903a7812a916f619b4e03114,tensorflow/tensorflow,refine,hlo_ops.cc,"@@ -4468,17 +4468,14 @@ OpFoldResult SelectOp::fold(ArrayRef operands) { // false_value, true_value) static LogicalResult selectCanonicalization(SelectOp selectOp, PatternRewriter& rewriter) { - if (auto notOp = selectOp.pred().getDefiningOp()) { - if (1 == - notOp.operand().getType().cast().getElementTypeBitWidth()) { - std::array newOperands = {notOp.operand(), - selectOp.getOperands()[2], - selectOp.getOperands()[1]}; - selectOp.getOperation()->setOperands(newOperands); - return success(); - } + auto notOp = selectOp.pred().getDefiningOp(); + if (!notOp) { + return failure(); } - return failure(); + std::array newOperands = {notOp.operand(), selectOp.on_false(), + selectOp.on_true()}; + selectOp.getOperation()->setOperands(newOperands); + return success(); } void SelectOp::getCanonicalizationPatterns(RewritePatternSet& results, ",0,train a2dc1ae9cd5991e248dc8885ed8879bb6ab15096,tensorflow/tensorflow,"Fix the training losses when examples have weights associated with it. 
Note: in the unlikely event this causes your relus to die, you can lower the learning rate. Change: 131131862",target_column.py,"@@ -175,9 +175,51 @@ class _TargetColumn(object): def problem_type(self): return self._problem_type + def _weighted_loss(self, loss, weight_tensor): + """"""Returns cumulative weighted loss."""""" + unweighted_loss = array_ops.reshape(loss, shape=(-1,)) + weighted_loss = math_ops.mul(unweighted_loss, + array_ops.reshape( + weight_tensor, shape=(-1,))) + return weighted_loss + + def training_loss(self, logits, target, features): + """"""Returns training loss tensor for this head. + + Training loss is different from the loss reported on the tensorboard as we + should respect the example weights when computing the gradient. + + L = sum_{i} w_{i} * l_{i} / B + + where B is the number of examples in the batch, l_{i}, w_{i} are individual + losses, and example weight. + + Args: + logits: logits, a float tensor. + target: either a tensor for labels or in multihead case, a dict of string + to target tensor. + features: features dict. + + Returns: + Loss tensor. + """""" + target = target[self.name] if isinstance(target, dict) else target + loss_unweighted = self._loss_fn(logits, target) + + weight_tensor = self.get_weight_tensor(features) + if weight_tensor is None: + return math_ops.reduce_mean(loss_unweighted, name=""loss"") + else: + loss_weighted = self._weighted_loss(loss_unweighted, weight_tensor) + return math_ops.reduce_mean(loss_weighted, name=""loss"") + def loss(self, logits, target, features): """"""Returns loss tensor for this head. + The loss returned is the weighted average. + + L = sum_{i} w_{i} * l_{i} / sum_{i} w_{i} + Args: logits: logits, a float tensor. target: either a tensor for labels or in multihead case, a dict of string @@ -194,10 +236,7 @@ class _TargetColumn(object): if weight_tensor is None: return math_ops.reduce_mean(loss_unweighted, name=""loss"") else: - loss_unweighted = array_ops.reshape(loss_unweighted, shape=(-1,)) - loss_weighted = math_ops.mul( - loss_unweighted, - array_ops.reshape(weight_tensor, shape=(-1,))) + loss_weighted = self._weighted_loss(loss_unweighted, weight_tensor) return math_ops.div( math_ops.reduce_sum(loss_weighted), math_ops.to_float(math_ops.reduce_sum(weight_tensor)), ",0,train a2dc1ae9cd5991e248dc8885ed8879bb6ab15096,tensorflow/tensorflow,"Fix the training losses when examples have weights associated with it. Note: in the unlikely event this causes your relus to die, you can lower the learning rate. Change: 131131862",target_column_test.py,"@@ -27,23 +27,29 @@ class RegressionTargetColumnTest(tf.test.TestCase): def testRegression(self): target_column = tf.contrib.layers.regression_target() with tf.Graph().as_default(), tf.Session() as sess: - logits = tf.constant([[1.], [1.], [3.]]) + prediction = tf.constant([[1.], [1.], [3.]]) targets = tf.constant([[0.], [1.], [1.]]) - self.assertAlmostEqual(5. / 3, - sess.run(target_column.loss(logits, targets, {}))) + self.assertAlmostEqual( + 5. 
/ 3, sess.run(target_column.loss(prediction, targets, {}))) def testRegressionWithWeights(self): target_column = tf.contrib.layers.regression_target( weight_column_name=""label_weight"") with tf.Graph().as_default(), tf.Session() as sess: - features = {""label_weight"": tf.constant([[1.], [0.], [0.]])} - logits = tf.constant([[1.], [1.], [3.]]) + features = {""label_weight"": tf.constant([[2.], [5.], [0.]])} + prediction = tf.constant([[1.], [1.], [3.]]) targets = tf.constant([[0.], [1.], [1.]]) self.assertAlmostEqual( - 1., sess.run(target_column.loss(logits, targets, features))) + 2. / 7, + sess.run(target_column.loss(prediction, targets, features)), + places=3) + self.assertAlmostEqual( + 2. / 3, + sess.run(target_column.training_loss(prediction, targets, features)), + places=3) -class MulltiClassTargetColumnTest(tf.test.TestCase): +class MultiClassTargetColumnTest(tf.test.TestCase): def testBinaryClassification(self): target_column = tf.contrib.layers.multi_class_target(n_classes=2) @@ -126,9 +132,9 @@ class MulltiClassTargetColumnTest(tf.test.TestCase): def testBinarySVMDefaultWeights(self): target_column = tf.contrib.layers.binary_svm_target() - logits = tf.constant([[-0.5], [1.2]]) + predictions = tf.constant([[-0.5], [1.2]]) targets = tf.constant([0, 1]) - loss = target_column.loss(logits, targets, {}) + loss = target_column.loss(predictions, targets, {}) # Prediction for first example is in the right side of the hyperplane (i.e., # < 0) but it is within the [-1,1] margin. There is a 0.5 loss incurred by # this example. The 2nd prediction is outside the margin so it incurs no @@ -139,15 +145,17 @@ class MulltiClassTargetColumnTest(tf.test.TestCase): def testBinarySVMWithWeights(self): target_column = tf.contrib.layers.binary_svm_target( weight_column_name=""weights"") - logits = tf.constant([[-0.7], [0.2]]) + predictions = tf.constant([[-0.7], [0.2]]) targets = tf.constant([0, 1]) features = {""weights"": tf.constant([2.0, 10.0])} - loss = target_column.loss(logits, targets, features) + loss = target_column.loss(predictions, targets, features) + training_loss = target_column.training_loss(predictions, targets, features) # Prediction for both examples are in the right side of the hyperplane but # within the margin. The (weighted) loss incurred is 2*0.3=0.6 and 10*0.8=8 # respectively. The overall (normalized) loss is therefore 8.6/12. with tf.Session() as sess: - self.assertAlmostEqual(8.6 / 12, sess.run(loss)) + self.assertAlmostEqual(8.6 / 12, sess.run(loss), places=3) + self.assertAlmostEqual(8.6 / 2, sess.run(training_loss), places=3) if __name__ == ""__main__"": ",0,train a2dc1ae9cd5991e248dc8885ed8879bb6ab15096,tensorflow/tensorflow,"Fix the training losses when examples have weights associated with it. Note: in the unlikely event this causes your relus to die, you can lower the learning rate. 
Change: 131131862",dnn_linear_combined.py,"@@ -174,16 +174,20 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): else: centered_bias_step = [] with ops.control_dependencies(centered_bias_step): - loss = self._target_column.loss(logits, targets, features) - logging_ops.scalar_summary(""loss"", loss) + training_loss = self._target_column.training_loss(logits, targets, + features) + weighted_average_loss = self._target_column.loss(logits, targets, + features) - linear_train_step = self._linear_model.get_train_step(loss) - dnn_train_step = (self._dnn_model.get_train_step(loss) - if self._dnn_model else []) + logging_ops.scalar_summary(""loss"", weighted_average_loss) + + linear_train_step = self._linear_model.get_train_step(training_loss) + dnn_train_step = (self._dnn_model.get_train_step(training_loss) if + self._dnn_model else []) with ops.control_dependencies(linear_train_step + dnn_train_step): with ops.get_default_graph().colocate_with(global_step): - return state_ops.assign_add(global_step, 1).op, loss + return state_ops.assign_add(global_step, 1).op, weighted_average_loss def _get_eval_ops(self, features, targets, metrics=None): """"""See base class."""""" @@ -242,10 +246,11 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): logits = array_ops.reshape( array_ops.tile(centered_bias[0], [batch_size]), [batch_size, self._target_column.num_label_columns]) - loss = self._target_column.loss(logits, targets, features) + training_loss = self._target_column.training_loss(logits, targets, features) # Learn central bias by an optimizer. 0.1 is a convervative lr for a single # variable. - return training.AdagradOptimizer(0.1).minimize(loss, var_list=centered_bias) + return training.AdagradOptimizer(0.1).minimize( + training_loss, var_list=centered_bias) def _logits(self, features, is_training=False): linear_feature_columns = self._get_linear_feature_columns() ",0,train 88e0e42fdd472200c91732ab5669f91c48b1fdd7,tensorflow/tensorflow,"Remove unused StreamExecutorFactory PiperOrigin-RevId: 244956772",cuda_gpu_executor.cc,"@@ -1139,14 +1139,6 @@ DeviceDescription* GpuExecutor::PopulateDeviceDescription() const { } // namespace gpu -void initialize_cuda_gpu_executor() { - *internal::MakeCUDAExecutorImplementation() = [](const PluginConfig& config) { - return new gpu::GpuExecutor{config}; - }; -} - } // namespace stream_executor -REGISTER_MODULE_INITIALIZER(cuda_gpu_executor, { - stream_executor::initialize_cuda_gpu_executor(); -}); +REGISTER_MODULE_INITIALIZER(cuda_gpu_executor, {}); ",0,train 88e0e42fdd472200c91732ab5669f91c48b1fdd7,tensorflow/tensorflow,"Remove unused StreamExecutorFactory PiperOrigin-RevId: 244956772",rocm_gpu_executor.cc,"@@ -961,14 +961,6 @@ DeviceDescription* GpuExecutor::PopulateDeviceDescription() const { } // namespace gpu -void initialize_rocm_gpu_executor() { - *internal::MakeROCMExecutorImplementation() = [](const PluginConfig& config) { - return new gpu::GpuExecutor{config}; - }; -} - } // namespace stream_executor -REGISTER_MODULE_INITIALIZER(rocm_gpu_executor, { - stream_executor::initialize_rocm_gpu_executor(); -}); +REGISTER_MODULE_INITIALIZER(rocm_gpu_executor, {}); ",0,train 88e0e42fdd472200c91732ab5669f91c48b1fdd7,tensorflow/tensorflow,"Remove unused StreamExecutorFactory PiperOrigin-RevId: 244956772",stream_executor_internal.cc,"@@ -18,31 +18,6 @@ limitations under the License. 
namespace stream_executor { namespace internal { -// -- CUDA - -StreamExecutorFactory* MakeCUDAExecutorImplementation() { - static StreamExecutorFactory instance; - return &instance; -} - -// -- ROCm - -StreamExecutorFactory* MakeROCMExecutorImplementation() { - static StreamExecutorFactory instance; - return &instance; -} - -// -- OpenCL - -StreamExecutorFactory* MakeOpenCLExecutorImplementation() { - static StreamExecutorFactory instance; - return &instance; -} - -// -- Host - -StreamExecutorFactory MakeHostExecutorImplementation; - // The default implementation just calls the other HostCallback method. // It should make all existing code that uses a void() callback still work. bool StreamExecutorInterface::HostCallback(Stream* stream, ",0,train 88e0e42fdd472200c91732ab5669f91c48b1fdd7,tensorflow/tensorflow,"Remove unused StreamExecutorFactory PiperOrigin-RevId: 244956772",stream_executor_internal.h,"@@ -383,21 +383,6 @@ class StreamExecutorInterface { SE_DISALLOW_COPY_AND_ASSIGN(StreamExecutorInterface); }; -using StreamExecutorFactory = - std::function; -using EventFactory = std::function; -using StreamFactory = std::function; -using TimerFactory = std::function; -using KernelFactory = std::function; - -StreamExecutorFactory *MakeCUDAExecutorImplementation(); - -StreamExecutorFactory *MakeROCMExecutorImplementation(); - -StreamExecutorFactory *MakeOpenCLExecutorImplementation(); - -extern StreamExecutorFactory MakeHostExecutorImplementation; - } // namespace internal } // namespace stream_executor ",0,train 88e0e42fdd472200c91732ab5669f91c48b1fdd7,tensorflow/tensorflow,"Remove unused StreamExecutorFactory PiperOrigin-RevId: 244956772",stream_executor_pimpl.cc,"@@ -60,37 +60,6 @@ void BlockOnThreadExecutor(port::ThreadPool *executor) { n.WaitForNotification(); } -internal::StreamExecutorInterface *StreamExecutorImplementationFromPlatformKind( - PlatformKind platform_kind, const PluginConfig &plugin_config) { - // Note: we use this factory-assignment-in-switch pattern instead of just - // invoking the callable in case linkage is messed up -- instead of invoking a - // nullptr std::function (due to failed registration) we give a nice - // LOG(FATAL) message. 
- internal::StreamExecutorFactory factory; - switch (platform_kind) { - case PlatformKind::kCuda: - factory = *internal::MakeCUDAExecutorImplementation(); - break; - case PlatformKind::kROCm: - factory = *internal::MakeROCMExecutorImplementation(); - break; - case PlatformKind::kOpenCL: - factory = *internal::MakeOpenCLExecutorImplementation(); - break; - case PlatformKind::kHost: - factory = internal::MakeHostExecutorImplementation; - break; - default: - factory = nullptr; - } - if (factory == nullptr) { - LOG(FATAL) - << ""cannot create StreamExecutor implementation for platform kind: "" - << PlatformKindString(platform_kind); - } - return factory(plugin_config); -} - std::atomic_int_fast64_t correlation_id_generator(0); } // namespace @@ -154,20 +123,6 @@ MakeScopedTracer(StreamExecutor *stream_exec, BeginCallT begin_call, /* static */ mutex StreamExecutor::static_mu_{LINKER_INITIALIZED}; -StreamExecutor::StreamExecutor(PlatformKind platform_kind, - const PluginConfig &plugin_config) - : platform_(nullptr), - implementation_(StreamExecutorImplementationFromPlatformKind( - platform_kind, plugin_config)), - platform_kind_(platform_kind), - device_ordinal_(-1), - background_threads_(new port::ThreadPool( - port::Env::Default(), ""stream_executor"", kNumBackgroundThreads)), - live_stream_count_(0), - tracing_enabled_(false) { - CheckPlatformKindIsValid(platform_kind); -} - // Get per-device memory limit in bytes. Returns 0 if // TF_PER_DEVICE_MEMORY_LIMIT_MB environment variable is not set. static int64 GetMemoryLimitBytes() { ",0,train 88e0e42fdd472200c91732ab5669f91c48b1fdd7,tensorflow/tensorflow,"Remove unused StreamExecutorFactory PiperOrigin-RevId: 244956772",stream_executor_pimpl.h,"@@ -70,9 +70,6 @@ class ScopedTracer; // StreamExecutor interface should not be invoked from a signal handler. class StreamExecutor { public: - explicit StreamExecutor(PlatformKind kind, - const PluginConfig &plugin_config = PluginConfig()); - StreamExecutor( const Platform *platform, std::unique_ptr implementation); ",0,train 9edecf8e2391d73e506878da92951a902da0719b,tensorflow/tensorflow,"Fix a typo in set_virtual_device_configuration PiperOrigin-RevId: 275292912 Change-Id: Ia710b9bb14d466710fdc6bd9a60c10d849f9e19d",context.py,"@@ -1245,7 +1245,7 @@ class Context(object): ""Setting memory limit is required for GPU virtual devices"") else: raise ValueError(""Virtual devices are not supported for %s"" % - dev.device_type()) + dev.device_type) if self._virtual_device_map.get(dev) == virtual_devices: return ",0,test f901da42e3a7781add9023965eb76162cdbfe29b,tensorflow/tensorflow,"Lowering for tfl.sparse_to_dense to tosa Sparse to dense can be implemented using a series of reshapes, constants, numerical operations, and a final scatter. 
This should work to decompose into a TOSA compatible form.",legalize_tfl.cc,"@@ -152,6 +152,7 @@ DECL_CONVERT_OP(Const); DECL_CONVERT_OP(QConst); DECL_CONVERT_OP(Gather); DECL_CONVERT_OP(GatherNd); +DECL_CONVERT_OP(SparseToDense); DECL_CONVERT_OP(OneHot); DECL_CONVERT_OP(ArgMax); DECL_CONVERT_OP(FakeQuant); @@ -3016,6 +3017,87 @@ LogicalResult ConvertTFLGatherNdOp::matchAndRewrite( return success(); } +LogicalResult ConvertTFLSparseToDenseOp::matchAndRewrite( + Operation* op, PatternRewriter& rewriter) const { + auto tfl_sparse_to_dense_op = cast(op); + auto indices = tfl_sparse_to_dense_op.sparse_indices(); + auto values = tfl_sparse_to_dense_op.sparse_values(); + auto output_shape_value = tfl_sparse_to_dense_op.output_shape(); + auto default_value = tfl_sparse_to_dense_op.default_value(); + auto indices_ty = indices.getType().cast(); + auto indices_ety = indices_ty.getElementType(); + auto values_ty = values.getType().cast(); + auto result_ty = + tfl_sparse_to_dense_op.getResult().getType().cast(); + auto result_ety = result_ty.getElementType(); + auto loc = op->getLoc(); + + if (!result_ty.hasStaticShape()) return failure(); + auto result_rank = result_ty.getRank(); + + // We want to generate the default tensor we need to scatter. Note that the + // result_ty needs to be a statically shaped tensor. + ElementsAttr default_value_attr; + if (!matchPattern(default_value, m_Constant(&default_value_attr))) + return failure(); + + if (!default_value_attr.isSplat()) return failure(); + + ShapedType scatter_ty = + RankedTensorType::get({1, result_ty.getNumElements(), 1}, result_ety); + + Value default_const = rewriter.create( + loc, scatter_ty, + DenseElementsAttr::get(scatter_ty, + default_value_attr.getSplatValue().sext( + result_ety.getIntOrFloatBitWidth()))); + + // We need to determine what the index multiplier does + llvm::SmallVector multiply_constant_ints; + multiply_constant_ints.resize(result_rank, 1); + for (int i = result_rank - 1; i > 0; i--) { + multiply_constant_ints[i - 1] = + result_ty.getDimSize(i) * multiply_constant_ints[i]; + } + + indices_ety = rewriter.getI32Type(); + indices_ty = RankedTensorType::get(indices_ty.getShape(), indices_ety); + indices = CreateOpAndInfer(rewriter, loc, indices_ty, indices); + + auto multiply_constant_type = + RankedTensorType::get({result_rank}, indices_ety); + auto multiply_constant_attr = DenseElementsAttr::get( + multiply_constant_type, llvm::makeArrayRef(multiply_constant_ints)); + Value multiply_constant = CreateOpAndInfer( + rewriter, loc, multiply_constant_type, multiply_constant_attr); + + Value multiply_op = CreateOpAndInfer( + rewriter, loc, indices_ty, indices, multiply_constant, 0); + + Value reduce_op = CreateOpAndInfer( + rewriter, loc, UnrankedTensorType::get(indices_ety), multiply_op, + rewriter.getI64IntegerAttr(1)); + + auto values_reshape_op = CreateOpAndInfer( + rewriter, loc, UnrankedTensorType::get(result_ety), values, + rewriter.getI64ArrayAttr( + ArrayRef{1, values_ty.getDimSize(0), 1})); + + auto index_reshape_op = CreateOpAndInfer( + rewriter, loc, UnrankedTensorType::get(indices_ety), reduce_op, + rewriter.getI64ArrayAttr(ArrayRef{1, indices_ty.getDimSize(0)})); + + auto scatter = CreateOpAndInfer( + rewriter, loc, UnrankedTensorType::get(result_ety), default_const, + index_reshape_op, values_reshape_op); + + CreateReplaceOpAndInfer( + rewriter, op, result_ty, scatter, + rewriter.getI64ArrayAttr(result_ty.getShape())); + + return success(); +} + LogicalResult ConvertTFLOneHotOp::matchAndRewrite( Operation* op, 
PatternRewriter& rewriter) const { auto tfl_one_hot_op = cast(op); @@ -3179,6 +3261,7 @@ void LegalizeTFL::runOnFunction() { DEF_PATTERN_INSERT(Constant); DEF_PATTERN_INSERT(TFLGather); DEF_PATTERN_INSERT(TFLGatherNd); + DEF_PATTERN_INSERT(TFLSparseToDense); DEF_PATTERN_INSERT(TFLArgMax); DEF_PATTERN_INSERT(TFLFakeQuant); DEF_PATTERN_INSERT(TFLOneHot); ",0,train c56bdc3302fba4b89a30f64a4ec0a18f64ccc2e4,tensorflow/tensorflow,"Changing docs to match the code. Change: 111445385",port.h,"@@ -24,12 +24,12 @@ limitations under the License. // Choose which platform we are on. #if defined(ANDROID) || defined(__ANDROID__) -#define PLATFORM_POSIX_ANDROID +#define PLATFORM_GOOGLE_ANDROID #elif defined(__APPLE__) #define PLATFORM_POSIX #else // If no platform specified, use: -#define PLATFORM_POSIX +#define PLATFORM_GOOGLE #endif #endif ",0,train c56bdc3302fba4b89a30f64a4ec0a18f64ccc2e4,tensorflow/tensorflow,"Changing docs to match the code. Change: 111445385",ops.py,"@@ -785,16 +785,16 @@ class SparseTensor(object): """"""Represents a sparse tensor. Tensorflow represents a sparse tensor as three separate dense tensors: - `indices`, `values`, and `dense_shape`. In Python, the three tensors are + `indices`, `values`, and `shape`. In Python, the three tensors are collected into a `SparseTensor` class for ease of use. If you have separate - `indices`, `values`, and `dense_shape` tensors, wrap them in a `SparseTensor` - object before passing to the Ops below. + `indices`, `values`, and `shape` tensors, wrap them in a `SparseTensor` + object before passing to the ops below. - Concretely, the sparse tensor `SparseTensor(values, indices, dense_shape)` is + Concretely, the sparse tensor `SparseTensor(values, indices, shape)` is * `indices`: A 2-D int64 tensor of shape `[N, ndims]`. * `values`: A 1-D tensor of any type and shape `[N]`. - * `dense_shape`: A 1-D int64 tensor of shape `[ndims]`. + * `shape`: A 1-D int64 tensor of shape `[ndims]`. where `N` and `ndims` are the number of values, and number of dimensions in the `SparseTensor` respectively. @@ -802,15 +802,15 @@ class SparseTensor(object): The corresponding dense tensor satisfies ```python - dense.shape = dense_shape + dense.shape = shape dense[tuple(indices[i])] = values[i] ``` By convention, `indices` should be sorted in row-major order (or equivalently lexicographic order on the tuples `indices[i]`). This is not enforced when `SparseTensor` objects are constructed, but most ops assume correct ordering. - If the ordering is wrong, it can be fixed by calling `sparse_reorder` on the - misordered `SparseTensor`. + If the ordering of sparse tensor `st` is wrong, a fixed version can be + obtained by calling `tf.sparse_reorder(st)`. Example: The sparse tensor ",0,train 6b493f72c82593cb1a642af2d091e93b15b56ddc,tensorflow/tensorflow,"Change contrib estimator to save relative paths in checkpoint. Change: 155016674",estimator.py,"@@ -966,7 +966,8 @@ class BaseEstimator( saver.Saver( sharded=True, max_to_keep=self._config.keep_checkpoint_max, - defer_build=True)) + defer_build=True, + save_relative_paths=True)) chief_hooks = [] if (self._config.save_checkpoints_secs or ",0,train 6b493f72c82593cb1a642af2d091e93b15b56ddc,tensorflow/tensorflow,"Change contrib estimator to save relative paths in checkpoint. 
Change: 155016674",estimator_test.py,"@@ -28,6 +28,8 @@ import numpy as np import six from six.moves import xrange # pylint: disable=redefined-builtin +from google.protobuf import text_format + from tensorflow.contrib import learn from tensorflow.contrib import lookup from tensorflow.contrib.framework.python.ops import variables @@ -50,6 +52,7 @@ from tensorflow.python.client import session as session_lib from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.lib.io import file_io from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops @@ -61,6 +64,7 @@ from tensorflow.python.platform import test from tensorflow.python.saved_model import loader from tensorflow.python.saved_model import tag_constants from tensorflow.python.training import basic_session_run_hooks +from tensorflow.python.training import checkpoint_state_pb2 from tensorflow.python.training import input as input_lib from tensorflow.python.training import monitored_session from tensorflow.python.training import saver as saver_lib @@ -674,6 +678,38 @@ class EstimatorTest(test.TestCase): metrics={'MSE': metric_ops.streaming_mean_squared_error}) self.assertLess(scores3['MSE'], scores['MSE']) + def test_checkpoint_contains_relative_paths(self): + tmpdir = tempfile.mkdtemp() + est = estimator.Estimator( + model_dir=tmpdir, + model_fn=linear_model_fn_with_model_fn_ops) + est.fit(input_fn=boston_input_fn, steps=5) + + checkpoint_file_content = file_io.read_file_to_string( + os.path.join(tmpdir, 'checkpoint')) + ckpt = checkpoint_state_pb2.CheckpointState() + text_format.Merge(checkpoint_file_content, ckpt) + self.assertEqual(ckpt.model_checkpoint_path, 'model.ckpt-5') + self.assertAllEqual( + ['model.ckpt-1', 'model.ckpt-5'], ckpt.all_model_checkpoint_paths) + + def test_train_save_copy_reload(self): + tmpdir = tempfile.mkdtemp() + model_dir1 = os.path.join(tmpdir, 'model_dir1') + est1 = estimator.Estimator( + model_dir=model_dir1, + model_fn=linear_model_fn_with_model_fn_ops) + est1.fit(input_fn=boston_input_fn, steps=5) + + model_dir2 = os.path.join(tmpdir, 'model_dir2') + os.renames(model_dir1, model_dir2) + est2 = estimator.Estimator( + model_dir=model_dir2, + model_fn=linear_model_fn_with_model_fn_ops) + self.assertEqual(5, est2.get_variable_value('global_step')) + est2.fit(input_fn=boston_input_fn, steps=5) + self.assertEqual(10, est2.get_variable_value('global_step')) + def testEstimatorParams(self): boston = base.load_boston() est = estimator.SKCompat( ",0,train 8ce1e9268e6f3f9a46504b9f2112b21fd1799b18,tensorflow/tensorflow,"Fix typo in StreamingFilesDataset PiperOrigin-RevId: 244678021",datasets.py,"@@ -130,8 +130,8 @@ def StreamingFilesDataset(files, if sloppy is None: sloppy = True - if file_reader_job == 'cordinator': - file_reader_device = '/job:%s/task:0' % file_reader_job + if file_reader_job == 'coordinator': + file_reader_device = '/job:coordinator/task:0' else: file_reader_device = '/job:%s' % file_reader_job ",0,test a16391de4e9d3c799e916959b47934472daa6fba,tensorflow/tensorflow,"Added selection of best storage type for Metal api. 
PiperOrigin-RevId: 410322725 Change-Id: I5848cdab1a35a867ed508258067c9510bdd5a657",metal_spatial_tensor.cc,"@@ -510,6 +510,17 @@ absl::Status CreateSharedImage2DBufferTensor(id buffer, return absl::OkStatus(); } +TensorStorageType GetFastestStorageType(const GpuInfo& gpu_info) { + const bool a7_or_a8 = + gpu_info.IsApple() && (gpu_info.apple_info.IsA7GenerationGpu() || + gpu_info.apple_info.IsA8GenerationGpu()); + if (a7_or_a8) { + return TensorStorageType::TEXTURE_2D; + } else { + return TensorStorageType::BUFFER; + } +} + } // namespace metal } // namespace gpu } // namespace tflite ",0,train a16391de4e9d3c799e916959b47934472daa6fba,tensorflow/tensorflow,"Added selection of best storage type for Metal api. PiperOrigin-RevId: 410322725 Change-Id: I5848cdab1a35a867ed508258067c9510bdd5a657",metal_spatial_tensor.h,"@@ -134,6 +134,8 @@ absl::Status CreateSharedImage2DBufferTensor(id buffer, const BHWDC& const TensorDescriptor& descriptor, int row_bytes_alignment, MetalSpatialTensor* result); +TensorStorageType GetFastestStorageType(const GpuInfo& gpu_info); + template absl::Status MetalSpatialTensor::WriteData(id device, const tflite::gpu::Tensor& src) { ",0,train 6c08402e3a7d3e440d6913cb683f26d28514ad8d,tensorflow/tensorflow,"[tf.data] Properly export `tf.contrib.data.group_by_reducer()` PiperOrigin-RevId: 201386380",__init__.py,"@@ -33,6 +33,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@choose_from_datasets @@dense_to_sparse_batch @@enumerate_dataset +@@group_by_reducer @@group_by_window @@ignore_errors @@make_batched_features_dataset @@ -71,6 +72,7 @@ from tensorflow.contrib.data.python.ops.enumerate_ops import enumerate_dataset from tensorflow.contrib.data.python.ops.error_ops import ignore_errors from tensorflow.contrib.data.python.ops.get_single_element import get_single_element from tensorflow.contrib.data.python.ops.grouping import bucket_by_sequence_length +from tensorflow.contrib.data.python.ops.grouping import group_by_reducer from tensorflow.contrib.data.python.ops.grouping import group_by_window from tensorflow.contrib.data.python.ops.interleave_ops import choose_from_datasets from tensorflow.contrib.data.python.ops.interleave_ops import parallel_interleave ",0,train 09e0b6cea4c357049c0cc7dbd415f99b1eae568d,tensorflow/tensorflow,"Adds a max_rematerialized_block_size field. PiperOrigin-RevId: 301205071 Change-Id: I9403816bbdc1e079a634b6ada730ecf7983eba6c",hlo_rematerialization.cc,"@@ -1648,6 +1648,8 @@ StatusOr HloRematerialization::RematerializeComputation( } else { // Found a valid block. Reset to start looking for single instructions // again. + max_rematerialized_block_size_ = + std::max(max_rematerialized_block_size_, max_block_size); changed = true; min_block_size = 1; max_block_size = 1; ",0,train 09e0b6cea4c357049c0cc7dbd415f99b1eae568d,tensorflow/tensorflow,"Adds a max_rematerialized_block_size field. PiperOrigin-RevId: 301205071 Change-Id: I9403816bbdc1e079a634b6ada730ecf7983eba6c",hlo_rematerialization.h,"@@ -180,6 +180,10 @@ class HloRematerialization : public HloModulePass { // dead. Hence, no net instructions were added. int64 net_instructions_added_ = 0; + // Size of the largest block that has been rematerialized. This is actually an + // upper bound (within a factor of 2) on the block size. 
+ int max_rematerialized_block_size_ = 0; + RematerializationMode mode_; }; ",0,train 383023c892ce9e89b6ff993f71c6ae65e838ab0d,tensorflow/tensorflow,"[XLA] Use a stricter ErrorSpec for some tests PiperOrigin-RevId: 279972069 Change-Id: I3116e9812999398ebd37f6bf78e509a0549b5e66",array_elementwise_ops_test.cc,"@@ -43,6 +43,7 @@ namespace { class ArrayElementwiseOpTest : public ClientLibraryTestBase { public: ErrorSpec error_spec_{0.0001, 0.0001}; + ErrorSpec strict_error_spec_{0x1p-48, 0x1p-48}; }; class ArrayElementwiseOpTestParamCount @@ -71,7 +72,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, NegConstantF64) { auto a = ConstantR1(&builder, {-2.5, 3.14, 2.25, -10.0, 6.0}); Neg(a); - ComputeAndCompare(&builder, {}, error_spec_); + ComputeAndCompare(&builder, {}, strict_error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, NegConstantS32) { @@ -458,7 +459,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantF64s) { auto b = ConstantR1(&builder, {100.0, 3.13, 2.75, 10.5, -999.0}); Sub(a, b); - ComputeAndCompare(&builder, {}, error_spec_); + ComputeAndCompare(&builder, {}, strict_error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, DivTwoConstantF32s) { @@ -490,7 +491,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, DivTwoConstantF64s) { 2.1, 3.1, 9.9, -4.5, -11.0, -21.5, M_PI}); Div(a, b); - ComputeAndCompare(&builder, {}, error_spec_); + ComputeAndCompare(&builder, {}, strict_error_spec_); } class IntegerDivideOpTest : public ArrayElementwiseOpTest { ",0,test d966b6a9600e64ebbde9c982e85c8ef2fc7e36f5,tensorflow/tensorflow,"[tf.data service] Raise an error when using dynamic sharding with Dataset.snapshot. PiperOrigin-RevId: 387858516 Change-Id: Ic96e70442251a417438220f2e762224f84f12f21",snapshot_dataset_op.cc,"@@ -141,6 +141,12 @@ class SnapshotDatasetV2Op::Dataset : public DatasetBase { Iterator::Params{this, absl::StrCat(prefix, ""::Snapshot"")}); } + Status MakeSplitProviders(std::vector>* + split_providers) const override { + return errors::Unimplemented( + ""Splitting is not implemented for snapshot datasets.""); + } + const DataTypeVector& output_dtypes() const override { return input_->output_dtypes(); } @@ -985,6 +991,12 @@ class SnapshotDatasetOp : public UnaryDatasetOpKernel { Iterator::Params{this, absl::StrCat(prefix, ""::Snapshot"")}); } + Status MakeSplitProviders(std::vector>* + split_providers) const override { + return errors::Unimplemented( + ""Splitting is not implemented for snapshot datasets.""); + } + const DataTypeVector& output_dtypes() const override { return input_->output_dtypes(); } ",0,test d966b6a9600e64ebbde9c982e85c8ef2fc7e36f5,tensorflow/tensorflow,"[tf.data service] Raise an error when using dynamic sharding with Dataset.snapshot. PiperOrigin-RevId: 387858516 Change-Id: Ic96e70442251a417438220f2e762224f84f12f21",dynamic_sharding_test.py,"@@ -274,6 +274,23 @@ class DynamicShardingTest(data_service_test_base.TestBase, self.assertDatasetProduces( ds, list(range(200)), assert_items_equal=assert_items_equal) + @combinations.generate( + combinations.times(test_base.default_test_combinations(), + combinations.combine(already_written=[True, False]))) + def testSnapshot(self, already_written): + num_workers = 3 + cluster = data_service_test_base.TestCluster(num_workers=num_workers) + ds = dataset_ops.Dataset.range(100) + ds = ds.snapshot(self.get_temp_dir()) + if already_written: + # Materialize the snapshot. 
+ self.getDatasetOutput(ds) + + ds = self._make_dynamic_sharding_dataset(ds, cluster) + error_regex = ""Splitting is not implemented for snapshot datasets"" + with self.assertRaisesRegex(errors.UnimplementedError, error_regex): + self.getDatasetOutput(ds) + @combinations.generate(test_base.default_test_combinations()) def testDistributedDataset(self): cluster_1 = data_service_test_base.TestCluster(num_workers=1) ",0,test 6e17966cc2ac75737eee912863d7a4599eaaad3e,tensorflow/tensorflow,"[tf.data] Follow up to cl/270460372 which extends the multi-device function check with an op kernel registration check. PiperOrigin-RevId: 270469526",captured_function.cc,"@@ -23,6 +23,7 @@ limitations under the License. #include ""tensorflow/core/framework/cancellation.h"" #include ""tensorflow/core/framework/function.h"" #include ""tensorflow/core/framework/function_handle_cache.h"" +#include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/stats_aggregator.h"" #include ""tensorflow/core/kernels/data/dataset_utils.h"" #include ""tensorflow/core/kernels/data/stats_utils.h"" @@ -856,6 +857,7 @@ Status CapturedFunction::IsMultiDevice(IteratorContext* ctx, LookupFunction(*metadata_->lib_def(), metadata_->func().name(), &fdef)); Device* current_device = ctx->flr()->device(); + DeviceType current_device_type(current_device->device_type()); DeviceNameUtils::ParsedName current_device_name; if (!DeviceNameUtils::ParseFullName(current_device->name(), ¤t_device_name)) { @@ -864,8 +866,8 @@ Status CapturedFunction::IsMultiDevice(IteratorContext* ctx, } // Check if any of the captured inputs are placed on a device not compatible - // with the current device. For non-captured inputs, we assume the are placed - // on the same device as the iterator. + // with the current device. For non-captured inputs, we assume they are placed + // on the current device. for (const auto& input : captured_inputs_) { DataType dtype = input.dtype(); if (dtype == DT_RESOURCE) { @@ -884,12 +886,18 @@ Status CapturedFunction::IsMultiDevice(IteratorContext* ctx, } } - // Check if any of the ops are placed on a device not compatible with the - // current device. + // Check if all ops could be placed on the current device. for (const auto& name : metadata_->lib_def()->ListFunctionNames()) { const FunctionDef* fdef; TF_RETURN_IF_ERROR(LookupFunction(*metadata_->lib_def(), name, &fdef)); for (const auto& node : fdef->node_def()) { + // Check if the op has a kernel availabe for the current device. + if (!KernelDefAvailable(current_device_type, node)) { + *is_multi_device = true; + return Status::OK(); + } + // If the op has a requested device, check if the requested device is + // compatible with the current device. if (!node.device().empty()) { DeviceNameUtils::ParsedName node_device_name; if (!DeviceNameUtils::ParseFullName(node.device(), &node_device_name)) { ",0,train 688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions. 
PiperOrigin-RevId: 200271078",add.cc,"@@ -126,16 +126,19 @@ void EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, int32 input1_multiplier; int input1_shift; - QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &input1_multiplier, - &input1_shift); + QuantizeMultiplierSmallerThanOneExp(real_input1_multiplier, + &input1_multiplier, &input1_shift); + input1_shift *= -1; int32 input2_multiplier; int input2_shift; - QuantizeMultiplierSmallerThanOne(real_input2_multiplier, &input2_multiplier, - &input2_shift); + QuantizeMultiplierSmallerThanOneExp(real_input2_multiplier, + &input2_multiplier, &input2_shift); + input2_shift *= -1; int32 output_multiplier; int output_shift; - QuantizeMultiplierSmallerThanOne(real_output_multiplier, &output_multiplier, - &output_shift); + QuantizeMultiplierSmallerThanOneExp(real_output_multiplier, + &output_multiplier, &output_shift); + output_shift *= -1; int32 output_activation_min, output_activation_max; CalculateActivationRangeUint8(params->activation, output, ",0,test 688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions. PiperOrigin-RevId: 200271078",conv.cc,"@@ -257,8 +257,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( context, input, filter, bias, output, &real_multiplier)); TF_LITE_ENSURE(context, real_multiplier < 1.0); - QuantizeMultiplierSmallerThanOne(real_multiplier, &data->output_multiplier, - &data->output_shift); + QuantizeMultiplierSmallerThanOneExp( + real_multiplier, &data->output_multiplier, &data->output_shift); + data->output_shift *= -1; CalculateActivationRangeUint8(params->activation, output, &data->output_activation_min, &data->output_activation_max); ",0,test 688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions. PiperOrigin-RevId: 200271078",fully_connected.cc,"@@ -118,8 +118,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( context, input, filter, bias, output, &real_multiplier)); TF_LITE_ENSURE(context, real_multiplier < 1.0); - QuantizeMultiplierSmallerThanOne(real_multiplier, &data->output_multiplier, - &data->output_shift); + QuantizeMultiplierSmallerThanOneExp( + real_multiplier, &data->output_multiplier, &data->output_shift); + data->output_shift *= -1; CalculateActivationRangeUint8(params->activation, output, &data->output_activation_min, &data->output_activation_max); ",0,test 688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions. PiperOrigin-RevId: 200271078",logsoftmax_quantized_test.cc,"@@ -116,10 +116,11 @@ void RunOneLogSoftmaxTest(const uint8* input_data, const Dims<4>& dims_common, int32 reverse_scaling_divisor; int reverse_scaling_right_shift; static const int kScaledDiffIntegerBits = 5; - tflite::PreprocessLogSoftmaxScaling( + tflite::PreprocessLogSoftmaxScalingExp( beta, input_scale, kScaledDiffIntegerBits, &input_beta_multiplier, &input_beta_left_shift, &reverse_scaling_divisor, &reverse_scaling_right_shift); + reverse_scaling_right_shift *= -1; // diff_min has a negative value, and is used to limit the maximum magnitude // of the diffs, which are <= 0. 
const int diff_min = -tflite::CalculateInputRadius(kScaledDiffIntegerBits, ",0,test 688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions. PiperOrigin-RevId: 200271078",optimized_ops.h,"@@ -1082,10 +1082,10 @@ struct GemmlowpOutputPipeline { gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint, gemmlowp::OutputStageClamp, gemmlowp::OutputStageSaturatingCastToUint8> Pipeline; - static Pipeline Make(const int32* bias_data, int output_rows, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max) { + static Pipeline MakeExp(const int32* bias_data, int output_rows, + int32 output_offset, int32 output_multiplier, + int output_left_shift, int32 output_activation_min, + int32 output_activation_max) { ColVectorMap bias_vector(bias_data, output_rows); gemmlowp::OutputStageBiasAddition bias_addition_stage; bias_addition_stage.bias_vector = bias_vector; @@ -1093,7 +1093,7 @@ struct GemmlowpOutputPipeline { quantize_down_stage; quantize_down_stage.result_offset_after_shift = output_offset; quantize_down_stage.result_fixedpoint_multiplier = output_multiplier; - quantize_down_stage.result_shift = output_shift; + quantize_down_stage.result_shift = -output_left_shift; gemmlowp::OutputStageClamp clamp_stage; clamp_stage.min = output_activation_min; clamp_stage.max = output_activation_max; @@ -1146,8 +1146,8 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, input_data, filter_cols, batches, filter_cols); gemmlowp::MatrixMap output_matrix( output_data, output_rows, batches, output_rows); - const auto& output_pipeline = GemmlowpOutputPipeline::Make( - bias_data, output_rows, output_offset, output_multiplier, output_shift, + const auto& output_pipeline = GemmlowpOutputPipeline::MakeExp( + bias_data, output_rows, output_offset, output_multiplier, -output_shift, output_activation_min, output_activation_max); gemmlowp::GemmWithOutputPipeline( @@ -2084,8 +2084,8 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims, gemm_input_data, gemm_input_rows, gemm_input_cols); gemmlowp::MatrixMap output_matrix( output_data, output_rows, output_cols); - const auto& output_pipeline = GemmlowpOutputPipeline::Make( - bias_data, output_rows, output_offset, output_multiplier, output_shift, + const auto& output_pipeline = GemmlowpOutputPipeline::MakeExp( + bias_data, output_rows, output_offset, output_multiplier, -output_shift, output_activation_min, output_activation_max); gemmlowp::GemmWithOutputPipeline( @@ -2242,8 +2242,8 @@ void ConvAsGemm(const uint8* input_data, const Dims<4>& input_dims, input_data, filter_cols, output_cols, filter_cols); gemmlowp::MatrixMap output_matrix( output_data, output_rows, output_cols, output_rows); - const auto& output_pipeline = GemmlowpOutputPipeline::Make( - bias_data, output_rows, output_offset, output_multiplier, output_shift, + const auto& output_pipeline = GemmlowpOutputPipeline::MakeExp( + bias_data, output_rows, output_offset, output_multiplier, -output_shift, output_activation_min, output_activation_max); gemmlowp::GemmWithOutputPipeline( @@ -2387,8 +2387,9 @@ void L2Normalization(const float* input_data, const Dims<4>& input_dims, } } -inline void GetInvSqrtQuantizedMultiplier(int32 input, int32* output_inv_sqrt, - int* output_shift) { +inline void GetInvSqrtQuantizedMultiplierExp(int32 input, + int32* output_inv_sqrt, + int* output_shift) { *output_shift = 11; while (input >= (1 << 29)) { 
input /= 4; @@ -2430,6 +2431,7 @@ inline void GetInvSqrtQuantizedMultiplier(int32 input, int32* output_inv_sqrt, *output_inv_sqrt <<= -*output_shift; *output_shift = 0; } + *output_shift *= kReverseShift; } inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims, @@ -2448,13 +2450,13 @@ inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims, } int32 inv_l2norm_multiplier; int inv_l2norm_shift; - GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier, - &inv_l2norm_shift); + GetInvSqrtQuantizedMultiplierExp(square_l2_norm, &inv_l2norm_multiplier, + &inv_l2norm_shift); for (int c = 0; c < depth; c++) { int32 diff = *input_data - input_zero_point; int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp( - 128 * diff, inv_l2norm_multiplier, kReverseShift * inv_l2norm_shift); + 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); int32 unclamped_output_val = 128 + rescaled_diff; int32 output_val = std::min(255, std::max(0, unclamped_output_val)); *output_data = static_cast(output_val); ",0,test 688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions. PiperOrigin-RevId: 200271078",quantization_util.cc,"@@ -48,15 +48,15 @@ void QuantizeMultiplierGreaterThanOne(double double_multiplier, TFLITE_CHECK_GE(*left_shift, 0); } -void QuantizeMultiplierSmallerThanOne(double double_multiplier, - int32_t* quantized_multiplier, - int* right_shift) { +void QuantizeMultiplierSmallerThanOneExp(double double_multiplier, + int32_t* quantized_multiplier, + int* left_shift) { TFLITE_CHECK_LT(double_multiplier, 1.); TFLITE_CHECK_GT(double_multiplier, 0.); int shift; QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift); TFLITE_CHECK_LE(shift, 0); - *right_shift = -shift; + *left_shift = shift; } void PreprocessSoftmaxScaling(double beta, double input_scale, @@ -78,20 +78,21 @@ void PreprocessSoftmaxScaling(double beta, double input_scale, quantized_multiplier, left_shift); } -void PreprocessLogSoftmaxScaling(double beta, double input_scale, - int input_integer_bits, - int32_t* quantized_multiplier, int* left_shift, - int32_t* reverse_scaling_divisor, - int* reverse_scaling_right_shift) { +void PreprocessLogSoftmaxScalingExp(double beta, double input_scale, + int input_integer_bits, + int32_t* quantized_multiplier, + int* left_shift, + int32_t* reverse_scaling_divisor, + int* reverse_scaling_left_shift) { PreprocessSoftmaxScaling(beta, input_scale, input_integer_bits, quantized_multiplier, left_shift); // Also calculate what amounts to the inverse scaling factor for the input. const double real_reverse_scaling_divisor = (1 << (31 - *left_shift)) / static_cast(*quantized_multiplier); - tflite::QuantizeMultiplierSmallerThanOne(real_reverse_scaling_divisor, - reverse_scaling_divisor, - reverse_scaling_right_shift); + tflite::QuantizeMultiplierSmallerThanOneExp(real_reverse_scaling_divisor, + reverse_scaling_divisor, + reverse_scaling_left_shift); } int CalculateInputRadius(int input_integer_bits, int input_left_shift) { ",0,test 688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions. PiperOrigin-RevId: 200271078",quantization_util.h,"@@ -167,9 +167,9 @@ IntOut SafeCast(FloatIn x) { // this is intended as a RIGHT-shift. // // Restricted to the case where the multiplier < 1 (and non-negative). 
-void QuantizeMultiplierSmallerThanOne(double double_multiplier, - int32_t* quantized_multiplier, - int* right_shift); +void QuantizeMultiplierSmallerThanOneExp(double double_multiplier, + int32_t* quantized_multiplier, + int* left_shift); // Decompose a double multiplier into a Q0.31 int32 representation of its // significand, and shift representation of its exponent. @@ -197,11 +197,12 @@ void PreprocessSoftmaxScaling(double beta, double input_scale, int input_integer_bits, int32_t* quantized_multiplier, int* left_shift); // Like PreprocessSoftmaxScaling, but inverse scaling factors also calculated. -void PreprocessLogSoftmaxScaling(double beta, double input_scale, - int input_integer_bits, - int32_t* quantized_multiplier, int* left_shift, - int32_t* reverse_scaling_divisor, - int* reverse_scaling_right_shift); +void PreprocessLogSoftmaxScalingExp(double beta, double input_scale, + int input_integer_bits, + int32_t* quantized_multiplier, + int* left_shift, + int32_t* reverse_scaling_divisor, + int* reverse_scaling_left_shift); // Calculate the largest input that will result in a within-bounds intermediate // result within MultiplyByQuantizedMultiplierGreaterThanOne. In other words, // it must not overflow before we reduce the value by multiplication by the ",0,test 688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions. PiperOrigin-RevId: 200271078",quantization_util_test.cc,"@@ -196,21 +196,21 @@ TEST(QuantizationUtilTest, ChooseQuantizationParamsInvalidRange) { EXPECT_DEATH(ChooseQuantizationParams(10.0, -30.0), """"); } -TEST(QuantizationUtilTest, QuantizeMultiplierSmallerThanOne) { +TEST(QuantizationUtilTest, QuantizeMultiplierSmallerThanOneExp) { auto quantize = [](double d) { int32_t q; int s; - QuantizeMultiplierSmallerThanOne(d, &q, &s); + QuantizeMultiplierSmallerThanOneExp(d, &q, &s); return std::pair{q, s}; }; EXPECT_DEATH(quantize(-0.1), """"); EXPECT_DEATH(quantize(0.0), """"); - EXPECT_THAT(quantize(0.25), Pair(1073741824, 1)); + EXPECT_THAT(quantize(0.25), Pair(1073741824, -1)); // Around 0.5 we can see the change in exponent and how we try hard to // void hitting max int32. - EXPECT_THAT(quantize(0.50 - 5e-9), Pair(2147483627, 1)); + EXPECT_THAT(quantize(0.50 - 5e-9), Pair(2147483627, -1)); EXPECT_THAT(quantize(0.50 - 1e-10), Pair(1073741824, 0)); EXPECT_THAT(quantize(0.50), Pair(1073741824, 0)); ",0,test 688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions. 
PiperOrigin-RevId: 200271078",reference_ops.h,"@@ -968,8 +968,9 @@ void L2Normalization(const float* input_data, const Dims<4>& input_dims, } } -inline void GetInvSqrtQuantizedMultiplier(int32 input, int32* output_inv_sqrt, - int* output_shift) { +inline void GetInvSqrtQuantizedMultiplierExp(int32 input, + int32* output_inv_sqrt, + int* output_shift) { *output_shift = 11; while (input >= (1 << 29)) { input /= 4; @@ -1011,6 +1012,7 @@ inline void GetInvSqrtQuantizedMultiplier(int32 input, int32* output_inv_sqrt, *output_inv_sqrt <<= -*output_shift; *output_shift = 0; } + *output_shift *= kReverseShift; } inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims, @@ -1027,14 +1029,14 @@ inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims, } int32 inv_l2norm_multiplier; int inv_l2norm_shift; - GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier, - &inv_l2norm_shift); + GetInvSqrtQuantizedMultiplierExp(square_l2_norm, &inv_l2norm_multiplier, + &inv_l2norm_shift); for (int c = 0; c < depth; c++) { int32 diff = input_data[Offset(input_dims, c, i, 0, 0)] - input_zero_point; int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp( - 128 * diff, inv_l2norm_multiplier, kReverseShift * inv_l2norm_shift); + 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); int32 unclamped_output_val = 128 + rescaled_diff; int32 output_val = std::min(255, std::max(0, unclamped_output_val)); output_data[Offset(output_dims, c, i, 0, 0)] = ",0,test 688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions. PiperOrigin-RevId: 200271078",mul.cc,"@@ -120,8 +120,9 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, double real_multiplier = input1->params.scale * input2->params.scale / output->params.scale; - QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, - &output_shift); + QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier, + &output_shift); + output_shift *= -1; int32 output_activation_min, output_activation_max; CalculateActivationRangeUint8(params->activation, output, ",0,test 688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions. PiperOrigin-RevId: 200271078",sub.cc,"@@ -126,16 +126,19 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, int32 input1_multiplier; int input1_shift; - QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &input1_multiplier, - &input1_shift); + QuantizeMultiplierSmallerThanOneExp(real_input1_multiplier, + &input1_multiplier, &input1_shift); + input1_shift *= -1; int32 input2_multiplier; int input2_shift; - QuantizeMultiplierSmallerThanOne(real_input2_multiplier, &input2_multiplier, - &input2_shift); + QuantizeMultiplierSmallerThanOneExp(real_input2_multiplier, + &input2_multiplier, &input2_shift); + input2_shift *= -1; int32 output_multiplier; int output_shift; - QuantizeMultiplierSmallerThanOne(real_output_multiplier, &output_multiplier, - &output_shift); + QuantizeMultiplierSmallerThanOneExp(real_output_multiplier, + &output_multiplier, &output_shift); + output_shift *= -1; int32 output_activation_min, output_activation_max; CalculateActivationRangeUint8(params->activation, output, ",0,test b8969d12f9260a7b1981b8d22788aa1f8c8cbbb6,tensorflow/tensorflow,"Mark Supervisor deprecated. Please use MonitoredTrainingSession instead. Fixes #6263. 
PiperOrigin-RevId: 177230053",monitored_session.py,"@@ -52,7 +52,6 @@ _PREEMPTION_ERRORS = (errors.AbortedError, errors.UnavailableError) USE_DEFAULT = object() -# TODO(touts): Share that with the Supervisor. class Scaffold(object): """"""Structure to create or gather pieces commonly needed to train a model. ",0,train b8969d12f9260a7b1981b8d22788aa1f8c8cbbb6,tensorflow/tensorflow,"Mark Supervisor deprecated. Please use MonitoredTrainingSession instead. Fixes #6263. PiperOrigin-RevId: 177230053",supervisor.py,"@@ -36,11 +36,15 @@ from tensorflow.python.training import coordinator from tensorflow.python.training import saver as saver_mod from tensorflow.python.training import session_manager as session_manager_mod from tensorflow.python.training import training_util +from tensorflow.python.util import deprecation class Supervisor(object): """"""A training helper that checkpoints models and computes summaries. + This class is deprecated. Please use + ${tf.train.MonitoredTrainingSession} instead. + The Supervisor is a small wrapper around a `Coordinator`, a `Saver`, and a `SessionManager` that takes care of common needs of TensorFlow training programs. @@ -198,6 +202,8 @@ class Supervisor(object): # the default behavior should be used. USE_DEFAULT = 0 + @deprecation.deprecated(None, + ""Please switch to tf.train.MonitoredTrainingSession"") def __init__(self, graph=None, ready_op=USE_DEFAULT, ",0,train 305d30ce6130d0d621746a59dc3116c5824ab523,tensorflow/tensorflow,"Fix MSAN failure in TF-XLA lowering This is an issue with an ArrayRef with content going out of scope at the end of the expression. PiperOrigin-RevId: 399055398 Change-Id: Ieb7c4c1ce3cf77b5000553fb13b254961f187bd3",legalize_tf.cc,"@@ -466,10 +466,10 @@ Value BatchDot(Location loc, Value lhs, bool transpose_lhs, Value rhs, ArrayAttr precision_config, OpBuilder *builder) { auto batch_dimensions = llvm::to_vector<4>(llvm::seq(0, num_batch_dims)); - auto lhs_contracting_dimensions = - llvm::makeArrayRef({transpose_lhs ? num_batch_dims : num_batch_dims + 1}); - auto rhs_contracting_dimensions = - llvm::makeArrayRef({transpose_rhs ? num_batch_dims + 1 : num_batch_dims}); + auto lhs_contracting_dimensions = llvm::to_vector<1>(llvm::makeArrayRef( + {transpose_lhs ? num_batch_dims : num_batch_dims + 1})); + auto rhs_contracting_dimensions = llvm::to_vector<1>(llvm::makeArrayRef( + {transpose_rhs ? num_batch_dims + 1 : num_batch_dims})); auto dimension_numbers = DotDimensionNumbersAttr::get( builder->getContext(), /*lhs_batching_dimensions=*/batch_dimensions, ",0,test edfc5938ba99cbe81ac50796f6ff647a374daf82,tensorflow/tensorflow,"Don't match to backward input convolution in unsupported case. For grouped convolutions, we assume that in the backward input convolution case, the input and output feature dimensions of the kernel are adjacent. If that is not the case, don't treat it as backward input convolution. PiperOrigin-RevId: 339029980 Change-Id: If0b4f8a64cd3ca73e9648358d8a579ce262b27c9",gpu_conv_rewriter.cc,"@@ -536,11 +536,12 @@ MatchBackwardInput(HloInstruction* conv) { // 'kernel_output_feature_dimension' by 'feature_group_count'. int64 input_feature_dimension = dnums.kernel_input_feature_dimension(); int64 output_feature_dimension = dnums.kernel_output_feature_dimension(); + // The following code assumes that input_feature_dimension and + // output_feature_dimension are adjacent. 
+ if (std::abs(input_feature_dimension - output_feature_dimension) != 1) { + return no_match_result; + } - // In the backward convolution case, the spatial dimensions become the - // feature dimensions, and we are guaranteed that the spatial dimensions are - // adjacent. - CHECK_EQ(std::abs(input_feature_dimension - output_feature_dimension), 1LL); int64 input_features = rhs->shape().dimensions(input_feature_dimension); int64 output_features = rhs->shape().dimensions(output_feature_dimension); ",0,train edfc5938ba99cbe81ac50796f6ff647a374daf82,tensorflow/tensorflow,"Don't match to backward input convolution in unsupported case. For grouped convolutions, we assume that in the backward input convolution case, the input and output feature dimensions of the kernel are adjacent. If that is not the case, don't treat it as backward input convolution. PiperOrigin-RevId: 339029980 Change-Id: If0b4f8a64cd3ca73e9648358d8a579ce262b27c9",grouped_convolution_test.cc,"@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include +#include + +#include ""absl/algorithm/container.h"" #include ""absl/types/optional.h"" #include ""tensorflow/compiler/xla/client/xla_computation.h"" #include ""tensorflow/compiler/xla/execution_options_util.h"" @@ -23,6 +27,7 @@ limitations under the License. #include ""tensorflow/compiler/xla/tests/client_library_test_base.h"" #include ""tensorflow/compiler/xla/tests/hlo_test_base.h"" #include ""tensorflow/compiler/xla/tests/test_macros.h"" +#include ""tensorflow/compiler/xla/tests/test_utils.h"" namespace xla { namespace { @@ -248,5 +253,28 @@ INSTANTIATE_TEST_CASE_P( ::testing::Bool()), GroupedConvolution2DTestDataToString); +using GroupedConvolutionTest = HloTestBase; + +XLA_TEST_F(GroupedConvolutionTest, BackwardInputConvolution) { + auto module = ParseAndReturnVerifiedModule(R""( + HloModule convolution_module + +ENTRY convolution { + p1 = f32[2,1,1,1]{3,2,1,0} parameter(0) + p2 = f32[2,4,4,1]{3,2,1,0} parameter(1) + reverse = f32[2,4,4,1]{3,2,1,0} reverse(p2), dimensions={1,2} + ROOT convolution = f32[2,4,4,1]{3,2,1,0} convolution(p1, reverse), window={size=4x4 pad=3_3x3_3}, dim_labels=fb01_o01i->f01b, feature_group_count=2 +} +)"") + .ValueOrDie(); + TF_ASSERT_OK_AND_ASSIGN(auto fake_arguments, MakeFakeArguments(module.get())); + std::vector fake_argument_ptrs; + absl::c_transform( + fake_arguments, std::back_inserter(fake_argument_ptrs), + [](const Literal& literal) { return &const_cast(literal); }); + EXPECT_TRUE(RunAndCompare(std::move(module), fake_argument_ptrs, + ErrorSpec{0.01, 0.01})); +} + } // namespace } // namespace xla ",0,train 0b69e6ed798b40b64aecea24a97aa2f198120688,tensorflow/tensorflow,"Fix two race conditions found in eager/c_api_test: 1. context_id shouldn't be read during update. 2. EagerExecutor::state_ should be set before creating EagerExecutor::thread_ PiperOrigin-RevId: 262968876",context.cc,"@@ -210,10 +210,16 @@ bool EagerContext::MirrorTensors() const { void EagerContext::CloseRemoteContexts() { // Close all remote contexts. eager::CloseContextRequest request; - request.set_context_id(context_id_); + uint64 context_id; + { + mutex_lock l(remote_state_mu_); + if (!is_master_) return; + context_id = context_id_; + context_id_ = kInvalidContextId; + } + request.set_context_id(context_id); // Setting context_id to a new value can avoid us issuing DestroyTensorHandle // request to closed remote workers. 
- context_id_ = kInvalidContextId; std::vector responses(remote_contexts_.size()); BlockingCounter counter(static_cast(remote_contexts_.size())); @@ -223,10 +229,11 @@ void EagerContext::CloseRemoteContexts() { Status s = remote_eager_workers_->GetClient(worker, &client); client->CloseContextAsync( - &request, &responses[i], [this, &worker, &counter](const Status& s) { + &request, &responses[i], + [&worker, &counter, context_id](const Status& s) { if (!s.ok()) { LOG(ERROR) << ""Unable to close remote context with ID "" - << context_id_ << "" for worker: "" << worker << "" due to "" + << context_id << "" for worker: "" << worker << "" due to "" << s.error_message(); } counter.DecrementCount(); @@ -252,11 +259,12 @@ void EagerContext::WaitForAndCloseRemoteContexts() { } keep_alive_thread_.reset(); - mutex_lock l(remote_state_mu_); - if (!remote_contexts_.empty() && is_master_) { + if (!remote_contexts_.empty()) { CloseRemoteContexts(); } + mutex_lock l(remote_state_mu_); + default_executor_.ShutDown().IgnoreError(); std::unordered_map executors_copy; { @@ -301,7 +309,7 @@ EagerContext::~EagerContext() { keep_alive_thread_cv_.notify_all(); } keep_alive_thread_.reset(); - if (!remote_contexts_.empty() && is_master_) { + if (!remote_contexts_.empty()) { CloseRemoteContexts(); } #endif // !IS_MOBILE_PLATFORM @@ -392,7 +400,7 @@ Status EagerContext::MaybeRegisterFunctionRemotely(const FunctionDef& fdef) { BlockingCounter blocking_counter(static_cast(remote_contexts_.size())); eager::RegisterFunctionRequest request; - request.set_context_id(context_id_); + request.set_context_id(GetContextId()); *request.mutable_function_def() = fdef; std::vector responses( remote_contexts_.size()); @@ -618,7 +626,10 @@ Status EagerContext::GetClient(const DeviceNameUtils::ParsedName& device_name, return Status::OK(); } -uint64 EagerContext::GetContextId() { return context_id_; } +uint64 EagerContext::GetContextId() { + tf_shared_lock l(remote_state_mu_); + return context_id_; +} Status EagerContext::StoreCollectiveOpsServer( std::unique_ptr server, DeviceMgr* device_mgr, @@ -672,14 +683,15 @@ Status EagerContext::InitializeRemoteMaster( ""Failed to initialize remote for master context due to invalid "", ""context id""); } - mutex_lock l(remote_state_mu_); - is_master_ = true; if (!remote_contexts_.empty()) { CloseRemoteContexts(); } - remote_contexts_ = remote_contexts; + + mutex_lock l(remote_state_mu_); + is_master_ = true; context_id_ = context_id; + remote_contexts_ = remote_contexts; use_send_tensor_rpc_ = ReadBoolFromEnvVar(""TF_EAGER_REMOTE_USE_SEND_TENSOR_RPC"", false); ",0,test 0b69e6ed798b40b64aecea24a97aa2f198120688,tensorflow/tensorflow,"Fix two race conditions found in eager/c_api_test: 1. context_id shouldn't be read during update. 2. EagerExecutor::state_ should be set before creating EagerExecutor::thread_ PiperOrigin-RevId: 262968876",context.h,"@@ -439,7 +439,7 @@ class EagerContext : public core::RefCounted { mutex remote_state_mu_; - uint64 context_id_; + uint64 context_id_ GUARDED_BY(remote_state_mu_); std::vector remote_contexts_; int keep_alive_secs_ GUARDED_BY(remote_state_mu_); ",0,test 0b69e6ed798b40b64aecea24a97aa2f198120688,tensorflow/tensorflow,"Fix two race conditions found in eager/c_api_test: 1. context_id shouldn't be read during update. 2. 
EagerExecutor::state_ should be set before creating EagerExecutor::thread_ PiperOrigin-RevId: 262968876",eager_executor.h,"@@ -160,10 +160,6 @@ class EagerExecutor { std::multimap node_done_notifications_ GUARDED_BY(node_queue_mutex_); - // Thread object that calls the `Run` method in async mode.This thread runs - // till thread_done_ is set to true. It is `nullptr` in sync mode. - const std::unique_ptr thread_; - // thread_exited_notification_ is notified by the `thread_` right before it // exits. Notification thread_exited_notification_; @@ -171,6 +167,10 @@ class EagerExecutor { // Indicates that `thread_` should stop as soon as it is done executing the // current EagerNode. ExecutorState state_ GUARDED_BY(node_queue_mutex_) = ExecutorState::kActive; + + // Thread object that calls the `Run` method in async mode.This thread runs + // until state_ is set to kShuttingDown. It is `nullptr` in sync mode. + const std::unique_ptr thread_; }; } // namespace tensorflow ",0,test b131b30fb59aa41bc826588b53571f6f98dcabc7,tensorflow/tensorflow,"PR #46097: [INTEL MKL] Change order for remapper Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/46097 This PR is to test if there will be any regression if the order of remapper in grappler meta_optimizer is moved before arithmetic_optimizer. Copybara import of the project: -- 613041d6e6b28f331aecd01aa6f69c8f0953fdac by mdfaijul : Change order for remapper. PiperOrigin-RevId: 352011080 Change-Id: I5041e87bfeeb41120517ce454d1baa247c32d4fb",meta_optimizer.cc,"@@ -269,9 +269,6 @@ Status MetaOptimizer::InitializeOptimizers( if (cfg_.pin_to_host_optimization() == RewriterConfig::ON) { optimizers->push_back(MakeUnique()); } - if (cfg_.remapping() != RewriterConfig::OFF) { - optimizers->push_back(MakeUnique(cfg_.remapping())); - } if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) { optimizers->push_back( MakeUnique(cfg_.arithmetic_optimization())); @@ -281,6 +278,9 @@ Status MetaOptimizer::InitializeOptimizers( /*optimization level*/ cfg_.layout_optimizer(), /*CPU layout conversion*/ cfg_.cpu_layout_conversion())); } + if (cfg_.remapping() != RewriterConfig::OFF) { + optimizers->push_back(MakeUnique(cfg_.remapping())); + } if (cfg_.loop_optimization() != RewriterConfig::OFF) { optimizers->push_back( MakeUnique(cfg_.loop_optimization(), cpu_device_)); ",0,test c0757ec6ed6e55b7fb50b5049276e6c140981b5d,tensorflow/tensorflow,"Add image decoding ops to flex delegate These include: - DecodeBmp - DecodeGif - DecodeJpeg - DecodePng PiperOrigin-RevId: 329224370 Change-Id: I696bd5bea4ab2cc570408b202d66058c7ca35a83",allowlisted_flex_ops.cc,"@@ -112,13 +112,7 @@ const std::set& GetFlexAllowlist() { ""DataFormatVecPermute"", ""DebugGradientIdentity"", ""DebugGradientRefIdentity"", - ""DecodeAndCropJpeg"", ""DecodeBase64"", - ""DecodeBmp"", - ""DecodeGif"", - ""DecodeImage"", - ""DecodeJpeg"", - ""DecodePng"", ""DecodeRaw"", ""DecodeWav"", ""DeepCopy"", @@ -139,9 +133,6 @@ const std::set& GetFlexAllowlist() { ""EluGrad"", ""Empty"", ""EncodeBase64"", - ""EncodeJpeg"", - ""EncodeJpegVariableQuality"", - ""EncodePng"", ""EncodeWav"", ""EnsureShape"", ""Enter"", ",0,train 7a0fa5db007318965d820127e318a3acbc42e701,tensorflow/tensorflow,Cleaned up code; added test for kl(p||p) = 0,beta_test.py,"@@ -322,6 +322,10 @@ class BetaTest(tf.test.TestCase): kl_val = sess.run(kl) self.assertEqual(kl.get_shape(), shape) self.assertAllClose(kl_val, kl_expected) + + # Make sure KL(d1||d1) is 0 + kl_same = sess.run(tf.contrib.distributions.kl(d1, d1)) + 
self.assertAllClose(kl_same, np.zeros_like(kl_expected)) if __name__ == ""__main__"": ",0,train 7a0fa5db007318965d820127e318a3acbc42e701,tensorflow/tensorflow,Cleaned up code; added test for kl(p||p) = 0,categorical_test.py,"@@ -226,16 +226,17 @@ class CategoricalTest(tf.test.TestCase): kl = tf.contrib.distributions.kl(a, b) kl_val = sess.run(kl) + # Make sure KL(a||a) is 0 + kl_same = sess.run(tf.contrib.distributions.kl(a, a)) prob_a = np_softmax(a_logits) prob_b = np_softmax(b_logits) kl_expected = np.sum( - prob_a * (np.log(prob_a) - np.log(prob_b)), - axis=-1, - ) + prob_a * (np.log(prob_a) - np.log(prob_b)), axis=-1) self.assertEqual(kl.get_shape(), (batch_size,)) self.assertAllClose(kl_val, kl_expected) + self.assertAllClose(kl_same, np.zeros_like(kl_expected)) if __name__ == ""__main__"": ",0,train 7a0fa5db007318965d820127e318a3acbc42e701,tensorflow/tensorflow,Cleaned up code; added test for kl(p||p) = 0,beta.py,"@@ -308,15 +308,16 @@ def _kl_beta_beta(d1, d2, name=None): Batchwise KL(d1 || d2) """""" inputs = [d1.a, d1.b, d1.a_b_sum, d2.a_b_sum] - with ops.name_scope( - name, ""kl_beta_beta"", inputs): - log_betas = (math_ops.lgamma(d2.a) + math_ops.lgamma(d2.b) - - math_ops.lgamma(d2.a_b_sum) + math_ops.lgamma(d1.a_b_sum) - - math_ops.lgamma(d1.a) - math_ops.lgamma(d1.b)) - digammas = ((d1.a - d2.a)*math_ops.digamma(d1.a) - + (d1.b - d2.b)*math_ops.digamma(d1.b) - + (d2.a_b_sum - d1.a_b_sum)*math_ops.digamma(d1.a_b_sum)) - return log_betas + digammas + with ops.name_scope(name, ""kl_beta_beta"", inputs): + # ln(B(a', b') / B(a, b)) + log_betas = (math_ops.lgamma(d2.a) + math_ops.lgamma(d2.b) + - math_ops.lgamma(d2.a_b_sum) + math_ops.lgamma(d1.a_b_sum) + - math_ops.lgamma(d1.a) - math_ops.lgamma(d1.b)) + # (a - a')*psi(a) + (b - b')*psi(b) + (a' - a + b' - b)*psi(a + b) + digammas = ((d1.a - d2.a)*math_ops.digamma(d1.a) + + (d1.b - d2.b)*math_ops.digamma(d1.b) + + (d2.a_b_sum - d1.a_b_sum)*math_ops.digamma(d1.a_b_sum)) + return log_betas + digammas # Register KL divergences. ",0,train 7a0fa5db007318965d820127e318a3acbc42e701,tensorflow/tensorflow,Cleaned up code; added test for kl(p||p) = 0,categorical.py,"@@ -183,8 +183,7 @@ def _kl_categorical_categorical(a, b, name=None): """""" with ops.name_scope( name, ""kl_categorical_categorical"", [a.logits, b.logits]): + # sum(p*ln(p/q)) return math_ops.reduce_sum( nn_ops.softmax(a.logits)*(nn_ops.log_softmax(a.logits) - - nn_ops.log_softmax(b.logits)), - reduction_indices=[-1], - ) + - nn_ops.log_softmax(b.logits)), reduction_indices=[-1]) ",0,train 03d13a4ddfde5ab2ac0edee72658d1f40fcfe3c9,tensorflow/tensorflow,"Delete seemingly unused and deprecated code. PiperOrigin-RevId: 375467299 Change-Id: I2df58f7b6872697cf7534c3fc9a35aefde13fd93",status_bar.py,"@@ -1,24 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the ""License""); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an ""AS IS"" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -""""""A no-op implementation of status bar functions."""""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -def SetupStatusBarInsideGoogle(unused_link_text, unused_port): - pass ",0,test 92ce8a70a5d6aacd84d4e4db250786fb86c2ac86,tensorflow/tensorflow,remove temp variable,grpc_worker_cache.cc,"@@ -43,7 +43,7 @@ class GrpcWorkerCache : public WorkerCachePartial { void* tag; bool ok; while (completion_queue_.Next(&tag, &ok)) { - GrpcClientCQTag* callback_tag = static_cast(tag); + auto callback_tag = static_cast(tag); callback_tag->OnCompleted(ok); } }); @@ -67,11 +67,10 @@ class GrpcWorkerCache : public WorkerCachePartial { if (target == local_target_) { return local_worker_; } else { - SharedGrpcChannelPtr channel = channel_cache_->FindWorkerChannel(target); + auto channel = channel_cache_->FindWorkerChannel(target); if (!channel) return nullptr; - WorkerInterface* ret = NewGrpcRemoteWorker(&live_rpc_counter_, channel, - &completion_queue_, &logger_); - return ret; + return NewGrpcRemoteWorker(&live_rpc_counter_, channel, + &completion_queue_, &logger_); } } ",0,train 8984bd30b49837893b95e44357264f5b4ee95118,tensorflow/tensorflow,"Only use cancellation_manager to cancel recv_op_ in eager mode. There is a race condition in TensorFlow 1.x if assigning a cancellation_manager to recv_op . Because both cancellation_manager.Cancel() and rendezvous::Abort() will be called if a session gets an error. PiperOrigin-RevId: 262415758",kernel_and_device.cc,"@@ -259,6 +259,7 @@ Status KernelAndDeviceOp::Run(ScopedStepContainer* step_container, } OpKernelContext::Params params; + params.is_eager = true; params.device = device_; params.frame_iter = FrameAndIter(0, 0); params.inputs = &inputs; ",0,train 8984bd30b49837893b95e44357264f5b4ee95118,tensorflow/tensorflow,"Only use cancellation_manager to cancel recv_op_ in eager mode. There is a race condition in TensorFlow 1.x if assigning a cancellation_manager to recv_op . Because both cancellation_manager.Cancel() and rendezvous::Abort() will be called if a session gets an error. PiperOrigin-RevId: 262415758",op_kernel.h,"@@ -621,6 +621,9 @@ class OpKernelContext { // The step being executed. int64 step_id = 0; + // True if the op is created by eager runtime. + bool is_eager = false; + // The op kernel being computed. OpKernel* op_kernel = nullptr; @@ -738,6 +741,8 @@ class OpKernelContext { int64 step_id() const { return params_->step_id; } + bool is_eager() const { return params_->is_eager; } + const OpKernel& op_kernel() const { return *params_->op_kernel; } // Input/output signature. ",0,train 8984bd30b49837893b95e44357264f5b4ee95118,tensorflow/tensorflow,"Only use cancellation_manager to cancel recv_op_ in eager mode. There is a race condition in TensorFlow 1.x if assigning a cancellation_manager to recv_op . Because both cancellation_manager.Cancel() and rendezvous::Abort() will be called if a session gets an error. PiperOrigin-RevId: 262415758",sendrecv_ops.cc,"@@ -169,7 +169,12 @@ void RecvOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) { Rendezvous::Args args; args.device_context = ctx->op_device_context(); args.alloc_attrs = ctx->output_alloc_attr(0); - args.cancellation_manager = ctx->cancellation_manager(); + if (ctx->is_eager()) { + // NOTE(fishx): Only set cancellation_manager in eager mode. 
Because in + // Tensorflow 1.x, session (or graph_mgr) will abort the underlying + // rendezvous if it encounters any error. + args.cancellation_manager = ctx->cancellation_manager(); + } FrameAndIter frame_iter = GetFrameAndIter(ctx, hostmem_sendrecv_); if (frame_iter == FrameAndIter(0, 0)) { ",0,train cb45cab0a2813c5b5d5f28bfd870897e521ca924,tensorflow/tensorflow,"Restore constness of kStatTypeStrMap PiperOrigin-RevId: 285455879 Change-Id: Ie4be8b1e001c3cda59057bb63cb865d22f5cd228",xplane_schema.cc,"@@ -22,7 +22,7 @@ const absl::string_view kHostThreads = ""Host Threads""; const int kNumStatTypes = static_cast(StatType::kHloModule) + 1; -static absl::string_view kStatTypeStrMap[kNumStatTypes] = { +static const absl::string_view kStatTypeStrMap[kNumStatTypes] = { ""unknown"", ""id"", ""parent_step_id"", ""function_step_id"", ""device_ordinal"", ""chip_ordinal"", ",0,train 0567f169e6910587e7dcca547aa5baeaaffbc03d,tensorflow/tensorflow,"Internal change. PiperOrigin-RevId: 191020351",values.py,"@@ -243,7 +243,7 @@ def _tensor_conversion(var, dtype=None, name=None, as_ref=False): ops.register_tensor_conversion_function(DistributedVariable, _tensor_conversion) -# TODO(josh11b): ops.register_dense_tensor_like_type(DistributedVariable)? +ops.register_dense_tensor_like_type(DistributedVariable) class _MirroredSaveable(saver.BaseSaverBuilder.ResourceVariableSaveable): ",0,test 5e3932ce780cb2eae1549bf93481a1d6d181f00b,tensorflow/tensorflow,"Disable a flaky test test_group_conv PiperOrigin-RevId: 314837253 Change-Id: I4deceead7a8a5b82a45b30025819a961dbdb5bb9",convolutional_test.py,"@@ -433,7 +433,7 @@ class GroupedConvTest(keras_parameterized.TestCase): ('Conv2D', keras.layers.Conv2D, (32, 12, 12, 32)), ('Conv3D', keras.layers.Conv3D, (32, 12, 12, 12, 32)), ) - def test_group_conv(self, layer_cls, input_shape): + def disable_test_group_conv(self, layer_cls, input_shape): if test.is_gpu_available(cuda_only=True): with test_util.use_gpu(): inputs = random_ops.random_uniform(shape=input_shape) ",0,train 47118ab5d64a0c8f93913294445424e2acb6c905,tensorflow/tensorflow,"Update keras dense layers to use embedding_lookup_sparse We are switching to this instead of sparse_tensor_dense_matmul, which has performance issue for large sparse tensor as it computes a dense gradient. 
PiperOrigin-RevId: 361029309 Change-Id: I213c46b6e482817801e6b0d1e306c757da8672ce",core_test.py,"@@ -26,6 +26,7 @@ from tensorflow.python import keras from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_spec from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import testing_utils @@ -33,6 +34,7 @@ from tensorflow.python.keras.layers import core from tensorflow.python.keras.mixed_precision import policy from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variables from tensorflow.python.ops.ragged import ragged_factory_ops from tensorflow.python.platform import test @@ -503,6 +505,36 @@ class CoreLayersTest(keras_parameterized.TestCase): testing_utils.layer_test( keras.layers.Dense, kwargs={'units': 3}, input_shape=(3, 4, 5, 2)) + def test_dense_output(self): + dense_inputs = ops.convert_to_tensor_v2_with_dispatch( + np.random.uniform(size=(10, 10)).astype('f')) + # Create some sparse data where multiple rows and columns are missing. + sparse_inputs = sparse_tensor.SparseTensor( + indices=np.random.randint(low=0, high=10, size=(5, 2)), + values=np.random.uniform(size=(5,)).astype('f'), + dense_shape=[10, 10]) + sparse_inputs = sparse_ops.sparse_reorder(sparse_inputs) + + layer = keras.layers.Dense( + 5, + kernel_initializer=keras.initializers.RandomUniform(), + bias_initializer=keras.initializers.RandomUniform(), + dtype='float32') + dense_outputs = layer(dense_inputs) + sparse_outpus = layer(sparse_inputs) + + expected_dense = math_ops.add( + math_ops.matmul(dense_inputs, keras.backend.get_value(layer.kernel)), + keras.backend.get_value(layer.bias)) + expected_sparse = math_ops.add( + math_ops.matmul( + sparse_ops.sparse_tensor_to_dense(sparse_inputs), + keras.backend.get_value(layer.kernel)), + keras.backend.get_value(layer.bias)) + + self.assertAllClose(dense_outputs, expected_dense) + self.assertAllClose(sparse_outpus, expected_sparse) + def test_dense_dtype(self): inputs = ops.convert_to_tensor_v2_with_dispatch( np.random.randint(low=0, high=7, size=(2, 2))) ",0,train 47118ab5d64a0c8f93913294445424e2acb6c905,tensorflow/tensorflow,"Update keras dense layers to use embedding_lookup_sparse We are switching to this instead of sparse_tensor_dense_matmul, which has performance issue for large sparse tensor as it computes a dense gradient. PiperOrigin-RevId: 361029309 Change-Id: I213c46b6e482817801e6b0d1e306c757da8672ce",core.py,"@@ -19,6 +19,7 @@ from __future__ import print_function from tensorflow.python.eager import context from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import embedding_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops @@ -47,8 +48,28 @@ def dense(inputs, kernel, bias=None, activation=None, dtype=None): rank = inputs.shape.rank if rank == 2 or rank is None: + # We use embedding_lookup_sparse as a more efficient matmul operation for + # large sparse input tensors. The op will result in a sparse gradient, as + # opposed to sparse_ops.sparse_tensor_dense_matmul which results in dense + # gradients. This can lead to sigfinicant speedups, see b/171762937. 
if isinstance(inputs, sparse_tensor.SparseTensor): - outputs = sparse_ops.sparse_tensor_dense_matmul(inputs, kernel) + # We need to fill empty rows, as the op assumes at least one id per row. + inputs, _ = sparse_ops.sparse_fill_empty_rows(inputs, 0) + # We need to do some munging of our input to use the embedding lookup as a + # matrix multiply. We split our input matrix into separate ids and weights + # tensors. The values of the ids tensor should be the column indices of + # our input matrix and the values of the weights tensor can continue to + # the actual matrix weights. The column arrangement of ids and weights + # will be summed over and does not matter. See the documentation for + # sparse_ops.sparse_tensor_dense_matmul a more detailed explanation of the + # inputs to both ops. + ids = sparse_tensor.SparseTensor( + indices=inputs.indices, + values=inputs.indices[:, 1], + dense_shape=inputs.dense_shape) + weights = inputs + outputs = embedding_ops.embedding_lookup_sparse_v2( + kernel, ids, weights, combiner=""sum"") else: outputs = gen_math_ops.MatMul(a=inputs, b=kernel) # Broadcast kernel to inputs. ",0,train 8f31716e68bb16ebc6a265b470297de695761882,tensorflow/tensorflow,"Added embedding learning rate multiplier support for DNN Classifier. Change: 140412953",dnn.py,"@@ -23,6 +23,7 @@ from tensorflow.contrib import layers from tensorflow.contrib.framework import deprecated from tensorflow.contrib.framework import deprecated_arg_values from tensorflow.contrib.framework.python.ops import variables as contrib_variables +from tensorflow.contrib.layers.python.layers import feature_column as feature_column_lib from tensorflow.contrib.layers.python.layers import optimizers from tensorflow.contrib.learn.python.learn import evaluable from tensorflow.contrib.learn.python.learn import monitors as monitor_lib @@ -34,6 +35,7 @@ from tensorflow.contrib.learn.python.learn.estimators import model_fn from tensorflow.contrib.learn.python.learn.estimators import prediction_key from tensorflow.contrib.learn.python.learn.utils import export from tensorflow.python import summary +from tensorflow.python.framework import ops from tensorflow.python.ops import nn from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import variable_scope @@ -64,6 +66,31 @@ def _add_hidden_layer_summary(value, tag): summary.histogram(""%s_activation"" % tag, value) +def _get_embedding_variable(column, collection_key, input_layer_scope): + return ops.get_collection(collection_key, + input_layer_scope + ""/"" + column.name) + + +def _extract_embedding_lr_multipliers(embedding_lr_multipliers, collection_key, + input_layer_scope): + """"""Convert embedding lr multipliers to variable based gradient multiplier."""""" + if not embedding_lr_multipliers: + return None + gradient_multipliers = {} + for column, lr_mult in embedding_lr_multipliers.items(): + if not isinstance(column, feature_column_lib._EmbeddingColumn): # pylint: disable=protected-access + raise ValueError( + ""learning rate multipler can be defined for embedding columns. "" + ""It is defined for {}"".format(column)) + embedding = _get_embedding_variable( + column, collection_key, input_layer_scope) + if not embedding: + raise ValueError(""Couldn't find a variable for column {}"".format(column)) + for v in embedding: + gradient_multipliers[v] = lr_mult + return gradient_multipliers + + def _dnn_model_fn(features, labels, mode, params): """"""Deep Neural Net model_fn. 
@@ -89,6 +116,9 @@ def _dnn_model_fn(features, labels, mode, params): * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * num_ps_replicas: The number of parameter server replicas. + * embedding_lr_multipliers: Optional. A dictionary from + `EbeddingColumn` to a `float` multiplier. Multiplier will be used to + multiply with learning rate for the embedding variables. Returns: predictions: A dict of `Tensor` objects. @@ -103,6 +133,7 @@ def _dnn_model_fn(features, labels, mode, params): dropout = params.get(""dropout"") gradient_clip_norm = params.get(""gradient_clip_norm"") num_ps_replicas = params.get(""num_ps_replicas"", 0) + embedding_lr_multipliers = params.get(""embedding_lr_multipliers"", {}) features = _get_feature_dict(features) parent_scope = ""dnn"" @@ -111,8 +142,9 @@ def _dnn_model_fn(features, labels, mode, params): partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) + input_layer_scope = parent_scope + ""/input_from_feature_columns"" with variable_scope.variable_scope( - parent_scope + ""/input_from_feature_columns"", + input_layer_scope, values=features.values(), partitioner=input_layer_partitioner) as scope: net = layers.input_from_feature_columns( @@ -160,6 +192,8 @@ def _dnn_model_fn(features, labels, mode, params): global_step=contrib_variables.get_global_step(), learning_rate=_LEARNING_RATE, optimizer=_get_optimizer(optimizer), + gradient_multipliers=_extract_embedding_lr_multipliers( + embedding_lr_multipliers, parent_scope, input_layer_scope), clip_gradients=gradient_clip_norm, name=parent_scope, # Empty summaries to prevent optimizers from logging the training_loss. @@ -234,7 +268,8 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable): gradient_clip_norm=None, enable_centered_bias=False, config=None, - feature_engineering_fn=None): + feature_engineering_fn=None, + embedding_lr_multipliers=None): """"""Initializes a DNNClassifier instance. Args: @@ -271,6 +306,9 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable): labels which are the output of `input_fn` and returns features and labels which will be fed into the model. + embedding_lr_multipliers: Optional. A dictionary from `EbeddingColumn` to + a `float` multiplier. Multiplier will be used to multiply with + learning rate for the embedding variables. Returns: A `DNNClassifier` estimator. 
@@ -287,17 +325,27 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable): model_dir=model_dir, config=config, params={ - ""head"": head_lib._multi_class_head( # pylint: disable=protected-access - n_classes, - weight_column_name=weight_column_name, - enable_centered_bias=enable_centered_bias), - ""hidden_units"": hidden_units, - ""feature_columns"": feature_columns, - ""optimizer"": optimizer, - ""activation_fn"": activation_fn, - ""dropout"": dropout, - ""gradient_clip_norm"": gradient_clip_norm, - ""num_ps_replicas"": config.num_ps_replicas if config else 0, + ""head"": + head_lib._multi_class_head( # pylint: disable=protected-access + n_classes, + weight_column_name=weight_column_name, + enable_centered_bias=enable_centered_bias), + ""hidden_units"": + hidden_units, + ""feature_columns"": + feature_columns, + ""optimizer"": + optimizer, + ""activation_fn"": + activation_fn, + ""dropout"": + dropout, + ""gradient_clip_norm"": + gradient_clip_norm, + ""num_ps_replicas"": + config.num_ps_replicas if config else 0, + ""embedding_lr_multipliers"": + embedding_lr_multipliers, }, feature_engineering_fn=feature_engineering_fn) ",0,train 8f31716e68bb16ebc6a265b470297de695761882,tensorflow/tensorflow,"Added embedding learning rate multiplier support for DNN Classifier. Change: 140412953",dnn_test.py,"@@ -27,12 +27,88 @@ import numpy as np import tensorflow as tf from tensorflow.contrib.learn.python.learn.estimators import _sklearn +from tensorflow.contrib.learn.python.learn.estimators import dnn from tensorflow.contrib.learn.python.learn.estimators import estimator_test_utils +from tensorflow.contrib.learn.python.learn.estimators import head as head_lib from tensorflow.contrib.learn.python.learn.estimators import test_data from tensorflow.contrib.learn.python.learn.metric_spec import MetricSpec from tensorflow.python.ops import math_ops +class EmbeddingMultiplierTest(tf.test.TestCase): + """"""dnn_model_fn tests."""""" + + def testRaisesNonEmbeddingColumn(self): + one_hot_language = tf.contrib.layers.one_hot_column( + tf.contrib.layers.sparse_column_with_hash_bucket('language', 10)) + + params = { + 'feature_columns': [one_hot_language], + 'head': head_lib._multi_class_head(2), + 'hidden_units': [1], + # Set lr mult to 0. to keep embeddings constant. + 'embedding_lr_multipliers': { + one_hot_language: 0.0 + }, + } + features = { + 'language': + tf.SparseTensor( + values=['en', 'fr', 'zh'], + indices=[[0, 0], [1, 0], [2, 0]], + shape=[3, 1]), + } + labels = tf.constant([[0], [0], [0]], dtype=tf.int32) + with self.assertRaisesRegexp( + ValueError, 'can be defined for embedding columns'): + dnn._dnn_model_fn(features, labels, + tf.contrib.learn.ModeKeys.TRAIN, params) + + def testMultipliesGradient(self): + embedding_language = tf.contrib.layers.embedding_column( + tf.contrib.layers.sparse_column_with_hash_bucket('language', 10), + dimension=1, initializer=tf.constant_initializer(0.1)) + embedding_wire = tf.contrib.layers.embedding_column( + tf.contrib.layers.sparse_column_with_hash_bucket('wire', 10), + dimension=1, initializer=tf.constant_initializer(0.1)) + + params = { + 'feature_columns': [embedding_language, embedding_wire], + 'head': head_lib._multi_class_head(2), + 'hidden_units': [1], + # Set lr mult to 0. to keep embeddings constant. 
+ 'embedding_lr_multipliers': { + embedding_language: 0.0 + }, + } + features = { + 'language': + tf.SparseTensor( + values=['en', 'fr', 'zh'], + indices=[[0, 0], [1, 0], [2, 0]], + shape=[3, 1]), + 'wire': + tf.SparseTensor( + values=['omar', 'stringer', 'marlo'], + indices=[[0, 0], [1, 0], [2, 0]], + shape=[3, 1]), + } + labels = tf.constant([[0], [0], [0]], dtype=tf.int32) + model_ops = dnn._dnn_model_fn(features, labels, + tf.contrib.learn.ModeKeys.TRAIN, params) + with tf.train.MonitoredSession() as sess: + language_var = dnn._get_embedding_variable( + embedding_language, 'dnn', 'dnn/input_from_feature_columns') + wire_var = dnn._get_embedding_variable( + embedding_wire, 'dnn', 'dnn/input_from_feature_columns') + for _ in range(2): + _, language_value, wire_value = sess.run( + [model_ops.train_op, language_var, wire_var]) + initial_value = np.full_like(language_value, 0.1) + self.assertTrue(np.all(np.isclose(language_value, initial_value))) + self.assertFalse(np.all(np.isclose(wire_value, initial_value))) + + class DNNClassifierTest(tf.test.TestCase): def _assertInRange(self, expected_min, expected_max, actual): @@ -118,10 +194,10 @@ class DNNClassifierTest(tf.test.TestCase): classifier = tf.contrib.learn.DNNClassifier( n_classes=2, feature_columns=feature_columns, - hidden_units=[3, 3], + hidden_units=[10, 10], config=tf.contrib.learn.RunConfig(tf_random_seed=1)) - classifier.fit(input_fn=_input_fn, steps=5) + classifier.fit(input_fn=_input_fn, steps=50) scores = classifier.evaluate(input_fn=_input_fn, steps=1) self._assertInRange(0.0, 1.0, scores['accuracy']) @@ -222,7 +298,7 @@ class DNNClassifierTest(tf.test.TestCase): n_classes=3, feature_columns=feature_columns, hidden_units=[3, 3], - config=tf.contrib.learn.RunConfig(tf_random_seed=3)) + config=tf.contrib.learn.RunConfig(tf_random_seed=1)) classifier.fit(x=train_x, y=train_y, steps=200) scores = classifier.evaluate(x=train_x, y=train_y, steps=1) @@ -310,7 +386,7 @@ class DNNClassifierTest(tf.test.TestCase): weight_column_name='w', feature_columns=[tf.contrib.layers.real_valued_column('x')], hidden_units=[3, 3], - config=tf.contrib.learn.RunConfig(tf_random_seed=3)) + config=tf.contrib.learn.RunConfig(tf_random_seed=1)) classifier.fit(input_fn=_input_fn_train, steps=5) scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1) @@ -339,8 +415,8 @@ class DNNClassifierTest(tf.test.TestCase): classifier = tf.contrib.learn.DNNClassifier( n_classes=3, feature_columns=feature_columns, - hidden_units=[3, 3], - config=tf.contrib.learn.RunConfig(tf_random_seed=3)) + hidden_units=[10, 10], + config=tf.contrib.learn.RunConfig(tf_random_seed=1)) classifier.fit(input_fn=_input_fn, steps=100) @@ -524,7 +600,7 @@ class DNNClassifierTest(tf.test.TestCase): } with tf.test.mock.patch.dict('os.environ', {'TF_CONFIG': json.dumps(tf_config)}): - config = tf.contrib.learn.RunConfig(tf_random_seed=5) + config = tf.contrib.learn.RunConfig(tf_random_seed=1) # Because we did not start a distributed cluster, we need to pass an # empty ClusterSpec, otherwise the device_setter will look for # distributed jobs, such as ""/job:ps"" which are not present. 
@@ -707,7 +783,7 @@ class DNNRegressorTest(tf.test.TestCase): regressor = tf.contrib.learn.DNNRegressor( feature_columns=[tf.contrib.layers.real_valued_column('x')], hidden_units=[3, 3], - config=tf.contrib.learn.RunConfig(tf_random_seed=3)) + config=tf.contrib.learn.RunConfig(tf_random_seed=1)) regressor.fit(input_fn=_input_fn_train, steps=5) scores = regressor.evaluate(input_fn=_input_fn_train, steps=1) @@ -772,7 +848,7 @@ class DNNRegressorTest(tf.test.TestCase): weight_column_name='w', feature_columns=[tf.contrib.layers.real_valued_column('x')], hidden_units=[3, 3], - config=tf.contrib.learn.RunConfig(tf_random_seed=3)) + config=tf.contrib.learn.RunConfig(tf_random_seed=1)) regressor.fit(input_fn=_input_fn_train, steps=5) scores = regressor.evaluate(input_fn=_input_fn_eval, steps=1) @@ -803,7 +879,7 @@ class DNNRegressorTest(tf.test.TestCase): regressor = tf.contrib.learn.DNNRegressor( feature_columns=feature_columns, hidden_units=[3, 3], - config=tf.contrib.learn.RunConfig(tf_random_seed=3)) + config=tf.contrib.learn.RunConfig(tf_random_seed=1)) regressor.fit(input_fn=_input_fn, steps=200) @@ -837,7 +913,7 @@ class DNNRegressorTest(tf.test.TestCase): regressor = tf.contrib.learn.DNNRegressor( feature_columns=feature_columns, hidden_units=[3, 3], - config=tf.contrib.learn.RunConfig(tf_random_seed=3)) + config=tf.contrib.learn.RunConfig(tf_random_seed=1)) regressor.fit(input_fn=_input_fn, steps=200) @@ -918,7 +994,7 @@ class DNNRegressorTest(tf.test.TestCase): model_dir=model_dir, feature_columns=feature_columns, hidden_units=[3, 3], - config=tf.contrib.learn.RunConfig(tf_random_seed=3)) + config=tf.contrib.learn.RunConfig(tf_random_seed=1)) regressor.fit(input_fn=_input_fn, steps=5) predict_input_fn = functools.partial(_input_fn, num_epochs=1) @@ -929,7 +1005,7 @@ class DNNRegressorTest(tf.test.TestCase): model_dir=model_dir, feature_columns=feature_columns, hidden_units=[3, 3], - config=tf.contrib.learn.RunConfig(tf_random_seed=3)) + config=tf.contrib.learn.RunConfig(tf_random_seed=1)) predictions2 = list(regressor2.predict(input_fn=predict_input_fn)) self.assertAllClose(predictions, predictions2) @@ -1004,7 +1080,7 @@ class DNNRegressorTest(tf.test.TestCase): feature_columns=feature_columns, hidden_units=[3, 3], enable_centered_bias=True, - config=tf.contrib.learn.RunConfig(tf_random_seed=3)) + config=tf.contrib.learn.RunConfig(tf_random_seed=1)) regressor.fit(input_fn=_input_fn, steps=5) self.assertIn('centered_bias_weight', regressor.get_variable_names()) @@ -1037,7 +1113,7 @@ class DNNRegressorTest(tf.test.TestCase): feature_columns=feature_columns, hidden_units=[3, 3], enable_centered_bias=False, - config=tf.contrib.learn.RunConfig(tf_random_seed=3)) + config=tf.contrib.learn.RunConfig(tf_random_seed=1)) regressor.fit(input_fn=_input_fn, steps=5) self.assertNotIn('centered_bias_weight', regressor.get_variable_names()) ",0,train f54e6f114d5180ce36a9e57ab0b4d8485ee81544,tensorflow/tensorflow,"Fix bug in function conversion that caused errors in the case of nested function calls. 
PiperOrigin-RevId: 275230384 Change-Id: I5ab3d2e869d6b645502fbf288b245c8df45879db",call_trees.py,"@@ -62,7 +62,8 @@ class _ArgTemplateBuilder(object): def _consume_args(self): if self._arg_accumulator: - self._argspec.append(gast.Tuple(elts=self._arg_accumulator, ctx=None)) + self._argspec.append( + gast.Tuple(elts=self._arg_accumulator, ctx=gast.Load())) self._arg_accumulator = [] def add_arg(self, a): @@ -84,7 +85,7 @@ class _ArgTemplateBuilder(object): for i in range(1, len(self._argspec)): result = gast.BinOp(result, gast.Add(), self._argspec[i]) return result - return gast.Tuple([], None) + return gast.Tuple([], gast.Load()) class CallTreeTransformer(converter.Base): ",0,test f54e6f114d5180ce36a9e57ab0b4d8485ee81544,tensorflow/tensorflow,"Fix bug in function conversion that caused errors in the case of nested function calls. PiperOrigin-RevId: 275230384 Change-Id: I5ab3d2e869d6b645502fbf288b245c8df45879db",call_trees_test.py,"@@ -29,7 +29,7 @@ from tensorflow.python.platform import test class CallTreesTest(converter_testing.TestCase): - def test_normal_function(self): + def test_function_no_args(self): def test_fn(f): return f() + 20 @@ -80,6 +80,24 @@ class CallTreesTest(converter_testing.TestCase): ((20,), None), ]) + def test_function_with_single_arg(self): + + def test_fn(f, a): + return f(a) + 20 + + with self.converted(test_fn, (function_scopes, call_trees), {}) as result: + self.assertEqual(result.test_fn(lambda a: a, 1), 21) + self.assertListEqual(self.dynamic_calls, [((1,), None)]) + + def test_function_with_args_only(self): + + def test_fn(f, a, b): + return f(a, b) + 300 + + with self.converted(test_fn, (function_scopes, call_trees), {}) as result: + self.assertEqual(result.test_fn(lambda a, b: a + b, 1, 20), 321) + self.assertListEqual(self.dynamic_calls, [((1, 20), None)]) + def test_function_with_kwarg(self): def test_fn(f, a, b): @@ -159,6 +177,20 @@ class CallTreesTest(converter_testing.TestCase): # 'e': 5 # })]) + def test_function_with_call_in_lambda_argument(self): + + def f(l, a): + return l(a) + 4000 + + def g(a, *args): + return a + sum(args) + + def test_fn(f, g, a, *args): + return f(lambda x: g(x, *args), a) + + with self.converted(test_fn, (function_scopes, call_trees), {}) as result: + self.assertEqual(result.test_fn(f, g, 1, *(20, 300)), 4321) + def test_debugger_set_trace(self): tracking_list = [] ",0,test 50736c76221ca0d28356f475442b8543e7505250,tensorflow/tensorflow,"Allow 64bit ids for TPU embedding workloads. PiperOrigin-RevId: 426991414 Change-Id: I2833279c932ccee5e71713993882c9233c980e60",tpu_embedding_v2.py,"@@ -912,7 +912,7 @@ class TPUEmbedding(tracking.AutoTrackable): ""Weight will always be 1 in this case."".format(path)) # For tensors, there are no indices and no weights. indices.append(int_zeros) - values.append(math_ops.cast(array_ops.reshape(tensor, [-1]), dtypes.int32)) + values.append(math_ops.cast(array_ops.reshape(tensor, [-1]), dtypes.int64)) weights.append(float_zeros) def _add_data_for_sparse_tensor(self, tensor, weight, indices, values, @@ -925,7 +925,7 @@ class TPUEmbedding(tracking.AutoTrackable): sample_indices = array_ops.pad( sample_indices, paddings=[[0, 0], [0, 1]]) indices.append(sample_indices) - values.append(math_ops.cast(tensor.values, dtypes.int32)) + values.append(math_ops.cast(tensor.values, dtypes.int64)) # If we have weights they must be a SparseTensor. 
if weight is not None: if not isinstance(weight, sparse_tensor.SparseTensor): @@ -940,7 +940,7 @@ class TPUEmbedding(tracking.AutoTrackable): weights, int_zeros, float_zeros, path, feature): row_splits.append(math_ops.cast(tensor.row_splits, dtypes.int32)) - values.append(math_ops.cast(tensor.values, dtypes.int32)) + values.append(math_ops.cast(tensor.values, dtypes.int64)) # If we have weights they must be a RaggedTensor. if weight is not None: if not isinstance(weight, ragged_tensor.RaggedTensor): ",0,train bbb3ae0790f042d2bc5f6cce434c75c698d4a978,tensorflow/tensorflow,"Automated rollback of commit 394db95965e1d745f08b4eeb550878ddc175af15 PiperOrigin-RevId: 209082119",quantize.py,"@@ -198,7 +198,7 @@ def _FindLayersToQuantize(graph): | [post_conv_correction] | - biasadd|folded_bias + [biasadd|folded_bias] | [bypass] | @@ -320,6 +320,7 @@ def _FindLayersToQuantize(graph): folded_bias_add_pattern, batch_norm_identity, bypass_pattern, + layer_pattern, ]) ]) ",0,train bbb3ae0790f042d2bc5f6cce434c75c698d4a978,tensorflow/tensorflow,"Automated rollback of commit 394db95965e1d745f08b4eeb550878ddc175af15 PiperOrigin-RevId: 209082119",quantize_test.py,"@@ -194,6 +194,33 @@ class QuantizeTest(test_util.TensorFlowTestCase): self.assertNotIn('test/relu6', [c.name for c in consumers]) + def testLayerActivationQuantized(self): + self._RunTestOverParameters(self._TestLayerActivationQuantized) + + def _TestLayerActivationQuantized(self, is_training): + graph = ops.Graph() + with graph.as_default(): + batch_size, height, width, depth = 5, 128, 128, 3 + input1 = array_ops.zeros((batch_size, height, width, depth)) + _ = conv2d( + input1, + 32, [5, 5], + stride=2, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=nn_ops.relu6, + biases_initializer=None, + scope='test') + # Ensure that both weights and output of activations are quantized + # when we have a conv->relu6 with no bias add + quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8) + activation_op = graph.get_operation_by_name('test/Relu6') + conv_op = graph.get_operation_by_name('test/Conv2D') + self.assertTrue('test/weights_quant/FakeQuantWithMinMaxVars:0' in + [tensor_in.name for tensor_in in conv_op.inputs]) + self.assertTrue('FakeQuantWithMinMaxVars' in + [op.type for op in activation_op.outputs[0].consumers()]) + def testFinalLayerQuantized(self): self._RunTestOverParameters(self._TestFinalLayerQuantized) ",0,train 9c6164dbcbaa4441c177653ac6ca51133c6363be,tensorflow/tensorflow,Optimize dependencies in expected graph in tests to emulate dependency optimizer + avoid constant duplicates in tests as they get optimized,base_test.py,"@@ -129,14 +129,14 @@ class SimpleMultiEnginesTest2(trt_test.TfTrtIntegrationTestBase): """"""Create a graph containing two segments."""""" n = inp for i in range(2): - c = constant_op.constant(1.0, name=""c%d"" % i) + c = constant_op.constant(float(i), name=""c%d"" % i) n = math_ops.add(n, c, name=""add%d"" % i) n = math_ops.mul(n, n, name=""mul%d"" % i) n = self.trt_incompatible_op(n, name=""incompatible"") - c = constant_op.constant(1.0, name=""c2"") + c = constant_op.constant(2.0, name=""c2"") n = math_ops.add(n, c, name=""add2"") n = math_ops.mul(n, n, name=""mul2"") - c = constant_op.constant(1.0, name=""c3"") + c = constant_op.constant(3.0, name=""c3"") n = math_ops.add(n, c, name=""add3"") n = math_ops.mul(n, n, name=""mul3"") return array_ops.squeeze(n, name=""output_0"") @@ -169,21 +169,18 @@ class ConstInputTest(trt_test.TfTrtIntegrationTestBase): """"""Create a 
graph containing multiple segment."""""" n = inp c = constant_op.constant(1.0, name=""c"") - # Adds control dependency from the constant op to a trt incompatible op, - # and adds control dependency from the trt incompatible op to all other + # Adds data dependency from the constant op to a trt incompatible op, + # and adds data dependency from the trt incompatible op to the other # ops, to make sure the constant op cannot be contracted with any trt # segment that depends on it. - with ops.control_dependencies([c]): - d = self.trt_incompatible_op(n, name=""incompatible"") - with ops.control_dependencies([d]): - n = math_ops.add(n, c, name=""add"") - n = math_ops.mul(n, n, name=""mul"") - n = math_ops.add(n, n, name=""add1"") + n = self.trt_incompatible_binary_op(n, c, name=""incompatible"") + n = math_ops.add(n, c, name=""add"") + n = math_ops.mul(n, n, name=""mul"") + n = math_ops.add(n, n, name=""add1"") n = self.trt_incompatible_op(n, name=""incompatible1"") - with ops.control_dependencies([d]): - n = math_ops.add(n, c, name=""add2"") - n = math_ops.mul(n, n, name=""mul1"") - n = math_ops.add(n, n, name=""add3"") + n = math_ops.add(n, c, name=""add2"") + n = math_ops.mul(n, n, name=""mul1"") + n = math_ops.add(n, n, name=""add3"") return array_ops.squeeze(n, name=""output_0"") def GetParams(self): @@ -255,25 +252,21 @@ class ConstDataInputMultipleEnginesTest(trt_test.TfTrtIntegrationTestBase): class ControlDependencyTest(trt_test.TfTrtIntegrationTestBase): def GraphFn(self, inp): - """"""Create a graph containing multiple segment."""""" + """"""Create a graph containing multiple segments."""""" c1 = constant_op.constant(1.0, name=""c1"") - c2 = constant_op.constant(1.0, name=""c2"") - d1 = constant_op.constant(1.0, name=""d1"") - d2 = self.trt_incompatible_op(inp, name=""d2"") - with ops.control_dependencies([d1, d2]): + c2 = constant_op.constant(2.0, name=""c2"") + d1 = self.trt_incompatible_op(inp, name=""d1"") + with ops.control_dependencies([d1]): add = math_ops.add(inp, c1, name=""add"") - with ops.control_dependencies([d1, d2]): - mul = math_ops.mul(add, add, name=""mul"") - with ops.control_dependencies([d1, d2]): - add1 = math_ops.add(mul, mul, name=""add1"") + mul = math_ops.mul(add, add, name=""mul"") + add1 = math_ops.add(mul, mul, name=""add1"") edge = self.trt_incompatible_op(add1, name=""incompatible"") - with ops.control_dependencies([d1, d2, add, mul]): + with ops.control_dependencies([d1, add1]): add2 = math_ops.add(edge, c2, name=""add2"") - with ops.control_dependencies([d1, d2, add1, mul]): - mul1 = math_ops.mul(add2, add2, name=""mul1"") - with ops.control_dependencies([d1, d2, add, add1]): - add3 = math_ops.add(mul1, mul1, name=""add3"") - return array_ops.squeeze(add3, name=""output_0"") + mul1 = math_ops.mul(add2, add2, name=""mul1"") + add3 = math_ops.add(mul1, mul1, name=""add3"") + inc = self.trt_incompatible_binary_op(d1, add3, name=""incompatible1"") + return array_ops.squeeze(inc, name=""output_0"") def GetParams(self): shapes = [[2, 32, 32, 3]] ",0,test 9c6164dbcbaa4441c177653ac6ca51133c6363be,tensorflow/tensorflow,Optimize dependencies in expected graph in tests to emulate dependency optimizer + avoid constant duplicates in tests as they get optimized,tf_trt_integration_test_base.py,"@@ -15,6 +15,7 @@ """"""Utilities to test TF-TensorRT integration."""""" import collections +import copy import errno import gc import itertools @@ -117,6 +118,10 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): def trt_incompatible_op(self): return 
math_ops.erfc + @property + def trt_incompatible_binary_op(self): + return math_ops.igamma + @property def precision_modes(self): return [""FP32"", ""FP16"", ""INT8""] @@ -625,6 +630,59 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): if k not in removed_const_nodes } + def dependency_optimization(input_map): + """"""Transitive reduction of the control dependencies."""""" + # 1. Topological sort. + working_edges = copy.deepcopy(input_map) + # Populate a set with all the nodes wiithout incoming edges. + working_set = { + name for name in working_edges if len(working_edges[name]) == 0} + sorted_nodes = [] + while working_set: + # Take a node from the set and add it to the sorted list. + node0 = working_set.pop() + sorted_nodes.append(node0) + # Remove outgoing edges and add nodes to the set if they have no + # incoming edge remaining. + for node1 in list(working_edges.keys()): + for edge_name in (node0, ""^"" + node0): + if edge_name in working_edges[node1]: + working_edges[node1].remove(edge_name) + if not working_edges[node1]: + working_set.add(node1) + if sum(len(edges) for edges in working_edges.values()): + raise ValueError(""Input map doesn't represent a DAG!"") + + # 2. Transitive reduction. + for i in range(len(sorted_nodes) - 1): + dep_name = ""^"" + sorted_nodes[i] + # Identify nodes which have a control edge from the current one. + targets = [ + j for j in range(i + 1, len(sorted_nodes)) + if dep_name in input_map[sorted_nodes[j]]] + if not targets: + continue + # Compute max path lengths until the last target node. + path_lengths = {sorted_nodes[i]: 0} + for j in range(i + 1, targets[-1] + 1): + j_name = sorted_nodes[j] + length = None + for edge_name in input_map[j_name]: + _, name = _InputName(edge_name) + if name in path_lengths and \ + (length is None or path_lengths[name] >= length): + length = path_lengths[name] + 1 + if length is not None: + path_lengths[j_name] = length + # Remove the control dependency of targets if there is a path of + # length strictly greater than 1 from the current node. + for j in targets: + j_name = sorted_nodes[j] + if path_lengths[j_name] > 1: + input_map[j_name].remove(dep_name) + + dependency_optimization(expected_input_map) + # Compute the actual mapping from each node to its input nodes. If a cast # op doesn't exist in the original graph, we replace the use of the cast op # with the input of the op. This allows the verification to handle the case ",0,test f29ef287148d2912e941c70891866d57de2fae04,tensorflow/tensorflow,"Fix crash in tf.gather gradient when indices rank was unknown. A crash will still occur if the rank is unknown and batch_dims < 0, since this case is hard to fix, but a sensible error message was added. This fixes a crash in sparse_softmax_cross_entropy_with_logits when determinism is enabled, as that function uses gather with batch_dims >= 0 when determinism is enabled. 
PiperOrigin-RevId: 398762499 Change-Id: I149567e53a5fcfbc69ab4bba35a73ca0ae81e967",gather_op_test.py,"@@ -31,7 +31,9 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -478,6 +480,32 @@ class GatherTest(test.TestCase, parameterized.TestCase): result = array_ops.gather(params, indices, axis=axis, batch_dims=batch_dims) self.assertAllEqual(expected, result) + # Test gradients + f64_params = math_ops.cast(params, dtypes.float64) + def gather(params): + return array_ops.gather(params, indices, axis=axis, batch_dims=batch_dims) + theoretical, numerical = gradient_checker_v2.compute_gradient( + gather, [f64_params]) + self.assertAllClose(theoretical, numerical) + + # Test gradients when input shapes are unknown + @def_function.function(input_signature=[ + tensor_spec.TensorSpec(shape=None, dtype=dtypes.float64), + tensor_spec.TensorSpec(shape=None, dtype=dtypes.int32) + ]) + def gather_unknown_shapes(params, indices): + return array_ops.gather(params, indices, axis=axis, batch_dims=batch_dims) + if batch_dims is None or batch_dims >= 0: + theoretical, numerical = gradient_checker_v2.compute_gradient( + lambda p: gather_unknown_shapes(p, indices), [f64_params]) + self.assertAllClose(theoretical, numerical) + else: + with self.assertRaisesRegex( + ValueError, + ""Currently, it is unsupported to take the gradient of tf.gather""): + gradient_checker_v2.compute_gradient( + lambda p: gather_unknown_shapes(p, indices), [f64_params]) + # Test the gradients shape. with backprop.GradientTape() as tape: zeros = array_ops.zeros_like(params, dtype=dtypes.float32) ",0,test f29ef287148d2912e941c70891866d57de2fae04,tensorflow/tensorflow,"Fix crash in tf.gather gradient when indices rank was unknown. A crash will still occur if the rank is unknown and batch_dims < 0, since this case is hard to fix, but a sensible error message was added. This fixes a crash in sparse_softmax_cross_entropy_with_logits when determinism is enabled, as that function uses gather with batch_dims >= 0 when determinism is enabled. 
PiperOrigin-RevId: 398762499 Change-Id: I149567e53a5fcfbc69ab4bba35a73ca0ae81e967",array_grad.py,"@@ -586,7 +586,6 @@ def _GatherGrad(op, grad): def _GetBatchIndices(params_shape, indices, batch_dims): """"""Addds the batch offsets to the given indices and returns the results."""""" batch_indices = indices - indices_ndims = indices.shape.ndims indices_dtype = indices.dtype.base_dtype casted_params_shape = math_ops.cast(params_shape, indices_dtype) accum_dim_value = array_ops.ones((), dtype=indices_dtype) @@ -597,8 +596,10 @@ def _GetBatchIndices(params_shape, indices, batch_dims): step = array_ops.ones((), dtype=indices_dtype) dim_indices = math_ops.range(start, dim_value, step) dim_indices *= accum_dim_value - dim_shape = array_ops.stack( - [1] * (dim - 1) + [dim_value] + [1] * (indices_ndims - dim), axis=0) + dim_shape = array_ops.concat([ + array_ops.tile([1], [dim - 1]), [dim_value], + array_ops.tile([1], [array_ops.rank(indices) - dim]) + ], axis=0) batch_indices += array_ops.reshape(dim_indices, dim_shape) return batch_indices @@ -655,6 +656,13 @@ def _GatherV2Grad(op, grad): batch_dims = int(op.get_attr(""batch_dims"")) if batch_dims < 0: + if indices.shape.ndims is None: + raise ValueError( + f""Currently, it is unsupported to take the gradient of tf.gather "" + f""when batch_dims < 0 and the rank of the indices is unknown. Please "" + f""pass a positive batch_dims or use tf.ensure_shape to update the "" + f""shape of indices when calling tf.gather. Got "" + f""batch_dims={batch_dims} and indices={indices}"") batch_dims += indices.shape.ndims # For axis 0 gathers, build an appropriately shaped IndexedSlices. ",0,test 85011622c0df59cbd03bd7d4e035d6c3521832dd,tensorflow/tensorflow,"Added more fine-grained shape inference for TensorArray such that partly unknown shapes are supported. Change: 143288671",tensor_array_ops_test.py,"@@ -960,6 +960,46 @@ class TensorArrayTest(test.TestCase): with self.assertRaises(ValueError): w0.write(0, c2) + def testPartlyUnknownShape(self): + with self.test_session(): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, tensor_array_name=""foo"", size=6) + + c0 = array_ops.placeholder(dtypes.float32, [None, None, None, 3]) + w0 = ta.write(0, c0) + r0 = w0.read(0) + self.assertAllEqual([None, None, None, 3], r0.get_shape().as_list()) + + c1 = array_ops.placeholder(dtypes.float32, [None, None, None, 3]) + w1 = w0.write(1, c1) + r1 = w1.read(0) + self.assertAllEqual([None, None, None, 3], r1.get_shape().as_list()) + + # Writing less specific shape (doesn't change type.) + c2 = array_ops.placeholder(dtypes.float32, [None, None, None, None]) + w2 = w1.write(2, c2) + r2 = w2.read(0) + self.assertAllEqual([None, None, None, 3], r2.get_shape().as_list()) + + # Writing more specific shape in one dimension and less specific in + # another. + c3 = array_ops.placeholder(dtypes.float32, [None, None, 2, None]) + w3 = w2.write(3, c3) + r3 = w3.read(0) + self.assertAllEqual([None, None, 2, 3], r3.get_shape().as_list()) + + # Writing partly defined shape using TensorArray.scatter. + c4 = array_ops.placeholder(dtypes.float32, [2, None, 4, 2, 3]) + w4 = w3.scatter([4, 5], c4) + r4 = w4.read(0) + self.assertAllEqual([None, 4, 2, 3], r4.get_shape().as_list()) + + # Writing fully defined shape using TensorArray.split. 
+ c5 = array_ops.placeholder(dtypes.float32, [10, 4, 2, 3]) + w5 = w4.split(c5, constant_op.constant([5, 5])) + r5 = w5.read(0) + self.assertAllEqual([5, 4, 2, 3], r5.get_shape().as_list()) + def _testUnpackShape(self): with self.test_session(): ta = tensor_array_ops.TensorArray( ",0,train 85011622c0df59cbd03bd7d4e035d6c3521832dd,tensorflow/tensorflow,"Added more fine-grained shape inference for TensorArray such that partly unknown shapes are supported. Change: 143288671",tensor_array_ops.py,"@@ -196,6 +196,26 @@ class TensorArray(object): """"""The reference to the TensorArray."""""" return self._handle + def _merge_element_shape(self, shape): + """"""Changes the element shape of the array given a shape to merge with. + + Args: + shape: A `TensorShape` object to merge with. + + Raises: + ValueError: if the provided shape is incompatible with the current + element shape of the `TensorArray`. + """""" + + if self._element_shape: + if not shape.is_compatible_with(self._element_shape[0]): + raise ValueError( + ""Inconsistent shapes: saw %s but expected %s "" + ""(and infer_shape=True)"" % (shape, self._element_shape[0])) + self._element_shape[0] = self._element_shape[0].merge_with(shape) + else: + self._element_shape.append(shape) + def grad(self, source, flow=None, name=None): # tensor_array_grad requires a flow input when forward # TensorArrays are dynamically sized. This forces the creation @@ -267,14 +287,7 @@ class TensorArray(object): ta._infer_shape = self._infer_shape ta._element_shape = self._element_shape if ta._infer_shape: - val_shape = value.get_shape() - if ta._element_shape: - if not val_shape == ta._element_shape[0]: - raise ValueError(""Inconsistent shapes: saw %s but expected %s "" - ""(and infer_shape=True)"" % - (val_shape, ta._element_shape[0])) - else: - ta._element_shape.append(val_shape) + ta._merge_element_shape(value.get_shape()) return ta def stack(self, name=None): @@ -423,13 +436,7 @@ class TensorArray(object): element_shape = tensor_shape.unknown_shape() if val_shape.dims is not None: element_shape = tensor_shape.TensorShape(val_shape.dims[1:]) - if ta._element_shape: - if not element_shape == ta._element_shape[0]: - raise ValueError(""Inconsistent shapes: saw %s but expected %s "" - ""(and infer_shape=True)"" % - (element_shape, ta._element_shape[0])) - else: - ta._element_shape.append(element_shape) + ta._merge_element_shape(element_shape) return ta def split(self, value, lengths, name=None): @@ -471,13 +478,7 @@ class TensorArray(object): if clengths is not None and clengths.max() == clengths.min(): element_shape = tensor_shape.TensorShape([clengths[0]] + val_shape.dims[1:]) - if ta._element_shape: - if not element_shape == ta._element_shape[0]: - raise ValueError(""Inconsistent shapes: saw %s but expected %s "" - ""(and infer_shape=True)"" % - (element_shape, ta._element_shape[0])) - else: - ta._element_shape.append(element_shape) + ta._merge_element_shape(element_shape) return ta def size(self, name=None): ",0,train ac40a68cd5fac2227fb6c4086b2eb01a7dc726c4,tensorflow/tensorflow,"Fix iterable bug Co-authored-by: melissagrueter Co-authored-by: irenedea ",op_specs.cc,"@@ -91,11 +91,6 @@ class TypeResolver { Type TypeResolver::TypeOf(const OpDef_ArgDef& arg_def, bool* iterable_out) { *iterable_out = false; - if (!arg_def.number_attr().empty()) { - // when number_attr is set, argument has to be a list of tensors - *iterable_out = true; - visited_attrs_.insert(std::make_pair(arg_def.number_attr(), Type::Int())); - } Type type = Type::Wildcard(); if (arg_def.type() 
!= DataType::DT_INVALID) { type = Type::ForDataType(arg_def.type()); @@ -122,6 +117,11 @@ Type TypeResolver::TypeOf(const OpDef_ArgDef& arg_def, bool* iterable_out) { LOG(FATAL) << ""Cannot resolve data type of argument \"""" << arg_def.name() << ""\"" in operation \"""" << op_def_.name() << ""\""""; } + if (!arg_def.number_attr().empty()) { + // when number_attr is set, argument has to be a list of tensors + *iterable_out = true; + visited_attrs_.insert(std::make_pair(arg_def.number_attr(), Type::Int())); + } return type; } ",0,train f13b76a52886e39d8e97c9256c383eb3b79748d8,tensorflow/tensorflow,"Replace the fake updates with no updates when not training. This is possible now that the tf.cond bug has been fixed, and is needed to remove rare data races. PiperOrigin-RevId: 173601427",normalization.py,"@@ -436,27 +436,30 @@ class BatchNormalization(base.Layer): if dmax is not None: d = math_ops.maximum(d, -dmax) d = math_ops.minimum(d, dmax) - # When not training, use r=1, d=0, and decay=1 meaning no updates. + # When not training, use r=1, d=0. r = utils.smart_cond(training, lambda: r, lambda: array_ops.ones_like(r)) d = utils.smart_cond(training, lambda: d, lambda: array_ops.zeros_like(d)) - decay = utils.smart_cond(training, lambda: self.renorm_momentum, lambda: 1.) def _update_renorm_variable(var, weight, value): """"""Updates a moving average and weight, returns the unbiased value."""""" - # Update the variables without zero debiasing. The debiasing will be - # accomplished by dividing the exponential moving average by the weight. - # For example, after a single update, the moving average would be - # (1-decay) * value. and the weight will be 1-decay, with their ratio - # giving value. - # Make sure the weight is not updated until before r and d computation. value = array_ops.identity(value) - with ops.control_dependencies([value]): - weight_value = array_ops.constant(1., dtype=weight.dtype) - new_var = moving_averages.assign_moving_average( - var, value, decay, zero_debias=False) - new_weight = moving_averages.assign_moving_average( - weight, weight_value, decay, zero_debias=False) - return new_var / new_weight + def _do_update(): + # Update the variables without zero debiasing. The debiasing will be + # accomplished by dividing the exponential moving average by the weight. + # For example, after a single update, the moving average would be + # (1-decay) * value. and the weight will be 1-decay, with their ratio + # giving the value. + # Make sure the weight is not updated until before r and d computation. + with ops.control_dependencies([value]): + weight_value = array_ops.constant(1., dtype=weight.dtype) + new_var = moving_averages.assign_moving_average( + var, value, self.renorm_momentum, zero_debias=False) + new_weight = moving_averages.assign_moving_average( + weight, weight_value, self.renorm_momentum, zero_debias=False) + return new_var / new_weight + def _fake_update(): + return array_ops.identity(var) + return utils.smart_cond(training, _do_update, _fake_update) with ops.colocate_with(self.moving_mean): new_mean = _update_renorm_variable(self.renorm_mean, @@ -562,8 +565,6 @@ class BatchNormalization(base.Layer): else: new_mean, new_variance = mean, variance - # Update moving averages when training, and prevent updates otherwise. - decay = utils.smart_cond(training, lambda: self.momentum, lambda: 1.) 
if self.virtual_batch_size is not None: # This isn't strictly correct since in ghost batch norm, you are # supposed to sequentially update the moving_mean and moving_variance @@ -575,10 +576,18 @@ class BatchNormalization(base.Layer): new_variance = math_ops.reduce_mean(new_variance, axis=1, keep_dims=True) - mean_update = moving_averages.assign_moving_average( - self.moving_mean, new_mean, decay, zero_debias=False) - variance_update = moving_averages.assign_moving_average( - self.moving_variance, new_variance, decay, zero_debias=False) + def _do_update(var, value): + return moving_averages.assign_moving_average( + var, value, self.momentum, zero_debias=False) + + mean_update = utils.smart_cond( + training, + lambda: _do_update(self.moving_mean, new_mean), + lambda: self.moving_mean) + variance_update = utils.smart_cond( + training, + lambda: _do_update(self.moving_variance, new_variance), + lambda: self.moving_variance) if context.in_graph_mode(): self.add_update(mean_update, inputs=inputs) self.add_update(variance_update, inputs=inputs) ",0,train 02f5601acdca3b468e568de2833ec893cbba2ef8,tensorflow/tensorflow,"Create a TF dialect pass to strip _noinline attributes and ensure that these are stripped when running the TF->TFL converter before inlining. PiperOrigin-RevId: 405689268 Change-Id: I3e4ec98c921b4f3b635adcafe088bc6649233861",tf_tfl_passes.cc,"@@ -81,6 +81,7 @@ void AddConvertHloToTfPass(std::string entry_function_name, // DCE for private symbols. pass_manager->addPass(mlir::createSymbolDCEPass()); + pass_manager->addPass(mlir::TF::CreateStripNoinlineAttributePass()); // Add inline pass. pass_manager->addPass(mlir::createInlinerPass()); ",0,train 02f5601acdca3b468e568de2833ec893cbba2ef8,tensorflow/tensorflow,"Create a TF dialect pass to strip _noinline attributes and ensure that these are stripped when running the TF->TFL converter before inlining. PiperOrigin-RevId: 405689268 Change-Id: I3e4ec98c921b4f3b635adcafe088bc6649233861",passes.h,"@@ -183,6 +183,10 @@ LogicalResult ResourceLiftingForFunctionalControlFlow(FuncOp function); // known element shapes of push ops. std::unique_ptr> CreateStackOpsDecompositionPass(); +// Creates a pass to strip the ""tf._noinline"" attribute from the functions in +// the module. +std::unique_ptr> CreateStripNoinlineAttributePass(); + // Converts tensor list operations into operations on buffers and sizes. Needs // static shapes and known max element count. std::unique_ptr> CreateTensorListOpsDecompositionPass(); ",0,train 02f5601acdca3b468e568de2833ec893cbba2ef8,tensorflow/tensorflow,"Create a TF dialect pass to strip _noinline attributes and ensure that these are stripped when running the TF->TFL converter before inlining. PiperOrigin-RevId: 405689268 Change-Id: I3e4ec98c921b4f3b635adcafe088bc6649233861",strip_noinline_attribute.cc,"@@ -0,0 +1,43 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include ""tensorflow/compiler/mlir/tensorflow/transforms/passes.h"" +#include ""tensorflow/compiler/mlir/tensorflow/transforms/passes_detail.h"" + +namespace mlir { +namespace TF { + +namespace { + +// This tranformation pass strips any ""_noinline"" attributes from the module. +struct StripNoinlineAttributePass + : public StripNoinlineAttributePassBase { + public: + // void runOnOperation() override; + void runOnOperation() override { + // Strip the ""tf._noinline"" attribute from top-level functions. + for (auto func_op : getOperation().getOps()) + func_op->removeAttr(""tf._noinline""); + } +}; + +} // namespace + +std::unique_ptr> CreateStripNoinlineAttributePass() { + return std::make_unique(); +} + +} // namespace TF +} // namespace mlir ",0,train 9f4e69bb1e8da97d369d1c2a9999845a41bb589b,tensorflow/tensorflow,"ConvolutionTransposedThin converted to new style. PiperOrigin-RevId: 318083958 Change-Id: I8de4b4b250ceff00e1d16ef917cb2f8698d33e28",convolution_transposed_thin.cc,"@@ -28,21 +28,17 @@ namespace gpu { namespace cl { namespace { -std::string GenerateConvolutionTransposedCode( - const OperationDef& op_def, int src_depth, int dst_channels, - const int2& kernel_size, const CLDevice& device, - const std::vector& linked_operations) { - TensorCodeGenerator src_tensor( - ""src_data"", - WHSBPoint{""src_size.x"", ""src_size.y"", ""src_size.z"", ""src_size.w""}, - op_def.src_tensors[0]); - TensorCodeGenerator dst_tensor( - ""dst_data"", - WHSBPoint{""dst_size.x"", ""dst_size.y"", ""dst_size.z"", ""dst_size.w""}, - op_def.dst_tensors[0]); +std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, + int src_depth, int dst_channels, + const int2& kernel_size, + Arguments* args) { + args->AddObjectRef( + ""src_tensor"", AccessType::READ, + absl::make_unique(op_def.src_tensors[0])); + args->AddObjectRef( + ""dst_tensor"", AccessType::WRITE, + absl::make_unique(op_def.dst_tensors[0])); - const std::string batch_id = op_def.IsBatchSupported() ? ""B"" : """"; - std::string c = GetCommonDefines(op_def.precision); const std::string channel_x = dst_channels == 1 ? 
"""" : "".x""; const std::vector postfix = {channel_x, "".y"", "".z"", "".w""}; const std::vector channel = {"".x"", "".y"", "".z"", "".w""}; @@ -62,36 +58,33 @@ std::string GenerateConvolutionTransposedCode( break; } + std::string c = GetCommonDefines(op_def.precision); c += ""__kernel void main_function(\n""; - c += src_tensor.GetDeclaration(AccessType::READ) + "",\n""; - c += "" __constant FLT4* filters""; - c += GetArgsDeclaration(linked_operations); - c += dst_tensor.GetDeclaration(AccessType::WRITE) + "",\n""; - c += "" int4 src_size, \n""; - c += "" int4 dst_size, \n""; - c += "" FLT4 bias_value \n""; - c += "") {\n""; + c += ""$0) {\n""; if (op_def.IsBatchSupported()) { c += "" int linear_id = get_global_id(0);\n""; - c += "" int X = linear_id / dst_size.w;\n""; - c += "" int B = linear_id % dst_size.w;\n""; + c += "" int X = linear_id / args.dst_tensor.Batch();\n""; + c += "" int B = linear_id % args.dst_tensor.Batch();\n""; + c += "" args.dst_tensor.SetBatchRef(B);\n""; + c += "" args.src_tensor.SetBatchRef(B);\n""; } else { c += "" int X = get_global_id(0);\n""; } c += "" int Y = get_global_id(1);\n""; - c += "" if (X >= src_size.x || Y >= src_size.y) return;\n""; + c += "" if (X >= args.src_tensor.Width() || Y >= args.src_tensor.Height()) "" + ""return;\n""; c += "" "" + accum_type + "" r["" + std::to_string(kernel_size.y) + ""]["" + std::to_string(kernel_size.x) + ""];\n""; c += "" {\n""; - c += "" FLT4 src = "" + src_tensor.ReadWHSB(""X"", ""Y"", ""0"", batch_id) + "";\n""; + c += "" FLT4 src = args.src_tensor.Read(X, Y, 0);\n""; int index = 0; for (int y = 0; y < kernel_size.y; ++y) { for (int x = 0; x < kernel_size.x; ++x) { std::string r_s = "" r["" + std::to_string(y) + ""]["" + std::to_string(x) + ""]""; for (int d = 0; d < dst_channels; ++d) { - c += r_s + postfix[d] + "" = dot(src, filters["" + std::to_string(index) + - ""]);\n""; + c += r_s + postfix[d] + "" = dot(src, args.weights.Read("" + + std::to_string(index) + ""));\n""; index++; } } @@ -100,15 +93,15 @@ std::string GenerateConvolutionTransposedCode( for (int i = 1; i < src_depth; ++i) { c += "" if (X > "" + std::to_string(-i) + "") { // always true, to reduce registers usage\n""; - c += "" FLT4 src = "" + - src_tensor.ReadWHSB(""X"", ""Y"", std::to_string(i), batch_id) + "";\n""; + c += + "" FLT4 src = args.src_tensor.Read(X, Y, "" + std::to_string(i) + "");\n""; for (int y = 0; y < kernel_size.y; ++y) { for (int x = 0; x < kernel_size.x; ++x) { std::string r_s = "" r["" + std::to_string(y) + ""]["" + std::to_string(x) + ""]""; for (int d = 0; d < dst_channels; ++d) { - c += r_s + postfix[d] + "" += dot(src, filters["" + - std::to_string(index) + ""]);\n""; + c += r_s + postfix[d] + "" += dot(src, args.weights.Read("" + + std::to_string(index) + ""));\n""; index++; } } @@ -121,21 +114,16 @@ std::string GenerateConvolutionTransposedCode( for (int x = 0; x < kernel_size.x; ++x) { const std::string x_coord = ""X + "" + std::to_string(x); const std::string y_coord = ""Y + "" + std::to_string(y); - c += "" if ("" + x_coord + "" < dst_size.x && "" + y_coord + - "" < dst_size.y) {\n""; - c += "" FLT4 result = bias_value;\n""; + c += "" if ("" + x_coord + "" < args.dst_tensor.Width() && "" + y_coord + + "" < args.dst_tensor.Height()) {\n""; + c += "" FLT4 result = args.weights.Read("" + std::to_string(index) + + "");\n""; for (int d = 0; d < dst_channels; ++d) { c += "" result"" + channel[d] + "" += r["" + std::to_string(y) + ""]["" + std::to_string(x) + ""]"" + postfix[d] + "";\n""; } - const std::string x_3dcoord = 
op_def.IsBatchSupported() - ? ""("" + x_coord + "") * dst_size.w + B"" - : x_coord; - const LinkingContext context{""result"", x_3dcoord, y_coord, ""0""}; - c += PostProcess(linked_operations, context); - c += "" "" + - dst_tensor.WriteWHSB(""result"", x_coord, y_coord, ""0"", batch_id) + - ""\n""; + c += "" args.dst_tensor.Write(result, "" + x_coord + "", "" + y_coord + + "", 0);\n""; c += "" }\n""; } } @@ -150,19 +138,11 @@ ConvolutionTransposedThin::ConvolutionTransposedThin( : GPUOperation(definition), kernel_size_(attr.weights.shape.w, attr.weights.shape.h), src_channels_(attr.weights.shape.i), - dst_channels_(attr.weights.shape.o) { - float4 bias_value(0.0f); - for (int i = 0; i < attr.weights.shape.o; ++i) { - bias_value[i] = attr.bias.data[i]; - } - bias_value_ = FLT4(definition_.precision, bias_value); -} + dst_channels_(attr.weights.shape.o) {} ConvolutionTransposedThin::ConvolutionTransposedThin( ConvolutionTransposedThin&& operation) : GPUOperation(std::move(operation)), - weights_buf_(std::move(operation.weights_buf_)), - bias_value_(std::move(operation.bias_value_)), kernel_size_(operation.kernel_size_), src_channels_(operation.src_channels_), dst_channels_(operation.dst_channels_), @@ -172,8 +152,6 @@ ConvolutionTransposedThin::ConvolutionTransposedThin( ConvolutionTransposedThin& ConvolutionTransposedThin::operator=( ConvolutionTransposedThin&& operation) { if (this != &operation) { - weights_buf_ = std::move(operation.weights_buf_); - bias_value_ = std::move(operation.bias_value_); std::swap(kernel_size_, operation.kernel_size_); std::swap(src_channels_, operation.src_channels_); std::swap(dst_channels_, operation.dst_channels_); @@ -186,9 +164,15 @@ ConvolutionTransposedThin& ConvolutionTransposedThin::operator=( absl::Status ConvolutionTransposedThin::Compile( const CreationContext& creation_context) { - const auto code = GenerateConvolutionTransposedCode( + std::string code = GenerateConvolutionTransposedCode( definition_, DivideRoundUp(src_channels_, 4), dst_channels_, kernel_size_, - *creation_context.device, linked_operations_); + &args_); + std::string element_wise_code; + RETURN_IF_ERROR( + MergeOperations(linked_operations_, &args_, &element_wise_code)); + RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), + {{""dst_tensor"", element_wise_code}}, + &code)); std::vector options; if (definition_.precision == CalculationsPrecision::F16 && @@ -202,15 +186,10 @@ absl::Status ConvolutionTransposedThin::Compile( } absl::Status ConvolutionTransposedThin::BindArguments() { - kernel_.ResetBindingCounter(); - RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr())); - RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_buf_.GetMemoryPtr())); - RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_)); - RETURN_IF_ERROR(kernel_.SetMemoryAuto(dst_[0]->GetMemoryPtrForWriting())); - RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWHSB())); - RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWHSB())); - RETURN_IF_ERROR(kernel_.SetBytesAuto(bias_value_)); - return absl::OkStatus(); + RETURN_IF_ERROR(args_.SetObjectRef(""src_tensor"", src_[0])); + RETURN_IF_ERROR(args_.SetObjectRef(""dst_tensor"", dst_[0])); + RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); + return args_.Bind(kernel_.kernel()); } int3 ConvolutionTransposedThin::GetGridSize() const { @@ -248,7 +227,7 @@ absl::Status CreateConvolutionTransposedThin( } *result = ConvolutionTransposedThin(definition, attr); RETURN_IF_ERROR( - result->UploadWeights(attr.weights, 
creation_context.context)); + result->UploadData(attr.weights, attr.bias, creation_context.context)); return absl::OkStatus(); } ",0,train 9f4e69bb1e8da97d369d1c2a9999845a41bb589b,tensorflow/tensorflow,"ConvolutionTransposedThin converted to new style. PiperOrigin-RevId: 318083958 Change-Id: I8de4b4b250ceff00e1d16ef917cb2f8698d33e28",convolution_transposed_thin.h,"@@ -58,8 +58,9 @@ class ConvolutionTransposedThin : public GPUOperation { ConvolutionTransposedThin(const OperationDef& definition, const ConvolutionTransposedAttributes& attr); template - absl::Status UploadWeights(const tflite::gpu::Tensor& weights, - CLContext* context); + absl::Status UploadData(const tflite::gpu::Tensor& weights, + const tflite::gpu::Tensor& biases, + CLContext* context); template void RearrangeWeightsData(const tflite::gpu::Tensor& weights, @@ -68,9 +69,6 @@ class ConvolutionTransposedThin : public GPUOperation { absl::Status BindArguments(); int3 GetGridSize() const; - Buffer weights_buf_; - FLT4 bias_value_; - int2 kernel_size_; int src_channels_; int dst_channels_; @@ -80,25 +78,50 @@ class ConvolutionTransposedThin : public GPUOperation { }; template -absl::Status ConvolutionTransposedThin::UploadWeights( - const tflite::gpu::Tensor& weights, CLContext* context) { +absl::Status ConvolutionTransposedThin::UploadData( + const tflite::gpu::Tensor& weights, + const tflite::gpu::Tensor& biases, CLContext* context) { const int src_depth = DivideRoundUp(src_channels_, 4); - const int elements_count = - kernel_size_.x * kernel_size_.y * src_depth * 4 * dst_channels_; + const int flt4_count = + kernel_size_.x * kernel_size_.y * src_depth * dst_channels_; + + const bool f32_weights = definition_.precision == CalculationsPrecision::F32; - const int float4_size = - definition_.precision == CalculationsPrecision::F32 ? 16 : 8; - if (definition_.GetDataType() == DataType::FLOAT32) { - std::vector gpu_data(elements_count); + BufferDescriptor desc; + desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16; + desc.element_size = 4; + desc.memory_type = MemoryType::CONSTANT; + + Buffer weights_buffer; + if (f32_weights) { + std::vector gpu_data(flt4_count); RearrangeWeightsData(weights, absl::MakeSpan(gpu_data)); - return CreateReadOnlyBuffer(float4_size * elements_count, gpu_data.data(), - context, &weights_buf_); + float4 bias_value(0.0f); + for (int i = 0; i < weights.shape.o; ++i) { + bias_value[i] = biases.data[i]; + } + gpu_data.push_back(bias_value); + RETURN_IF_ERROR(CreateReadOnlyBuffer(sizeof(float4) * gpu_data.size(), + gpu_data.data(), context, + &weights_buffer)); } else { - std::vector gpu_data(elements_count); + std::vector gpu_data(flt4_count); RearrangeWeightsData(weights, absl::MakeSpan(gpu_data)); - return CreateReadOnlyBuffer(float4_size * elements_count, gpu_data.data(), - context, &weights_buf_); + half4 bias_value(0.0f); + for (int i = 0; i < weights.shape.o; ++i) { + bias_value[i] = biases.data[i]; + } + gpu_data.push_back(bias_value); + RETURN_IF_ERROR(CreateReadOnlyBuffer(sizeof(half4) * gpu_data.size(), + gpu_data.data(), context, + &weights_buffer)); } + + args_.AddObject(""weights"", AccessType::READ, + absl::make_unique(std::move(weights_buffer)), + absl::make_unique(desc)); + + return absl::OkStatus(); } template ",0,train 3adaa332c7c5055398f38c189a6ea741f5c799ed,tensorflow/tensorflow,"Add shape inference and upate tests Signed-off-by: Yong Tang ",libsvm_ops.cc,"@@ -20,6 +20,7 @@ limitations under the License. 
namespace tensorflow { using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; REGISTER_OP(""DecodeLibsvm"") .Input(""input: string"") @@ -27,7 +28,19 @@ REGISTER_OP(""DecodeLibsvm"") .Output(""feature: dtype"") .Attr(""dtype: {float, double, int32, int64} = DT_FLOAT"") .Attr(""num_features: int >= 1"") - .SetShapeFn(shape_inference::UnknownShape) + .SetShapeFn([](InferenceContext* c) { + c->set_output(0, c->input(0)); + + int32 num_features; + TF_RETURN_IF_ERROR(c->GetAttr(""num_features"", &num_features)); + ShapeHandle out; + TF_RETURN_IF_ERROR( + c->Concatenate(c->input(0), c->Vector(num_features), &out)); + c->set_output(1, out); + + return Status::OK(); + }) + .Doc(R""doc( Convert LibSVM input to tensors. The output consists of a label and a feature tensor. The shape of the label tensor ",0,train 3adaa332c7c5055398f38c189a6ea741f5c799ed,tensorflow/tensorflow,"Add shape inference and upate tests Signed-off-by: Yong Tang ",decode_libsvm_op_test.py,"@@ -19,7 +19,7 @@ from __future__ import division from __future__ import print_function import numpy as np -import sys + from tensorflow.contrib.libsvm.python.ops import libsvm_ops from tensorflow.python.platform import test @@ -32,6 +32,12 @@ class DecodeLibsvmOpTest(test.TestCase): ""1 2:2.5 3:0.1 5:0.503"", ""2 3:2.5 2:0.1 1:0.105""] label, feature = libsvm_ops.decode_libsvm(content, num_features=6) + + # shape inference + self.assertAllEqual(label.get_shape().as_list(), [3]) + self.assertAllEqual(feature.get_shape().as_list(), [3, 6]) + + # sess.run() label, feature = sess.run([label, feature]) self.assertAllEqual(label, [1, 1, 2]) self.assertAllClose(feature, [[0, 3.4, 0.5, 0, 0.231, 0], ",0,train 592d2d67daca18db98c7f67b0a55ef487ed76f1c,tensorflow/tensorflow,"Transpose for high dimensional tensors using eigen (#15893) * speeding up transpose on CPU",transpose_functor_cpu.cc,"@@ -88,6 +88,18 @@ struct Transpose { internal::TransposeUsingEigen(d, in, perm, conjugate, out); break; + case 6: + internal::TransposeUsingEigen(d, in, perm, conjugate, + out); + break; + case 7: + internal::TransposeUsingEigen(d, in, perm, conjugate, + out); + break; + case 8: + internal::TransposeUsingEigen(d, in, perm, conjugate, + out); + break; default: TransposeSimple(d, in, perm, out); break; ",0,train 592d2d67daca18db98c7f67b0a55ef487ed76f1c,tensorflow/tensorflow,"Transpose for high dimensional tensors using eigen (#15893) * speeding up transpose on CPU",transpose_functor_gpu.cu.cc,"@@ -201,6 +201,27 @@ struct Transpose { out); } break; + case 6: + if (!internal::TransposeUsingTile::run(d, in, perm, + out)) { + internal::TransposeUsingEigen(d, in, perm, conjugate, + out); + } + break; + case 7: + if (!internal::TransposeUsingTile::run(d, in, perm, + out)) { + internal::TransposeUsingEigen(d, in, perm, conjugate, + out); + } + break; + case 8: + if (!internal::TransposeUsingTile::run(d, in, perm, + out)) { + internal::TransposeUsingEigen(d, in, perm, conjugate, + out); + } + break; default: internal::TransposeSimple(d, in, perm, out); break; ",0,train d735183a884ad1662750658d7292729efed15885,tensorflow/tensorflow,"Update GraphDef version to 752. PiperOrigin-RevId: 371291460 Change-Id: Ib35c171a5e91992993e80e74883488550cb4de3a",version.h,"@@ -108,7 +108,7 @@ limitations under the License. 
#define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 751 // Updated: 2021/4/29 +#define TF_GRAPH_DEF_VERSION 752 // Updated: 2021/4/30 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // ",0,train c923edca9d0d54360fa52a1da36d303822d63330,tensorflow/tensorflow,"Remove cloned prepare_tf pass description from unroll_batch_matmul pass Instead, there is already a class level documentation for the pass. PiperOrigin-RevId: 268550569",unroll_batch_matmul.cc,"@@ -13,22 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// This transformation pass prepares for legalization to the TFLite dialect by -// converting operations in TensorFlow dialect into operations that can be -// legalized to TensorFlow Lite dialect with simple replacements. The newly -// created operations are in the TensorFlow dialect if the operation can be -// represented using a TensorFlow op. Otherwise, TensorFlow Lite dialect op is -// used. For example, Conv2D in TFLite which uses OHWI data format for filters -// is not supported in TensorFlow because TensorFlow requires filters in the -// HWIO data format. -// -// Motivation to prepare for the TFLite legalization before the actual -// legalization is to exploit constant folding opportunities in any newly -// created ops by leveraging constant folding support for the TensorFlow ops. -// This way TFLite can be used as a serialization format only and does not -// require access to the TFLite runtime for optimizations as required by the -// TFLite team. - #include ""tensorflow/compiler/mlir/lite/transforms/unroll_batch_matmul.h"" #include ",0,train 995fa1fe89e647c2f7d62c9a289e4b50b912ddaf,tensorflow/tensorflow,"Remove Checkpoint proto symbols from the v2 API Keeps tf.train.latest_checkpoint, since that's widely used and still quite useful. The functionality of these symbols is replaced by tf.train.CheckpointManager. 
PiperOrigin-RevId: 220879395",checkpoint_management.py,"@@ -36,6 +36,7 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import training_util from tensorflow.python.training.checkpoint_state_pb2 import CheckpointState from tensorflow.python.util import compat +from tensorflow.python.util import deprecation from tensorflow.python.util.tf_export import tf_export @@ -55,7 +56,11 @@ def _GetCheckpointFilename(save_dir, latest_filename): return os.path.join(save_dir, latest_filename) -@tf_export(""train.generate_checkpoint_state_proto"") +@deprecation.deprecated( + date=None, + instructions=(""Use tf.train.CheckpointManager to manage checkpoints rather "" + ""than editing the Checkpoint proto manually."")) +@tf_export(v1=[""train.generate_checkpoint_state_proto""]) def generate_checkpoint_state_proto(save_dir, model_checkpoint_path, all_model_checkpoint_paths=None, @@ -121,7 +126,11 @@ def generate_checkpoint_state_proto(save_dir, return coord_checkpoint_proto -@tf_export(""train.update_checkpoint_state"") +@deprecation.deprecated( + date=None, + instructions=(""Use tf.train.CheckpointManager to manage checkpoints rather "" + ""than manually editing the Checkpoint proto."")) +@tf_export(v1=[""train.update_checkpoint_state""]) def update_checkpoint_state(save_dir, model_checkpoint_path, all_model_checkpoint_paths=None, @@ -344,7 +353,10 @@ def latest_checkpoint(checkpoint_dir, latest_filename=None): return None -@tf_export(""train.checkpoint_exists"") +@deprecation.deprecated( + date=None, + instructions=""Use standard file APIs to check for files with this prefix."") +@tf_export(v1=[""train.checkpoint_exists""]) def checkpoint_exists(checkpoint_prefix): """"""Checks whether a V1 or V2 checkpoint exists with the specified prefix. @@ -369,7 +381,10 @@ def checkpoint_exists(checkpoint_prefix): return False -@tf_export(""train.get_checkpoint_mtimes"") +@deprecation.deprecated( + date=None, + instructions=""Use standard file utilities to get mtimes."") +@tf_export(v1=[""train.get_checkpoint_mtimes""]) def get_checkpoint_mtimes(checkpoint_prefixes): """"""Returns the mtimes (modification timestamps) of the checkpoints. @@ -408,7 +423,10 @@ def get_checkpoint_mtimes(checkpoint_prefixes): return mtimes -@tf_export(""train.remove_checkpoint"") +@deprecation.deprecated( + date=None, + instructions=""Use standard file APIs to delete files with this prefix."") +@tf_export(v1=[""train.remove_checkpoint""]) def remove_checkpoint(checkpoint_prefix, checkpoint_format_version=saver_pb2.SaverDef.V2, meta_graph_suffix=""meta""): ",0,train c94d33c7d3fe32aa46decebe6fb261c2ff5012c3,tensorflow/tensorflow,"Reset the global_train_batch in each training. PiperOrigin-RevId: 303016514 Change-Id: I6af560f7f6e94c359600c2913a9dd426f062b921",callbacks.py,"@@ -2005,6 +2005,7 @@ class TensorBoard(Callback, version_utils.TensorBoardVersionSelector): self._should_trace = not (self._start_batch == 0 and self._stop_batch == 0) def on_train_begin(self, logs=None): + self._global_train_batch = 0 self._push_writer(self._train_writer, self._train_step) def on_train_end(self, logs=None): ",0,train c94d33c7d3fe32aa46decebe6fb261c2ff5012c3,tensorflow/tensorflow,"Reset the global_train_batch in each training. 
PiperOrigin-RevId: 303016514 Change-Id: I6af560f7f6e94c359600c2913a9dd426f062b921",callbacks_test.py,"@@ -2018,14 +2018,16 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): run_eagerly=testing_utils.should_run_eagerly()) return model - def _get_trace_file(self, logdir): + def _count_trace_file(self, logdir): profile_dir = os.path.join(logdir, 'plugins', 'profile') + count = 0 for (dirpath, dirnames, filenames) in os.walk(profile_dir): + del dirpath # unused del dirnames # unused for filename in filenames: if filename.endswith('.trace.json.gz'): - return os.path.join(dirpath, filename) - return None + count += 1 + return count def fitModelAndAssertKerasModelWritten(self, model): x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) @@ -2095,7 +2097,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): _ObservedSummary(logdir=self.train_dir, tag=u'batch_1'), }, ) - self.assertIsNotNone(self._get_trace_file(logdir=self.train_dir)) + self.assertEqual(1, self._count_trace_file(logdir=self.train_dir)) def test_TensorBoard_autoTrace_tagNameWithBatchNum(self): model = self._get_seq_model() @@ -2118,7 +2120,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): _ObservedSummary(logdir=self.train_dir, tag=u'batch_2'), }, ) - self.assertIsNotNone(self._get_trace_file(logdir=self.train_dir)) + self.assertEqual(1, self._count_trace_file(logdir=self.train_dir)) def test_TensorBoard_autoTrace_profileBatchRangeSingle(self): model = self._get_seq_model() @@ -2142,7 +2144,30 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): _ObservedSummary(logdir=self.train_dir, tag=u'batch_2'), }, ) - self.assertIsNotNone(self._get_trace_file(logdir=self.train_dir)) + self.assertEqual(1, self._count_trace_file(logdir=self.train_dir)) + + def test_TensorBoard_autoTrace_profileBatchRangeTwice(self): + model = self._get_seq_model() + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + tb_cbk = keras.callbacks.TensorBoard( + self.logdir, histogram_freq=1, profile_batch='10,10', write_graph=False) + + model.fit( + x, + y, + batch_size=3, + epochs=10, + validation_data=(x, y), + callbacks=[tb_cbk]) + + model.fit( + x, + y, + batch_size=3, + epochs=10, + validation_data=(x, y), + callbacks=[tb_cbk]) + self.assertEqual(2, self._count_trace_file(logdir=self.train_dir)) # Test case that replicates a Github issue. # https://github.com/tensorflow/tensorflow/issues/37543 @@ -2162,7 +2187,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): callbacks=[keras.callbacks.TensorBoard(logdir, profile_batch=1)], ) # Verifies trace exists in the first logdir. - self.assertIsNotNone(self._get_trace_file(logdir=logdir)) + self.assertEqual(1, self._count_trace_file(logdir=logdir)) logdir = os.path.join(self.get_temp_dir(), 'tb2') model.fit( np.zeros((64, 1)), @@ -2171,7 +2196,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): callbacks=[keras.callbacks.TensorBoard(logdir, profile_batch=2)], ) # Verifies trace exists in the second logdir. 
- self.assertIsNotNone(self._get_trace_file(logdir=logdir)) + self.assertEqual(1, self._count_trace_file(logdir=logdir)) def test_TensorBoard_autoTrace_profileBatchRange(self): model = self._get_seq_model() @@ -2195,7 +2220,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): _ObservedSummary(logdir=self.train_dir, tag=u'batch_3'), }, ) - self.assertIsNotNone(self._get_trace_file(logdir=self.train_dir)) + self.assertEqual(1, self._count_trace_file(logdir=self.train_dir)) def test_TensorBoard_autoTrace_profileInvalidBatchRange(self): with self.assertRaises(ValueError): @@ -2237,7 +2262,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): # Enabled trace only on the 10000th batch, thus it should be empty. self.assertEmpty(summary_file.tensors) - self.assertIsNone(self._get_trace_file(logdir=self.train_dir)) + self.assertEqual(0, self._count_trace_file(logdir=self.train_dir)) class MostRecentlyModifiedFileMatchingPatternTest(test.TestCase): ",0,train c88fd63cc599195cbf88885689e8630dd888bb6d,tensorflow/tensorflow,"[Grappler] Fix bug in arithmetic optimizer causing non-unique node names. PiperOrigin-RevId: 257124468",arithmetic_optimizer.cc,"@@ -887,7 +887,8 @@ class HoistCommonFactorOutOfAggregation : public ArithmeticOptimizerStage { // them, it's possible that rewritten node already exists in a graph return rewritten_nodes_.find(node->name()) != rewritten_nodes_.end() || ctx().node_map->NodeExists(OuterNodeName(node, false)) || - ctx().node_map->NodeExists(OuterNodeName(node, true)); + ctx().node_map->NodeExists(OuterNodeName(node, true)) || + ctx().node_map->NodeExists(InnerAddNodeName(node)); } // keep names of the nodes that were optimized by this stage ",0,train 3336574287a16a0ead083a33b5e80a1c7204fa62,tensorflow/tensorflow,"Fix shape mismatch in `rnn()` of keras backend PiperOrigin-RevId: 202231273",backend.py,"@@ -3161,10 +3161,16 @@ def rnn(step_function, array_ops.stack( [1, array_ops.shape(output)[1]])) output = array_ops.where(tiled_mask_t, output, states[0]) - new_states = [ - array_ops.where(tiled_mask_t, new_states[i], states[i]) - for i in range(len(states)) - ] + + masked_states = [] + for i in range(len(states)): + states_dim = array_ops.shape(new_states[i])[1] + stacked_states_dim = array_ops.stack([1, states_dim]) + tiled_mask = array_ops.tile(mask_t, stacked_states_dim) + masked_state = array_ops.where(tiled_mask, new_states[i], states[i]) + masked_states.append(masked_state) + new_states = masked_states + output_ta_t = output_ta_t.write(time, output) return (time + 1, output_ta_t) + tuple(new_states) else: ",0,train 3336574287a16a0ead083a33b5e80a1c7204fa62,tensorflow/tensorflow,"Fix shape mismatch in `rnn()` of keras backend PiperOrigin-RevId: 202231273",backend_test.py,"@@ -1077,7 +1077,7 @@ class BackendNNOpsTest(test.TestCase, parameterized.TestCase): {'go_backwards': False, 'mask': mask, 'unroll': True}, ] with self.test_session(): - for (i, kwargs) in enumerate(kwargs_list): + for i, kwargs in enumerate(kwargs_list): last_output, outputs, new_states = keras.backend.rnn(rnn_fn, inputs, initial_states, **kwargs) @@ -1124,6 +1124,115 @@ class BackendNNOpsTest(test.TestCase, parameterized.TestCase): for b_s, b_u_s in zip(state_list[2], state_list[3]): self.assertAllClose(b_s, b_u_s, atol=1e-04) + def test_rnn_additional_states(self): + # implement a simple RNN + num_samples = 4 + input_dim = 5 + output_dim = 3 + timesteps = 6 + + input_val = np.random.random( + (num_samples, timesteps, input_dim)).astype(np.float32) + 
init_state_val = np.random.random( + (num_samples, output_dim)).astype(np.float32) + w_i_val = np.random.random((input_dim, output_dim)).astype(np.float32) + w_o_val = np.random.random((output_dim, output_dim)).astype(np.float32) + np_mask = np.random.randint(2, size=(num_samples, timesteps)) + + def rnn_step_fn(): + w_i = keras.backend.variable(w_i_val) + w_o = keras.backend.variable(w_o_val) + + def step_function(x, states): + assert len(states) == 2 + prev_output = states[0] + output = keras.backend.dot(x, w_i) + keras.backend.dot(prev_output, w_o) + return output, [output, + keras.backend.concatenate([output, output], axis=-1)] + + return step_function + + # test default setup + last_output_list = [[], [], [], [], [], []] + outputs_list = [[], [], [], [], [], []] + state_list = [[], [], [], [], [], []] + additional_state_list = [[], [], [], [], [], []] + + rnn_fn = rnn_step_fn() + inputs = keras.backend.variable(input_val) + initial_states = [keras.backend.variable(init_state_val), + np.concatenate([init_state_val, init_state_val], axis=-1)] + mask = keras.backend.variable(np_mask) + + kwargs_list = [ + {'go_backwards': False, 'mask': None}, + {'go_backwards': False, 'mask': None, 'unroll': True}, + {'go_backwards': True, 'mask': None}, + {'go_backwards': True, 'mask': None, 'unroll': True}, + {'go_backwards': False, 'mask': mask}, + {'go_backwards': False, 'mask': mask, 'unroll': True}, + ] + with self.test_session(): + for i, kwargs in enumerate(kwargs_list): + last_output, outputs, new_states = keras.backend.rnn(rnn_fn, inputs, + initial_states, + **kwargs) + # check static shape inference + self.assertEqual(last_output.get_shape().as_list(), + [num_samples, output_dim]) + self.assertEqual(outputs.get_shape().as_list(), + [num_samples, timesteps, output_dim]) + # for state in new_states: + # self.assertEquals(state.get_shape().as_list(), + # [num_samples, output_dim]) + self.assertEqual(new_states[0].get_shape().as_list(), + [num_samples, output_dim]) + self.assertEqual(new_states[1].get_shape().as_list(), + [num_samples, 2 * output_dim]) + + last_output_list[i].append(keras.backend.eval(last_output)) + outputs_list[i].append(keras.backend.eval(outputs)) + self.assertEqual(len(new_states), 2) + state_list[i].append(keras.backend.eval(new_states[0])) + additional_state_list[i].append(keras.backend.eval(new_states[1])) + + def assert_list_pairwise(z_list, atol=1e-05): + for (z1, z2) in zip(z_list[1:], z_list[:-1]): + self.assertAllClose(z1, z2, atol=atol) + + assert_list_pairwise(last_output_list[0], atol=1e-04) + assert_list_pairwise(outputs_list[0], atol=1e-04) + assert_list_pairwise(state_list[0], atol=1e-04) + assert_list_pairwise(additional_state_list[0], atol=1e-04) + assert_list_pairwise(last_output_list[2], atol=1e-04) + assert_list_pairwise(outputs_list[2], atol=1e-04) + assert_list_pairwise(state_list[2], atol=1e-04) + assert_list_pairwise(additional_state_list[2], atol=1e-04) + + for l, u_l in zip(last_output_list[0], last_output_list[1]): + self.assertAllClose(l, u_l, atol=1e-04) + + for o, u_o in zip(outputs_list[0], outputs_list[1]): + self.assertAllClose(o, u_o, atol=1e-04) + + for s, u_s in zip(state_list[0], state_list[1]): + self.assertAllClose(s, u_s, atol=1e-04) + + for s, u_s in zip(additional_state_list[0], additional_state_list[1]): + self.assertAllClose(s, u_s, atol=1e-04) + + for b_l, b_u_l in zip(last_output_list[2], last_output_list[3]): + self.assertAllClose(b_l, b_u_l, atol=1e-04) + + for b_o, b_u_o in zip(outputs_list[2], outputs_list[3]): + 
self.assertAllClose(b_o, b_u_o, atol=1e-04) + + for b_s, b_u_s in zip(state_list[2], state_list[3]): + self.assertAllClose(b_s, b_u_s, atol=1e-04) + + for s, u_s in zip(additional_state_list[2], additional_state_list[3]): + self.assertAllClose(s, u_s, atol=1e-04) + def test_normalize_batch_in_training(self): val = np.random.random((10, 3, 10, 10)) x = keras.backend.variable(val) ",0,train 6787ce30efdfefbf69681ca9795959fb7244240b,tensorflow/tensorflow,"Update GraphDef version to 499. PiperOrigin-RevId: 327589838 Change-Id: I97384115fcb61069d7041b40d8cead6522f86532",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 498 // Updated: 2020/8/19 +#define TF_GRAPH_DEF_VERSION 499 // Updated: 2020/8/20 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // ",0,train a980aead874f555d01bb410b84024831404a408b,tensorflow/tensorflow,"Use test_adjusted_name when making the mangled_test_name in run_and_gather_logs_lib.py, to avoid duplicate file names when the same test is run on multiple GPUs. PiperOrigin-RevId: 157630193",run_and_gather_logs_lib.py,"@@ -131,8 +131,18 @@ def run_and_gather_logs(name, test_name, test_args, # Hopefully running in sandboxed mode test_executable = os.path.join(""."", test_executable) + test_adjusted_name = name + gpu_config = gpu_info_lib.gather_gpu_devices() + if gpu_config: + gpu_name = gpu_config[0].model + gpu_short_name_match = re.search(r""Tesla [KP][4,8]0"", gpu_name) + if gpu_short_name_match: + gpu_short_name = gpu_short_name_match.group(0) + test_adjusted_name = name + ""|"" + gpu_short_name.replace("" "", ""_"") + temp_directory = tempfile.mkdtemp(prefix=""run_and_gather_logs"") - mangled_test_name = name.strip(""/"").replace(""/"", ""_"").replace("":"", ""_"") + mangled_test_name = (test_adjusted_name.strip(""/"") + .replace(""|"", ""_"").replace(""/"", ""_"").replace("":"", ""_"")) test_file_prefix = os.path.join(temp_directory, mangled_test_name) test_file_prefix = ""%s."" % test_file_prefix @@ -151,15 +161,6 @@ def run_and_gather_logs(name, test_name, test_args, if not log_files: raise MissingLogsError(""No log files found at %s."" % test_file_prefix) - test_adjusted_name = name - gpu_config = gpu_info_lib.gather_gpu_devices() - if gpu_config: - gpu_name = gpu_config[0].model - gpu_short_name_match = re.search(r""Tesla [KP][4,8]0"", gpu_name) - if gpu_short_name_match: - gpu_short_name = gpu_short_name_match.group(0) - test_adjusted_name = name + ""|"" + gpu_short_name.replace("" "", ""_"") - return (process_test_logs( test_adjusted_name, test_name=test_name, ",0,train b6af9ad8555b834822bff052316fac00cc8d949a,tensorflow/tensorflow,"Supported more types in TensorDescriptor::GetDataTypeFromTemplateArgs. 
PiperOrigin-RevId: 432385164",tensor_desc.cc,"@@ -981,6 +981,18 @@ absl::Status TensorDescriptor::GetDataTypeFromTemplateArgs( *result = DataType::FLOAT16; } else if (read_type == ""float"") { *result = DataType::FLOAT32; + } else if (read_type == ""int"") { + *result = DataType::INT32; + } else if (read_type == ""short"") { + *result = DataType::INT16; + } else if (read_type == ""char"") { + *result = DataType::INT8; + } else if (read_type == ""uint"") { + *result = DataType::UINT32; + } else if (read_type == ""ushort"") { + *result = DataType::UINT16; + } else if (read_type == ""uchar"") { + *result = DataType::UINT8; } else { return absl::NotFoundError(absl::StrCat( ""Unrecognized Read selector template argument - "", read_type)); ",0,train ac96df2d153c6ba47f902eccecc99701b76d660c,tensorflow/tensorflow,"Added is_variable_initialized(variable) function. Change: 119321281",variable_ops.cc,"@@ -28,6 +28,8 @@ REGISTER_KERNEL_BUILDER(Name(""TemporaryVariable"").Device(DEVICE_CPU), TemporaryVariableOp); REGISTER_KERNEL_BUILDER(Name(""DestroyTemporaryVariable"").Device(DEVICE_CPU), DestroyTemporaryVariableOp); +REGISTER_KERNEL_BUILDER(Name(""IsVariableInitialized"").Device(DEVICE_CPU), + IsVariableInitializedOp); #if GOOGLE_CUDA // Only register 'Variable' on GPU for the subset of types also supported by @@ -43,7 +45,12 @@ REGISTER_KERNEL_BUILDER(Name(""DestroyTemporaryVariable"").Device(DEVICE_CPU), REGISTER_KERNEL_BUILDER(Name(""DestroyTemporaryVariable"") \ .Device(DEVICE_GPU) \ .TypeConstraint(""T""), \ - DestroyTemporaryVariableOp); + DestroyTemporaryVariableOp); \ + REGISTER_KERNEL_BUILDER(Name(""IsVariableInitialized"") \ + .Device(DEVICE_GPU) \ + .TypeConstraint(""dtype"") \ + .HostMemory(""is_initialized""), \ + IsVariableInitializedOp); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS ",0,train ac96df2d153c6ba47f902eccecc99701b76d660c,tensorflow/tensorflow,"Added is_variable_initialized(variable) function. Change: 119321281",variable_ops.h,"@@ -158,6 +158,22 @@ class DestroyTemporaryVariableOp : public OpKernel { string var_name_; }; +class IsVariableInitializedOp : public OpKernel { + public: + IsVariableInitializedOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + // Get a mutable input tensor of the Ref input. + const Tensor& input_tensor = context->mutable_input(0, false); + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, TensorShape({}), &output)); + auto output_tensor = output->tensor(); + bool result = input_tensor.IsInitialized(); + output_tensor() = result; + } +}; + } // namespace tensorflow #endif // TENSORFLOW_KERNELS_VARIABLE_OPS_H_ ",0,train ac96df2d153c6ba47f902eccecc99701b76d660c,tensorflow/tensorflow,"Added is_variable_initialized(variable) function. Change: 119321281",state_ops.cc,"@@ -40,6 +40,20 @@ shared_name: If non-empty, this variable is named in the given bucket with this shared_name. Otherwise, the node name is used instead. )doc""); +REGISTER_OP(""IsVariableInitialized"") + .Output(""is_initialized: bool"") + .Input(""ref: Ref(dtype)"") + .Attr(""dtype: type"") + .SetAllowsUninitializedInput() + .Doc(R""doc( +Checks whether a tensor has been initialized. + +Outputs boolean scalar indicating whether the tensor has been initialized. + +ref: Should be from a `Variable` node. May be uninitialized. +dtype: The type of elements in the variable tensor. 
+)doc""); + REGISTER_OP(""TemporaryVariable"") .Output(""ref: Ref(dtype)"") .Attr(""shape: shape"") ",0,train ac96df2d153c6ba47f902eccecc99701b76d660c,tensorflow/tensorflow,"Added is_variable_initialized(variable) function. Change: 119321281",variable_ops_test.py,"@@ -237,6 +237,14 @@ class VariableOpTest(tf.test.TestCase): result = tf.mul(var, var) self.assertAllClose([4.0], result.eval()) + def testIsVariableInitialized(self): + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + v0 = state_ops.variable_op([1, 2], tf.float32) + self.assertEqual(False, tf.is_variable_initialized(v0).eval()) + tf.assign(v0, [[2.0, 3.0]]).eval() + self.assertEqual(True, tf.is_variable_initialized(v0).eval()) + if __name__ == ""__main__"": tf.test.main() ",0,train ac96df2d153c6ba47f902eccecc99701b76d660c,tensorflow/tensorflow,"Added is_variable_initialized(variable) function. Change: 119321281",state_ops.py,"@@ -30,6 +30,7 @@ collected in the graph. @@initialize_all_variables @@initialize_variables @@initialize_local_variables +@@is_variable_initialized @@assert_variables_initialized ## Saving and Restoring Variables @@ -134,6 +135,8 @@ def variable_op(shape, dtype, name=""Variable"", set_shape=True, container="""", # NOTE(mrry): Shapes are conditionally set in the Python wrapper. ops.RegisterShape(""Variable"")(common_shapes.unknown_shape) +ops.RegisterShape(""IsVariableInitialized"")(common_shapes.scalar_shape) + @ops.RegisterShape(""TemporaryVariable"") def _TemporaryVariableShape(op): ",0,train ac96df2d153c6ba47f902eccecc99701b76d660c,tensorflow/tensorflow,"Added is_variable_initialized(variable) function. Change: 119321281",variables.py,"@@ -798,6 +798,18 @@ def initialize_local_variables(): return initialize_variables(local_variables()) +def is_variable_initialized(variable): + """"""Returns an Op to check if a variable has been initialized. + + Args: + variable: A `Variable`. + + Returns: + An operation to check whether a variable has been initialized. + """""" + return state_ops.is_variable_initialized(variable) + + def assert_variables_initialized(var_list=None): """"""Returns an Op to check if variables are initialized. ",0,train ac96df2d153c6ba47f902eccecc99701b76d660c,tensorflow/tensorflow,"Added is_variable_initialized(variable) function. Change: 119321281",session_manager_test.py,"@@ -71,6 +71,8 @@ class SessionManagerTest(tf.test.TestCase): os.rename(checkpoint_dir, checkpoint_dir2) gfile.MakeDirs(checkpoint_dir) v = tf.Variable([6.0, 7.0, 8.0], name=""v"") + with self.test_session(): + self.assertEqual(False, tf.is_variable_initialized(v).eval()) tf.train.SessionManager(ready_op=tf.assert_variables_initialized()) saver = tf.train.Saver({""v"": v}) # This should fail as there's no checkpoint within 2 seconds. @@ -85,6 +87,9 @@ class SessionManagerTest(tf.test.TestCase): sess = sm.prepare_session("""", init_op=None, saver=saver, checkpoint_dir=checkpoint_dir, wait_for_checkpoint=True, max_wait_secs=2) + self.assertEqual( + True, tf.is_variable_initialized( + sess.graph.get_tensor_by_name(""v:0"")).eval(session=sess)) def testRecoverSession(self): # Create a checkpoint. @@ -109,11 +114,16 @@ class SessionManagerTest(tf.test.TestCase): # Create a new Graph and SessionManager and recover. 
with tf.Graph().as_default(): v = tf.Variable(2, name=""v"") + with self.test_session(): + self.assertEqual(False, tf.is_variable_initialized(v).eval()) sm2 = tf.train.SessionManager(ready_op=tf.assert_variables_initialized()) saver = tf.train.Saver({""v"": v}) sess, initialized = sm2.recover_session("""", saver=saver, checkpoint_dir=checkpoint_dir) self.assertTrue(initialized) + self.assertEqual( + True, tf.is_variable_initialized( + sess.graph.get_tensor_by_name(""v:0"")).eval(session=sess)) self.assertEquals(1, sess.run(v)) def testWaitForSessionReturnsNoneAfterTimeout(self): ",0,train a39efe62d28a754bd27d08d485b414c5555c7411,tensorflow/tensorflow,"Fix cluster_resolver test breakage in strategy_common_test. PiperOrigin-RevId: 318360022 Change-Id: I112e970eb438621d54a5aa935d1b5df63b3a6e9d",strategy_common_test.py,"@@ -160,20 +160,16 @@ class StrategyClusterResolverTest(test.TestCase, parameterized.TestCase): with strategy.scope(): self.assertIs(strategy.cluster_resolver, resolver) + self.assertTrue(hasattr(resolver, 'cluster_spec')) - if isinstance(strategy, TPUStrategy): - self.skipTest('b/159747888') - self.assertTrue(hasattr(resolver, 'environment')) self.assertTrue(hasattr(resolver, 'master')) self.assertTrue(hasattr(resolver, 'num_accelerators')) - self.assertIsNone(resolver.rpc_layer) + self.assertTrue(hasattr(resolver, 'task_id')) + self.assertTrue(hasattr(resolver, 'task_type')) if isinstance(strategy, CollectiveAllReduceStrategy): self.assertEqual(resolver.task_id, 0) self.assertAllInSet(resolver.task_type, ['chief', 'worker']) - elif isinstance(strategy, TPUStrategy): - # TPUStrategy does not have task_id and task_type applicable. - self.assertIsNone(resolver.task_id) - self.assertIsNone(resolver.task_type) + self.assertIsNone(resolver.rpc_layer) if __name__ == '__main__': ",0,train c182e9bffe6e67878ff602fc06051c4d6021b7d9,tensorflow/tensorflow,"Add decomposition pattern for tfr.quant_act_range op quant_act_range determines proper clipping range, given activation and quantization parameters. PiperOrigin-RevId: 384660997 Change-Id: I850c1f1f8f767fe301c1deceeb3ab03ea9a0cec8",decompose.cc,"@@ -36,6 +36,7 @@ limitations under the License. #include ""mlir/Dialect/StandardOps/IR/Ops.h"" // from @llvm-project #include ""mlir/IR/Attributes.h"" // from @llvm-project #include ""mlir/IR/Builders.h"" // from @llvm-project +#include ""mlir/IR/BuiltinAttributes.h"" // from @llvm-project #include ""mlir/IR/BuiltinOps.h"" // from @llvm-project #include ""mlir/IR/BuiltinTypes.h"" // from @llvm-project #include ""mlir/IR/MLIRContext.h"" // from @llvm-project @@ -77,6 +78,16 @@ namespace TFR { namespace { +// Quantize the float value based on given scale and zero point attributes. +Attribute Quantize(float value, Attribute scale_attr, Attribute zp_attr, + OpBuilder builder) { + double scale = scale_attr.cast().getValueAsDouble(); + int64_t zp = zp_attr.cast().getInt(); + + int quantized = static_cast(std::round(value / scale) + zp); + return builder.getI32IntegerAttr(quantized); +} + // Decompose the TF ops with the registered composition library. 
struct DecomposeTFOpsPass : public PassWrapper { @@ -111,10 +122,13 @@ struct DecomposeTFOpsPass llvm::Optional external_tfr_module; }; +#include ""tensorflow/compiler/mlir/tfr/passes/generated_decompose.inc"" + void DecomposeTFOpsPass::ApplyCanonicalization() { FuncOp func = getFunction(); OwningRewritePatternList patterns(&getContext()); + populateWithGenerated(patterns); populateCanonicalizationPatterns(func, patterns); (void)applyPatternsAndFoldGreedily(func, std::move(patterns)); ",0,train 4d2627928b737a4332cb4a82e6a110f020a76a65,tensorflow/tensorflow,"Mark certain methods as const PiperOrigin-RevId: 288605569 Change-Id: I8d47e0289815e3b7031c77c352c27adf8bd7af9a",tensor_handle.cc,"@@ -276,7 +276,7 @@ bool TensorHandle::IsReady() const { return is_ready_; } -Status TensorHandle::WaitReady(const char* caller) { +Status TensorHandle::WaitReady(const char* caller) const { if (!IsReady()) { profiler::TraceMe activity(absl::StrCat(caller, "" WaitReady""), profiler::TraceMeLevel::kInfo); @@ -375,7 +375,7 @@ Status TensorHandle::CopyInferenceShape(TensorHandle* other) { return Status::OK(); } -Status TensorHandle::NumDims(int* num_dims) { +Status TensorHandle::NumDims(int* num_dims) const { DCHECK(num_dims != nullptr); if (!IsReady() && !inference_shape_.unknown_rank()) { *num_dims = inference_shape_.dims(); @@ -386,7 +386,7 @@ Status TensorHandle::NumDims(int* num_dims) { } } -Status TensorHandle::Dim(int dim_index, int64* dim) { +Status TensorHandle::Dim(int dim_index, int64* dim) const { DCHECK(dim != nullptr); if (!IsReady() && !inference_shape_.unknown_rank() && inference_shape_.dim_size(dim_index) != -1) { @@ -398,7 +398,7 @@ Status TensorHandle::Dim(int dim_index, int64* dim) { } } -Status TensorHandle::NumElements(int64* num_elements) { +Status TensorHandle::NumElements(int64* num_elements) const { DCHECK(num_elements != nullptr); if (!IsReady() && inference_shape_.IsFullyDefined()) { *num_elements = inference_shape_.num_elements(); ",0,train 4d2627928b737a4332cb4a82e6a110f020a76a65,tensorflow/tensorflow,"Mark certain methods as const PiperOrigin-RevId: 288605569 Change-Id: I8d47e0289815e3b7031c77c352c27adf8bd7af9a",tensor_handle.h,"@@ -124,9 +124,9 @@ class TensorHandle : public core::RefCounted { Device* DeviceOrHostCPU(EagerContext* ctx) const; Status Shape(tensorflow::TensorShape* shape); - Status NumDims(int* num_dims); - Status Dim(int dim_index, int64* dim); - Status NumElements(int64* num_elements); + Status NumDims(int* num_dims) const; + Status Dim(int dim_index, int64* dim) const; + Status NumElements(int64* num_elements) const; #if !defined(IS_MOBILE_PLATFORM) bool HasRemoteMirror(Device* d); @@ -214,7 +214,7 @@ class TensorHandle : public core::RefCounted { // If the contents of the Tensor pointed to by this handle is yet to be // computed by a EagerNode, this function will block till that computation is // done and the handle is ""ready"". - Status WaitReady(const char* caller); + Status WaitReady(const char* caller) const; // TODO(b/136608821): device_ == nullptr iff Host CPU:0 // This was expedient, but perhaps worth revisiting ('device_' should always ",0,train d7fbbc00235c8d0c34de7b34a156fb9c576fb209,tensorflow/tensorflow,"[XLA] Enable truncated normal for double. Fix a problem in testTruncatedNormalIsInRange that causes the test not actually run. Add testTruncatedNormalIsNotConstant for double. 
PiperOrigin-RevId: 257417015",random_ops_test.py,"@@ -116,12 +116,14 @@ class RandomOpsTest(xla_test.XLATestCase): def rng(dtype): return random_ops.truncated_normal(shape=[2], dtype=dtype) - self._testRngIsNotConstant(rng, dtypes.float32) + # TODO(b/34339814): make this test work with 16 bit float types. + for dtype in self._random_types() & {np.float32, np.float64}: + self._testRngIsNotConstant(rng, dtype) def testTruncatedNormalIsInRange(self): count = 10000000 # TODO(b/34339814): make this test work with 16 bit float types. - for dtype in self._random_types() & {dtypes.float32, dtypes.float64}: + for dtype in self._random_types() & {np.float32, np.float64}: with self.session() as sess: with self.test_scope(): x = random_ops.truncated_normal(shape=[count], dtype=dtype) ",0,train d7fbbc00235c8d0c34de7b34a156fb9c576fb209,tensorflow/tensorflow,"[XLA] Enable truncated normal for double. Fix a problem in testTruncatedNormalIsInRange that causes the test not actually run. Add testTruncatedNormalIsNotConstant for double. PiperOrigin-RevId: 257417015",random_ops.cc,"@@ -293,7 +293,7 @@ class TruncatedNormalOp : public XlaOpKernel { REGISTER_XLA_OP(Name(""TruncatedNormal"") .CompileTimeConstantInput(""shape"") - .TypeConstraint(""dtype"", DT_FLOAT), + .TypeConstraint(""dtype"", {DT_FLOAT, DT_DOUBLE}), TruncatedNormalOp); } // namespace ",0,train 83e49ee10c4ad54f37ff217b3813cc0f96026b75,tensorflow/tensorflow,"Add infrastructure to save the default fusion configuration in either per-edge or per-node mode. PiperOrigin-RevId: 254297785",instruction_fusion.cc,"@@ -455,7 +455,7 @@ StatusOr InstructionFusion::Run(HloModule* module) { module_ = module; int64 fuse_count = 0; std::vector>* fusion_config = nullptr; - if (is_main_fusion_) { + if (config_collection_mode_ != FusionConfigCollection::kOff) { fusion_config = module->mutable_fusion_config(); fusion_config->clear(); } @@ -550,7 +550,7 @@ StatusOr InstructionFusion::Run(HloModule* module) { } } - if (is_main_fusion_) { + if (config_collection_mode_ != FusionConfigCollection::kOff) { const std::vector* comp_fusion_config = fusion_queue->FusionConfiguration(); if (comp_fusion_config && comp_fusion_config->size() > 0) { @@ -559,17 +559,20 @@ StatusOr InstructionFusion::Run(HloModule* module) { } } - if (is_main_fusion_) { - int64 fused_edge_count = 0; + if (config_collection_mode_ != FusionConfigCollection::kOff) { + int64 fused_count = 0; for (auto& config_per_computation : *fusion_config) { for (auto edge : config_per_computation) { - if (edge) ++fused_edge_count; + if (edge) { + ++fused_count; + } } } - VLOG(4) << ""There are "" << fused_edge_count << "" fused edges that cause "" + VLOG(1) << ""There are "" << fused_count << "" fused bits that cause "" << fuse_count << "" fusion actions.""; - VLOG(4) << FusionConfigToString(*fusion_config); + VLOG(1) << FusionConfigToString(*fusion_config); } + VLOG(1) << ""Fusion count: "" << fuse_count; return changed; } ",0,train 83e49ee10c4ad54f37ff217b3813cc0f96026b75,tensorflow/tensorflow,"Add infrastructure to save the default fusion configuration in either per-edge or per-node mode. PiperOrigin-RevId: 254297785",instruction_fusion.h,"@@ -27,6 +27,12 @@ limitations under the License. namespace xla { +enum class FusionConfigCollection { + kOff, // Do not collect configuration. + kPerEdge, // Collect per-edge configuration. + kPerNode, // Collect per-node configuration. +}; + // HLO pass which performs instruction fusion. 
Instructions are fused // ""vertically"", meaning producing instructions are fused into their consumers // with the intent that the loops which compute their values will be fused in @@ -36,10 +42,12 @@ class InstructionFusion : public HloModulePass { public: explicit InstructionFusion( std::function is_expensive, - bool may_duplicate = true, bool main_fusion = false) + bool may_duplicate = true, + FusionConfigCollection config_collection_mode = + FusionConfigCollection::kOff) : is_expensive_(is_expensive), may_duplicate_(may_duplicate), - is_main_fusion_(main_fusion) {} + config_collection_mode_(config_collection_mode) {} ~InstructionFusion() override = default; absl::string_view name() const override { return ""fusion""; } @@ -123,7 +131,9 @@ class InstructionFusion : public HloModulePass { // Reachability information for the current computation. std::unique_ptr reachability_; - bool is_main_fusion() { return is_main_fusion_; } + FusionConfigCollection config_collection_mode() { + return config_collection_mode_; + } private: // The set of producers whose consumers we cannot fuse into. @@ -156,8 +166,8 @@ class InstructionFusion : public HloModulePass { // Returns whether we may duplicate an instruction if we want to fuse it. bool may_duplicate_; - // Main fusion pass. - bool is_main_fusion_; + // Configuration mode. + FusionConfigCollection config_collection_mode_; TF_DISALLOW_COPY_AND_ASSIGN(InstructionFusion); }; ",0,train 6728f85d82d73c34ce9cb7cf03e311c9965f13f6,tensorflow/tensorflow,"Don't treat `type` objects (with __array__) as ndarrays. PiperOrigin-RevId: 307454154 Change-Id: I6669c41e4dd8256ffd7c4203a1e84ddc2b2f876b",function.py,"@@ -53,6 +53,7 @@ from tensorflow.python.framework import func_graph as func_graph_module from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec +from tensorflow.python.framework import tensor_util from tensorflow.python.framework import type_spec from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -107,32 +108,36 @@ def _make_input_signature_hashable(elem, variable_map=None): return tuple(map(lambda e: _make_input_signature_hashable(e, variable_map), elem)) - # If the element is not hashable, assume it is a weakref to a variable - # and return the dtype & shape. Else, simply return the element try: hash(elem) except TypeError: + # TFE_Py_EncodeArg weakrefs arguments it does not recognize, and we expect + # all recognized types to be hashable. assert isinstance(elem, weakref.ReferenceType) v = elem() - # Check if v is a Variable. Note that we can't use isinstance to check if - # it's a variable, since not all variable types are subclass of Variable. - # TODO(mdan) Update this to use a generic ""Variable"" superclass once we - # create one. - if not (hasattr(v, ""shape"") and hasattr(v, ""dtype"")): - raise ValueError(""Arguments to a tf.function must be Tensors, Variables, "" - ""or hashable Python objects (or nested structures of "" - ""these types).\nGot type: %s"" % type(v).__name__) - - idx = variable_map.get(id(v)) - if idx is None: - idx = len(variable_map) - variable_map[id(v)] = idx - - # We include the class name to avoid having different types of variables - # having the same hash. We Also include the variable index which allows - # us to return a different hash if variables have been aliased in a call. 
- return v.__class__, tensor_spec.TensorSpec(v.shape, v.dtype), idx + if resource_variable_ops.is_resource_variable(v): + idx = variable_map.get(id(v)) + if idx is None: + idx = len(variable_map) + variable_map[id(v)] = idx + + # We include the class name to avoid having different types of variables + # having the same hash. We Also include the variable index which allows + # us to return a different hash if variables have been aliased in a call. + return v.__class__, tensor_spec.TensorSpec(v.shape, v.dtype), idx + + if _is_ndarray(v): + # Numpy arrays are not hashable, but when calling functions we treat them + # in the same way as tf.Tensors. + if not hasattr(v, ""shape"") or not hasattr(v, ""dtype""): + # TODO(tomhennigan) De-dup with _as_ndarray in _convert_numpy_inputs. + v = _as_ndarray(v) + return tensor_spec.TensorSpec(v.shape, v.dtype) + + raise ValueError(""Arguments to a tf.function must be Tensors, Variables, "" + ""or hashable Python objects (or nested structures of "" + ""these types).\nGot type: %s"" % type(v).__name__) return elem @@ -2668,6 +2673,24 @@ class FunctionSpec(object): return inputs, {} +def _as_ndarray(value): + """"""Converts value to an ndarray, assumes _is_ndarray(value)."""""" + # TODO(tomhennigan) Support __array_interface__ too. + return value.__array__() + + +def _is_ndarray(value): + """"""Tests whether the given value is an ndarray (and not a TF tensor/var)."""""" + # TODO(tomhennigan) Support __array_interface__ too. + return hasattr(value, ""__array__"") and not ( + resource_variable_ops.is_resource_variable(value) + or tensor_util.is_tensor(value) + # For legacy reasons we do not automatically promote Numpy strings. + or isinstance(value, np.str_) + # NumPy dtypes have __array__ as unbound methods. + or isinstance(value, type)) + + def _convert_numpy_inputs(inputs): """"""Convert numpy array inputs to tensors."""""" # We assume that any CompositeTensors have already converted their components @@ -2680,8 +2703,12 @@ def _convert_numpy_inputs(inputs): # possible since ndarrays are not hashable). need_packing = False for index, value in enumerate(flat_inputs): - if type(value) == np.ndarray: - flat_inputs[index] = constant_op.constant(value) + if _is_ndarray(value): + a = _as_ndarray(value) + if not isinstance(a, np.ndarray): + raise TypeError(""The output of __array__ must be an np.ndarray "" + ""(got {} from {})."".format(type(a), type(value))) + flat_inputs[index] = constant_op.constant(a) need_packing = True if need_packing: return nest.pack_sequence_as( ",0,test 6728f85d82d73c34ce9cb7cf03e311c9965f13f6,tensorflow/tensorflow,"Don't treat `type` objects (with __array__) as ndarrays. PiperOrigin-RevId: 307454154 Change-Id: I6669c41e4dd8256ffd7c4203a1e84ddc2b2f876b",function_test.py,"@@ -775,11 +775,44 @@ class FunctionTest(test.TestCase, parameterized.TestCase): # shouldn't trigger another function definition. self.assertLen(total_function_cache(defined), 1) + np_ones = numpy.ones([], numpy.float32) + np_zeros = numpy.zeros([], numpy.float32) + tf_ones = array_ops.ones([]) + tf_zeros = array_ops.zeros([]) + # Test that the numpy array is properly an argument to the graph function. 
- self.assertEqual(1., defined(numpy.ones([])).numpy()) - self.assertEqual(0., defined(numpy.zeros([])).numpy()) - self.assertEqual(1., defined(array_ops.ones([])).numpy()) - self.assertEqual(0., defined(array_ops.zeros([])).numpy()) + self.assertEqual(1., defined(np_ones).numpy()) + self.assertLen(total_function_cache(defined), 2) + self.assertEqual(0., defined(np_zeros).numpy()) + self.assertEqual(1., defined(tf_ones).numpy()) + self.assertEqual(0., defined(tf_zeros).numpy()) + self.assertLen(total_function_cache(defined), 2) + + # Test that mutable inputs are supported. + mutable = numpy.ones([], numpy.float32) + self.assertEqual(1., defined(mutable).numpy()) + mutable.fill(0) + self.assertEqual(0., defined(mutable).numpy()) + + class MyNdarray(numpy.ndarray): + pass + + # Test that the subclasses of ndarray are converted too. + self.assertEqual(1., defined(np_ones.view(MyNdarray)).numpy()) + self.assertEqual(0., defined(np_zeros.view(MyNdarray)).numpy()) + + # We should not have triggered any re-tracing of the python function. + self.assertLen(total_function_cache(defined), 2) + + def testNumpyDtypeInputSupported(self): + @function.defun + def f(x, dtype): + return constant_op.constant(dtype(x)) + + self.assertEqual(f(1, numpy.float32).numpy(), numpy.float32(1)) + self.assertEqual(f(2, numpy.float32).numpy(), numpy.float32(2)) + self.assertEqual(f(1, numpy.int32).numpy(), numpy.int32(1)) + self.assertEqual(f(2, numpy.int32).numpy(), numpy.int32(2)) def testDefunNumpyArraysConvertedToTensorsInKwargs(self): ",0,test c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",chlo_ops.h,"@@ -32,8 +32,14 @@ namespace mlir { namespace chlo { class HloClientDialect : public Dialect { + void initialize(); + public: - explicit HloClientDialect(MLIRContext *context); + explicit HloClientDialect(MLIRContext *context) + : Dialect(getDialectNamespace(), context, + TypeID::get()) { + initialize(); + } static StringRef getDialectNamespace() { return ""chlo""; } }; ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",chlo_ops.cc,"@@ -266,8 +266,7 @@ BROADCAST_BINARY_OP_DEFS(BroadcastXorOp); // chlo Dialect Constructor //===----------------------------------------------------------------------===// -HloClientDialect::HloClientDialect(MLIRContext* context) - : Dialect(getDialectNamespace(), context) { +void HloClientDialect::initialize() { addOperations< #define GET_OP_LIST #include ""mlir-hlo/Dialect/mhlo/IR/chlo_ops.cc.inc"" ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",hlo_ops.cc,"@@ -2188,7 +2188,7 @@ struct HLOInlinerInterface : public DialectInlinerInterface { //===----------------------------------------------------------------------===// MhloDialect::MhloDialect(MLIRContext* context) - : Dialect(getDialectNamespace(), context) { + : 
Dialect(getDialectNamespace(), context, TypeID::get()) { addOperations< #define GET_OP_LIST #include ""mlir-hlo/Dialect/mhlo/IR/hlo_ops.cc.inc"" ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",lhlo_ops.cc,"@@ -49,7 +49,7 @@ namespace mlir { namespace lmhlo { LmhloDialect::LmhloDialect(MLIRContext *context) - : Dialect(getDialectNamespace(), context) { + : Dialect(getDialectNamespace(), context, TypeID::get()) { addOperations< #define GET_OP_LIST #include ""mlir-hlo/Dialect/mhlo/IR/lhlo_ops.cc.inc"" ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",tfl_ops.cc,"@@ -269,7 +269,7 @@ struct TensorFlowLiteOpFolderDialectInterface }; TensorFlowLiteDialect::TensorFlowLiteDialect(mlir::MLIRContext *context) - : Dialect(/*name=*/""tfl"", context) { + : Dialect(/*name=*/""tfl"", context, TypeID::get()) { addOperations< #define GET_OP_LIST #include ""tensorflow/compiler/mlir/lite/ir/tfl_ops.cc.inc"" ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",tf_device.cc,"@@ -101,7 +101,8 @@ bool BlockWrapsSingleOp(Block* block) { } // end anonymous namespace TensorFlowDeviceDialect::TensorFlowDeviceDialect(MLIRContext* context) - : Dialect(/*name=*/""tf_device"", context) { + : Dialect(/*name=*/""tf_device"", context, + TypeID::get()) { addOperations< #define GET_OP_LIST #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc.inc"" ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",tf_executor.cc,"@@ -92,7 +92,8 @@ struct TensorFlowExecutorOpFolderDialectInterface } // namespace TensorFlowExecutorDialect::TensorFlowExecutorDialect(MLIRContext *context) - : Dialect(/*name=*/""tf_executor"", context) { + : Dialect(/*name=*/""tf_executor"", context, + TypeID::get()) { addOperations< #define GET_OP_LIST #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc.inc"" ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",tf_ops.cc,"@@ -188,7 +188,7 @@ std::vector new std::vector(); TensorFlowDialect::TensorFlowDialect(MLIRContext *context) - : Dialect(/*name=*/""tf"", context) { + : Dialect(/*name=*/""tf"", context, TypeID::get()) { addOperations< #define GET_OP_LIST #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_all_ops.cc.inc"" ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at 
llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",tf_saved_model.cc,"@@ -113,7 +113,8 @@ static LogicalResult Verify(SessionInitializerOp session_initializer) { //===----------------------------------------------------------------------===// TensorFlowSavedModelDialect::TensorFlowSavedModelDialect(MLIRContext *context) - : Dialect(/*name=*/""tf_saved_model"", context) { + : Dialect(/*name=*/""tf_saved_model"", context, + TypeID::get()) { addOperations< #define GET_OP_LIST #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc.inc"" ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",tfjs_ops.cc,"@@ -25,8 +25,7 @@ namespace tfjs { // TFJSDialect //===----------------------------------------------------------------------===// -TFJSDialect::TFJSDialect(MLIRContext *context) - : Dialect(getDialectNamespace(), context) { +void TFJSDialect::initialize() { addOperations< #define GET_OP_LIST #include ""tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.cc.inc"" ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",cubin_creator.cc,"@@ -278,7 +278,8 @@ StatusOr> tensorflow::kernel_gen::GenerateCubinForTfCode( mlir::OwningModuleRef kernel_module = xla::mlir_gpu::ExtractKernelModule(*module).ValueOrDie(); - auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module); + llvm::LLVMContext llvmContext; + auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module, llvmContext); if (!llvmModule) { return InternalError(""Could not translate MLIR module to NVVM""); } ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",tf_framework_ops.cc,"@@ -24,8 +24,7 @@ namespace mlir { namespace kernel_gen { namespace tf_framework { -TFFrameworkDialect::TFFrameworkDialect(MLIRContext *context) - : Dialect(getDialectNamespace(), context) { +void TFFrameworkDialect::initialize() { addOperations< #define GET_OP_LIST #include ""tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc.inc"" ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",cpu_compiler.cc,"@@ -622,10 +622,9 @@ StatusOr> CpuCompiler::RunBackend( // Compile must be thread-safe so create a new LLVM context for the module. 
mlir::MLIRContext mlir_context; - auto llvm_module = absl::make_unique( - ""__compute_module"", - mlir_context.getRegisteredDialect() - ->getLLVMContext()); + llvm::LLVMContext llvm_context; + auto llvm_module = + absl::make_unique(""__compute_module"", llvm_context); auto jit = absl::make_unique( CompilerTargetOptions(module->config()), @@ -834,10 +833,8 @@ CpuCompiler::CompileAheadOfTime(std::unique_ptr module_group, // Compile must be thread-safe so create a new LLVM context for the module. mlir::MLIRContext mlir_context; - llvm::Module llvm_module( - ""__compute_module"", - mlir_context.getRegisteredDialect() - ->getLLVMContext()); + llvm::LLVMContext llvm_context; + llvm::Module llvm_module(""__compute_module"", llvm_context); llvm_module.setDataLayout(target_machine->createDataLayout()); llvm_module.setTargetTriple(triple.getTriple()); if (pic_level != llvm::PICLevel::NotPIC) { ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",mlir_emitter.cc,"@@ -32,7 +32,8 @@ namespace cpu { namespace { // Lower an MLIR module to an LLVM module. -std::unique_ptr MakeLLVMModule(mlir::OwningModuleRef module) { +std::unique_ptr MakeLLVMModule(mlir::OwningModuleRef module, + llvm::LLVMContext *context) { // When set, the LLVM backend will be allowed to reassociate floating-point // reductions, which enables much more efficient ""horizontal"" SIMD // implementations. @@ -47,7 +48,7 @@ std::unique_ptr MakeLLVMModule(mlir::OwningModuleRef module) { mlir::LowerVectorToLLVMOptions().setReassociateFPReductions( kReassociateFPReductions))); CHECK(succeeded(manager.run(*module))); - return mlir::translateModuleToLLVMIR(*module); + return mlir::translateModuleToLLVMIR(*module, *context); } // Get arguments to pass a memref to an mlir function. @@ -114,7 +115,8 @@ Status EmitMlirFuncAndCall( emitter(&op_builder, function); // Now link it all into the main LLVM module. 
- auto mlir_llvm_module = MakeLLVMModule(std::move(mlir_module)); + auto mlir_llvm_module = + MakeLLVMModule(std::move(mlir_module), &b->getContext()); mlir_llvm_module->setDataLayout(llvm_module->getDataLayout()); llvm::Linker::linkModules( *llvm_module, std::move(mlir_llvm_module), llvm::Linker::None, ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",xla_thunks_ops.cc,"@@ -28,7 +28,7 @@ namespace mlir { namespace xla_thunks { XLAThunksDialect::XLAThunksDialect(MLIRContext *context) - : Dialect(getDialectNamespace(), context) { + : Dialect(getDialectNamespace(), context, TypeID::get()) { addOperations< #define GET_OP_LIST #include ""tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.cc.inc"" ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",lhlo_dialect_emitter.cc,"@@ -205,7 +205,7 @@ LhloDialectEmitter::LhloDialectEmitter( platform_(platform) { LLVMDialect* llvmDialect = mlir_module.getContext()->getRegisteredDialect(); - pointer_size_ = llvmDialect->getLLVMModule().getDataLayout().getPointerSize(); + pointer_size_ = llvmDialect->getDataLayout().getPointerSize(); } void LhloDialectEmitter::AddThunkToThunkSequence(std::unique_ptr thunk) { ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",mlir_compiler.cc,"@@ -30,18 +30,14 @@ namespace { using ::mlir::MLIRContext; using ::mlir::LLVM::LLVMDialect; -int64 ConfigureLLVMModuleAndGetPointerSize(MLIRContext* context) { +int64 GetPointerSize(MLIRContext* context) { LLVMDialect* dialect = context->getRegisteredDialect(); - llvm::Module& module = dialect->getLLVMModule(); - module.setTargetTriple(gpu::nvptx::kTargetTriple); - module.setDataLayout(gpu::nvptx::kDataLayout); - return module.getDataLayout().getPointerSize(); + return dialect->getDataLayout().getPointerSize(); } } // namespace -MlirCompiler::MlirCompiler() - : pointer_size_(ConfigureLLVMModuleAndGetPointerSize(&context_)) {} +MlirCompiler::MlirCompiler() : pointer_size_(GetPointerSize(&context_)) {} se::Platform::Id MlirCompiler::PlatformId() const { return stream_executor::cuda::kCudaPlatformId; ",0,train c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",mlir_compiler_impl.cc,"@@ -18,6 +18,7 @@ limitations under the License. 
#include #include ""absl/container/flat_hash_map.h"" +#include ""llvm/IR/LLVMContext.h"" #include ""mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"" // from @llvm-project #include ""mlir/Dialect/GPU/GPUDialect.h"" // from @llvm-project #include ""mlir/Dialect/LLVMIR/LLVMDialect.h"" // from @llvm-project @@ -543,7 +544,11 @@ StatusOr> MlirCompilerImpl::RunBackend( TF_RETURN_IF_ERROR( module_hook_.invoke(IRHook::LoweringStage::KERNEL, *kernel_module)); - auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module); + // Translate to LLVM IR in a fresh context. The module is further translated + // to textual PTX and a CUBIN blob so there is no need for the context to live + // longer than this function. + llvm::LLVMContext llvmContext; + auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module, llvmContext); if (!llvmModule) { return InternalError(""Translation to LLVM failed""); ",0,train 4a7fedd035d446dbf5af689ff64053815faa3ec7,tensorflow/tensorflow,"[XLA:SPMD] Do not shard replicated 1D broadcast if its size is relatevely small. PiperOrigin-RevId: 425733363 Change-Id: I134c578a1b173d3f1bda7b269b78001619e7e362",sharding_propagation.cc,"@@ -726,6 +726,16 @@ bool InferShardingFromUsers( false)) { return false; } + + // TODO(b/214615180): Remove this special handing after a general solution. + // If the replicated broadcast is 1D and the size is relative small, + // no need to shard it. + if (is_spmd && instruction->opcode() == HloOpcode::kBroadcast && + instruction->has_sharding() && instruction->sharding().IsReplicated() && + instruction->shape().IsArray() && instruction->shape().rank() == 1 && + instruction->shape().dimensions(0) <= 128) { + return false; + } bool improved_sharding = false; const bool may_combine_partial_sharding = is_spmd && aggressiveness > 0; for (const HloInstruction* user : instruction->users()) { ",0,train 4a7fedd035d446dbf5af689ff64053815faa3ec7,tensorflow/tensorflow,"[XLA:SPMD] Do not shard replicated 1D broadcast if its size is relatevely small. 
PiperOrigin-RevId: 425733363 Change-Id: I134c578a1b173d3f1bda7b269b78001619e7e362",sharding_propagation_test.cc,"@@ -427,6 +427,31 @@ ENTRY %broadcast { } } +TEST_P(ParameterizedMetadataTest, Broadcast1DBackwardNoChange) { + const char* const hlo_string = R""( +HloModule module +ENTRY %broadcast { + %param0 = s32[128]{0} parameter(0) + %constant0 = s32[] constant(0), sharding={replicated} + %broadcast = s32[128]{0} broadcast(%constant0), dimensions={}, sharding={replicated} + ROOT %compare = pred[128]{0} compare(s32[128]{0} %param0, s32[128]{0} %broadcast), + direction=NE, sharding={devices=[4]0,1,2,3} +})""; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + if (GetParam().clear_metadata) { + ClearMetadata(module.get()); + } + TF_ASSERT_OK_AND_ASSIGN( + bool changed, + ShardingPropagation(/*is_spmd=*/false, GetParam().propagate_metadata) + .Run(module.get())); + EXPECT_FALSE(changed); + auto* instruction = FindInstruction(module.get(), ""broadcast""); + ASSERT_NE(instruction, nullptr); + EXPECT_THAT(instruction, op::Sharding(""{replicated}"")); +} + TEST_P(ParameterizedMetadataTestWithOutput, BroadcastForwardPartial) { const char* const hlo_string = R""( HloModule module ",0,train 77535c387165d9c27cf9617b33a882b6ceae05bd,tensorflow/tensorflow,"Integrate LLVM at https://github.com/llvm/llvm-project/commit/7d9518c8000b PiperOrigin-RevId: 319229381 Change-Id: Ibc22f7a1474a8354c5c023346de2fe3ce80ff460",tf_executor.h,"@@ -53,7 +53,7 @@ enum Kind { // The Control type is a token-like value that models control dependencies from // TensorFlow graphs. -class ControlType : public Type::TypeBase { +class ControlType : public Type::TypeBase { public: using Base::Base; @@ -65,7 +65,7 @@ class ControlType : public Type::TypeBase { static bool kindof(unsigned kind) { return kind == TFTypes::Control; } }; -class TokenType : public Type::TypeBase { +class TokenType : public Type::TypeBase { public: using Base::Base; ",0,test 77535c387165d9c27cf9617b33a882b6ceae05bd,tensorflow/tensorflow,"Integrate LLVM at https://github.com/llvm/llvm-project/commit/7d9518c8000b PiperOrigin-RevId: 319229381 Change-Id: Ibc22f7a1474a8354c5c023346de2fe3ce80ff460",tf_types.h,"@@ -110,9 +110,10 @@ namespace detail { // - `static unsigned getTypeKind()` that returns the (fixed) kind of the // type. template -class TensorFlowTypeImpl : public Type::TypeBase { +class TensorFlowTypeImpl + : public Type::TypeBase { public: - using Base = typename Type::TypeBase; + using Base = typename Type::TypeBase; using TFBase = TensorFlowTypeImpl; using Base::Base; ",0,test 77535c387165d9c27cf9617b33a882b6ceae05bd,tensorflow/tensorflow,"Integrate LLVM at https://github.com/llvm/llvm-project/commit/7d9518c8000b PiperOrigin-RevId: 319229381 Change-Id: Ibc22f7a1474a8354c5c023346de2fe3ce80ff460",hlo_ops.h,"@@ -62,7 +62,7 @@ enum Kind { }; } // namespace HLOTypes -class TokenType : public Type::TypeBase { +class TokenType : public Type::TypeBase { public: using Base::Base; ",0,test fc2d7fdacb35001e9b98ff8b844679985bbf61a4,tensorflow/tensorflow,"[Executor] Reorganize code in `ExecutorState::NodeDone()` for efficiency. Executor microbenchmarks show a 3.22% to 4.16% improvement with this change, which avoids re-checking the status multiple times in the non-error case. PiperOrigin-RevId: 304719934 Change-Id: I6a9e3d1db8b13f32eb558a57fcb272c07ba1079a",executor.cc,"@@ -316,6 +316,8 @@ class ExecutorState { // nodes in 'ready' into 'inline_ready'. // // This method will clear `*ready` before returning. 
+ // + // REQUIRES: `!ready->empty()`. void ScheduleReady(TaggedNodeSeq* ready, TaggedNodeReadyQueue* inline_ready); // Clean up when this executor is done. @@ -1022,73 +1024,80 @@ template bool ExecutorState::NodeDone( const Status& s, TaggedNodeSeq* ready, NodeExecStatsInterface* stats, TaggedNodeReadyQueue* inline_ready) { - nodestats::SetAllEnd(stats); if (stats) { - if (stats_collector_) { - stats->Done(immutable_state_.params().device->name()); + nodestats::SetAllEnd(stats); + DCHECK_NE(stats_collector_, nullptr); + stats->Done(immutable_state_.params().device->name()); + } + + if (TF_PREDICT_TRUE(s.ok())) { + const size_t ready_size = ready->size(); + if (ready_size == 0) { + return num_outstanding_ops_.fetch_sub(1) == 1; } else { - delete stats; + // NOTE: Avoid touching the atomic counter if only one node becomes ready. + if (ready_size > 1) { + num_outstanding_ops_.fetch_add(ready_size - 1, + std::memory_order_relaxed); + } + + // Schedule the ready nodes in 'ready'. + ScheduleReady(ready, inline_ready); + + return false; } - } + } else { + bool abort_run = false; - bool abort_run = false; - if (!s.ok()) { // Some error happened. This thread of computation is done. - mutex_lock l(mu_); - if (status_.ok()) { - abort_run = true; - - // If execution has been cancelled, mark any new errors as being derived. - // This ensures any errors triggered by cancellation are marked as - // derived. - if (cancellation_manager_ && cancellation_manager_->IsCancelled()) { - status_ = StatusGroup::MakeDerived(s); - } else { - status_ = s; + { + mutex_lock l(mu_); + if (status_.ok()) { + // If this is the first node to fail in this run, we are responsible for + // aborting all other execution in the step. + abort_run = true; + + // If execution has been cancelled, mark any new errors as being + // derived. This ensures any errors triggered by cancellation are marked + // as derived. + if (cancellation_manager_ && cancellation_manager_->IsCancelled()) { + status_ = StatusGroup::MakeDerived(s); + } else { + status_ = s; + } } } - } - if (abort_run) { - TRACEPRINTF(""StartAbort: %s"", s.ToString().c_str()); - if (cancellation_manager_) { - // only log when the abort happens during the actual run time. - auto device_name = immutable_state_.params().device->name(); - // Use VLOG instead of LOG(warning) because error status is expected when - // the executor is run under the grappler optimization phase or when - // iterating through a tf.data input pipeline. - VLOG(1) << ""["" << device_name << ""] Executor start aborting: "" << s; - } - if (rendezvous_) { - rendezvous_->StartAbort(s); - } - if (collective_executor_) { - collective_executor_->StartAbort(s); - } - if (cancellation_manager_) { - cancellation_manager_->StartCancel(); - } - } + if (abort_run) { + TRACEPRINTF(""StartAbort: %s"", s.ToString().c_str()); + if (cancellation_manager_) { + // Only log when the abort happens during the actual run time. + // Use VLOG instead of LOG(warning) because error status is expected + // when the executor is run under the grappler optimization phase or + // when iterating through a tf.data input pipeline. 
+ VLOG(1) << ""["" << immutable_state_.params().device->name() + << ""] Executor start aborting: "" << s; + } - bool completed = false; - const size_t ready_size = ready->size(); - if (ready_size == 0 || !s.ok()) { - completed = (num_outstanding_ops_.fetch_sub(1) == 1); - } else if (ready_size > 1) { - num_outstanding_ops_.fetch_add(ready_size - 1, std::memory_order_relaxed); - } + if (rendezvous_) { + rendezvous_->StartAbort(s); + } + if (collective_executor_) { + collective_executor_->StartAbort(s); + } + if (cancellation_manager_) { + cancellation_manager_->StartCancel(); + } + } - // Schedule the ready nodes in 'ready'. - if (s.ok()) { - ScheduleReady(ready, inline_ready); + return num_outstanding_ops_.fetch_sub(1) == 1; } - return completed; } template void ExecutorState::ScheduleReady( TaggedNodeSeq* ready, TaggedNodeReadyQueue* inline_ready) { - if (ready->empty()) return; + DCHECK(!ready->empty()); int64 scheduled_nsec = 0; if (stats_collector_) { ",0,train 0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>. We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible. Refactoring only, NFC intended. PiperOrigin-RevId: 336080771 Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",cpu_device.cc,"@@ -28,7 +28,7 @@ CpuDevice::CpuDevice(int id, : PjRtDevice(id, std::move(local_device_state), kCpuPlatformName, /*device_kind=*/kCpuPlatformName) {} -StatusOr> GetCpuClient(bool asynchronous) { +StatusOr> GetCpuClient(bool asynchronous) { TF_ASSIGN_OR_RETURN(se::Platform * platform, PlatformUtil::GetPlatform(""Host"")); if (platform->VisibleDeviceCount() <= 0) { @@ -56,7 +56,7 @@ StatusOr> GetCpuClient(bool asynchronous) { devices.push_back(std::move(device)); } - return std::make_shared( + return std::make_unique( kCpuPlatformName, client, std::move(devices), /*host_id=*/0, /*allocator=*/nullptr, /*host_memory_allocator=*/nullptr, /*should_stage_host_to_device_transfers=*/false, ",0,test 0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>. We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible. Refactoring only, NFC intended. PiperOrigin-RevId: 336080771 Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",cpu_device.h,"@@ -28,7 +28,7 @@ class CpuDevice : public PjRtDevice { CpuDevice(int id, std::unique_ptr local_device_state); }; -StatusOr> GetCpuClient(bool asynchronous); +StatusOr> GetCpuClient(bool asynchronous); } // namespace xla ",0,test 0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>. We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible. Refactoring only, NFC intended. PiperOrigin-RevId: 336080771 Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",gpu_multistream_test.cc,"@@ -28,7 +28,7 @@ namespace { // computation wait for the inputs to be produced before executing. 
TEST(GpuMultiStream, Basics) { TF_ASSERT_OK_AND_ASSIGN( - std::shared_ptr client, + std::unique_ptr client, GetNvidiaGpuClient(/*asynchronous=*/true, GpuAllocatorConfig(), /*distributed_client=*/nullptr, /*node_id=*/0)); ",0,test 0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>. We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible. Refactoring only, NFC intended. PiperOrigin-RevId: 336080771 Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",interpreter_device.cc,"@@ -28,7 +28,7 @@ InterpreterDevice::InterpreterDevice( : PjRtDevice(id, std::move(local_device_state), kInterpreterPlatformName, /*device_kind=*/kInterpreterPlatformName) {} -StatusOr> GetInterpreterClient() { +StatusOr> GetInterpreterClient() { TF_ASSIGN_OR_RETURN(se::Platform * platform, PlatformUtil::GetPlatform(""Interpreter"")); if (platform->VisibleDeviceCount() != 1) { @@ -50,7 +50,7 @@ StatusOr> GetInterpreterClient() { absl::make_unique(0, std::move(device_state)); devices.push_back(std::move(device)); - return std::make_shared( + return std::make_unique( kInterpreterPlatformName, client, std::move(devices), /*host_id=*/0, /*allocator=*/nullptr, /*host_memory_allocator=*/nullptr, /*should_stage_host_to_device_transfers=*/false, ",0,test 0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>. We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible. Refactoring only, NFC intended. PiperOrigin-RevId: 336080771 Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",interpreter_device.h,"@@ -29,7 +29,7 @@ class InterpreterDevice : public PjRtDevice { std::unique_ptr local_device_state); }; -StatusOr> GetInterpreterClient(); +StatusOr> GetInterpreterClient(); } // namespace xla ",0,test 0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>. We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible. Refactoring only, NFC intended. 
PiperOrigin-RevId: 336080771 Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",nvidia_gpu_device.cc,"@@ -301,7 +301,7 @@ GpuDevice::GpuDevice(int id, : PjRtDevice(id, std::move(local_device_state), kGpuPlatformName, std::move(device_kind), node_id) {} -StatusOr> GetNvidiaGpuClient( +StatusOr> GetNvidiaGpuClient( bool asynchronous, const GpuAllocatorConfig& allocator_config, std::shared_ptr distributed_client, int node_id) { TF_ASSIGN_OR_RETURN(LocalClient * xla_client, GetGpuXlaClient()); @@ -324,13 +324,12 @@ StatusOr> GetNvidiaGpuClient( devices = BuildLocalDevices(std::move(local_device_states)); } - std::shared_ptr pyclient = std::make_shared( + return std::unique_ptr(std::make_unique( ""gpu"", xla_client, std::move(devices), /*node_id=*/node_id, std::move(allocator), std::move(host_memory_allocator), /*should_stage_host_to_device_transfers=*/true, - /*gpu_run_options=*/std::move(gpu_run_options)); - return pyclient; + /*gpu_run_options=*/std::move(gpu_run_options))); } } // namespace xla ",0,test 0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>. We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible. Refactoring only, NFC intended. PiperOrigin-RevId: 336080771 Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",nvidia_gpu_device.h,"@@ -53,7 +53,7 @@ struct GpuAllocatorConfig { // distributed_client may be nullptr in non-distributed settings. // distributed_client should not be Open()ed before calling this function. -StatusOr> GetNvidiaGpuClient( +StatusOr> GetNvidiaGpuClient( bool asynchronous, const GpuAllocatorConfig& allocator_config, std::shared_ptr distributed_client, int node_id); ",0,test 0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>. We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible. Refactoring only, NFC intended. PiperOrigin-RevId: 336080771 Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",py_client.cc,"@@ -30,6 +30,8 @@ namespace xla { namespace py = pybind11; namespace pprof = tensorflow::tfprof::pprof; +PyClient::PyClient(std::unique_ptr pjrt_client) + : pjrt_client_(std::move(pjrt_client)) {} PyClient::PyClient(std::shared_ptr pjrt_client) : pjrt_client_(std::move(pjrt_client)) {} ",0,test 0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>. We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible. Refactoring only, NFC intended. PiperOrigin-RevId: 336080771 Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",py_client.h,"@@ -88,6 +88,7 @@ ClientAndPtr WrapWithClient(std::shared_ptr client, T* contents) { // We use a wrapper class to add Python-specific functionality. class PyClient : public std::enable_shared_from_this { public: + explicit PyClient(std::unique_ptr pjrt_client); explicit PyClient(std::shared_ptr pjrt_client); PjRtClient* pjrt_client() const { return pjrt_client_.get(); } ",0,test 0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>. 
We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible. Refactoring only, NFC intended. PiperOrigin-RevId: 336080771 Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",xla.cc,"@@ -556,13 +556,13 @@ PYBIND11_MODULE(xla_extension, m) { m.def( ""get_cpu_client"", [](bool asynchronous) -> StatusOr> { - TF_ASSIGN_OR_RETURN(std::shared_ptr client, + TF_ASSIGN_OR_RETURN(std::unique_ptr client, GetCpuClient(asynchronous)); return std::make_shared(std::move(client)); }, py::arg(""asynchronous"") = true); m.def(""get_interpreter_client"", []() -> StatusOr> { - TF_ASSIGN_OR_RETURN(std::shared_ptr client, + TF_ASSIGN_OR_RETURN(std::unique_ptr client, GetInterpreterClient()); return std::make_shared(std::move(client)); }); @@ -572,7 +572,7 @@ PYBIND11_MODULE(xla_extension, m) { std::shared_ptr distributed_client, int node_id) -> StatusOr> { TF_ASSIGN_OR_RETURN( - std::shared_ptr client, + std::unique_ptr client, GetNvidiaGpuClient(asynchronous, allocator_config, std::move(distributed_client), node_id)); return std::make_shared(std::move(client)); ",0,test 2f3eb7b5c2fd927ec2b21ae972a39788cdce89c4,tensorflow/tensorflow,"Nest shouldn't barf on ranges. PiperOrigin-RevId: 246185019",def_function_test.py,"@@ -17,6 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from six.moves import range + import functools import weakref @@ -113,6 +115,14 @@ class DefFunctionTest(test.TestCase): with self.assertRaises(ValueError): fn(1.0) + def testRange(self): + + @def_function.function + def f(unused_x): + return 1.0 + + self.assertAllEqual(f(range(5)), 1.0) + def testCorrectVariableCreation(self): state = [] ",0,train 2f3eb7b5c2fd927ec2b21ae972a39788cdce89c4,tensorflow/tensorflow,"Nest shouldn't barf on ranges. PiperOrigin-RevId: 246185019",nest.py,"@@ -130,6 +130,8 @@ def _sequence_like(instance, args): elif _is_composite_tensor(instance): metadata = instance._component_metadata() # pylint: disable=protected-access return type(instance)._from_components(args, metadata) # pylint: disable=protected-access + elif isinstance(instance, _six.moves.range): + return _sequence_like(list(instance), args) else: # Not a namedtuple return type(instance)(args) ",0,train e50ff7aac1f7f98fd103950dcdfbc53804a6b0d4,tensorflow/tensorflow,Enhance the comment for RunDatasetOp(),dataset_test_base.h,"@@ -771,7 +771,11 @@ class DatasetOpsTestBaseV2 : public DatasetOpsTestBase { std::unique_ptr* iterator); // Runs the dataset operation according to the predefined dataset params and - // produces outputs. + // produces outputs. Different from `MakeDataset()` which returns a Dataset + // object, `RunDatasetOp()` executes the dataset kernel based on the input + // DatasetParams and returns the produced outputs as a tensor vector. It can + // be used to run some dataset operations that do not have an internal + // customized `Dataset` class (e.g. `ReduceDatasetOp`). Status RunDatasetOp(const DatasetParams& dataset_params, std::vector* outputs); ",0,train ab1478d380a3f91b75a43b1d452e63b7ab3a0868,tensorflow/tensorflow,"Work around the outside compilation / resource variable issue. 
PiperOrigin-RevId: 335441457 Change-Id: Id9055f26d786f8d9936edc0312f73e86a420e56a",tensor_tracer.py,"@@ -1494,7 +1494,7 @@ class TensorTracer(object): flush_op = tpu.outside_compilation( _flush_fun, cache_val, self._replica_id, - training_util.get_or_create_global_step()) + array_ops.identity(training_util.get_or_create_global_step())) else: flush_op = _flush_fun(cache_val, self._replica_id, training_util.get_or_create_global_step()) ",0,test eb3fcf1cb36297d58a1bb1e7c9fa79370894d1bd,tensorflow/tensorflow,"Extend experimental_get_compiler_ir to get serialized HLO modules. PiperOrigin-RevId: 354030850 Change-Id: I57c1a5445614eab200ad0e86af668c15cd7718d7",get_compiler_ir.cc,"@@ -119,7 +119,8 @@ xla::StatusOr GetCompilerIr( TF_RETURN_IF_ERROR(args.status()); switch (stage) { - case IrExportStage::HLO: { + case IrExportStage::HLO: + case IrExportStage::HLO_SERIALIZED: { XlaCompiler::CompilationResult result; TF_RETURN_IF_ERROR( compiler.CompileFunction(compile_options, function, *args, &result)); @@ -131,13 +132,23 @@ xla::StatusOr GetCompilerIr( std::unique_ptr new_module, xla::HloModule::CreateFromProto(result.computation->proto(), config)); - return new_module->ToString(); + if (stage == IrExportStage::HLO_SERIALIZED) { + return new_module->ToProto().SerializeAsString(); + } else { + return new_module->ToString(); + } } - case IrExportStage::OPTIMIZED_HLO: { + case IrExportStage::OPTIMIZED_HLO: + case IrExportStage::OPTIMIZED_HLO_SERIALIZED: { xla::StatusOr executable = GetLocalExecutable( options, compile_options, function, cache, *args, compiler); TF_RETURN_IF_ERROR(executable.status()); - return (*executable)->executable()->module().ToString(); + xla::Executable* new_executable = (*executable)->executable(); + if (stage == IrExportStage::OPTIMIZED_HLO_SERIALIZED) { + return new_executable->module().ToProto().SerializeAsString(); + } else { + return new_executable->module().ToString(); + } } case IrExportStage::OPTIMIZED_HLO_DOT: { xla::StatusOr executable = GetLocalExecutable( ",0,train eb3fcf1cb36297d58a1bb1e7c9fa79370894d1bd,tensorflow/tensorflow,"Extend experimental_get_compiler_ir to get serialized HLO modules. PiperOrigin-RevId: 354030850 Change-Id: I57c1a5445614eab200ad0e86af668c15cd7718d7",get_compiler_ir.h,"@@ -27,10 +27,16 @@ class Tensor; class TensorHandle; class EagerContext; -enum class IrExportStage { HLO, OPTIMIZED_HLO, OPTIMIZED_HLO_DOT }; - -// Returns HLO text for a given function `func_name` using library runtime -// `runtime` on a device `dev` with given `inputs`. +enum class IrExportStage { + HLO, + HLO_SERIALIZED, + OPTIMIZED_HLO, + OPTIMIZED_HLO_SERIALIZED, + OPTIMIZED_HLO_DOT +}; + +// Returns the IR format of the selected stage for a given function `func_name` +// using library runtime `runtime` on a device `dev` with given `inputs`. xla::StatusOr GetCompilerIr( IrExportStage stage, ProcessFunctionLibraryRuntime* pflr, absl::string_view func_name, Device* dev, EagerContext* context, ",0,train eb3fcf1cb36297d58a1bb1e7c9fa79370894d1bd,tensorflow/tensorflow,"Extend experimental_get_compiler_ir to get serialized HLO modules. PiperOrigin-RevId: 354030850 Change-Id: I57c1a5445614eab200ad0e86af668c15cd7718d7",def_function.py,"@@ -958,13 +958,21 @@ class Function(object): **kwargs: Keyword arguments used for compilation. Returns: - Function callable with the stage at which the compiler IR should be - serialized. Allowed values for the `stage` are: - - `hlo`: HLO output after conversion from TF - (https://www.tensorflow.org/xla/operation_semantics). 
- - `optimized_hlo`: HLO after compiler optimizations. - - `optimized_hlo_dot`: optimized HLO in DOT format suitable for - Graphviz. + Function callable with the following kwargs: + - `stage` at which the compiler IR should be serialized. Allowed values + are: + - `hlo`: HLO output after conversion from TF + (https://www.tensorflow.org/xla/operation_semantics). + - `hlo_serialized`: Like stage=`hlo`, but the output is a serialized + HLO module proto (a bytes object). + - `optimized_hlo`: HLO after compiler optimizations. + - `optimized_hlo_serialized`: Like stage=`optimized_hlo`, but the + output is a serialized HLO module proto (a bytes object). + - `optimized_hlo_dot`: optimized HLO in DOT format suitable for + Graphviz. + - `device_name` can be either None, in which case the preferred device + is used for compilation, or a device name. It can be a full device + name, or a partial one, e.g., `/device:CPU:0`. For example, for @@ -1013,21 +1021,20 @@ class Function(object): concrete_fn._function_spec.canonicalize_function_inputs( *args, **kwargs) - def compiler_ir_generator(stage='hlo'): - """"""Returns compiler IR for the given `stage`. - - Args: - stage: Stage at which to return the IR. Allowed values are 'hlo' and - 'optimized_hlo'. - """""" + def compiler_ir_generator(stage=""hlo"", device_name=None): # TODO(cheshire): This is a hack to get the current ""preferred"" device, # there is no current API to get it otherwise. - device = random_ops.random_normal([]).device - return context.context().get_compiler_ir( - device_name=device, + if device_name is None: + device_name = random_ops.random_normal([]).device + res_bytes = context.context().get_compiler_ir( + device_name=device_name, stage=stage, function_name=fn_name, args=list(filtered_flat_args) + concrete_fn.captured_inputs) + if stage in (""hlo_serialized"", ""optimized_hlo_serialized""): + return res_bytes + else: + return res_bytes.decode(""utf-8"") return compiler_ir_generator ",0,train eb3fcf1cb36297d58a1bb1e7c9fa79370894d1bd,tensorflow/tensorflow,"Extend experimental_get_compiler_ir to get serialized HLO modules. PiperOrigin-RevId: 354030850 Change-Id: I57c1a5445614eab200ad0e86af668c15cd7718d7",def_function_xla_jit_test.py,"@@ -778,6 +778,19 @@ class DefFunctionTest(xla_test.XLATestCase): self.assertIn('tuple', f.experimental_get_compiler_ir(l)()) + def testGetCompilerIrSerialized(self): + with ops.device('device:{}:0'.format(self.device)): + + @def_function.function(jit_compile=True) + def fn(x): + return x - x + + inputs = constant_op.constant([1, 2, 2, 3, 3]) + for stage in ('hlo_serialized', 'optimized_hlo_serialized'): + hlo = fn.experimental_get_compiler_ir(inputs)( + stage=stage, device_name=f'/device:{self.device}:0') + self.assertIsInstance(hlo, bytes) + def testConstantOnWrongDevice(self): with ops.device('device:{}:0'.format(self.device)): ",0,train eb3fcf1cb36297d58a1bb1e7c9fa79370894d1bd,tensorflow/tensorflow,"Extend experimental_get_compiler_ir to get serialized HLO modules. PiperOrigin-RevId: 354030850 Change-Id: I57c1a5445614eab200ad0e86af668c15cd7718d7",tfe_wrapper.cc,"@@ -21,6 +21,7 @@ limitations under the License. #include ""pybind11/complex.h"" #include ""pybind11/functional.h"" #include ""pybind11/pybind11.h"" +#include ""pybind11/pytypes.h"" #include ""pybind11/stl.h"" #include ""tensorflow/c/c_api.h"" #include ""tensorflow/c/c_api_experimental.h"" @@ -296,10 +297,10 @@ static py::object TFE_ClearScalarCache() { } // Returns compiler IR for a given function. 
-static std::string TFE_GetCompilerIr(py::handle& ctx, - const char* concrete_function_name, - const char* stage, const char* device_name, - py::handle& inputs) { +static py::bytes TFE_GetCompilerIr(py::handle& ctx, + const char* concrete_function_name, + const char* stage, const char* device_name, + py::handle& inputs) { EagerContext* context = ContextFromInterface( reinterpret_cast(InputTFE_Context(ctx))); @@ -307,8 +308,12 @@ static std::string TFE_GetCompilerIr(py::handle& ctx, IrExportStage selected_stage = [&] { if (s_stage == ""hlo"") { return IrExportStage::HLO; + } else if (s_stage == ""hlo_serialized"") { + return IrExportStage::HLO_SERIALIZED; } else if (s_stage == ""optimized_hlo"") { return IrExportStage::OPTIMIZED_HLO; + } else if (s_stage == ""optimized_hlo_serialized"") { + return IrExportStage::OPTIMIZED_HLO_SERIALIZED; } else if (s_stage == ""optimized_hlo_dot"") { return IrExportStage::OPTIMIZED_HLO_DOT; } else { @@ -341,19 +346,21 @@ static std::string TFE_GetCompilerIr(py::handle& ctx, d->parsed_name()); }); if (selected_device == devices.end()) { - ThrowValueError(""No matching device found""); + ThrowValueError( + absl::StrFormat(""No matching device found for '%s'"", device_name) + .c_str()); } - xla::StatusOr hlo_text = + xla::StatusOr hlo_str = GetCompilerIr(selected_stage, context->pflr(), concrete_function_name, *selected_device, context, input_handles); - if (!hlo_text.ok()) { + if (!hlo_str.ok()) { ThrowValueError(absl::StrFormat(""Failed getting HLO text: '%s'"", - hlo_text.status().error_message()) + hlo_str.status().error_message()) .c_str()); } - return *hlo_text; + return py::bytes(*hlo_str); } } // namespace tensorflow ",0,train e185d5bb9c536ae5cd87d64f875deb8522d66fc7,tensorflow/tensorflow,"Appending error message to address error thrown when certain train_step and test_step are decorated with tf.function PiperOrigin-RevId: 384969431 Change-Id: I1ff65811961eaa23d32934dc6a47ee9dc6f9319f",mirrored_run.py,"@@ -463,7 +463,9 @@ class _MirroredReplicaContext(distribute_lib.ReplicaContext): "" please avoid nested `tf.function`s or control flow statements that"" "" may potentially cross a synchronization boundary, for example,"" "" wrap the `fn` passed to `strategy.run` or the entire `strategy.run`"" - "" inside a `tf.function` or move the control flow out of `fn`"") + "" inside a `tf.function` or move the control flow out of `fn`. Also,"" + "" when subclassing a tf.keras.Model please avoid decorating"" + "" overridden methods`test_step` and `train_step` in `tf.function`."") t.has_paused.set() t.should_run.wait() ",0,train b08c54271084b05ea822b3348a3a448a9fe3b898,tensorflow/tensorflow,"[SE] [XLA:GPU] Inform --xla_hlo_profile of the GPU's memory bandwidth. Add a memory_bandwidth() property to StreamExecutor's DeviceDescription, and use this in the GPU's --xla_hlo_profile. PiperOrigin-RevId: 189157407",gpu_compiler.cc,"@@ -671,6 +671,8 @@ StatusOr> GpuCompiler::RunBackend( if (module->config().hlo_profiling_enabled()) { HloCostAnalysis cost_analysis(ShapeSizeBytesFunction()); + cost_analysis.set_bytes_per_second( + stream_exec->GetDeviceDescription().memory_bandwidth()); TF_RETURN_IF_ERROR(module->entry_computation()->Accept(&cost_analysis)); profile_index_map = MakeUnique(*module); profile_printer = ",0,test b08c54271084b05ea822b3348a3a448a9fe3b898,tensorflow/tensorflow,"[SE] [XLA:GPU] Inform --xla_hlo_profile of the GPU's memory bandwidth. Add a memory_bandwidth() property to StreamExecutor's DeviceDescription, and use this in the GPU's --xla_hlo_profile. 
PiperOrigin-RevId: 189157407",cuda_driver.cc,"@@ -1503,6 +1503,19 @@ static port::StatusOr GetSimpleAttribute(CUdevice device, return true; } +/* static */ port::StatusOr CUDADriver::GetDeviceAttribute( + CUdevice_attribute attribute, CUdevice device) { + int val; + CUresult res = cuDeviceGetAttribute(&val, attribute, device); + if (res != CUDA_SUCCESS) { + return port::Status{ + port::error::INTERNAL, + port::Printf(""failed to get device attribute %d for device %d: %s"", + attribute, device, ToString(res).c_str())}; + } + return val; +} + /* static */ bool CUDADriver::IsEccEnabled(CUdevice device, bool *result) { int value = -1; CUresult res = ",0,test b08c54271084b05ea822b3348a3a448a9fe3b898,tensorflow/tensorflow,"[SE] [XLA:GPU] Inform --xla_hlo_profile of the GPU's memory bandwidth. Add a memory_bandwidth() property to StreamExecutor's DeviceDescription, and use this in the GPU's --xla_hlo_profile. PiperOrigin-RevId: 189157407",cuda_driver.h,"@@ -400,12 +400,20 @@ class CUDADriver { // Returns a grab-bag of device properties in a caller-owned device_properties // structure for device_ordinal via cuDeviceGetProperties. - // This call is deprecated in the NVIDIA driver API. + // + // This call is deprecated in the NVIDIA driver API; its replacement is + // GetDeviceAttribute // // http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE__DEPRECATED.html#group__CUDA__DEVICE__DEPRECATED_1g65a5b4e25186bd257df80b98c98cffe6 static bool GetDeviceProperties(CUdevprop *device_properties, int device_ordinal); + // Gets a specific integer-valued property about the given device. + // + // http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE.html#group__CUDA__DEVICE_1g9c3e1414f0ad901d3278a4d6645fc266 + static port::StatusOr GetDeviceAttribute(CUdevice_attribute attribute, + CUdevice device); + // Returns whether ECC is enabled for the given CUdevice via // cuDeviceGetattribute with CU_DEVICE_ATTRIBUTE_ECC_ENABLED. // http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE.html#group__CUDA__DEVICE_1g9c3e1414f0ad901d3278a4d6645fc266 ",0,test b08c54271084b05ea822b3348a3a448a9fe3b898,tensorflow/tensorflow,"[SE] [XLA:GPU] Inform --xla_hlo_profile of the GPU's memory bandwidth. Add a memory_bandwidth() property to StreamExecutor's DeviceDescription, and use this in the GPU's --xla_hlo_profile. PiperOrigin-RevId: 189157407",cuda_gpu_executor.cc,"@@ -1103,6 +1103,18 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const { builder.set_device_memory_size(device_memory_size); } + port::StatusOr mem_clock_khz = CUDADriver::GetDeviceAttribute( + CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, device_ordinal_); + port::StatusOr mem_bus_width_bits = CUDADriver::GetDeviceAttribute( + CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, device_ordinal_); + if (mem_clock_khz.ok() && mem_bus_width_bits.ok()) { + // Times 2 because HBM is DDR memory; it gets two data bits per each data + // lane. + builder.set_memory_bandwidth(2 * int64_t{mem_clock_khz.ValueOrDie()} * + 1000 * + int64_t{mem_bus_width_bits.ValueOrDie()} / 8); + } + { BlockDim block_dim_limit; FillBlockDimLimit(&block_dim_limit); ",0,test b08c54271084b05ea822b3348a3a448a9fe3b898,tensorflow/tensorflow,"[SE] [XLA:GPU] Inform --xla_hlo_profile of the GPU's memory bandwidth. Add a memory_bandwidth() property to StreamExecutor's DeviceDescription, and use this in the GPU's --xla_hlo_profile. 
PiperOrigin-RevId: 189157407",device_description.cc,"@@ -50,6 +50,7 @@ DeviceDescription::DeviceDescription() shared_memory_alloc_granularity_(1), device_address_bits_(kUninitializedUint64), device_memory_size_(kUninitializedUint64), + memory_bandwidth_(kUninitializedUint64), shared_memory_per_core_(kUninitializedUint64), shared_memory_per_block_(kUninitializedUint64), clock_rate_ghz_(-1.0), @@ -85,6 +86,8 @@ std::unique_ptr> DeviceDescription::ToMap() const { result[""Device Address Bits""] = port::StrCat(device_address_bits()); result[""Device Memory Size""] = port::HumanReadableNumBytes::ToString(device_memory_size()); + result[""Memory Bandwidth""] = port::StrCat( + port::HumanReadableNumBytes::ToString(memory_bandwidth_), ""/s""); result[""Shared Memory Per Core""] = port::HumanReadableNumBytes::ToString(shared_memory_per_core_); ",0,test b08c54271084b05ea822b3348a3a448a9fe3b898,tensorflow/tensorflow,"[SE] [XLA:GPU] Inform --xla_hlo_profile of the GPU's memory bandwidth. Add a memory_bandwidth() property to StreamExecutor's DeviceDescription, and use this in the GPU's --xla_hlo_profile. PiperOrigin-RevId: 189157407",device_description.h,"@@ -140,6 +140,11 @@ class DeviceDescription { // Returns the device memory size in bytes. uint64 device_memory_size() const { return device_memory_size_; } + // Returns the device's memory bandwidth in bytes/sec. (This is for + // reads/writes to/from the device's own memory, not for transfers between the + // host and device.) + uint64 memory_bandwidth() const { return memory_bandwidth_; } + // Returns the device's core clock rate in GHz. float clock_rate_ghz() const { return clock_rate_ghz_; } @@ -212,6 +217,7 @@ class DeviceDescription { uint64 device_address_bits_; uint64 device_memory_size_; + uint64 memory_bandwidth_; // Shared memory limits on a given device. uint64 shared_memory_per_core_; @@ -305,6 +311,9 @@ class DeviceDescriptionBuilder { void set_device_memory_size(uint64 value) { device_description_->device_memory_size_ = value; } + void set_memory_bandwidth(uint64 value) { + device_description_->memory_bandwidth_ = value; + } void set_shared_memory_per_core(int64 value) { device_description_->shared_memory_per_core_ = value; ",0,test 063bdbe773ba520d6a232f278567a16411a90597,tensorflow/tensorflow,"Make sparse_reshape work well with output of tf.shape. PiperOrigin-RevId: 155902266",feature_column.py,"@@ -1330,13 +1330,6 @@ class _CategoricalColumn(_FeatureColumn): pass -def _sparse_reshape(inputs, shape): - # Satisfies sparse_reshape assumptions such as dtype int64. - # shape is a list. 
- return sparse_ops.sparse_reshape(inputs, - math_ops.cast(shape, dtypes.int64)) - - def _create_categorical_column_weighted_sum( column, builder, units, sparse_combiner, weight_collections, trainable): """"""Create a weighted sum of a categorical column for linear_model."""""" @@ -1344,13 +1337,13 @@ def _create_categorical_column_weighted_sum( builder, weight_collections=weight_collections, trainable=trainable) - id_tensor = _sparse_reshape(sparse_tensors.id_tensor, [ + id_tensor = sparse_ops.sparse_reshape(sparse_tensors.id_tensor, [ array_ops.shape(sparse_tensors.id_tensor)[0], -1 ]) weight_tensor = sparse_tensors.weight_tensor if weight_tensor is not None: - weight_tensor = _sparse_reshape(weight_tensor, - [array_ops.shape(weight_tensor)[0], -1]) + weight_tensor = sparse_ops.sparse_reshape( + weight_tensor, [array_ops.shape(weight_tensor)[0], -1]) weight = variable_scope.get_variable( name='weight', ",0,train 063bdbe773ba520d6a232f278567a16411a90597,tensorflow/tensorflow,"Make sparse_reshape work well with output of tf.shape. PiperOrigin-RevId: 155902266",sparse_reshape_op_test.py,"@@ -78,6 +78,18 @@ class SparseReshapeTest(test.TestCase): self.assertAllEqual(output_val.values, input_val.values) self.assertAllEqual(output_val.dense_shape, input_val.dense_shape) + def testWorksWellWithTfShape(self): + with self.test_session(use_gpu=False) as sess: + sp_input = self._SparseTensorPlaceholder() + input_val = self._SparseTensorValue_5x6() + shape = array_ops.shape(sp_input) # tf.shape generates int32 output + sp_output = sparse_ops.sparse_reshape(sp_input, shape) + + output_val = sess.run(sp_output, {sp_input: input_val}) + self.assertAllEqual(output_val.indices, input_val.indices) + self.assertAllEqual(output_val.values, input_val.values) + self.assertAllEqual(output_val.dense_shape, input_val.dense_shape) + def testFeedSameShapeWithInferredDim(self): with self.test_session(use_gpu=False) as sess: sp_input = self._SparseTensorPlaceholder() ",0,train 063bdbe773ba520d6a232f278567a16411a90597,tensorflow/tensorflow,"Make sparse_reshape work well with output of tf.shape. PiperOrigin-RevId: 155902266",sparse_ops.py,"@@ -556,7 +556,7 @@ def sparse_reshape(sp_input, shape, name=None): number of elements than `sp_input`. """""" sp_input = _convert_to_sparse_tensor(sp_input) - shape = ops.convert_to_tensor(shape, dtype=dtypes.int64) + shape = math_ops.cast(shape, dtype=dtypes.int64) with ops.name_scope(name, ""SparseReshape"", [sp_input]) as name: reshaped_ind, reshaped_shape = gen_sparse_ops._sparse_reshape( ",0,train fc670504413590e3c6e665ed233c44b53be5daed,tensorflow/tensorflow,Update nn_ops.py,nn_ops.py,"@@ -3832,7 +3832,7 @@ def softmax_v2(logits, axis=None, name=None): This function performs the equivalent of - softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis) + softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis, keepdims=True) Example usage: ",0,train e3b0a4291984f1af0cb8bf512542dffaca2d6cb5,tensorflow/tensorflow,"Allow non-integer values for Poisson CDF/PMF. PiperOrigin-RevId: 186502845",poisson_test.py,"@@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function import numpy as np +from scipy import special from scipy import stats from tensorflow.contrib.distributions.python.ops import poisson as poisson_lib from tensorflow.python.framework import constant_op @@ -110,7 +111,7 @@ class PoissonTest(test.TestCase): batch_size = 6 lam = constant_op.constant([3.0] * batch_size) lam_v = 3.0 - x = [2.2, 3.1, 4., 5.5, 6., 7.] 
+ x = [2., 3., 4., 5., 6., 7.] poisson = self._make_poisson(rate=lam) log_cdf = poisson.log_cdf(x) @@ -121,12 +122,31 @@ class PoissonTest(test.TestCase): self.assertEqual(cdf.get_shape(), (6,)) self.assertAllClose(cdf.eval(), stats.poisson.cdf(x, lam_v)) + def testPoissonCDFNonIntegerValues(self): + with self.test_session(): + batch_size = 6 + lam = constant_op.constant([3.0] * batch_size) + lam_v = 3.0 + x = np.array([2.2, 3.1, 4., 5.5, 6., 7.], dtype=np.float32) + + poisson = self._make_poisson(rate=lam) + cdf = poisson.cdf(x) + self.assertEqual(cdf.get_shape(), (6,)) + + # The Poisson CDF should be valid on these non-integer values, and + # equal to igammac(1 + x, rate). + self.assertAllClose(cdf.eval(), special.gammaincc(1. + x, lam_v)) + + with self.assertRaisesOpError(""cannot contain fractional components""): + poisson_validate = self._make_poisson(rate=lam, validate_args=True) + poisson_validate.cdf(x).eval() + def testPoissonCdfMultidimensional(self): with self.test_session(): batch_size = 6 lam = constant_op.constant([[2.0, 4.0, 5.0]] * batch_size) lam_v = [2.0, 4.0, 5.0] - x = np.array([[2.2, 3.1, 4., 5.5, 6., 7.]], dtype=np.float32).T + x = np.array([[2., 3., 4., 5., 6., 7.]], dtype=np.float32).T poisson = self._make_poisson(rate=lam) log_cdf = poisson.log_cdf(x) ",0,train e3b0a4291984f1af0cb8bf512542dffaca2d6cb5,tensorflow/tensorflow,"Allow non-integer values for Poisson CDF/PMF. PiperOrigin-RevId: 186502845",poisson.py,"@@ -35,9 +35,15 @@ __all__ = [ _poisson_sample_note = """""" -Note that the input value must be a non-negative floating point tensor with -dtype `dtype` and whose shape can be broadcast with `self.rate`. `x` is only -legal if it is non-negative and its components are equal to integer values. +The Poisson distribution is technically only defined for non-negative integer +values. When `validate_args=False`, non-integral inputs trigger an assertion. + +When `validate_args=False` calculations are otherwise unchanged despite +integral or non-integral inputs. + +When `validate_args=False`, evaluating the pmf at non-integral values, +corresponds to evaluations of an unnormalized distribution, that does not +correspond to evaluations of the cdf. """""" @@ -150,10 +156,6 @@ class Poisson(distribution.Distribution): def _cdf(self, x): if self.validate_args: x = distribution_util.embed_check_nonnegative_integer_form(x) - else: - # Whether or not x is integer-form, the following is well-defined. - # However, scipy takes the floor, so we do too. - x = math_ops.floor(x) return math_ops.igammac(1. + x, self.rate) def _log_normalization(self): @@ -162,9 +164,6 @@ class Poisson(distribution.Distribution): def _log_unnormalized_prob(self, x): if self.validate_args: x = distribution_util.embed_check_nonnegative_integer_form(x) - else: - # For consistency with cdf, we take the floor. - x = math_ops.floor(x) return x * self.log_rate - math_ops.lgamma(1. + x) def _mean(self): ",0,train 9cd2f8cb1a2b67c6d8d558349746a367168fc3b4,tensorflow/tensorflow,"Add delegate support to external C API Add TFL_InterpreterOptionsAddDelegate to allow delegate injection during interpreter creation. 
PiperOrigin-RevId: 260734684",c_api.cc,"@@ -123,6 +123,12 @@ TFL_Interpreter* TFL_NewInterpreter( TFL_InterpreterOptions::kDefaultNumThreads) { interpreter->SetNumThreads(optional_options->num_threads); } + + for (auto* delegate : optional_options->delegates) { + if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) { + return nullptr; + } + } } return new TFL_Interpreter{model->impl, std::move(optional_error_reporter), ",0,train 9cd2f8cb1a2b67c6d8d558349746a367168fc3b4,tensorflow/tensorflow,"Add delegate support to external C API Add TFL_InterpreterOptionsAddDelegate to allow delegate injection during interpreter creation. PiperOrigin-RevId: 260734684",c_api_experimental.cc,"@@ -41,6 +41,11 @@ void TFL_InterpreterOptionsAddCustomOp(TFL_InterpreterOptions* options, options->op_resolver.AddCustom(name, registration, min_version, max_version); } +void TFL_InterpreterOptionsAddDelegate(TFL_InterpreterOptions* options, + TFL_Delegate* delegate) { + options->delegates.push_back(delegate); +} + #ifdef __cplusplus } // extern ""C"" #endif // __cplusplus ",0,train 9cd2f8cb1a2b67c6d8d558349746a367168fc3b4,tensorflow/tensorflow,"Add delegate support to external C API Add TFL_InterpreterOptionsAddDelegate to allow delegate injection during interpreter creation. PiperOrigin-RevId: 260734684",c_api_experimental.h,"@@ -23,6 +23,7 @@ extern ""C"" { #endif // __cplusplus typedef TfLiteBuiltinOperator TFL_BuiltinOperator; +typedef TfLiteDelegate TFL_Delegate; // Resets all variable tensors to zero. TFL_CAPI_EXPORT extern TFL_Status TFL_InterpreterResetVariableTensors( @@ -42,12 +43,22 @@ TFL_CAPI_EXPORT void TFL_InterpreterOptionsAddBuiltinOp( // // NOTE: The interpreter will make a copy of `registration` internally, so the // caller should ensure that its contents (function pointers, etc...) remain -// valid for the duration of the interpreter's lifetime. A common practice is -// making the provided TFL_Registration instance static. +// valid for the duration of any created interpreter's lifetime. A common +// practice is making the provided TFL_Registration instance static. TFL_CAPI_EXPORT void TFL_InterpreterOptionsAddCustomOp( TFL_InterpreterOptions* options, const char* name, const TFL_Registration* registration, int min_version, int max_version); +// Adds a delegate to be applied during `TFL_Interpreter` creation. +// +// If delegate application fails, interpreter creation will also fail with an +// associated error logged. +// +// NOTE: The caller retains ownership of the delegate and should ensure that it +// remains valid for the duration of any created interpreter's lifetime. +TFL_CAPI_EXPORT extern void TFL_InterpreterOptionsAddDelegate( + TFL_InterpreterOptions* options, TFL_Delegate* delegate); + #ifdef __cplusplus } // extern ""C"" #endif // __cplusplus ",0,train 9cd2f8cb1a2b67c6d8d558349746a367168fc3b4,tensorflow/tensorflow,"Add delegate support to external C API Add TFL_InterpreterOptionsAddDelegate to allow delegate injection during interpreter creation. 
PiperOrigin-RevId: 260734684",c_api_experimental_test.cc,"@@ -32,7 +32,7 @@ TfLiteRegistration* GetDummyRegistration() { return ®istration; } -TEST(CApiExperimentalSimple, Smoke) { +TEST(CApiExperimentalTest, Smoke) { TFL_Model* model = TFL_NewModelFromFile( ""tensorflow/lite/testdata/add.bin""); ASSERT_NE(model, nullptr); @@ -52,6 +52,52 @@ TEST(CApiExperimentalSimple, Smoke) { TFL_DeleteModel(model); } +TEST(CApiExperimentalTest, Delegate) { + TFL_Model* model = + TFL_NewModelFromFile(""tensorflow/lite/testdata/add.bin""); + + // Create and install a delegate instance. + bool delegate_prepared = false; + TfLiteDelegate delegate = TfLiteDelegateCreate(); + delegate.data_ = &delegate_prepared; + delegate.Prepare = [](TfLiteContext* context, TfLiteDelegate* delegate) { + *static_cast(delegate->data_) = true; + return kTfLiteOk; + }; + TFL_InterpreterOptions* options = TFL_NewInterpreterOptions(); + TFL_InterpreterOptionsAddDelegate(options, &delegate); + TFL_Interpreter* interpreter = TFL_NewInterpreter(model, options); + + // The delegate should have been applied. + EXPECT_TRUE(delegate_prepared); + + // Subsequent exectuion should behave properly (the delegate is a no-op). + TFL_DeleteInterpreterOptions(options); + TFL_DeleteModel(model); + EXPECT_EQ(TFL_InterpreterInvoke(interpreter), kTfLiteOk); + TFL_DeleteInterpreter(interpreter); +} + +TEST(CApiExperimentalTest, DelegateFails) { + TFL_Model* model = + TFL_NewModelFromFile(""tensorflow/lite/testdata/add.bin""); + + // Create and install a delegate instance. + TfLiteDelegate delegate = TfLiteDelegateCreate(); + delegate.Prepare = [](TfLiteContext* context, TfLiteDelegate* delegate) { + return kTfLiteError; + }; + TFL_InterpreterOptions* options = TFL_NewInterpreterOptions(); + TFL_InterpreterOptionsAddDelegate(options, &delegate); + TFL_Interpreter* interpreter = TFL_NewInterpreter(model, options); + + // Interpreter creation should fail as delegate preparation failed. + EXPECT_EQ(nullptr, interpreter); + + TFL_DeleteInterpreterOptions(options); + TFL_DeleteModel(model); +} + } // namespace int main(int argc, char** argv) { ",0,train 9cd2f8cb1a2b67c6d8d558349746a367168fc3b4,tensorflow/tensorflow,"Add delegate support to external C API Add TFL_InterpreterOptionsAddDelegate to allow delegate injection during interpreter creation. 
PiperOrigin-RevId: 260734684",c_api_internal.h,"@@ -43,6 +43,8 @@ struct TFL_InterpreterOptions { void (*error_reporter)(void* user_data, const char* format, va_list args) = nullptr; void* error_reporter_user_data = nullptr; + + std::vector delegates; }; struct TFL_Interpreter { ",0,train 67b82da322acd410b00d08bc05997ecbdb88717f,tensorflow/tensorflow,+ Test for DaskDataFeeder,data_feeder.py,"@@ -311,12 +311,18 @@ class DaskDataFeeder(object): # combine into a data frame self.df = dd.multi.concat([X, y], axis=1) self.n_classes = n_classes - X_shape = tuple([X.count().compute()]) + + X_count = X.count().compute() + if len(X_count) == 1: + X_shape = tuple([X.count().compute()]) + else: + # TODO: Support multi-dimensional + ValueError(""Only one dimensional input for DaskDataFeeder is supported now."") y_shape = tuple([y.count().compute()]) self.sample_fraction = batch_size/float(list(X_shape)[0]) self.input_shape, self.output_shape = _get_in_out_shape( X_shape, y_shape, n_classes, batch_size) - self.input_dtype, self.output_dtype = X.dtype, y.dtype + self.input_dtype, self.output_dtype = X.dtypes, y.dtypes # TODO: dtypes for dataframe if random_state is None: self.random_state = np.random.RandomState(42) else: @@ -336,8 +342,8 @@ class DaskDataFeeder(object): def _feed_dict_fn(): # TODO: option for with/without replacement (dev version of dask) sample = self.df.random_split([self.sample_fraction, 1-self.sample_fraction], - random_state=self.random_state)[0] - inp = sample[self.X_columns] - out = sample[self.y_columns] + random_state=self.random_state) + inp = sample[0][self.X_columns] + out = sample[0][self.y_columns] return {input_placeholder.name: inp, output_placeholder.name: out} return _feed_dict_fn ",0,train 67b82da322acd410b00d08bc05997ecbdb88717f,tensorflow/tensorflow,+ Test for DaskDataFeeder,test_data_feeder.py,"@@ -83,6 +83,21 @@ class DataFeederTest(tf.test.TestCase): self.assertAllClose(feed_dict['input'], [[1, 2], [3, 4]]) self.assertAllClose(feed_dict['output'], [1, 2]) + def test_dask_data_feeder(self): + X = pd.DataFrame(dict(a=list('aabbcc'))) + X = dd.from_pandas(X, npartitions=3) + y = pd.DataFrame(dict(labels=list('010011'))) + y = dd.from_pandas(y, npartitions=3) + X = _construct_dask_df_with_divisions(X) + y = _construct_dask_df_with_divisions(y) + + df = DaskDataFeeder(X, y, n_classes=2, batch_size=2) + feed_dict_fn = df.get_feed_dict_fn( + MockPlaceholder(name='input'), + MockPlaceholder(name='output')) + feed_dict = feed_dict_fn() + + if __name__ == '__main__': tf.test.main() ",0,train 5ff27167b274a7471b35ba80491004093a3f6133,tensorflow/tensorflow,"Don't automatically add control deps to collective ops. These ops need to run asynchronously to avoid deadlock. PiperOrigin-RevId: 226397820",auto_control_deps.py,"@@ -29,13 +29,22 @@ from tensorflow.python.ops import tensor_array_ops from tensorflow.python.util import nest from tensorflow.python.util import tf_decorator +# Op types that should not run in program order, e.g. because they need to run +# asynchronously to avoid deadlock. +ASYNC_STATEFUL_OPS = [ + ""CollectiveReduce"", + ""CollectiveBcastSend"", + ""CollectiveBcastRecv"", +] + class AutomaticControlDependencies(object): """"""Context manager to automatically add control dependencies. Code under this context manager will act as if a sensible set of control dependencies were present. More specifically: - 1. All stateful ops in the scope will execute + 1. All stateful ops in the scope will execute (with the exception of ops in + ASYNC_STATEFUL_OPS) 2. 
Stateful ops which modify the same resource will execute in program order Note: creating variables in an automatic control dependencies context is not @@ -223,7 +232,8 @@ class AutomaticControlDependencies(object): control_inputs = set() # Ensure stateful ops run if (op.type not in self._graph._registered_ops # pylint: disable=protected-access - or self._graph._registered_ops[op.type].is_stateful): # pylint: disable=protected-access + or (self._graph._registered_ops[op.type].is_stateful # pylint: disable=protected-access + and op.type not in ASYNC_STATEFUL_OPS)): ops_which_must_run.add(op) # Ignore switches (they're handled separately) if op.type == ""Switch"" and op.inputs[0].dtype == dtypes_module.resource: @@ -255,8 +265,8 @@ class AutomaticControlDependencies(object): if inp in merge_for_resource: merge_for_resource[inp]._add_control_input(op) # pylint: disable=protected-access last_op_using_resource_tensor[inp] = op - if (op.op_def.is_stateful and not found_resource - and op._control_flow_context is None): # pylint: disable=protected-access + if (op.op_def.is_stateful and op.type not in ASYNC_STATEFUL_OPS + and not found_resource and op._control_flow_context is None): # pylint: disable=protected-access if None in last_op_using_resource_tensor: op._add_control_input(last_op_using_resource_tensor[None]) # pylint: disable=protected-access last_op_using_resource_tensor[None] = op ",0,train a74f9c3c612586ba4581bd6324a7c1ced69ec5a3,tensorflow/tensorflow,"Add support to for composite tensors, such as SparseTensor and RaggedTensor, to while_v2 PiperOrigin-RevId: 245285953",ops.py,"@@ -1761,10 +1761,10 @@ class IndexedSlices(_TensorLike, composite_tensor.CompositeTensor): if shape is None: shape = self._values.shape if self._dense_shape is None: - return [shape, shape[:1]] # values, indices + return (shape, shape[:1]) # values, indices else: # values, indices, dense_shape - return [shape, shape[:1], tensor_shape.TensorShape([shape.ndims])] + return (shape, shape[:1], tensor_shape.TensorShape([shape.ndims])) @property def _is_graph_tensor(self): ",0,train a74f9c3c612586ba4581bd6324a7c1ced69ec5a3,tensorflow/tensorflow,"Add support to for composite tensors, such as SparseTensor and RaggedTensor, to while_v2 PiperOrigin-RevId: 245285953",sparse_tensor.py,"@@ -250,11 +250,11 @@ class SparseTensor(_TensorLike, composite_tensor.CompositeTensor): raise ValueError(""Shape invariant for SparseTensor must have the form "" ""TensorShape([r]), got %r"" % shape) rank = tensor_shape.dimension_value(shape[0]) - return [ + return ( tensor_shape.TensorShape([None, rank]), # indices tensor_shape.TensorShape([None]), # values tensor_shape.TensorShape([rank]) # dense_shape - ] + ) @property def _is_graph_tensor(self): ",0,train a74f9c3c612586ba4581bd6324a7c1ced69ec5a3,tensorflow/tensorflow,"Add support to for composite tensors, such as SparseTensor and RaggedTensor, to while_v2 PiperOrigin-RevId: 245285953",control_flow_ops_py_test.py,"@@ -1790,6 +1790,18 @@ class ControlFlowTest(test.TestCase): r = r[1] * array_ops.ones([8, 8]) self.assertAllEqual(np.ones((8, 8)), self.evaluate(r)) + @test_util.disable_control_flow_v2(""b/131265085"") + @test_util.run_v1_only(""b/131265085"") + def testWhileBadShape(self): + x = constant_op.constant([2.0, 4.0], name=""values"") + i = constant_op.constant(0) + c = lambda i, _: math_ops.less(i, 10) + b = lambda i, x: [i + 1, x + 1] + with self.assertRaisesRegexp(ValueError, ""is not compatible with""): + # Shape of x is [2], but we specify a shape of [5]. 
+ control_flow_ops.while_loop( + c, b, [i, x], [i.shape, tensor_shape.TensorShape([5])]) + @test_util.run_deprecated_v1 def testWhileWithNonTensorInput_Scalar(self): with self.cached_session(): @@ -1807,7 +1819,6 @@ class ControlFlowTest(test.TestCase): r = control_flow_ops.while_loop(c, b, [n], parallel_iterations=20) self.assertEqual([10000], self.evaluate(r)) - @test_util.run_v1_only(""b/120545219"") def testWhileShapeInference(self): with self.cached_session(): i = constant_op.constant(0) @@ -1822,19 +1833,23 @@ class ControlFlowTest(test.TestCase): r = control_flow_ops.while_loop( c, b, [i, m], [i.get_shape(), tensor_shape.TensorShape([None, 2])]) - self.assertIsNone(r[1].shape.dims[0].value) - self.assertEqual(r[1].shape.dims[1], tensor_shape.Dimension(2)) + self.assertTrue(r[1].shape.is_compatible_with([8, 2])) + @test_util.run_v1_only(""b/120545219"") + def testWhileShapeInferenceBadShape(self): + with self.cached_session(): + i = constant_op.constant(0) + m = array_ops.ones([2, 2]) + c = lambda i, j: math_ops.less(i, 2) + b = lambda i, j: [i + 1, array_ops.concat([j, j], 0)] with self.assertRaisesRegexp( ValueError, r""Input tensor 'ones:0' enters the loop with shape \(2, 2\), but has "" r""shape \(4, 2\) after one iteration. To allow the shape to vary "" r""across iterations, use the `shape_invariants` argument of "" r""tf.while_loop to specify a less-specific shape.""): - r = control_flow_ops.while_loop(c, b, [i, m]) + control_flow_ops.while_loop(c, b, [i, m]) - @test_util.disable_control_flow_v2(""b/116328420 (SparseTensor)"") - @test_util.run_v1_only(""b/120545219"") def testWhileShapeInferenceSparseTensor(self): values = constant_op.constant([2.0, 4.0], name=""values"") indices = constant_op.constant([[0], [3]], @@ -1873,61 +1888,72 @@ class ControlFlowTest(test.TestCase): array_ops.concat([x.dense_shape, [10]], axis=0)) ] + def check_shapes(r, indices, values, dense_shape): + self.assertTrue(r.indices.shape.is_compatible_with(indices)) + self.assertTrue(r.values.shape.is_compatible_with(values)) + self.assertTrue(r.dense_shape.shape.is_compatible_with(dense_shape)) + # Default shape invariant; b1 only modifies values. _, r = control_flow_ops.while_loop(c, b1, [i, x]) - self.assertEqual(r.indices.get_shape().as_list(), [None, 1]) - self.assertEqual(r.values.get_shape().as_list(), [None]) - self.assertEqual(r.dense_shape.get_shape().as_list(), [1]) + check_shapes(r, indices=[None, 1], values=[None], dense_shape=[1]) # Default shape invariant; b2 adds new values _, r = control_flow_ops.while_loop(c, b2, [i, x]) - self.assertEqual(r.indices.get_shape().as_list(), [None, 1]) - self.assertEqual(r.values.get_shape().as_list(), [None]) - self.assertEqual(r.dense_shape.get_shape().as_list(), [1]) - - # Default shape invariant; b3 modifies rank (which is not allowed). - with self.assertRaises(ValueError): - _, r = control_flow_ops.while_loop(c, b3, [i, x]) + check_shapes(r, indices=[None, 1], values=[None], dense_shape=[1]) # Explicit shape invariant, allowing any rank; b1 only modifies values. _, r = control_flow_ops.while_loop( c, b1, [i, x], [i.get_shape(), tensor_shape.TensorShape([None])]) - self.assertEqual(r.indices.get_shape().as_list(), [None, None]) - self.assertEqual(r.values.get_shape().as_list(), [None]) - self.assertEqual(r.dense_shape.get_shape().as_list(), [None]) + check_shapes(r, indices=[None, None], values=[None], dense_shape=[None]) # Explicit shape invariant, allowing any rank; b3 modifies rank. 
_, r = control_flow_ops.while_loop( c, b3, [i, x], [i.get_shape(), tensor_shape.TensorShape([None])]) - self.assertEqual(r.indices.get_shape().as_list(), [None, None]) - self.assertEqual(r.values.get_shape().as_list(), [None]) - self.assertEqual(r.dense_shape.get_shape().as_list(), [None]) + check_shapes(r, indices=[None, None], values=[None], dense_shape=[None]) # Shape invariant with ndims=None. Technically, this isn't supported # according to the docs, but we support it for backwards compatibility. _, r = control_flow_ops.while_loop( c, b1, [i, x], [i.get_shape(), tensor_shape.TensorShape(None)]) - self.assertEqual(r.indices.get_shape().as_list(), [None, None]) - self.assertEqual(r.values.get_shape().as_list(), [None]) - self.assertEqual(r.dense_shape.get_shape().as_list(), [None]) + check_shapes(r, indices=[None, None], values=[None], dense_shape=[None]) _, r = control_flow_ops.while_loop( c, b3, [i, x], [i.get_shape(), tensor_shape.TensorShape(None)]) - self.assertEqual(r.indices.get_shape().as_list(), [None, None]) - self.assertEqual(r.values.get_shape().as_list(), [None]) - self.assertEqual(r.dense_shape.get_shape().as_list(), [None]) + check_shapes(r, indices=[None, None], values=[None], dense_shape=[None]) + + @test_util.disable_control_flow_v2(""b/131265085"") + @test_util.run_v1_only(""b/131265085"") + def testWhileBadShapeSparseTensor(self): + values = constant_op.constant([2.0, 4.0], name=""values"") + indices = constant_op.constant([[0], [3]], + dtype=dtypes.int64, + name=""indices"") + shape = constant_op.constant([10], dtype=dtypes.int64, name=""dense_shape"") + i = constant_op.constant(0) + x = sparse_tensor.SparseTensor(indices, values, dense_shape=shape) + c = lambda i, _: i < 10 + b1 = lambda i, x: [i+1, x] + def b2(i, x): # modifies rank. (shape of all components is changed.) + return [ + i + 1, + sparse_tensor.SparseTensor( + array_ops.concat([x.indices, [[i], [i]]], axis=1), x.values * 2.0, + array_ops.concat([x.dense_shape, [10]], axis=0)) + ] # Explicit shape invariant, with a specific (incompatible) rank. with self.assertRaisesRegexp(ValueError, ""is not compatible with""): - _, r = control_flow_ops.while_loop( + control_flow_ops.while_loop( c, b1, [i, x], [i.get_shape(), tensor_shape.TensorShape([5])]) - @test_util.disable_control_flow_v2(""b/116282023 (IndexedSlices)"") - @test_util.run_v1_only(""b/120545219"") + # Default shape invariant, but b2 modifies rank (which is not allowed). 
+ with self.assertRaises(ValueError): + control_flow_ops.while_loop(c, b2, [i, x]) + def testWhileShapeInferenceIndexedSlices(self): with self.cached_session(): values = constant_op.constant([[2.0, 4.0], [3.0, 5.0]], name=""values"") @@ -1953,17 +1979,28 @@ class ControlFlowTest(test.TestCase): c, b, [i, x], [i.get_shape(), tensor_shape.TensorShape([None, 2])]) self.assertEqual(r.dense_shape.get_shape()[0], 2) - self.assertEqual(r.values.get_shape().as_list(), [None, 2]) + self.assertTrue(r.values.get_shape().is_compatible_with([None, 2])) + + @test_util.disable_control_flow_v2(""b/131265085"") + @test_util.run_v1_only(""b/131265085"") + def testWhileBadShapeIndexedSlices(self): + values = constant_op.constant([2.0, 4.0], name=""values"") + indices = constant_op.constant([[0], [3]], + dtype=dtypes.int64, + name=""indices"") + shape = constant_op.constant([10], dtype=dtypes.int64, name=""dense_shape"") + i = constant_op.constant(0) + x = sparse_tensor.SparseTensor(indices, values, dense_shape=shape) + c = lambda i, _: 10 + b = lambda i, x: [i+1, x] - with self.assertRaisesRegexp(ValueError, ""is not compatible with""): - _, r = control_flow_ops.while_loop( - c, b, [i, x], - [i.get_shape(), tensor_shape.TensorShape([None, 5])]) + # Explicit shape invariant, with a specific (incompatible) rank. + with self.assertRaisesRegexp(ValueError, ""is not compatible with""): + control_flow_ops.while_loop( + c, b, [i, x], + [i.get_shape(), tensor_shape.TensorShape([5])]) - @test_util.disable_control_flow_v2(""b/116328420 (RaggedTensor)"") def testWhileShapeInferenceRaggedTensor(self): - if context.executing_eagerly(): - self.skipTest(""b/116328420"") i = constant_op.constant(0) x = ragged_factory_ops.constant([[1, 2], [3], [4, 5, 6]]) c = lambda i, _: i < 10 @@ -1980,11 +2017,13 @@ class ControlFlowTest(test.TestCase): array_ops.concat([x, x], axis=0) ] + def check_shapes(r, values, splits): + self.assertTrue(r.values.shape.is_compatible_with(values)) + self.assertTrue(r.row_splits.shape.is_compatible_with(splits)) + # Default shape invariant; b1 adds new values to rows. _, r = control_flow_ops.while_loop(c, b1, [i, x]) - self.assertEqual(r.row_splits.shape.as_list(), [4]) - - self.assertTrue(r.values.shape.as_list() in ([6 * 2**10], [None])) + check_shapes(r, values=[None], splits=[4]) # Default shape invariant; b2 adds new rows (not allowed). if not context.executing_eagerly(): @@ -1995,20 +2034,15 @@ class ControlFlowTest(test.TestCase): _, r = control_flow_ops.while_loop( c, b1, [i, x], [i.get_shape(), tensor_shape.TensorShape([None, None])]) - self.assertTrue(r.row_splits.shape.as_list() in ([4], [None])) - self.assertTrue(r.values.shape.as_list() in ([6 * 2**10], [None])) + check_shapes(r, values=[None], splits=[None]) # Explicit shape invariant; b2 adds new rows. 
_, r = control_flow_ops.while_loop( c, b2, [i, x], [i.get_shape(), tensor_shape.TensorShape([None, None])]) - self.assertTrue(r.row_splits.shape.as_list() in ([3 * 2**10 + 1], [None])) - self.assertTrue(r.values.shape.as_list() in ([6 * 2**10], [None])) + check_shapes(r, values=[None], splits=[None]) - @test_util.disable_control_flow_v2(""b/116328420 (RaggedTensor)"") def testWhileShapeInferenceRaggedTensorRaggedRank2(self): - if context.executing_eagerly(): - self.skipTest(""b/116328420"") i = constant_op.constant(0) x = ragged_factory_ops.constant([[[1, 2], [3], [4, 5, 6]], [[], [8, 9, 10]]]) @@ -3473,8 +3507,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(0, value_x) self.assertEqual(73, value_x_grad) - @test_util.disable_control_flow_v2(""b/116282023 (IndexedSlices)"") - @test_util.run_v1_only(""b/120545219"") + @test_util.deprecated_graph_mode_only def testWhileGrad_IndexedSlices(self): with self.cached_session(): values = constant_op.constant([2.0, 4.0], name=""values"") @@ -3496,8 +3529,7 @@ class ControlFlowTest(test.TestCase): r = gradients_impl.gradients(r.values, values)[0] self.assertAllClose(np.array([1024.0, 1024.0]), self.evaluate(r)) - @test_util.disable_control_flow_v2(""b/116328420 (SparseTensor)"") - @test_util.run_v1_only(""b/120545219"") + @test_util.deprecated_graph_mode_only def testWhileGrad_SparseTensor(self): with self.cached_session(): values = constant_op.constant([2.0, 4.0], name=""values"") @@ -3520,7 +3552,7 @@ class ControlFlowTest(test.TestCase): r = gradients_impl.gradients(r.values, values)[0] self.assertAllClose(np.array([1024.0, 1024.0]), self.evaluate(r)) - @test_util.run_v1_only(""b/120545219"") + @test_util.deprecated_graph_mode_only def testCallGradInLoop(self): with self.cached_session() as sess: i0 = constant_op.constant(0) ",0,train a74f9c3c612586ba4581bd6324a7c1ced69ec5a3,tensorflow/tensorflow,"Add support to for composite tensors, such as SparseTensor and RaggedTensor, to while_v2 PiperOrigin-RevId: 245285953",control_flow_ops.py,"@@ -3466,7 +3466,7 @@ def while_loop(cond, return x return ops.convert_to_tensor(x) - loop_vars = nest.map_structure(convert, loop_vars) + loop_vars = nest.map_structure(convert, loop_vars, expand_composites=True) if maximum_iterations is not None: return loop_vars[1] else: ",0,train a74f9c3c612586ba4581bd6324a7c1ced69ec5a3,tensorflow/tensorflow,"Add support to for composite tensors, such as SparseTensor and RaggedTensor, to while_v2 PiperOrigin-RevId: 245285953",while_v2.py,"@@ -72,12 +72,18 @@ def while_loop(cond, # `wrapped_body` below. loop_vars = list(_tensor_array_to_flow(orig_loop_vars)) loop_vars = nest.map_structure( - ops.internal_convert_to_tensor_or_indexed_slices, loop_vars) + ops.internal_convert_to_tensor_or_indexed_slices, loop_vars, + expand_composites=True) if shape_invariants is not None: - nest.assert_same_structure(orig_loop_vars, shape_invariants) + nest.assert_same_structure(orig_loop_vars, shape_invariants, + expand_composites=False) + shape_invariants = nest.map_structure( + control_flow_ops._get_shape_invariant, loop_vars, + list(shape_invariants), expand_composites=False) else: - shape_invariants = nest.map_structure(lambda t: t.shape, loop_vars) - + shape_invariants = nest.map_structure( + control_flow_ops._get_shape_invariant, loop_vars, + expand_composites=False) if not name: name = ""while"" @@ -150,11 +156,12 @@ def while_loop(cond, # `orig_loop_vars` and `args`, converts flows in `args` to TensorArrays # and packs it into the structure of `orig_loop_vars`. 
outputs = body(*_pack_sequence_as(orig_loop_vars, args)) - if not nest.is_sequence(outputs): + if not nest.is_sequence_or_composite(outputs): outputs = [outputs] # Compare the structure of input and output of body converting the # top-level tuples to list to be compatible with legacy while_loop. - nest.assert_same_structure(list(outputs), list(orig_loop_vars)) + nest.assert_same_structure(list(outputs), list(orig_loop_vars), + expand_composites=True) outputs = _tensor_array_to_flow(outputs) @@ -193,7 +200,8 @@ def while_loop(cond, # Make sure that the shapes of the loop outputs are compatible with the # shape invariants, or the shapes of the loop vars if the invariants are not # specified. - num_flattened_outputs = len(nest.flatten(orig_loop_vars)) + num_flattened_outputs = len(nest.flatten(orig_loop_vars, + expand_composites=True)) # First var is loop counter and second var is maximum_iterations. first_loop_var_index = 2 _check_shapes_compat( @@ -201,10 +209,10 @@ def while_loop(cond, num_flattened_outputs], nest.flatten( shape_invariants[first_loop_var_index:first_loop_var_index + - len_orig_loop_vars]), + len_orig_loop_vars], expand_composites=True), nest.flatten(loop_vars[first_loop_var_index:first_loop_var_index + - len_orig_loop_vars])) - flattened_loop_vars = nest.flatten(loop_vars) + len_orig_loop_vars], expand_composites=True)) + flattened_loop_vars = nest.flatten(loop_vars, expand_composites=True) _check_num_inputs_outputs(cond_graph, body_graph, len(flattened_loop_vars)) @@ -237,7 +245,7 @@ def while_loop(cond, if return_same_structure: return outputs - flattened_outputs = nest.flatten(outputs) + flattened_outputs = nest.flatten(outputs, expand_composites=True) if len(flattened_outputs) == 1: return flattened_outputs[0] else: @@ -905,9 +913,11 @@ def _pack_sequence_as(structure_with_tas, loop_vars): flattened_loop_vars = [ flow_to_tensor_array(*z) - for z in zip(nest.flatten(loop_vars), nest.flatten(structure_with_tas)) + for z in zip(nest.flatten(loop_vars, expand_composites=True), + nest.flatten(structure_with_tas, expand_composites=True)) ] - return nest.pack_sequence_as(structure_with_tas, flattened_loop_vars) + return nest.pack_sequence_as(structure_with_tas, flattened_loop_vars, + expand_composites=True) def _tensor_array_to_flow(loop_vars): @@ -917,14 +927,15 @@ def _tensor_array_to_flow(loop_vars): return maybe_ta.flow return maybe_ta - return nest.map_structure(f, loop_vars) + return nest.map_structure(f, loop_vars, expand_composites=True) def _build_signature(loop_vars, shape_invariants): return nest.pack_sequence_as(loop_vars, [ tensor_spec.TensorSpec(s, t.dtype, name=t.op.name) - for s, t in zip(nest.flatten(shape_invariants), nest.flatten(loop_vars)) - ]) + for s, t in zip(nest.flatten(shape_invariants, expand_composites=True), + nest.flatten(loop_vars, expand_composites=True)) + ], expand_composites=True) def _build_maximum_iterations_loop_var(maximum_iterations): ",0,train aeef5bba09657c009de6162235302d0c6fd54998,tensorflow/tensorflow,"Delay transpiler initialzation to mitigate effects of circular imports. PiperOrigin-RevId: 350751088 Change-Id: Ib5431b139e6d0adf2d0eec2d34bc1b21ad687256",tfr_gen.py,"@@ -334,7 +334,7 @@ _AG_FIXED_RETURN_TYPE = { QN = qual_names.QN # TODO(mdan): Fix this with an importable module. 
-AG_MODULE = api._TRANSPILER._extra_locals['ag__'] # pylint:disable=protected-access +AG_MODULE = api._TRANSPILER.get_extra_locals()['ag__'] # pylint:disable=protected-access class TFRTypeResolver(type_inference.Resolver): ",0,train aeef5bba09657c009de6162235302d0c6fd54998,tensorflow/tensorflow,"Delay transpiler initialzation to mitigate effects of circular imports. PiperOrigin-RevId: 350751088 Change-Id: Ib5431b139e6d0adf2d0eec2d34bc1b21ad687256",api.py,"@@ -209,30 +209,31 @@ class PyToTF(transpiler.PyToPy): def __init__(self): super(PyToTF, self).__init__() - - # TODO(mdan): Move into core or replace with an actual importable module. - # Craft a module that exposes the external API as well as certain - # internal modules. - ag_internal = imp.new_module('autograph') - ag_internal.__dict__.update(inspect.getmodule(PyToTF).__dict__) - ag_internal.ConversionOptions = converter.ConversionOptions - ag_internal.STD = converter.STANDARD_OPTIONS - ag_internal.Feature = converter.Feature - ag_internal.utils = utils - ag_internal.FunctionScope = function_wrappers.FunctionScope - ag_internal.with_function_scope = function_wrappers.with_function_scope - # TODO(mdan): Add safeguards against name clashes. - # We don't want to create a submodule because we want the operators to be - # accessible as ag__. - ag_internal.__dict__.update(special_functions.__dict__) - ag_internal.__dict__.update(operators.__dict__) - - self._extra_locals = {'ag__': ag_internal} + self._extra_locals = None def get_transformed_name(self, node): return 'tf__' + super(PyToTF, self).get_transformed_name(node) def get_extra_locals(self): + if self._extra_locals is None: + # TODO(mdan): Move into core or replace with an actual importable module. + # Craft a module that exposes the external API as well as certain + # internal modules. + ag_internal = imp.new_module('autograph') + ag_internal.__dict__.update(inspect.getmodule(PyToTF).__dict__) + ag_internal.ConversionOptions = converter.ConversionOptions + ag_internal.STD = converter.STANDARD_OPTIONS + ag_internal.Feature = converter.Feature + ag_internal.utils = utils + ag_internal.FunctionScope = function_wrappers.FunctionScope + ag_internal.with_function_scope = function_wrappers.with_function_scope + # TODO(mdan): Add safeguards against name clashes. + # We don't want to create a submodule because we want the operators to be + # accessible as ag__. 
+ ag_internal.__dict__.update(special_functions.__dict__) + ag_internal.__dict__.update(operators.__dict__) + + self._extra_locals = {'ag__': ag_internal} return self._extra_locals def get_caching_key(self, ctx): ",0,train c58b80eb60443f97161ff674670166346a586b05,tensorflow/tensorflow,remove redundant test in random_seed_test,random_seed_test.py,"@@ -24,7 +24,6 @@ from absl.testing import parameterized from tensorflow.python.data.kernel_tests import test_base from tensorflow.python.data.util import random_seed as data_random_seed -from tensorflow.python.eager import context from tensorflow.python.framework import combinations from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -125,24 +124,15 @@ class RandomSeedTest(test_base.DatasetTestBase, parameterized.TestCase): def testRandomSeed(self, input_fn, output_fn): tinput, toutput = input_fn._obj(), output_fn._obj() # pylint: disable=protected-access - def check(tinput, toutput): - random_seed.set_random_seed(tinput[0]) - g_seed, op_seed = data_random_seed.get_seed(tinput[1]) - g_seed = self.evaluate(g_seed) - op_seed = self.evaluate(op_seed) - msg = 'test_case = {0}, got {1}, want {2}'.format( - tinput, (g_seed, op_seed), toutput) - self.assertEqual((g_seed, op_seed), toutput, msg=msg) - random_seed.set_random_seed(None) + random_seed.set_random_seed(tinput[0]) + g_seed, op_seed = data_random_seed.get_seed(tinput[1]) + g_seed = self.evaluate(g_seed) + op_seed = self.evaluate(op_seed) + msg = 'test_case = {0}, got {1}, want {2}'.format( + tinput, (g_seed, op_seed), toutput) + self.assertEqual((g_seed, op_seed), toutput, msg=msg) + random_seed.set_random_seed(None) - check(tinput=tinput, toutput=toutput) - - if not context.executing_eagerly(): - random_seed.set_random_seed(1) - for i in range(10): - tinput = (1, None) - toutput = (1, i) - check(tinput=tinput, toutput=toutput) if __name__ == '__main__': test.main() ",0,train 8bf2eafa476610aa60bfe4194d3517742dc3ebcc,tensorflow/tensorflow,"Update tensorrt test to not rely on Keras. Replace the initializer with tf init_ops, and the keras.dataset with tfds. 
PiperOrigin-RevId: 304659361 Change-Id: I8016c1607aa57c419dfca229c42fd4c0403b90f1",quantization_mnist_test.py,"@@ -18,10 +18,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function + +import tensorflow_datasets as tfds + from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import get_linked_tensorrt_version from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import is_tensorrt_enabled from tensorflow.core.protobuf import config_pb2 -from tensorflow.python import keras from tensorflow.python.compiler.tensorrt import trt_convert from tensorflow.python.data.ops import dataset_ops from tensorflow.python.estimator.estimator import Estimator @@ -33,10 +35,10 @@ from tensorflow.python.framework import graph_util from tensorflow.python.framework import importer from tensorflow.python.framework import ops from tensorflow.python.framework import test_util -from tensorflow.python.keras.datasets import mnist from tensorflow.python.layers import layers from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_array_ops +from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import metrics from tensorflow.python.ops import nn @@ -81,12 +83,12 @@ class QuantizationAwareTrainingMNISTTest(test_util.TensorFlowTestCase): 'kernel', shape=[num_inputs, num_outputs], dtype=dtypes.float32, - initializer=keras.initializers.glorot_uniform()) + initializer=init_ops.GlorotUniform()) bias = variable_scope.get_variable( 'bias', shape=[num_outputs], dtype=dtypes.float32, - initializer=keras.initializers.zeros()) + initializer=init_ops.Zeros()) x = math_ops.matmul(x, kernel) x = _Quantize(x, quantization_range) x = nn.bias_add(x, bias) @@ -179,19 +181,15 @@ class QuantizationAwareTrainingMNISTTest(test_util.TensorFlowTestCase): Returns: The Estimator evaluation result. """""" - # Get dataset - train_data, test_data = mnist.load_data() - - def _PreprocessFn(x, y): + def _PreprocessFn(entry): + x, y = entry['image'], entry['label'] x = math_ops.cast(x, dtypes.float32) - x = array_ops.expand_dims(x, axis=2) x = 2.0 * (x / 255.0) - 1.0 y = math_ops.cast(y, dtypes.int32) return x, y def _EvalInputFn(): - mnist_x, mnist_y = test_data - dataset = dataset_ops.Dataset.from_tensor_slices((mnist_x, mnist_y)) + dataset = tfds.load('mnist', split='test') dataset = dataset.map( map_func=_PreprocessFn, num_parallel_calls=8).batch(batch_size=batch_size) @@ -201,9 +199,8 @@ class QuantizationAwareTrainingMNISTTest(test_util.TensorFlowTestCase): return features, labels def _TrainInputFn(): - mnist_x, mnist_y = train_data - dataset = dataset_ops.Dataset.from_tensor_slices((mnist_x, mnist_y)) - dataset = dataset.shuffle(2 * len(mnist_x)) + dataset = tfds.load('mnist', split='train') + dataset = dataset.shuffle(60000) dataset = dataset.map( map_func=_PreprocessFn, num_parallel_calls=8).batch(batch_size=batch_size) ",0,test 80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo. 
PiperOrigin-RevId: 343190551 Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",gpu_info.cc,"@@ -343,7 +343,8 @@ bool MaliInfo::IsValhall() const { } void GetGpuInfoFromDeviceDescription(const std::string& gpu_description, - GpuInfo* gpu_info) { + GpuApi gpu_api, GpuInfo* gpu_info) { + gpu_info->gpu_api = gpu_api; std::string lowered = gpu_description; absl::AsciiStrToLower(&lowered); gpu_info->vendor = GetGpuVendor(lowered); @@ -392,5 +393,35 @@ int GpuInfo::GetComputeUnitsCount() const { } } +int GpuInfo::GetMaxImageArguments() const { + if (IsApiOpenGl()) { + return opengl_info.max_image_units; + } else if (IsApiVulkan()) { + return vulkan_info.max_per_stage_descriptor_sampled_images; + } else if (IsApiMetal()) { + return 32; + } else if (IsApiOpenCl()) { + return 128; + } else { + return 1; + } +} + +bool GpuInfo::IsApiOpenGl() const { return gpu_api == GpuApi::kOpenGl; } + +bool GpuInfo::IsApiVulkan() const { return gpu_api == GpuApi::kVulkan; } + +bool GpuInfo::IsApiMetal() const { return gpu_api == GpuApi::kMetal; } + +bool GpuInfo::IsApiOpenCl() const { return gpu_api == GpuApi::kOpenCl; } + +bool GpuInfo::IsApiOpenGl31OrAbove() const { + if (!IsApiOpenGl()) { + return false; + } + return (opengl_info.major_version == 3 && opengl_info.minor_version >= 1) || + opengl_info.major_version > 3; +} + } // namespace gpu } // namespace tflite ",0,train 80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo. PiperOrigin-RevId: 343190551 Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",gpu_info.h,"@@ -34,6 +34,14 @@ enum class GpuVendor { kUnknown }; +enum class GpuApi { + kUnknown, + kOpenCl, + kMetal, + kVulkan, + kOpenGl, +}; + enum class AdrenoGpu { // Adreno 6xx series kAdreno685, @@ -190,6 +198,28 @@ struct MaliInfo { bool IsValhall() const; }; +struct OpenGlInfo { + std::string renderer_name; + std::string vendor_name; + std::string version; + int major_version = -1; + int minor_version = -1; + + int max_image_units = 0; + int max_ssbo_bindings = 0; + int max_image_bindings = 0; +}; + +struct VulkanInfo { + std::string vendor_name; + uint32_t api_version = -1; + uint32_t api_version_major = -1; + uint32_t api_version_minor = -1; + uint32_t api_version_patch = -1; + + uint32_t max_per_stage_descriptor_sampled_images = 0; +}; + struct GpuInfo { bool IsAdreno() const; bool IsApple() const; @@ -207,20 +237,15 @@ struct GpuInfo { int GetComputeUnitsCount() const; + int GetMaxImageArguments() const; + GpuVendor vendor = GpuVendor::kUnknown; + GpuApi gpu_api = GpuApi::kUnknown; - std::string renderer_name; - std::string vendor_name; - std::string version; - int major_version = -1; - int minor_version = -1; std::vector extensions; - int max_ssbo_bindings = 0; - int max_image_bindings = 0; std::vector max_work_group_size; int max_work_group_invocations; int max_texture_size = 0; - int max_image_units = 0; int max_array_texture_layers = 0; std::vector supported_subgroup_sizes; @@ -228,19 +253,34 @@ struct GpuInfo { AdrenoInfo adreno_info; AppleInfo apple_info; MaliInfo mali_info; + + // OpenGL specific, gpu_api should be kOpenGl + OpenGlInfo opengl_info; + bool IsApiOpenGl() const; + bool IsApiOpenGl31OrAbove() const; + + // Vulkan specific, gpu_api should be kVulkan + VulkanInfo vulkan_info; + bool IsApiVulkan() const; + + bool IsApiMetal() const; + + bool IsApiOpenCl() const; }; inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) { - return (gpu_info.major_version == 3 && gpu_info.minor_version >= 1) || - gpu_info.major_version > 3; + return 
(gpu_info.opengl_info.major_version == 3 && + gpu_info.opengl_info.minor_version >= 1) || + gpu_info.opengl_info.major_version > 3; } // Currently it initializes: // vendor // AdrenoInfo if vendor is kQualcomm // AppleInfo if vendor is kApple +// MaliInfo if vendor is kMali void GetGpuInfoFromDeviceDescription(const std::string& gpu_description, - GpuInfo* gpu_info); + GpuApi gpu_api, GpuInfo* gpu_info); } // namespace gpu } // namespace tflite ",0,train 80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo. PiperOrigin-RevId: 343190551 Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",api.cc,"@@ -385,7 +385,7 @@ absl::Status Compile(const CompilationOptions& options, } GpuInfo gpu_info; RETURN_IF_ERROR(RequestGpuInfo(&gpu_info)); - if (!IsOpenGl31OrAbove(gpu_info)) { + if (!gpu_info.IsApiOpenGl31OrAbove()) { return absl::InternalError( ""OpenGL ES 3.1 or above is required to use OpenGL inference.""); } @@ -406,7 +406,7 @@ absl::Status ReadSerializedModel( std::unique_ptr* compiled_model) { GpuInfo gpu_info; RETURN_IF_ERROR(RequestGpuInfo(&gpu_info)); - if (!IsOpenGl31OrAbove(gpu_info)) { + if (!gpu_info.IsApiOpenGl31OrAbove()) { return absl::InternalError( ""OpenGL ES 3.1 or above is required to use OpenGL inference.""); } ",0,train 80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo. PiperOrigin-RevId: 343190551 Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",api2.cc,"@@ -636,7 +636,7 @@ class InferenceEnvironmentImpl : public InferenceEnvironment { RETURN_IF_ERROR(EglEnvironment::NewEglEnvironment(&egl_env_)); RETURN_IF_ERROR(RequestGpuInfo(&gpu_info_)); - properties_.is_opengl_available = IsOpenGl31OrAbove(gpu_info_); + properties_.is_opengl_available = gpu_info_.IsApiOpenGl31OrAbove(); if (!properties_.is_opengl_available) { return absl::InternalError( ""OpenGL ES 3.1 or above is required to use OpenGL inference.""); ",0,train 80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo. PiperOrigin-RevId: 343190551 Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",compiler.cc,"@@ -201,7 +201,7 @@ class CompilerImpl : public Compiler { return; } bool is_ref = IsRef(*object); - if (num_textures < gpu_info_.max_image_units && + if (num_textures < gpu_info_.GetMaxImageArguments() && !ExceedsMaxSize(*object, gpu_info_) && (object->object_type == ObjectType::TEXTURE || (is_ref && options_.ref_obj_type == ObjectType::TEXTURE) || @@ -251,8 +251,7 @@ class CompilerImpl : public Compiler { attr.outputs.push_back(object); } - // Allocate bindings. Textures must be bound first. max_image_units also - // defines max binding number for a texture. + // Allocate bindings. Textures must be bound first. uint32_t binding = 0; auto set_binding = [&](ObjectType type, Object& object) { if (object.object_type == type) { ",0,train 80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo. 
PiperOrigin-RevId: 343190551 Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",request_gpu_info.cc,"@@ -28,27 +28,34 @@ namespace tflite { namespace gpu { namespace gl { -absl::Status RequestGpuInfo(GpuInfo* gpu_info) { - GpuInfo info; - +absl::Status RequestOpenGlInfo(OpenGlInfo* gl_info) { const GLubyte* renderer_name = glGetString(GL_RENDERER); if (renderer_name) { - info.renderer_name = reinterpret_cast(renderer_name); - GetGpuInfoFromDeviceDescription(info.renderer_name, &info); + gl_info->renderer_name = reinterpret_cast(renderer_name); } const GLubyte* vendor_name = glGetString(GL_VENDOR); if (vendor_name) { - info.vendor_name = reinterpret_cast(vendor_name); + gl_info->vendor_name = reinterpret_cast(vendor_name); } const GLubyte* version_name = glGetString(GL_VERSION); if (version_name) { - info.version = reinterpret_cast(version_name); + gl_info->version = reinterpret_cast(version_name); } - glGetIntegerv(GL_MAJOR_VERSION, &info.major_version); - glGetIntegerv(GL_MINOR_VERSION, &info.minor_version); + glGetIntegerv(GL_MAJOR_VERSION, &gl_info->major_version); + glGetIntegerv(GL_MINOR_VERSION, &gl_info->minor_version); + + return absl::OkStatus(); +} + +absl::Status RequestGpuInfo(GpuInfo* gpu_info) { + GpuInfo info; + RETURN_IF_ERROR(RequestOpenGlInfo(&info.opengl_info)); + + GetGpuInfoFromDeviceDescription(info.opengl_info.renderer_name, + GpuApi::kOpenGl, &info); GLint extensions_count; glGetIntegerv(GL_NUM_EXTENSIONS, &extensions_count); @@ -57,8 +64,10 @@ absl::Status RequestGpuInfo(GpuInfo* gpu_info) { info.extensions[i] = std::string( reinterpret_cast(glGetStringi(GL_EXTENSIONS, i))); } - glGetIntegerv(GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, &info.max_ssbo_bindings); - glGetIntegerv(GL_MAX_COMPUTE_IMAGE_UNIFORMS, &info.max_image_bindings); + glGetIntegerv(GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, + &info.opengl_info.max_ssbo_bindings); + glGetIntegerv(GL_MAX_COMPUTE_IMAGE_UNIFORMS, + &info.opengl_info.max_image_bindings); info.max_work_group_size.resize(3); glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &info.max_work_group_size[0]); @@ -69,7 +78,7 @@ absl::Status RequestGpuInfo(GpuInfo* gpu_info) { glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &info.max_work_group_invocations); glGetIntegerv(GL_MAX_TEXTURE_SIZE, &info.max_texture_size); - glGetIntegerv(GL_MAX_IMAGE_UNITS, &info.max_image_units); + glGetIntegerv(GL_MAX_IMAGE_UNITS, &info.opengl_info.max_image_units); glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &info.max_array_texture_layers); RETURN_IF_ERROR(GetOpenGlErrors()); *gpu_info = info; ",0,train 80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo. PiperOrigin-RevId: 343190551 Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",request_gpu_info.h,"@@ -26,6 +26,10 @@ namespace tflite { namespace gpu { namespace gl { +// This method performs multiple GL calls, therefore, egl context needs to be +// created upfront. +absl::Status RequestOpenGlInfo(OpenGlInfo* gl_info); + // This method performs multiple GL calls, therefore, egl context needs to be // created upfront. absl::Status RequestGpuInfo(GpuInfo* gpu_info); ",0,train 80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo. 
PiperOrigin-RevId: 343190551 Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",calculator_from_metadata.cc,"@@ -69,7 +69,7 @@ class WorkgroupsCalculatorFromMetadata : public WorkgroupsCalculator { const data::HardcodedWorkgroups* FindWorkgroups( const data::CustomWorkgroups& workgroups, const GpuInfo& gpu_info) { for (auto workgroup : *workgroups.hardcoded_workgroups()) { - if (workgroup->gpu_info()->c_str() == gpu_info.renderer_name) { + if (workgroup->gpu_info()->c_str() == gpu_info.opengl_info.renderer_name) { return workgroup; } } ",0,train 80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo. PiperOrigin-RevId: 343190551 Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",gpu_compatibility.cc,"@@ -70,10 +70,11 @@ std::map GPUCompatibilityList::CalculateVariables( variables[kDeviceModel] = android_info.model; variables[kDeviceName] = android_info.device; variables[kManufacturer] = android_info.manufacturer; - variables[kGPUModel] = gpu_info.renderer_name; + const auto& gl_info = gpu_info.opengl_info; + variables[kGPUModel] = gl_info.renderer_name; char buffer[128]; - int len = snprintf(buffer, 128 - 1, ""%d.%d"", gpu_info.major_version, - gpu_info.minor_version); + int len = snprintf(buffer, 128 - 1, ""%d.%d"", gl_info.major_version, + gl_info.minor_version); buffer[len] = '\0'; variables[kOpenGLESVersion] = std::string(buffer); CanonicalizeValues(&variables); ",0,train 80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo. PiperOrigin-RevId: 343190551 Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",gpu_compatibility_test.cc,"@@ -39,10 +39,9 @@ TEST_F(GPUCompatibilityTest, ReturnsSupportedForFullMatch) { tflite::acceleration::AndroidInfo android_info = {.android_sdk_version = ""24"", .model = ""m712c""}; - tflite::gpu::GpuInfo tflite_gpu_info = { - .major_version = 3, - .minor_version = 1, - }; + tflite::gpu::GpuInfo tflite_gpu_info; + tflite_gpu_info.opengl_info.major_version = 3; + tflite_gpu_info.opengl_info.minor_version = 1; EXPECT_TRUE(list_->Includes(android_info, tflite_gpu_info)); } @@ -54,11 +53,10 @@ TEST_F(GPUCompatibilityTest, ReturnsUnsupportedForFullMatch) { .model = ""SM-G960F"", .device = ""starlte"", .manufacturer = ""Samsung""}; - tflite::gpu::GpuInfo tflite_gpu_info = { - .renderer_name = ""Mali-G72"", - .major_version = 3, - .minor_version = 2, - }; + tflite::gpu::GpuInfo tflite_gpu_info; + tflite_gpu_info.opengl_info.renderer_name = ""Mali-G72""; + tflite_gpu_info.opengl_info.major_version = 3; + tflite_gpu_info.opengl_info.minor_version = 2; EXPECT_FALSE(list_->Includes(android_info, tflite_gpu_info)); } ",0,train f050412ecddfb771008165989946dcea3b9b60f8,tensorflow/tensorflow,"Lazily construct no-op OpKernelContext::Params::{inc,dec}_num_deferred_ops_function. Each time we create an OpKernelContext::Params, we default-create no-op functions for these members. Since these functions are rarely used, this change defers their creation until the point of use. PiperOrigin-RevId: 282854876 Change-Id: Ibdf5c034cffb001d2055413b29c328386b011693",op_kernel.h,"@@ -726,8 +726,8 @@ class OpKernelContext { const int* forward_from_array = nullptr; // For tracking actively running deferred ops. - std::function inc_num_deferred_ops_function = []() {}; - std::function dec_num_deferred_ops_function = []() {}; + std::function inc_num_deferred_ops_function; + std::function dec_num_deferred_ops_function; }; // params must outlive the OpKernelContext. 
@@ -1271,10 +1271,14 @@ class OpKernelContext { // functions. It then must call these two functions in pairs, before and after // device execution, respectively. TF_MUST_USE_RESULT std::function inc_num_deferred_ops_function() { - return params_->inc_num_deferred_ops_function; + return params_->inc_num_deferred_ops_function + ? params_->inc_num_deferred_ops_function + : []() {}; } TF_MUST_USE_RESULT std::function dec_num_deferred_ops_function() { - return params_->dec_num_deferred_ops_function; + return params_->dec_num_deferred_ops_function + ? params_->dec_num_deferred_ops_function + : []() {}; } Allocator* get_allocator(AllocatorAttributes attr); ",0,train 3ed8e55bf6710378c8abd3c3e24bc13a60bd50fd,tensorflow/tensorflow,Update image_ops_impl.py,image_ops_impl.py,"@@ -2944,15 +2944,15 @@ def rgb_to_yiq(images): Returns: images: tensor with the same shape as `images`. - Usage Example: + Usage Example: - >>> image = [[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], - ... [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]], - ... [[13.0, 14.0, 15.0], [16.0, 17.0, 18.0]]] - >>> image = tf.constant(image) - >>> tf.image.rgb_to_yiq(image) - >> image = [[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], + ... [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]], + ... [[13.0, 14.0, 15.0], [16.0, 17.0, 18.0]]] + >>> image = tf.constant(image) + >>> tf.image.rgb_to_yiq(image) + >> image = [[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], - ... [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]], - ... [[13.0, 14.0, 15.0], [16.0, 17.0, 18.0]]] - >>> image = tf.constant(image) - >>> tf.image.yiq_to_rgb(image) - >> image = [[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], + ... [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]], + ... [[13.0, 14.0, 15.0], [16.0, 17.0, 18.0]]] + >>> image = tf.constant(image) + >>> tf.image.yiq_to_rgb(image) + max_buffer_size: - max_buffer_size = prefetch_buffer_size + if self._prefetch_buffer_size > self._max_buffer_size: + self._max_buffer_size = self._prefetch_buffer_size # Create the MultiDeviceIterator. with ops.device(self._source_device): @@ -171,7 +221,7 @@ class MultiDeviceIterator(object): devices=self._devices, shared_name=shared_name, container="""", - **dataset_ops.flat_structure(dataset))) + **dataset_ops.flat_structure(self._dataset))) if context.executing_eagerly(): # Delete the resource when this object is deleted self._resource_deleter = resource_variable_ops.EagerResourceDeleter( @@ -183,7 +233,15 @@ class MultiDeviceIterator(object): self._incarnation_id = gen_dataset_ops.multi_device_iterator_init( self._dataset._variant_tensor, # pylint: disable=protected-access self._multi_device_iterator_resource, - max_buffer_size=max_buffer_size) + max_buffer_size=self._max_buffer_size) + + self._prototype_device_datasets = [] + for i, device in enumerate(self._devices): + with ops.device(device): + ds = _PerDeviceGenerator( + i, self._multi_device_iterator_resource, self._incarnation_id, + self._source_device_tensor, self._dataset._element_structure) # pylint: disable=protected-access + self._prototype_device_datasets.append(ds) # TODO(rohanj): Explore the possibility of the MultiDeviceIterator to # initialize the device side of the pipeline. 
This would allow the @@ -193,17 +251,7 @@ class MultiDeviceIterator(object): self._device_iterators = [] for i, device in enumerate(self._devices): with ops.device(device): - ds = _PerDeviceGenerator( - i, self._multi_device_iterator_resource, self._incarnation_id, - self._source_device_tensor, dataset._element_structure) # pylint: disable=protected-access - if prefetch_buffer_size > 0: - ds = ds.prefetch(prefetch_buffer_size) - # TODO(jsimsa): Enable auto-tuning and optimizations when supported for - # non-CPU devices. - options = dataset_ops.Options() - options.experimental_autotune = False - options.experimental_optimization.apply_default_optimizations = False - ds = ds.with_options(options) + ds = self._create_device_dataset(i) if context.executing_eagerly(): self._device_iterators.append(dataset_ops.make_one_shot_iterator(ds)) else: @@ -216,6 +264,20 @@ class MultiDeviceIterator(object): ] self._initializer = control_flow_ops.group(*device_iterator_initializers) + def _create_device_dataset(self, i): + """"""Uses _prototype_device_datasets[i] to build a dataset for the device."""""" + ds = self._prototype_device_datasets[i] + ds = _ReincarnatedPerDeviceGenerator(ds, self._incarnation_id) + if self._prefetch_buffer_size > 0: + ds = ds.prefetch(self._prefetch_buffer_size) + # TODO(jsimsa): Enable auto-tuning and optimizations when supported for + # non-CPU devices. + options = dataset_ops.Options() + options.experimental_autotune = False + options.experimental_optimization.apply_default_optimizations = False + ds = ds.with_options(options) + return ds + def get_next(self, device=None): """"""Returns the next element given a `device`, else returns all in a list."""""" if device is not None: @@ -242,6 +304,23 @@ class MultiDeviceIterator(object): return control_flow_ops.no_op() return self._initializer + def _eager_reset(self): + """"""Resets the MultiDeviceIterator in eager mode."""""" + if not context.executing_eagerly(): + raise ValueError(""Eager reset is only supported in eager mode."") + # pylint: disable=protected-access + self._incarnation_id = gen_dataset_ops.multi_device_iterator_init( + self._dataset._variant_tensor, + self._multi_device_iterator_resource, + max_buffer_size=self._max_buffer_size) + for i, device in enumerate(self._devices): + with ops.device(device): + ds = self._create_device_dataset(i) + # Reset the device iterator resources with the new dataset. + ds_variant = ds._variant_tensor + gen_dataset_ops.make_iterator(ds_variant, + self._device_iterators[i]._resource) + @property def output_types(self): return self._dataset.output_types ",0,train 6f6eb52a89ec6e360d8604fa68516cf2d819207f,tensorflow/tensorflow,"Fixed typos, comments",strided_slice_op.cc,"@@ -219,7 +219,7 @@ Status ValidateStridedSliceOp( // Step 2: Make a sparse spec into a full index spec // // The sparse spec does not correspond to the number of dimensions - // Make a dense spec that correspond to the number of dimensions + // Make a dense spec that corresponds to the number of dimensions // // For example suppose foo[...,3:] on foo.shape=(2,2,3) then // we need to produce the missing begin_mask for the first two ",0,train ac70125923a3315802f867837521377a6a18f283,tensorflow/tensorflow,"Fix some races detected by the analysis tool. collective_rma_distributed: Return WorkerInterface to cache prior to invoking RecvFromPeer callback, instead of after. broadcaster: put status_ updates inside mutex. 
PiperOrigin-RevId: 196192631",broadcaster.cc,"@@ -134,7 +134,7 @@ void Broadcaster::TreeSendTo(const CollectiveParams& cp, // Execute a tree broadcast, i.e. each non-source device receives from // one other and sends to up-to two others. void Broadcaster::RunTree() { - mutex mu; + mutex mu; // also guards status_ while callbacks are pending int pending_count = 0; // GUARDED_BY(mu) condition_variable all_done; std::vector send_to_ranks; @@ -164,13 +164,11 @@ void Broadcaster::RunTree() { DispatchSend( target_rank, output_, [this, target_rank, &mu, &pending_count, &all_done](const Status& s) { + mutex_lock l(mu); status_.Update(s); - { - mutex_lock l(mu); - --pending_count; - if (pending_count == 0) { - all_done.notify_all(); - } + --pending_count; + if (pending_count == 0) { + all_done.notify_all(); } }); } @@ -191,13 +189,11 @@ void Broadcaster::RunTree() { op_dev_ctx, op_dev_ctx, device_, device_, ctx_->input_alloc_attr(0), ctx_->output_alloc_attr(0), input, output_, [this, &mu, &pending_count, &all_done](const Status& s) { + mutex_lock l(mu); status_.Update(s); - { - mutex_lock l(mu); - --pending_count; - if (0 == pending_count) { - all_done.notify_all(); - } + --pending_count; + if (0 == pending_count) { + all_done.notify_all(); } }); } ",0,train ac70125923a3315802f867837521377a6a18f283,tensorflow/tensorflow,"Fix some races detected by the analysis tool. collective_rma_distributed: Return WorkerInterface to cache prior to invoking RecvFromPeer callback, instead of after. broadcaster: put status_ updates inside mutex. PiperOrigin-RevId: 196192631",collective_rma_distributed.cc,"@@ -122,7 +122,6 @@ void CollectiveRemoteAccessDistributed::RecvFromPeer( // Logic to be executed on the RecvBufferAsync callback. auto recv_buf_callback = [this, state, peer_task, to_device, to_alloc_attr, to_device_ctx, to_tensor, done](const Status& s) { - std::unique_ptr del_on_exit(state); if (s.ok()) { // In this generic implementation the bytes come back in the // RPC response protobuf rather than via RDMA so we need to copy @@ -134,6 +133,7 @@ void CollectiveRemoteAccessDistributed::RecvFromPeer( done(errors::Internal(""RecvBufResponse returned "", num_bytes, "" bytes where to_tensor expected "", to_tensor->TotalBytes())); + delete state; return; } if (to_device->tensorflow_gpu_device_info()) { @@ -144,6 +144,7 @@ void CollectiveRemoteAccessDistributed::RecvFromPeer( Status status = dev_mgr_->LookupDevice(""CPU:0"", &cpu_dev); if (!status.ok()) { done(status); + delete state; return; } AllocatorAttributes cpu_attr; @@ -163,6 +164,7 @@ void CollectiveRemoteAccessDistributed::RecvFromPeer( // done in another thread. SchedClosure([s, done] { done(s); }); }); + delete state; return; } else { // CPU device @@ -174,6 +176,7 @@ void CollectiveRemoteAccessDistributed::RecvFromPeer( dev_resolver_->ClearTask(peer_task); } + delete state; done(s); }; ",0,train a45373d1764ca99433a4e0b4ac5080a3afce4ba8,tensorflow/tensorflow,ensure all keys have same shape for stackkeys,map_kernels.h,"@@ -118,7 +118,8 @@ class TensorMapLookup : public OpKernel { OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); OP_REQUIRES(c, m->tensors().find(key) != m->tensors().end(), - errors::InvalidArgument(""Trying to lookup non-existent key. Could not find "" + key.DeviceSafeDebugString())); + errors::InvalidArgument(""Trying to lookup non-existent key. 
Could"" + ""not find "" + key.DeviceSafeDebugString())); c->set_output(0, m->tensors().find(key)->second); } @@ -189,18 +190,21 @@ class TensorMapStackKeys : public OpKernel { errors::InvalidArgument(""TensorMapStackKeys cannot be called on empty map."")); auto it = m->tensors().begin(); - size_t sz = m->tensors().size(); - TensorShape shape = it->first.shape(); - shape.InsertDim(0, m->tensors().size()); + TensorShape output_shape = it->first.shape(); + output_shape.InsertDim(0, m->tensors().size()); Tensor* result; - OP_REQUIRES_OK(c, c->allocate_output(0, shape, &result)); + OP_REQUIRES_OK(c, c->allocate_output(0, output_shape, &result)); + + //string error_str = ""Key does not match requested dtype. Requested "" + DataTypeString(key_dtype_) + "", but saw "" + DataTypeString(it->first.dtype()); int i = 0; - string error_str = ""Key does not match requested dtype. Requested "" + DataTypeString(key_dtype_) + "", but saw "" + DataTypeString(it->first.dtype()); - string simple = ""Key does not match requested dtype.""; + size_t sz = m->tensors().size(); + TensorShape key_shape = it->first.shape(); while (it != m->tensors().end() && i < sz) { OP_REQUIRES(c, it->first.dtype() == key_dtype_, - errors::InvalidArgument(""Key does not match requested dtype."")); - batch_util::CopyElementToSlice(it->first, result, i); + errors::InvalidArgument(""Key does not match requested dtype."")); + OP_REQUIRES(c, it->first.shape() == key_shape, + errors::InvalidArgument(""Keys must all have the same shape."")); + OP_REQUIRES_OK(c, batch_util::CopyElementToSlice(it->first, result, i)); i++; it++; } ",0,train a45373d1764ca99433a4e0b4ac5080a3afce4ba8,tensorflow/tensorflow,ensure all keys have same shape for stackkeys,map_ops_test.py,"@@ -68,13 +68,10 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): k2 = constant_op.constant(2.0) v = constant_op.constant(11.0) m = map_ops.tensor_map_insert(m, k, v) - simple = ""Trying to lookup non-existent key."" - error_str = simple + "" Could not find "" + str(k2) - with self.assertRaisesRegex(errors.InvalidArgumentError, - ""Trying to lookup non-existent key. *""): + with self.assertRaisesRegex(errors.InvalidArgumentError, ""Trying to lookup non-existent key. 
*""): l = map_ops.tensor_map_lookup(m, k2, dtypes.float32) self.evaluate(l) -''' + def testTensorMapErase(self): m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) @@ -158,11 +155,11 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testStackKeysEmptyMapFails(self): m = map_ops.empty_tensor_map() with self.assertRaisesRegex(errors.InvalidArgumentError, - ""Empty map has no keys.""): + ""TensorMapStackKeys cannot be called on empty map.""): keys = map_ops.tensor_map_stack_keys(m, dtypes.float32) self.evaluate(keys) - def testStackKeysMismatchedDtypeFails(self): + def testStackKeysIncorrectDtypeFails(self): m = map_ops.empty_tensor_map() k = constant_op.constant(""mismatched_key"") v = constant_op.constant(2.0) @@ -174,6 +171,19 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): keys = map_ops.tensor_map_stack_keys(m, dtypes.float32) self.evaluate(keys) + def testStackKeysIncorrectShapeFails(self): + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + k2 = constant_op.constant([1.0, 11.0]) + v = constant_op.constant(2.0) + v2 = constant_op.constant(22.0) + m = map_ops.tensor_map_insert(m, k, v) + m = map_ops.tensor_map_insert(m, k2, v2) + with self.assertRaisesRegex(errors.InvalidArgumentError, + ""Keys must all have the same shape.""): + keys = map_ops.tensor_map_stack_keys(m, dtypes.float32) + self.evaluate(keys) + def testInsertLookupGrad(self): with backprop.GradientTape() as tape: m = map_ops.empty_tensor_map() @@ -437,6 +447,6 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): s = map_ops.tensor_map_size(m) self.assertAllEqual(s, 0) self.assertAllEqual(map_ops.tensor_map_has_key(m, k), False) -''' + if __name__ == ""__main__"": test.main() ",0,train a6421c4dda1a83ea975bae545df1de16d38726b0,tensorflow/tensorflow,"Swap NaN count from index 7 to 2 within DebugNumericSummary ops. Change: 147888410",debug_ops.h,"@@ -241,12 +241,12 @@ class DebugNumericSummaryOp : public OpKernel { OP_REQUIRES_OK(context, context->allocate_output(0, shape, &output_tensor)); output_tensor->vec()(0) = static_cast(is_initialized); output_tensor->vec()(1) = static_cast(element_count); - output_tensor->vec()(2) = static_cast(negative_inf_count); - output_tensor->vec()(3) = static_cast(negative_count); - output_tensor->vec()(4) = static_cast(zero_count); - output_tensor->vec()(5) = static_cast(positive_count); - output_tensor->vec()(6) = static_cast(positive_inf_count); - output_tensor->vec()(7) = static_cast(nan_count); + output_tensor->vec()(2) = static_cast(nan_count); + output_tensor->vec()(3) = static_cast(negative_inf_count); + output_tensor->vec()(4) = static_cast(negative_count); + output_tensor->vec()(5) = static_cast(zero_count); + output_tensor->vec()(6) = static_cast(positive_count); + output_tensor->vec()(7) = static_cast(positive_inf_count); output_tensor->vec()(8) = min; output_tensor->vec()(9) = max; output_tensor->vec()(10) = mean; ",0,train a6421c4dda1a83ea975bae545df1de16d38726b0,tensorflow/tensorflow,"Swap NaN count from index 7 to 2 within DebugNumericSummary ops. Change: 147888410",debug_ops_test.cc,"@@ -254,12 +254,12 @@ TEST_F(DebugNumericSummaryOpTest, Float_full_house) { &expected, {1.0, // Is initialized. 18.0, // Total element count. + 4.0, // nan count. 2.0, // -inf count. 2.0, // negative number count (excluding -inf). 3.0, // zero count. 2.0, // positive number count (excluding +inf). 5.0, // +inf count. - 4.0, // nan count. -3.0, // minimum of non-inf and non-nan elements. 
7.0, // maximum of non-inf and non-nan elements. 0.85714285714, // mean of non-inf and non-nan elements. @@ -290,12 +290,12 @@ TEST_F(DebugNumericSummaryOpTest, Double_full_house) { &expected, {1.0, // Is initialized. 18.0, // Total element count. + 4.0, // nan count. 2.0, // -inf count. 2.0, // negative count (excluding -inf). 3.0, // zero count. 2.0, // positive count (excluding +inf). 5.0, // +inf count. - 4.0, // nan count. -3.0, // minimum of non-inf and non-nan elements. 7.0, // maximum of non-inf and non-nan elements. 0.85714285714, // mean of non-inf and non-nan elements. @@ -315,12 +315,12 @@ TEST_F(DebugNumericSummaryOpTest, Float_only_valid_values) { &expected, {1.0, // Is initialized. 6.0, // Total element count. + 0.0, // nan count. 0.0, // -inf count. 1.0, // negative count (excluding -inf). 2.0, // zero count. 3.0, // positive count (excluding +inf). 0.0, // +inf count. - 0.0, // nan count. -1.0, // minimum of non-inf and non-nan elements. 7.0, // maximum of non-inf and non-nan elements. 2.0, // mean of non-inf and non-nan elements. @@ -351,12 +351,12 @@ TEST_F(DebugNumericSummaryOpTest, Float_all_Inf_or_NaN) { // NaNs. ASSERT_NEAR(1.0, output[0], 1e-8); // Is initialized. ASSERT_NEAR(9.0, output[1], 1e-8); // Total element count. - ASSERT_NEAR(2.0, output[2], 1e-8); // -inf count. - ASSERT_NEAR(0.0, output[3], 1e-8); // negative count (excluding -inf). - ASSERT_NEAR(0.0, output[4], 1e-8); // zero count. - ASSERT_NEAR(0.0, output[5], 1e-8); // positive count (excluding +inf). - ASSERT_NEAR(3.0, output[6], 1e-8); // +inf count. - ASSERT_NEAR(4.0, output[7], 1e-8); // nan count. + ASSERT_NEAR(4.0, output[2], 1e-8); // nan count. + ASSERT_NEAR(2.0, output[3], 1e-8); // -inf count. + ASSERT_NEAR(0.0, output[4], 1e-8); // negative count (excluding -inf). + ASSERT_NEAR(0.0, output[5], 1e-8); // zero count. + ASSERT_NEAR(0.0, output[6], 1e-8); // positive count (excluding +inf). + ASSERT_NEAR(3.0, output[7], 1e-8); // +inf count. // Due to the absence of any non-inf and non-nan values, the output of min, // max, mean and var are all degenerate. ASSERT_EQ(std::numeric_limits::infinity(), output[8]); ",0,train a6421c4dda1a83ea975bae545df1de16d38726b0,tensorflow/tensorflow,"Swap NaN count from index 7 to 2 within DebugNumericSummary ops. Change: 147888410",session_debug_testlib.py,"@@ -1060,7 +1060,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): self.assertTrue(dump.loaded_partition_graphs()) self.assertAllClose([[ - 1.0, 18.0, 2.0, 2.0, 3.0, 2.0, 5.0, 4.0, -3.0, 7.0, 0.85714286, + 1.0, 18.0, 4.0, 2.0, 2.0, 3.0, 2.0, 5.0, -3.0, 7.0, 0.85714286, 8.97959184 ]], dump.get_tensors(""numeric_summary/a/read"", 0, ""DebugNumericSummary"")) ",0,train b7ae5f28b4b55098ae64ed2bcab21f6e29d5abee,tensorflow/tensorflow,"Simplify MemcpyRewritePattern by using the MemcpyOpAdaptor. 
PiperOrigin-RevId: 399226638 Change-Id: Iebccf00cdce622d33e40f8a98704d09d8139dbed",memcpy_pattern.cc,"@@ -33,6 +33,7 @@ #include ""mlir/Transforms/DialectConversion.h"" #include ""llvm/ADT/ArrayRef.h"" #include ""llvm/ADT/StringRef.h"" +#include ""mlir/Dialect/GPU/GPUDialect.h"" // from @llvm-project #include ""mlir/Dialect/StandardOps/IR/Ops.h"" // from @llvm-project #include ""mlir/IR/BlockAndValueMapping.h"" // from @llvm-project #include ""mlir/IR/Value.h"" // from @llvm-project @@ -61,20 +62,16 @@ struct MemcpyRewritePattern mlir::gpu::MemcpyOp op, Value chain, Value stream, ArrayRef operands, ConversionPatternRewriter& rewriter) const override { - if (!all_of(operands, [](Value operand) { - return operand.getType().isa(); - })) + mlir::gpu::MemcpyOpAdaptor adaptor = + mlir::gpu::MemcpyOpAdaptor(operands, op->getAttrDictionary()); + if (!adaptor.src().getType().isa() || + !adaptor.dst().getType().isa()) { return rewriter.notifyMatchFailure(op, ""expected buffer operands""); - - BlockAndValueMapping mapping; - for (auto pair : llvm::zip_first(op->getOperands(), operands)) - mapping.map(std::get<0>(pair), std::get<1>(pair)); - + } rewriter.eraseOp(op); - return rewriter - .create(op.getLoc(), mapping.lookup(op.dst()), - mapping.lookup(op.src()), stream, chain) + .create(op.getLoc(), adaptor.dst(), adaptor.src(), + stream, chain) .getResult(); } }; ",0,train 16765079bb77aa2ff24d96bd6781baf80f1c9ca8,tensorflow/tensorflow,"[XLIR] Log and skip instances where memref.global op is absent for a constant. PiperOrigin-RevId: 411604942 Change-Id: I9eb23ef6896fd3d9ca37715c8d35be4315bea6be",kernel_ops_pattern.cc,"@@ -291,12 +291,16 @@ static void Rewrite(Operation* op, mlir::PatternRewriter& rewriter, rewriter.getStringAttr(gpu_module_data)); // Annotate memref.global ops with the gpu.module symbol, and annotate the - // gpu.module op with memref.global symbols which requiring initialization. + // gpu.module op with memref.global symbols which require initialization. SmallVector const_attrs; for (const auto& constant : constants) { auto global_op = mlir::SymbolTable::lookupNearestSymbolFrom( op, rewriter.getStringAttr(constant.symbol_name)); - assert(global_op); + if (!global_op) { + LOG(WARNING) << ""memref.global op not found for constant. Possibly "" + << ""unused (spurious) constant.""; + continue; + } global_op->setAttr(tfrt::gpu::getGpuModuleAttrName(), mlir::SymbolRefAttr::get(gpu_module)); if (!constant.content.empty()) ",0,train f85d825500357603afb7a02d2c88ad306ee43006,tensorflow/tensorflow,"Allow differentiating tfe.defun functions which contain conds. PiperOrigin-RevId: 205732423",direct_session.cc,"@@ -717,7 +717,8 @@ Status DirectSession::Run(const RunOptions& run_options, // Receive outputs. if (outputs) { std::vector sorted_outputs; - const Status s = call_frame.ConsumeRetvals(&sorted_outputs); + const Status s = call_frame.ConsumeRetvals( + &sorted_outputs, /* allow_dead_tensors = */ false); if (errors::IsInternal(s)) { return errors::InvalidArgument(s.error_message()); } else if (!s.ok()) { ",0,test f85d825500357603afb7a02d2c88ad306ee43006,tensorflow/tensorflow,"Allow differentiating tfe.defun functions which contain conds. PiperOrigin-RevId: 205732423",function.cc,"@@ -746,6 +746,8 @@ void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle, rets_alloc_attrs.push_back(ret_alloc_attrs); } + bool allow_dead_tensors = opts.allow_dead_tensors; + // The ProcFLR sends the arguments to the function from the source_device to // the target_device. 
So here we receive those arguments. Similarly, when the // computation is done and stored in *rets, we send the return values back @@ -756,7 +758,7 @@ void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle, device_context, args_alloc_attrs, rendezvous, remote_args, [frame, remote_args, item, source_device, target_device, target_incarnation, rendezvous, device_context, rets, done, exec_args, - rets_alloc_attrs](const Status& status) { + rets_alloc_attrs, allow_dead_tensors](const Status& status) { Status s = status; if (s.ok()) { s = frame->SetArgs(*remote_args); @@ -769,13 +771,13 @@ void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle, return; } item->exec->RunAsync( - *exec_args, - [frame, rets, done, source_device, target_device, - target_incarnation, rendezvous, device_context, remote_args, - exec_args, rets_alloc_attrs](const Status& status) { + *exec_args, [frame, rets, done, source_device, target_device, + target_incarnation, rendezvous, device_context, + remote_args, exec_args, rets_alloc_attrs, + allow_dead_tensors](const Status& status) { Status s = status; if (s.ok()) { - s = frame->ConsumeRetvals(rets); + s = frame->ConsumeRetvals(rets, allow_dead_tensors); } delete frame; if (!s.ok()) { @@ -859,14 +861,15 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle, return; } + bool allow_dead_tensors = opts.allow_dead_tensors; item->exec->RunAsync( // Executor args *exec_args, // Done callback. - [frame, rets, done, exec_args](const Status& status) { + [frame, rets, done, exec_args, allow_dead_tensors](const Status& status) { Status s = status; if (s.ok()) { - s = frame->ConsumeRetvals(rets); + s = frame->ConsumeRetvals(rets, allow_dead_tensors); } delete frame; delete exec_args; ",0,test f85d825500357603afb7a02d2c88ad306ee43006,tensorflow/tensorflow,"Allow differentiating tfe.defun functions which contain conds. PiperOrigin-RevId: 205732423",function.cc,"@@ -865,12 +865,15 @@ Status FunctionCallFrame::GetRetvals(std::vector* rets) const { return Status::OK(); } -Status FunctionCallFrame::ConsumeRetvals(std::vector* rets) { +Status FunctionCallFrame::ConsumeRetvals(std::vector* rets, + bool allow_dead_tensors) { rets->clear(); rets->reserve(rets_.size()); for (size_t i = 0; i < rets_.size(); ++i) { if (rets_[i].has_val) { rets->emplace_back(std::move(rets_[i].val)); + } else if (allow_dead_tensors) { + rets->emplace_back(); } else { return errors::Internal(""Retval["", i, ""] does not have value""); } ",0,test f85d825500357603afb7a02d2c88ad306ee43006,tensorflow/tensorflow,"Allow differentiating tfe.defun functions which contain conds. PiperOrigin-RevId: 205732423",function.h,"@@ -261,7 +261,10 @@ class FunctionCallFrame : public CallFrameInterface { // Caller methods. Status SetArgs(gtl::ArraySlice args); Status GetRetvals(std::vector* rets) const; - Status ConsumeRetvals(std::vector* rets); + + // Moves the return values from the frame to rets. If allow_dead_tensors is + // false it will fail if any of the retvals do not have a value. + Status ConsumeRetvals(std::vector* rets, bool allow_dead_tensors); size_t num_args() const override { return arg_types_.size(); } size_t num_retvals() const override { return ret_types_.size(); } @@ -510,6 +513,9 @@ class FunctionLibraryRuntime { // If true, we create a new IntraProcessRendezvous, else use the existing // one. bool create_rendezvous = false; + + // If True, allow returning dead tensors. 
+ bool allow_dead_tensors = false; }; typedef std::function DoneCallback; virtual void Run(const Options& opts, Handle handle, ",0,test f85d825500357603afb7a02d2c88ad306ee43006,tensorflow/tensorflow,"Allow differentiating tfe.defun functions which contain conds. PiperOrigin-RevId: 205732423",partitioned_function_ops.cc,"@@ -330,6 +330,7 @@ class PartitionedCallOp : public AsyncOpKernel { // using device-specific threadpools when available. opts.runner = ctx->runner(); opts.source_device = local_device_name_; + opts.allow_dead_tensors = true; // TODO(akshayka): Accommodate the multiple-worker scenario by adding the // constructed rendezvous to a rendezvous manager. Rendezvous* rendez = new IntraProcessRendezvous(lib->device_mgr()); ",0,test f85d825500357603afb7a02d2c88ad306ee43006,tensorflow/tensorflow,"Allow differentiating tfe.defun functions which contain conds. PiperOrigin-RevId: 205732423",function_test.py,"@@ -213,6 +213,19 @@ class FunctionTest(test.TestCase): self.assertEqual(fn_op.output_shapes, None) self.assertAllEqual(fn_op(x, x), None) + @test_util.run_in_graph_and_eager_modes() + def testDefunCondGradient(self): + + @function.defun + def f(x): + return control_flow_ops.cond(x > 0.5, lambda: 2 * x, lambda: 3 * x) + + with backprop.GradientTape() as t: + x = constant_op.constant(1.0) + t.watch(x) + y = f(x) + self.assertAllEqual(self.evaluate(t.gradient(y, x)), 2.0) + def testDefunCapturedInt32(self): x = constant_op.constant(1, dtype=dtypes.int32) ",0,test c3f713e1cc1d1c14ce9e19a792e4179ca3fc92bf,tensorflow/tensorflow,Address review comment on new PR #23109,check_ops.py,"@@ -236,7 +236,6 @@ def _make_assert_msg_data(sym, x, y, summarize, test_op): data.append('Corresponding y values:') data.append(y_vals.numpy().reshape((-1,))[:num_vals]) - if summarize > 0: # reshape((-1,)) is the fastest way to get a flat array view. x_np = x.numpy().reshape((-1,)) y_np = y.numpy().reshape((-1,)) ",0,train 7e5cf28779087a2ca36b79e8d1b02083d77cb8ff,tensorflow/tensorflow,"Replace unneeded TODO with clarification comment. PiperOrigin-RevId: 263128148",collective_ops_test.py,"@@ -141,8 +141,8 @@ class CollectiveOpTest(test.TestCase): with ops.device(device): device_collectives = [] for j in range(num_vars): - # TODO(ayushd): figure out why identity is necessary to get the - # right device on the input here with TF2_BEHAVIOR=1. + # NOTE(ayushd): we need the `identity` here to ensure that the + # input to `all_reduce` has an explicit device string. 
input_tensor = array_ops.identity(device_tensors[j]) collective_op = collective_ops.all_reduce( input_tensor, group_size, group_key, instances[j], ",0,test 62f0dee7a386259a245f3d7e7b481715f3d018c2,tensorflow/tensorflow,"[TFR] Improve error message on undefined default value PiperOrigin-RevId: 388371834 Change-Id: I8678bf9fb500b72f096f8294d9a47650f77da02d",tfr_gen.py,"@@ -1232,7 +1232,11 @@ class TFRGen(transformer.CodeGenerator): """"""emit mlir constant statement from default value of the ArgDef proto."""""" name = self._ssa_name('cst') cst_ty = _get_type_from_proto(None, attr_def) - cst_val = _get_val_from_proto(cst_ty, attr_def.default_value) + try: + cst_val = _get_val_from_proto(cst_ty, attr_def.default_value) + except AttributeError: + raise AttributeError( + f'attribute ""{attr_def.name}"" does not have default_value') if cst_ty == TFRTypes.ATTR: self._emit_with_loc('\n{} = tfr.constant {} -> {}'.format( name, cst_val, cst_ty)) ",0,train 5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,cholesky_op_test.py,"@@ -15,7 +15,6 @@ """"""Tests for tensorflow.ops.tf.Cholesky."""""" import numpy as np -from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import constant_op @@ -76,7 +75,7 @@ class CholeskyOpTest(xla_test.XLATestCase): # Generate random positive-definite matrices. matrices = np.random.rand(10, 5, 5).astype(dtype) - for i in xrange(10): + for i in range(10): matrices[i] = np.dot(matrices[i].T, matrices[i]) self._verifyCholesky(matrices, atol=1e-4) ",0,test 5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,clustering_test.py,"@@ -15,7 +15,6 @@ """"""Tests for the behavior of the auto-compilation pass."""""" import numpy as np -from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import constant_op @@ -52,7 +51,7 @@ class ClusteringTest(xla_test.XLATestCase): input2 = constant_op.constant(val2, name=""const2"") with self.test_scope(): output = math_ops.add(input1, input2) - for _ in xrange(10): + for _ in range(10): result = self.evaluate(output) self.assertAllClose(result, expected, rtol=1e-3) ",0,test 5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,conv3d_test.py,"@@ -15,7 +15,6 @@ """"""Tests for 3D convolutions using the XLA JIT."""""" import numpy as np -from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import constant_op @@ -96,11 +95,11 @@ class Conv3DTransposeTest(xla_test.XLATestCase): # kernel_depth * ceil(kernel_height/2) * kernel_width or # kernel_depth * kernel_height * ceil(kernel_width/2) - for n in xrange(x_shape[0]): - for k in xrange(f_shape[3]): - for w in xrange(y_shape[3]): - for h in xrange(y_shape[2]): - for d in xrange(y_shape[1]): + for n in range(x_shape[0]): + for k in range(f_shape[3]): + for w in range(y_shape[3]): + for h in range(y_shape[2]): + for d in range(y_shape[1]): d_in = d > 0 and d < y_shape[1] - 1 h_in = h > 0 and h < y_shape[2] - 1 w_in = w > 0 and w < y_shape[3] - 1 @@ -133,11 +132,11 @@ class Conv3DTransposeTest(xla_test.XLATestCase): x, f, y_shape, strides=strides, padding=""SAME"") value = self.evaluate(output) - for n in xrange(x_shape[0]): - for k in xrange(f_shape[3]): - for w in xrange(y_shape[3]): 
- for h in xrange(y_shape[2]): - for d in xrange(y_shape[1]): + for n in range(x_shape[0]): + for k in range(f_shape[3]): + for w in range(y_shape[3]): + for h in range(y_shape[2]): + for d in range(y_shape[1]): # We add a case for locations divisible by the stride. d_in = d % strides[1] == 0 and 0 < d < y_shape[1] - 1 h_in = h % strides[2] == 0 and 0 < h < y_shape[2] - 1 @@ -176,11 +175,11 @@ class Conv3DTransposeTest(xla_test.XLATestCase): # The amount of padding added pad = 1 - for n in xrange(x_shape[0]): - for k in xrange(f_shape[3]): - for w in xrange(y_shape[3]): - for h in xrange(y_shape[2]): - for d in xrange(y_shape[1]): + for n in range(x_shape[0]): + for k in range(f_shape[3]): + for w in range(y_shape[3]): + for h in range(y_shape[2]): + for d in range(y_shape[1]): # We add a case for locations divisible by the stride. d_in = d % strides[1] == 0 and pad < d < y_shape[1] - 1 - pad h_in = h % strides[2] == 0 and pad < h < y_shape[2] - 1 - pad ",0,test 5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,depthwise_conv_op_test.py,"@@ -15,7 +15,6 @@ """"""Functional tests for depthwise convolutional operations."""""" import numpy as np -from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import constant_op @@ -35,7 +34,7 @@ def ReferenceDepthwiseConv2D(input_tensor, filter_tensor, strides, padding, convs = [] in_channels = filter_tensor.shape[2] # Use a custom implementation of depthwise conv2d using slicing. - for channel in xrange(in_channels): + for channel in range(in_channels): # Slice the input along channel if data_format == ""NCHW"": input_slice = input_tensor[:, channel:channel+1, :, :] ",0,test 5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,fifo_queue_test.py,"@@ -16,8 +16,6 @@ import time -from six.moves import xrange # pylint: disable=redefined-builtin - from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import dtypes as dtypes_lib from tensorflow.python.ops import data_flow_ops @@ -86,7 +84,7 @@ class FIFOQueueTest(xla_test.XLATestCase): # Dequeue every element using a single thread. 
results = [] - for _ in xrange(len(elems)): + for _ in range(len(elems)): results.append(dequeued_t.eval()) self.assertItemsEqual(elems, results) @@ -124,7 +122,7 @@ class FIFOQueueTest(xla_test.XLATestCase): for enqueue_op in enqueue_ops: enqueue_op.run() - for i in xrange(len(elems)): + for i in range(len(elems)): vals = self.evaluate(dequeued_t) self.assertEqual([elems[i]], vals) @@ -145,7 +143,7 @@ class FIFOQueueTest(xla_test.XLATestCase): results = [] def dequeue(): - for _ in xrange(len(elems)): + for _ in range(len(elems)): results.append(sess.run(dequeued_t)) enqueue_thread = self.checkedThread(target=enqueue) @@ -168,7 +166,7 @@ class FIFOQueueTest(xla_test.XLATestCase): for enqueue_op in enqueue_ops: enqueue_op.run() - for i in xrange(len(elems)): + for i in range(len(elems)): x_val, y_val = sess.run(dequeued_t) x, y = elems[i] self.assertEqual([x], x_val) ",0,test 5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,image_ops_test.py,"@@ -21,8 +21,6 @@ import os from absl.testing import parameterized import numpy as np -from six.moves import xrange # pylint: disable=redefined-builtin - from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -227,7 +225,7 @@ class AdjustHueTest(xla_test.XLATestCase): x_v = x_np.reshape([-1, 3]) y_v = np.ndarray(x_v.shape, dtype=x_v.dtype) channel_count = x_v.shape[0] - for i in xrange(channel_count): + for i in range(channel_count): r = x_v[i][0] g = x_v[i][1] b = x_v[i][2] @@ -347,7 +345,7 @@ class AdjustSaturationTest(xla_test.XLATestCase): x_v = x_np.reshape([-1, 3]) y_v = np.ndarray(x_v.shape, dtype=x_v.dtype) channel_count = x_v.shape[0] - for i in xrange(channel_count): + for i in range(channel_count): r = x_v[i][0] g = x_v[i][1] b = x_v[i][2] ",0,test 5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,listdiff_op_test.py,"@@ -15,7 +15,6 @@ """"""Tests for XLA listdiff operator."""""" import numpy as np -from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import dtypes @@ -61,7 +60,7 @@ class ListDiffTest(xla_test.XLATestCase): int_low = -7 int_high = 8 max_size = 50 - for _ in xrange(num_random_tests): + for _ in range(num_random_tests): x_size = np.random.randint(max_size + 1) x = np.random.randint(int_low, int_high, size=x_size) y_size = np.random.randint(max_size + 1) ",0,test 5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,unary_ops_test.py,"@@ -18,7 +18,6 @@ import unittest import numpy as np import six -from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import dtypes @@ -80,7 +79,7 @@ class UnaryOpsTest(xla_test.XLATestCase): def ListsAreClose(self, result, expected, rtol, atol): """"""Tests closeness of two lists of floats."""""" self.assertEqual(len(result), len(expected)) - for i in xrange(len(result)): + for i in range(len(result)): self.assertAllClose(result[i], expected[i], rtol, atol) def AssertCloseAndSorted(self, result, expected, rtol, atol): ",0,test 5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,xla_shape.py,"@@ -16,8 +16,6 @@ import numpy as _np # Avoids becoming a part of public Tensorflow API. 
-from six.moves import xrange - from tensorflow.compiler.xla import xla_data_pb2 from tensorflow.compiler.xla.python_api import types @@ -117,7 +115,7 @@ def _CreateShapeFromNumpy(ndarray): # pylint: disable=invalid-name else: # Row-major layout. This corresponds to a ""dimension order is # major-to-minor"" layout int XLA. - layout = list(reversed(xrange(ndarray.ndim))) + layout = list(reversed(range(ndarray.ndim))) return Shape(element_type, dimensions, layout) ",0,test fca2509e3b3d6252fa34f6e35d8a359c0e5cbf64,tensorflow/tensorflow,"Format tf.function's error message when input and signature does not match This fix tries to address the issue raised in 30576 where the error message is hard to interpret: ``` ValueError: Python inputs incompatible with input_signature: inputs ((, , , , , , , , )), input_signature ((TensorSpec(shape=(?, ?, 1), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 2), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 3), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 4), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 5), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 6), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 7), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 8), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 9), dtype=tf.float32, name=None))) ``` This fix formats the error message: ``` ValueError: Python inputs incompatible with input_signature: inputs: ( Tensor(""random_normal:0"", shape=(1, 123, 1), dtype=float32), Tensor(""random_normal_1:0"", shape=(1, 123, 2), dtype=float32), Tensor(""random_normal_2:0"", shape=(1, 123, 3), dtype=float32), Tensor(""random_normal_3:0"", shape=(1, 123, 4), dtype=float32), Tensor(""random_normal_4:0"", shape=(1, 123, 5), dtype=float32), Tensor(""random_normal_5:0"", shape=(1, 123, 6), dtype=float32), Tensor(""random_normal_6:0"", shape=(1, 123, 7), dtype=float32), Tensor(""random_normal_7:0"", shape=(1, 123, 8), dtype=float32), Tensor(""random_normal_8:0"", shape=(1, 123, 1), dtype=float32)) input_signature: ( TensorSpec(shape=(?, ?, 1), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 2), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 3), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 4), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 5), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 6), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 7), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 8), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 9), dtype=tf.float32, name=None)) ``` This fix fixes 30576. 
Signed-off-by: Yong Tang ",function.py,"@@ -1548,12 +1548,17 @@ def _convert_inputs_to_signature(inputs, input_signature, flat_input_signature): ""Inputs (%s), input_signature(%s)."" % (str(inputs), str(input_signature))) + def format_error_message(inputs, input_signature): + return ("" inputs: (\n "" + + "",\n "".join([str(i) for i in inputs]) + + "")\n input_signature: (\n "" + + "",\n "".join([str(i) for i in input_signature]) + + "")"") if any(not spec.is_compatible_with(other) for spec, other in zip( flat_input_signature, flatten_inputs)): - raise ValueError(""Python inputs incompatible with input_signature: "" - ""inputs (%s), input_signature (%s)"" % - (str(inputs), str(input_signature))) + raise ValueError(""Python inputs incompatible with input_signature:\n%s"" % + format_error_message(inputs, input_signature)) if need_packing: inputs = nest.pack_sequence_as( ",0,train b2933c618260edc039fb8a7e2dce4d2e185f0892,tensorflow/tensorflow,"[XLA:GPU] Allow multi-output fusion of element-wise instructions, in addition to loop fusions. PiperOrigin-RevId: 207253181",multi_output_fusion.cc,"@@ -115,15 +115,23 @@ bool IsInputFusibleReduction(HloInstruction* instr) { // will be broadcasted and have not been observed to cause data locality issues. // TODO(b/111977086): Improve reduce emitters to remove this limitation. bool ReduceFriendlyInputLayouts(HloInstruction* instr) { + std::vector params; + if (instr->opcode() == HloOpcode::kFusion) { + params = instr->fused_parameters(); + } else { + for (HloInstruction* operand : instr->operands()) { + params.push_back(operand); + } + } int64 max_rank = 0; const Layout* max_rank_layout; - for (HloInstruction* param : instr->fused_parameters()) { + for (HloInstruction* param : params) { if (ShapeUtil::Rank(param->shape()) > max_rank) { max_rank = ShapeUtil::Rank(param->shape()); max_rank_layout = ¶m->shape().layout(); } } - return c_all_of(instr->fused_parameters(), [&](HloInstruction* param) { + return c_all_of(params, [&](HloInstruction* param) { return (ShapeUtil::Rank(param->shape()) < max_rank) || (LayoutUtil::Equal(param->shape().layout(), *max_rank_layout)); }); @@ -221,7 +229,7 @@ bool GpuMultiOutputFusion::DoProducerConsumerMultiOutputFusion() { const bool is_loop_fusion = producer->opcode() == HloOpcode::kFusion && producer->fusion_kind() == HloInstruction::FusionKind::kLoop; - if (!is_loop_fusion) { + if (!producer->IsElementwise() && !is_loop_fusion) { VLOG(3) << producer->name() << "" is not a loop fusion.""; continue; } ",0,test b2933c618260edc039fb8a7e2dce4d2e185f0892,tensorflow/tensorflow,"[XLA:GPU] Allow multi-output fusion of element-wise instructions, in addition to loop fusions. 
PiperOrigin-RevId: 207253181",multi_output_fusion_test.cc,"@@ -256,6 +256,26 @@ TEST_F(MultiOutputFusionTest, MultiOutputFusionTwoLoops) { op::Tuple(op::Multiply(), op::Divide())); } +TEST_F(MultiOutputFusionTest, ProducerConsumerFusionElementwiseAndReduce) { + auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R""( + ENTRY reduce { + p0 = f32[2,2,2]{2,1,0} parameter(0) + c0 = f32[] constant(0) + exp = f32[2,2,2]{2,1,0} exponential(p0) + reduce = f32[2,2]{1,0} reduce(exp, c0), dimensions={2}, to_apply=scalar_add_computation + ROOT root = (f32[2,2]{1,0}, f32[2,2,2]{2,1,0}) tuple(reduce, exp) + })"")) + .ValueOrDie(); + ASSERT_TRUE(GpuMultiOutputFusion().Run(module.get()).ValueOrDie()); + SCOPED_TRACE(module->ToString()); + const HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, op::Tuple(op::GetTupleElement(), op::GetTupleElement())); + const HloInstruction* fusion = root->operand(0)->operand(0); + ASSERT_TRUE(fusion->IsMultiOutputFusion()); + EXPECT_THAT(fusion->fused_expression_root(), + op::Tuple(op::Reduce(), op::Exp())); +} + TEST_F(MultiOutputFusionTest, ProducerConsumerFusionLoopFusionAndReduce) { auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R""( fused_add { ",0,test 21b91300e9e18dbfa2d1a503721ed3d0a08f37e2,tensorflow/tensorflow,"boosted_trees: infer the output shapes of Quantiles Op from the input shapes. PiperOrigin-RevId: 188750079",quantile_ops.cc,"@@ -272,6 +272,20 @@ REGISTER_OP(""Quantiles"") .Input(""sparse_indices: num_sparse_features * int64"") .Output(""dense_quantiles: num_dense_features * int32"") .Output(""sparse_quantiles: num_sparse_features * int32"") + .SetShapeFn([](InferenceContext* c) { + int num_dense_features; + TF_RETURN_IF_ERROR(c->GetAttr(""num_dense_features"", &num_dense_features)); + int num_sparse_features; + TF_RETURN_IF_ERROR( + c->GetAttr(""num_sparse_features"", &num_sparse_features)); + // Set output shapes (dense_quantiles and sparse_quantiles) by the + // relevant inputs (dense_values and sparse_values). Note that the output + // has an additional dimension for dimension_ids. + for (int i = 0; i < num_dense_features + num_sparse_features; ++i) { + c->set_output(i, c->MakeShape({c->Dim(c->input(i), 0), 2})); + } + return Status::OK(); + }) .Doc(R""doc( Computes quantile for each a given list of dense and sparse feature values using the given buckets. ",0,train 571d0114eda553e2d1b5c9c71f77c2211b5914e3,tensorflow/tensorflow,"Internal Cleanup. 
PiperOrigin-RevId: 227929845",optimizer_v2_test.py,"@@ -27,7 +27,6 @@ import numpy as np from tensorflow.python import keras from tensorflow.python.eager import context from tensorflow.python.eager import def_function -from tensorflow.python.eager import function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -262,23 +261,6 @@ class OptimizerTest(test.TestCase): self.evaluate(sgd.iterations.initializer) self.assertEqual(0, self.evaluate(sgd.iterations)) - @test_util.run_in_graph_and_eager_modes - def testSerializationWithinDefun(self): - with self.cached_session(): - sgd = gradient_descent.SGD(3.0) - var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], - dtype=dtypes.float32) - loss = lambda: 3 * var0 - sgd.minimize(loss, [var0]) - - def serialize(): - config = sgd.get_config() - gradient_descent.SGD.from_config(config) - - compiled_serialize = function.defun(serialize) - with self.assertRaisesRegexp(RuntimeError, 'inside Tensorflow graph'): - compiled_serialize() - @test_util.run_in_graph_and_eager_modes def testConfig(self): with self.cached_session(): ",0,test 571d0114eda553e2d1b5c9c71f77c2211b5914e3,tensorflow/tensorflow,"Internal Cleanup. PiperOrigin-RevId: 227929845",optimizers.py,"@@ -575,7 +575,7 @@ class Adamax(Optimizer): def get_updates(self, loss, params): grads = self.get_gradients(loss, params) - self.updates = [state_ops.assign_add(self.iterations, 1)] + self.updates = [] lr = self.lr if self.initial_decay > 0: @@ -583,7 +583,8 @@ class Adamax(Optimizer): 1. / (1. + self.decay * math_ops.cast(self.iterations, K.dtype(self.decay)))) - t = math_ops.cast(self.iterations, K.floatx()) + 1 + with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]): + t = math_ops.cast(self.iterations, K.floatx()) lr_t = lr / (1. - math_ops.pow(self.beta_1, t)) shapes = [K.int_shape(p) for p in params] ",0,test d27dd98f3f48e860f0cdfebb32871ba994a8e87f,tensorflow/tensorflow,"Raise the TypeError at the line of code that causes the issue. PiperOrigin-RevId: 226068866",tpu_estimator.py,"@@ -1672,7 +1672,7 @@ class _OutfeedHostCall(object): 'Exception while calling %s: %s. It is likely the tensors ' '(%s[1]) do not match the ' 'function\'s arguments', name, e, name) - raise e + raise return ret def record(self, host_calls): @@ -1805,7 +1805,7 @@ class _OutfeedHostCall(object): 'Exception while calling %s: %s. It is likely the tensors ' '(%s[1]) do not match the ' 'function\'s arguments', name, e, name) - raise e + raise else: ret[name] = self._host_fns[name](*dequeue_ops) ",0,train c4b0364a37b36155673bc108ff56ed506ff96f1d,tensorflow/tensorflow,"Remove overrides of ConversionTarget::isDynamicallyLegal in favor of callbacks It's just confusing to have two ways of doing this. This is in preparation of removing the virtual method in mlir core, see https://reviews.llvm.org/D106786 PiperOrigin-RevId: 387409346 Change-Id: I2bf6a3db0a768fefeba76e824d53f2e06b373932",legalize_tfl.cc,"@@ -2949,13 +2949,14 @@ static bool isIllegalType(Type type) { return false; } -class TosaConversionTarget : public ConversionTarget { - public: - using ConversionTarget::ConversionTarget; +void LegalizeTFL::runOnFunction() { + QuantTypeConverter converter; + ConversionTarget target(getContext()); - protected: + target.addIllegalDialect(); + target.addIllegalDialect(); // Operations are legal if they don't contain any illegal type. 
- bool isDynamicallyLegal(Operation* op) const override { + target.markUnknownOpDynamicallyLegal([](Operation* op) { if (auto constantOp = dyn_cast(op)) { return constantOp.getType().isa(); } @@ -2974,16 +2975,7 @@ class TosaConversionTarget : public ConversionTarget { if (type && isIllegalType(type)) return false; } return true; - } -}; - -void LegalizeTFL::runOnFunction() { - QuantTypeConverter converter; - TosaConversionTarget target(getContext()); - - target.addIllegalDialect(); - target.addIllegalDialect(); - target.markUnknownOpDynamicallyLegal(); + }); auto* ctx = &getContext(); auto func = getFunction(); ",0,train c4b0364a37b36155673bc108ff56ed506ff96f1d,tensorflow/tensorflow,"Remove overrides of ConversionTarget::isDynamicallyLegal in favor of callbacks It's just confusing to have two ways of doing this. This is in preparation of removing the virtual method in mlir core, see https://reviews.llvm.org/D106786 PiperOrigin-RevId: 387409346 Change-Id: I2bf6a3db0a768fefeba76e824d53f2e06b373932",legalize_tf_types.cc,"@@ -98,17 +98,14 @@ class TfTypeConversionTarget : public ConversionTarget { public: explicit TfTypeConversionTarget(MLIRContext &ctx, TfTypeConverter &converter) : ConversionTarget(ctx), converter_(converter) { - markUnknownOpDynamicallyLegal(); - } - - protected: - bool isDynamicallyLegal(Operation *op) const override { - // The FuncOp type can contain types that the op's operand and result types - // do not contain. - if (auto func = dyn_cast(op)) { - if (!converter_.isSignatureLegal(func.getType())) return false; - } - return converter_.isLegal(op); + markUnknownOpDynamicallyLegal([this](Operation *op) { + // The FuncOp type can contain types that the op's operand and result + // types do not contain. + if (auto func = dyn_cast(op)) { + if (!converter_.isSignatureLegal(func.getType())) return false; + } + return converter_.isLegal(op); + }); } private: ",0,train a3f6850276217b8b172f303204423c332db5fbf7,tensorflow/tensorflow,Renaming from CudnnConvRewriter to GpuConvRewriter,amdgpu_compiler.cc,"@@ -73,7 +73,7 @@ Status AMDGPUCompiler::OptimizeHloConvolutionCanonicalization( HloPassPipeline pipeline(""conv_canonicalization""); pipeline.AddInvariantChecker(/*layout_sensitive=*/false, /*allow_mixed_precision=*/false); - pipeline.AddPass(); + pipeline.AddPass(); pipeline.AddPass(); pipeline.AddPass(); ",0,train a3f6850276217b8b172f303204423c332db5fbf7,tensorflow/tensorflow,Renaming from CudnnConvRewriter to GpuConvRewriter,gpu_conv_rewriter.cc,"@@ -735,7 +735,7 @@ StatusOr RunOnComputation(HloComputation* computation) { } } // namespace -StatusOr CudnnConvRewriter::Run(HloModule* module) { +StatusOr GpuConvRewriter::Run(HloModule* module) { bool changed = false; for (HloComputation* computation : module->MakeNonfusionComputations()) { TF_ASSIGN_OR_RETURN(bool result, RunOnComputation(computation)); ",0,train a3f6850276217b8b172f303204423c332db5fbf7,tensorflow/tensorflow,Renaming from CudnnConvRewriter to GpuConvRewriter,gpu_conv_rewriter.h,"@@ -32,7 +32,7 @@ namespace gpu { // Note that this pattern is necessary but not sufficient to map convolutions // to CuDNN. More patterns will be matched in cudnn_fused_conv_rewriter. 
-class CudnnConvRewriter : public HloModulePass { +class GpuConvRewriter : public HloModulePass { public: absl::string_view name() const override { return ""cudnn-conv-rewriter""; } ",0,train a3f6850276217b8b172f303204423c332db5fbf7,tensorflow/tensorflow,Renaming from CudnnConvRewriter to GpuConvRewriter,gpu_conv_rewriter_test.cc,"@@ -85,7 +85,7 @@ class GpuConvRewriterTest : public HloTestBase { protected: bool RunPass(HloModule* module) { - return CudnnConvRewriter().Run(module).ValueOrDie(); + return GpuConvRewriter().Run(module).ValueOrDie(); } // A convolution window with stride 1 and zero padding. The size fields are @@ -724,7 +724,7 @@ TEST_F(GpuConvRewriterTest, TestForwardInt8Convolution) { })""); TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(module_str)); - ASSERT_FALSE(CudnnConvRewriter().Run(m.get()).ok()); + ASSERT_FALSE(GpuConvRewriter().Run(m.get()).ok()); } } // anonymous namespace } // namespace gpu ",0,train a3f6850276217b8b172f303204423c332db5fbf7,tensorflow/tensorflow,Renaming from CudnnConvRewriter to GpuConvRewriter,ir_emission_utils.h,"@@ -113,7 +113,7 @@ bool IsCustomCallToDnnBatchNorm(const HloInstruction& hlo); // memory used by cudnn. Callers shouldn't inspect scratch_memory, as its value // is not well-defined. // -// CudnnConvRewriter lowers kConvolution HLOs to these custom calls. +// GpuConvRewriter lowers kConvolution HLOs to these custom calls. // When it does so, it chooses algorithm -1 and 0 bytes of scratch space. Later // on in the pipeline, CudnnConvAlgorithmChooser chooses an explicit // algorithm for each conv and sets the amount of scratch space needed. ",0,train a3f6850276217b8b172f303204423c332db5fbf7,tensorflow/tensorflow,Renaming from CudnnConvRewriter to GpuConvRewriter,nvptx_compiler.cc,"@@ -112,7 +112,7 @@ Status NVPTXCompiler::OptimizeHloConvolutionCanonicalization( pipeline.AddInvariantChecker(/*layout_sensitive=*/false, /*allow_mixed_precision=*/false); pipeline.AddPass(); - pipeline.AddPass(); + pipeline.AddPass(); pipeline.AddPass(); pipeline.AddPass(); pipeline.AddPass(IsVoltaOrLater(*stream_exec)); @@ -121,7 +121,7 @@ Status NVPTXCompiler::OptimizeHloConvolutionCanonicalization( // fixes. pipeline.AddPass(); - // tf2xla bridge, DepthwiseConvolutionConverter and CudnnConvRewriter + // tf2xla bridge, DepthwiseConvolutionConverter and GpuConvRewriter // introduces reshapes and transposes that can be eliminated using // AlgebraicSimplifier { @@ -134,7 +134,7 @@ Status NVPTXCompiler::OptimizeHloConvolutionCanonicalization( pass.AddPass(options); } - // CudnnConvRewriter, GpuConvPaddingLegalization and + // GpuConvRewriter, GpuConvPaddingLegalization and // CudnnConvPadForTensorCores may add instructions which can be simplified // by constant folding. pipeline.AddPass(); @@ -170,7 +170,7 @@ Status NVPTXCompiler::OptimizeHloPostLayoutAssignment( // Choose the fastest algorithm for each conv. // // We pick the algorithm before fusion so we can generate better HLO. 
After - // CudnnConvRewriter, our convolutions are CustomCalls which return a + // GpuConvRewriter, our convolutions are CustomCalls which return a // tuple (conv_result, scratch_memory), and the each conv uses 0 bytes of // scratch: // ",0,train 2469ba8003194f92829f4119718f9ce2efd9eae9,tensorflow/tensorflow,"Update docstring for sequence_feature_column Signed-off-by: Yong Tang ",sequence_feature_column.py,"@@ -371,6 +371,12 @@ def sequence_numeric_column( default_value: A single value compatible with `dtype` that is used for padding the sparse data into a dense `Tensor`. dtype: The type of values. + normalizer_fn: If not `None`, a function that can be used to normalize the + value of the tensor after `default_value` is applied for parsing. + Normalizer function takes the input `Tensor` as its argument, and returns + the output `Tensor`. (e.g. lambda x: (x - 3.0) / 4.2). Please note that + even though the most common use case of this function is normalization, it + can be used for any kind of Tensorflow transformations. Returns: A `_SequenceNumericColumn`. ",0,train d4c975c2893c35a58300cf395d326aafc21e751b,tensorflow/tensorflow,Enable row_vectorization for small row.,ir_emitter_unnested.cc,"@@ -174,6 +174,26 @@ void AnnotateThunkLaunchDimensions(const LaunchDimensions& launch_dims, {llvm::ConstantAsMetadata::get(ir_kernel), llvm::MDString::get(llvm_context, ""reqntidx""), llvm::ConstantAsMetadata::get(threads_per_block_ir_value)})); + if (launch_dims.thread_counts_per_block().y > 1) { + threads_per_block_ir_value = llvm::ConstantInt::get( + llvm::IntegerType::get(llvm_context, /*NumBits=*/32), + launch_dims.thread_counts_per_block().y); + nvvm_annotations_node->addOperand(llvm::MDNode::get( + llvm_context, + {llvm::ConstantAsMetadata::get(ir_kernel), + llvm::MDString::get(llvm_context, ""reqntidy""), + llvm::ConstantAsMetadata::get(threads_per_block_ir_value)})); + } + if (launch_dims.thread_counts_per_block().z > 1) { + threads_per_block_ir_value = llvm::ConstantInt::get( + llvm::IntegerType::get(llvm_context, /*NumBits=*/32), + launch_dims.thread_counts_per_block().z); + nvvm_annotations_node->addOperand(llvm::MDNode::get( + llvm_context, + {llvm::ConstantAsMetadata::get(ir_kernel), + llvm::MDString::get(llvm_context, ""reqntidz""), + llvm::ConstantAsMetadata::get(threads_per_block_ir_value)})); + } } bool BinarySearchDenseElementsAttr(mlir::DenseIntElementsAttr elements, ",0,test d4c975c2893c35a58300cf395d326aafc21e751b,tensorflow/tensorflow,Enable row_vectorization for small row.,launch_dimensions.cc,"@@ -70,9 +70,8 @@ int64_t ThreadsPerBlockRowVectorized(const Shape& shape, (shape.dimensions().back() % 256) != 0 && // Do not trigger the row vectorized codepath if this create too // small block size as this hurt performance. - (threads_per_block_row_vectorized >= 128 && - threads_per_block_row_vectorized <= - gpu_device_info.threads_per_block_limit)) { + threads_per_block_row_vectorized <= + gpu_device_info.threads_per_block_limit) { return threads_per_block_row_vectorized; } return -1; @@ -101,12 +100,20 @@ StatusOr CalculateLaunchDimensions( // TODO(jlebar): Investigate this further, and tune this heuristic so we can // run faster on the few benchmarks where smaller block size helps. 
int64_t threads_per_block = ThreadsPerBlockLimit(gpu_device_info); + int64_t threads_per_block_y = 1; int64_t threads_per_block_row_vectorized = ThreadsPerBlockRowVectorized(shape, gpu_device_info, dim_config); if (threads_per_block_row_vectorized > 0) { + CHECK(dim_config.row_vectorized); threads_per_block = threads_per_block_row_vectorized; - VLOG(2) << ""Update # of threads per block to ("" << threads_per_block + if (threads_per_block < 128 && num_elements > 128) { + // This case happens for small row size. + threads_per_block_y = + CeilOfRatio((int64_t)128, threads_per_block); + VLOG(2) << ""Update # of threads per block to (.x="" << threads_per_block + << "", .y="" << threads_per_block_y << "") to be row_vectorized.""; + } } else { CHECK(!dim_config.row_vectorized); // We unroll kernels to make use of vectorized loads/stores. This means we @@ -122,27 +129,27 @@ StatusOr CalculateLaunchDimensions( } } - int64_t block_count = CeilOfRatio(num_elements, threads_per_block); + int64_t block_count = CeilOfRatio(num_elements, + threads_per_block * threads_per_block_y); if (dim_config.few_waves && !dim_config.row_vectorized) { int64_t capped_threads_per_block = std::min(threads_per_block, 128); int64_t capped_block_count = gpu_device_info.core_count * - (gpu_device_info.threads_per_core_limit / capped_threads_per_block); + (gpu_device_info.threads_per_core_limit / + (capped_threads_per_block * threads_per_block_y)); if (capped_block_count < block_count) { threads_per_block = capped_threads_per_block; block_count = capped_block_count; + VLOG(2) << ""Update the # of blocks to "" << block_count + << "" and the # of threads per blocks to "" + << threads_per_block << "" as the few waves mode is enabled.""; } } else if (dim_config.few_waves && dim_config.row_vectorized) { - int64_t capped_threads_per_block = - std::min(threads_per_block, 128); - if (dim_config.row_vectorized) { - // Keep the threads_per_block found for row_vectorized. - capped_threads_per_block = threads_per_block; - } int64_t min_block_count = gpu_device_info.core_count * - (gpu_device_info.threads_per_core_limit / capped_threads_per_block); + (gpu_device_info.threads_per_core_limit / + (threads_per_block * threads_per_block_y)); int64_t capped_block_count = block_count; // This multiple of 32 was tuned to not cause regression on multiple // benchmarks. It isn't a value that is optimal for all @@ -154,7 +161,7 @@ StatusOr CalculateLaunchDimensions( // Do not increase the number of blocks. This can happens for // small num_elements. 
if (capped_block_count < block_count) { - threads_per_block = capped_threads_per_block; + VLOG(2) << ""Update # of blocks to block_count as few_waves is enabled.""; block_count = capped_block_count; } } @@ -167,11 +174,11 @@ StatusOr CalculateLaunchDimensions( } VLOG(2) << absl::StrFormat( - ""Initialized the block count to ceil(# of elements / threads per "" - ""block) = ceil(%d/%d) = %d"", - num_elements, threads_per_block, block_count); - - return LaunchDimensions(block_count, threads_per_block); + ""Initialized the block count to %d, the block size .x=%d and .y=%d"" + "" for %d elements in the tensor."", + block_count, threads_per_block, threads_per_block_y, num_elements); + return LaunchDimensions({block_count, 1, 1}, + {threads_per_block, threads_per_block_y, 1}); } } // namespace gpu ",0,test 2929f873bb5111316befe9e4804d6b4a8ad999cb,tensorflow/tensorflow,Update ctc_ops.py,ctc_ops.py,"@@ -313,8 +313,8 @@ def ctc_greedy_decoder(inputs, Notes: - - Unlike `ctc_beam_search_decoder`, `ctc_greedy_decoder` omits blanks up-to - the special treatment under `merge_repeated`. + - Unlike `ctc_beam_search_decoder`, `ctc_greedy_decoder` considers blanks + as regular elements when computing the probability of a sequence. - Default `blank_index` is `(num_classes - 1)`, unless overriden. If `merge_repeated` is `True`, merge repeated classes in output. @@ -374,7 +374,14 @@ def ctc_beam_search_decoder(inputs, top_paths=1, merge_repeated=True): """"""Performs beam search decoding on the logits given in input. - + + **Note** Although in general greedy search is a special case of beam-search + with `top_paths=1` and `beam_width=1`, `ctc_beam_search_decoder` differs + from `ctc_gready_decoder` in the treatment of blanks when computing the + probability of a sequence: + - `ctc_beam_search_decoder` treats blanks as sequence termination + - `ctc_gready_decoder` treats blanks as regular elements + If `merge_repeated` is `True`, merge repeated classes in the output beams. This means that if consecutive entries in a beam are the same, only the first of these is emitted. That is, when the sequence is @@ -433,9 +440,12 @@ def ctc_beam_search_decoder_v2(inputs, top_paths=1): """"""Performs beam search decoding on the logits given in input. - **Note** The `ctc_greedy_decoder` is a special case of the - `ctc_beam_search_decoder` with `top_paths=1` and `beam_width=1` (but - that decoder is faster for this special case). + **Note** Although in general greedy search is a special case of beam-search + with `top_paths=1` and `beam_width=1`, `ctc_beam_search_decoder` differs + from `ctc_gready_decoder` in the treatment of blanks when computing the + probability of a sequence: + - `ctc_beam_search_decoder` treats blanks as sequence termination + - `ctc_gready_decoder` treats blanks as regular elements Args: inputs: 3-D `float` `Tensor`, size `[max_time, batch_size, num_classes]`. ",0,test 2a8b52fc0c5f1fc257ad9c042126b00edfeca705,tensorflow/tensorflow,"Don't use hex floats. Hex float literals are in C11 and C++17, but not in C++11, so use plain float notation. PiperOrigin-RevId: 197933744",hlo_evaluator_test.cc,"@@ -262,13 +262,13 @@ TEST_P(HloEvaluatorTest, DoesCosR2) { auto operand = Literal::CreateR2({{0, M_PI}, {-M_PI, 2 * M_PI}}); auto expected = Literal::CreateR2({{1, -1}, {-1, 1}}); TestUnaryOp(HloOpcode::kCos, std::move(expected), std::move(operand), - use_bfloat16_ ? 0x1.0P-5 : 0x1.0P-20); + use_bfloat16_ ? 
0.031250 : 9.5367431640625E-7); } TEST_P(HloEvaluatorTest, DoesSinR2) { auto operand = Literal::CreateR2({{0, M_PI}, {-M_PI, 2 * M_PI}}); auto expected = Literal::CreateR2({{0, 0}, {0, 0}}); TestUnaryOp(HloOpcode::kSin, std::move(expected), std::move(operand), - use_bfloat16_ ? 0x1.0P-5 : 0x1.0P-20); + use_bfloat16_ ? 0.031250 : 9.5367431640625E-7); } TEST_P(HloEvaluatorTest, DoesNotR2) { auto operand = @@ -333,7 +333,7 @@ TEST_P(HloEvaluatorTest, DoesReshape) { result->EachCell( [&](tensorflow::gtl::ArraySlice indices, NativeT value) { std::vector rindexes = Permute(permutation, indices); - EXPECT_NEAR(value, literal_clone->Get(rindexes), 0x1.0P-5); + EXPECT_NEAR(value, literal_clone->Get(rindexes), 0.031250); }); } @@ -567,7 +567,7 @@ TEST_P(HloEvaluatorTest, NegativePadding2D) { (*expected_array)(0, 4) = 2.718f; auto expected = Literal::CreateR2FromArray2D(*expected_array); - EXPECT_TRUE(LiteralTestUtil::Near(*expected, *result, ErrorSpec(0x1.0P-5))); + EXPECT_TRUE(LiteralTestUtil::Near(*expected, *result, ErrorSpec(0.031250))); } TEST_P(HloEvaluatorTest, NegativeAndInteriorPadding2D) { ",0,train 2a8b52fc0c5f1fc257ad9c042126b00edfeca705,tensorflow/tensorflow,"Don't use hex floats. Hex float literals are in C11 and C++17, but not in C++11, so use plain float notation. PiperOrigin-RevId: 197933744",convert_test.cc,"@@ -249,10 +249,10 @@ XLA_TEST_F(ConvertTest, ConvertR1F32ToR1S64) { -1.99f, -2.0f, -2.01f, - 0x1.FFFFFEp+62F, - 0x1.FFFFFCp+62F, - -0x1.FFFFFEp+62F, - -0x1.FFFFFCp+62F}; + 9223371487098961920.f, + 9223370937343148032.f, + -9223371487098961920.f, + -9223370937343148032.f}; std::unique_ptr arg_literal = Literal::CreateR1({arg}); auto arg_param = builder.Parameter(0, arg_literal->shape(), ""arg_param""); std::unique_ptr arg_data = ",0,train 01e1696cfca77dfe2438f55a43bf342c0c913510,tensorflow/tensorflow,"Update GraphDef version to 557. PiperOrigin-RevId: 337643578 Change-Id: I9d8be717c6c4eb9148725b2dc0f4da4f7fff0f47",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 556 // Updated: 2020/10/16 +#define TF_GRAPH_DEF_VERSION 557 // Updated: 2020/10/17 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // ",0,train 9e3fa2c884063dbcd772b055f6104a7eae1d84ff,tensorflow/tensorflow,"Modify registration code so that the registered savers are compatible with V1 SavedModel loading APIs. 
PiperOrigin-RevId: 434835675",registration_saving_test.py,"@@ -20,6 +20,7 @@ import tempfile from absl.testing import parameterized from google.protobuf import wrappers_pb2 +from tensorflow.python.client import session from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.eager import test @@ -30,6 +31,7 @@ from tensorflow.python.ops import io_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables from tensorflow.python.saved_model import load +from tensorflow.python.saved_model import loader from tensorflow.python.saved_model import registration from tensorflow.python.saved_model import save from tensorflow.python.training.tracking import tracking @@ -46,6 +48,12 @@ class Part(resource_variable_ops.ResourceVariable): def _deserialize_from_proto(cls, **kwargs): return cls([0, 0]) + def _export_to_saved_model_graph(self, object_map, tensor_map, **kwargs): + p = Part(array_ops.zeros(self.shape, self.dtype)) + object_map[self] = p + tensor_map[self.handle] = p.handle + return [self.handle] + @registration.register_serializable() class Stack(tracking.AutoTrackable): @@ -247,6 +255,25 @@ class SingleCycleTest(test.TestCase): util.Checkpoint(s2=restore_s).read(ckpt_path).expect_partial() self.assertAllEqual(expected_value_s2, restore_s.value()) + def test_compatible_with_v1_savedmodel(self): + p1 = Part([1, 4]) + p2 = Part([2, 5]) + p3 = Part([3, 6]) + s = Stack([p1, p2, p3]) + save_path = os.path.join(self.get_temp_dir(), ""savedmodel"") + + @def_function.function(input_signature=[]) + def serve(): + return {""value"": s.value()} + + exported_value = serve()[""value""] + + save.save(s, save_path, signatures=serve) + with ops.Graph().as_default(), session.Session() as sess: + metagraph = loader.load(sess, [""serve""], save_path) + value_output = metagraph.signature_def[""serving_default""].outputs[""value""] + self.assertAllEqual(exported_value, sess.run(value_output.name)) + if __name__ == ""__main__"": test.main() ",0,train 9e3fa2c884063dbcd772b055f6104a7eae1d84ff,tensorflow/tensorflow,"Modify registration code so that the registered savers are compatible with V1 SavedModel loading APIs. 
PiperOrigin-RevId: 434835675",functional_saver.py,"@@ -135,35 +135,39 @@ def registered_saver_filename(filename_tensor, saver_name): def _get_mapped_registered_save_fn(fn, trackables, call_with_mapped_captures): """"""Converts the function to a python or tf.function with a single file arg."""""" + + def save_fn(file_prefix): + return fn(trackables=trackables, file_prefix=file_prefix) if call_with_mapped_captures is None: - def mapped_fn(file_prefix): - return fn(trackables=trackables, file_prefix=file_prefix) - return mapped_fn + return save_fn else: - tf_fn = def_function.function(fn, autograph=False) + tf_fn = def_function.function(save_fn, autograph=False) concrete = tf_fn.get_concrete_function( - trackables=trackables, file_prefix=tensor_spec.TensorSpec(shape=(), dtype=dtypes.string)) - def mapped_fn(file_prefix): + + def save_fn_with_replaced_captures(file_prefix): return call_with_mapped_captures(concrete, [file_prefix]) - return mapped_fn + + return save_fn_with_replaced_captures def _get_mapped_registered_restore_fn(fn, trackables, call_with_mapped_captures): """"""Converts the function to a python or tf.function with a single file arg."""""" + + def restore_fn(merged_prefix): + return fn(trackables=trackables, merged_prefix=merged_prefix) if call_with_mapped_captures is None: - def mapped_fn(merged_prefix): - return fn(trackables=trackables, merged_prefix=merged_prefix) - return mapped_fn + return restore_fn else: - tf_fn = def_function.function(fn, autograph=False) + tf_fn = def_function.function(restore_fn, autograph=False) concrete = tf_fn.get_concrete_function( - trackables=trackables, merged_prefix=tensor_spec.TensorSpec(shape=(), dtype=dtypes.string)) - def mapped_fn(merged_prefix): + + def restore_fn_with_replaced_captures(merged_prefix): return call_with_mapped_captures(concrete, [merged_prefix]) - return mapped_fn + + return restore_fn_with_replaced_captures class MultiDeviceSaver(object): ",0,train ae11a063040ee1d13cc71eaa74b2ab7c6f19873f,tensorflow/tensorflow,"iOS Metal delegate: support no biases in transposed_conv. PiperOrigin-RevId: 273561634",compute_task_descriptor.cc,"@@ -51,7 +51,7 @@ std::vector GetByteBufferConverted( for (const float value : input_vector) { const HalfBits converted = fp16_ieee_from_fp32_value(value); const uint8_t* bytes = reinterpret_cast(&converted); - result.insert(result.end(), bytes, bytes + sizeof(*bytes)); + result.insert(result.end(), bytes, bytes + sizeof(HalfBits)); } return result; } ",0,train ae11a063040ee1d13cc71eaa74b2ab7c6f19873f,tensorflow/tensorflow,"iOS Metal delegate: support no biases in transposed_conv. PiperOrigin-RevId: 273561634",transpose_conv.cc,"@@ -1047,11 +1047,8 @@ std::vector ConvolutionTransposed3x3( auto filters = options.storage_precision == RuntimeOptions::Precision::FP32 ? GetByteBuffer(filters_reordered) : VectorFloatToHalf(filters_reordered); - auto resized_bias = params.bias.data; - resized_bias.resize(params.weights.shape.o, 0.0f); - auto biases = options.storage_precision == RuntimeOptions::Precision::FP32 - ? 
GetByteBuffer(resized_bias) - : VectorFloatToHalf(resized_bias); + auto biases = GetByteBufferConvertedResized( + params.bias.data, options.storage_precision, params.weights.shape.o); border_desc->immutable_buffers = { {""device FilterStripe* const filters"", filters}, {""constant FLT4* const biases"", biases}, @@ -1139,8 +1136,8 @@ std::vector ConvolutionTransposed3x3( }}; desc->immutable_buffers = { - {""device FilterStripe* const filters"", GetByteBuffer(filters)}, - {""constant FLT4* const biases"", GetByteBuffer(biases)}, + {""device FilterStripe* const filters"", filters}, + {""constant FLT4* const biases"", biases}, }; desc->uniform_buffers = { ",0,train 795cd91aad486e28098b07d6e3651f243b2bde64,tensorflow/tensorflow,"Fix monitored_session docstring. PiperOrigin-RevId: 257040451",monitored_session.py,"@@ -762,17 +762,24 @@ class _MonitoredSession(object): computations with access to a raw session. The returned value of the `step_fn` will be returned from `run_step_fn`, unless a stop is requested. In that case, the next `should_stop` call will return True. - Example usage: ```python - with tf.Graph().as_default(): c = - tf.compat.v1.placeholder(dtypes.float32) v = tf.add(c, 4.0) w = - tf.add(c, 0.5) - def step_fn(step_context): - a = step_context.session.run(fetches=v, feed_dict={c: 0.5}) - if a <= 4.5: step_context.request_stop() - return step_context.run_with_hooks(fetches=w, feed_dict={c: 0.1}) - with tf.MonitoredSession() as session: - while not session.should_stop(): a = session.run_step_fn(step_fn) - ``` Hooks interact with the `run_with_hooks()` call inside the + Example usage: + ```python + with tf.Graph().as_default(): + c = tf.compat.v1.placeholder(dtypes.float32) + v = tf.add(c, 4.0) + w = tf.add(c, 0.5) + def step_fn(step_context): + a = step_context.session.run(fetches=v, feed_dict={c: 0.5}) + if a <= 4.5: + step_context.request_stop() + return step_context.run_with_hooks(fetches=w, + feed_dict={c: 0.1}) + + with tf.MonitoredSession() as session: + while not session.should_stop(): + a = session.run_step_fn(step_fn) + ``` + Hooks interact with the `run_with_hooks()` call inside the `step_fn` as they do with a `MonitoredSession.run` call. Returns: ",0,train cfa374cefe132be886c26a374c51454177c68868,tensorflow/tensorflow,Fix the segfault in convert_nodes.cc,convert_nodes.cc,"@@ -119,26 +119,29 @@ static std::vector> CreateSamePadding( class TRT_ShapedWeights { public: TRT_ShapedWeights(tensorflow::DataType type, const void* values, - nvinfer1::Dims shape, bool owned_values = false) + nvinfer1::Dims shape, + const std::vector* owned_values = nullptr) : shape_(shape), type_(type), values_(values), - owned_values_(owned_values), + owned_values_(owned_values ? 
*owned_values : std::vector({})), dummy_flag_(false) { // Note: this->shape.type[] is not used } explicit TRT_ShapedWeights(tensorflow::DataType type) - : type_(type), + : shape_(), + type_(type), values_(nullptr), - owned_values_(false), + owned_values_(), dummy_flag_(true) {} - ~TRT_ShapedWeights() { - if (values_ && owned_values_) delete static_cast(values_); - } - - TRT_ShapedWeights(const TRT_ShapedWeights&) = default; + TRT_ShapedWeights(const TRT_ShapedWeights& rhs) + : shape_(rhs.shape_), + type_(rhs.type_), + values_(rhs.values_), + owned_values_(rhs.owned_values_), + dummy_flag_(rhs.dummy_flag_) {} int64_t count() const { int64_t c = 1; @@ -152,7 +155,18 @@ class TRT_ShapedWeights { if (dummy_flag_) return nvinfer1::Weights{trt_type, nullptr, 0}; // Note: this->shape.type[] is not used - return nvinfer1::Weights{trt_type, values_, GetShapeSize(shape_)}; + return nvinfer1::Weights{trt_type, GetValues(), GetShapeSize(shape_)}; + } + + const void* GetValues() const { + if (values_) return values_; + if (owned_values_.size()) return owned_values_.data(); + return nullptr; + } + + void SetValues(const void* values) { + values_ = values; + owned_values_.clear(); } size_t size_bytes() const { @@ -165,51 +179,55 @@ class TRT_ShapedWeights { nvinfer1::Dims shape_; tensorflow::DataType type_; + + private: const void* values_; - bool owned_values_; + std::vector owned_values_; bool dummy_flag_; }; class TRT_TensorOrWeights { public: explicit TRT_TensorOrWeights(nvinfer1::ITensor* tensor) - : _tensor_(tensor), _variant_(TRT_NODE_TENSOR) {} - TRT_TensorOrWeights(const TRT_ShapedWeights& weights) - : _weights_(weights), _variant_(TRT_NODE_WEIGHTS) {} + : _tensor_(tensor), _weights_(DT_FLOAT), _variant_(TRT_NODE_TENSOR) {} + explicit TRT_TensorOrWeights(const TRT_ShapedWeights& weights) + : _tensor_(nullptr), _weights_(weights), _variant_(TRT_NODE_WEIGHTS) {} + TRT_TensorOrWeights(const TRT_TensorOrWeights& rhs) + : _tensor_(rhs._tensor_), + _weights_(rhs._weights_), + _variant_(rhs._variant_) {} ~TRT_TensorOrWeights() {} bool is_tensor() const { return _variant_ == TRT_NODE_TENSOR; } bool is_weights() const { return _variant_ == TRT_NODE_WEIGHTS; } nvinfer1::ITensor* tensor() { - CHECK_EQ(this->is_tensor(), true); + CHECK_EQ(is_tensor(), true); return _tensor_; } - nvinfer1::ITensor const* tensor() const { - CHECK_EQ(this->is_tensor(), true); + const nvinfer1::ITensor* tensor() const { + CHECK_EQ(is_tensor(), true); return _tensor_; } TRT_ShapedWeights& weights() { - CHECK_EQ(this->is_weights(), true); + CHECK_EQ(is_weights(), true); return _weights_; } const TRT_ShapedWeights& weights() const { - CHECK_EQ(this->is_weights(), true); + CHECK_EQ(is_weights(), true); return _weights_; } nvinfer1::Dims shape() const { - if (this->is_tensor()) { - return this->tensor()->getDimensions(); + if (is_tensor()) { + return tensor()->getDimensions(); } else { - return this->weights().shape_; + return weights().shape_; } } private: - union { - nvinfer1::ITensor* _tensor_; - TRT_ShapedWeights _weights_; - }; + nvinfer1::ITensor* _tensor_; + TRT_ShapedWeights _weights_; enum { TRT_NODE_TENSOR, TRT_NODE_WEIGHTS } _variant_; }; @@ -307,7 +325,7 @@ tensorflow::DataType TFAttrs::get(string key) const { } template -void Reorder4(nvinfer1::DimsNCHW shape, T const* idata, +void Reorder4(nvinfer1::DimsNCHW shape, const T* idata, nvinfer1::DimsNCHW istrides, T* odata, nvinfer1::DimsNCHW ostrides) { for (int n = 0; n < shape.n(); ++n) { @@ -339,9 +357,10 @@ void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights, 
nvinfer1::DimsNCHW ostrides = {c * r * s, r * s, s, 1}; switch (iweights.type_) { case tensorflow::DataType::DT_FLOAT: - Reorder4( - {k, c, r, s}, static_cast(iweights.values_), istrides, - static_cast(const_cast(oweights->values_)), ostrides); + Reorder4({k, c, r, s}, static_cast(iweights.GetValues()), + istrides, + static_cast(const_cast(oweights->GetValues())), + ostrides); break; default: LOG(FATAL) << ""!!!!!!!!!!!!!!!!!!!!!!!!broke!!!!!!!!!!!!""; @@ -399,7 +418,7 @@ class Converter { TRT_ShapedWeights weights(type, nullptr, shape); // TODO(jie): check weights size_bytes. 0 means type error _temp_bufs.push_back(std::vector(weights.size_bytes())); - weights.values_ = _temp_bufs.back().data(); + weights.SetValues(_temp_bufs.back().data()); return weights; } @@ -579,8 +598,8 @@ tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights, CHECK_EQ(iweights.type_, oweights->type_); switch (iweights.type_) { case tensorflow::DataType::DT_FLOAT: { - auto inp = static_cast(iweights.values_); - auto oup = static_cast(const_cast(oweights->values_)); + auto inp = static_cast(iweights.GetValues()); + auto oup = static_cast(const_cast(oweights->GetValues())); std::transform(inp, inp + iweights.count(), oup, unary_op.unary()); break; } @@ -603,9 +622,9 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l, switch (iweights_l.type_) { case tensorflow::DataType::DT_FLOAT: { - auto inp_l = static_cast(iweights_l.values_); - auto inp_r = static_cast(iweights_r.values_); - auto oup = static_cast(const_cast(oweights->values_)); + auto inp_l = static_cast(iweights_l.GetValues()); + auto inp_r = static_cast(iweights_r.GetValues()); + auto oup = static_cast(const_cast(oweights->GetValues())); if (iweights_l.count() != iweights_r.count()) { // We only supports broadcast of RankZero @@ -1117,7 +1136,7 @@ tensorflow::Status ConvertConst(Converter& ctx, // Get trt type & shape TFAttrs attrs(node_def); - tensorflow::DataType dtype = attrs.get(""dtype""); + const tensorflow::DataType dtype = attrs.get(""dtype""); // Create shaped weights as output tensorflow::Tensor tensor; @@ -1148,11 +1167,18 @@ tensorflow::Status ConvertConst(Converter& ctx, } else if (!weights_tensor.tensor_content().empty()) { VLOG(2) << ""TENSOR!!!"" << node_def.name(); const auto& content = weights_tensor.tensor_content(); - char* buf = new char[content.size() + 1]; - buf[content.size()] = 0; - port::CopyToArray(content, buf); - weights = TRT_ShapedWeights(dtype, buf, GetTensorShape(tensor), - /*owned_values=*/true); + + std::vector values; + if (content.size() > 0) { + const int dtype_size = tensorflow::DataTypeSize(dtype); + CHECK_EQ(0, content.size() % dtype_size) + << ""Tensor content size ("" << content.size() + << "") is not a multiple of "" << dtype_size; + values.resize(content.size()); + port::CopyToArray(content, values.data()); + } + weights = + TRT_ShapedWeights(dtype, nullptr, GetTensorShape(tensor), &values); } else { return tensorflow::errors::Unimplemented( ""Not supported constant type, at "" + node_def.name()); @@ -1242,7 +1268,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, if (index_type != tensorflow::DataType::DT_INT32) return tensorflow::errors::Unimplemented(""Tidx supports only DT_INT32""); auto index_list_data = - static_cast(const_cast(index_list.values_)); + static_cast(const_cast(index_list.GetValues())); // Hack warning: have to fall back to pool layer since reduce is not in public // TRT yet. 
@@ -1340,7 +1366,7 @@ tensorflow::Status ConvertPad(Converter& ctx, if (padding_type != tensorflow::DataType::DT_INT32) return tensorflow::errors::Unimplemented( ""Tpaddings supports only DT_INT32""); - auto pad_data = static_cast(const_cast(pads.values_)); + auto pad_data = static_cast(const_cast(pads.GetValues())); std::vector pad_index; for (int i = 0; i < nb_dims; i++) { ",0,train 2174f5e1dfd24a55aaced21990e3c53148035dc6,tensorflow/tensorflow,"Fix a bug introduced in cl/130951050. The correct CL description should be: make sure that no control edges by an outer control dependency context are added to nodes inside a while loop. Change: 131231314",control_flow_ops.py,"@@ -1158,8 +1158,17 @@ class ControlFlowContext(object): def _MaybeRemoveExternalControlEdges(self, op): """"""Remove any external control dependency on this op."""""" - internal_control_inputs = [x for x in op.control_inputs - if _GetOutputContext(x) == self] + while_ctxt = self.GetWhileContext() + # A control input of `op` is internal if it is in the same while + # loop context as the enclosing while loop context of self. + if while_ctxt is None: + internal_control_inputs = op.control_inputs + else: + internal_control_inputs = [] + for x in op.control_inputs: + ctxt = _GetOutputContext(x) + if ctxt is not None and ctxt.GetWhileContext() == while_ctxt: + internal_control_inputs.append(x) if len(internal_control_inputs) != len(op.control_inputs): del op.control_inputs[:] op._add_control_inputs(internal_control_inputs) ",0,test 59b88e9bfbfe0f2042b18387b082c015b90c1158,tensorflow/tensorflow,"[ROCm] Fix for a test regression on the ROCm platform - 200207 - 2 The following commit introduces a test regression on the ROCm platform https://github.com/tensorflow/tensorflow/commit/7a931a2349591f4e2250ac2d3b6c3ca66538b740 That commit adds an explicit check for GPU device in the profiler output (if a GPU is present in the list of physical devices). Since ROCm platform does not yet support device tracing, this test now fails on the ROCm platform The ""fix"" (until ROCm adds support for device tracing) is to disable that check on the ROCm platform",profiler_test.py,"@@ -47,7 +47,8 @@ class ProfilerTest(test_util.TensorFlowTestCase): profile_pb.ParseFromString(profile_result) devices = frozenset(device.name for device in profile_pb.devices.values()) self.assertIn('/host:CPU', devices) - if config.list_physical_devices('GPU'): + if not test_util.IsBuiltWithROCm() and config.list_physical_devices('GPU'): + # device tracing is not yet supported on the ROCm platform self.assertIn('/device:GPU:0', devices) events = frozenset(event.name for event in profile_pb.trace_events) self.assertIn('three_times_five', events) ",0,train 13bc7a5343dbbd27a9244f4756adcf98ead326b8,tensorflow/tensorflow,"Move StatType to open-source MetadataMatcher. PiperOrigin-RevId: 284796115 Change-Id: Icb541c6e652914cea8a5144a0faa4f6410ac99b5",tf_op_utils.h,"@@ -54,6 +54,66 @@ inline bool IsUnknownOp(absl::string_view tf_op_type) { inline bool IsDatasetOp(absl::string_view tf_op_type) { return tf_op_type == kDatasetOp; } + +constexpr size_t kNumStatType = 27; + +enum class StatType { + kUnknown = 0, + // TraceMe arguments. + kStepId, + kParentStepId, + kFunctionStepId, + kDeviceOrdinal, + kChipOrdinal, + kNodeOrdinal, + kModelId, + kQueueAddr, + kRequestId, + kRunId, + kCorrelationId, + kGraphType, + kStepNum, + kIterNum, + kIndexOnHost, + kBytesReserved, + kBytesAllocated, + kBytesAvailable, + kFragmentation, + kKernelDetails, + // Stats added when processing traces. 
+ kGroupId, + kStepName, + kLevel0, + kTfOp, + kHloOp, + kHloModule, +}; + +constexpr std::array kStatTypeStrMap({ + ""unknown"", ""id"", + ""parent_step_id"", ""function_step_id"", + ""device_ordinal"", ""chip_ordinal"", + ""node_ordinal"", ""model_id"", + ""queue_addr"", ""request_id"", + ""run_id"", ""correlation_id"", + ""graph_type"", ""step_num"", + ""iter_num"", ""index_on_host"", + ""bytes_reserved"", ""bytes_allocated"", + ""bytes_available"", ""fragmentation"", + ""kernel_details"", ""group_id"", + ""step_name"", ""level 0"", + ""tf_op"", ""hlo_op"", + ""hlo_module"", +}); + +inline absl::string_view GetStatTypeStr(StatType stat_type) { + return kStatTypeStrMap.at(static_cast(stat_type)); +} + +inline bool IsStatType(StatType stat_type, absl::string_view stat_name) { + return kStatTypeStrMap.at(static_cast(stat_type)) == stat_name; +} + } // namespace profiler } // namespace tensorflow ",0,train 0f2f5b978524f3306e415d18701ea64bd2c688b3,tensorflow/tensorflow,Fix PEP8 (#15378),__init__.py,"@@ -47,7 +47,7 @@ _allowed_symbols = [ 'VariableClippingOptimizer', 'MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm', - 'ElasticAverageOptimizer', + 'ElasticAverageOptimizer', 'ElasticAverageCustomGetter' ] ",0,train 0f2f5b978524f3306e415d18701ea64bd2c688b3,tensorflow/tensorflow,Fix PEP8 (#15378),elastic_average_optimizer.py,"@@ -79,9 +79,9 @@ class ElasticAverageCustomGetter(object): if trainable: with ops.device(self._worker_device): local_var = getter(name, trainable=True, - collections=[ops.GraphKeys.LOCAL_VARIABLES], + collections=[ops.GraphKeys.LOCAL_VARIABLES], *args, **kwargs) - + global_center_variable = variable_scope.variable( name='%s/%s' % (GLOBAL_VARIABLE_NAME, @@ -96,7 +96,7 @@ class ElasticAverageCustomGetter(object): initial_value=local_var.initialized_value(), trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES]) - + self._local_map[local_var] = local_center_variable self._global_map[local_var] = global_center_variable return local_var @@ -173,7 +173,7 @@ class ElasticAverageOptimizer(optimizer.Optimizer): colocate_gradients_with_ops=False, grad_loss=None): """"""Compute gradients of `loss` for the variables in `var_list`. - + Add rho*elastic_difference to loss to control the exploration This is the first part of `minimize()`. It returns a list of (gradient, variable) pairs where ""gradient"" is the gradient @@ -204,7 +204,7 @@ class ElasticAverageOptimizer(optimizer.Optimizer): """""" if not var_list: var_list = variables.trainable_variables() - + elastic_difference = [math_ops.subtract(v, lv) for v, lv in zip( variables.trainable_variables(), [self._local_map[var] for var in var_list])] ",0,train 0f2f5b978524f3306e415d18701ea64bd2c688b3,tensorflow/tensorflow,Fix PEP8 (#15378),setup.py,"@@ -70,7 +70,7 @@ setup( 'Topic :: Scientific/Engineering :: Mathematics', 'Topic :: Scientific/Engineering :: Artificial Intelligence', 'Topic :: Software Development', - 'Topic :: Software Development :: Libraries', + 'Topic :: Software Development :: Libraries', 'Topic :: Software Development :: Libraries :: Python Modules', ], license='Apache 2.0', ",0,train 0f2f5b978524f3306e415d18701ea64bd2c688b3,tensorflow/tensorflow,Fix PEP8 (#15378),fully_connected_reader.py,"@@ -62,7 +62,7 @@ def decode(serialized_example): # Convert label from a scalar uint8 tensor to an int32 scalar. 
label = tf.cast(features['label'], tf.int32) - + return image, label def augment(image, label): @@ -172,7 +172,7 @@ def run_training(): step += 1 except tf.errors.OutOfRangeError: print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step)) - + def main(_): run_training() ",0,train 0b537e5b7d4eca61b058d2415f8f93b253506a1a,tensorflow/tensorflow,Don't dump the whole literal into VLOG(1),xla_device_context.cc,"@@ -131,7 +131,7 @@ void XlaDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor, xla::ShapeUtil::MakeShape(shape.element_type(), xla::AsInt64Slice(shape.dimensions()))); - VLOG(1) << ""Transfer to device as literal: "" << literal.ToString() << "" "" + VLOG(2) << ""Transfer to device as literal: "" << literal.ToString() << "" "" << xla_tensor->shaped_buffer().ToString(); if (UseMultipleStreams() && !transfer_manager_->CanShapedBufferBeAccessedNow( @@ -214,7 +214,7 @@ void XlaDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor, device_to_host_stream_.get(), xla_tensor->shaped_buffer(), literal, [ref, xla_tensor, done](xla::Status status) { done([&]() -> Status { - VLOG(1) << ""Transfer from device as literal: "" + VLOG(2) << ""Transfer from device as literal: "" << xla_tensor->shaped_buffer().ToString(); return status; }()); ",0,test b0641138b866a5ffdc511f4ab055735513c57c92,tensorflow/tensorflow,"convert_to_tensor calls eager_convert_to_tensor in eager mode Temporary hack to make most composite ops work. PiperOrigin-RevId: 165205218",ops.py,"@@ -961,6 +961,8 @@ def convert_to_tensor(value, dtype=None, name=None, preferred_dtype=None): RuntimeError: If a registered conversion function returns an invalid value. """""" + if context.in_eager_mode(): + return convert_to_eager_tensor(value, dtype=dtype) return internal_convert_to_tensor( value=value, dtype=dtype, @@ -1005,6 +1007,8 @@ def internal_convert_to_tensor(value, RuntimeError: If a registered conversion function returns an invalid value. """""" + if context.in_eager_mode(): + return convert_to_eager_tensor(value, dtype=dtype) error_prefix = """" if name is None else ""%s: "" % name if dtype is not None: dtype = dtypes.as_dtype(dtype) ",0,train b0641138b866a5ffdc511f4ab055735513c57c92,tensorflow/tensorflow,"convert_to_tensor calls eager_convert_to_tensor in eager mode Temporary hack to make most composite ops work. PiperOrigin-RevId: 165205218",ops_test.py,"@@ -25,6 +25,7 @@ from tensorflow.core.framework import attr_value_pb2 from tensorflow.core.framework import types_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session +from tensorflow.python.eager import context from tensorflow.python.framework import common_shapes from tensorflow.python.framework import constant_op from tensorflow.python.framework import device as pydev @@ -290,6 +291,12 @@ class OperationTest(test_util.TensorFlowTestCase): self.assertAllEqual((4, 1), tensor.get_shape().as_list()) self.assertAllEqual(values, tensor.eval()) + def testConvertToTensorEager(self): + with context.eager_mode(): + t = ops.EagerTensor(1) + converted = ops.convert_to_tensor(t) + self.assertTrue(isinstance(converted, ops.EagerTensor)) + def testConvertToTensorNestedTuple(self): with self.test_session(): values = ((2,), (3,), (5,), (7,)) ",0,train 94bf823c136dce0f7846176f9be6129b990b1c1c,tensorflow/tensorflow,"Set all tf.VarIsInitializedOp to true in resource op lifting pass. 
Currently the resource op lifting pass is only used for the TPU bridge and it is assumed for all TPU computations variables are all initialized. PiperOrigin-RevId: 337593565 Change-Id: I8a5d687eb8b0cafc3de8d6d5ead6ec690d809679",resource_op_lifting.cc,"@@ -159,6 +159,26 @@ Type GetResourceSubtype(Value value) { return nullptr; } +// Replaces all `tf.VarIsInitializedOp` in a block with a constant true. +// TODO(b/171039585): Replace this with proper analysis of +// `tf.VarIsInitializedOp` in regards to resource writes and control flow. +void SetAllVarIsInitializedToTrue(Block* block) { + auto builder = OpBuilder::atBlockBegin(block); + TF::ConstOp const_true = nullptr; + for (auto op : + llvm::make_early_inc_range(block->getOps())) { + builder.setInsertionPoint(op); + if (!const_true) + const_true = builder.create( + op.getLoc(), + DenseIntElementsAttr::get( + RankedTensorType::get(/*shape=*/{}, builder.getI1Type()), true)); + + op.is_initialized().replaceAllUsesWith(const_true); + op.erase(); + } +} + // Performs store-load forwarding. This effectively removes // 1) Any resource loads after a store to that same resource is done // 2) Any resource stores except the last one. @@ -767,8 +787,6 @@ LogicalResult LiftArgRetResourcesForFunction( FuncOp func_op, const llvm::SmallDenseMap& resource_data_types, llvm::function_ref handle_updated_arg_value) { - ForwardStoreToLoad(&func_op.front()); - RegionResourceHoister hoister(func_op); if (failed(hoister.Analyze())) return failure(); @@ -1167,7 +1185,7 @@ void UpdatePartitionedCallOpWithNewCallee( } LogicalResult HoistForControlFlow( - Block*, ModuleOp, + Block*, ModuleOp, bool, llvm::SmallDenseMap*); // A templated routine for handling both PartitionedCallOp and @@ -1176,14 +1194,15 @@ LogicalResult HoistForControlFlow( // flow, then performs lifting on the callee. template LogicalResult HandlePartitionedCallOp( - CallOpType call_op, FuncOp callee, ModuleOp module, + CallOpType call_op, FuncOp callee, ModuleOp module, bool vars_initialized, llvm::SmallDenseMap* lifted_callees) { auto emplace_res = lifted_callees->try_emplace(callee.getName(), PartitionedCallLiftingInfo()); if (emplace_res.second) { // Unseen callee. Perform resource lifting on it. - if (failed(HoistForControlFlow(&callee.front(), module, lifted_callees))) + if (failed(HoistForControlFlow(&callee.front(), module, vars_initialized, + lifted_callees))) return failure(); if (failed(HandlePartitionedCallOpCallee( @@ -1198,26 +1217,28 @@ LogicalResult HandlePartitionedCallOp( // Hoists resource loads/stores from control flow ops in `block` outside the // body/cond/branch/callee functions. LogicalResult HoistForControlFlow( - Block* block, ModuleOp module, + Block* block, ModuleOp module, bool vars_initialized, llvm::SmallDenseMap* lifted_partitioned_call_callees) { + if (vars_initialized) SetAllVarIsInitializedToTrue(block); + for (Operation& op : llvm::make_early_inc_range(*block)) { if (auto while_op = llvm::dyn_cast(&op)) { auto body = while_op.body_function(); auto cond = while_op.cond_function(); // Recursively handle the nested control flow. 
- HoistForControlFlow(&body.front(), module, + HoistForControlFlow(&body.front(), module, vars_initialized, lifted_partitioned_call_callees); - HoistForControlFlow(&cond.front(), module, + HoistForControlFlow(&cond.front(), module, vars_initialized, lifted_partitioned_call_callees); if (failed(HandleWhileLoop(while_op, body, cond))) return failure(); } else if (auto if_op = llvm::dyn_cast(&op)) { auto then_branch = if_op.then_function(); auto else_branch = if_op.else_function(); // Recursively handle the nested control flow. - HoistForControlFlow(&then_branch.front(), module, + HoistForControlFlow(&then_branch.front(), module, vars_initialized, lifted_partitioned_call_callees); - HoistForControlFlow(&else_branch.front(), module, + HoistForControlFlow(&else_branch.front(), module, vars_initialized, lifted_partitioned_call_callees); if (failed(HandleCaseOrIfOp(if_op, {then_branch, else_branch}))) return failure(); @@ -1226,7 +1247,7 @@ LogicalResult HoistForControlFlow( case_op.get_branch_functions(branch_functions); for (FuncOp func : branch_functions) { // Recursively handle the nested control flow. - HoistForControlFlow(&func.front(), module, + HoistForControlFlow(&func.front(), module, vars_initialized, lifted_partitioned_call_callees); } if (failed(HandleCaseOrIfOp(case_op, branch_functions))) return failure(); @@ -1237,6 +1258,7 @@ LogicalResult HoistForControlFlow( ""resource lifting does not support call with nested references.""); } if (failed(HandlePartitionedCallOp(call_op, callee, module, + vars_initialized, lifted_partitioned_call_callees))) { // Nested control flow handling is done in HandlePartitionedCallOp(). return failure(); @@ -1244,12 +1266,13 @@ LogicalResult HoistForControlFlow( } else if (auto call_op = llvm::dyn_cast(&op)) { if (failed(HandlePartitionedCallOp(call_op, call_op.func(), module, + vars_initialized, lifted_partitioned_call_callees))) { return failure(); } } else if (isa(op)) { for (Region& region : op.getRegions()) - HoistForControlFlow(®ion.front(), module, + HoistForControlFlow(®ion.front(), module, vars_initialized, lifted_partitioned_call_callees); LogicalResult result = RegionResourceHoister::ReplaceOpWithNewOp(&op); if (failed(result)) return failure(); @@ -1277,7 +1300,8 @@ void ResourceOpLiftingPass::runOnOperation() { auto walk_result = module.walk([&](FuncOp func_op) { return func_op.walk([&](tf_device::ClusterOp cluster) { LogicalResult result = HoistForControlFlow( - &cluster.GetBody(), module, &lifted_partitioned_call_callees); + &cluster.GetBody(), module, /*vars_initialized=*/true, + &lifted_partitioned_call_callees); if (failed(result)) return WalkResult::interrupt(); result = RegionResourceHoister::ReplaceOpWithNewOp(cluster); if (failed(result)) return WalkResult::interrupt(); @@ -1340,9 +1364,9 @@ LogicalResult ResourceLiftingForFunctionalControlFlow(FuncOp function) { llvm::SmallDenseMap lifted_partitioned_call_callees; - if (failed(HoistForControlFlow(&function.front(), - cast(function.getParentOp()), - &lifted_partitioned_call_callees))) + if (failed(HoistForControlFlow( + &function.front(), cast(function.getParentOp()), + /*vars_initialized=*/false, &lifted_partitioned_call_callees))) return failure(); // Clean up and canonicalize to remove dead local variables as some local ",0,train 82aa53ec3664f5bbd48fa498901d16ef151164ff,tensorflow/tensorflow,"Adjust setup to fix Tensorboard entrypoint (run_main -> main). 
PiperOrigin-RevId: 182119760",setup.py,"@@ -79,13 +79,13 @@ CONSOLE_SCRIPTS = [ # is now declared by the tensorboard pip package. If we remove the # TensorBoard command, pip will inappropriately remove it during install, # even though the command is not removed, just moved to a different wheel. - 'tensorboard = tensorboard.main:run_main', + 'tensorboard = tensorboard.main:main', ] # pylint: enable=line-too-long # remove the tensorboard console script if building tf_nightly if 'tf_nightly' in project_name: - CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:run_main') + CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:main') TEST_PACKAGES = [ 'scipy >= 0.15.1', ",0,test e58e4c754fa6145af2a411b940d8f7347a071b6f,tensorflow/tensorflow,"Minor adjustments to an error message. PiperOrigin-RevId: 190972253",tpu_system_metadata.py,"@@ -72,9 +72,9 @@ def _query_tpu_system_metadata(master_address, run_config, tpu_core_count += 1 break except errors.DeadlineExceededError: - msg = ('Fail to connect Tensorflow master. It could be the TPU worker is ' - 'not ready (still under scheduling) or Tensorflow ' - 'master address is correct: got (%s).' % + msg = ('Failed to connect to the Tensorflow master. The TPU worker may ' + 'not be ready (still scheduling) or the Tensorflow master address ' + 'is incorrect: got (%s).' % (master_address)) # TODO(xiejw): For local or grpc master we might not need retry logic ",0,train 20199e91b3503881ce9a4253d64fa783f731230f,tensorflow/tensorflow,"Don't prematurely return streams PiperOrigin-RevId: 173214110",local_client.cc,"@@ -175,10 +175,15 @@ StatusOr> LocalExecutable::Run( TF_RETURN_IF_ERROR(ValidateExecutionOptions(arguments, options, *backend_)); ExecutableRunOptions actual_options = options; + + Backend::StreamPtr stream; if (options.stream() == nullptr) { + // NB! The lifetime of `stream` needs to match the lifetime of + // `actual_options` (otherwise we will end up using a returned stream in + // ExecuteOnStreamWrapper), which is why it isn't declared in the inner ""if"" + // scope. TF_ASSIGN_OR_RETURN( - Backend::StreamPtr stream, - BorrowStreamForDevice(options.device_ordinal(), backend_)); + stream, BorrowStreamForDevice(options.device_ordinal(), backend_)); actual_options.set_stream(stream.get()); } if (options.allocator() == nullptr) { ",0,train bb405194390f1a60682c07915fa11e60fc027ec0,tensorflow/tensorflow,"[TF 2.0 API Docs] tf.image.adjust_saturation Updated adjust_saturation by adding a usage example in the docstring in image_ops_impl.py. Also added a raise InvalidArgumentError for incorrect shape in the docstring. The issue has been raised and is provided in this link https://github.com/tensorflow/tensorflow/issues/29332",image_ops_impl.py,"@@ -2041,6 +2041,16 @@ def adjust_saturation(image, saturation_factor, name=None): Returns: Adjusted image(s), same shape and DType as `image`. + + Usage Example: + ```python + >> import tensorflow as tf + >> x = tf.random.normal(shape=(256, 256, 3)) + >> tf.image.adjust_saturation(x, 0.5) + ``` + + Raises: + InvalidArgumentError: input must have 3 channels """""" with ops.name_scope(name, 'adjust_saturation', [image]) as name: image = ops.convert_to_tensor(image, name='image') ",0,train edd1f81155294073649dee0b3ec4f6b0e235f328,tensorflow/tensorflow,"[TF:XLA] Update resource_operation_safety_analysis file comment. Read->read and write->write dependencies are not considered a safety problem, but the comment still stated they would not be clustered. 
PiperOrigin-RevId: 258474807",resource_operation_safety_analysis.h,"@@ -25,11 +25,10 @@ namespace tensorflow { // execution and all the resource writes to the end. This means it cannot // enforce arbitrary ordering dependencies (via control or data edges) between // resource operations. Since all resource reads happen before all resource -// writes, edges constraining resource reads to happen before resource writes -// are fine, but all other kinds of edges are problematic. This analysis -// returns the set of pairs of resource operations that cannot be put in the -// same cluster because XLA cannot respect the dependencies between them in the -// TensorFlow program. +// writes, edges constraining resource writes to happen before resource reads +// are problematic. This analysis returns the set of pairs of resource +// operations that cannot be put in the same cluster because XLA cannot respect +// the dependencies between them in the TensorFlow program. // // The restrictions are not transitive: it is fine to put A and C in the same // cluster even if the returned set contains (A,B) and (B,C). @@ -41,19 +40,15 @@ namespace tensorflow { // // For instance if we auto-cluster all operations in this TensorFlow graph: // -// ReadVariablepOp0 -> ReadVariableOp1 +// AssignVariablepOp0 -> AssignVariableOp1 // | // v -// AssignVariableOp0 -> AssignVariableOp1 +// ReadVariableOp0 -> ReadVariableOp1 // -// we will lose the ReadVariablepOp0 -> ReadVariableOp1 and the -// AssignVariableOp0 -> AssignVariableOp1 dependencies. I.e. it is possible for -// XlaLaunchOp to issue ReadVariableOp1 before ReadVariablepOp0 since it reads -// all the resource variables when the cluster starts executing without any -// particular ordering between them; same holds for the AssignVariableOp0 -> -// AssignVariableOp1 edge. The ReadVariableOp1 -> AssignVariableOp0 edge will -// be respected by XlaLaunchOp though because all reads happen before all -// writes. +// we will lose the AssignVariablepOp1 -> ReadVariableOp0. The ReadVariableOp0 +// -> ReadVariableOp1 and AssignVariableOp0 -> AssignVariableOp1 edges will be +// respected by XlaLaunchOp though because all reads happen before all writes +// with that limited clustering.. // // // NB! The result computed by this analysis assumes that we don't auto-cluster ",0,train 5de6f68848b8bc431e18a53fa03700820bcee57f,tensorflow/tensorflow,"Forward declare condition_variable Necessary to enable friendship with mutex",mutex.h,"@@ -31,6 +31,8 @@ namespace tensorflow { enum LinkerInitialized { LINKER_INITIALIZED }; +class condition_variable; + // Mimic std::mutex + C++17's shared_mutex, adding a LinkerInitialized // constructor interface. This type is as fast as mutex, but is also a shared // lock. ",0,train 59cf62cc475651e75fc8d2948daf2444cc0e8c15,tensorflow/tensorflow,"Change estimator dep to >1.13rc0. PiperOrigin-RevId: 231246464",setup.py,"@@ -58,7 +58,7 @@ REQUIRED_PACKAGES = [ 'six >= 1.10.0', 'protobuf >= 3.6.1', 'tensorboard >= 1.12.0, < 1.13.0', - 'tensorflow_estimator >= 1.13.0, < 1.14.0', + 'tensorflow_estimator >= 1.13.0rc0, < 1.14.0rc0', 'termcolor >= 1.1.0', ] ",0,train 569095ba3d5a57a95595d7db685b4bb748ca7337,tensorflow/tensorflow,"Make is_resource_variable() an tf.__internal__ API. 
PiperOrigin-RevId: 352613683 Change-Id: I92b67dc0d6d93dccf096690ff84c99cbd1221295",recurrent_v2.py,"@@ -37,7 +37,6 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_cudnn_rnn_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn -from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variables from tensorflow.python.platform import sysconfig @@ -419,19 +418,6 @@ class GRU(recurrent.DropoutRNNCellMixin, recurrent.GRU): if _use_new_code(): self._defun_wrapper = _DefunWrapper(time_major, go_backwards, 'gru') - def build(self, input_shape): - super(GRU, self).build(input_shape) - - if not all(isinstance(v, resource_variable_ops.ResourceVariable) - for v in self.weights): - # Non-resource variables, such as DistributedVariables and - # AutoCastVariables, do not work properly with the implementation - # selector, which is used when cuDNN is used. However, by chance, such - # variables happen to work in LSTM, so this check is only needed for GRU. - # TODO(b/136512020): Make non-resource variables work with the - # implementation selector. - self._could_use_gpu_kernel = False - def call(self, inputs, mask=None, training=None, initial_state=None): # The input should be dense, padded with zeros. If a ragged input is fed # into the layer, it is padded and the row lengths are used for masking. ",0,train 569095ba3d5a57a95595d7db685b4bb748ca7337,tensorflow/tensorflow,"Make is_resource_variable() an tf.__internal__ API. PiperOrigin-RevId: 352613683 Change-Id: I92b67dc0d6d93dccf096690ff84c99cbd1221295",tracking_util_test.py,"@@ -39,7 +39,6 @@ from tensorflow.python.keras.optimizer_v2 import adam from tensorflow.python.module import module from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops -from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import template from tensorflow.python.ops import variable_scope @@ -273,7 +272,7 @@ class CheckpointingTests(keras_parameterized.TestCase): # Optimizer slot variables are created when the original variable is # restored. self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) - dummy_var = resource_variable_ops.ResourceVariable([1.]) + dummy_var = variables_lib.Variable([1.]) on_create_optimizer.minimize(loss=dummy_var.read_value, var_list=[dummy_var]) status.assert_existing_objects_matched() @@ -459,8 +458,8 @@ class CheckpointingTests(keras_parameterized.TestCase): def __init__(self): super(Model, self).__init__() - self.w = resource_variable_ops.ResourceVariable(0.0) - self.b = resource_variable_ops.ResourceVariable(0.0) + self.w = variables_lib.Variable(0.0) + self.b = variables_lib.Variable(0.0) self.vars = [self.w, self.b] def call(self, x): @@ -874,8 +873,7 @@ class CheckpointCompatibilityTests(keras_parameterized.TestCase): self._check_sentinels(root) # Check that there is no error when keys are missing from the name-based # checkpoint. - root.not_in_name_checkpoint = resource_variable_ops.ResourceVariable( - [1.]) + root.not_in_name_checkpoint = variables_lib.Variable([1.]) status = object_saver.restore(save_path) with self.assertRaises(AssertionError): status.assert_existing_objects_matched() ",0,train 569095ba3d5a57a95595d7db685b4bb748ca7337,tensorflow/tensorflow,"Make is_resource_variable() an tf.__internal__ API. 
PiperOrigin-RevId: 352613683 Change-Id: I92b67dc0d6d93dccf096690ff84c99cbd1221295",resource_variable_ops.py,"@@ -55,6 +55,7 @@ from tensorflow.python.types import core from tensorflow.python.util import _pywrap_utils from tensorflow.python.util import compat from tensorflow.python.util.deprecation import deprecated +from tensorflow.python.util.tf_export import tf_export acd.register_read_only_resource_op(""ReadVariableOp"") acd.register_read_only_resource_op(""VariableShape"") @@ -2211,6 +2212,7 @@ ops.register_proto_function( from_proto=_from_proto_fn) +@tf_export(""__internal__.ops.is_resource_variable"", v1=[]) def is_resource_variable(var): """"""""Returns True if `var` is to be considered a ResourceVariable."""""" return isinstance(var, BaseResourceVariable) or hasattr( ",0,train 137c954eed6e5800d1ab6dda74c73049791fdac7,tensorflow/tensorflow,"[MLIR][KernelGen] Register MLIR context flags in `tf_to_kernel` This change adds support for MLIR context flags like `--mlir-disable-threading` to the `tf_to_kernel` tool. PiperOrigin-RevId: 369658258 Change-Id: I73e06caf8c8e72916d9746b1f9828dbf62b67fc1",tf_to_kernel.cc,"@@ -173,6 +173,7 @@ int main(int argc, char** argv) { llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); mlir::registerPassManagerCLOptions(); + mlir::registerMLIRContextCLOptions(); llvm::cl::ParseCommandLineOptions(argc, argv, ""TF op kernel generator\n""); auto status = tensorflow::kernel_gen::Run( ",0,test bb7f4079afbcb11bb360846849278253207ea8cc,tensorflow/tensorflow,"[tf.data service] Enable zero-copy data transfer for AUTO mode. Previously, zero-copy data transfer is only enabled for LOCAL target workers: https://github.com/tensorflow/tensorflow/blob/fdfd1e09894e082e13314dffc9d36990524ac3f1/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc#L1141 Now that we're supporting the hybrid mode, we should use the local read protocol whenever there's a local worker. PiperOrigin-RevId: 395266341 Change-Id: I37b881de55f6e7c858c70e1c98a4fad540f5568b",worker_client.cc,"@@ -50,6 +50,16 @@ limitations under the License. 
namespace tensorflow { namespace data { +StatusOr> +CreateDataServiceWorkerClient(const std::string& address, + const std::string& protocol, + const std::string& transfer_protocol) { + auto client = absl::make_unique(address, protocol, + transfer_protocol); + TF_RETURN_IF_ERROR(client->Initialize()); + return client; +} + Status DataServiceWorkerClient::GetElement(const GetElementRequest& req, GetElementResult& result) { TF_RETURN_IF_ERROR(EnsureInitialized()); @@ -62,22 +72,20 @@ Status DataServiceWorkerClient::EnsureInitialized() { return Status::OK(); } TF_RETURN_IF_ERROR(DataTransferClient::Build( - transfer_protocol_, {protocol_, address_}, &client_)); + GetDataTransferProtocol(), {protocol_, address_}, &client_)); return Status::OK(); } -void DataServiceWorkerClient::TryCancel() { client_->TryCancel(); } - -StatusOr> -CreateDataServiceWorkerClient(const std::string& address, - const std::string& protocol, - const std::string& transfer_protocol) { - auto client = absl::make_unique(address, protocol, - transfer_protocol); - TF_RETURN_IF_ERROR(client->Initialize()); - return client; +std::string DataServiceWorkerClient::GetDataTransferProtocol() const { + if (transfer_protocol_ == kGrpcTransferProtocol && + LocalWorkers::Get(address_) != nullptr) { + return kLocalTransferProtocol; + } + return transfer_protocol_; } +void DataServiceWorkerClient::TryCancel() { client_->TryCancel(); } + class GrpcDataTransferClient : public DataTransferClient { public: GrpcDataTransferClient(std::shared_ptr credentials, @@ -217,8 +225,8 @@ class LocalDataTransferClient : public DataTransferClient { LocalWorkers::Get(worker_address_); if (!worker) { return errors::Cancelled(absl::Substitute( - ""Worker at address $0 is no longer available; cancel request for "" - ""task $1."", + ""Local worker at address $0 is no longer available; cancel request "" + ""for task $1."", worker_address_, req.task_id())); } return worker; ",0,train bb7f4079afbcb11bb360846849278253207ea8cc,tensorflow/tensorflow,"[tf.data service] Enable zero-copy data transfer for AUTO mode. Previously, zero-copy data transfer is only enabled for LOCAL target workers: https://github.com/tensorflow/tensorflow/blob/fdfd1e09894e082e13314dffc9d36990524ac3f1/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc#L1141 Now that we're supporting the hybrid mode, we should use the local read protocol whenever there's a local worker. PiperOrigin-RevId: 395266341 Change-Id: I37b881de55f6e7c858c70e1c98a4fad540f5568b",worker_client.h,"@@ -52,6 +52,10 @@ class DataServiceWorkerClient : public DataServiceClientBase { Status EnsureInitialized() override; private: + // Returns the data transfer protocol, preferring to use the local transfer + // protocol if a local tf.data worker exists. + std::string GetDataTransferProtocol() const; + const std::string transfer_protocol_; mutex mu_; // Initialization is guarded by `mu_`, but using the stub does not require ",0,train bb7f4079afbcb11bb360846849278253207ea8cc,tensorflow/tensorflow,"[tf.data service] Enable zero-copy data transfer for AUTO mode. Previously, zero-copy data transfer is only enabled for LOCAL target workers: https://github.com/tensorflow/tensorflow/blob/fdfd1e09894e082e13314dffc9d36990524ac3f1/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc#L1141 Now that we're supporting the hybrid mode, we should use the local read protocol whenever there's a local worker. 
PiperOrigin-RevId: 395266341 Change-Id: I37b881de55f6e7c858c70e1c98a4fad540f5568b",worker_client_test.cc,"@@ -142,7 +142,7 @@ TEST_F(WorkerClientTest, LocalRead) { LocalWorkers::Remove(GetWorkerAddress()); EXPECT_THAT(GetElement(*client, task_id), StatusIs(error::CANCELLED, - MatchesRegex(""Worker.*is no longer available.*""))); + MatchesRegex(""Local worker.*is no longer available.*""))); } TEST_F(WorkerClientTest, LocalReadEmptyDataset) { @@ -161,7 +161,7 @@ TEST_F(WorkerClientTest, LocalReadEmptyDataset) { LocalWorkers::Remove(GetWorkerAddress()); EXPECT_THAT(GetElement(*client, task_id), StatusIs(error::CANCELLED, - MatchesRegex(""Worker.*is no longer available.*""))); + MatchesRegex(""Local worker.*is no longer available.*""))); } TEST_F(WorkerClientTest, GrpcRead) { @@ -178,12 +178,12 @@ TEST_F(WorkerClientTest, GrpcRead) { EXPECT_FALSE(result.end_of_sequence); } - // Remove the local worker from `LocalWorkers`. Since the client reads from - // gRPC, this will not cause the request to fail. + // Remove the local worker from `LocalWorkers`. Since the client reads from a + // local server, this should cause the request to fail. LocalWorkers::Remove(GetWorkerAddress()); - TF_ASSERT_OK_AND_ASSIGN(GetElementResult result, - GetElement(*client, task_id)); - EXPECT_TRUE(result.end_of_sequence); + EXPECT_THAT(GetElement(*client, task_id), + StatusIs(error::CANCELLED, + MatchesRegex(""Local worker.*is no longer available.*""))); } TEST_F(WorkerClientTest, LocalServerShutsDown) { @@ -198,7 +198,7 @@ TEST_F(WorkerClientTest, LocalServerShutsDown) { test_cluster_->StopWorkers(); EXPECT_THAT(GetElement(*client, task_id), StatusIs(error::CANCELLED, - MatchesRegex(""Worker.*is no longer available.*""))); + MatchesRegex(""Local worker.*is no longer available.*""))); } TEST_F(WorkerClientTest, CancelClient) { ",0,train 3cb4deb6ddd20162ac4aa40db842de318d94f77b,tensorflow/tensorflow,"Make cuda_py_test create a gpu and cpu target. PiperOrigin-RevId: 217838326",pip_smoke_test.py,"@@ -146,7 +146,7 @@ def main(): missing_dependencies = [] # File extensions and endings to ignore - ignore_extensions = [""_test"", ""_test.py""] + ignore_extensions = [""_test"", ""_test.py"", ""_test_gpu"", ""_test_gpu.py""] ignored_files = 0 blacklisted_files = len(BLACKLIST) ",0,train 9361cd6fe21c78fea9260935d5121c9c9cd76f93,tensorflow/tensorflow,"Update docstring for keras.layers.ReLU. PiperOrigin-RevId: 290215220 Change-Id: Iff1321a25f7ee3c7a25c9725de8bbfcb7b65434c",advanced_activations.py,"@@ -276,22 +276,42 @@ class ReLU(Layer): With default values, it returns element-wise `max(x, 0)`. Otherwise, it follows: - `f(x) = max_value` for `x >= max_value`, - `f(x) = x` for `threshold <= x < max_value`, - `f(x) = negative_slope * (x - threshold)` otherwise. + $$f(x) = max_value if x >= max_value$$ + $$f(x) = x if threshold <= x < max_value$$ + $$f(x) = negative_slope * (x - threshold) otherwise$$ + + Usage: + + >>> layer = tf.keras.layers.ReLU() + >>> output = layer([-3.0, -1.0, 0.0, 2.0]) + >>> list(output.numpy()) + [0.0, 0.0, 0.0, 2.0] + >>> layer = tf.keras.layers.ReLU(max_value=1.0) + >>> output = layer([-3.0, -1.0, 0.0, 2.0]) + >>> list(output.numpy()) + [0.0, 0.0, 0.0, 1.0] + >>> layer = tf.keras.layers.ReLU(negative_slope=1.0) + >>> output = layer([-3.0, -1.0, 0.0, 2.0]) + >>> list(output.numpy()) + [-3.0, -1.0, 0.0, 2.0] + >>> layer = tf.keras.layers.ReLU(threshold=1.5) + >>> output = layer([-3.0, -1.0, 1.0, 2.0]) + >>> list(output.numpy()) + [0.0, 0.0, 0.0, 2.0] Input shape: Arbitrary. 
Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) + (tuple of integers, does not include the batch axis) when using this layer as the first layer in a model. Output shape: Same shape as the input. Arguments: - max_value: Float >= 0. Maximum activation value. - negative_slope: Float >= 0. Negative slope coefficient. - threshold: Float. Threshold value for thresholded activation. + max_value: Float >= 0. Maximum activation value. Default to None, which + means unlimited. + negative_slope: Float >= 0. Negative slope coefficient. Default to 0. + threshold: Float. Threshold value for thresholded activation. Default to 0. """""" def __init__(self, max_value=None, negative_slope=0, threshold=0, **kwargs): ",0,train 579b142eb191b50deed006b264c4180e8575cd6a,tensorflow/tensorflow,"[MLIR] Add verification for XLA HLO -> LMHLO, and fix a verification failure. Specifically, sometimes the arg type doesn't equal to the type of its use, even if the arg is an entry computation parameter. This is because program_shape in the computation may not match the operand shape. Add a MemrefReinterpretCast if two shapes don't equal. The verification failure is discovered in https://github.com/google/jax/discussions/6645. PiperOrigin-RevId: 372179297 Change-Id: I0865ff030217410cb5913e6386a53d7a68d3b7dd",mhlo_to_lhlo_with_xla.cc,"@@ -38,7 +38,6 @@ limitations under the License. #include ""mlir/IR/Operation.h"" // from @llvm-project #include ""mlir/IR/PatternMatch.h"" // from @llvm-project #include ""mlir/IR/SymbolTable.h"" // from @llvm-project -#include ""mlir/IR/Verifier.h"" // from @llvm-project #include ""mlir/Pass/Pass.h"" // from @llvm-project #include ""mlir/Pass/PassOptions.h"" // from @llvm-project #include ""mlir/Translation.h"" // from @llvm-project @@ -98,10 +97,6 @@ StatusOr> HloModuleFromProto( return HloModule::CreateFromProto(module_proto, module_config); } -bool AllocationShouldLowerToTypedArg(const BufferAllocation* alloc) { - return alloc->is_entry_computation_parameter() && !alloc->maybe_live_out(); -} - } // namespace // Convert the MLIR `module` from HLO dialect to LHLO dialect using XLA for the @@ -1518,42 +1513,39 @@ StatusOr LhloDialectEmitter::GetOrCreateArrayView( TF_ASSIGN_OR_RETURN(BufferAllocation::Slice slice, assignment_.GetUniqueSlice(instr, shape_index)); Value alloc = allocations_[slice.allocation()]; + if (alloc.getType() == out_type && slice.offset() == 0) { + return cached_value = alloc; + } + + auto out_memref_type = out_type.dyn_cast(); + if (!out_memref_type) + return tensorflow::errors::Internal( + ""Expected memref type when creating a view for leaf type of a "" + ""tuple.""); + + Value byte_shift = + builder_.create(alloc.getLoc(), slice.offset()); + + xla::Shape physical_shape = + xla::ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( + static_shape); + TF_ASSIGN_OR_RETURN( + Type physical_out_type, + xla::ConvertShapeToType(physical_shape, builder_)); // TODO(timshen): revisit location handling. 
Location loc = builder_.getUnknownLoc(); - Value result; - if (AllocationShouldLowerToTypedArg(slice.allocation())) { - TF_RET_CHECK(slice.offset() == 0); - TF_RET_CHECK(slice.size() == slice.allocation()->size()); - result = alloc; - } else { - Value byte_shift = - builder_.create(alloc.getLoc(), slice.offset()); - - xla::Shape physical_shape = - xla::ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( - static_shape); - TF_ASSIGN_OR_RETURN( - Type physical_out_type, - xla::ConvertShapeToType(physical_shape, builder_)); - - // ViewOp only takes memrefs without affine maps (layouts). Let ViewOp - // produce the physical shape (where dimensions are ordered in major to - // minor) first, then follow up with a MemRefReinterpretCast to cast the - // resulting memref to the original layout. - result = builder_.create(loc, physical_out_type, alloc, - byte_shift, - /*sizes=*/ValueRange{}); - } - if (result.getType() != out_type) { + // ViewOp only takes memrefs without affine maps (layouts). Let ViewOp produce + // the physical shape (where dimensions are ordered in major to minor) first, + // then follow up with a MemRefReinterpretCast to cast the resulting memref to + // the original layout. + Value result = + builder_.create(loc, physical_out_type, alloc, byte_shift, + /*sizes=*/ValueRange{}); + if (physical_out_type != out_type) { int64_t out_offset; SmallVector out_strides; - auto out_memref_type = out_type.dyn_cast(); - if (!out_memref_type) - return tensorflow::errors::Internal( - ""Expected memref type when creating a view for leaf type of a "" - ""tuple.""); if (failed(getStridesAndOffset(out_memref_type, out_strides, out_offset))) return tensorflow::errors::Internal( ""Failed to get strides and offset from the output type.""); @@ -1696,7 +1688,7 @@ Status LhloDialectEmitter::Initialize() { NamedAttrList arg_attr_list; mlir::Type arg_type; - if (AllocationShouldLowerToTypedArg(alloc)) { + if (alloc->is_entry_computation_parameter() && !alloc->maybe_live_out()) { xla::Shape buffer_shape = xla::ShapeUtil::GetSubshape( computation_.parameter_instruction(alloc->parameter_number()) ->shape(), @@ -1790,9 +1782,7 @@ Status HloToLhloModule(const BufferAssignment& assignment, if (!schedule) return xla::Unimplemented(""Missing sequential order for the computation""); const std::vector& ordering = schedule->instructions(); - TF_RETURN_IF_ERROR(computation->AcceptOrdered(&emitter, ordering)); - TF_RET_CHECK(succeeded(mlir::verify(module))); - return Status::OK(); + return computation->AcceptOrdered(&emitter, ordering); } OwningModuleRef HloTextToLhloTranslateFunction(llvm::StringRef input, ",0,train 579b142eb191b50deed006b264c4180e8575cd6a,tensorflow/tensorflow,"[MLIR] Add verification for XLA HLO -> LMHLO, and fix a verification failure. Specifically, sometimes the arg type doesn't equal to the type of its use, even if the arg is an entry computation parameter. This is because program_shape in the computation may not match the operand shape. Add a MemrefReinterpretCast if two shapes don't equal. The verification failure is discovered in https://github.com/google/jax/discussions/6645. 
PiperOrigin-RevId: 372179297 Change-Id: I0865ff030217410cb5913e6386a53d7a68d3b7dd",gpu_compiler.cc,"@@ -169,6 +169,7 @@ Status GpuCompiler::OptimizeHloModule( pipeline.AddInvariantChecker(/*layout_sensitive=*/false, /*allow_mixed_precision=*/false); pipeline.AddPass(); + pipeline.AddPass(); pipeline.AddPass(); pipeline.AddPass(); @@ -268,7 +269,6 @@ Status GpuCompiler::OptimizeHloModule( pass.AddPass(); pass.AddPass(); pass.AddPass(); - pipeline.AddPass(); } pipeline.AddPass( ",0,train 579b142eb191b50deed006b264c4180e8575cd6a,tensorflow/tensorflow,"[MLIR] Add verification for XLA HLO -> LMHLO, and fix a verification failure. Specifically, sometimes the arg type doesn't equal to the type of its use, even if the arg is an entry computation parameter. This is because program_shape in the computation may not match the operand shape. Add a MemrefReinterpretCast if two shapes don't equal. The verification failure is discovered in https://github.com/google/jax/discussions/6645. PiperOrigin-RevId: 372179297 Change-Id: I0865ff030217410cb5913e6386a53d7a68d3b7dd",ir_emission_utils.cc,"@@ -850,49 +850,52 @@ StatusOr GetAllocationSliceForMlir( int64 size = GetMemRefSizeInBytes(v.getType().cast()); + if (auto arg = v.dyn_cast()) { + return BufferAllocation::Slice( + &allocations[GetAllocationIndex(arg, constant_name)], 0, size); + } + // We match the following patterns here: - // base := ViewOp(arg) | get_global_memref (global_memref) | arg + // base := ViewOp(arg) | get_global_memref (global_memref) // root := base | MemRefReinterpretCastOp(base) - if (auto cast = mlir::dyn_cast_or_null( - v.getDefiningOp())) { - v = cast.getViewSource(); - } - if (auto view = - mlir::dyn_cast_or_null(v.getDefiningOp())) { - TF_RET_CHECK(view.source().isa()); - - return BufferAllocation::Slice( - &allocations[GetAllocationIndex( - view.source().cast(), constant_name)], - mlir::cast(view.byte_shift().getDefiningOp()) - .value() - .cast() - .getValue() - .getSExtValue(), - size); - } - if (auto get_global = mlir::dyn_cast_or_null( - v.getDefiningOp())) { - auto module = get_global->getParentOfType(); - if (constant_name) { - *constant_name = get_global.name().str(); + if (mlir::Operation* op = v.getDefiningOp()) { + if (auto cast = mlir::dyn_cast(op)) { + mlir::Value source = cast.getViewSource(); + op = source.getDefiningOp(); + if (!op) { + return Unimplemented(""MemRefReinterpretCastOp has to wrap an op""); + } } - auto global = mlir::cast( - module.lookupSymbol(get_global.name())); - int64_t index = - global->getAttrOfType(""lmhlo.alloc"").getInt(); - return BufferAllocation::Slice(&allocations[index], 0, - allocations[index].size()); - } - if (auto arg = v.dyn_cast()) { - return BufferAllocation::Slice( - &allocations[GetAllocationIndex(arg, constant_name)], 0, size); + if (auto view = mlir::dyn_cast(op)) { + return BufferAllocation::Slice( + &allocations[GetAllocationIndex( + view.source().cast(), constant_name)], + mlir::cast(view.byte_shift().getDefiningOp()) + .value() + .cast() + .getValue() + .getSExtValue(), + size); + } else if (auto get_global = + mlir::dyn_cast(op)) { + auto module = get_global->getParentOfType(); + if (constant_name) { + *constant_name = get_global.name().str(); + } + auto global = mlir::cast( + module.lookupSymbol(get_global.name())); + int64_t index = + global->getAttrOfType(""lmhlo.alloc"").getInt(); + return BufferAllocation::Slice(&allocations[index], 0, + allocations[index].size()); + } + return Unimplemented(""MemRefReinterpretCastOp has to wrap a ViewOp""); } return Unimplemented( 
""Operand has to be in the form of ViewOp(arg) or "" - ""StaticMemRefCastOp(ViewOp(arg)) or arg""); + ""StaticMemRefCastOp(ViewOp(arg))""); } bool CanEmitFusedDynamicUpdateSliceInPlaceForGpu( ",0,train dd3b812879671d633ddbb644a48f6fc44faae0bc,tensorflow/tensorflow,"Raise a better error message when a list element is not convertible to Tensor. Previously, we hit an erroneous assertion when converting a list argument to a list of tensors. This change makes it clearer what caused the error when one or more of the arguments is an object that is not convertible to `tf.Tensor`. Fixes #2385. Change: 122600354",op_def_library.py,"@@ -408,25 +408,36 @@ class OpDefLibrary(object): values = ops.convert_n_to_tensor( values, name=input_arg.name, dtype=dtype if dtype else None, as_ref=input_arg.is_ref) + if input_arg.number_attr and len( + set(v.dtype.base_dtype for v in values)) > 1: + raise TypeError() # All types should match. except (TypeError, ValueError): - assert dtype is not None, ""Should not fail if dtype is None"" - assert input_arg.number_attr, ""Should be number_attr case"" # What types does the conversion function think values have? - values = ops.convert_n_to_tensor(values, as_ref=input_arg.is_ref) - observed = "", "".join(v.dtype.base_dtype.name for v in values) + observed_types = [] + for value in values: + try: + converted_value = ops.convert_to_tensor( + value, as_ref=input_arg.is_ref) + observed_types.append(converted_value.dtype.base_dtype.name) + except (TypeError, ValueError): + observed_types.append("""") + observed = "", "".join(observed_types) prefix = ( ""Tensors in list passed to '%s' of '%s' Op have types [%s]"" % (input_name, op_type_name, observed)) - if input_arg.type != types_pb2.DT_INVALID: - raise TypeError(""%s that do not match expected type %s."" % - (prefix, dtype.name)) - elif input_arg.type_attr in attrs: - raise TypeError(""%s that do not match type %s inferred from "" - ""earlier arguments."" % - (prefix, dtype.name)) + if input_arg.number_attr: + if input_arg.type != types_pb2.DT_INVALID: + raise TypeError(""%s that do not match expected type %s."" % + (prefix, dtype.name)) + elif input_arg.type_attr in attrs: + raise TypeError(""%s that do not match type %s inferred from "" + ""earlier arguments."" % + (prefix, dtype.name)) + else: + raise TypeError(""%s that don't all match."" % prefix) else: - raise TypeError(""%s that don't all match."" % prefix) + raise TypeError(""%s that are invalid."" % prefix) types = [x.dtype for x in values] inputs.extend(values) ",0,train dd3b812879671d633ddbb644a48f6fc44faae0bc,tensorflow/tensorflow,"Raise a better error message when a list element is not convertible to Tensor. Previously, we hit an erroneous assertion when converting a list argument to a list of tensors. This change makes it clearer what caused the error when one or more of the arguments is an object that is not convertible to `tf.Tensor`. Fixes #2385. 
Change: 122600354",op_def_library_test.py,"@@ -400,6 +400,12 @@ class OpDefLibraryTest(test_util.TensorFlowTestCase): ""Expected list for 'a' "" ""argument to 'TypeList' Op, not "") + with self.assertRaises(TypeError) as cm: + self._lib.apply_op(""TypeList"", a=[self.Tensor(dtypes.int32), None]) + self.assertStartsWith(str(cm.exception), + ""Tensors in list passed to 'a' of 'TypeList' Op "" + ""have types [int32, ]"") + def testTypeListTwice(self): self._add_op(""name: 'TypeListTwice' "" ""input_arg { name: 'a' type_list_attr: 'T' } "" @@ -957,6 +963,16 @@ class OpDefLibraryTest(test_util.TensorFlowTestCase): attr { key: 'N' value { i: 2 } } """""", op.node_def) + op = self._lib.apply_op(""NPolymorphicIn"", + a=[self.Tensor(dtypes.float32, name=""y""), + self.Tensor(dtypes.float32_ref, name=""z"")], + name=""r"") + self.assertProtoEquals("""""" + name: 'r' op: 'NPolymorphicIn' input: 'y' input: 'z' + attr { key: 'T' value { type: DT_FLOAT } } + attr { key: 'N' value { i: 2 } } + """""", op.node_def) + with self.assertRaises(ValueError) as cm: self._lib.apply_op(""NPolymorphicIn"", a=[99]) self.assertEqual(str(cm.exception), @@ -966,8 +982,8 @@ class OpDefLibraryTest(test_util.TensorFlowTestCase): with self.assertRaises(TypeError) as cm: self._lib.apply_op(""NPolymorphicIn"", a=[38, ""bar""]) self.assertEqual(str(cm.exception), - ""All tensors passed to 'a' of 'NPolymorphicIn' "" - ""Op must have the same type."") + ""Tensors in list passed to 'a' of 'NPolymorphicIn' Op "" + ""have types [int32, string] that don't all match."") with self.assertRaises(TypeError) as cm: self._lib.apply_op(""NPolymorphicIn"", @@ -976,6 +992,13 @@ class OpDefLibraryTest(test_util.TensorFlowTestCase): ""Tensors in list passed to 'a' of 'NPolymorphicIn' Op "" ""have types [int32, string] that don't all match."") + with self.assertRaises(TypeError) as cm: + self._lib.apply_op(""NPolymorphicIn"", a=[38, None]) + self.assertEqual(str(cm.exception), + ""Tensors in list passed to 'a' of 'NPolymorphicIn' Op "" + ""have types [int32, ] that "" + ""don't all match."") + with self.assertRaises(TypeError) as cm: self._lib.apply_op(""NPolymorphicIn"", a=[""abcd"", self.Tensor(dtypes.int32)]) ",0,train 8b4d11790b87efa4165d15612c60cc41c21f6bf8,tensorflow/tensorflow,"[XLA:CPU] Enable transpose folding of LLVM gemv Naive LLVM doesn't care, tiled LLVM gemm gets promoted to Eigen for transposed inputs. PiperOrigin-RevId: 241754291",dot_op_emitter.cc,"@@ -1007,11 +1007,8 @@ bool DotImplementationCanHandleTranspose( GetDotImplementationStrategy(dot_instr.parent()->parent()->config(), DotInfo(dot_instr), target_machine_features); - // TODO(sanjoy): This is not quite right, it should be `impl_strategy == - // kEigen || impl_strategy == kTiledLlvmIrGemv || impl_strategy == - // kNaiveLlvmIr` but I'll fix this in a later CL in the interest of keeping - // the CL adding this comment NFC. - return impl_strategy == DotImplementationStrategy::kTiledLlvmIrGemm || + return impl_strategy == DotImplementationStrategy::kNaiveLlvmIr || + impl_strategy == DotImplementationStrategy::kTiledLlvmIrGemv || impl_strategy == DotImplementationStrategy::kEigen; } ",0,train 5c545e646ce54934e92ea59a24ff9bcfe52991df,tensorflow/tensorflow,"[tf.data] Minor changes to xprof metadata. PiperOrigin-RevId: 353164453 Change-Id: If38813e6d7cd8ab843859d0d3b0bc7294a2e3a95",map_and_batch_dataset_op.cc,"@@ -33,6 +33,7 @@ limitations under the License. 
#include ""tensorflow/core/lib/random/random.h"" #include ""tensorflow/core/lib/strings/strcat.h"" #include ""tensorflow/core/platform/cpu_info.h"" +#include ""tensorflow/core/platform/env_time.h"" #include ""tensorflow/core/platform/status.h"" #include ""tensorflow/core/platform/stringprintf.h"" #include ""tensorflow/core/platform/tracing.h"" @@ -254,7 +255,7 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase { } profiler::TraceMe traceme([&] { return profiler::TraceMeEncode(""MapAndBatchConsume"", - {{""element_id"", result->id}}); + {{""element_id"", result->uid}}); }); return ProcessResult(ctx, result, out_tensors, end_of_sequence); } @@ -328,14 +329,14 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase { // BatchResult encapsulates the output batch, as well as ancillary // metadata required to execute the fused map-and-batch operation. struct BatchResult { - explicit BatchResult(int64 batch_size, int64 id) + explicit BatchResult(int64 batch_size) : end_of_input(false), num_elements(0), output_allocated(false), status(Status::OK()), status_offset(-1), num_calls(batch_size), - id(id) {} + uid(tensorflow::EnvTime::NowNanos()) {} // UpdateStatus updates the batch's aggregate Status. // @@ -362,7 +363,7 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase { int64 status_offset TF_GUARDED_BY(mu); // Counts the number of outstanding calls for this batch. int64 num_calls TF_GUARDED_BY(&Iterator::mu_); - const int64 id; + const uint64 uid = -1; }; void CallCompleted(const std::shared_ptr& ctx, @@ -387,7 +388,7 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase { TF_LOCKS_EXCLUDED(*mu_) { profiler::TraceMe traceme([&] { return profiler::TraceMeEncode(""MapAndBatchProduce"", - {{""element_id"", result->id}}); + {{""element_id"", result->uid}}); }); // Get the next input element. std::vector input_element; @@ -606,8 +607,6 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase { (batch_results_.size() == max_batch_results_ && call_counter_ % dataset()->batch_size_ == 0)); }; - // Counts the total number of batches to use as an id of BatchResult. - int64 num_total_batches = 1; while (true) { { mutex_lock l(*mu_); @@ -632,8 +631,8 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase { while (!busy()) { if (call_counter_ % dataset()->batch_size_ == 0) { - batch_results_.push_back(std::make_shared( - dataset()->batch_size_, num_total_batches++)); + batch_results_.push_back( + std::make_shared(dataset()->batch_size_)); } int64 offset = call_counter_++ % dataset()->batch_size_; new_calls.emplace_back(batch_results_.back(), offset); @@ -659,7 +658,7 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase { Status ReadBatchResult(IteratorContext* ctx, IteratorStateReader* reader, size_t index) TF_EXCLUSIVE_LOCKS_REQUIRED(*mu_) { batch_results_.push_back( - std::make_shared(dataset()->batch_size_, -1)); + std::make_shared(dataset()->batch_size_)); std::shared_ptr result = batch_results_.back(); string prefix = strings::StrCat(kBatchResults, ""_"", index); mutex_lock l(result->mu); ",0,train 5c545e646ce54934e92ea59a24ff9bcfe52991df,tensorflow/tensorflow,"[tf.data] Minor changes to xprof metadata. 
PiperOrigin-RevId: 353164453 Change-Id: If38813e6d7cd8ab843859d0d3b0bc7294a2e3a95",parallel_map_dataset_op.cc,"@@ -257,7 +257,7 @@ class ParallelMapDatasetOp::Dataset : public DatasetBase { RecordStart(ctx); profiler::TraceMe traceme([&] { return profiler::TraceMeEncode(""ParallelMapConsume"", - {{""element_id"", result->id}}); + {{""element_id"", result->uid}}); }); return ProcessResult(ctx, result, out_tensors, end_of_sequence); } @@ -371,14 +371,13 @@ class ParallelMapDatasetOp::Dataset : public DatasetBase { private: struct InvocationResult { - InvocationResult() = default; - explicit InvocationResult(int64 id) : id(id) {} + InvocationResult() : uid(tensorflow::EnvTime::NowNanos()) {} Notification notification; Status status; std::vector return_values; bool end_of_input = false; - int64 id = -1; + const int64 uid; }; void CancelThreads(bool wait) TF_LOCKS_EXCLUDED(mu_) { @@ -420,7 +419,7 @@ class ParallelMapDatasetOp::Dataset : public DatasetBase { TF_LOCKS_EXCLUDED(*mu_) { profiler::TraceMe traceme([&] { return profiler::TraceMeEncode(""ParallelMapProduce"", - {{""element_id"", result->id}}); + {{""element_id"", result->uid}}); }); // Get the next input element. std::vector input_element; @@ -514,8 +513,6 @@ class ParallelMapDatasetOp::Dataset : public DatasetBase { return num_calls_ >= num_parallel_calls || invocation_results_.size() >= num_parallel_calls; }; - // Counts the total number of calls to use as an id of InvocationResult. - int64 num_total_calls = 0; while (true) { { mutex_lock l(*mu_); @@ -528,8 +525,7 @@ class ParallelMapDatasetOp::Dataset : public DatasetBase { return; } while (!busy()) { - invocation_results_.push_back( - std::make_shared(num_total_calls++)); + invocation_results_.push_back(std::make_shared()); new_calls.push_back(invocation_results_.back()); num_calls_++; } ",0,train 5c545e646ce54934e92ea59a24ff9bcfe52991df,tensorflow/tensorflow,"[tf.data] Minor changes to xprof metadata. PiperOrigin-RevId: 353164453 Change-Id: If38813e6d7cd8ab843859d0d3b0bc7294a2e3a95",prefetch_dataset_op.cc,"@@ -338,12 +338,14 @@ class PrefetchDatasetOp::Dataset : public DatasetBase { // A buffer element comprises a status and (if that status is // OK) a vector of tensors, representing an element of the input dataset. struct BufferElement { + BufferElement() : uid(tensorflow::EnvTime::NowNanos()) {} + // The producer sets `status` if getting the input element fails. Status status; // The buffered data element. std::vector value; int64 created_us; - int64 id; + const uint64 uid; }; int64 buffer_limit() const TF_EXCLUSIVE_LOCKS_REQUIRED(*mu_) { @@ -380,7 +382,7 @@ class PrefetchDatasetOp::Dataset : public DatasetBase { // (if we successfully got an element) the output values. 
Status s = buffer_.front().status; if (s.ok()) { - int64 buffer_element_id = buffer_.front().id; + int64 buffer_element_id = buffer_.front().uid; profiler::TraceMe traceme( [&] { return profiler::TraceMeEncode( @@ -479,8 +481,8 @@ class PrefetchDatasetOp::Dataset : public DatasetBase { { profiler::TraceMe traceme( [&] { - return profiler::TraceMeEncode(""PrefetchProduce"", - {{""element_id"", num_produced}}); + return profiler::TraceMeEncode( + ""PrefetchProduce"", {{""element_id"", buffer_element.uid}}); }, profiler::kInfo); buffer_element.status = input_impl_->GetNext( @@ -498,7 +500,6 @@ class PrefetchDatasetOp::Dataset : public DatasetBase { mutex_lock l(*mu_); RecordBufferEnqueue(ctx.get(), buffer_element.value); buffer_element.created_us = EnvTime::NowMicros(); - buffer_element.id = num_produced; buffer_.push_back(std::move(buffer_element)); cond_var_->notify_all(); } ",0,train 9e45772d16bbcb3adb3c5faa298969e183cdc89e,tensorflow/tensorflow,Update metric_ops.py (#16712),metric_ops.py,"@@ -739,7 +739,7 @@ def _streaming_confusion_matrix_at_thresholds(predictions, else: for include in includes: if include not in all_includes: - raise ValueError('Invaild key: %s.' % include) + raise ValueError('Invalid key: %s.' % include) predictions, labels, weights = metrics_impl._remove_squeezable_dimensions( # pylint: disable=protected-access predictions, labels, weights) ",0,train 65e1e1ebbc86a3c4740168445dae4c2075ea2932,tensorflow/tensorflow,"Small changes to API generation to help creation of virtual pip. - Underscore some imports like _print_function so we don't have to delete them. Deleting them doesn't work well since they get added to our __all__ list before being deleted which makes ""import *"" fail. - Give a more unique name to some generated target in the api genrule. Doing this since I want multiple genrules that generate the same version of the API. (a compat_v1 and root_v1 for example). PiperOrigin-RevId: 221524470",create_python_api.py,"@@ -45,10 +45,10 @@ _GENERATED_FILE_HEADER = """"""# This file is MACHINE GENERATED! Do not edit. \""\""\""%s \""\""\"" -from __future__ import print_function +from __future__ import print_function as _print_function """""" -_GENERATED_FILE_FOOTER = '\n\ndel print_function\n' +_GENERATED_FILE_FOOTER = '\n\ndel _print_function\n' class SymbolExposedTwiceError(Exception): ",0,train 6c6f5f144c8a780edbc9cc44d957b3cda363ee86,tensorflow/tensorflow,"Fix index out of bounds bug in GetNameFromURI() Change: 133402965",file_system.cc,"@@ -76,8 +76,8 @@ string GetNameFromURI(const string& name) { // If the URI confirmed to scheme://filename, skip the two '/'s and return // filename. Otherwise return the original 'name', and leave it up to the // implementations to handle the full URI. - if (filename[0] == '/' && filename[1] == '/') { - return filename.substr(2).ToString(); + if (filename.Consume(""//"")) { + return filename.ToString(); } return name; } ",0,train 6c6f5f144c8a780edbc9cc44d957b3cda363ee86,tensorflow/tensorflow,"Fix index out of bounds bug in GetNameFromURI() Change: 133402965",file_system_test.cc,"@@ -0,0 +1,31 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""tensorflow/core/platform/file_system.h"" + +#include ""tensorflow/core/platform/test.h"" + +namespace tensorflow { +namespace { + +TEST(FileSystemTest, GetNameFromURI) { + EXPECT_EQ(""foo"", GetNameFromURI(""file://foo"")); + EXPECT_EQ(""file:/"", GetNameFromURI(""file:/"")); + EXPECT_EQ(""file:"", GetNameFromURI(""file:"")); + EXPECT_EQ(""bar"", GetNameFromURI(""bar"")); +} + +} // namespace +} // namespace tensorflow ",0,train 43c7131efcbf8aca957c293498bfd1a300ee4c76,tensorflow/tensorflow,"Removed PyArray_Return from TF_TensorToPyArray and ConvertTensorToNdarray This makes the array->scalar conversion explicit at conversion site. PiperOrigin-RevId: 249792188",tf_session_helper.cc,"@@ -147,7 +147,8 @@ void TF_Run_wrapper_helper(TF_DeprecatedSession* session, const char* handle, Set_TF_Status_from_Status(out_status, s); return; } - py_outputs_safe.emplace_back(make_safe(py_array)); + py_outputs_safe.emplace_back( + make_safe(PyArray_Return(reinterpret_cast(py_array)))); } // 6. If we reach this point, we have successfully built a list of objects @@ -274,7 +275,8 @@ void RunCallableHelper(tensorflow::Session* session, int64_t handle, Set_TF_Status_from_Status(out_status, s); return; } - py_outputs_safe.push_back(make_safe(py_array)); + py_outputs_safe.push_back( + make_safe(PyArray_Return(reinterpret_cast(py_array)))); } // If we reach this point, we have successfully built a list of objects @@ -423,7 +425,8 @@ void TF_SessionRun_wrapper_helper(TF_Session* session, const char* handle, Set_TF_Status_from_Status(out_status, s); return; } - py_outputs_safe.emplace_back(make_safe(py_array)); + py_outputs_safe.emplace_back( + make_safe(PyArray_Return(reinterpret_cast(py_array)))); } // If we reach this point, we have successfully built a list of objects so we @@ -672,7 +675,7 @@ PyObject* TF_TryEvaluateConstant_wrapper(TF_Graph* graph, TF_Output output, Status s = TF_TensorToPyArray(std::move(safe_result_tensor), &out); Set_TF_Status_from_Status(status, s); if (!s.ok()) Py_RETURN_NONE; - return out; + return PyArray_Return(reinterpret_cast(out)); } } // namespace tensorflow ",0,train 43c7131efcbf8aca957c293498bfd1a300ee4c76,tensorflow/tensorflow,"Removed PyArray_Return from TF_TensorToPyArray and ConvertTensorToNdarray This makes the array->scalar conversion explicit at conversion site. PiperOrigin-RevId: 249792188",pywrap_tensor.cc,"@@ -653,7 +653,7 @@ static PyObject* EagerTensor_numpy(EagerTensor* self) { dims.size(), dims.data(), data, t->dtype(), [copy] { delete copy; }, &ret) .ok()) { - return ret; + return PyArray_Return(reinterpret_cast(ret)); } } @@ -662,7 +662,7 @@ static PyObject* EagerTensor_numpy(EagerTensor* self) { Py_XDECREF(ret); return nullptr; } else { - return ret; + return PyArray_Return(reinterpret_cast(ret)); } } ",0,train 43c7131efcbf8aca957c293498bfd1a300ee4c76,tensorflow/tensorflow,"Removed PyArray_Return from TF_TensorToPyArray and ConvertTensorToNdarray This makes the array->scalar conversion explicit at conversion site. 
PiperOrigin-RevId: 249792188",ndarray_tensor.cc,"@@ -407,9 +407,7 @@ Status TF_TensorToPyArray(Safe_TF_TensorPtr tensor, PyObject** out_ndarray) { PyArray_NBYTES(py_array)); } - // PyArray_Return turns rank 0 arrays into numpy scalars - *out_ndarray = PyArray_Return( - reinterpret_cast(safe_out_array.release())); + *out_ndarray = safe_out_array.release(); return Status::OK(); } ",0,train 43c7131efcbf8aca957c293498bfd1a300ee4c76,tensorflow/tensorflow,"Removed PyArray_Return from TF_TensorToPyArray and ConvertTensorToNdarray This makes the array->scalar conversion explicit at conversion site. PiperOrigin-RevId: 249792188",ndarray_tensor_bridge.cc,"@@ -218,7 +218,7 @@ Status ArrayFromMemory(int dim_size, npy_intp* dims, void* data, DataType dtype, Py_DECREF(releaser); return errors::Unknown(""Python array refused to use memory.""); } - *result = PyArray_Return(np_array); + *result = reinterpret_cast(np_array); return Status::OK(); } ",0,train 43c7131efcbf8aca957c293498bfd1a300ee4c76,tensorflow/tensorflow,"Removed PyArray_Return from TF_TensorToPyArray and ConvertTensorToNdarray This makes the array->scalar conversion explicit at conversion site. PiperOrigin-RevId: 249792188",py_func.cc,"@@ -92,6 +92,7 @@ Status MakeArgTuple(const PyCall* call, PyObject** tuple) { Py_DECREF(lst); return s; } + arg = PyArray_Return(reinterpret_cast(arg)); } PyList_SetItem(lst, i, arg); } @@ -467,7 +468,7 @@ Status ConvertTensorToNdarray(const Tensor& t, PyObject** ret) { StringPiece p = t.tensor_data(); memcpy(PyArray_DATA(np_array), p.data(), p.size()); } - *ret = PyArray_Return(np_array); + *ret = reinterpret_cast(np_array); return Status::OK(); } ",0,train b8cd771a055ef8a124245f0a8423c47dea19d6c5,tensorflow/tensorflow,"Disable a few failing tests in tensorflow.v2_tfrt and fix tensorflow/python/ops/ragged:ragged_map_fn_op_test. PiperOrigin-RevId: 351449157 Change-Id: Ie6486b9c6e13e2f10143c403350aaca84d1b3441",xla_test.py,"@@ -217,6 +217,8 @@ class XLACompileContextTest(test.TestCase, parameterized.TestCase): class XlaCompileTest(test.TestCase): @test_util.run_v2_only + @test_util.disable_tfrt( + 'Legacy XLA test. It depends on EncapsulateXlaComputationsPass.') def test_xla_compile_eager(self): """"""Tests that xla.compile raises proper exception when used eagerly."""""" @@ -225,6 +227,8 @@ class XlaCompileTest(test.TestCase): self.assertEqual(self.evaluate(xla.compile(computation, [1, 2])[0]), 3) + @test_util.disable_tfrt( + 'Legacy XLA test. It depends on EncapsulateXlaComputationsPass.') def test_xla_compile_in_function(self): """"""Tests that xla.compile works in tf.function."""""" @@ -238,6 +242,8 @@ class XlaCompileTest(test.TestCase): self.assertEqual(self.evaluate(func_wrapper(1))[0], 2) + @test_util.disable_tfrt( + 'Legacy XLA test. It depends on EncapsulateXlaComputationsPass.') def test_xla_compile_write_variable_in_function(self): """"""Tests that xla.compile works with variable in tf.function."""""" a = variable_scope.get_variable( ",0,train b8cd771a055ef8a124245f0a8423c47dea19d6c5,tensorflow/tensorflow,"Disable a few failing tests in tensorflow.v2_tfrt and fix tensorflow/python/ops/ragged:ragged_map_fn_op_test. 
PiperOrigin-RevId: 351449157 Change-Id: Ie6486b9c6e13e2f10143c403350aaca84d1b3441",replicate_test.py,"@@ -248,16 +248,22 @@ class EagerClusterReplicateTest(test_base.DatasetTestBase, def __init__(self, methodName=""runTest""): # pylint: disable=invalid-name super(EagerClusterReplicateTest, self).__init__(methodName) self._job_name = ""remove_device"" - self._cached_server1 = server_lib.Server.create_local_server() - self._cached_server2 = server_lib.Server.create_local_server() - self._cached_server1_target = self._cached_server1.target[len(""grpc://""):] - self._cached_server2_target = self._cached_server2.target[len(""grpc://""):] self._device0 = ""/job:%s/replica:0/task:0/device:CPU:0"" % self._job_name self._device1 = ""/job:%s/replica:0/task:1/device:CPU:0"" % self._job_name self._device2 = ""/job:%s/replica:0/task:2/device:CPU:0"" % self._job_name def setUp(self): super(EagerClusterReplicateTest, self).setUp() + + if context.context().use_tfrt: + self.skipTest(""b/171412104: This test requires distributed support."") + + # TODO(b/171412104): Move create server to __init__ once tfrt support it. + self._cached_server1 = server_lib.Server.create_local_server() + self._cached_server2 = server_lib.Server.create_local_server() + self._cached_server1_target = self._cached_server1.target[len(""grpc://""):] + self._cached_server2_target = self._cached_server2.target[len(""grpc://""):] + # Start the local server. local_port = pywrap_tfe.TF_PickUnusedPortOrDie() context.set_server_def( ",0,train b8cd771a055ef8a124245f0a8423c47dea19d6c5,tensorflow/tensorflow,"Disable a few failing tests in tensorflow.v2_tfrt and fix tensorflow/python/ops/ragged:ragged_map_fn_op_test. PiperOrigin-RevId: 351449157 Change-Id: Ie6486b9c6e13e2f10143c403350aaca84d1b3441",snapshot_test.py,"@@ -30,6 +30,7 @@ from tensorflow.python.data.experimental.ops import snapshot from tensorflow.python.data.kernel_tests import test_base from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import readers as core_readers +from tensorflow.python.eager import context from tensorflow.python.framework import combinations from tensorflow.python.framework import errors from tensorflow.python.ops import gen_array_ops @@ -371,6 +372,8 @@ class SnapshotDatasetTest(reader_dataset_ops_test_base.TFRecordDatasetTestBase, @combinations.generate(test_base.default_test_combinations()) def testReadOptimizableUsingFlatMap(self): + if context.context().use_tfrt: + self.skipTest(""b/177260096: Flaky test."") dataset = dataset_ops.Dataset.range(100) # Will be optimized into ShuffleAndRepeat. dataset = dataset.shuffle(10) ",0,train b8cd771a055ef8a124245f0a8423c47dea19d6c5,tensorflow/tensorflow,"Disable a few failing tests in tensorflow.v2_tfrt and fix tensorflow/python/ops/ragged:ragged_map_fn_op_test. PiperOrigin-RevId: 351449157 Change-Id: Ie6486b9c6e13e2f10143c403350aaca84d1b3441",check_numerics_callback_test.py,"@@ -250,14 +250,12 @@ class CheckNumericsCallbackUnhealthyTest(test_util.TensorFlowTestCase): # Check that the correct line for op creation is printed. self.assertTrue(re.search(r""Stack trace of op's creation"", message)) self.assertIn(""return math_ops.log(-x)"", message) - if context.executing_eagerly(): - # The code path for raising error is slightly different under graph mode. 
- self.assertTrue(message.endswith(""\n"")) @test_util.run_in_graph_and_eager_modes @test_util.disable_xla( ""There is a small inconsistency in the step at which overflow happens: "" ""128 (without XLA) and 127 (with XLA)."") + @test_util.disable_tfrt(""b/177261532: TFRT cannot detect overflow yet."") def testOverflowInTfFunction(self): """"""Test catching Infinity caused by overflow in a tf.function with while."""""" check_numerics_callback.enable_check_numerics() ",0,train b8cd771a055ef8a124245f0a8423c47dea19d6c5,tensorflow/tensorflow,"Disable a few failing tests in tensorflow.v2_tfrt and fix tensorflow/python/ops/ragged:ragged_map_fn_op_test. PiperOrigin-RevId: 351449157 Change-Id: Ie6486b9c6e13e2f10143c403350aaca84d1b3441",cond_v2_test.py,"@@ -1461,6 +1461,7 @@ class CondV2ContainerTest(test.TestCase): self.assertEqual(compat.as_bytes(""""), container(q5.queue_ref)) +@test_util.disable_tfrt(""b/171412104: This test requires distributed support."") class CondV2ColocationGroupAndDeviceTest(test.TestCase, parameterized.TestCase): def setUp(self): ",0,train b8cd771a055ef8a124245f0a8423c47dea19d6c5,tensorflow/tensorflow,"Disable a few failing tests in tensorflow.v2_tfrt and fix tensorflow/python/ops/ragged:ragged_map_fn_op_test. PiperOrigin-RevId: 351449157 Change-Id: Ie6486b9c6e13e2f10143c403350aaca84d1b3441",collective_ops_test.py,"@@ -444,6 +444,8 @@ class CollectiveOpTest(test.TestCase): self.assertAllClose(results_[1], expected_output_, rtol=1e-5, atol=1e-5) @test_util.run_v2_only + @test_util.disable_tfrt( + 'b/177270918: TFRT has dead lock when executing collective ops.') def testCollectiveGroupSizeMismatch(self): cpus = config.list_physical_devices('CPU') self.assertEqual(len(cpus), 1) ",0,train 281d056c9e29727c93a20e2170fd233aab076147,tensorflow/tensorflow,"Automated rollback of change 150082087 Change: 150140770",op_kernel.cc,"@@ -1098,7 +1098,8 @@ Status ValidateKernelRegistrations(const OpRegistryInterface& op_registry) { const OpRegistrationData* op_reg_data; const Status status = op_registry.LookUp(kernel_def.op(), &op_reg_data); if (!status.ok()) { - LOG(FATAL) << ""OpKernel ('"" << ProtoShortDebugString(kernel_def) + // TODO(josh11b): Make this a hard error. 
+ LOG(ERROR) << ""OpKernel ('"" << ProtoShortDebugString(kernel_def) << ""') for unknown op: "" << kernel_def.op(); continue; } ",0,train b7552cff4e9bb4f9d0b5a9f80c8a607e8db82901,tensorflow/tensorflow,Update prefetching_ops.py,prefetching_ops.py,"@@ -76,7 +76,6 @@ def copy_to_device(target_device, source_device=""/cpu:0""): return _CopyToDeviceDataset( dataset, target_device=target_device, source_device=source_device).with_options(options) - source_device=source_device) return _apply_fn ",0,train a7e6b483d3b14be2f2cb419693d16d0639be4822,tensorflow/tensorflow,"Use a fallback graphdef based conversion when saved model schema version is zero PiperOrigin-RevId: 321067895 Change-Id: I604657fdbd3c41a1ddc0b7bbfb21b919b3d8a187",lite.py,"@@ -510,6 +510,10 @@ class TFLiteConverterBase(object): if not self._saved_model_exported_names: self._saved_model_exported_names = [] self._saved_model_version = saved_model_proto.saved_model_schema_version + if self._saved_model_version == 0: + self.saved_model_dir = None + logging.warning(""SavedModel schema version is zero."") + return if self._saved_model_version not in [1, 2]: raise ValueError(""SavedModel file format({0}) is not supported"".format( self._saved_model_version)) ",0,train a7e6b483d3b14be2f2cb419693d16d0639be4822,tensorflow/tensorflow,"Use a fallback graphdef based conversion when saved model schema version is zero PiperOrigin-RevId: 321067895 Change-Id: I604657fdbd3c41a1ddc0b7bbfb21b919b3d8a187",lite_v2_test.py,"@@ -36,9 +36,11 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.keras.layers import recurrent from tensorflow.python.keras.layers import recurrent_v2 +from tensorflow.python.lib.io import file_io from tensorflow.python.platform import test from tensorflow.python.saved_model import save_options from tensorflow.python.saved_model import saved_model +from tensorflow.python.saved_model.loader_impl import parse_saved_model from tensorflow.python.saved_model.save import save from tensorflow.python.training.tracking import tracking @@ -548,6 +550,25 @@ class FromSavedModelTest(lite_v2_test_util.ModelTest): self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) self.assertEqual((0., 0.), output_details[0]['quantization']) + @test_util.run_v2_only + def testTF1HubFormattedModel(self): + """"""Test a TF1 hub formatted model."""""" + saved_model_dir = self._createV1SavedModel(shape=[1, 16, 16, 3]) + + # TF1 hub model is based on V1 saved model and they omit the saved model + # schema version setting. + saved_model_proto = parse_saved_model(saved_model_dir) + saved_model_proto.saved_model_schema_version = 0 + + saved_model_pb_file_path = os.path.join(saved_model_dir, 'saved_model.pb') + with file_io.FileIO(saved_model_pb_file_path, 'wb') as writer: + writer.write(saved_model_proto.SerializeToString()) + + # Convert model and ensure model is not None. 
+ converter = lite.TFLiteConverterV2.from_saved_model(saved_model_dir) + tflite_model = converter.convert() + self.assertTrue(tflite_model) + @test_util.run_v2_only def testConstModel(self): """"""Test a basic model with functions to make sure functions are inlined."""""" ",0,train 4bab47f1d12cfdaee10d0e8ad087973a5a1c2560,tensorflow/tensorflow,"Update Materialize Broadcasts for same-rank broadcasts PiperOrigin-RevId: 311001875 Change-Id: Ib5743ffa5d3605c9a58def1952ad8bd0eed24682",materialize_broadcasts.cc,"@@ -50,12 +50,6 @@ static DenseIntElementsAttr GetI64ElementsAttrForSeq(int start, int end, template bool CreateStaticBroadcastsForBinaryOp(SrcOp op, PatternRewriter *rewriter, Value *out_lhs, Value *out_rhs) { - if (!op.broadcast_dimensions().hasValue()) { - // Note: the op may still have an implicit broadcast on it, such as - // for (tensor<1xf32>, tensor<4xf32>). - return false; - } - // Insert BroadcastInDimOps for the left-hand-side and right-hand-side args, // replacing the original LHS and RHS args in the source op with the results // of the broadcasts. @@ -79,25 +73,7 @@ bool CreateStaticBroadcastsForBinaryOp(SrcOp op, PatternRewriter *rewriter, auto lhs_rank = lhs_ranked_type.getRank(); auto rhs_rank = rhs_ranked_type.getRank(); - - // Set broadcast_dimensions to [0, ..., rank] for the higher rank arg. - // Use the original op.broadcast_dimensions for the lower rank arg. - auto higher_rank_broadcast_dims = - GetI64ElementsAttrForSeq(0, std::max(lhs_rank, rhs_rank), rewriter); - DenseIntElementsAttr lhs_broadcast_dims; - DenseIntElementsAttr rhs_broadcast_dims; - if (lhs_rank > rhs_rank) { - lhs_broadcast_dims = higher_rank_broadcast_dims; - rhs_broadcast_dims = op.broadcast_dimensions().getValue(); - } else if (lhs_rank < rhs_rank) { - lhs_broadcast_dims = op.broadcast_dimensions().getValue(); - rhs_broadcast_dims = higher_rank_broadcast_dims; - } else { - // This shouldn't happen for legal ops. If the broadcast_dimensions - // attribute is set, the ranks should be different. - // TODO(scotttodd): Add a custom verification for ops and assert here. - return false; - } + ArrayRef op_shape = op_ranked_type.getShape(); // BroadcastInDimOp must have the same element type for operands and results, // so preserve the original output shape and the original input element type. 
@@ -105,16 +81,32 @@ bool CreateStaticBroadcastsForBinaryOp(SrcOp op, PatternRewriter *rewriter, // broadcast_in_dim (tensor<1x4xf32>) -> tensor<1x4xf32> // broadcast_in_dim (tensor<4xf32>) -> tensor<1x4xf32> // SrcOp (tensor<1x4xf32>, tensor<1x4xf32>) -> tensor<1x4xi1> - ArrayRef op_shape = op_ranked_type.getShape(); - auto lhs_type = - RankedTensorType::get(op_shape, lhs_ranked_type.getElementType()); - auto rhs_type = - RankedTensorType::get(op_shape, rhs_ranked_type.getElementType()); + if (lhs_ranked_type.getShape() != op_ranked_type.getShape()) { + auto type = + RankedTensorType::get(op_shape, lhs_ranked_type.getElementType()); + DenseIntElementsAttr attr = GetI64ElementsAttrForSeq(0, lhs_rank, rewriter); + if (lhs_rank < rhs_rank) { + attr = op.broadcast_dimensions().getValue(); + } + + lhs = + rewriter->createOrFold(op.getLoc(), type, lhs, attr); + } + + if (rhs_ranked_type.getShape() != op_ranked_type.getShape()) { + auto type = + RankedTensorType::get(op_shape, rhs_ranked_type.getElementType()); + DenseIntElementsAttr attr = GetI64ElementsAttrForSeq(0, rhs_rank, rewriter); + if (rhs_rank < lhs_rank) { + attr = op.broadcast_dimensions().getValue(); + } + + rhs = + rewriter->createOrFold(op.getLoc(), type, rhs, attr); + } - *out_lhs = rewriter->createOrFold(op.getLoc(), lhs_type, - lhs, lhs_broadcast_dims); - *out_rhs = rewriter->createOrFold(op.getLoc(), rhs_type, - rhs, rhs_broadcast_dims); + *out_lhs = lhs; + *out_rhs = rhs; return true; } @@ -359,9 +351,15 @@ struct CompareWithBroadcastConvert : public OpRewritePattern { void SetupMaterializeBroadcastsLegality(MLIRContext *context, ConversionTarget *conversionTarget) { -#define ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(OpType) \ - conversionTarget->addDynamicallyLegalOp( \ - [](OpType op) { return !op.broadcast_dimensions().hasValue(); }); +#define ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(OpType) \ + conversionTarget->addDynamicallyLegalOp([](OpType op) { \ + if (op.broadcast_dimensions().hasValue()) return false; \ + auto l = op.lhs().getType().cast(); \ + auto r = op.rhs().getType().cast(); \ + if (!l.hasRank() || !r.hasRank()) return false; \ + return l.getShape() == r.getShape(); \ + }); + // Binary elementwise ops. ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(AddOp); ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(Atan2Op); ",0,test 716b7713e4c8b2d8f093f639ca41816cf4e1c696,tensorflow/tensorflow,Fix tensorboard import path,setup.py,"@@ -43,7 +43,7 @@ else: # pylint: disable=line-too-long CONSOLE_SCRIPTS = [ - 'tensorboard = tensorflow.tensorboard.backend.tensorboard:main', + 'tensorboard = tensorflow.tensorboard.tensorboard:main', ] # pylint: enable=line-too-long ",0,test a559acfb25886aa62077765a7c3739a50ca94b83,tensorflow/tensorflow,"Disable buggy ""small"" CUDA kernel for DepthwiseConv2dBackpropInput. This fixes an issue where DepthwiseConv2dBackpropInput had incorrect outputs in some cases. PiperOrigin-RevId: 300692210 Change-Id: Ib830d64df5c6dbfa5a04354db1031603b6c58bdc",depthwise_conv_op_gpu.h,"@@ -987,7 +987,9 @@ Status LaunchDepthwiseConv2dBackpropInputGPU(OpKernelContext* ctx, const T* filter, T* in_backprop, TensorFormat data_format) { if (args.depth_multiplier == 1) { - if (CanLaunchDepthwiseConv2dGPUSmall(args)) { + // This kernel doesn't currently work in all cases so it is disabled. + // TODO(b/150988950): Fix and reenable this kernel. 
+ if (/* CanLaunchDepthwiseConv2dGPUSmall(args) */ false) { return LaunchDepthwiseConv2dGPUSmall< T, DIRECTION_BACKWARD, kKnownFilterWidth, kKnownFilterHeight>( ctx, args, out_backprop, filter, in_backprop, data_format); ",0,train a559acfb25886aa62077765a7c3739a50ca94b83,tensorflow/tensorflow,"Disable buggy ""small"" CUDA kernel for DepthwiseConv2dBackpropInput. This fixes an issue where DepthwiseConv2dBackpropInput had incorrect outputs in some cases. PiperOrigin-RevId: 300692210 Change-Id: Ib830d64df5c6dbfa5a04354db1031603b6c58bdc",depthwise_conv_op_test.py,"@@ -186,6 +186,11 @@ def CheckGradConfigsToTest(): Config([1, 15, 15, 2], [1, 3, 2, 1], [1, 15, 15, 2]), Config([2, 15, 16, 1], [3, 3, 1, 2], [2, 5, 5, 2], 3, padding=""VALID""), Config([2, 5, 8, 1], [4, 3, 1, 2], [2, 5, 8, 2], dilations=[1, 2]), + # These cases test the kernels in depthwise_conv_op_gpu.h which are used + # if the input size is small. + Config([1, 3, 1, 2], [2, 1, 2, 1], [1, 3, 1, 2]), + Config([2, 2, 3, 2], [2, 1, 2, 1], [2, 2, 3, 2]), + Config([2, 2, 3, 1], [2, 2, 1, 1], [2, 2, 3, 1]), ] ",0,train 61368b23ac560d158a27e679ac570a9b7ae94e0a,tensorflow/tensorflow,"[XLA] Use IsInf function in implementation of lgamma. PiperOrigin-RevId: 235275004",math.cc,"@@ -402,9 +402,7 @@ XlaOp Lgamma(XlaOp input) { // lgamma(+/-inf) = +inf. XlaOp inf_bcast = FullLike(input, std::numeric_limits::infinity()); - return Select(Or(IsFinite(input), // is finite, or - Not(Or(Lt(input, one), Ge(input, one)))), // is nan - result, inf_bcast); + return Select(IsInf(input), inf_bcast, result); }; auto& b = *input.builder(); ",0,train 93d4af9c859cf82e10bb443bf8fc1c4df6a293f9,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2022-04-10 PiperOrigin-RevId: 440674055",compat.py,"@@ -29,7 +29,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2022, 4, 9) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2022, 4, 10) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train 1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type. This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops. PiperOrigin-RevId: 212652588",quantile_ops.cc,"@@ -296,8 +296,9 @@ class QuantileAccumulatorAddSummariesOp : public OpKernel { int64 start, int64 end) { for (int resource_handle_idx = start; resource_handle_idx < end; ++resource_handle_idx) { - ResourceHandle handle = resource_handle_list[resource_handle_idx] - .flat()(0); + const ResourceHandle& handle = + resource_handle_list[resource_handle_idx] + .flat()(0); QuantileStreamResource* streams_resource; // Create a reference to the underlying resource using the handle. 
OP_REQUIRES_OK(context, @@ -709,8 +710,9 @@ class QuantileAccumulatorGetBucketsOp : public OpKernel { &buckets_list, stamp_token](int64 start, int64 end) { for (int resource_handle_idx = start; resource_handle_idx < end; ++resource_handle_idx) { - ResourceHandle handle = resource_handle_list[resource_handle_idx] - .flat()(0); + const ResourceHandle& handle = + resource_handle_list[resource_handle_idx] + .flat()(0); QuantileStreamResource* streams_resource; OP_REQUIRES_OK(context, LookupResource(context, handle, &streams_resource)); ",0,test 1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type. This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops. PiperOrigin-RevId: 212652588",stats_accumulator_ops.cc,"@@ -448,8 +448,9 @@ class StatsAccumulatorScalarAddOp : public OpKernel { stamp_token](int64 start, int64 end) { for (int resource_handle_idx = start; resource_handle_idx < end; ++resource_handle_idx) { - ResourceHandle handle = resource_handle_list[resource_handle_idx] - .flat()(0); + const ResourceHandle& handle = + resource_handle_list[resource_handle_idx] + .flat()(0); StatsAccumulatorScalarResource* accumulator_resource; OP_REQUIRES_OK(context, LookupResource(context, handle, @@ -512,8 +513,9 @@ class StatsAccumulatorTensorAddOp : public OpKernel { stamp_token](int64 start, int64 end) { for (int resource_handle_idx = start; resource_handle_idx < end; ++resource_handle_idx) { - ResourceHandle handle = resource_handle_list[resource_handle_idx] - .flat()(0); + const ResourceHandle& handle = + resource_handle_list[resource_handle_idx] + .flat()(0); StatsAccumulatorTensorResource* accumulator_resource; OP_REQUIRES_OK(context, LookupResource(context, handle, ",0,test 1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type. This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops. PiperOrigin-RevId: 212652588",direct_session_test.cc,"@@ -1255,7 +1255,7 @@ TEST(DirectSessionTest, RunHandleTest) { ASSERT_TRUE(s.ok()); ASSERT_EQ(1, outputs.size()); - ResourceHandle resource_handle = outputs[0].scalar()(); + const ResourceHandle& resource_handle = outputs[0].scalar()(); Tensor string_handle(DT_STRING, {}); string_handle.flat().setConstant(resource_handle.name()); @@ -1308,7 +1308,7 @@ TEST(DirectSessionTest, RunHandleTest_Callable) { ASSERT_TRUE(s.ok()); ASSERT_EQ(1, outputs.size()); - ResourceHandle resource_handle = outputs[0].scalar()(); + const ResourceHandle& resource_handle = outputs[0].scalar()(); Tensor string_handle(DT_STRING, {}); string_handle.flat().setConstant(resource_handle.name()); ",0,test 1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type. This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops. PiperOrigin-RevId: 212652588",resource_mgr.cc,"@@ -271,7 +271,7 @@ string ContainerInfo::DebugString() const { ""]""); } -ResourceHandle HandleFromInput(OpKernelContext* ctx, int input) { +const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input) { return ctx->input(input).flat()(0); } ",0,test 1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type. 
This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops. PiperOrigin-RevId: 212652588",resource_mgr.h,"@@ -79,7 +79,7 @@ class ResourceBase : public core::RefCounted { virtual string DebugString() = 0; // Returns memory used by this resource. - virtual int64 MemoryUsed() const { return 0; }; + virtual int64 MemoryUsed() const { return 0; } }; // Container used for per-step resources. @@ -234,7 +234,7 @@ ResourceHandle MakePerStepResourceHandle(OpKernelContext* ctx, const string& name); // Returns a resource handle from a numbered op input. -ResourceHandle HandleFromInput(OpKernelContext* ctx, int input); +const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input); Status HandleFromInput(OpKernelContext* ctx, StringPiece input, ResourceHandle* handle); ",0,test 1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type. This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops. PiperOrigin-RevId: 212652588",partitioned_function_ops.cc,"@@ -210,7 +210,7 @@ class PartitionedCallOp : public AsyncOpKernel { TF_RETURN_IF_ERROR(node->attrs().Find(""T"", &attr_value)); DataType dtype = attr_value->type(); if (dtype == DT_RESOURCE) { - ResourceHandle handle = args[index].flat()(0); + const ResourceHandle& handle = args[index].flat()(0); node->set_assigned_device_name(handle.device()); } } ",0,test 1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type. This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops. PiperOrigin-RevId: 212652588",queue_ops.cc,"@@ -65,7 +65,7 @@ class FakeQueueOp : public OpKernel { } void Compute(OpKernelContext* context) override { - ResourceHandle ref = context->input(0).flat()(0); + const ResourceHandle& ref = context->input(0).flat()(0); handle_.AccessTensor(context)->flat()(0) = ref.container(); handle_.AccessTensor(context)->flat()(1) = ref.name(); context->set_output_ref(0, &mu_, handle_.AccessTensor(context)); ",0,test 1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type. This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops. PiperOrigin-RevId: 212652588",resource_variable_ops.cc,"@@ -79,7 +79,7 @@ ReadVariableOp::ReadVariableOp(OpKernelConstruction* c) : OpKernel(c) { void ReadVariableOp::Compute(OpKernelContext* ctx) { Var* variable = nullptr; - ResourceHandle handle = HandleFromInput(ctx, 0); + const ResourceHandle& handle = HandleFromInput(ctx, 0); const auto status = LookupResource(ctx, handle, &variable); OP_REQUIRES(ctx, status.ok(), errors::FailedPrecondition( ",0,test 1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type. This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops. 
PiperOrigin-RevId: 212652588",stack_ops.cc,"@@ -131,10 +131,8 @@ class Stack : public ResourceBase { }; Status GetStack(OpKernelContext* ctx, Stack** stack) { - string key; if (ctx->input_dtype(0) == DT_RESOURCE) { - auto resource = ctx->input(0).flat()(0); - key = resource.name(); + return LookupResource(ctx, HandleFromInput(ctx, 0), stack); } else { Tensor Tstack_handle = ctx->mutable_input(0, false); if (Tstack_handle.NumElements() != 2) { @@ -144,18 +142,18 @@ Status GetStack(OpKernelContext* ctx, Stack** stack) { } const string& container = Tstack_handle.flat()(0); const string& stack_name = Tstack_handle.flat()(1); - key = strings::StrCat(container, stack_name); - } - ResourceMgr* rm = ctx->resource_manager(); - if (rm == nullptr) { - return errors::Internal(""No resource manager.""); - } - auto* step_container = ctx->step_container(); - if (step_container == nullptr) { - return errors::Internal(""No step container.""); + string key = strings::StrCat(container, stack_name); + ResourceMgr* rm = ctx->resource_manager(); + if (rm == nullptr) { + return errors::Internal(""No resource manager.""); + } + auto* step_container = ctx->step_container(); + if (step_container == nullptr) { + return errors::Internal(""No step container.""); + } + TF_RETURN_IF_ERROR(rm->Lookup(step_container->name(), key, stack)); + return Status::OK(); } - TF_RETURN_IF_ERROR(rm->Lookup(step_container->name(), key, stack)); - return Status::OK(); } std::atomic Stack::stack_counter{0}; ",0,test 1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type. This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops. PiperOrigin-RevId: 212652588",tensor_array_ops.cc,"@@ -290,7 +290,7 @@ class TensorArrayGradOp : public TensorArrayCreationOp { } } else { container = ""_tensor_arrays""; - auto resource = ctx->input(0).flat()(0); + const auto& resource = ctx->input(0).flat()(0); if (StringPiece(resource.name()).substr(0, container.size()) != container) { return errors::InvalidArgument(""Wrong input container. "", ",0,test 2e6f8b3f05fe2d212c19b9598f93f4e6ee07675f,tensorflow/tensorflow,"Provide a hint about the number of iterations to while_loop in the case of for loops over tensors of known size. This allows using this type of for loops on TPU. PiperOrigin-RevId: 192166460",control_flow.py,"@@ -83,7 +83,8 @@ def _known_len_for_loop(iterated, extra_cond, loop_body, init_state): while_cond, while_body, init_state=(0,) + init_state, - extra_deps=(iterated,)) + extra_deps=(iterated,), + opts=dict(maximum_iterations=n)) # Dropping the iteration index because it's not syntactically visible. results = results[1:] @@ -136,7 +137,7 @@ def _dataset_for_loop(ds, extra_cond, loop_body, init_state): return results -def while_loop(loop_cond, loop_body, init_state, extra_deps): +def while_loop(loop_cond, loop_body, init_state, extra_deps, opts=None): """"""Functional form of a while statement. The loop operates on a so-called state, which includes all symbols that are @@ -153,6 +154,7 @@ def while_loop(loop_cond, loop_body, init_state, extra_deps): extra_deps: Tuple containing additional entities on which the loop may depend, such as loop invariants referenced by loop_cond. Used exclusively for dispatch control. + opts: Optional dict of extra loop parameters. Returns: Tuple containing the final state. 
@@ -161,18 +163,21 @@ def while_loop(loop_cond, loop_body, init_state, extra_deps): # That could be somethins as simple as a collection of dispatch rules, with # some prioritization. if any(tensor_util.is_tensor(v) for v in init_state + extra_deps): - return _tf_while_loop(loop_cond, loop_body, init_state) + return _tf_while_loop(loop_cond, loop_body, init_state, opts) else: - return _py_while_loop(loop_cond, loop_body, init_state) + return _py_while_loop(loop_cond, loop_body, init_state, opts) -def _tf_while_loop(loop_cond, loop_body, init_state): +def _tf_while_loop(loop_cond, loop_body, init_state, opts): """"""Overload of while_loop that stages a TF while_loop."""""" - return control_flow_ops.while_loop(loop_cond, loop_body, init_state) + if opts is None: + opts = {} + return control_flow_ops.while_loop(loop_cond, loop_body, init_state, **opts) -def _py_while_loop(loop_cond, loop_body, init_state): +def _py_while_loop(loop_cond, loop_body, init_state, opts): """"""Overload of while_loop that executes a Python while loop."""""" + del opts state = init_state while loop_cond(*state): state = loop_body(*state) ",0,test 3c922d7df747ce3c25a0ad75a41f23c7e8d1df1e,tensorflow/tensorflow,"Print out bounded shape in HumanStringWithLayout PiperOrigin-RevId: 229282536",shape_util.cc,"@@ -530,7 +530,8 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( string result = StrCat( primitive_util::LowercasePrimitiveTypeName(shape.element_type()), ""[""); for (int i = 0; i < shape.dimensions().size(); i++) { - StrAppend(&result, (i > 0) ? "","" : """", shape.dimensions(i)); + StrAppend(&result, (i > 0) ? "","" : """", + shape.is_dynamic_dimension(i) ? ""<="" : """", shape.dimensions(i)); } result += ""]""; if (!IsScalar(shape) && shape.IsArray()) { ",0,train a0bbeb10e2dada2a44caed9fd0bc1cd85e4ff93f,tensorflow/tensorflow,"Unreachable input gradients (#13071) * Check if inputs are reachable from outputs in AddSymbolicGradients. * Removing LOG. * Edit following the PR comments. * Lines > 80 chars. * Formatting comments in gradients_test. * Eliminate m2 and renamed m1->m, dm1->diff_m * Edit InvalidArgument string concatenation.",gradients.cc,"@@ -175,8 +175,14 @@ Status SymbolicGradientBuilder::Initialize() { ""Must specify a gradient input for each output.""); } std::vector reachable_nodes = GetReachableNodes(); - // TODO(theflofly) Check that inputs_ are reachable from - // outputs_ using reachable_nodes + for (const Output& input : inputs_) { + if (!reachable_nodes[input.node()->id()]) { + return errors::InvalidArgument( + ""Cannot compute the partial derivative for node '"", + input.node()->name(), + ""' as it's unreachable from the output node(s).""); + } + } grad_outputs_->clear(); grad_outputs_->resize(inputs_.size()); // Populate `output_nodes_` from node ids in `outputs_`. ",0,train a0bbeb10e2dada2a44caed9fd0bc1cd85e4ff93f,tensorflow/tensorflow,"Unreachable input gradients (#13071) * Check if inputs are reachable from outputs in AddSymbolicGradients. * Removing LOG. * Edit following the PR comments. * Lines > 80 chars. * Formatting comments in gradients_test. * Eliminate m2 and renamed m1->m, dm1->diff_m * Edit InvalidArgument string concatenation.",gradients_test.cc,"@@ -48,9 +48,9 @@ class GradientsTest : public ::testing::Test { Scope scope_test_; }; -// EX. 
+// Example: // ^ ^ -// dy| dx| // MatMul Gradient Graph +// dy| dx| (MatMul Gradient Graph) // | | // MatMul_1 MatMul_2 // ^ ^ ^ ^ @@ -61,7 +61,7 @@ class GradientsTest : public ::testing::Test { // | Const_3 | // | | // | ^ | -// | z| | // MatMul Forward Graph +// | z| | (MatMul Forward Graph) // | | | // | MatMul_0 | // | / \ | @@ -373,24 +373,22 @@ TEST_F(GradientsTest, UnreachableEdgeGradOneOutput) { auto y_const = Const(scope_test_, {{1.0}, {2.0}, {3.0}}); auto y_assign = Assign(scope_test_, y, y_const); - auto m1 = MatMul(scope_test_, x, y); + auto m = MatMul(scope_test_, x, y); auto z = Variable(scope_test_, {1, 3}, DT_DOUBLE); auto z_const = Const(scope_test_, {{9.0, 10.0, 11.0}}); auto z_assign = Assign(scope_test_, z, z_const); - auto m2 = MatMul(scope_test_, y, z); - - auto dm1 = Const(scope_test_, {{0.5}, {0.5}}); + auto diff_m = Const(scope_test_, {{0.5}, {0.5}}); std::vector grad_outputs; TF_ASSERT_OK( - AddSymbolicGradients(scope_test_, {m1}, {y}, {dm1}, &grad_outputs)); + AddSymbolicGradients(scope_test_, {m}, {y}, {diff_m}, &grad_outputs)); std::vector outputs; test::GetTensors(scope_test_, {x_assign, y_assign, z_assign}, {grad_outputs[0]}, &outputs); - // dz/dy = xT * dm1 + // dz/dy = xT * diff_m test::ExpectTensorNear( outputs[0], test::AsTensor({2.5, 3.5, 4.5}, {3, 1}), 1e-5); } @@ -424,13 +422,36 @@ TEST_F(GradientsTest, UnreachableEdgeGradTwoOutputs) { test::GetTensors(scope_test_, {x_assign, y_assign, z_assign}, {grad_outputs[0]}, &outputs); - // the gradients from m1 and m2 will be summed to compute the gradient - // w.r.t y + // The gradients from m1 and m2 will be summed to compute the gradient + // w.r.t y: // dz/dy = xT * dm1 + dm2 * zT test::ExpectTensorNear( outputs[0], test::AsTensor({17.5, 24.7, 26.8}, {3, 1}), 1e-5); } +TEST_F(GradientsTest, UnreachableInput) { + auto x = Const(scope_test_, {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}); + auto y = Const(scope_test_, {{1.0}, {2.0}, {3.0}}); + auto z = Const(scope_test_.WithOpName(""z""), {{9.0, 10.0, 11.0}}); + + auto m1 = MatMul(scope_test_, x, y); + auto m2 = MatMul(scope_test_, y, z); + auto dm1 = Const(scope_test_, {{0.5}, {0.5}}); + + // From m1, z is unreachable, so an error status should be returned. + // m2 m1 + // | | + // * * + // / \ / \ + // z y x + std::vector grad_outputs; + Status status = AddSymbolicGradients(scope_test_, {m1}, {z}, {dm1}, + &grad_outputs); + EXPECT_EQ(status.code(), error::INVALID_ARGUMENT); + EXPECT_EQ(status.error_message(), ""Cannot compute the partial derivative"" + "" for node 'z' as it's unreachable from the output node(s).""); +} + // StopGradientSingleOutputMultiEdgeTest tests combinations of valid and // 'NoGradient' (induced by StopGradient op) returned along multiple edges from // a single nodes output. 
",0,train d41d3f4c27722d2dbdd00227fbddb0713310f313,tensorflow/tensorflow,"[NNAPI] Enable delegation of NNAPI FL6 ops in NNAPI delegate PiperOrigin-RevId: 407951625 Change-Id: I656a621a291aa4fdd887009fad952849c27ecd16",acceleration_test_list.cc,"@@ -95,6 +95,11 @@ ArgMinMaxOpTest/ArgMinMaxOpTest/Get.+ArgOutput64/[46],29 # basic_rnn_test RnnOpTest/BlackBoxTest +# batch_matmul_test +# broadcasting is not supported +-BatchMatMulOpTest/BatchMatMulOpTest/.+Broadcast.+ +BatchMatMulOpTest/BatchMatMulOpTest/.+,1000006 + # batch_to_space_nd_test BatchToSpaceNDOpTest/SimpleConstTest.* BatchToSpaceNDOpTest/BatchOneConstTest.* @@ -282,6 +287,19 @@ FloatMulOpTest/.+ -NegOpModel/.+Int64 NegOpModel/.+,29 +# pack_test +# int32 and uint8 are supported since NNAPI FL6 +PackOpTest/Int32.+,1000006 +PackOpTestInt/1/.+,1000006 +# PACK along last axis is supported since NNAPI FL6 +PackOpTest/FloatThreeInputsDifferentAxis,1000006 +PackOpTest/FloatThreeInputsNegativeAxis,1000006 +PackOpTestInt/0/ThreeInputsDifferentAxis,1000006 +PackOpTestInt/0/ThreeInputsNegativeAxis,1000006 +# f32 and int8 are supported since NNAPI 1.3 by decomposition +PackOpTest/Float.+,30 +PackOpTestInt/0/.+,30 + # pad_test -PadOpTest/TooManyDimensions -PadOpTest/UnequalDimensions @@ -349,9 +367,13 @@ ConstFloat(Sum|Prod|Max|Min)OpTest/ScalarAxis,29 # reshape_test # Acceleration would be only for the test with shape being a constant tensor or # as hardcoded options. -VariedShapeSpec/ReshapeOpTest/InvalidShape/[01] -VariedShapeSpec/ReshapeOpTest/RegularShapes/[01] -VariedShapeSpec/ReshapeOpTest/WithStretchDimension/[01] +ReshapeOpTest/[01]/InvalidShape +ReshapeOpTest/[01]/RegularShapes +ReshapeOpTest/[01]/WithStretchDimension +# int32 is supported since NNAPI FL6 +ReshapeOpTest/3/InvalidShape,1000006 +ReshapeOpTest/3/RegularShapes,1000006 +ReshapeOpTest/3/WithStretchDimension,1000006 # resize_bilinear_test // align_corners & half_pixel_centers are not implemented in NNAPI before API 30 ",0,test d41d3f4c27722d2dbdd00227fbddb0713310f313,tensorflow/tensorflow,"[NNAPI] Enable delegation of NNAPI FL6 ops in NNAPI delegate PiperOrigin-RevId: 407951625 Change-Id: I656a621a291aa4fdd887009fad952849c27ecd16",nnapi_delegate.cc,"@@ -364,6 +364,21 @@ bool IsMeanWithDifferentInputOutputQuantization(const TfLiteContext* context, input.params.zero_point != output.params.zero_point; } +bool IsBroadcastBatchMatMul(const TfLiteContext* context, + const TfLiteNode* node) { + const auto& input0 = context->tensors[node->inputs->data[0]]; + const auto& input1 = context->tensors[node->inputs->data[1]]; + if (input0.dims->size != input1.dims->size) { + return true; + } + for (int i = 0; i < input0.dims->size - 2; i++) { + if (input0.dims->data[i] != input1.dims->data[i]) { + return true; + } + } + return false; +} + bool IsHybridOperator(const TfLiteContext* context, int builtin_code, const TfLiteNode* node) { switch (builtin_code) { @@ -2366,7 +2381,11 @@ bool NNAPIDelegateKernel::Validate( } break; case kTfLiteBuiltinReshape: { ExpectOpVersion(version, 1, &val_ctx); - ExpectIsFloatOrQuant8Operator(context, node, &val_ctx); + if (android_sdk_version < kNNAPIRuntimeFeatureLevel6) { + ExpectIsFloatOrQuant8Operator(context, node, &val_ctx); + } else { + ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx); + } if (node->inputs->size >= 2) { Expect(context->tensors[node->inputs->data[1]].allocation_type == kTfLiteMmapRo, @@ -3270,14 +3289,19 @@ bool NNAPIDelegateKernel::Validate( ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13, &val_ctx); 
const auto input_type = context->tensors[node->inputs->data[0]].type; - EXPECT_INPUT_TYPE_IN(input_type, kTfLiteInt32, kTfLiteFloat32, - kTfLiteInt8); - auto builtin = reinterpret_cast(node->builtin_data); - Expect(builtin->axis != -1 && - builtin->axis != - context->tensors[node->inputs->data[0]].dims->size, - NNAPIValidationFailureType::kUnsupportedOperandValue, - ""NNAPI does not support axis being the last dimension"", &val_ctx); + if (android_sdk_version >= kNNAPIRuntimeFeatureLevel6) { + EXPECT_INPUT_TYPE_IN(input_type, kTfLiteInt32, kTfLiteFloat32, + kTfLiteInt8, kTfLiteUInt8); + } else { + EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt8); + auto builtin = reinterpret_cast(node->builtin_data); + Expect(builtin->axis != -1 && + builtin->axis != + context->tensors[node->inputs->data[0]].dims->size, + NNAPIValidationFailureType::kUnsupportedOperandValue, + ""NNAPI does not support axis being the last dimension"", + &val_ctx); + } } break; case kTfLiteBuiltinUnpack: { ExpectOpVersion(version, 2, &val_ctx); @@ -3322,6 +3346,24 @@ bool NNAPIDelegateKernel::Validate( NNAPIValidationFailureType::kUnsupportedOperandRank, ""NNAPI does not support input rank greater than 4"", &val_ctx); } break; + case kTfLiteBuiltinBatchMatmul: { + ExpectOpVersion(version, 2, &val_ctx); + ExpectMinAndroidSdkVersion(android_sdk_version, + kNNAPIRuntimeFeatureLevel6, &val_ctx); + const auto& input0 = context->tensors[node->inputs->data[0]]; + const auto& input1 = context->tensors[node->inputs->data[1]]; + EXPECT_INPUT_TYPE_IN(input0.type, kTfLiteFloat32, kTfLiteInt32); + Expect(input0.type == input1.type, + NNAPIValidationFailureType::kUnsupportedHybridOperator, + ""NNAPI does not support hybrid batch matmul"", &val_ctx); + Expect(input0.dims->size <= 4 && input0.dims->size >= 2, + NNAPIValidationFailureType::kUnsupportedOperandRank, + ""NNAPI does not support input rank greater than 4 or less than 2"", + &val_ctx); + Expect(!IsBroadcastBatchMatMul(context, node), + NNAPIValidationFailureType::kUnsupportedInputType, + ""NNAPI does not support broadcast batch matmul"", &val_ctx); + } break; default: // All other operators are not mapped. AddValidationFailure(NNAPIValidationFailureType::kUnsupportedOperator, @@ -4187,6 +4229,16 @@ TfLiteStatus NNAPIDelegateKernel::Map( case kTfLiteBuiltinFill: { *nn_op_type = ANEURALNETWORKS_FILL; } break; + case kTfLiteBuiltinBatchMatmul: { + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + mapping_args.builder->AddScalarBoolOperand(builtin->adj_x); + mapping_args.builder->AddScalarBoolOperand(builtin->adj_y); + *nn_op_type = ANEURALNETWORKS_BATCH_MATMUL; + } break; + case kTfLiteBuiltinPack: { + *nn_op_type = ANEURALNETWORKS_PACK; + } break; default: // All other operators are not mapped. return kTfLiteError; @@ -5023,7 +5075,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors( } // Delegate PACK by lowering it into CONCAT + RESHAPE. - if (reg->builtin_code == kTfLiteBuiltinPack) { + if (reg->builtin_code == kTfLiteBuiltinPack && + target_feature_level_ < kNNAPIRuntimeFeatureLevel6) { TF_LITE_ENSURE_STATUS( builder.TransformPackIntoSupportedOps(node_index, node, reg)); continue; @@ -5172,6 +5225,16 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors( node_index); continue; } + // For PACK, NNAPI expects the axis scalar before all input tensors. + if (reg->builtin_code == kTfLiteBuiltinPack) { + const auto* builtin = + reinterpret_cast(node->builtin_data); + // NNAPI only accepts non-negative axis. 
+ auto& input_tensor = context->tensors[node->inputs->data[0]]; + int axis = builtin->axis < 0 ? input_tensor.dims->size + builtin->axis + 1 + : builtin->axis; + TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(axis)); + } // Map inputs to NN API tensor indices. for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) { if (reg->builtin_code == kTfLiteBuiltinTransposeConv) { ",0,test d41d3f4c27722d2dbdd00227fbddb0713310f313,tensorflow/tensorflow,"[NNAPI] Enable delegation of NNAPI FL6 ops in NNAPI delegate PiperOrigin-RevId: 407951625 Change-Id: I656a621a291aa4fdd887009fad952849c27ecd16",nnapi_delegate_kernel.h,"@@ -35,6 +35,7 @@ constexpr int32_t kMinSdkVersionForNNAPI13 = 30; // TODO(b/185838597): change the remaining kMinSdkVersionForNNAPI* to // kNNAPIRuntimeFeatureLevel*. constexpr int32_t kNNAPIRuntimeFeatureLevel5 = 31; +constexpr int32_t kNNAPIRuntimeFeatureLevel6 = 1000006; // Track tensor indices to NN API tensor indices mapping. class OperandMapping { ",0,test d41d3f4c27722d2dbdd00227fbddb0713310f313,tensorflow/tensorflow,"[NNAPI] Enable delegation of NNAPI FL6 ops in NNAPI delegate PiperOrigin-RevId: 407951625 Change-Id: I656a621a291aa4fdd887009fad952849c27ecd16",reshape_test.cc,"@@ -95,6 +95,12 @@ TYPED_TEST(ReshapeOpTest, TooManySpecialDimensions) { TYPED_TEST(ReshapeOpTest, InvalidShape) { for (ShapeSpecificationType shape_type : ReshapeOpTest::_range_) { + if (SingleOpModel::GetForceUseNnapi() && + shape_type == ShapeSpecificationType::kAsTensor) { + // NNAPI delegate does not support RESHAPE with shape as a non-constant + // tensor. + continue; + } ReshapeOpModel m({1, 2, 2}, {2, 2}, {1, 2, 2, 1}, shape_type); m.SetInput({5, 6, 7, 8}); m.Invoke(); @@ -107,6 +113,12 @@ TYPED_TEST(ReshapeOpTest, InvalidShape) { TYPED_TEST(ReshapeOpTest, RegularShapes) { for (ShapeSpecificationType shape_type : ReshapeOpTest::_range_) { + if (SingleOpModel::GetForceUseNnapi() && + shape_type == ShapeSpecificationType::kAsTensor) { + // NNAPI delegate does not support RESHAPE with shape as a non-constant + // tensor. + continue; + } ReshapeOpModel m({1, 2, 4, 1}, {3}, {2, 2, 2}, shape_type); m.SetInput({1, 2, 3, 4, 5, 6, 7, 8}); m.Invoke(); @@ -118,6 +130,12 @@ TYPED_TEST(ReshapeOpTest, RegularShapes) { TYPED_TEST(ReshapeOpTest, WithStretchDimension) { for (ShapeSpecificationType shape_type : ReshapeOpTest::_range_) { + if (SingleOpModel::GetForceUseNnapi() && + shape_type == ShapeSpecificationType::kAsTensor) { + // NNAPI delegate does not support RESHAPE with shape as a non-constant + // tensor. + continue; + } ReshapeOpModel m({1, 2, 4, 1}, {3}, {2, 1, -1}, shape_type); m.SetInput({1, 2, 3, 4, 5, 6, 7, 8}); m.Invoke(); ",0,test d41d3f4c27722d2dbdd00227fbddb0713310f313,tensorflow/tensorflow,"[NNAPI] Enable delegation of NNAPI FL6 ops in NNAPI delegate PiperOrigin-RevId: 407951625 Change-Id: I656a621a291aa4fdd887009fad952849c27ecd16",NeuralNetworksTypes.h,"@@ -145,6 +145,8 @@ enum { ANEURALNETWORKS_HARD_SWISH = 99, ANEURALNETWORKS_FILL = 100, ANEURALNETWORKS_RANK = 101, + ANEURALNETWORKS_BATCH_MATMUL = 102, + ANEURALNETWORKS_PACK = 103, }; /** @@ -255,6 +257,8 @@ enum { * API releases. 
*/ ANEURALNETWORKS_FEATURE_LEVEL_5 = 31, + /** Android NNAPI feature level 6 */ + ANEURALNETWORKS_FEATURE_LEVEL_6 = 1000006, }; /** ",0,test e874244346b3945de77d304d5f12e192aaa6f539,tensorflow/tensorflow,"Add C++ loop memory leak test for MemoryChecker PiperOrigin-RevId: 346579012 Change-Id: I4dcf861efac18f665ec9b8f951ab8b082a0d7777",memory_checker_test.py,"@@ -108,6 +108,18 @@ class MemoryCheckerTest(test.TestCase): with self.assertRaises(AssertionError): memory_checker.assert_no_leak_if_all_possibly_except_one() + def testLeak4(self): + helper = _memory_checker_test_helper.MemoryCheckerTestHelper() + + with MemoryChecker() as memory_checker: + for i in range(10): + helper.list_push_back(i) + memory_checker.record_snapshot() + + memory_checker.report() + with self.assertRaises(AssertionError): + memory_checker.assert_no_leak_if_all_possibly_except_one() + def testNoNewPythonObjectsEmpty(self): self.skipTest('TODO(b/150324603): Flaky test.') with MemoryChecker() as memory_checker: ",0,test dc7bc9e4053e8b643937447f1f31a2bf980a1d3a,tensorflow/tensorflow,"Add TTI pass initialization to pass managers. Many LLVM transformations benefits from knowing the targets. This enables optimizations, especially in a JIT context when the target is (generally) well-known. Closes #49 PiperOrigin-RevId: 261840617",OptUtils.h,"@@ -31,6 +31,7 @@ namespace llvm { class Module; class Error; +class TargetMachine; } // namespace llvm namespace mlir { @@ -41,17 +42,23 @@ void initializeLLVMPasses(); /// Create a module transformer function for MLIR ExecutionEngine that runs /// LLVM IR passes corresponding to the given speed and size optimization -/// levels (e.g. -O2 or -Os). +/// levels (e.g. -O2 or -Os). If not null, `targetMachine` is used to +/// initialize passes that provide target-specific information to the LLVM +/// optimizer. `targetMachine` must outlive the returned std::function. std::function -makeOptimizingTransformer(unsigned optLevel, unsigned sizeLevel); +makeOptimizingTransformer(unsigned optLevel, unsigned sizeLevel, + llvm::TargetMachine *targetMachine); /// Create a module transformer function for MLIR ExecutionEngine that runs /// LLVM IR passes explicitly specified, plus an optional optimization level, /// Any optimization passes, if present, will be inserted before the pass at -/// position optPassesInsertPos. +/// position optPassesInsertPos. If not null, `targetMachine` is used to +/// initialize passes that provide target-specific information to the LLVM +/// optimizer. `targetMachine` must outlive the returned std::function. std::function makeLLVMPassesTransformer(llvm::ArrayRef llvmPasses, llvm::Optional mbOptLevel, + llvm::TargetMachine *targetMachine, unsigned optPassesInsertPos = 0); } // end namespace mlir ",0,train c762c4501ca017994c1fa5554c3c8e47b7c80b66,tensorflow/tensorflow,"Squash REQUIRED_PACKAGES logic This cleanup should be a no-op. I moved a bunch of package-selection logic into the list of REQUIRED_PACKAGES so that all dependencies are specified in just one place. For example, one big difference is that the TF ecosystem packages are right next to each other. I also know that gast 0.5.2 breaks one of our tests, so I've pinned the gast version to 0.4.0 or below, which is what our CI currently installs for testing. 
PiperOrigin-RevId: 419650242 Change-Id: I30a70e6a54f89e55e117d8484c84b2b39709b711",setup.py,"@@ -59,6 +59,10 @@ if '--project_name' in sys.argv: sys.argv.remove('--project_name') sys.argv.pop(project_name_idx) +# Returns standard if a tensorflow-* package is being built, and nightly if a +# tf_nightly-* package is being built. +def if_nightly(standard, nightly): + return nightly if 'tf_nightly' in project_name else standard # All versions of TF need these packages. We indicate the widest possible range # of package releases possible to be as up-to-date as possible as well as to @@ -69,12 +73,13 @@ if '--project_name' in sys.argv: # NOTE: This assumes that all packages follow SemVer. If a package follows a # different versioning scheme (e.g., PVP), we use different bound specifier and # comment the versioning scheme. -# NOTE: Please add test only packages to `TEST_PACKAGES` below. REQUIRED_PACKAGES = [ 'absl-py >= 0.4.0', 'astunparse >= 1.6.0', 'flatbuffers >= 1.12', - 'gast >= 0.2.1', + # gast versions above 0.4.0 are incompatible with some of TF's tests. + # TODO(angerson): File a bug for these incompatible tests and the limitation + 'gast >= 0.2.1, <= 0.4.0', 'google_pasta >= 0.1.1', 'h5py >= 2.9.0', 'keras_preprocessing >= 1.1.1', # 1.1.0 needs tensorflow==1.7 @@ -87,49 +92,26 @@ REQUIRED_PACKAGES = [ 'termcolor >= 1.1.0', 'typing_extensions >= 3.6.6', 'wrapt >= 1.11.0', - # TensorFlow ecosystem packages that TF exposes API for - # These need to be in sync with the existing TF version - # They are updated during the release process - # When updating these, please also update the nightly versions below - 'tensorboard >= 2.7, < 2.8', - 'tensorflow_estimator >= 2.8.0rc0, < 2.9', - 'keras >= 2.8.0rc0, < 2.9', 'tensorflow-io-gcs-filesystem >= 0.23.1', -] - - -# For nightly packages, instead of depending on tensorboard, -# tensorflow_estimator and keras, we depend on their nightly equivalent. -# When updating these, make sure to also update the release versions above. -# NOTE: the nightly versions are one version ahead of the release ones! -# NOTE: the nightly versions specify alpha/dev! -if 'tf_nightly' in project_name: - for i, pkg in enumerate(REQUIRED_PACKAGES): - if 'tensorboard' in pkg: - REQUIRED_PACKAGES[i] = 'tb-nightly ~= 2.8.0.a' - elif 'tensorflow_estimator' in pkg: - REQUIRED_PACKAGES[i] = 'tf-estimator-nightly ~= 2.9.0.dev' - elif 'keras' in pkg and 'keras_preprocessing' not in pkg: - REQUIRED_PACKAGES[i] = 'keras-nightly ~= 2.9.0.dev' - - -# grpcio does not build correctly on big-endian machines due to lack of -# BoringSSL support. -# See https://github.com/tensorflow/tensorflow/issues/17882. -if sys.byteorder == 'little': - REQUIRED_PACKAGES.append('grpcio >= 1.24.3, < 2.0') - - -# Packages which are only needed for testing code. -# Please don't add test-only packages to `REQUIRED_PACKAGES`! -# Follows the same conventions as `REQUIRED_PACKAGES` -TEST_PACKAGES = [ - 'portpicker >= 1.3.1', - 'scipy >= 1.5.2', - 'tblib >= 1.4.0', - 'dill >= 0.2.9', -] - + # grpcio does not build correctly on big-endian machines due to lack of + # BoringSSL support. + # See https://github.com/tensorflow/tensorflow/issues/17882. + 'grpcio >= 1.24.3, < 2.0' if sys.byteorder == 'little' else None, + # TensorFlow exposes the TF API for certain TF ecosystem packages like + # keras. When TF depends on those packages, the package version needs to + # match the current TF version. 
For tf_nightly, we install the nightly + # variant of each package instead, which must be one version ahead of the + # current release version. These also usually have ""alpha"" or ""dev"" in their + # version name. + # These are all updated during the TF release process. + if_nightly('tensorboard >= 2.7, < 2.8', + 'tb-nightly ~= 2.8.0.a'), + if_nightly('tensorflow_estimator >= 2.8.0rc0, < 2.9', + 'tf-estimator-nightly ~= 2.9.0.dev'), + if_nightly('keras >= 2.8.0rc0, < 2.9', + 'keras-nightly ~= 2.9.0.dev'), +])) +REQUIRED_PACKAGES = [ p for p in REQUIRED_PACKAGES if p is not None ] DOCLINES = __doc__.split('\n') if project_name.endswith('-gpu'): @@ -152,17 +134,15 @@ CONSOLE_SCRIPTS = [ # is now declared by the tensorboard pip package. If we remove the # TensorBoard command, pip will inappropriately remove it during install, # even though the command is not removed, just moved to a different wheel. - 'tensorboard = tensorboard.main:run_main', + # We exclude it anyway if building tf_nightly. + if_nightly(None, 'tensorboard = tensorboard.main:run_main') 'tf_upgrade_v2 = tensorflow.tools.compatibility.tf_upgrade_v2_main:main', 'estimator_ckpt_converter = ' 'tensorflow_estimator.python.estimator.tools.checkpoint_converter:main', ] +CONSOLE_SCRIPTS = [ s for s in CONSOLE_SCRIPTS if s is not None ] # pylint: enable=line-too-long -# remove the tensorboard console script if building tf_nightly -if 'tf_nightly' in project_name: - CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:run_main') - class BinaryDistribution(Distribution): @@ -310,7 +290,6 @@ setup( }, headers=headers, install_requires=REQUIRED_PACKAGES, - tests_require=REQUIRED_PACKAGES + TEST_PACKAGES, # Add in any packaged data. include_package_data=True, package_data={ ",0,train fc33e0f3783cab0d7486f6e277e77e1c95ce291d,tensorflow/tensorflow,"Reshaped real valued column for DNN input layer. Change: 123786681",feature_column.py,"@@ -543,7 +543,6 @@ class _RealValuedColumn(_FeatureColumn, collections.namedtuple( def __new__(cls, column_name, dimension, default_value, dtype): if default_value is not None: default_value = tuple(default_value) - return super(_RealValuedColumn, cls).__new__(cls, column_name, dimension, default_value, dtype) @@ -573,7 +572,10 @@ class _RealValuedColumn(_FeatureColumn, collections.namedtuple( input_tensor, weight_collections=None, trainable=True): - return input_tensor + batch_size = input_tensor.get_shape().as_list()[0] + batch_size = int(batch_size) if batch_size else -1 + flattened_shape = [batch_size, self.dimension] + return array_ops.reshape(math_ops.to_float(input_tensor), flattened_shape) def to_weighted_sum(self, input_tensor, ",0,test a189502cc3032f0bc8f3294b0e39062e89fe9181,tensorflow/tensorflow,"Activates Eigen path for CPU implementation of atrous/dilated convolution (only forward path). PiperOrigin-RevId: 186071285",conv_2d.h,"@@ -54,10 +54,12 @@ struct InflatePadAndShuffle { template void SpatialConvolutionFunc(const Device& d, Output output, Input input, Filter filter, int row_stride, int col_stride, + int row_dilation, int col_dilation, const Eigen::PaddingType& padding) { // Need to swap row/col when calling Eigen. 
output.device(d) = - Eigen::SpatialConvolution(input, filter, col_stride, row_stride, padding); + Eigen::SpatialConvolution(input, filter, col_stride, row_stride, padding, + col_dilation, row_dilation); } template @@ -65,9 +67,10 @@ struct SpatialConvolution { void operator()(const Device& d, typename TTypes::Tensor output, typename TTypes::ConstTensor input, typename TTypes::ConstTensor filter, int row_stride, - int col_stride, const Eigen::PaddingType& padding) { + int col_stride, int row_dilation, int col_dilation, + const Eigen::PaddingType& padding) { SpatialConvolutionFunc(d, output, input, filter, row_stride, col_stride, - padding); + row_dilation, col_dilation, padding); } }; @@ -77,11 +80,12 @@ struct SpatialConvolution { typename TTypes::Tensor output, typename TTypes::ConstTensor input, typename TTypes::ConstTensor filter, - int row_stride, int col_stride, - const Eigen::PaddingType& padding) { + int row_stride, int col_stride, int row_dilation, + int col_dilation, const Eigen::PaddingType& padding) { output.device(d) = Eigen::SpatialConvolution(input.cast(), filter.cast(), - col_stride, row_stride, padding) + col_stride, row_stride, padding, col_dilation, + row_dilation) .cast(); } }; @@ -91,11 +95,13 @@ struct SpatialConvolutionBackwardInput { void operator()(const Device& d, typename TTypes::Tensor input_backward, typename TTypes::ConstTensor kernel, typename TTypes::ConstTensor output_backward, - int row_stride, int col_stride) { + int row_stride, int col_stride, int row_dilation, + int col_dilation) { // Need to swap row/col when calling Eigen. input_backward.device(d) = Eigen::SpatialConvolutionBackwardInput( kernel, output_backward, input_backward.dimension(2), - input_backward.dimension(1), col_stride, row_stride); + input_backward.dimension(1), col_stride, row_stride, col_dilation, + row_dilation); } }; @@ -105,11 +111,13 @@ struct SpatialConvolutionBackwardFilter { typename TTypes::Tensor kernel_backward, typename TTypes::ConstTensor input, typename TTypes::ConstTensor output_backward, - int row_stride, int col_stride) { + int row_stride, int col_stride, int row_dilation, + int col_dilation) { // Need to swap row/col when calling Eigen. kernel_backward.device(d) = Eigen::SpatialConvolutionBackwardKernel( input, output_backward, kernel_backward.dimension(1), - kernel_backward.dimension(0), col_stride, row_stride); + kernel_backward.dimension(0), col_stride, row_stride, col_dilation, + row_dilation); } }; ",0,test a189502cc3032f0bc8f3294b0e39062e89fe9181,tensorflow/tensorflow,"Activates Eigen path for CPU implementation of atrous/dilated convolution (only forward path). PiperOrigin-RevId: 186071285",conv_grad_filter_ops.cc,"@@ -101,7 +101,8 @@ struct LaunchConv2DBackpropFilterOp { const CPUDevice& d = ctx->eigen_device(); functor::SpatialConvolutionBackwardFilter()( d, filter_backprop->tensor(), input.tensor(), - out_backprop.tensor(), row_stride, col_stride); + out_backprop.tensor(), row_stride, col_stride, + /*row_dilation=*/1, /*col_dilation=*/1); } }; ",0,test a189502cc3032f0bc8f3294b0e39062e89fe9181,tensorflow/tensorflow,"Activates Eigen path for CPU implementation of atrous/dilated convolution (only forward path). 
PiperOrigin-RevId: 186071285",conv_grad_input_ops.cc,"@@ -106,7 +106,8 @@ struct LaunchConv2DBackpropInputOp { const CPUDevice& d = ctx->eigen_device(); functor::SpatialConvolutionBackwardInput()( d, in_backprop->tensor(), filter.tensor(), - out_backprop.tensor(), row_stride, col_stride); + out_backprop.tensor(), row_stride, col_stride, + /*row_dilation=*/1, /*col_dilation=*/1); } }; ",0,test a189502cc3032f0bc8f3294b0e39062e89fe9181,tensorflow/tensorflow,"Activates Eigen path for CPU implementation of atrous/dilated convolution (only forward path). PiperOrigin-RevId: 186071285",conv_ops.cc,"@@ -60,8 +60,8 @@ template struct LaunchGeneric { void operator()(OpKernelContext* ctx, const Tensor& input, const Tensor& filter, int row_stride, int col_stride, - const Padding& padding, Tensor* output, - TensorFormat data_format) { + int row_dilation, int col_dilation, const Padding& padding, + Tensor* output, TensorFormat data_format) { CHECK(data_format == FORMAT_NHWC) << ""Generic conv implementation only "" ""supports NHWC tensor format for now.""; if (filter.dim_size(0) == 1 && filter.dim_size(1) == 1 && row_stride == 1 && @@ -86,7 +86,8 @@ struct LaunchGeneric { filter.shaped({filter.dim_size(2), filter.dim_size(3)}), dim_pair); } else if (filter.dim_size(0) == input.dim_size(1) && - filter.dim_size(1) == input.dim_size(2) && padding == VALID) { + filter.dim_size(1) == input.dim_size(2) && row_dilation == 1 && + col_dilation == 1 && padding == VALID) { // If the input data and filter have the same height/width, // the 2D convolution is reduced to matrix multiplication. const int k = // Length of reduction dimension. @@ -103,7 +104,7 @@ struct LaunchGeneric { functor::SpatialConvolution()( ctx->eigen_device(), output->tensor(), input.tensor(), filter.tensor(), row_stride, col_stride, - BrainPadding2EigenPadding(padding)); + row_dilation, col_dilation, BrainPadding2EigenPadding(padding)); } } }; @@ -122,15 +123,9 @@ struct LaunchConv2DOp { ""NHWC tensor format for now."")); return; } - // TODO(yangzihao): Add the CPU implementation of dilated conv 2D. - if (row_dilation > 1 || col_dilation > 1) { - ctx->SetStatus( - errors::Unimplemented(""Generic conv implementation only supports "" - ""dilated rate of 1 for now."")); - return; - } LaunchGeneric()(ctx, input, filter, row_stride, col_stride, - padding, output, data_format); + row_dilation, col_dilation, padding, output, + data_format); } }; @@ -792,7 +787,8 @@ namespace functor { const GPUDevice& d, typename TTypes::Tensor output, \ typename TTypes::ConstTensor input, \ typename TTypes::ConstTensor filter, int row_stride, \ - int col_stride, const Eigen::PaddingType& padding); \ + int col_stride, int row_dilation, int col_dilation, \ + const Eigen::PaddingType& padding); \ extern template struct SpatialConvolution; \ template <> \ void MatMulConvFunctor::operator()( \ ",0,test a189502cc3032f0bc8f3294b0e39062e89fe9181,tensorflow/tensorflow,"Activates Eigen path for CPU implementation of atrous/dilated convolution (only forward path). PiperOrigin-RevId: 186071285",conv_ops_test.py,"@@ -302,25 +302,20 @@ class Conv2DTest(test.TestCase): padding, dilations): expected_results = [] computed_results = [] - default_dilations = (dilations[0] == 1 and dilations[1] == 1) for data_format, use_gpu in GetTestConfigs(): - # If any dilation rate is larger than 1, only do test on the GPU - # because we currently do not have a CPU implementation for arbitrary - # dilation rates. 
- if default_dilations or use_gpu: - expected, computed = self._ComputeReferenceDilatedConv( - tensor_in_sizes, filter_in_sizes, strides, dilations, padding, - data_format, use_gpu) - expected_results.append(expected) - computed_results.append(computed) - tolerance = 1e-2 if use_gpu else 1e-5 - expected_values = self.evaluate(expected_results) - computed_values = self.evaluate(computed_results) - for e_value, c_value in zip(expected_values, computed_values): - print(""expected = "", e_value) - print(""actual = "", c_value) - self.assertAllClose( - e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-4) + expected, computed = self._ComputeReferenceDilatedConv( + tensor_in_sizes, filter_in_sizes, strides, dilations, padding, + data_format, use_gpu) + expected_results.append(expected) + computed_results.append(computed) + tolerance = 1e-2 if use_gpu else 1e-5 + expected_values = self.evaluate(expected_results) + computed_values = self.evaluate(computed_results) + for e_value, c_value in zip(expected_values, computed_values): + print(""expected = "", e_value) + print(""actual = "", c_value) + self.assertAllClose( + e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-4) def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, strides, padding, expected): @@ -365,13 +360,12 @@ class Conv2DTest(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testConv2D2x2Filter2x1Dilation(self): - if test.is_gpu_available(cuda_only=True): - self._VerifyDilatedConvValues( - tensor_in_sizes=[1, 4, 4, 1], - filter_in_sizes=[2, 2, 1, 1], - strides=[1, 1], - dilations=[2, 1], - padding=""VALID"") + self._VerifyDilatedConvValues( + tensor_in_sizes=[1, 4, 4, 1], + filter_in_sizes=[2, 2, 1, 1], + strides=[1, 1], + dilations=[2, 1], + padding=""VALID"") @test_util.run_in_graph_and_eager_modes() def testConv2DEmpty(self): @@ -385,13 +379,12 @@ class Conv2DTest(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testConv2DEmptyDilation(self): - if test.is_gpu_available(cuda_only=True): - self._VerifyDilatedConvValues( - tensor_in_sizes=[0, 2, 3, 3], - filter_in_sizes=[1, 1, 3, 3], - strides=[1, 1], - dilations=[2, 1], - padding=""VALID"") + self._VerifyDilatedConvValues( + tensor_in_sizes=[0, 2, 3, 3], + filter_in_sizes=[1, 1, 3, 3], + strides=[1, 1], + dilations=[2, 1], + padding=""VALID"") @test_util.run_in_graph_and_eager_modes() def testConv2D2x2Filter(self): @@ -406,13 +399,12 @@ class Conv2DTest(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testConv2D2x2FilterDilation(self): - if test.is_gpu_available(cuda_only=True): - self._VerifyDilatedConvValues( - tensor_in_sizes=[1, 2, 3, 3], - filter_in_sizes=[2, 2, 3, 3], - strides=[1, 1], - dilations=[1, 2], - padding=""VALID"") + self._VerifyDilatedConvValues( + tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[2, 2, 3, 3], + strides=[1, 1], + dilations=[1, 2], + padding=""VALID"") @test_util.run_in_graph_and_eager_modes() def testConv2D1x2Filter(self): @@ -430,13 +422,12 @@ class Conv2DTest(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testConv2D1x2FilterDilation(self): - if test.is_gpu_available(cuda_only=True): - self._VerifyDilatedConvValues( - tensor_in_sizes=[1, 2, 3, 3], - filter_in_sizes=[1, 2, 3, 3], - strides=[1, 1], - dilations=[2, 1], - padding=""VALID"") + self._VerifyDilatedConvValues( + tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[1, 2, 3, 3], + strides=[1, 1], + dilations=[2, 1], + padding=""VALID"") @test_util.run_in_graph_and_eager_modes() def testConv2D2x2FilterStride2(self): @@ 
-512,13 +503,12 @@ class Conv2DTest(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testConv2DKernelSizeMatchesInputSizeDilation(self): - if test.is_gpu_available(cuda_only=True): - self._VerifyDilatedConvValues( - tensor_in_sizes=[1, 3, 3, 1], - filter_in_sizes=[2, 2, 1, 2], - strides=[1, 1], - dilations=[2, 2], - padding=""VALID"") + self._VerifyDilatedConvValues( + tensor_in_sizes=[1, 3, 3, 1], + filter_in_sizes=[2, 2, 1, 2], + strides=[1, 1], + dilations=[2, 2], + padding=""VALID"") # TODO(yzhwang): this currently fails. # self._VerifyValues(tensor_in_sizes=[1, 8, 8, 1], @@ -1538,21 +1528,6 @@ class Conv2DTest(test.TestCase): use_gpu=False) self.evaluate(conv) - def testCPUConv2DDilatedUnimplemented(self): - with self.test_session(use_gpu=False): - with self.assertRaisesRegexp(errors_impl.UnimplementedError, - ""dilated rate of 1 for now""): - conv = self._SetupValuesForDevice( - tensor_in_sizes=[1, 4, 4, 1], - filter_in_sizes=[2, 2, 1, 1], - dilations=[2, 1], - strides=[1, 1], - padding=""VALID"", - data_format=""NHWC"", - dtype=dtypes.float32, - use_gpu=False) - self.evaluate(conv) - class DepthwiseConv2DTest(test.TestCase): @@ -1887,7 +1862,7 @@ def GetInceptionFwdTest(input_size, filter_size, stride, padding, def GetInceptionFwdDilatedConvTest(input_size, filter_size, stride, padding): def Test(self): - if test.is_gpu_available(cuda_only=True) and stride == 1: + if stride == 1: tf_logging.info(""Testing InceptionFwd with dilations %s"", (input_size, filter_size, stride, padding)) self._VerifyDilatedConvValues( ",0,test 6e4b0a4a351260ea3a15457a24332fdba46abab7,tensorflow/tensorflow,"Refactor kernel thunk's launch dimension setting - part 3. Move SetThunkLaunchDimensions() to right after KernelThunk construction. Launch dimension will be passed to KernelThunk's constructor as a parameter. PiperOrigin-RevId: 386164406 Change-Id: Ifdbed56d1daaae2f2bde1da37f87216d1014909d",ir_emitter_unnested.cc,"@@ -5119,11 +5119,6 @@ Status IrEmitterUnnested::EmitReductionFromOrToContiguousDimensions( ""doesn't set the input layout of "" << MlirToString(first_reduce); - std::vector ir_arrays; - TF_ASSIGN_OR_RETURN( - std::unique_ptr kernel_thunk, - BuildKernelThunk(unnested_hlo, Thunk::ThunkInfo(), &ir_arrays)); - HloComputation* fused_computation = nullptr; TF_ASSIGN_OR_RETURN(fused_computation, GetOrCreateSubComputationFromRegion(&fusion.region(), @@ -5136,6 +5131,29 @@ Status IrEmitterUnnested::EmitReductionFromOrToContiguousDimensions( VLOG(2) << StrCat(""Generate in "", instr_index_groups.size(), "" groups for "", MlirToString(unnested_hlo)); + ReductionCodegenInfo reduction_info = + ComputeReductionCodegenInfo(unnested_hlo, first_reduce, layout_analysis); + const KernelMappingScheme& mapping_scheme = + reduction_info.GetKernelMappingScheme(); + // block_y_count is set to instr_index_groups.size(), so that each reduction + // group can be run in parallel by a different BlockIdy. 
+ LaunchDimensions launch_dimensions( + {/*x=*/mapping_scheme.GetNumberOfBlocks(), + /*y=*/static_cast(instr_index_groups.size()), + /*z=*/1}, + {/*x=*/mapping_scheme.GetThreadsPerBlock(), /*y=*/1, /*z=*/1}); + VLOG(3) << ""Launch dimensions of "" + << mlir::GetNameFromLoc(unnested_hlo->getLoc()) + << "": number of blocks: "" << mapping_scheme.GetNumberOfBlocks() + << "" - threads per block: "" << mapping_scheme.GetThreadsPerBlock(); + + std::vector ir_arrays; + TF_ASSIGN_OR_RETURN( + std::unique_ptr kernel_thunk, + BuildKernelThunk(unnested_hlo, Thunk::ThunkInfo(), &ir_arrays)); + SetThunkLaunchDimensions(launch_dimensions, kernel_thunk.get(), + ir_emitter_context_->llvm_module()); + absl::optional elemental_emitter; absl::optional optional_fused_emitter; FusedIrEmitter* fused_emitter = nullptr; @@ -5189,23 +5207,6 @@ Status IrEmitterUnnested::EmitReductionFromOrToContiguousDimensions( b_.CreateICmpEQ(raw_block_id_y, b_.getInt32(i)); ksl.If(StrCat(""reduce-group-"", i), guarding_cond, emit_reduction_func); } - ReductionCodegenInfo reduction_info = - ComputeReductionCodegenInfo(unnested_hlo, first_reduce, layout_analysis); - const KernelMappingScheme& mapping_scheme = - reduction_info.GetKernelMappingScheme(); - // block_y_count is set to instr_index_groups.size(), so that each reduction - // group can be run in parallel by a different BlockIdy. - LaunchDimensions launch_dimensions( - {/*x=*/mapping_scheme.GetNumberOfBlocks(), - /*y=*/static_cast(instr_index_groups.size()), - /*z=*/1}, - {/*x=*/mapping_scheme.GetThreadsPerBlock(), /*y=*/1, /*z=*/1}); - VLOG(3) << ""Launch dimensions of "" - << mlir::GetNameFromLoc(unnested_hlo->getLoc()) - << "": number of blocks: "" << mapping_scheme.GetNumberOfBlocks() - << "" - threads per block: "" << mapping_scheme.GetThreadsPerBlock(); - SetThunkLaunchDimensions(launch_dimensions, kernel_thunk.get(), - ir_emitter_context_->llvm_module()); thunks.push_back(std::move(kernel_thunk)); std::unique_ptr sequential_thunk = ",0,train 93cc43bef97f4371379c2ea6e87b260a2a2cf7af,tensorflow/tensorflow,add lockFile argument to save_model(),save.py,"@@ -48,6 +48,7 @@ _KERAS_SAVED_MODEL_STILL_EXPERIMENTAL = True def save_model(model, filepath, overwrite=True, + lockFile=True, include_optimizer=True, save_format=None, signatures=None, @@ -95,6 +96,7 @@ def save_model(model, overwrite: Whether we should overwrite any existing model at the target location, or instead ask the user with a manual prompt. include_optimizer: If True, save optimizer's state together. + lockFile: If True, protect model file while saving model. save_format: Either 'tf' or 'h5', indicating whether to save the model to Tensorflow SavedModel or HDF5. Defaults to 'tf' in TF 2.X, and 'h5' in TF 1.X. @@ -128,7 +130,7 @@ def save_model(model, 'to the Tensorflow SavedModel format (by setting save_format=""tf"") ' 'or using `save_weights`.') hdf5_format.save_model_to_hdf5( - model, filepath, overwrite, include_optimizer) + model, filepath, overwrite, lockFile, include_optimizer) else: saved_model_save.save(model, filepath, overwrite, include_optimizer, signatures, options) ",0,train 5cdf8f26c806e893e0773ad34e2b59008cc6f8ec,tensorflow/tensorflow,"Update parameter_server_strategy_test to not use Keras dense layer. Replaced it with a variable and then matmul with the input. It doesn't fully copy all the keras behavior (like mix precision, etc), but it should be good enough for the existing test cases that uses it. 
PiperOrigin-RevId: 319836812 Change-Id: I97f9979d927b8187fa6c72ceff6ff521dab4cc2d",parameter_server_strategy_test.py,"@@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import copy +import functools import threading from absl.testing import parameterized @@ -43,13 +44,14 @@ from tensorflow.python.eager import context from tensorflow.python.estimator import run_config from tensorflow.python.framework import constant_op from tensorflow.python.framework import device as tf_device +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util -from tensorflow.python.keras.layers import core from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradients +from tensorflow.python.ops import init_ops_v2 from tensorflow.python.ops import math_ops from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import resource_variable_ops @@ -450,10 +452,14 @@ class ParameterServerStrategyTestBase( self.cached_session(target=master_target, config=sess_config) as sess, \ d.scope(): - l = core.Dense(1, use_bias=False) + initializer = functools.partial( + init_ops_v2.GlorotUniform(), (1, 1), dtype=dtypes.float32) + kernel = variables.Variable( + initial_value=initializer, name='kernel', trainable=True) def loss_fn(x): - y = array_ops.reshape(l(x), []) - constant_op.constant(1.) + y = array_ops.reshape( + math_ops.matmul(x, kernel), []) - constant_op.constant(1.) return y * y # TODO(yuefengz, apassos): eager.backprop.implicit_grad is not safe for ",0,train 553b04f292eb18dbb9f1a0a9c8459db8360dce5a,tensorflow/tensorflow,"examples change to sklearn dataset load method (#7512) * examples change to sklean dataset load method * fix blank lines * fix blank lines and import order",boston.py,"@@ -16,15 +16,18 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function + +from sklearn import datasets from sklearn import model_selection from sklearn import metrics from sklearn import preprocessing + import tensorflow as tf def main(unused_argv): # Load dataset - boston = tf.contrib.learn.datasets.load_dataset('boston') + boston = datasets.load_boston() x, y = boston.data, boston.target # Split dataset into train / test ",0,test 553b04f292eb18dbb9f1a0a9c8459db8360dce5a,tensorflow/tensorflow,"examples change to sklearn dataset load method (#7512) * examples change to sklean dataset load method * fix blank lines * fix blank lines and import order",iris.py,"@@ -17,7 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - +from sklearn import datasets from sklearn import metrics from sklearn import model_selection @@ -26,7 +26,7 @@ import tensorflow as tf def main(unused_argv): # Load dataset. - iris = tf.contrib.learn.datasets.load_dataset('iris') + iris = datasets.load_iris() x_train, x_test, y_train, y_test = model_selection.train_test_split( iris.data, iris.target, test_size=0.2, random_state=42) ",0,test 52a91a0fb4c39f8f6e8c8b65130c6c3a252dcfea,tensorflow/tensorflow,"Make TpuPlatform::event_map_ access thread-safe to fix a race. 
PiperOrigin-RevId: 320473357 Change-Id: I58c2e2b8b4a9cde46e4cd6ea23f994ef9335501c",tpu_executor.cc,"@@ -99,7 +99,10 @@ bool TpuExecutor::CreateStreamDependency(Stream* dependent, Stream* other) { Status TpuExecutor::AllocateEvent(Event* event) { return Status::OK(); } -Status TpuExecutor::DeallocateEvent(Event* event) { return Status::OK(); } +Status TpuExecutor::DeallocateEvent(Event* event) { + tpu_platform().EraseEvent(event->implementation()); + return Status::OK(); +} // AllocateTimer/DeallocateTimer have no specialization. bool TpuExecutor::AllocateTimer(Timer* timer) { return true; } @@ -120,26 +123,29 @@ bool TpuExecutor::StopTimer(Stream* stream, ::stream_executor::Timer* timer) { stream_executor::Event::Status TpuExecutor::PollForEventStatus( stream_executor::Event* event) { + auto se_event = tpu_platform().LookupEvent(event->implementation()); return stream_executor::Event::Status( - tpu::ExecutorApiFn()->TpuExecutor_PollForEventStatusFn( - executor_, event_map().at(event->implementation()))); + tpu::ExecutorApiFn()->TpuExecutor_PollForEventStatusFn(executor_, + se_event)); } Status TpuExecutor::RecordEvent(Stream* stream, ::stream_executor::Event* event) { StatusHelper status; + auto se_event = tpu_platform().LookupEvent(event->implementation()); tpu::ExecutorApiFn()->TpuExecutor_RecordEventFn( - executor_, stream_map().at(stream->implementation()), - event_map().at(event->implementation()), status.c_status); + executor_, stream_map().at(stream->implementation()), se_event, + status.c_status); return status.status(); } Status TpuExecutor::WaitForEvent(Stream* stream, ::stream_executor::Event* event) { StatusHelper status; + auto se_event = tpu_platform().LookupEvent(event->implementation()); tpu::ExecutorApiFn()->TpuExecutor_WaitForEventFn( - executor_, stream_map().at(stream->implementation()), - event_map().at(event->implementation()), status.c_status); + executor_, stream_map().at(stream->implementation()), se_event, + status.c_status); return status.status(); } @@ -172,7 +178,7 @@ std::unique_ptr<::stream_executor::internal::EventInterface> TpuExecutor::CreateEventImplementation() { SE_Event* tpu_event = tpu::ExecutorApiFn()->TpuEvent_NewFn(executor_); auto ptr = absl::make_unique(tpu_event); - event_map()[ptr.get()] = tpu_event; + tpu_platform().InsertEvent(ptr.get(), tpu_event); return ptr; } ",0,train 52a91a0fb4c39f8f6e8c8b65130c6c3a252dcfea,tensorflow/tensorflow,"Make TpuPlatform::event_map_ access thread-safe to fix a race. PiperOrigin-RevId: 320473357 Change-Id: I58c2e2b8b4a9cde46e4cd6ea23f994ef9335501c",tpu_executor.h,"@@ -17,6 +17,8 @@ limitations under the License. 
#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_H_ #include ""absl/container/flat_hash_map.h"" +#include ""tensorflow/core/platform/casts.h"" +#include ""tensorflow/core/platform/mutex.h"" #include ""tensorflow/core/platform/types.h"" #include ""tensorflow/stream_executor/device_memory.h"" #include ""tensorflow/stream_executor/device_options.h"" @@ -223,17 +225,16 @@ class TpuExecutor : public tensorflow::tpu::TpuExecutorInterface { } private: - TimerMap timer_map_; - - TpuPlatform::StreamMap& stream_map() { - return *(static_cast(platform_)->stream_map()); + TpuPlatform& tpu_platform() { + return *(tensorflow::down_cast(platform_)); } - TpuPlatform::EventMap& event_map() { - return *(static_cast(platform_)->event_map()); + TpuPlatform::StreamMap& stream_map() { + return *(tpu_platform().stream_map()); } - ::tensorflow::tpu::TpuPlatformInterface* platform_; + TimerMap timer_map_; + tensorflow::tpu::TpuPlatformInterface* platform_; SE_StreamExecutor* executor_; }; ",0,train 52a91a0fb4c39f8f6e8c8b65130c6c3a252dcfea,tensorflow/tensorflow,"Make TpuPlatform::event_map_ access thread-safe to fix a race. PiperOrigin-RevId: 320473357 Change-Id: I58c2e2b8b4a9cde46e4cd6ea23f994ef9335501c",tpu_platform.cc,"@@ -118,6 +118,23 @@ bool TpuPlatform::ShouldRegisterTpuDeviceToDeviceCopy() { ->TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopyFn(platform_); } +void TpuPlatform::InsertEvent(stream_executor::internal::EventInterface* key, + SE_Event* val) { + tensorflow::mutex_lock lock(event_map_mu_); + event_map_[key] = val; +} + +SE_Event* TpuPlatform::LookupEvent( + stream_executor::internal::EventInterface* key) { + tensorflow::tf_shared_lock lock(event_map_mu_); + return event_map_.at(key); +} + +void TpuPlatform::EraseEvent(stream_executor::internal::EventInterface* key) { + tensorflow::mutex_lock lock(event_map_mu_); + event_map_.erase(key); +} + Status TpuPlatform::TpusPerHost(int* tpus) { TF_Status* status = TF_NewStatus(); tpu::ConfigApiFn()->TpuConfigurationApi_TpusPerHostFn(tpus, status); ",0,train 52a91a0fb4c39f8f6e8c8b65130c6c3a252dcfea,tensorflow/tensorflow,"Make TpuPlatform::event_map_ access thread-safe to fix a race. PiperOrigin-RevId: 320473357 Change-Id: I58c2e2b8b4a9cde46e4cd6ea23f994ef9335501c",tpu_platform.h,"@@ -19,6 +19,7 @@ limitations under the License. #include #include ""absl/container/flat_hash_map.h"" +#include ""tensorflow/core/platform/mutex.h"" #include ""tensorflow/core/platform/types.h"" #include ""tensorflow/stream_executor/executor_cache.h"" #include ""tensorflow/stream_executor/platform.h"" @@ -111,7 +112,10 @@ class TpuPlatform : public ::tensorflow::tpu::TpuPlatformInterface { StreamMap* stream_map() { return &stream_map_; } - EventMap* event_map() { return &event_map_; } + void InsertEvent(stream_executor::internal::EventInterface* key, + SE_Event* val); + SE_Event* LookupEvent(stream_executor::internal::EventInterface* key); + void EraseEvent(stream_executor::internal::EventInterface* key); // Returns the number of TPUs per host. static Status TpusPerHost(int* tpus); @@ -125,6 +129,7 @@ class TpuPlatform : public ::tensorflow::tpu::TpuPlatformInterface { stream_executor::ExecutorCache executor_cache_; StreamMap stream_map_; EventMap event_map_; + tensorflow::mutex event_map_mu_; }; bool RegisterTpuPlatform(); ",0,train 72ac5a16b80410d33067f69ee422df8aa2140578,tensorflow/tensorflow,"Add a migration docs block for `tf.compat.v1.train.init_from_checkpoint`. 
PiperOrigin-RevId: 386466272 Change-Id: Id4571fc0a1a695b26a69fef6af55efe4966c44b7",checkpoint_utils.py,"@@ -220,6 +220,45 @@ def checkpoints_iterator(checkpoint_dir, def init_from_checkpoint(ckpt_dir_or_file, assignment_map): """"""Replaces `tf.Variable` initializers so they load from a checkpoint file. + @compatibility(TF2) + `tf.compat.v1.train.init_from_checkpoint` is not recommended for restoring + variable values in TF2. + + To restore checkpoints in TF2, please use + `tf.keras.Model.load_weights` or `tf.train.Checkpoint.restore`. These APIs use + use an [object-based method of checkpointing] + (https://www.tensorflow.org/guide/checkpoint#loading_mechanics), while + `tf.compat.v1.init_from_checkpoint` relies on a more-fragile variable-name + based method of checkpointing. There is no object-based equivalent of + `init_from_checkpoint` in TF2. + + Please re-write your checkpoints immediately using the object-based APIs, + see [migration guide] + (https://www.tensorflow.org/guide/migrate#checkpoint_compatibility) for more + details. + + You can load a name-based checkpoint written by `tf.compat.v1.train.Saver` + using `tf.train.Checkpoint.restore` or `tf.keras.Model.load_weights`. However, + you may have to change the names of the variables in your model to match the + variable names in the name-based checkpoint, which can be viewed with + `tf.train.list_variables(path)`. + + Another option is to create an `assignment_map` that maps the name of the + variables in the name-based checkpoint to the variables in your model, eg: + ``` + { + 'sequential/dense/bias': model.variables[0], + 'sequential/dense/kernel': model.variables[1] + } + ``` + and use `tf.compat.v1.train.init_from_checkpoint(path, assignment_map)` to + restore the name-based checkpoint. + + After restoring, re-encode your checkpoint using `tf.train.Checkpoint.save` + or `tf.keras.Model.save_weights`. + + @end_compatibility + Values are not loaded immediately, but when the initializer is run (typically by running a `tf.compat.v1.global_variables_initializer` op). ",0,train 2145b44339642796dc382153d26b434c2cc18559,tensorflow/tensorflow,"Fix two bugs int the Graph editor; - compute_boundary_ts was sometimes adding spurious inputs - sgv.consumers was returning op inside the subgraph Change: 142645987",select.py,"@@ -272,7 +272,7 @@ def get_ops_ios(ops, control_inputs=False, control_outputs=None, return res -def compute_boundary_ts(ops, ambiguous_ts_are_outputs=True): +def compute_boundary_ts(ops): """"""Compute the tensors at the boundary of a set of ops. This function looks at all the tensors connected to the given ops (in/out) @@ -281,17 +281,18 @@ def compute_boundary_ts(ops, ambiguous_ts_are_outputs=True): 2) output tensors: tensors whose consumer operations are not in ops 3) inside tensors: tensors which are neither input nor output tensors. + Note that a tensor can be both an inside tensor and an output tensor if it is + consumed by operations both outside and inside of `ops`. + Args: ops: an object convertible to a list of tf.Operation. - ambiguous_ts_are_outputs: a tensor can have consumers both inside and - outside ops. Such tensors are treated as outside tensor if - ambiguous_ts_are_outputs is True, otherwise they are treated as - inside tensor. Returns: A tuple `(outside_input_ts, outside_output_ts, inside_ts)` where: `outside_input_ts` is a Python list of input tensors; `outside_output_ts` is a python list of output tensors; `inside_ts` is a python list of inside tensors. 
+ Since a tensor can be both an inside tensor and an output tensor, + `outside_output_ts` and `inside_ts` might intersect. Raises: TypeError: if ops cannot be converted to a list of tf.Operation. """""" @@ -301,22 +302,25 @@ def compute_boundary_ts(ops, ambiguous_ts_are_outputs=True): output_ts_set = frozenset(output_ts) ops_set = frozenset(ops) - # fill in inside + # Compute inside tensors. inside_ts = [] + only_inside_ts = [] for t in input_ts: - # is also output? + # Skip if the input tensor is not also an output tensor. if t not in output_ts_set: continue - # is ambiguous_ts_are_outputs is True, don't add to inside if ambiguous - if ambiguous_ts_are_outputs: - consumers = frozenset(t.consumers()) - if consumers - ops_set: - continue + # Mark as ""inside"". inside_ts.append(t) + # Mark as ""only inside"" if the tensor is not both inside and output. + consumers = frozenset(t.consumers()) + if consumers - ops_set: + continue + only_inside_ts.append(t) inside_ts_set = frozenset(inside_ts) + only_inside_ts_set = frozenset(only_inside_ts) + outside_output_ts = [t for t in output_ts if t not in only_inside_ts_set] outside_input_ts = [t for t in input_ts if t not in inside_ts_set] - outside_output_ts = [t for t in output_ts if t not in inside_ts_set] return outside_input_ts, outside_output_ts, inside_ts ",0,train 2145b44339642796dc382153d26b434c2cc18559,tensorflow/tensorflow,"Fix two bugs int the Graph editor; - compute_boundary_ts was sometimes adding spurious inputs - sgv.consumers was returning op inside the subgraph Change: 142645987",subgraph.py,"@@ -561,10 +561,19 @@ class SubGraphView(object): return subgraph_id def consumers(self): - """"""Return a Python set of all the consumers of this subgraph view."""""" + """"""Return a Python set of all the consumers of this subgraph view. + + A consumer of a subgraph view is a tf.Operation which is a consumer + of one of the output tensors and is not in the subgraph. + + Returns: + A list of `tf.Operation` which are the consumers of this subgraph view. + """""" + ops_set = frozenset(self._ops) res = [] for output in self._output_ts: - util.concatenate_unique(res, output.consumers()) + consumers = [op for op in output.consumers() if op not in ops_set] + util.concatenate_unique(res, consumers) return res ",0,train 2145b44339642796dc382153d26b434c2cc18559,tensorflow/tensorflow,"Fix two bugs int the Graph editor; - compute_boundary_ts was sometimes adding spurious inputs - sgv.consumers was returning op inside the subgraph Change: 142645987",edit_test.py,"@@ -49,10 +49,10 @@ class EditTest(tf.test.TestCase): """"""Test for ge.detach."""""" sgv = ge.sgv(self.c.op, self.a.op) control_outputs = ge.util.ControlOutputs(self.graph) - ge.detach(sgv, control_inputs=control_outputs) + ge.detach(sgv, control_ios=control_outputs) # make sure the detached graph is as expected. 
self.assertTrue(ge.matcher(""^foo/c$"") - .input_ops(""geph__a_0"", ""geph__b_0"")(self.c.op)) + .input_ops(""a"", ""geph__b_0"")(self.c.op)) def test_connect(self): """"""Test for ge.connect."""""" ",0,train 2145b44339642796dc382153d26b434c2cc18559,tensorflow/tensorflow,"Fix two bugs int the Graph editor; - compute_boundary_ts was sometimes adding spurious inputs - sgv.consumers was returning op inside the subgraph Change: 142645987",select_test.py,"@@ -101,6 +101,19 @@ class SelectTest(tf.test.TestCase): self.assertEqual(list(output_ts), [self.h]) self.assertEqual(list(inside_ts), [self.g]) + def test_compute_boundary_ts_2(self): + """"""Test for ge.select.compute_boundary_ts."""""" + graph = tf.Graph() + with graph.as_default(): + a = tf.constant(1, name=""a"") + b = tf.constant(1, name=""b"") + c = tf.add(a, b, name=""c"") + _ = a + c + input_ts, output_ts, inside_ts = ge.select.compute_boundary_ts([a.op, c.op]) + self.assertEqual(list(input_ts), [b]) + self.assertEqual(list(output_ts), [a, c]) + self.assertEqual(list(inside_ts), [a]) + def test_get_within_boundary_ops_0(self): """"""Test for test_get_within_boundary_ops."""""" control_outputs = ge.util.ControlOutputs(self.graph) ",0,train 6f6cfdc99a2156bcd67441d46b71dcf7d98b5c14,tensorflow/tensorflow,"Add _cache_size for testing and debugging purposes. PiperOrigin-RevId: 387650902 Change-Id: Icec03f1cc4a0f07cee6d0d939684811b558395f1",pmap_lib.cc,"@@ -358,6 +358,8 @@ void BuildPmapSubmodule(pybind11::module& m) { ""PmapFunction""); cfun.def(""__call__"", &PmapFunction::Call); cfun.def_property_readonly(""__signature__"", &PmapFunction::PythonSignature); + // All private members are only for testing/debugging purposes + cfun.def(""_cache_size"", &PmapFunction::cache_size); pmap_lib.def( ""pmap"", ",0,train 1838163152217eac4d8cb9bf960beac29f38b969,tensorflow/tensorflow,"Fix comments, return non-gradient tests for real inputs",eig_op_test.py,"@@ -150,7 +150,7 @@ def _GetEigTest(dtype_, shape_, compute_v_): np_dtype = dtype_.as_numpy_dtype def RandomInput(): - # most of matrices are diagonalizable # TODO + # Most matrices are diagonalizable a = np.random.uniform( low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype) if dtype_.is_complex: @@ -202,7 +202,7 @@ def _GetEigGradTest(dtype_, shape_, compute_v_): np_dtype = dtype_.as_numpy_dtype def RandomInput(): - # most of matrices are diagonalizable # TODO + # Most matrices are diagonalizable a = np.random.uniform( low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype) if dtype_.is_complex: @@ -264,6 +264,10 @@ if __name__ == ""__main__"": shape = batch_dims + (size, size) name = ""%s_%s_%s"" % (dtype.name, ""_"".join(map(str, shape)), compute_v) _AddTest(EigTest, ""Eig"", name, _GetEigTest(dtype, shape, compute_v)) - _AddTest(EigGradTest, ""EigGrad"", name, - _GetEigGradTest(dtype, shape, compute_v)) + + # TODO: gradient_check gets wrong numeric output for real inputs + # (might be connected with the fact that outputs are complex) + if dtype not in [dtypes_lib.float32, dtypes_lib.float64]: + _AddTest(EigGradTest, ""EigGrad"", name, + _GetEigGradTest(dtype, shape, compute_v)) test.main() ",0,train 1838163152217eac4d8cb9bf960beac29f38b969,tensorflow/tensorflow,"Fix comments, return non-gradient tests for real inputs",linalg_grad.py,"@@ -635,7 +635,7 @@ def _MatrixTriangularSolveGrad(op, grad): # To avoid nan in cases with degenerate eigenvalues or # degenerate/zero singular values in calculations of -# f and s_inv_mat, we introduce a Lorentz brodening. 
+# f and s_inv_mat, we introduce a Lorentz broadening. def _SafeReciprocal(x, epsilon=1E-20): return x * math_ops.reciprocal(x * x + epsilon) ",0,train 8e29dc771442e8ca9df0c37277080d11599f1043,tensorflow/tensorflow,"Sort control edges on input considering src node Else the order of control edges could change from run to run. The order of the control edges shouldn't matter, but providing consistent input order for given graph makes changes easier to identify. PiperOrigin-RevId: 310024796 Change-Id: I45c6e0801093b2037e5950cca6210df32625038b",import_model.cc,"@@ -1817,6 +1817,8 @@ Status ImporterBase::ConvertNode(const Node& node) { absl::c_stable_sort(in_edges, [](const Edge* e1, const Edge* e2) { if (e1->IsControlEdge() && !e2->IsControlEdge()) return false; if (!e1->IsControlEdge() && e2->IsControlEdge()) return true; + if (e1->IsControlEdge() && e2->IsControlEdge()) + return e1->src()->id() < e2->src()->id(); return e1->dst_input() < e2->dst_input(); }); ",0,train de7b004fc72b30ec55011173b76f63a7e0e93279,tensorflow/tensorflow,"Handle rank 1 broadcasts in unranked kernel lowering. Previously this started at rank 2 after checking for scalars and equal shapes. This resulted in cases such as <1xf32> + <2xf32> being treated as impossible. PiperOrigin-RevId: 341043965 Change-Id: Id8539c58795bb50c4dda6e7c13a47040cfec96b4",transform_unranked_hlo.cc,"@@ -386,14 +386,14 @@ struct ConvertUnrankedDynamicBroadcastBinaryOp rewriter.create(loc, greater_rank_lhs, lhs_rank, rhs_rank); // Generate a list of nested if/else statements to handle rank - // specializations from 2-6. + // specializations from 1-6. scf::IfOp if_op = createRankSpecializedBroadcastAndOp(rewriter, op, lhs, - rhs, greater_rank, 2); + rhs, greater_rank, 1); // Put each subsequent rank specialization inside the else statement of the // previous one. OpBuilder else_builder = if_op.getElseBodyBuilder(rewriter.getListener()); - for (int i = 3; i < max_rank_specialization; i++) { + for (int i = 2; i < max_rank_specialization; i++) { auto inner_if = createRankSpecializedBroadcastAndOp(else_builder, op, lhs, rhs, greater_rank, i); ",0,train 417dd3793a648406feb8668cad2e341fe979c391,tensorflow/tensorflow,"[XLA] Fix opensource build breakage caused by undefined int64. Include xla/types.h in versioned_computation_handle. int64 was previously not defined in this file in the opensource build. Change: 148357352",versioned_computation_handle.h,"@@ -18,8 +18,8 @@ limitations under the License. #include +#include ""tensorflow/compiler/xla/types.h"" #include ""tensorflow/compiler/xla/xla_data.pb.h"" -#include ""tensorflow/core/platform/types.h"" namespace xla { ",0,train d3cc268d017ca22a2259befa521894cb8e3ed002,tensorflow/tensorflow,Added example related to top_k parameter in tf.keras.metrics.Precision,metrics.py,"@@ -1191,6 +1191,17 @@ class Precision(Metric): >>> m.result().numpy() 1.0 + >>> # With top_k=2, it will calculate precision over y_true[:2] and y_pred[:2] + >>> m = tf.keras.metrics.Precision(top_k=2) + >>> _ = m.update_state([0, 0, 1, 1], [1, 1, 1, 1]) + >>> m.result().numpy() + 0.0 + >>> # With top_k=2, it will calculate precision over y_true[:4] and y_pred[:4] + >>> m = tf.keras.metrics.Precision(top_k=4) + >>> _ = m.update_state([0, 0, 1, 1], [1, 1, 1, 1]) + >>> m.result().numpy() + 0.5 + Usage with tf.keras API: ```python ",0,train e52ac76b773693d5d289205162ca43ee23561251,tensorflow/tensorflow,support IndexedSlices in `add_n`,math_ops.py,"@@ -2105,7 +2105,8 @@ def add_n(inputs, name=None): """"""Adds all input tensors element-wise. 
Args: - inputs: A list of `Tensor` objects, each with same shape and type. + inputs: A list of `Tensor` or `IndexedSlices` objects, each with same shape + and type. name: A name for the operation (optional). Returns: @@ -2116,17 +2117,21 @@ def add_n(inputs, name=None): cannot be inferred. """""" if not inputs or not isinstance(inputs, (list, tuple)): - raise ValueError(""inputs must be a list of at least one Tensor with the "" - ""same dtype and shape"") + raise ValueError(""inputs must be a list of at least one Tensor/IndexedSlices"" + ""with the same dtype and shape"") inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs) - if not all(isinstance(x, ops.Tensor) for x in inputs): - raise ValueError(""inputs must be a list of at least one Tensor with the "" - ""same dtype and shape"") + if not all(isinstance(x, (ops.Tensor, ops.IndexedSlices)) for x in inputs): + raise ValueError(""inputs must be a list of at least one Tensor/IndexedSlices"" + ""with the same dtype and shape"") if len(inputs) == 1: + if isinstance(inputs[0], ops.IndexedSlices): + values = inputs[0].values + else: + values = inputs[0] if name: - return array_ops.identity(inputs[0], name=name) - return inputs[0] + return array_ops.identity(values, name=name) + return values return gen_math_ops.add_n(inputs, name=name) ",0,test d1b08cf5159bba6033df87f93f27778c2b94e14a,tensorflow/tensorflow,"NFC: Move the Type::is* predicates to StandardTypes.cpp These methods are currently defined 'inline' in StandardTypes.h, but this may create linker errors if StandardTypes.h isn't included at the use site. PiperOrigin-RevId: 263850328",StandardTypes.h,"@@ -71,13 +71,6 @@ enum Kind { } // namespace StandardTypes -inline bool Type::isBF16() { return getKind() == StandardTypes::BF16; } -inline bool Type::isF16() { return getKind() == StandardTypes::F16; } -inline bool Type::isF32() { return getKind() == StandardTypes::F32; } -inline bool Type::isF64() { return getKind() == StandardTypes::F64; } - -inline bool Type::isIndex() { return getKind() == StandardTypes::Index; } - /// Index is a special integer-like type with unknown platform-dependent bit /// width. class IndexType : public Type::TypeBase { @@ -123,25 +116,6 @@ public: static constexpr unsigned kMaxWidth = 4096; }; -/// Return true if this is an integer type with the specified width. -inline bool Type::isInteger(unsigned width) { - if (auto intTy = dyn_cast()) - return intTy.getWidth() == width; - return false; -} - -inline bool Type::isIntOrIndex() { - return isa() || isa(); -} - -inline bool Type::isIntOrIndexOrFloat() { - return isa() || isa() || isa(); -} - -inline bool Type::isIntOrFloat() { - return isa() || isa(); -} - class FloatType : public Type::TypeBase { public: using Base::Base; ",0,train e9deb127980812d2925d701c919f094c977b359f,tensorflow/tensorflow,"Expose tf.summary.record_if(condition) context manager in TF 2.0 This generalizes the TF 1.x contrib summary APIs always_record_summaries(), never_record_summaries(), and record_summaries_every_n_global_steps(). The new context manager accepts a ""condition"" that can be a constant boolean, a boolean tensor value, or a callable returning such. 
PiperOrigin-RevId: 233823923",context.py,"@@ -141,8 +141,8 @@ class _EagerContext(threading.local): self.mode = default_execution_mode self.is_eager = default_execution_mode == EAGER_MODE self.scope_name = """" - self.recording_summaries = False self.summary_writer_resource = None + self.recording_summaries = None self.scalar_cache = {} self._ones_rank_cache = None self._zeros_cache = None @@ -520,6 +520,16 @@ class Context(object): """"""Sets summary writer resource."""""" self._eager_context.summary_writer_resource = resource + @property + def recording_summaries(self): + """"""Returns summary recording condition."""""" + return self._eager_context.recording_summaries + + @recording_summaries.setter + def recording_summaries(self, condition): + """"""Sets summary recording condition."""""" + self._eager_context.recording_summaries = condition + @property def device_name(self): """"""Returns the device name for the current thread."""""" ",0,train e9deb127980812d2925d701c919f094c977b359f,tensorflow/tensorflow,"Expose tf.summary.record_if(condition) context manager in TF 2.0 This generalizes the TF 1.x contrib summary APIs always_record_summaries(), never_record_summaries(), and record_summaries_every_n_global_steps(). The new context manager accepts a ""condition"" that can be a constant boolean, a boolean tensor value, or a callable returning such. PiperOrigin-RevId: 233823923",summary_ops_v2.py,"@@ -45,11 +45,6 @@ from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -# Dictionary mapping graph keys to a boolean Tensor (or callable returning -# a boolean Tensor) indicating whether we should record summaries for the -# graph identified by the key of the dictionary. -_SHOULD_RECORD_SUMMARIES = {} - # A global dictionary mapping graph keys to a list of summary writer init ops. _SUMMARY_WRITER_INIT_OP = {} @@ -61,10 +56,8 @@ _USER_NAME_PATTERNS = re.compile(r""^[a-z]([-a-z0-9]{0,29}[a-z0-9])?$"", re.I) def _should_record_summaries_internal(): """"""Returns boolean Tensor if summaries should/shouldn't be recorded, or None. """""" - global _SHOULD_RECORD_SUMMARIES - key = ops.get_default_graph()._graph_key # pylint: disable=protected-access - should = _SHOULD_RECORD_SUMMARIES.get(key) - return should() if callable(should) else should + condition = context.context().recording_summaries + return condition() if callable(condition) else condition def _should_record_summaries_v2(): @@ -83,32 +76,28 @@ def should_record_summaries(): return False if result is None else result +@tf_export(""summary.record_if"", v1=[]) @tf_contextlib.contextmanager -def _record_summaries(boolean=True): +def record_if(condition): """"""Sets summary recording on or off per the provided boolean value. The provided value can be a python boolean, a scalar boolean Tensor, or or a callable providing such a value; if a callable is passed it will be - invoked each time should_record_summaries() is called to determine whether - summary writing should be enabled. + invoked on-demand to determine whether summary writing will occur. Args: - boolean: can be True, False, a bool Tensor, or a callable providing such. - Defaults to True. + condition: can be True, False, a bool Tensor, or a callable providing such. Yields: Returns a context manager that sets this value on enter and restores the previous value on exit. 
"""""" - # TODO(nickfelt): make this threadlocal - global _SHOULD_RECORD_SUMMARIES - key = ops.get_default_graph()._graph_key # pylint: disable=protected-access - old = _SHOULD_RECORD_SUMMARIES.setdefault(key, None) + old = context.context().recording_summaries try: - _SHOULD_RECORD_SUMMARIES[key] = boolean + context.context().recording_summaries = condition yield finally: - _SHOULD_RECORD_SUMMARIES[key] = old + context.context().recording_summaries = old # TODO(apassos) consider how to handle local step here. @@ -120,17 +109,17 @@ def record_summaries_every_n_global_steps(n, global_step=None): should = lambda: math_ops.equal(global_step % n, 0) if not context.executing_eagerly(): should = should() - return _record_summaries(should) + return record_if(should) def always_record_summaries(): """"""Sets the should_record_summaries Tensor to always true."""""" - return _record_summaries(True) + return record_if(True) def never_record_summaries(): """"""Sets the should_record_summaries Tensor to always false."""""" - return _record_summaries(False) + return record_if(False) @tf_export(""summary.SummaryWriter"", v1=[]) ",0,train 491ea166528922468d5b5b7b826f42df44e3b88f,tensorflow/tensorflow,"[XLA] Do not reserve large hash maps when there are many small computations. PiperOrigin-RevId: 380641027 Change-Id: I51ae2adcaefbc94a139eac4a5cafb5df724cb280",hlo_instruction.cc,"@@ -3470,12 +3470,7 @@ template static Status PostOrderDFS(HloInstruction* root, Visitor* visitor, const InternalCompareFunction* operand_order, bool ignore_control_predecessors) { - // Calculating the instruction count within a module can be expensive on large - // models so only do it if the visit state is empty. This will help when the - // same visitor is reused across many computations of a single module. - if (visitor->VisitStateCapacity() == 0) { - visitor->ReserveVisitStates(root->GetModule()->instruction_count()); - } + visitor->ReserveVisitStates(root->parent()->instruction_count()); // dfs_stack holds pairs of unique_id(), HloInstruction*>. // ",0,test 5bc685d7f16b0fc27b936e63fa01668e4af4034c,tensorflow/tensorflow,"[XLA] If an op has a single ""large"" operand, we want to fuse this op into some of its consumers, even if we can't fuse into all of them. PiperOrigin-RevId: 157779106",instruction_fusion.cc,"@@ -151,7 +151,26 @@ StatusOr InstructionFusion::Run(HloModule* module) { return true; }; - if (std::all_of(hlo->users().begin(), hlo->users().end(), + // An ""effectively unary"" operation is one that has one ""large"" + // input with the others being negligible in terms of memory usage. + // We use ""has a smaller true rank than the output"" as a heuristic + // for ""negligible"" memory usage. + auto effectively_unary = [](HloInstruction* hlo) { + if (hlo->operands().size() == 1) { + return true; + } + auto output_rank = ShapeUtil::TrueRank(hlo->shape()); + return std::count_if( + hlo->operands().begin(), hlo->operands().end(), + [output_rank](HloInstruction* operand) { + return ((operand->opcode() != HloOpcode::kBroadcast) && + ShapeUtil::TrueRank(operand->shape()) >= + output_rank); + }) <= 1; + }; + + if (effectively_unary(hlo) || + std::all_of(hlo->users().begin(), hlo->users().end(), user_fusable_into_hlo)) { all_consumers_fusable.insert(hlo); } ",0,test 5bc685d7f16b0fc27b936e63fa01668e4af4034c,tensorflow/tensorflow,"[XLA] If an op has a single ""large"" operand, we want to fuse this op into some of its consumers, even if we can't fuse into all of them. 
PiperOrigin-RevId: 157779106",instruction_fusion_test.cc,"@@ -156,18 +156,64 @@ TEST_F(InstructionFusionTest, PotentialBitcastTransposeOfParameterUnfused) { TEST_F(InstructionFusionTest, AvoidDuplicationIfNotAllFusable) { HloComputation::Builder builder(TestName()); - auto param0 = builder.AddInstruction(HloInstruction::CreateParameter( - 0, ShapeUtil::MakeShape(F32, {16, 16}), ""0"")); - HloInstruction* unary1 = builder.AddInstruction(HloInstruction::CreateUnary( - ShapeUtil::MakeShape(S32, {}), HloOpcode::kFloor, param0)); + auto shape = ShapeUtil::MakeShape(F32, {16, 16}); + auto param0 = + builder.AddInstruction(HloInstruction::CreateParameter(0, shape, ""0"")); + auto param1 = + builder.AddInstruction(HloInstruction::CreateParameter(1, shape, ""1"")); + HloInstruction* binary1 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1)); + builder.AddInstruction(HloInstruction::CreateSend(binary1, 0)); + HloInstruction* unary = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kAbs, binary1)); + + auto module = MakeUnique(TestName()); + auto computation = module->AddEntryComputation(builder.Build()); + EXPECT_EQ(unary, computation->root_instruction()); + EXPECT_FALSE( + InstructionFusion(InstructionFusion::IsExpensive, /*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); +} + +TEST_F(InstructionFusionTest, AllowUnaryDuplication) { + HloComputation::Builder builder(TestName()); + auto shape = ShapeUtil::MakeShape(F32, {16, 16}); + auto param0 = + builder.AddInstruction(HloInstruction::CreateParameter(0, shape, ""0"")); + HloInstruction* unary1 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kFloor, param0)); builder.AddInstruction(HloInstruction::CreateSend(unary1, 0)); - HloInstruction* unary2 = builder.AddInstruction(HloInstruction::CreateUnary( - ShapeUtil::MakeShape(S32, {}), HloOpcode::kAbs, unary1)); + HloInstruction* unary2 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kAbs, unary1)); auto module = MakeUnique(TestName()); auto computation = module->AddEntryComputation(builder.Build()); EXPECT_EQ(unary2, computation->root_instruction()); - EXPECT_FALSE( + EXPECT_TRUE( + InstructionFusion(InstructionFusion::IsExpensive, /*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); +} + +TEST_F(InstructionFusionTest, AllowEffectiveUnaryDuplication) { + auto shape = ShapeUtil::MakeShape(F32, {16, 16}); + auto small_shape = ShapeUtil::MakeShape(F32, {16}); + HloComputation::Builder builder(TestName()); + auto param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, small_shape, ""0"")); + auto param1 = + builder.AddInstruction(HloInstruction::CreateParameter(1, shape, ""1"")); + HloInstruction* binary1 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1)); + builder.AddInstruction(HloInstruction::CreateSend(binary1, 0)); + HloInstruction* unary = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kAbs, binary1)); + + auto module = MakeUnique(TestName()); + auto computation = module->AddEntryComputation(builder.Build()); + EXPECT_EQ(unary, computation->root_instruction()); + EXPECT_TRUE( InstructionFusion(InstructionFusion::IsExpensive, /*may_duplicate=*/true) .Run(module.get()) .ValueOrDie()); ",0,test d1198909057c635de2bae3e1c4fb8505466ee325,tensorflow/tensorflow,"Naming for valid scope name in eager mode. 
PiperOrigin-RevId: 223392695",data_flow_ops.py,"@@ -171,7 +171,10 @@ class QueueBase(object): self._names = None self._queue_ref = queue_ref if context.executing_eagerly(): - self._name = context.context().scope_name + if context.context().scope_name: + self._name = context.context().scope_name + else: + self._name = ""Empty"" self._resource_deleter = resource_variable_ops.EagerResourceDeleter( queue_ref, None) else: ",0,train df94f63281a40acdd695a9a28862905af976adfd,tensorflow/tensorflow,"Minor: fix typo in Defun doc. Change: 124731079",function.py,"@@ -428,7 +428,7 @@ class Defun(object): argument of the function to decorate, with the expected type of the argument as value. - For example if the function to decorate accepts to `tf.float32` arguments + For example if the function to decorate accepts two `tf.float32` arguments named `x` and `y`, call the decorator with: @Defun(tf.float32, tf.float32) ",0,train 0ae47a7f3a3cbfb47a432741d525bafc50c6b68a,tensorflow/tensorflow,"Fix unit test for matrix square root: Don't try to take the matrix square root of a matrix for which the square root may not exist. PiperOrigin-RevId: 223078107",matrix_square_root_op_test.py,"@@ -102,13 +102,13 @@ class SquareRootOpTest(test.TestCase): self.evaluate(gen_linalg_ops.matrix_square_root(tensor)) def testConcurrentExecutesWithoutError(self): - self.skipTest(""Triggers assert in matrix_sqrt_quasi_triangular_diagonal"") - with test_util.use_gpu(): matrix1 = random_ops.random_normal([5, 5], seed=42) matrix2 = random_ops.random_normal([5, 5], seed=42) - sqrt1 = gen_linalg_ops.matrix_square_root(matrix1) - sqrt2 = gen_linalg_ops.matrix_square_root(matrix2) + square1 = math_ops.matmul(matrix1, matrix1) + square2 = math_ops.matmul(matrix2, matrix2) + sqrt1 = gen_linalg_ops.matrix_square_root(square1) + sqrt2 = gen_linalg_ops.matrix_square_root(square2) all_ops = [sqrt1, sqrt2] sqrt = self.evaluate(all_ops) self.assertAllEqual(sqrt[0], sqrt[1]) ",0,train bb0190f6c26bf11f601102dfe2166a68a7833020,tensorflow/tensorflow,"Added support for fp16 to the softmax operation Change: 123382264",softmax_op.cc,"@@ -40,6 +40,9 @@ struct SoftmaxFunctor { }; } // namespace functor +REGISTER_KERNEL_BUILDER( + Name(""Softmax"").Device(DEVICE_CPU).TypeConstraint(""T""), + SoftmaxOp); REGISTER_KERNEL_BUILDER(Name(""Softmax"") .Device(DEVICE_CPU) .TypeConstraint(""T""), @@ -48,24 +51,30 @@ REGISTER_KERNEL_BUILDER(Name(""Softmax"") .Device(DEVICE_CPU) .TypeConstraint(""T""), SoftmaxOp); -REGISTER_KERNEL_BUILDER(Name(""LogSoftmax"") - .Device(DEVICE_CPU) - .TypeConstraint(""T""), - SoftmaxOp); +REGISTER_KERNEL_BUILDER( + Name(""LogSoftmax"").Device(DEVICE_CPU).TypeConstraint(""T""), + SoftmaxOp); +REGISTER_KERNEL_BUILDER( + Name(""LogSoftmax"").Device(DEVICE_CPU).TypeConstraint(""T""), + SoftmaxOp); REGISTER_KERNEL_BUILDER(Name(""LogSoftmax"") .Device(DEVICE_CPU) .TypeConstraint(""T""), SoftmaxOp); #if GOOGLE_CUDA -REGISTER_KERNEL_BUILDER(Name(""Softmax"") - .Device(DEVICE_GPU) - .TypeConstraint(""T""), - SoftmaxOp); -REGISTER_KERNEL_BUILDER(Name(""LogSoftmax"") - .Device(DEVICE_GPU) - .TypeConstraint(""T""), - SoftmaxOp); +REGISTER_KERNEL_BUILDER( + Name(""Softmax"").Device(DEVICE_GPU).TypeConstraint(""T""), + SoftmaxOp); +REGISTER_KERNEL_BUILDER( + Name(""Softmax"").Device(DEVICE_GPU).TypeConstraint(""T""), + SoftmaxOp); +REGISTER_KERNEL_BUILDER( + Name(""LogSoftmax"").Device(DEVICE_GPU).TypeConstraint(""T""), + SoftmaxOp); +REGISTER_KERNEL_BUILDER( + Name(""LogSoftmax"").Device(DEVICE_GPU).TypeConstraint(""T""), + SoftmaxOp); #endif // 
GOOGLE_CUDA } // namespace tensorflow ",0,test bb0190f6c26bf11f601102dfe2166a68a7833020,tensorflow/tensorflow,"Added support for fp16 to the softmax operation Change: 123382264",softmax_op_gpu.cu.cc,"@@ -39,6 +39,7 @@ struct SoftmaxFunctor { } // end namespace functor // Instantiate the GPU implementation for float. +template struct functor::SoftmaxFunctor; template struct functor::SoftmaxFunctor; } // end namespace tensorflow ",0,test bb0190f6c26bf11f601102dfe2166a68a7833020,tensorflow/tensorflow,"Added support for fp16 to the softmax operation Change: 123382264",nn_ops.cc,"@@ -856,7 +856,7 @@ backprops: The gradients: `gradients / (1 + abs(-features)) ** 2`. REGISTER_OP(""Softmax"") .Input(""logits: T"") .Output(""softmax: T"") - .Attr(""T: {float, double}"") + .Attr(""T: {half, float, double}"") .Doc(R""doc( Computes softmax activations. @@ -873,7 +873,7 @@ softmax: Same shape as `logits`. REGISTER_OP(""LogSoftmax"") .Input(""logits: T"") .Output(""logsoftmax: T"") - .Attr(""T: {float, double}"") + .Attr(""T: {half, float, double}"") .Doc(R""doc( Computes log softmax activations. ",0,test bb0190f6c26bf11f601102dfe2166a68a7833020,tensorflow/tensorflow,"Added support for fp16 to the softmax operation Change: 123382264",softmax_op_test.py,"@@ -50,13 +50,13 @@ class SoftmaxTest(tf.test.TestCase): else: tf_softmax = tf.nn.softmax(np_features, name=name) out = tf_softmax.eval() - self.assertAllClose(np_softmax, out) + self.assertAllCloseAccordingToType(np_softmax, out) self.assertShapeEqual(np_softmax, tf_softmax) if not log: # Bonus check: the softmaxes should add to one in each # batch element. - self.assertAllClose(np.ones(out.shape[0]), - np.sum(out, axis=1)) + self.assertAllCloseAccordingToType(np.ones(out.shape[0]), + np.sum(out, axis=1)) def _testAll(self, features): self._testSoftmax(features, use_gpu=False) @@ -118,6 +118,10 @@ class SoftmaxTest(tf.test.TestCase): self._testAll( np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float32)) + def testHalf(self): + self._testAll( + np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float16)) + def testDouble(self): self._testSoftmax( np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float64), ",0,test 667f63f9a473150caef5ba9c2445dfbd4d3cf3e2,tensorflow/tensorflow,"[tf:tfrt] Check returned memrefs/tensors alignment PiperOrigin-RevId: 410496254 Change-Id: Ie707a0be520aeb725a5cfc48bef59dae2595941c",tf_cpurt.h,"@@ -142,10 +142,11 @@ struct ConvertTensor { // Incorrect alignment will lead to a segfault in the downstream Tensorflow // kernels, check it before returning to the runtime. + // TODO(ezhulenev): Downgrade CHECKs to DCHECKs. if (internal::IsStaticStorageDuration(memref)) { - DCHECK(tensor.IsAligned()) << ""global memref is not aligned""; + CHECK(tensor.IsAligned()) << ""global memref is not aligned""; } else { - DCHECK(tensor.IsAligned()) << ""allocated memref is not aligned""; + CHECK(tensor.IsAligned()) << ""allocated memref is not aligned""; } return tensor; ",0,test 5574d6041a5a5d91c4be3449d7a456a146da4c0e,tensorflow/tensorflow,"Enrich update ops from inputs PiperOrigin-RevId: 204223077",training.py,"@@ -599,7 +599,7 @@ class Model(Network): # Unconditional updates updates += self.get_updates_for(None) # Conditional updates relevant to this model - updates += self.get_updates_for(self._feed_inputs) + updates += self.get_updates_for(self.inputs) # Stateful metrics updates updates += self.metrics_updates # Gets loss and metrics. Updates weights at each call. 
",0,train 5574d6041a5a5d91c4be3449d7a456a146da4c0e,tensorflow/tensorflow,"Enrich update ops from inputs PiperOrigin-RevId: 204223077",models_test.py,"@@ -37,6 +37,7 @@ class TestModelCloning(test.TestCase): model = keras.models.Sequential() model.add(keras.layers.Dense(4, input_shape=(4,))) + model.add(keras.layers.BatchNormalization()) model.add(keras.layers.Dropout(0.5)) model.add(keras.layers.Dense(4)) @@ -46,6 +47,8 @@ class TestModelCloning(test.TestCase): with self.test_session(): # With placeholder creation new_model = keras.models.clone_model(model) + # update ops from batch norm needs to be included + self.assertEquals(len(new_model.get_updates_for(new_model.inputs)), 2) new_model.compile('rmsprop', 'mse') new_model.train_on_batch(val_a, val_out) @@ -53,6 +56,7 @@ class TestModelCloning(test.TestCase): input_a = keras.Input(shape=(4,)) new_model = keras.models.clone_model( model, input_tensors=input_a) + self.assertEquals(len(new_model.get_updates_for(new_model.inputs)), 2) new_model.compile('rmsprop', 'mse') new_model.train_on_batch(val_a, val_out) @@ -60,6 +64,7 @@ class TestModelCloning(test.TestCase): input_a = keras.backend.variable(val_a) new_model = keras.models.clone_model( model, input_tensors=input_a) + self.assertEquals(len(new_model.get_updates_for(new_model.inputs)), 2) new_model.compile('rmsprop', 'mse') new_model.train_on_batch(None, val_out) @@ -76,6 +81,7 @@ class TestModelCloning(test.TestCase): x_a = dense_1(input_a) x_a = keras.layers.Dropout(0.5)(x_a) + x_a = keras.layers.BatchNormalization()(x_a) x_b = dense_1(input_b) x_a = dense_2(x_a) outputs = keras.layers.add([x_a, x_b]) @@ -87,6 +93,7 @@ class TestModelCloning(test.TestCase): with self.test_session(): # With placeholder creation new_model = keras.models.clone_model(model) + self.assertEquals(len(new_model.get_updates_for(new_model.inputs)), 2) new_model.compile('rmsprop', 'mse') new_model.train_on_batch([val_a, val_b], val_out) @@ -95,6 +102,7 @@ class TestModelCloning(test.TestCase): input_b = keras.Input(shape=(4,), name='b') new_model = keras.models.clone_model( model, input_tensors=[input_a, input_b]) + self.assertEquals(len(new_model.get_updates_for(new_model.inputs)), 2) new_model.compile('rmsprop', 'mse') new_model.train_on_batch([val_a, val_b], val_out) @@ -103,6 +111,7 @@ class TestModelCloning(test.TestCase): input_b = keras.backend.variable(val_b) new_model = keras.models.clone_model( model, input_tensors=[input_a, input_b]) + self.assertEquals(len(new_model.get_updates_for(new_model.inputs)), 2) new_model.compile('rmsprop', 'mse') new_model.train_on_batch(None, val_out) ",0,train f2e46bddc9639b643829778011111932e49b6241,tensorflow/tensorflow,"Propogate attrs from the forward function call to its corresponding backward function call. 
Change: 137480188",function_test.py,"@@ -166,6 +166,32 @@ class FunctionTest(tf.test.TestCase): self.assertEqual(x.get_shape(), dx.get_shape()) self.assertEqual(y.get_shape(), dy.get_shape()) + def testSymGradAttr(self): + @function.Defun(noinline=True) + def Foo(x): + return x * 2 + + g = tf.Graph() + with g.as_default(): + x = tf.constant(3.0) + y = Foo(x) + dx, = tf.gradients(y, [x]) + + self.assertTrue(y.op.node_def.attr[""_noinline""].b) + self.assertTrue(dx.op.node_def.attr['f'].func.attr['_noinline'].b) + + cfg = tf.ConfigProto(graph_options=tf.GraphOptions( + optimizer_options=tf.OptimizerOptions( + opt_level=tf.OptimizerOptions.L0, + do_common_subexpression_elimination=True, + do_function_inlining=True, + do_constant_folding=True))) + + with self.test_session(graph=g, config=cfg): + self.assertAllClose(y.eval(), 6.) + self.assertAllClose(dx.eval(), 2.) + + def testZNoDepOnY(self): @function.Defun(tf.float32, tf.float32) ",0,train f2e46bddc9639b643829778011111932e49b6241,tensorflow/tensorflow,"Propogate attrs from the forward function call to its corresponding backward function call. Change: 137480188",op_def_library.py,"@@ -695,7 +695,9 @@ class OpDefLibrary(object): attr_value.list.tensor.extend( [_MakeTensor(x, key) for x in value]) elif attr_def.type == ""func"": - if isinstance(value, compat.bytes_or_text_types): + if isinstance(value, attr_value_pb2.NameAttrList): + attr_value.func.CopyFrom(value) + elif isinstance(value, compat.bytes_or_text_types): attr_value.func.name = value else: value.add_to_graph(ops.get_default_graph()) ",0,train f2e46bddc9639b643829778011111932e49b6241,tensorflow/tensorflow,"Propogate attrs from the forward function call to its corresponding backward function call. Change: 137480188",gradients.py,"@@ -26,6 +26,7 @@ import numpy as np import six from six.moves import xrange # pylint: disable=redefined-builtin +from tensorflow.core.framework import attr_value_pb2 from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -89,10 +90,8 @@ def _IndexedSlicesToTensor(value, dtype=None, name=None, as_ref=False): warnings.warn( ""Converting sparse IndexedSlices to a dense Tensor of unknown shape. 
"" ""This may consume a large amount of memory."") - return math_ops.unsorted_segment_sum(value.values, - value.indices, - value.dense_shape[0], - name=name) + return math_ops.unsorted_segment_sum( + value.values, value.indices, value.dense_shape[0], name=name) ops.register_tensor_conversion_function(ops.IndexedSlices, @@ -224,8 +223,8 @@ def _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops): if grad_y is None: with _maybe_colocate_with(y.op, colocate_gradients_with_ops): grad_ys[i] = array_ops.fill( - array_ops.shape(y), - constant_op.constant(1, dtype=y.dtype)) + array_ops.shape(y), constant_op.constant( + 1, dtype=y.dtype)) else: if grad_y.dtype != y.dtype: raise ValueError(""Y and ys_grad must be of the same type, "" @@ -304,6 +303,20 @@ def _maybe_colocate_with(op, colocate_gradients_with_ops): yield +def _SymGrad(op, out_grads): + """"""Backprop through a function call node op given its outputs' gradients."""""" + f_in = [x for x in op.inputs] + out_grads + f_types = [x.dtype for x in op.inputs] + f = attr_value_pb2.NameAttrList() + f.name = op.type + for k in op.node_def.attr: + f.attr[k].CopyFrom(op.node_def.attr[k]) + # pylint: disable=protected-access + in_grads = functional_ops._symbolic_gradient(input=f_in, Tout=f_types, f=f) + # pylint: enable=protected-access + return in_grads + + def gradients(ys, xs, grad_ys=None, @@ -376,8 +389,8 @@ def gradients(ys, # to the xs. to_ops = [t.op for t in ys] from_ops = [t.op for t in xs] - pending_count, loop_state = _PendingCount(ops.get_default_graph(), - to_ops, from_ops, + pending_count, loop_state = _PendingCount(ops.get_default_graph(), to_ops, + from_ops, colocate_gradients_with_ops) # Iterate over the collected ops. @@ -451,8 +464,8 @@ def gradients(ys, # output, it means that the cost does not depend on output[i], # therefore dC/doutput[i] is 0. for i, out_grad in enumerate(out_grads): - if (not isinstance(out_grad, ops.Tensor) - and not out_grad) and _IsTrainable(op.outputs[i]): + if (not isinstance(out_grad, ops.Tensor) and + not out_grad) and _IsTrainable(op.outputs[i]): # Only floating-point outputs get a zero gradient. Gradient # functions should ignore the gradient for other outputs. if loop_state: @@ -466,16 +479,12 @@ def gradients(ys, if grad_fn: # If grad_fn was found, do not use SymbolicGradient even for # functions. - in_grads = _AsList(grad_fn(op, *out_grads)) + in_grads = grad_fn(op, *out_grads) else: # For function call ops, we add a 'SymbolicGradient' # node to the graph to compute gradients. 
- f_in = [x for x in op.inputs] + out_grads - f_types = [x.dtype for x in op.inputs] - # pylint: disable=protected-access - in_grads = _AsList(functional_ops._symbolic_gradient( - f_in, f_types, op.type)) - # pylint: enable=protected-access + in_grads = _SymGrad(op, out_grads) + in_grads = _AsList(in_grads) _VerifyGeneratedGradients(in_grads, op) if gate_gradients and len( [x for x in in_grads if x is not None]) > 1: @@ -595,8 +604,9 @@ def _HandleNestedIndexedSlices(grad): else: assert isinstance(grad.values, ops.IndexedSlices) g = _HandleNestedIndexedSlices(grad.values) - return ops.IndexedSlices( - g.values, array_ops.gather(grad.indices, g.indices), g.dense_shape) + return ops.IndexedSlices(g.values, + array_ops.gather(grad.indices, g.indices), + g.dense_shape) def _AccumulatorShape(inputs): @@ -610,6 +620,7 @@ def _AccumulatorShape(inputs): def _LogOpGradients(op, out_grads, in_grads): """"""Log the in and out grads of an op."""""" logging.vlog(1, ""Gradient for '"" + op.name + ""'"") + def _FilterGrad(x): if x is None: return False @@ -617,6 +628,7 @@ def _LogOpGradients(op, out_grads, in_grads): return bool(x) else: return True + logging.vlog(1, "" in --> %s"", "", "".join([x.name for x in out_grads if _FilterGrad(x)])) logging.vlog(1, "" out --> %s"", @@ -636,8 +648,10 @@ def _MultiDeviceAddN(tensor_list): # TODO(sjhwang): Create hierarchical aggregation tree as pbar's suggestion. # E.g., aggregate per GPU, then per task, and so on. summands = [] + def DeviceKey(dev): return """" if dev is None else dev + for dev in sorted(six.iterkeys(tensors_on_device), key=DeviceKey): tensors = tensors_on_device[dev] with ops.colocate_with(tensors[0].op, ignore_existing=True): @@ -689,11 +703,12 @@ def _AggregatedGrads(grads, op, loop_state, aggregation_method=None): """""" if aggregation_method is None: aggregation_method = AggregationMethod.DEFAULT - if aggregation_method not in [AggregationMethod.ADD_N, - AggregationMethod.EXPERIMENTAL_TREE, - AggregationMethod.EXPERIMENTAL_ACCUMULATE_N]: - raise ValueError( - ""Invalid aggregation_method specified %s."" % aggregation_method) + if aggregation_method not in [ + AggregationMethod.ADD_N, AggregationMethod.EXPERIMENTAL_TREE, + AggregationMethod.EXPERIMENTAL_ACCUMULATE_N + ]: + raise ValueError(""Invalid aggregation_method specified %s."" % + aggregation_method) out_grads = _GetGrads(grads, op) for i, out_grad in enumerate(out_grads): if loop_state: @@ -701,9 +716,10 @@ def _AggregatedGrads(grads, op, loop_state, aggregation_method=None): assert control_flow_ops.IsLoopSwitch(op) continue # Grads have to be Tensors or IndexedSlices - if (isinstance(out_grad, collections.Sequence) and - not all([isinstance(g, (ops.Tensor, ops.IndexedSlices)) - for g in out_grad if g is not None])): + if (isinstance(out_grad, collections.Sequence) and not all([ + isinstance(g, (ops.Tensor, ops.IndexedSlices)) for g in out_grad + if g is not None + ])): raise TypeError(""gradients have to be either all Tensors "" ""or all IndexedSlices"") # Aggregate multiple gradients, and convert [] to None. @@ -725,9 +741,10 @@ def _AggregatedGrads(grads, op, loop_state, aggregation_method=None): # 2 grads then we fall through to the ""tree"" case below. 
used = ""accumulate_n"" out_grads[i] = math_ops.accumulate_n(out_grad) - elif aggregation_method in [AggregationMethod.EXPERIMENTAL_TREE, - AggregationMethod.EXPERIMENTAL_ACCUMULATE_N - ]: + elif aggregation_method in [ + AggregationMethod.EXPERIMENTAL_TREE, + AggregationMethod.EXPERIMENTAL_ACCUMULATE_N + ]: # Aggregate all gradients by doing pairwise sums: this may # reduce performance, but it can improve memory because the # gradients can be released earlier. @@ -744,18 +761,18 @@ def _AggregatedGrads(grads, op, loop_state, aggregation_method=None): else: used = ""add_n"" out_grads[i] = _MultiDeviceAddN(out_grad) - logging.vlog(2, "" _AggregatedGrads %d x %s using %s"", len(out_grad), - tensor_shape, used) + logging.vlog(2, "" _AggregatedGrads %d x %s using %s"", + len(out_grad), tensor_shape, used) else: - out_grad = math_ops._as_indexed_slices_list([g for g in out_grad - if g is not None]) + out_grad = math_ops._as_indexed_slices_list( + [g for g in out_grad if g is not None]) out_grad = [_HandleNestedIndexedSlices(x) for x in out_grad] # Form IndexedSlices out of the concatenated values and # indices. out_grads[i] = ops.IndexedSlices( array_ops.concat(0, [x.values for x in out_grad]), - array_ops.concat(0, [x.indices - for x in out_grad]), out_grad[0].dense_shape) + array_ops.concat(0, [x.indices for x in out_grad]), + out_grad[0].dense_shape) else: out_grads[i] = [] return out_grads @@ -805,9 +822,10 @@ def _hessian_vector_product(ys, xs, v): grads = gradients(ys, xs) assert len(grads) == length - elemwise_products = [math_ops.mul(grad_elem, array_ops.stop_gradient(v_elem)) - for grad_elem, v_elem in zip(grads, v) - if grad_elem is not None] + elemwise_products = [ + math_ops.mul(grad_elem, array_ops.stop_gradient(v_elem)) + for grad_elem, v_elem in zip(grads, v) if grad_elem is not None + ] # Second backprop return gradients(elemwise_products, xs) ",0,train 455db2a13c5c5d738f240c11531c4d198605efb9,tensorflow/tensorflow,[Grappler] Add support for QuantizeAndDequantizeV4,op_level_cost_estimator.cc,"@@ -582,6 +582,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() { elementwise_ops_.emplace(""Prod"", EIGEN_COST(scalar_product_op)); elementwise_ops_.emplace(""QuantizeAndDequantizeV2"", quantize_and_dequantize_v2_cost); + elementwise_ops_.emplace(""QuantizeAndDequantizeV4"", + quantize_and_dequantize_v2_cost); elementwise_ops_.emplace(""QuantizedSigmoid"", EIGEN_COST(scalar_logistic_op)); elementwise_ops_.emplace(""QuantizeV2"", quantize_v2_cost); ",0,test 455db2a13c5c5d738f240c11531c4d198605efb9,tensorflow/tensorflow,[Grappler] Add support for QuantizeAndDequantizeV4,generic_layout_optimizer_transposer.cc,"@@ -2027,6 +2027,7 @@ bool IsDefaultLayoutAgnosticOp(const NodeDef& node) { ""PreventGradient"", ""QuantizeAndDequantizeV2"", ""QuantizeAndDequantizeV3"", + ""QuantizeAndDequantizeV4"", ""Real"", ""Reciprocal"", ""Relu"", ",0,test 455db2a13c5c5d738f240c11531c4d198605efb9,tensorflow/tensorflow,[Grappler] Add support for QuantizeAndDequantizeV4,layout_optimizer.cc,"@@ -170,6 +170,7 @@ std::set GetOpsFormatAgnostic() { ""Polygamma"", ""QuantizeAndDequantizeV2"", ""QuantizeAndDequantizeV3"", + ""QuantizeAndDequantizeV4"", ""Pow"", ""Real"", ""RealDiv"", ",0,test d194dd4b823a1e74efd47a5b4215d839858244e9,tensorflow/tensorflow,"Use RichLine instead of explicit font_attr_segs in two more files Change: 148335792",cli_shared.py,"@@ -26,6 +26,7 @@ from tensorflow.python.debug.cli import tensor_format from tensorflow.python.framework import ops from tensorflow.python.ops import variables +RL = 
debugger_cli_common.RichLine # Default threshold number of elements above which ellipses will be used # when printing the value of the tensor. @@ -152,9 +153,8 @@ def error(msg): for screen output. """""" - full_msg = ""ERROR: "" + msg - return debugger_cli_common.RichTextLines( - [full_msg], font_attr_segs={0: [(0, len(full_msg), ""red"")]}) + return debugger_cli_common.rich_text_lines_from_rich_line_list([ + RL(""ERROR: "" + msg, ""red"")]) def _get_fetch_name(fetch): @@ -214,16 +214,16 @@ def _recommend_command(command, description, indent=2, create_link=False): """""" indent_str = "" "" * indent - lines = [indent_str + command + "":"", indent_str + "" "" + description] if create_link: - font_attr_segs = { - 0: [(indent, indent + len(command), [ - debugger_cli_common.MenuItem("""", command), ""bold""])]} + font_attr = [debugger_cli_common.MenuItem("""", command), ""bold""] else: - font_attr_segs = {0: [(indent, indent + len(command), ""bold"")]} + font_attr = ""bold"" - return debugger_cli_common.RichTextLines(lines, font_attr_segs=font_attr_segs) + lines = [RL(indent_str) + RL(command, font_attr) + "":"", + indent_str + "" "" + description] + + return debugger_cli_common.rich_text_lines_from_rich_line_list(lines) def get_tfdbg_logo(): @@ -308,23 +308,19 @@ def get_run_start_intro(run_call_count, ""Keep executing run() calls until a dumped tensor passes a given, "" ""registered filter (conditional breakpoint mode)"")) - more_font_attr_segs = {} more_lines = ["" Registered filter(s):""] if tensor_filters: filter_names = [] for filter_name in tensor_filters: filter_names.append(filter_name) - more_lines.append("" * "" + filter_name) command_menu_node = debugger_cli_common.MenuItem( """", ""run -f %s"" % filter_name) - more_font_attr_segs[len(more_lines) - 1] = [ - (10, len(more_lines[-1]), command_menu_node)] + more_lines.append(RL("" * "") + RL(filter_name, command_menu_node)) else: more_lines.append("" (None)"") out.extend( - debugger_cli_common.RichTextLines( - more_lines, font_attr_segs=more_font_attr_segs)) + debugger_cli_common.rich_text_lines_from_rich_line_list(more_lines)) out.extend( _recommend_command( @@ -334,11 +330,10 @@ def get_run_start_intro(run_call_count, ""inspect/modify their values"", create_link=True)) out.append("""") - suggest_help = ""For more details, see help."" - out.append( - suggest_help, - font_attr_segs=[(len(suggest_help) - 5, len(suggest_help) - 1, - debugger_cli_common.MenuItem("""", ""help""))]) + + out.append_rich_line(RL(""For more details, see "") + + RL(""help."", debugger_cli_common.MenuItem("""", ""help"")) + + ""."") out.append("""") # Make main menu for the run-start intro. @@ -407,14 +402,12 @@ def get_error_intro(tf_error): intro_lines = [ ""--------------------------------------"", - ""!!! An error occurred during the run !!!"", + RL(""!!! 
An error occurred during the run !!!"", ""blink""), """", ""You may use the following commands to debug:"", ] - intro_font_attr_segs = {1: [(0, len(intro_lines[1]), ""blink"")]} - out = debugger_cli_common.RichTextLines( - intro_lines, font_attr_segs=intro_font_attr_segs) + out = debugger_cli_common.rich_text_lines_from_rich_line_list(intro_lines) out.extend( _recommend_command(""ni -a -d -t %s"" % op_name, ",0,train d194dd4b823a1e74efd47a5b4215d839858244e9,tensorflow/tensorflow,"Use RichLine instead of explicit font_attr_segs in two more files Change: 148335792",debugger_cli_common.py,"@@ -23,6 +23,7 @@ import re import sre_constants import traceback +import six from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.platform import gfile @@ -89,7 +90,7 @@ class RichLine(object): attributes applied to the corresponding substrings. """""" ret = RichLine() - if isinstance(other, str): + if isinstance(other, six.string_types): ret.text = self.text + other ret.font_attr_segs = self.font_attr_segs[:] return ret @@ -105,19 +106,23 @@ class RichLine(object): def rich_text_lines_from_rich_line_list(rich_text_list): - """"""Convert a list of RichLine objects to a RichTextLines object. + """"""Convert a list of RichLine objects or strings to a RichTextLines object. Args: - rich_text_list: a list of RichLine objects + rich_text_list: a list of RichLine objects or strings Returns: A corresponding RichTextLines object. """""" - lines = [rl.text for rl in rich_text_list] + lines = [] font_attr_segs = {} for i, rl in enumerate(rich_text_list): - if rl.font_attr_segs: - font_attr_segs[i] = rl.font_attr_segs + if isinstance(rl, RichLine): + lines.append(rl.text) + if rl.font_attr_segs: + font_attr_segs[i] = rl.font_attr_segs + else: + lines.append(rl) return RichTextLines(lines, font_attr_segs) @@ -314,6 +319,9 @@ class RichTextLines(object): if font_attr_segs: self._font_attr_segs[len(self._lines) - 1] = font_attr_segs + def append_rich_line(self, rich_line): + self.append(rich_line.text, rich_line.font_attr_segs) + def prepend(self, line, font_attr_segs=None): """"""Prepend (i.e., add to the front) a single line of text. ",0,train d194dd4b823a1e74efd47a5b4215d839858244e9,tensorflow/tensorflow,"Use RichLine instead of explicit font_attr_segs in two more files Change: 148335792",debugger_cli_common_test.py,"@@ -77,6 +77,17 @@ class RichTextLinesTest(test_util.TensorFlowTestCase): self.assertEqual(1, len(screen_output.font_attr_segs[0])) self.assertEqual(1, len(screen_output.annotations)) + def testRichLinesAppendRichLine(self): + rtl = debugger_cli_common.RichTextLines( + ""Roses are red"", + font_attr_segs={0: [(0, 5, ""red"")]}) + rtl.append_rich_line(debugger_cli_common.RichLine(""Violets are "") + + debugger_cli_common.RichLine(""blue"", ""blue"")) + self.assertEqual(2, len(rtl.lines)) + self.assertEqual(2, len(rtl.font_attr_segs)) + self.assertEqual(1, len(rtl.font_attr_segs[0])) + self.assertEqual(1, len(rtl.font_attr_segs[1])) + def testRichTextLinesConstructorIncomplete(self): # Test RichTextLines constructor, with incomplete keyword arguments. 
screen_output = debugger_cli_common.RichTextLines( ",0,train d194dd4b823a1e74efd47a5b4215d839858244e9,tensorflow/tensorflow,"Use RichLine instead of explicit font_attr_segs in two more files Change: 148335792",stepper_cli.py,"@@ -247,42 +247,31 @@ class NodeStepperCLI(object): ] lines = [] - font_attr_segs = {} if verbose: lines.extend( [""Topologically-sorted transitive input(s) and fetch(es):"", """"]) - line_counter = len(lines) for i, element_name in enumerate(self._sorted_nodes): if i < index_range[0] or i >= index_range[1]: continue - font_attr_segs[line_counter] = [] - # TODO(cais): Use fixed-width text to show node index. - node_prefix = ""(%d / %d)"" % (i + 1, len(self._sorted_nodes)) if i == self._next: - node_prefix = "" "" + self.NEXT_NODE_POINTER_STR + node_prefix - font_attr_segs[line_counter].append((0, 3, ""bold"")) + node_prefix = RL("" "") + RL(self.NEXT_NODE_POINTER_STR, ""bold"") else: - node_prefix = "" "" + node_prefix + node_prefix = RL("" "") - node_prefix += "" ["" - labels, label_font_attr_segs = self._get_status_labels( + node_prefix += ""(%d / %d)"" % (i + 1, len(self._sorted_nodes)) + "" ["" + node_prefix += self._get_status_labels( element_name, handle_node_names, intermediate_tensor_names, override_names, - dirty_variable_names, - len(node_prefix)) - node_prefix += labels - font_attr_segs[line_counter].extend(label_font_attr_segs) + dirty_variable_names) lines.append(node_prefix + ""] "" + element_name) - line_counter += 1 - output = debugger_cli_common.RichTextLines( - lines, font_attr_segs=font_attr_segs) + output = debugger_cli_common.rich_text_lines_from_rich_line_list(lines) if verbose: output.extend(self._node_status_label_legend()) @@ -294,8 +283,7 @@ class NodeStepperCLI(object): handle_node_names, intermediate_tensor_names, override_names, - dirty_variable_names, - offset): + dirty_variable_names): """"""Get a string of status labels for a graph element. A status label indicates that a node has a certain state in this @@ -313,15 +301,13 @@ class NodeStepperCLI(object): override_names: (list of str) Names of the tensors of which the values are overridden. dirty_variable_names: (list of str) Names of the dirty variables. - offset: (int) Initial offset of the font attribute segments. Returns: - (str) The string made of status labels that currently apply to the graph - element. - (list of tuples) The font attribute segments, with offset applied. + (RichLine) The rich text string of status labels that currently apply to + the graph element. """""" - status = RL("" "" * offset) + status = RL() node_name = element_name.split("":"")[0] status += (RL(self.STATE_IS_PLACEHOLDER, @@ -350,9 +336,7 @@ class NodeStepperCLI(object): self._STATE_COLORS[self.STATE_DIRTY_VARIABLE]) if element_name in dirty_variable_names else "" "") - # TODO(ebreck) Return status here, once the caller is updated with the - # RichLine API. - return status.text[offset:], status.font_attr_segs + return status def _node_status_label_legend(self): """"""Get legend for node-status labels. 
@@ -362,8 +346,8 @@ class NodeStepperCLI(object): """""" return debugger_cli_common.rich_text_lines_from_rich_line_list([ - RL(""""), - RL(""Legend:""), + """", + ""Legend:"", (RL("" "") + RL(self.STATE_IS_PLACEHOLDER, self._STATE_COLORS[self.STATE_IS_PLACEHOLDER]) + @@ -444,18 +428,18 @@ class NodeStepperCLI(object): """""" feed_types = self._node_stepper.last_feed_types() - out = debugger_cli_common.RichTextLines([""Stepper used feeds:""]) + out = [""Stepper used feeds:""] if feed_types: for feed_name in feed_types: feed_info = RL("" %s : "" % feed_name) feed_info += RL(feed_types[feed_name], self._FEED_COLORS[feed_types[feed_name]]) - out.append(feed_info.text, font_attr_segs=feed_info.font_attr_segs) + out.append(feed_info) else: out.append("" (No feeds)"") out.append("""") - return out + return debugger_cli_common.rich_text_lines_from_rich_line_list(out) def _report_last_updated(self): """"""Generate a report of the variables updated in the last cont/step call. @@ -472,8 +456,8 @@ class NodeStepperCLI(object): rich_lines = [RL(""Updated:"", self._UPDATED_ATTRIBUTE)] sorted_last_updated = sorted(list(last_updated)) for updated in sorted_last_updated: - rich_lines.append(RL("" %s"" % updated)) - rich_lines.append(RL("""")) + rich_lines.append("" %s"" % updated) + rich_lines.append("""") return debugger_cli_common.rich_text_lines_from_rich_line_list(rich_lines) def step(self, args, screen_info=None): ",0,train ac01d27997e73942c2e598b4f203c84f756c35c3,tensorflow/tensorflow,"Translation to LLVM: check the validity of module-level Ops Translation to LLVM expects the entry module to have only specific types of ops that correspond to LLVM IR entities allowed in a module. Currently those are restricted to functions and globals. Introduce an additional check at the module level. Inside individual functions, the check for supported Ops is already performed, but it accepts all LLVM dialect Ops and wouldn't be immediately applicable at the module level. PiperOrigin-RevId: 274058651",ModuleTranslation.h,"@@ -51,7 +51,11 @@ class ModuleTranslation { public: template static std::unique_ptr translateModule(ModuleOp m) { + if (failed(checkSupportedModuleOps(m))) + return nullptr; auto llvmModule = prepareLLVMModule(m); + if (!llvmModule) + return nullptr; T translator(m); translator.llvmModule = std::move(llvmModule); @@ -74,6 +78,9 @@ protected: static std::unique_ptr prepareLLVMModule(ModuleOp m); private: + /// Check whether the module contains only supported ops directly in its body. + static LogicalResult checkSupportedModuleOps(ModuleOp m); + LogicalResult convertFunctions(); void convertGlobals(); LogicalResult convertOneFunction(LLVMFuncOp func); ",0,train 09fa4a4e355171fa30f5793ff9eb1b61a4e34ed0,tensorflow/tensorflow,"Fix ConvBackpropComputeDimensionsV2() interface. PiperOrigin-RevId: 171165222",conv_grad_ops.h,"@@ -248,7 +248,7 @@ Status ConvBackpropComputeDimensions(StringPiece label, int num_spatial_dims, Status ConvBackpropComputeDimensionsV2( StringPiece label, int num_spatial_dims, const TensorShape& input_shape, const TensorShape& filter_shape, const TensorShape& out_backprop_shape, - const std::vector& dilations, const std::vector& strides, + const gtl::ArraySlice& dilations, const std::vector& strides, Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims); } // namespace tensorflow ",0,train cf4eaffb3ddcedb723355f01c5fec2cdc020a40b,tensorflow/tensorflow,"Add support for equal() and not_equal() to tf.contrib.util.constant_value(). 
PiperOrigin-RevId: 166920279",tensor_util.py,"@@ -689,6 +689,22 @@ def _ConstantValue(tensor, partial): return np.full(fill_shape.as_list(), fill_value, dtype=fill_value.dtype) else: return None + elif tensor.op.type == ""Equal"": + value1 = constant_value(tensor.op.inputs[0]) + if value1 is None: + return None + value2 = constant_value(tensor.op.inputs[1]) + if value2 is None: + return None + return np.equal(value1, value2) + elif tensor.op.type == ""NotEqual"": + value1 = constant_value(tensor.op.inputs[0]) + if value1 is None: + return None + value2 = constant_value(tensor.op.inputs[1]) + if value2 is None: + return None + return np.not_equal(value1, value2) else: return None ",0,train cf4eaffb3ddcedb723355f01c5fec2cdc020a40b,tensorflow/tensorflow,"Add support for equal() and not_equal() to tf.contrib.util.constant_value(). PiperOrigin-RevId: 166920279",tensor_util_test.py,"@@ -800,6 +800,36 @@ class ConstantValueTest(test.TestCase): self.assertAllClose(input_, c_val[0]) self.assertIsNone(c_val[1]) + def testEqual(self): + # Scalar inputs. + tf_val = math_ops.equal(constant_op.constant(1), constant_op.constant(1)) + self.assertEqual(tensor_util.constant_value(tf_val), True) + + tf_val = math_ops.equal(constant_op.constant(1), constant_op.constant(0)) + self.assertEqual(tensor_util.constant_value(tf_val), False) + + # Shaped inputs with broadcast semantics. + tf_val = math_ops.equal(constant_op.constant([[0, 1]]), + constant_op.constant([[0], [1]])) + c_val = tensor_util.constant_value(tf_val) + self.assertAllEqual(c_val, [[True, False], [False, True]]) + + def testNotEqual(self): + # Scalar inputs. + tf_val = math_ops.not_equal(constant_op.constant(1), + constant_op.constant(1)) + self.assertEqual(tensor_util.constant_value(tf_val), False) + + tf_val = math_ops.not_equal(constant_op.constant(1), + constant_op.constant(0)) + self.assertEqual(tensor_util.constant_value(tf_val), True) + + # Shaped inputs with broadcast semantics. + tf_val = math_ops.not_equal(constant_op.constant([[0, 1]]), + constant_op.constant([[0], [1]])) + c_val = tensor_util.constant_value(tf_val) + self.assertAllEqual(c_val, [[False, True], [True, False]]) + class ConstantValueAsShapeTest(test.TestCase): ",0,train 23a3e222562cba97b4b03ccf8d4027a91d179051,tensorflow/tensorflow,"GPU registration for resource scatter add PiperOrigin-RevId: 162289810",resource_variable_ops.cc,"@@ -445,6 +445,17 @@ class ResourceScatterUpdateOp : public OpKernel { TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ARITHEMTIC_CPU); +// Registers GPU kernels. +#if GOOGLE_CUDA +#define REGISTER_SCATTER_ARITHEMTIC_GPU(type) \ + REGISTER_SCATTER_ARITHEMTIC(type, GPU); + +#define REGISTER_SCATTER_UPDATE_GPU(type) REGISTER_SCATTER_UPDATE(type, GPU); + +TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHEMTIC_GPU); + +#endif // GOOGLE_CUDA + #undef REGISTER_SCATTER_ARITHEMTIC #undef REGISTER_SCATTER_ARITHEMTIC_CPU #undef REGISTER_SCATTER_KERNEL ",0,train 23a3e222562cba97b4b03ccf8d4027a91d179051,tensorflow/tensorflow,"GPU registration for resource scatter add PiperOrigin-RevId: 162289810",resource_variable_ops_test.py,"@@ -93,7 +93,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): self.assertEqual(read.eval(), 2) def testScatterAdd(self): - with self.test_session(): + with self.test_session(use_gpu=True): handle = resource_variable_ops.var_handle_op( dtype=dtypes.int32, shape=[1, 1]) resource_variable_ops.assign_variable_op( ",0,train d0c470be2e573e9f82d54d13807726b0a8cca89d,tensorflow/tensorflow,"Use uint32 for tid. 
PiperOrigin-RevId: 297901590 Change-Id: Id5d03f41f0442f8c7a18b668b7267e6d8c727583",traceme_recorder.cc,"@@ -196,12 +196,12 @@ class TraceMeRecorder::ThreadLocalRecorder { return singleton; } -void TraceMeRecorder::RegisterThread(int32 tid, ThreadLocalRecorder* thread) { +void TraceMeRecorder::RegisterThread(uint32 tid, ThreadLocalRecorder* thread) { mutex_lock lock(mutex_); threads_.emplace(tid, thread); } -void TraceMeRecorder::UnregisterThread(int32 tid) { +void TraceMeRecorder::UnregisterThread(uint32 tid) { mutex_lock lock(mutex_); auto it = threads_.find(tid); if (it != threads_.end()) { ",0,train d0c470be2e573e9f82d54d13807726b0a8cca89d,tensorflow/tensorflow,"Use uint32 for tid. PiperOrigin-RevId: 297901590 Change-Id: Id5d03f41f0442f8c7a18b668b7267e6d8c727583",traceme_recorder.h,"@@ -59,7 +59,7 @@ class TraceMeRecorder { uint64 end_time; // 0 = missing }; struct ThreadInfo { - int32 tid; + uint32 tid; string name; }; struct ThreadEvents { @@ -101,8 +101,8 @@ class TraceMeRecorder { TF_DISALLOW_COPY_AND_ASSIGN(TraceMeRecorder); - void RegisterThread(int32 tid, ThreadLocalRecorder* thread); - void UnregisterThread(int32 tid); + void RegisterThread(uint32 tid, ThreadLocalRecorder* thread); + void UnregisterThread(uint32 tid); bool StartRecording(int level); Events StopRecording(); @@ -113,7 +113,7 @@ class TraceMeRecorder { mutex mutex_; // Map of the static container instances (thread_local storage) for each // thread. While active, a ThreadLocalRecorder stores trace events. - absl::flat_hash_map threads_ GUARDED_BY(mutex_); + absl::flat_hash_map threads_ GUARDED_BY(mutex_); // Events from threads that died during recording. TraceMeRecorder::Events orphaned_events_ GUARDED_BY(mutex_); }; ",0,train 0e61131b5a20916e2445821e8b18f1416b375dcf,tensorflow/tensorflow,"Log the start of filling up the shuffle buffer When debugging performance issues we only saw 2 logs, the intermediate state and the end. However, we did not know when the shuffle buffer processing started. The updated logging should be clearer and will record both the start and end. 
PiperOrigin-RevId: 240624588",shuffle_dataset_op.cc,"@@ -129,6 +129,10 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel { ctx, this->prefix(), &input_impl_)); } if (!end_of_input_sequence) { + if (num_elements_ == 0) { + VLOG(1) << ""Starting to fill up shuffle buffer of size: "" + << this->dataset()->buffer_size_; + } this->RecordBufferEnqueue(ctx, input_element); buffer_[slices_.back()->end % this->dataset()->buffer_size_] = std::move(input_element); ",0,train 0ef76693fdab2a4d1a4923444a2593f79a6b7873,tensorflow/tensorflow,"Automated g4 rollback of changelist 199308328 PiperOrigin-RevId: 199809082",algebraic_simplifier_test.cc,"@@ -1714,7 +1714,7 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopPad) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), param); } @@ -1759,7 +1759,7 @@ TEST_F(AlgebraicSimplifierTest, NegativePadding) { EXPECT_THAT(computation->root_instruction(), op::Pad(param, zero)); EXPECT_TRUE(has_negative_padding(pad)); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Slice(op::Pad(param, zero))); EXPECT_FALSE( @@ -1781,7 +1781,7 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopReshape) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), param); } @@ -1804,7 +1804,7 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopSlice) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), param); } @@ -1932,7 +1932,8 @@ TEST_F(AlgebraicSimplifierTest, ConvertConvToMatmul) { b.AddInstruction(HloInstruction::CreateConvolve(out_shape, input, filter, window, dnums)); - auto module = CreateNewModule(); + // TODO(b/80488902): verify this module. 
+ auto module = HloTestBase::CreateNewModule(); auto* computation = module->AddEntryComputation(b.Build()); AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/true, @@ -2060,7 +2061,7 @@ TEST_F(AlgebraicSimplifierTest, MaxMinToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Clamp(max_value, param0, min_value)); @@ -2090,7 +2091,7 @@ TEST_F(AlgebraicSimplifierTest, MinMaxToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Clamp(max_value, param0, min_value)); @@ -2121,7 +2122,7 @@ TEST_F(AlgebraicSimplifierTest, MinMaxWithBroadcastToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Clamp(max_value, param0, min_value)); @@ -2151,7 +2152,7 @@ TEST_F(AlgebraicSimplifierTest, MinMaxNotToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - EXPECT_FALSE(simplifier.Run(module.get()).ValueOrDie()); + EXPECT_FALSE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Minimum(op::Maximum(param0, max_value), min_value)); @@ -2184,7 +2185,7 @@ TEST_F(AlgebraicSimplifierTest, MinEquationWithMaxNotToClamp) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - EXPECT_FALSE(simplifier.Run(module.get()).ValueOrDie()); + EXPECT_FALSE(simplifier.Run(module).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Minimum(op::Add(op::Maximum(param0, max_value), max_value), @@ -2200,10 +2201,8 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToSlice) { HloInstruction::CreateParameter(0, r0f32, ""scalar_param"")); Shape broadcast_shape = ShapeUtil::MakeShape(F32, {4, 5, 6, 7}); - HloInstruction* broadcast = - builder.AddInstruction(HloInstruction::CreateBroadcast( - broadcast_shape, scalar_param, - AsInt64Slice(broadcast_shape.dimensions()))); + HloInstruction* broadcast = builder.AddInstruction( + HloInstruction::CreateBroadcast(broadcast_shape, scalar_param, {})); Shape slice_shape = ShapeUtil::MakeShape(F32, {2, 2, 3, 3}); HloInstruction* slice = builder.AddInstruction(HloInstruction::CreateSlice( @@ -2219,10 +2218,10 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToSlice) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); // Running simplification again should not result in any further changes. 
- ASSERT_FALSE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_FALSE(simplifier.Run(module).ValueOrDie()); root = computation->root_instruction(); EXPECT_THAT(root, op::Broadcast(scalar_param)); @@ -2237,10 +2236,8 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToTransposeReshape) { HloInstruction::CreateConstant(Literal::CreateR0(42.0f))); Shape broadcast_shape = ShapeUtil::MakeShape(F32, {4, 5, 6}); - HloInstruction* broadcast = - builder.AddInstruction(HloInstruction::CreateBroadcast( - broadcast_shape, forty_two, - AsInt64Slice(broadcast_shape.dimensions()))); + HloInstruction* broadcast = builder.AddInstruction( + HloInstruction::CreateBroadcast(broadcast_shape, forty_two, {})); HloInstruction* transpose = builder.AddInstruction(HloInstruction::CreateTranspose( @@ -2259,7 +2256,7 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToTransposeReshape) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); root = computation->root_instruction(); EXPECT_THAT(root, op::Broadcast(forty_two)); @@ -2268,7 +2265,8 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToTransposeReshape) { // Test that ReduceWindow(Pad(op, x), y) can simplify to ReduceWindow(op, x). TEST_F(AlgebraicSimplifierTest, FoldPadIntoReduceWindow) { - auto module = CreateNewModule(); + // TODO(b/80488902): verify this module. + auto module = HloTestBase::CreateNewModule(); HloComputation::Builder builder(TestName()); // Create operand to the pad. @@ -2349,7 +2347,8 @@ TEST_F(AlgebraicSimplifierTest, FoldPadIntoReduceWindow) { // Test that ReduceWindow(Convert(Pad(op, x)), y) can simplify to // ReduceWindow(Convert(op), x). TEST_F(AlgebraicSimplifierTest, FoldConvertedPadIntoReduceWindow) { - auto module = CreateNewModule(); + // TODO(b/80488902): verify this module. + auto module = HloTestBase::CreateNewModule(); HloComputation::Builder builder(TestName()); // Create operand to the pad. @@ -2444,7 +2443,7 @@ TEST_F(AlgebraicSimplifierTest, ReversalOfTrivialDimensionsToBitcast) { AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); HloInstruction* root = computation->root_instruction(); EXPECT_EQ(a, root); ",0,train 0ef76693fdab2a4d1a4923444a2593f79a6b7873,tensorflow/tensorflow,"Automated g4 rollback of changelist 199308328 PiperOrigin-RevId: 199809082",hlo_test_base.h,"@@ -66,6 +66,15 @@ namespace xla { // // For a more detailed example, see ""../tests/sample_text_test.cc"". class HloTestBase : public ::testing::Test { + public: + // Creates a new HLO module for a test. The module created will have + // TestName() for its name; it will also automatically populate its debug + // options from command-line flags. If you want a fresh HloModule object and + // then add HloComputations to it, it's recommended to use this method in your + // tests. + static std::unique_ptr CreateNewModule( + const string& name = TestName()); + protected: // This uses the interpreter backend as the reference backend and // automatically finds another supported backend as the test backend. If the @@ -80,14 +89,6 @@ class HloTestBase : public ::testing::Test { ~HloTestBase() override {} - // Creates a new HLO module for a test. 
The module created will have - // TestName() for its name; it will also automatically populate its debug - // options from command-line flags. If you want a fresh HloModule object and - // then add HloComputations to it, it's recommended to use this method in your - // tests. - static std::unique_ptr CreateNewModule( - const string& name = TestName()); - // Populates debug options from command-line flags and adjusts the options for // testing. It is recommended to use this when you need to pass in // DebugOptions, e.g. when creating a module from a string or a file. ",0,train 0ef76693fdab2a4d1a4923444a2593f79a6b7873,tensorflow/tensorflow,"Automated g4 rollback of changelist 199308328 PiperOrigin-RevId: 199809082",hlo_verified_test_base.cc,"@@ -41,14 +41,17 @@ void HloVerifiedTestBase::TearDown() { << ""TearDown called more than once; it should be called exactly once.""; tear_down_called_ = true; if (module_) { - VerifyModule(); + VerifyModule(module_.get()); + } + for (int i = 0; i < modules_.size(); ++i) { + VerifyModule(modules_.at(i).get()); } HloTestBase::TearDown(); } -void HloVerifiedTestBase::VerifyModule() { - HloVerifier verifier; - xla::StatusOr mutated = verifier.Run(module_.get()); +void HloVerifiedTestBase::VerifyModule(HloModule* module) { + HloVerifier verifier(/*allow_mixed_precision=*/true); + xla::StatusOr mutated = verifier.Run(module); if (!mutated.ok()) { ADD_FAILURE() << ""HloVerifier failed: "" << mutated.status(); } else { @@ -59,15 +62,20 @@ void HloVerifiedTestBase::VerifyModule() { HloModule& HloVerifiedTestBase::module() { if (!module_) { - module_ = CreateNewModule(); + module_ = HloTestBase::CreateNewModule(); } return *module_; } +HloModule* HloVerifiedTestBase::CreateNewModule(const string& name) { + modules_.emplace_back(HloTestBase::CreateNewModule()); + return modules_.back().get(); +} + void HloVerifiedTestBase::ParseAndVerifyModule( tensorflow::StringPiece hlo_text) { CHECK(!module_) << ""Called ParseModule when test already has a module.""; TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text)); - VerifyModule(); + VerifyModule(module_.get()); } } // namespace xla ",0,train 0ef76693fdab2a4d1a4923444a2593f79a6b7873,tensorflow/tensorflow,"Automated g4 rollback of changelist 199308328 PiperOrigin-RevId: 199809082",hlo_verified_test_base.h,"@@ -52,11 +52,23 @@ class HloVerifiedTestBase : public HloTestBase { shape_verifier_ = std::move(shape_verifier); } + // Creates a new module for a test, and stores it in modules_ so it can be + // verified. Intentionally hides HloTestBase::CreateNewModule, to prevent + // creation of unverified modules. + HloModule* CreateNewModule(const string& name = TestName()); + + // It is confusing to store modules created by module() and CreateNewModule() + // in different fields, but it allows us to migrate tests to + // HloVerifiedTestBase more easily, so it's a win because we can verify more + // modules. See b/80488902. private: - std::unique_ptr module_; // Lazily populated. Access via module(). + // Lazily populated. Access via module(). + std::unique_ptr module_; + // Populated by calls to CreateNewModule. + std::vector> modules_; std::unique_ptr shape_verifier_; bool tear_down_called_ = false; - void VerifyModule(); + static void VerifyModule(HloModule* module); }; } // namespace xla ",0,train c446422e3857344d9b94a1521ff86734b700f1ae,tensorflow/tensorflow,"Fix bug in and speed up ConstantFolding::CreateNodeDef(): * Fix bug trying to store more than kintmax32 values in a repeated proto field. 
* Speed up populating compressed format. Example: tensorflow/python/kernel_tests/large_concat_op_test with size = 2**29+6 goes from ~30 seconds to ~15 seconds. The fraction of time spent in ConstantFolding::CreateNodeDef() goes down from about 35% to about 12%. PiperOrigin-RevId: 184693749",constant_folding.cc,"@@ -808,20 +808,26 @@ NodeDef ConstantFolding::CreateNodeDef(const string& name, // Use the packed representation whenever possible to avoid generating large // graphdefs. Moreover, avoid repeating the last values if they're equal. if (tensor->NumElements() > 4) { -#define POPULATE_TENSOR_PROTO(tensor, t, TYPE, NAME) \ - optimized = true; \ - TYPE last = tensor->flat()(0); \ - int last_index = 0; \ - for (int i = 0; i < tensor->NumElements(); ++i) { \ - TYPE cur = tensor->flat()(i); \ - t->add_##NAME##_val(cur); \ - if (cur != last) { \ - last = cur; \ - last_index = i; \ - } \ - } \ - /* Remove all identical trailing values to save memory. */ \ - t->mutable_##NAME##_val()->Truncate(last_index + 1); +#define POPULATE_TENSOR_PROTO(tensor, t, TYPE, NAME) \ + const TYPE* val_ptr = tensor->flat().data(); \ + TYPE last = *val_ptr; \ + int64 last_index = 0; \ + for (int64 i = 0; i < tensor->NumElements(); ++i) { \ + TYPE cur = *val_ptr++; \ + if (cur != last) { \ + last = cur; \ + last_index = i; \ + } \ + } \ + if (last_index < kint32max) { \ + optimized = true; \ + t->mutable_##NAME##_val()->Reserve(last_index + 1); \ + t->mutable_##NAME##_val()->AddNAlreadyReserved(last_index + 1); \ + val_ptr = tensor->flat().data(); \ + for (int64 i = 0; i <= last_index; ++i) { \ + t->set_##NAME##_val(i, *val_ptr++); \ + } \ + } if (tensor->dtype() == DT_FLOAT) { POPULATE_TENSOR_PROTO(tensor, t, float, float) ",0,train ec3edaf0277041350ad312e477db48266cbd860f,tensorflow/tensorflow,Remove not working code path for odd row size.,ir_emitter_unnested.cc,"@@ -2042,73 +2042,23 @@ void IrEmitterUnnested::EmitTile( }; char * env_if = getenv(""RED_IF""); - int red_if = 1; + int red_if = 0; if (env_if) { red_if = atoi(env_if); printf(""RED_IF2 = %d %s\n"", red_if, env_if); } - if (red_if == 1) { + if (red_if == 1 || x_tile_fits) { std::cout << ""IF_NB 1: "" << std::endl; unroll(!x_tile_fits, x_num_steps, vec_stride); - } else if (red_if == 2) { + } else { std::cout << ""IF_NB 2"" << std::endl; ksl->If(loop_name + ""_is_full_tile"", - //b->CreateICmpULT(last_element, tile_width), - // If (the thread fully unrolled) {no condition path} else {condition path} + // if (block fully fit) {fast path} else {slow path} + // tile_width is always exact. For the last block, + // it will be the exact number of elements left. b_.CreateICmpEQ(constant(mapping_scheme.GetTileSizeFor(2)), tile_width), [&] {unroll(false, x_num_steps, vec_stride);}, [&] {unroll(true, x_num_steps, vec_stride);}); - } else { - std::cout << ""IF_NB 3"" << std::endl; - //b->CreateICmpULT(start_offset_x+j * step_x * vec_stride + i, tile_width) - int last_block_left_element = mapping_scheme.GetDimsInElems()[2] % x_num_steps; - std::cout << ""MAPPING "" << mapping_scheme.GetDimsInElems()[0] << "" "" - << mapping_scheme.GetDimsInElems()[1] << "" "" - << mapping_scheme.GetDimsInElems()[2] << std::endl; - std::cout << ""LAST_BLOCK x_num_steps "" << x_num_steps - << "" last_block"" << last_block_left_element << std::endl; - // NB block per reduction. 
- int nb_block = CeilOfRatio(mapping_scheme.GetDimsInElems()[2], - tile_size_x); - std::cout << ""NB_BLOCK"" << nb_block << std::endl; - if (x_tile_fits) { - // All threads will completly unroll - unroll(false, x_num_steps, vec_stride); - } else if(nb_block == 1) { - // No thread will completly unroll. - // TODO: unroll by the right amount - unroll(true, x_num_steps, vec_stride); - } else { - // For some blocks, all threads will will completly unroll. - // For other blocks, some of its threads will completly unroll, others will partially and some won't be used. - // So do an if(thread fully unroll) {code with no if between elements} else {code with if between each elements} - // TODO: in the else part, unroll without if but with the right number of elements left. - - llvm::Value* block_id = gpu::EmitCallToTargetIntrinsic( - gpu::TargetIntrinsicID::kBlockIdx, {}, {}, &b_); - llvm::Value* last_element = b_.CreateAdd(constant(x_num_steps * tile_size_x), - start_offset_x, ""last_element""); - int x_num_steps_partial = mapping_scheme.GetDimsInElems()[2] % tile_size_x; - x_num_steps_partial *= 2; - //x_num_steps_partial = x_num_steps; - ksl->If(loop_name + ""_is_full_tile"", - // Test if all the elements of this thread is withing tile. - b_.CreateICmpULT(last_element, tile_width), - // Not the last block, so unroll without ifs. - [&] {unroll(false, x_num_steps, vec_stride);}, - // The last block isn't completly unrolled. - - // TODO: unroll the right size. Take care - // vec_stride must match the above unroll for - // now. - // TODO: after unroll of the right size, remove the IFs. - // ONGOING, try to make it work with less - // then unroll x_num_steps - - [&] {unroll(true, x_num_steps, vec_stride);}); // works - //[&] {unroll(true, x_num_steps, x_num_steps);}); - //[&] {unroll(true, x_num_steps_partial, vec_stride);}); - } } }}); } ",0,train 01c56613967c6cf12dbea7256342b75ba58087ab,tensorflow/tensorflow,"Correct the punctuation in the deprecation message. PiperOrigin-RevId: 243893700",normalization.py,"@@ -170,8 +170,8 @@ class BatchNormalization(keras_layers.BatchNormalization, base.Layer): @deprecation.deprecated( date=None, instructions='Use keras.layers.BatchNormalization instead. In ' 'particular, `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` should not ' - 'be used (consult the `tf.keras.layers.batch_normalization`).' - 'documentation.') + 'be used (consult the `tf.keras.layers.batch_normalization` ' + 'documentation).') @tf_export(v1=['layers.batch_normalization']) def batch_normalization(inputs, axis=-1, ",0,train 6c7e0cc4831b892b7fe5f91b7297f534ea99940b,tensorflow/tensorflow,Fixed typos in GradientTape warning message,backprop.py,"@@ -926,14 +926,14 @@ class GradientTape(object): else: logging.log_first_n(logging.WARN, ""Calling GradientTape.gradient on a persistent "" - ""tape inside it's context is significantly less "" + ""tape inside its context is significantly less "" ""efficient than calling it outside the context (it "" ""causes the gradient ops to be recorded on the "" ""tape, leading to increased CPU and memory usage). 
"" ""Only call GradientTape.gradient inside the "" ""context if you actually want to trace the "" ""gradient in order to compute higher order "" - ""derrivatives."", 1) + ""derivatives."", 1) flat_targets = [] for t in nest.flatten(target): ",0,train 8b20ddf3e0eedb52a7ae0f10a55658e64efc4d1a,tensorflow/tensorflow,"[XLA] Sanity check the list of called computations for fusion nodes called_compuatations for a fusion node should only include the fusion computation that it calls. PiperOrigin-RevId: 167149669",hlo_instruction.cc,"@@ -793,13 +793,6 @@ HloInstruction* HloInstruction::CloneAndFuseInternal( } } - for (HloComputation* computation : - instruction_to_fuse->called_computations()) { - if (std::find(called_computations_.begin(), called_computations_.end(), - computation) == called_computations_.end()) { - called_computations_.push_back(computation); - } - } VLOG(2) << ""New clone:\n"" << clone->ToString(); return clone; } ",0,train 8b20ddf3e0eedb52a7ae0f10a55658e64efc4d1a,tensorflow/tensorflow,"[XLA] Sanity check the list of called computations for fusion nodes called_compuatations for a fusion node should only include the fusion computation that it calls. PiperOrigin-RevId: 167149669",hlo_instruction.h,"@@ -797,8 +797,7 @@ class HloInstruction { const Shape& shape, tensorflow::gtl::ArraySlice operands); - // Returns the computations this instruction calls (if any). This includes - // computations called by fused instructions inside of a fusion instruction. + // Returns the computations this instruction directly calls (if any). const std::vector& called_computations() const { return called_computations_; } ",0,train 8b20ddf3e0eedb52a7ae0f10a55658e64efc4d1a,tensorflow/tensorflow,"[XLA] Sanity check the list of called computations for fusion nodes called_compuatations for a fusion node should only include the fusion computation that it calls. PiperOrigin-RevId: 167149669",hlo_instruction_test.cc,"@@ -758,16 +758,13 @@ TEST_F(HloInstructionTest, FusionOpWithCalledComputations) { auto* fusion = computation->CreateFusionInstruction( {map_3_y}, HloInstruction::FusionKind::kLoop); auto* fused_computation = fusion->fused_instructions_computation(); - EXPECT_THAT(fusion->called_computations(), - ElementsAre(fused_computation, computation_y)); + EXPECT_THAT(fusion->called_computations(), ElementsAre(fused_computation)); fusion->FuseInstruction(map_2_x); - EXPECT_THAT(fusion->called_computations(), - ElementsAre(fused_computation, computation_y, computation_x)); + EXPECT_THAT(fusion->called_computations(), ElementsAre(fused_computation)); fusion->FuseInstruction(map_1_x); - EXPECT_THAT(fusion->called_computations(), - ElementsAre(fused_computation, computation_y, computation_x)); + EXPECT_THAT(fusion->called_computations(), ElementsAre(fused_computation)); } TEST_F(HloInstructionTest, ComplexFusionOp) { ",0,train 8b20ddf3e0eedb52a7ae0f10a55658e64efc4d1a,tensorflow/tensorflow,"[XLA] Sanity check the list of called computations for fusion nodes called_compuatations for a fusion node should only include the fusion computation that it calls. PiperOrigin-RevId: 167149669",hlo_rematerialization.cc,"@@ -1248,7 +1248,8 @@ StatusOr HloRematerialization::Run( sequence->at(node.computation()))); } return Status::OK(); - })); + }, + /*visit_unreachable_nodes=*/false)); // The peak memory usage of the module equals the peak memory use of the entry // computation plus the output size of the computation. 
This is because the ",0,train 8b20ddf3e0eedb52a7ae0f10a55658e64efc4d1a,tensorflow/tensorflow,"[XLA] Sanity check the list of called computations for fusion nodes called_compuatations for a fusion node should only include the fusion computation that it calls. PiperOrigin-RevId: 167149669",hlo_verifier.cc,"@@ -280,6 +280,14 @@ class ShapeVerifier : public DfsHloVisitor { const std::function shape_size_fn_; }; +string ComputationsToString( + tensorflow::gtl::ArraySlice computations) { + return tensorflow::str_util::Join( + computations, "","", [](string* s, const HloComputation* computation) { + s->append(computation->name()); + }); +} + } // namespace StatusOr HloVerifier::Run(HloModule* module) { @@ -290,6 +298,17 @@ StatusOr HloVerifier::Run(HloModule* module) { for (const auto& instruction : computation->instructions()) { TF_RET_CHECK(instruction->parent() == computation.get()); if (instruction->opcode() == HloOpcode::kFusion) { + TF_RET_CHECK( + ContainersEqual(instruction->called_computations(), + {instruction->fused_instructions_computation()})) + << ""Fusion HLO calls computations other than the "" + ""fused_instructions_computation: "" + << instruction->ToString() + << "" instruction->fused_instructions_computation(): "" + << instruction->fused_instructions_computation()->ToString() + << "" instruction->called_computations(): "" + << ComputationsToString(instruction->called_computations()); + for (const auto& fused : instruction->fused_instructions()) { TF_RET_CHECK(fused->parent() == instruction->fused_instructions_computation()) ",0,train da0f64b27e619d3ba509dcd6ce1d29fe29f374e7,tensorflow/tensorflow,"Fix tensorshape for static sized tensorarray PiperOrigin-RevId: 265702387",tensor_array_ops_test.py,"@@ -1365,7 +1365,7 @@ class TensorArrayTest(test.TestCase): x = constant_op.constant([1.0, 2.0, 3.0]) ta = ta.write(0, x) t = ta.stack() - self.assertEqual(t.shape.as_list(), [None, 3]) + self.assertEqual(t.shape.as_list(), [3, 3]) return t ta_stack() @@ -1790,6 +1790,11 @@ class TensorArrayTest(test.TestCase): dtypes.float32, size=0, element_shape=(5, None), dynamic_size=True) self.assertEqual([None, 5, None], ta.stack().shape.as_list()) + def testStackShapeOnStaticSize(self): + ta = tensor_array_ops.TensorArray(dtypes.float32, size=42) + ta = ta.write(0, [0]) + self.assertEqual([42, 1], ta.stack().shape.as_list()) + class TensorArrayBenchmark(test.Benchmark): ",0,train da0f64b27e619d3ba509dcd6ce1d29fe29f374e7,tensorflow/tensorflow,"Fix tensorshape for static sized tensorarray PiperOrigin-RevId: 265702387",tensor_array_ops.py,"@@ -137,6 +137,7 @@ class _GraphTensorArray(object): # shape equality. 
self._element_shape = [tensor_shape.as_shape(element_shape)] self._infer_shape = infer_shape + self._size = size with ops.name_scope(name, ""TensorArray"", [handle, size, flow]) as scope: if handle is not None: self._handle = handle @@ -281,7 +282,12 @@ class _GraphTensorArray(object): """"""See TensorArray."""""" with ops.colocate_with(self._handle): with ops.name_scope(name, ""TensorArrayStack"", [self._handle]): - return self.gather(math_ops.range(0, self.size()), name=name) + value = self.gather(math_ops.range(0, self.size()), name=name) + if (self.element_shape and not self._dynamic_size and + self._size is not None): + value.set_shape([tensor_util.constant_value(self._size)] + + self.element_shape.dims) + return value def gather(self, indices, name=None): """"""See TensorArray."""""" @@ -365,8 +371,11 @@ class _GraphTensorArray(object): def size(self, name=None): """"""See TensorArray."""""" - return gen_data_flow_ops.tensor_array_size_v3( - handle=self._handle, flow_in=self.flow, name=name) + if not self._dynamic_size and self._size is not None: + return ops.convert_to_tensor(self._size, dtype=dtypes.int32) + else: + return gen_data_flow_ops.tensor_array_size_v3( + handle=self._handle, flow_in=self.flow, name=name) @tf_should_use.should_use_result def close(self, name=None): @@ -427,6 +436,7 @@ class _GraphTensorArrayV2(object): del colocate_with_first_write_call self._dynamic_size = dynamic_size + self._size = size if (flow is not None and (not isinstance(flow, ops.Tensor) or flow.dtype != dtypes.variant)): @@ -536,9 +546,15 @@ class _GraphTensorArrayV2(object): def stack(self, name=None): """"""See TensorArray."""""" with ops.name_scope(name, ""TensorArrayV2Stack"", [self._flow]): + # TODO(b/139941163): remove constant_value after changing num_elements to regular input + if not self._dynamic_size and self._size is not None: + ta_size = tensor_util.constant_value(self._size) + else: + ta_size = -1 value = list_ops.tensor_list_stack( input_handle=self._flow, element_dtype=self._dtype, + num_elements=ta_size, element_shape=self.element_shape) return value @@ -619,7 +635,10 @@ class _GraphTensorArrayV2(object): def size(self, name=None): """"""See TensorArray."""""" - return list_ops.tensor_list_length(input_handle=self._flow, name=name) + if not self._dynamic_size and self._size is not None: + return ops.convert_to_tensor(self._size, dtype=dtypes.int32) + else: + return list_ops.tensor_list_length(input_handle=self._flow, name=name) @tf_should_use.should_use_result def close(self, name=None): @@ -1227,6 +1246,7 @@ def build_ta_with_new_flow(old_ta, flow): colocate_with_first_write_call=impl._colocate_with_first_write_call) new_impl = new_ta._implementation new_impl._dynamic_size = impl._dynamic_size + new_impl._size = impl._size new_impl._colocate_with = impl._colocate_with new_impl._element_shape = impl._element_shape # Share _element_shape. return new_ta ",0,train ffc651af58ebacdf3ddbe9537efda694c71a64f3,tensorflow/tensorflow,"Update LogToSTDErr for TF Lite usage PiperOrigin-RevId: 192379483",arg_max_test.cc,"@@ -100,8 +100,7 @@ TEST(ArgMaxOpTest, GetMaxArgOutput64) { } // namespace tflite int main(int argc, char** argv) { - // On Linux, add: FLAGS_logtostderr = true; - FLAGS_logtostderr = true; + ::tflite::LogToStderr(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } ",0,test 9274bcebb31322370139467039034f8ff852b004,tensorflow/tensorflow,"Internal fixes to sample_distored_bounding_box. 
PiperOrigin-RevId: 268719203",sample_distorted_bounding_box_op.cc,"@@ -126,11 +126,19 @@ bool GenerateRandomCrop(int original_width, int original_height, int height = static_cast(lrintf(std::sqrt(min_area / aspect_ratio))); int max_height = static_cast(lrintf(std::sqrt(max_area / aspect_ratio))); + // TODO(b/140767341): Rewrite the generation logic to be more tolerant + // of floating point behavior. if (lrintf(max_height * aspect_ratio) > original_width) { // We must find the smallest max_height satisfying // round(max_height * aspect_ratio) <= original_width: const float kEps = 0.0000001; max_height = static_cast((original_width + 0.5 - kEps) / aspect_ratio); + // If due some precision issues, we still cannot guarantee + // round(max_height * aspect_ratio) <= original_width, subtract 1 from + // max height. + if (lrintf(max_height * aspect_ratio) > original_width) { + max_height -= 1; + } } if (max_height > original_height) { ",0,train 333abd19d9c49393abb2466401b040cc417801d8,tensorflow/tensorflow,"Allow users to register objects that return `None` in `_serialize_to_proto`. PiperOrigin-RevId: 396428995 Change-Id: I54d87e48e23a899c570c4cf5a91b630f2d8215d7",load_test.py,"@@ -31,7 +31,6 @@ import weakref from absl.testing import parameterized import numpy as np -from google.protobuf import wrappers_pb2 from tensorflow.python.client import session as session_lib from tensorflow.python.compat import compat @@ -69,7 +68,6 @@ from tensorflow.python.ops.ragged import ragged_factory_ops from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.saved_model import load from tensorflow.python.saved_model import load_options -from tensorflow.python.saved_model import registration from tensorflow.python.saved_model import save from tensorflow.python.saved_model import save_options from tensorflow.python.saved_model import tag_constants @@ -2130,36 +2128,6 @@ class LoadTest(test.TestCase, parameterized.TestCase): self.assertAllClose(grads, expected_grads) - def test_load_registered(self, cycles): - - @registration.register_serializable(name=f""Module{cycles}"") - class Module(tracking.AutoTrackable): - - def __init__(self, name=""module""): - self.v = variables.Variable(1.) - self.name = name - - def _serialize_to_proto(self, **unused_kwargs): - return wrappers_pb2.StringValue(value=self.name) - - @classmethod - def _deserialize_from_proto(cls, proto, **unused_kwargs): - if proto.Is(wrappers_pb2.StringValue.DESCRIPTOR): - unpacked = wrappers_pb2.StringValue() - proto.Unpack(unpacked) - return cls(name=unpacked.value) - raise AssertionError( - ""Did not receive proto of correct type during deserialization. "" - f""Expected type {wrappers_pb2.StringValue.DESCRIPTOR.full_name}, "" - f""got {proto.TypeName()}"") - - m = Module(""a"") - m.v.assign(5) - loaded = cycle(m, cycles) - self.assertIsInstance(loaded, Module) - self.assertEqual(5, loaded.v.numpy()) - self.assertEqual(""a"", loaded.name) - class SingleCycleTests(test.TestCase, parameterized.TestCase): ",0,test 333abd19d9c49393abb2466401b040cc417801d8,tensorflow/tensorflow,"Allow users to register objects that return `None` in `_serialize_to_proto`. PiperOrigin-RevId: 396428995 Change-Id: I54d87e48e23a899c570c4cf5a91b630f2d8215d7",registration_saving_test.py,"@@ -0,0 +1,111 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""""""Tests saving with registered Trackable classes and checkpoint functions."""""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tempfile +from absl.testing import parameterized + +from google.protobuf import wrappers_pb2 +from tensorflow.python.eager import test +from tensorflow.python.framework import test_util +from tensorflow.python.ops import variables +from tensorflow.python.saved_model import load +from tensorflow.python.saved_model import registration +from tensorflow.python.saved_model import save +from tensorflow.python.training.tracking import tracking + + +def cycle(obj, cycles, signatures=None, options=None): + to_save = obj + for _ in range(cycles): + path = tempfile.mkdtemp(prefix=test.get_temp_dir()) + # If available, we'll run the save and restore preferring the GPU. This + # just makes sure we aren't throwing errors and have enough + # device(""CPU"") blocks to satisfy the placer. + with test_util.use_gpu(): + save.save(to_save, path, signatures, options=options) + loaded = load.load(path) + signatures = loaded.signatures + to_save = loaded + return loaded + + +@parameterized.named_parameters( + dict(testcase_name=""ReloadOnce"", cycles=1), + dict(testcase_name=""ReloadTwice"", cycles=2), + dict(testcase_name=""ReloadThrice"", cycles=3) +) +class SavedModelTest(test.TestCase, parameterized.TestCase): + + def test_save_and_load(self, cycles): + + @registration.register_serializable(name=f""SaveAndLoad{cycles}"") + class Module(tracking.AutoTrackable): + + def __init__(self, name=""module""): + self.v = variables.Variable(1.) + self.name = name + + def _serialize_to_proto(self, **unused_kwargs): + return wrappers_pb2.StringValue(value=self.name) + + @classmethod + def _deserialize_from_proto(cls, proto, **unused_kwargs): + if proto.Is(wrappers_pb2.StringValue.DESCRIPTOR): + unpacked = wrappers_pb2.StringValue() + proto.Unpack(unpacked) + return cls(name=unpacked.value) + raise AssertionError( + ""Did not receive proto of correct type during deserialization. "" + f""Expected type {wrappers_pb2.StringValue.DESCRIPTOR.full_name}, "" + f""got {proto.TypeName()}"") + + m = Module(""a"") + m.v.assign(5) + loaded = cycle(m, cycles) + self.assertIsInstance(loaded, Module) + self.assertEqual(5, loaded.v.numpy()) + self.assertEqual(""a"", loaded.name) + + def test_none_proto(self, cycles): + + @registration.register_serializable(name=f""NoneProto{cycles}"") + class Module(tracking.AutoTrackable): + + def __init__(self, name=""module""): + self.v = variables.Variable(1.) + self.name = name + + # Leave _serialize_to_proto as the default (returns `None`). 
+ + @classmethod + def _deserialize_from_proto(cls, proto, **unused_kwargs): + self.assertEqual(proto.ByteSize(), 0) + return cls(""deserialized"") + + m = Module(""a"") + m.v.assign(5) + loaded = cycle(m, cycles) + self.assertIsInstance(loaded, Module) + self.assertEqual(5, loaded.v.numpy()) + self.assertEqual(""deserialized"", loaded.name) + + +if __name__ == ""__main__"": + test.main() ",0,test 333abd19d9c49393abb2466401b040cc417801d8,tensorflow/tensorflow,"Allow users to register objects that return `None` in `_serialize_to_proto`. PiperOrigin-RevId: 396428995 Change-Id: I54d87e48e23a899c570c4cf5a91b630f2d8215d7",registration_test.py,"@@ -12,7 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -""""""Trackable class registration tests."""""" +""""""Trackable class registration tests. + +For integrated tests, see registration_saving_test.py. +"""""" from __future__ import absolute_import from __future__ import division @@ -20,7 +23,7 @@ from __future__ import print_function from absl.testing import parameterized -from tensorflow.python.platform import test +from tensorflow.python.eager import test from tensorflow.python.saved_model import registration from tensorflow.python.training.tracking import base ",0,test 333abd19d9c49393abb2466401b040cc417801d8,tensorflow/tensorflow,"Allow users to register objects that return `None` in `_serialize_to_proto`. PiperOrigin-RevId: 396428995 Change-Id: I54d87e48e23a899c570c4cf5a91b630f2d8215d7",save.py,"@@ -1003,7 +1003,9 @@ def _write_object_proto(obj, proto, asset_file_def_index, function_name_map): registered_name = registration.get_registered_name(obj) if registered_name: proto.registered_name = registered_name - proto.serialized_user_proto.Pack(obj._serialize_to_proto()) # pylint: disable=protected-access + serialized_user_proto = obj._serialize_to_proto() # pylint: disable=protected-access + if serialized_user_proto is not None: + proto.serialized_user_proto.Pack(serialized_user_proto) def _export_debug_info(exported_graph, export_dir): ",0,test 2f2b41a42d80c57cee5171beb89675f0875546d3,tensorflow/tensorflow,"Improved the performance of the batch normalization gradient computation by leveraging index lists whenever possible. Change: 110482898",batch_norm_op.h,"@@ -86,9 +86,20 @@ struct BatchNormGrad { const int rest_size = input.size() / depth; typedef typename TTypes::ConstVec::Index Index; + Eigen::DSizes rest_by_depth(rest_size, depth); +#if !defined(EIGEN_HAS_INDEX_LIST) Eigen::DSizes rest_by_one(rest_size, 1); Eigen::DSizes one_by_depth(1, depth); + Eigen::array reduction_axis; + reduction_axis[0] = 0; // Reduces on first dimension. +#else + Eigen::IndexList > rest_by_one; + rest_by_one.set(0, rest_size); + Eigen::IndexList, Index> one_by_depth; + one_by_depth.set(1, depth); + Eigen::IndexList > reduction_axis; +#endif // db = out_backprop // @@ -100,9 +111,6 @@ struct BatchNormGrad { // dm = sum_over_rest(out_backprop * gamma) * (-1 / rsqrt(v + epsilon)) // // dx = out_backprop * (gamma * rsqrt(v + epsilon)) - Eigen::array reduction_axis; - reduction_axis[0] = 0; // Reduces on first dimension. 
- db.device(d) = out_backprop.reshape(rest_by_depth).sum(reduction_axis); // scratch1 = rsqrt(v + epsilon) ",0,train c2c4c208679305d6d538255be569a2822f1c920f,tensorflow/tensorflow,"Delete scope argument (#6332) * _linear scope bug: use the scope if provided * Remove the scope argument * Remove obsolete scope arg from _linear calls",rnn_cell_impl.py,"@@ -177,7 +177,7 @@ class BasicRNNCell(RNNCell): """"""Most basic RNN: output = new_state = act(W * input + U * state + B)."""""" with vs.variable_scope(scope or ""basic_rnn_cell""): output = self._activation( - _linear([inputs, state], self._num_units, True, scope=scope)) + _linear([inputs, state], self._num_units, True)) return output, output @@ -205,14 +205,13 @@ class GRUCell(RNNCell): # We start with bias of 1.0 to not reset and not update. r, u = array_ops.split( value=_linear( - [inputs, state], 2 * self._num_units, True, 1.0, scope=scope), + [inputs, state], 2 * self._num_units, True, 1.0), num_or_size_splits=2, axis=1) r, u = sigmoid(r), sigmoid(u) with vs.variable_scope(""candidate""): c = self._activation(_linear([inputs, r * state], - self._num_units, True, - scope=scope)) + self._num_units, True)) new_h = u * state + (1 - u) * c return new_h, new_h @@ -292,7 +291,7 @@ class BasicLSTMCell(RNNCell): c, h = state else: c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1) - concat = _linear([inputs, h], 4 * self._num_units, True, scope=scope) + concat = _linear([inputs, h], 4 * self._num_units, True) # i = input_gate, j = new_input, f = forget_gate, o = output_gate i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1) @@ -450,8 +449,7 @@ class LSTMCell(RNNCell): partitioned_variables.fixed_size_partitioner( self._num_unit_shards)) # i = input_gate, j = new_input, f = forget_gate, o = output_gate - lstm_matrix = _linear([inputs, m_prev], 4 * self._num_units, bias=True, - scope=scope) + lstm_matrix = _linear([inputs, m_prev], 4 * self._num_units, bias=True) i, j, f, o = array_ops.split( value=lstm_matrix, num_or_size_splits=4, axis=1) @@ -490,7 +488,7 @@ class LSTMCell(RNNCell): proj_scope.set_partitioner( partitioned_variables.fixed_size_partitioner( self._num_proj_shards)) - m = _linear(m, self._num_proj, bias=False, scope=scope) + m = _linear(m, self._num_proj, bias=False) if self._proj_clip is not None: # pylint: disable=invalid-unary-operand-type @@ -542,7 +540,7 @@ class OutputProjectionWrapper(RNNCell): output, res_state = self._cell(inputs, state) # Default scope: ""OutputProjectionWrapper"" with vs.variable_scope(scope or ""output_projection_wrapper""): - projected = _linear(output, self._output_size, True, scope=scope) + projected = _linear(output, self._output_size, True) return projected, res_state @@ -584,7 +582,7 @@ class InputProjectionWrapper(RNNCell): """"""Run the input projection and then the cell."""""" # Default scope: ""InputProjectionWrapper"" with vs.variable_scope(scope or ""input_projection_wrapper""): - projected = _linear(inputs, self._num_proj, True, scope=scope) + projected = _linear(inputs, self._num_proj, True) return self._cell(projected, state) @@ -820,7 +818,7 @@ class _SlimRNNCell(RNNCell): return output, state -def _linear(args, output_size, bias, bias_start=0.0, scope=None): +def _linear(args, output_size, bias, bias_start=0.0): """"""Linear map: sum_i(args[i] * W[i]), where W[i] is a variable. Args: @@ -828,7 +826,6 @@ def _linear(args, output_size, bias, bias_start=0.0, scope=None): output_size: int, second dimension of W[i]. 
bias: boolean, whether to add a bias term or not. bias_start: starting value to initialize the bias; 0 by default. - scope: (optional) Variable scope to create parameters in. Returns: A 2D Tensor with shape [batch x output_size] equal to ",0,train 432852375ec07cde915246c841d18d3993236f17,tensorflow/tensorflow,"Lazily allocate the referenced tensors set in UniqueTensorReferences. Since the common case is that no elements will be put into the set, this helps with both CPU cost and reduces the cache-trashing footprint of constructing a UniqueTensorReferences object (which is done on every op in setting up the OpKernelContext). Change: 113377001",unique_tensor_references.h,"@@ -21,6 +21,7 @@ limitations under the License. #include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/tensor_reference.h"" #include ""tensorflow/core/lib/gtl/inlined_vector.h"" +#include ""tensorflow/core/platform/macros.h"" namespace tensorflow { @@ -32,7 +33,7 @@ namespace tensorflow { // references switches to using an unordered set. class UniqueTensorReferences { public: - UniqueTensorReferences() : frozen_(false) {} + UniqueTensorReferences() : frozen_(false), referenced_tensors_set_(nullptr) {} ~UniqueTensorReferences() { if (!frozen_) { @@ -44,6 +45,7 @@ class UniqueTensorReferences { tensor.Unref(); } } + delete referenced_tensors_set_; } // Adds a reference to tensor if its buffer is not already referenced. @@ -51,11 +53,11 @@ class UniqueTensorReferences { DCHECK(!frozen_); // Do nothing if the tensor has a null buffer. if (tensor.IsInitialized()) { - if (referenced_tensors_set_.size() > 0) { + if (referenced_tensors_set_ != nullptr) { // There are enough tensors that we are using a hash set to // de-duplicate. const TensorReference tensor_ref(tensor); - if (!referenced_tensors_set_.insert(tensor_ref).second) { + if (!referenced_tensors_set_->insert(tensor_ref).second) { // The tensor was a duplicate, so discard the reference. tensor_ref.Unref(); } @@ -70,12 +72,13 @@ class UniqueTensorReferences { if (kInVector == referenced_tensors_vector_.size()) { // There are too many tensors to keep using the N^2 algorithm // so start de-duplicating using a set. - DCHECK_EQ(0, referenced_tensors_set_.size()); // Transfer the refs from the vector to the set. - referenced_tensors_set_.reserve(kInVector); - referenced_tensors_set_.insert(referenced_tensors_vector_.begin(), - referenced_tensors_vector_.end()); - DCHECK_EQ(kInVector, referenced_tensors_set_.size()); + DCHECK(referenced_tensors_set_ == nullptr); + referenced_tensors_set_ = new ReferencedTensorsSet; + referenced_tensors_set_->reserve(kInVector); + referenced_tensors_set_->insert(referenced_tensors_vector_.begin(), + referenced_tensors_vector_.end()); + DCHECK_EQ(kInVector, referenced_tensors_set_->size()); referenced_tensors_vector_.clear(); } } @@ -87,13 +90,15 @@ class UniqueTensorReferences { void FreezeAndReturnReferences(TensorReferenceVector* out_vector) { // Prevent any further additions. 
frozen_ = true; - if (referenced_tensors_set_.size() > 0) { + if (referenced_tensors_set_ != nullptr) { DCHECK(referenced_tensors_vector_.empty()); - out_vector->reserve(referenced_tensors_set_.size()); - for (const auto& ref : referenced_tensors_set_) { + out_vector->reserve(referenced_tensors_set_->size()); + for (const auto& ref : *referenced_tensors_set_) { out_vector->push_back(ref); } - referenced_tensors_set_.clear(); + referenced_tensors_set_->clear(); + delete referenced_tensors_set_; + referenced_tensors_set_ = nullptr; } else { out_vector->reserve(referenced_tensors_vector_.size()); for (const auto& ref : referenced_tensors_vector_) { @@ -123,9 +128,16 @@ class UniqueTensorReferences { bool frozen_; TensorReferenceVector referenced_tensors_vector_; - std::unordered_set - referenced_tensors_set_; + + typedef std::unordered_set + ReferencedTensorsSet; + // Lazily allocated hash set for when the number of tensors becomes too large. + // If this is non-NULL, then we use the hash set, otherwise, we use the + // referenced_tensors_vector_ (and do O(N^2) work per insertion). + ReferencedTensorsSet* referenced_tensors_set_; + + TF_DISALLOW_COPY_AND_ASSIGN(UniqueTensorReferences); }; } // end namespace tensorflow ",0,train a64ff0f2cda9d4e35ea450d4e945009a90ddee9a,tensorflow/tensorflow,"Add device annotations to gradient_function This colocates the gradient functions with the input tensors. Otherwise they would be executed on the device that is current when calling GradientTape.gradient() which breaks splitting a large model across multiple GPUs. Fixes #33688",backprop.py,"@@ -116,13 +116,14 @@ class _MockOp(object): ) -def _gradient_function(op_name, attr_tuple, num_inputs, inputs, outputs, +def _gradient_function(op_name, attr_tuple, device, num_inputs, inputs, outputs, out_grads, skip_input_indices): """"""Calls the gradient function of the op. Args: op_name: the name of the op to be differentiated. attr_tuple: the attrs, as a tuple. + device: the device of the op. num_inputs: the number of inputs to the op. inputs: inputs to the original operation. outputs: outputs to the original operation. @@ -138,7 +139,8 @@ def _gradient_function(op_name, attr_tuple, num_inputs, inputs, outputs, if grad_fn is None: return [None] * num_inputs - return grad_fn(mock_op, *out_grads) + with ops.device(device): + return grad_fn(mock_op, *out_grads) pywrap_tensorflow.TFE_Py_RegisterGradientFunction(_gradient_function) ",0,train a64ff0f2cda9d4e35ea450d4e945009a90ddee9a,tensorflow/tensorflow,"Add device annotations to gradient_function This colocates the gradient functions with the input tensors. Otherwise they would be executed on the device that is current when calling GradientTape.gradient() which breaks splitting a large model across multiple GPUs. 
Fixes #33688",device_placement_test.py,"@@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.eager import remote @@ -27,6 +28,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.ops import variables class SoftDevicePlacementTest(test.TestCase): @@ -86,6 +88,23 @@ class SoftDevicePlacementTest(test.TestCase): # We don't support nested device placement right now. self.assertIn('GPU:0', c.device) + @test_util.run_gpu_only + def testGradientPlacement(self): + with ops.device('GPU:0'): + x = variables.Variable(1.0) + with ops.device('CPU:0'): + y = variables.Variable(1.0) + + with backprop.GradientTape() as tape: + with ops.device('GPU:0'): + x1 = constant_op.constant(2.0) * x + with ops.device('CPU:0'): + y1 = constant_op.constant(2.0) * y + z = x1 + y1 + grads = tape.gradient(z, [x, y]) + self.assertIn('GPU:0', grads[0].device) + self.assertIn('CPU:0', grads[1].device) + class ClusterPlacementTest(test.TestCase): ",0,train a64ff0f2cda9d4e35ea450d4e945009a90ddee9a,tensorflow/tensorflow,"Add device annotations to gradient_function This colocates the gradient functions with the input tensors. Otherwise they would be executed on the device that is current when calling GradientTape.gradient() which breaks splitting a large model across multiple GPUs. Fixes #33688",pywrap_tfe_src.cc,"@@ -3007,6 +3007,22 @@ PyObject* CopySequenceSettingIndicesToNull( return result; } +PyObject* DeviceFromTensorSeq(PyObject* seq) { + for (Py_ssize_t i = 0; i < PySequence_Size(seq); i++) { + PyObject* item = PySequence_ITEM(seq, i); + PyObject* dev = PyObject_GetAttrString(item, ""device""); + Py_DECREF(item); + if (dev) { + const char* devStr = TFE_GetPythonString(dev); + if (devStr && !string(devStr).empty()) { + return dev; + } + Py_DECREF(dev); + } + } + return Py_None; +} + PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, PyObject* results) { std::vector input_ids = MakeTensorIDList(inputs); @@ -3033,6 +3049,11 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, string c_op_name = TFE_GetPythonString(op_name); + PyObject* device = DeviceFromTensorSeq(results); + if (device == Py_None) { + device = DeviceFromTensorSeq(inputs); + } + PyObject* op_outputs; bool op_outputs_tuple_created = false; std::pair>* outputs_not_required; @@ -3091,14 +3112,15 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, TapeSetRecordOperation( op_name, inputs, results, input_ids, input_dtypes, - [op_name, attrs, num_inputs, op_inputs, op_outputs]() { + [op_name, attrs, device, num_inputs, op_inputs, op_outputs]() { Py_INCREF(op_name); Py_INCREF(attrs); + Py_INCREF(device); Py_INCREF(num_inputs); Py_INCREF(op_inputs); Py_INCREF(op_outputs); PyBackwardFunction* function = new PyBackwardFunction( - [op_name, attrs, num_inputs, op_inputs, op_outputs]( + [op_name, attrs, device, num_inputs, op_inputs, op_outputs]( PyObject* output_grads, const std::vector& unneeded_gradients) { if (PyErr_Occurred()) { @@ -3118,8 +3140,8 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, skip_input_indices.reset(Py_None); } 
tensorflow::Safe_PyObjectPtr callback_args(Py_BuildValue( - ""OOOOOOO"", op_name, attrs, num_inputs, op_inputs, op_outputs, - output_grads, skip_input_indices.get())); + ""OOOOOOOO"", op_name, attrs, device, num_inputs, op_inputs, + op_outputs, output_grads, skip_input_indices.get())); tensorflow::Safe_PyObjectPtr result( PyObject_CallObject(gradient_function, callback_args.get())); @@ -3130,10 +3152,11 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, }); return function; }, - [op_name, attrs, num_inputs, op_inputs, + [op_name, attrs, device, num_inputs, op_inputs, op_outputs](PyBackwardFunction* backward_function) { Py_DECREF(op_name); Py_DECREF(attrs); + Py_DECREF(device); Py_DECREF(num_inputs); Py_DECREF(op_inputs); Py_DECREF(op_outputs); @@ -3143,6 +3166,7 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, forward_function); Py_DECREF(num_inputs); + Py_DECREF(device); if (op_outputs_tuple_created) Py_DECREF(op_outputs); if (op_inputs_tuple_created) Py_DECREF(op_inputs); ",0,train 6d10ef2c2ecd30bc61e125da41b80c1c02ac4cbd,tensorflow/tensorflow,fix v1 test,gradients_test.py,"@@ -1028,7 +1028,10 @@ class GetDependentVariablesTest(test_util.TensorFlowTestCase): with context.graph_mode(): init = constant_op.constant(100.0) var = variable_scope.variable(init, name='a/replica_1') - var._handle = array_ops.identity(var, name='a') + if isinstance(var, variables.RefVariable): + var._variable = array_ops.identity(var, name='a') + else: + var._handle = array_ops.identity(var, name='a') var2 = custom_gradient.get_variable_by_name('a') self.assertEqual(var2.name, var.name) ",0,train a9b386407034529769a7fc6f95e7aaf4c1c0a15f,tensorflow/tensorflow,"[XLA:CPU] Always set the unnamed_addr bit on constants This allows LLVM (and the linker) to merge or pool the constants. In many cases LLVM can derive unnamed_addr by itself, but not if it's a constant being passed to a runtime function. PiperOrigin-RevId: 236298140",ir_emitter.cc,"@@ -189,6 +189,7 @@ llvm::Constant* IrEmitter::EmitGlobalForLiteral(const Literal& literal) { /*Initializer=*/initializer, /*Name=*/""""); result_global->setAlignment(MinimumAlignmentForShape(literal.shape())); + result_global->setUnnamedAddr(llvm::GlobalVariable::UnnamedAddr::Global); return llvm::ConstantExpr::getBitCast( result_global, IrShapeType(literal.shape())->getPointerTo()); } ",0,train a9b386407034529769a7fc6f95e7aaf4c1c0a15f,tensorflow/tensorflow,"[XLA:CPU] Always set the unnamed_addr bit on constants This allows LLVM (and the linker) to merge or pool the constants. In many cases LLVM can derive unnamed_addr by itself, but not if it's a constant being passed to a runtime function. PiperOrigin-RevId: 236298140",cpu_external_constants_test.cc,"@@ -56,8 +56,8 @@ class CpuExternalConstantsTest : public CpuCodegenTest { TEST_F(CpuExternalConstantsTest, Basic) { TestWithArray(/*rows=*/1024, /*cols=*/1024, R""( -CHECK-NOT: @constant_global_0 = external constant [1024 x [1024 x float]], align 16 -CHECK: @0 = private constant [4194304 x i8] {{.*}}, align 16 +CHECK-NOT: @constant_global_0 = external unnamed_addr constant [1024 x [1024 x float]], align 16 +CHECK: @0 = private unnamed_addr constant [4194304 x i8] {{.*}}, align 16 )""); } @@ -65,8 +65,8 @@ TEST_F(CpuExternalConstantsTest, BasicNegative) { // The constant array in this test case is small enough that there is no need // to externalize it. 
TestWithArray(/*rows=*/4, /*cols=*/4, R""( -CHECK-NOT: @constant_global_0 = external constant [16 x float], align 8 -CHECK: @0 = private constant [64 x i8] {{.*}}, align 8 +CHECK-NOT: @constant_global_0 = external unnamed_addr constant [16 x float], align 8 +CHECK: @0 = private unnamed_addr constant [64 x i8] {{.*}}, align 8 )""); } } // namespace ",0,train a9b386407034529769a7fc6f95e7aaf4c1c0a15f,tensorflow/tensorflow,"[XLA:CPU] Always set the unnamed_addr bit on constants This allows LLVM (and the linker) to merge or pool the constants. In many cases LLVM can derive unnamed_addr by itself, but not if it's a constant being passed to a runtime function. PiperOrigin-RevId: 236298140",cpu_literal_caching_test.cc,"@@ -56,8 +56,8 @@ ENTRY main { )""; string filecheck_pattern = R""( -CHECK: private constant [48 x i8] -CHECK-NOT: private constant [48 x i8] +CHECK: private unnamed_addr constant [48 x i8] +CHECK-NOT: private unnamed_addr constant [48 x i8] )""; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, @@ -102,10 +102,10 @@ ENTRY main { )""; string filecheck_pattern = R""( -CHECK-DAG: private constant [4 x i8] -CHECK-DAG: private constant [8 x i8] -CHECK-NOT: private constant [4 x i8] -CHECK-NOT: private constant [8 x i8] +CHECK-DAG: private unnamed_addr constant [4 x i8] +CHECK-DAG: private unnamed_addr constant [8 x i8] +CHECK-NOT: private unnamed_addr constant [4 x i8] +CHECK-NOT: private unnamed_addr constant [8 x i8] )""; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ",0,train a9b386407034529769a7fc6f95e7aaf4c1c0a15f,tensorflow/tensorflow,"[XLA:CPU] Always set the unnamed_addr bit on constants This allows LLVM (and the linker) to merge or pool the constants. In many cases LLVM can derive unnamed_addr by itself, but not if it's a constant being passed to a runtime function. PiperOrigin-RevId: 236298140",cpu_outfeed_test.cc,"@@ -38,7 +38,7 @@ ENTRY main { )""; string filecheck_pattern = R""( -CHECK: private constant [48 x i8] +CHECK: private unnamed_addr constant [48 x i8] )""; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ",0,train a9b386407034529769a7fc6f95e7aaf4c1c0a15f,tensorflow/tensorflow,"[XLA:CPU] Always set the unnamed_addr bit on constants This allows LLVM (and the linker) to merge or pool the constants. In many cases LLVM can derive unnamed_addr by itself, but not if it's a constant being passed to a runtime function. PiperOrigin-RevId: 236298140",fused_ir_emitter.cc,"@@ -82,6 +82,7 @@ Status FusedIrEmitter::HandleConstant(HloInstruction* constant) { /*Linkage=*/llvm::GlobalValue::PrivateLinkage, /*Initializer=*/initializer, /*Name=*/""""); + global->setUnnamedAddr(llvm::GlobalVariable::UnnamedAddr::Global); llvm::Constant* shape_constant = llvm::ConstantExpr::getBitCast( global, llvm_ir::ShapeToIrType(literal.shape(), module_)->getPointerTo()); ",0,train 1d02bb4cbaa9308dd7b4ad21ff1c74dd4134f920,tensorflow/tensorflow,Fixed indentation in test_ps_session_config,run_config_test.py,"@@ -1197,8 +1197,8 @@ class RunConfigSessionConfigTest(test.TestCase): } run_config = _create_run_config_with_cluster_spec(tf_config) self._assert_equal_session_config( - run_config.session_config, - ['/job:ps', '/job:worker', '/job:chief', '/job:master']) + run_config.session_config, + ['/job:ps', '/job:worker', '/job:chief', '/job:master']) def test_evaluator_session_config(self): tf_config = { ",0,train be5d98a8bd541bdc45a9c727a2b0a7195c85f739,tensorflow/tensorflow,"Adds integration tests for DNNClassifier. 
PiperOrigin-RevId: 157592010",dnn_test.py,"@@ -695,6 +695,169 @@ class DNNRegressorIntegrationTest(test.TestCase): batch_size=batch_size) +class DNNClassifierIntegrationTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + shutil.rmtree(self._model_dir) + + def _test_complete_flow( + self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, + n_classes, batch_size): + feature_columns = [ + feature_column.numeric_column('x', shape=(input_dimension,))] + est = dnn.DNNClassifier( + hidden_units=(2, 2), + feature_columns=feature_columns, + n_classes=n_classes, + model_dir=self._model_dir) + + # TRAIN + num_steps = 10 + est.train(train_input_fn, steps=num_steps) + + # EVALUTE + scores = est.evaluate(eval_input_fn) + self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP]) + self.assertIn('loss', six.iterkeys(scores)) + + # PREDICT + predicted_proba = np.array([ + x[prediction_keys.PredictionKeys.PROBABILITIES] + for x in est.predict(predict_input_fn) + ]) + self.assertAllEqual((batch_size, n_classes), predicted_proba.shape) + + # EXPORT + feature_spec = feature_column.make_parse_example_spec(feature_columns) + serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( + feature_spec) + export_dir = est.export_savedmodel(tempfile.mkdtemp(), + serving_input_receiver_fn) + self.assertTrue(gfile.Exists(export_dir)) + + def test_numpy_input_fn(self): + """"""Tests complete flow with numpy_input_fn."""""" + n_classes = 2 + input_dimension = 2 + batch_size = 10 + data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32) + x_data = data.reshape(batch_size, input_dimension) + y_data = np.reshape(data[:batch_size], (batch_size, 1)) + # learn y = x + train_input_fn = numpy_io.numpy_input_fn( + x={'x': x_data}, + y=y_data, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + eval_input_fn = numpy_io.numpy_input_fn( + x={'x': x_data}, + y=y_data, + batch_size=batch_size, + shuffle=False) + predict_input_fn = numpy_io.numpy_input_fn( + x={'x': x_data}, + batch_size=batch_size, + shuffle=False) + + self._test_complete_flow( + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + predict_input_fn=predict_input_fn, + input_dimension=input_dimension, + n_classes=n_classes, + batch_size=batch_size) + + def test_pandas_input_fn(self): + """"""Tests complete flow with pandas_input_fn."""""" + if not HAS_PANDAS: + return + input_dimension = 1 + n_classes = 2 + batch_size = 10 + data = np.linspace(0., 2., batch_size, dtype=np.float32) + x = pd.DataFrame({'x': data}) + y = pd.Series(data) + train_input_fn = pandas_io.pandas_input_fn( + x=x, + y=y, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + eval_input_fn = pandas_io.pandas_input_fn( + x=x, + y=y, + batch_size=batch_size, + shuffle=False) + predict_input_fn = pandas_io.pandas_input_fn( + x=x, + batch_size=batch_size, + shuffle=False) + + self._test_complete_flow( + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + predict_input_fn=predict_input_fn, + input_dimension=input_dimension, + n_classes=n_classes, + batch_size=batch_size) + + def test_input_fn_from_parse_example(self): + """"""Tests complete flow with input_fn constructed from parse_example."""""" + input_dimension = 2 + n_classes = 2 + batch_size = 10 + data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32) + data = data.reshape(batch_size, input_dimension) + + serialized_examples = [] + for datum in data: + 
example = example_pb2.Example(features=feature_pb2.Features( + feature={ + 'x': feature_pb2.Feature( + float_list=feature_pb2.FloatList(value=datum)), + 'y': feature_pb2.Feature( + float_list=feature_pb2.FloatList(value=datum[:1])), + })) + serialized_examples.append(example.SerializeToString()) + + feature_spec = { + 'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32), + 'y': parsing_ops.FixedLenFeature([1], dtypes.float32), + } + def _train_input_fn(): + feature_map = parsing_ops.parse_example(serialized_examples, feature_spec) + features = _queue_parsed_features(feature_map) + labels = features.pop('y') + return features, labels + def _eval_input_fn(): + feature_map = parsing_ops.parse_example( + input_lib.limit_epochs(serialized_examples, num_epochs=1), + feature_spec) + features = _queue_parsed_features(feature_map) + labels = features.pop('y') + return features, labels + def _predict_input_fn(): + feature_map = parsing_ops.parse_example( + input_lib.limit_epochs(serialized_examples, num_epochs=1), + feature_spec) + features = _queue_parsed_features(feature_map) + features.pop('y') + return features, None + + self._test_complete_flow( + train_input_fn=_train_input_fn, + eval_input_fn=_eval_input_fn, + predict_input_fn=_predict_input_fn, + input_dimension=input_dimension, + n_classes=n_classes, + batch_size=batch_size) + + def _full_var_name(var_name): return '%s/part_0:0' % var_name ",0,train 966ed1cafc770e81e6a56be3f5715e0fe257b742,tensorflow/tensorflow,Use provided host name/ip instead of localhost if possible,grpc_server_lib.cc,"@@ -132,8 +132,9 @@ GrpcServer::~GrpcServer() { void GrpcServer::MaybeMutateBuilder(::grpc::ServerBuilder* builder) {} // Look up the port that has been requested for this task in `server_def`. -Status GrpcServer::GetPort(const ServerDef& server_def, int* port) const { +Status GrpcServer::GetHostAndPort(const ServerDef& server_def, string* host_name, int* port) const { *port = -1; + *host_name = ""localhost""; for (const auto& job : server_def.cluster().job()) { if (job.name() == server_def.job_name()) { auto iter = job.tasks().find(server_def.task_index()); @@ -153,6 +154,10 @@ Status GrpcServer::GetPort(const ServerDef& server_def, int* port) const { ""Could not parse port for local server from \"""", iter->second, ""\"".""); } + + if (colon_index != string::npos && !iter->second.substr(0, colon_index).empty()) { + *host_name = iter->second.substr(0, colon_index); + } } break; } @@ -175,7 +180,9 @@ Status GrpcServer::Init(const GrpcServerOptions& opts) { // otherwise if 'task_index=-1' the program will abort. 
int requested_port; - TF_RETURN_IF_ERROR(GetPort(server_def_, &requested_port)); + string host_name; + TF_RETURN_IF_ERROR(GetHostAndPort(server_def_, &host_name, &requested_port)); + host_name_ = host_name; SessionOptions sess_opts; ConfigProto config = server_def_.default_session_config(); @@ -325,7 +332,7 @@ Status GrpcServer::ParseChannelSpec(const WorkerCacheFactoryOptions& options, task.second); } if (job.name() == *options.job_name && task.first == options.task_index) { - host_port = strings::StrCat(""localhost:"", bound_port_); + host_port = strings::StrCat(host_name_, "":"", bound_port_); } else { host_port = task.second; } @@ -478,7 +485,7 @@ Status GrpcServer::Join() { } const string GrpcServer::target() const { - return strings::StrCat(""grpc://localhost:"", bound_port_); + return strings::StrCat(""grpc://"", host_name_, "":"", bound_port_); } std::shared_ptr<::grpc::ServerCredentials> GrpcServer::GetServerCredentials( ",0,train 966ed1cafc770e81e6a56be3f5715e0fe257b742,tensorflow/tensorflow,Use provided host name/ip instead of localhost if possible,grpc_server_lib.h,"@@ -104,7 +104,7 @@ class GrpcServer : public ServerInterface { Status UpdateServerDef(const ServerDef& server_def); protected: - virtual Status GetPort(const ServerDef& server_def, int* port) const; + virtual Status GetHostAndPort(const ServerDef& server_def, string* host_name, int* port) const; Status Init(const GrpcServerOptions& opts = GrpcServerOptions()); // A subclass can override this method to support secure credentials. @@ -136,6 +136,9 @@ class GrpcServer : public ServerInterface { // The port to which this server is bound. int bound_port_ = 0; + // The host name of this server + string host_name_; + // Guards server configuration, server, and state. mutex mu_; ",0,train 0a335dc4fd8cae06d331589eab5858fd0a3ffc73,tensorflow/tensorflow,"[XLA] Prevent using XlaOp from the wrong XlaBuilder. PiperOrigin-RevId: 190312254",xla_builder.cc,"@@ -284,10 +284,12 @@ XlaOp XlaBuilder::Mul(const XlaOp& lhs, const XlaOp& rhs, } XlaOp XlaBuilder::ConstantLiteral(const Literal& literal) { - HloInstructionProto instr; - *instr.mutable_shape() = literal.shape(); - *instr.mutable_literal() = literal.ToProto(); - return AddInstruction(std::move(instr), HloOpcode::kConstant); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + *instr.mutable_shape() = literal.shape(); + *instr.mutable_literal() = literal.ToProto(); + return AddInstruction(std::move(instr), HloOpcode::kConstant); + }()); } XlaOp XlaBuilder::Call(const XlaComputation& computation, @@ -794,8 +796,9 @@ XlaOp XlaBuilder::Recv(const Shape& shape, const ChannelHandle& handle) { return UnimplementedOp(); } -XlaOp XlaBuilder::AddInstruction(HloInstructionProto&& instr, HloOpcode opcode, - tensorflow::gtl::ArraySlice operands) { +StatusOr XlaBuilder::AddInstruction( + HloInstructionProto&& instr, HloOpcode opcode, + tensorflow::gtl::ArraySlice operands) { const int64 handle = instructions_.size(); instr.set_id(handle); instr.set_opcode(HloOpcodeString(opcode)); @@ -806,6 +809,10 @@ XlaOp XlaBuilder::AddInstruction(HloInstructionProto&& instr, HloOpcode opcode, instr.set_name(StrCat(instr.name(), ""."", handle)); } for (const auto& operand : operands) { + TF_RET_CHECK(operand.builder_ != nullptr); + TF_RET_CHECK(operand.builder_ == this) + << ""Do not add XlaOp from builder "" << operand.builder_->name() + << "" to builder "" << this->name(); instr.add_operand_ids(operand.handle()); // TODO(b/74197823): Set metadata and sharding. 
} ",0,train 0a335dc4fd8cae06d331589eab5858fd0a3ffc73,tensorflow/tensorflow,"[XLA] Prevent using XlaOp from the wrong XlaBuilder. PiperOrigin-RevId: 190312254",xla_builder.h,"@@ -706,8 +706,9 @@ class XlaBuilder { StatusOr GetProgramShape(); private: - XlaOp AddInstruction(HloInstructionProto&& instr, HloOpcode opcode, - tensorflow::gtl::ArraySlice operands = {}); + StatusOr AddInstruction( + HloInstructionProto&& instr, HloOpcode opcode, + tensorflow::gtl::ArraySlice operands = {}); // Notes that the error occurred by: // * storing it internally and capturing a backtrace if it's the first error ",0,train 0a335dc4fd8cae06d331589eab5858fd0a3ffc73,tensorflow/tensorflow,"[XLA] Prevent using XlaOp from the wrong XlaBuilder. PiperOrigin-RevId: 190312254",xla_builder_test.cc,"@@ -179,5 +179,16 @@ TEST_F(XlaBuilderTest, BinopHasInDimAndDegenerateBroadcast) { op::Broadcast(op::Reshape(op::Parameter(1))))); } +TEST_F(XlaBuilderTest, OperandFromWrongBuilder) { + XlaBuilder b1(""b1""); + auto p0 = b1.Parameter(0, ShapeUtil::MakeShape(F32, {}), ""p0""); + XlaBuilder builder(""main""); + builder.Add(p0, p0); + auto statusor = builder.Build(); + ASSERT_FALSE(statusor.ok()); + EXPECT_THAT(statusor.status().error_message(), + HasSubstr(""Do not add XlaOp from builder b1 to builder main"")); +} + } // namespace } // namespace xla ",0,train 10522e2c58649fd8588ec32f8e11d25f18d271e4,tensorflow/tensorflow,"if condition modified if condition modified to not use the else part",image_ops_impl.py,"@@ -1737,9 +1737,7 @@ def convert_image_dtype(image, dtype, saturate=False, name=None): """""" image = ops.convert_to_tensor(image, name='image') dtype = tf.dtypes.as_dtype(dtype) - if dtype.is_floating or dtype.is_integer: - pass - else: + if not dtype.is_floating and not dtype.is_integer: raise AttributeError(""dtype must be either floating point or integer"") if dtype == image.dtype: return array_ops.identity(image, name=name) ",0,train c5b8e150645e5c2178b65477ad575c35279124d8,tensorflow/tensorflow,"Fix reference count leak with scatter_nd Fixes #27288 PiperOrigin-RevId: 250102793",scatter_nd_op.cc,"@@ -273,6 +273,7 @@ class ScatterNdUpdateOp : public OpKernel { if (dtype_ == DT_RESOURCE) { Var* v; OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v)); + core::ScopedUnref scoped_unref(v); Tensor* t = v->tensor(); params = *t; params_shape = params.shape(); ",0,test 0e3574d39c66d937fa9f9d2e25554aab0066f250,tensorflow/tensorflow,"Add rank check to Sub op delegation to NNAPI PiperOrigin-RevId: 307821863 Change-Id: Ib98448d67e9948576e6c9fb43a98d364ab434e37",nnapi_delegate.cc,"@@ -1799,7 +1799,7 @@ bool NNAPIDelegateKernel::Validate( "" NNAPI only support float tanh."", &val_ctx); } break; case kTfLiteBuiltinSub: { - ExpectMaxOpVersion(version, 2, &val_ctx); + ExpectMaxOpVersion(version, 3, &val_ctx); const TfLiteType input_type = context->tensors[node->inputs->data[0]].type; Expect((android_sdk_version >= kMinSdkVersionForNNAPI11 && @@ -1808,6 +1808,13 @@ bool NNAPIDelegateKernel::Validate( IsQuantized(input_type)), NNAPIValidationFailureType::kUnsupportedInputType, ""NNAPI only support float sub."", &val_ctx); + const int input0_rank = + context->tensors[node->inputs->data[0]].dims->size; + const int input1_rank = + context->tensors[node->inputs->data[1]].dims->size; + Expect(input0_rank <= 4 && input1_rank <= 4, + NNAPIValidationFailureType::kUnsupportedOperandRank, + ""Input rank must be <= 4"", &val_ctx); } break; case kTfLiteBuiltinDiv: { ExpectOpVersion(version, 1, &val_ctx); @@ -2327,7 +2334,7 @@ bool 
NNAPIDelegateKernel::Validate( ""Unsupported operation type."", &val_ctx); } return val_ctx.is_valid; -} +} // NOLINT(readability/fn_size) TfLiteStatus NNAPIDelegateKernel::Map( TfLiteContext* context, int builtin_code, int version, ",0,test 175e49a0df73d6256146152591bf599bd3a9734b,tensorflow/tensorflow,[CostModel] Add TF_ASSERT_OK,costmodel_test.cc,"@@ -24,6 +24,7 @@ limitations under the License. #include ""tensorflow/core/framework/cost_graph.pb.h"" #include ""tensorflow/core/framework/step_stats.pb.h"" #include ""tensorflow/core/graph/graph.h"" +#include ""tensorflow/core/lib/core/status_test_util.h"" #include ""tensorflow/core/platform/protobuf.h"" #include ""tensorflow/core/platform/test.h"" #include ""tensorflow/core/util/dump_graph.h"" @@ -83,8 +84,10 @@ TEST(CostModelTest, GlobalId) { CostModelManager cost_model_manager; collector.BuildCostModel(&cost_model_manager, device_map); CostGraphDef cost_graph_def; - cost_model_manager.AddToCostGraphDef(graph1.get(), &cost_graph_def); - cost_model_manager.AddToCostGraphDef(graph2.get(), &cost_graph_def); + TF_ASSERT_OK( + cost_model_manager.AddToCostGraphDef(graph1.get(), &cost_graph_def)); + TF_ASSERT_OK( + cost_model_manager.AddToCostGraphDef(graph2.get(), &cost_graph_def)); ASSERT_EQ(cost_graph_def.node_size(), 12); absl::flat_hash_map ids; for (auto node : cost_graph_def.node()) { ",0,train 3df3f818da59771113d9b2b24cd06072ea948dc3,tensorflow/tensorflow,"Use 'JNI_FALSE' rather than 'false' for Java boolean value. PiperOrigin-RevId: 383832302 Change-Id: Ifefbe56bbd44b9814f323fde560b46081bca3fe1",nativeinterpreterwrapper_jni.cc,"@@ -254,7 +254,7 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_hasUnresolvedFlexOp( JNIEnv* env, jclass clazz, jlong handle) { #if TFLITE_DISABLE_SELECT_JAVA_APIS TFLITE_LOG(tflite::TFLITE_LOG_WARNING, ""Not supported: hasUnresolvedFlexOp""); - return false; + return JNI_FALSE; #else Interpreter* interpreter = convertLongToInterpreter(env, handle); if (interpreter == nullptr) return JNI_FALSE; ",0,train e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks Details of the changes: - In the Python API of tensorflow, Const ops are created by calling `_create_op_internal()` from constant_op.py. This differs from how most other ops are created, and is similar to Placeholder ops, which are already instrumented by tfdbg2' op_callbacks. In this CL, we add a op_callback hook to the code in constant_op.py to allow instrumentation of Const ops. that makes that call. - In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op, so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`. - In `dumping_callback_test.py`, replace use of a deprecated Dataset API (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`) - Make other necessary changes to tfdbg2's tests to accommodate the Const ops which were previously not instrumented, but are now. - Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6 to avoid timeouts under the instrumented number of instrumented ops. 
PiperOrigin-RevId: 307723353 Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",check_numerics_callback_test.py,"@@ -94,10 +94,16 @@ class CheckNumericsCallbackTest(test_util.TensorFlowTestCase): dataset = dataset_ops.Dataset.from_tensor_slices(tensor).batch(2).map( map_fn) - iterator = dataset_ops.make_one_shot_iterator(dataset) - self.assertAllClose(self.evaluate(iterator.get_next()), np.log([1.25, 2])) - self.assertAllClose(self.evaluate(iterator.get_next()), np.log([3.25, 5])) + @def_function.function + def get_batches(): + iterator = iter(dataset) + return [next(iterator), next(iterator)] + + batches = self.evaluate(get_batches()) + self.assertLen(batches, 2) + self.assertAllClose(batches[0], np.log([1.25, 2])) + self.assertAllClose(batches[1], np.log([3.25, 5])) class CheckNumericsCallbackUnhealthyTest(test_util.TensorFlowTestCase): @@ -267,6 +273,23 @@ class CheckNumericsCallbackUnhealthyTest(test_util.TensorFlowTestCase): self.assertTrue(re.search(r""Stack trace of op's creation"", message)) self.assertIn(""accum.assign(accum * 2.0)"", message) + @test_util.run_in_graph_and_eager_modes + def testNanInConstIsCaptured(self): + check_numerics_callback.enable_check_numerics() + v = variables.Variable(3.0, dtype=dtypes.float32) + @def_function.function + def add_a_bad_constant(x): + c = constant_op.constant(np.nan) + return x + c + if not context.executing_eagerly(): + self.evaluate(v.initializer) + message = self._assertRaisesInvalidArgumentErrorAndGetMessage( + lambda: self.evaluate(add_a_bad_constant(v))) + self.assertTrue(re.search(r""graph op.*\""Const\"""", message)) + self.assertTrue(re.search(r""dtype:.*float32"", message)) + self.assertTrue(re.search(r""shape:.*\(\)"", message)) + self.assertTrue(re.search(r""Graph name:.*add_a_bad_constant"", message)) + @test_util.run_in_graph_and_eager_modes def testCatchInfinityInDatasetMapFunction(self): """"""Test that callback catches NaN in a tf.dataset map function."""""" ",0,train e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks Details of the changes: - In the Python API of tensorflow, Const ops are created by calling `_create_op_internal()` from constant_op.py. This differs from how most other ops are created, and is similar to Placeholder ops, which are already instrumented by tfdbg2' op_callbacks. In this CL, we add a op_callback hook to the code in constant_op.py to allow instrumentation of Const ops. that makes that call. - In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op, so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`. - In `dumping_callback_test.py`, replace use of a deprecated Dataset API (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`) - Make other necessary changes to tfdbg2's tests to accommodate the Const ops which were previously not instrumented, but are now. - Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6 to avoid timeouts under the instrumented number of instrumented ops. 
PiperOrigin-RevId: 307723353 Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",debug_events_monitors_test.py,"@@ -173,7 +173,8 @@ class DebugEventsMonitorTest(dumping_callback_test_lib.DumpingCallbackTestBase, self.assertLen(traces[1].debug_tensor_value, 11) self.assertLen(traces[2].debug_tensor_value, 11) elif tensor_debug_mode == ""FULL_TENSOR"": - self.assertLen(traces, 4) # [Placeholder:0, Unique:0, Unique:1, Sum:0]. + # [Placeholder:0, Unique:0, Unique:1, Const:0, Sum:0]. + self.assertLen(traces, 5) self.assertEqual(traces[0].op_type, ""Placeholder"") self.assertEqual(traces[0].output_slot, 0) self.assertIsNone(traces[0].debug_tensor_value) @@ -192,11 +193,16 @@ class DebugEventsMonitorTest(dumping_callback_test_lib.DumpingCallbackTestBase, self.assertAllEqual( reader.graph_execution_trace_to_tensor_value(traces[2]), [0, 1, 2, 3, 0]) - self.assertEqual(traces[3].op_type, ""Sum"") + self.assertEqual(traces[3].op_type, ""Const"") self.assertEqual(traces[3].output_slot, 0) self.assertIsNone(traces[3].debug_tensor_value) self.assertAllClose( - reader.graph_execution_trace_to_tensor_value(traces[3]), 17.) + reader.graph_execution_trace_to_tensor_value(traces[3]), [0]) + self.assertEqual(traces[4].op_type, ""Sum"") + self.assertEqual(traces[4].output_slot, 0) + self.assertIsNone(traces[4].debug_tensor_value) + self.assertAllClose( + reader.graph_execution_trace_to_tensor_value(traces[4]), 17.) class AlertDataObjectsTest(test_util.TensorFlowTestCase): ",0,train e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks Details of the changes: - In the Python API of tensorflow, Const ops are created by calling `_create_op_internal()` from constant_op.py. This differs from how most other ops are created, and is similar to Placeholder ops, which are already instrumented by tfdbg2' op_callbacks. In this CL, we add a op_callback hook to the code in constant_op.py to allow instrumentation of Const ops. that makes that call. - In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op, so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`. - In `dumping_callback_test.py`, replace use of a deprecated Dataset API (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`) - Make other necessary changes to tfdbg2's tests to accommodate the Const ops which were previously not instrumented, but are now. - Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6 to avoid timeouts under the instrumented number of instrumented ops. PiperOrigin-RevId: 307723353 Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",dumping_callback.py,"@@ -292,7 +292,12 @@ class _DumpingCallback(object): # TODO(cais): Evaluate performance optimization options. For the # `NO_TENSOR` debug mode, an alternative is to add `debug_tensor` as a # control dependency of `tensor.op` without an additional identity op. - if tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR: + if (tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR and + op_type != ""Const""): + # NOTE(b/153716279): Under v1 graph mode, overriding the output tensor + # of Const ops can lead to downstream errors related to shapes. We opt + # to use an identity op to avoid this issue at the cost of slightly + # larger graph size. 
return debug_tensor else: identity = array_ops.identity(tensor) @@ -530,8 +535,8 @@ class _DumpingCallback(object): is_v1_graph_mode = not ops.executing_eagerly_outside_functions() context_id = self._get_context_id(graph) # Innermost context ID. output_tensor_ids = self._get_symbolic_tensor_ids(len(outputs)) - if op_type in (""Placeholder"", ""PlaceholderWithDefault""): - # In some cases, the op name of a Placeholder op in a graph + if op_type in (""Const"", ""Placeholder"", ""PlaceholderWithDefault""): + # In some cases, the op name of a Const or Placeholder op in a graph # can be duplicate (e.g., with the name ""resource""). # When this happens, we give the op an debugger-generated name # in order to prevent problems and check failures down the pipe. ",0,train e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks Details of the changes: - In the Python API of tensorflow, Const ops are created by calling `_create_op_internal()` from constant_op.py. This differs from how most other ops are created, and is similar to Placeholder ops, which are already instrumented by tfdbg2' op_callbacks. In this CL, we add a op_callback hook to the code in constant_op.py to allow instrumentation of Const ops. that makes that call. - In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op, so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`. - In `dumping_callback_test.py`, replace use of a deprecated Dataset API (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`) - Make other necessary changes to tfdbg2's tests to accommodate the Const ops which were previously not instrumented, but are now. - Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6 to avoid timeouts under the instrumented number of instrumented ops. 
PiperOrigin-RevId: 307723353 Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",dumping_callback_test.py,"@@ -289,7 +289,8 @@ class DumpingCallbackTest( with debug_events_reader.DebugDataReader(self.dump_root) as reader: reader.update() graph_exec_traces = reader.graph_execution_traces() - executed_op_types = [trace.op_type for trace in graph_exec_traces] + executed_op_types = [trace.op_type for trace in graph_exec_traces + if trace.op_type != ""Const""] self.assertCountEqual( executed_op_types, [""Placeholder"", ""Placeholder"", ""AddV2"", ""Sub"", ""RealDiv""]) @@ -344,6 +345,46 @@ class DumpingCallbackTest( self.assertAllClose(trace.debug_tensor_value, [tensor_id, 19, 1, 8, 8, 0, 0, 0, 0, 0]) + @parameterized.named_parameters( + (""CurtHealth"", ""CURT_HEALTH""), + (""FullTensor"", ""FULL_TENSOR""), + ) + @test_util.run_in_graph_and_eager_modes + def testConstTensorsAreCaptured(self, tensor_debug_mode): + writer = dumping_callback.enable_dump_debug_info( + self.dump_root, tensor_debug_mode=tensor_debug_mode) + @def_function.function + def times_two_plus_three(x): + return x * constant_op.constant(2.0) + constant_op.constant(3.0) + self.assertAllEqual( + self.evaluate(times_two_plus_three(10.0)), 23.0) + writer.FlushNonExecutionFiles() + writer.FlushExecutionFiles() + + with debug_events_reader.DebugDataReader(self.dump_root) as reader: + reader.update() + const_traces = [trace for trace in reader.graph_execution_traces() + if trace.op_type == ""Const""] + self.assertGreaterEqual(len(const_traces), 3) + if tensor_debug_mode == ""CURT_HEALTH"": + # Under CURT_HEALTH, each debug tensor value has the form + # [tensor_id, has_inf_or_nan]. + self.assertLen(const_traces[0].debug_tensor_value, 2) + self.assertEqual(const_traces[0].debug_tensor_value[1], 0) + self.assertLen(const_traces[1].debug_tensor_value, 2) + self.assertEqual(const_traces[1].debug_tensor_value[1], 0) + self.assertLen(const_traces[2].debug_tensor_value, 2) + self.assertEqual(const_traces[2].debug_tensor_value[1], 0) + else: # FULL_TENSOR. + const_tensor_values = [ + reader.graph_execution_trace_to_tensor_value(const_trace) + for const_trace in const_traces] + # Avoid making assertion on the particular order of the debug tensors + # for the three Consts because it may be indeterminate. 
+ self.assertIn(10.0, const_tensor_values) + self.assertIn(2.0, const_tensor_values) + self.assertIn(3.0, const_tensor_values) + @parameterized.named_parameters( (""Shape"", ""SHAPE""), ) @@ -367,7 +408,8 @@ class DumpingCallbackTest( with debug_events_reader.DebugDataReader(self.dump_root) as reader: reader.update() graph_exec_traces = reader.graph_execution_traces() - executed_op_types = [trace.op_type for trace in graph_exec_traces] + executed_op_types = [trace.op_type for trace in graph_exec_traces + if trace.op_type != ""Const""] self.assertEqual( executed_op_types, [""Placeholder"", ""Placeholder"", ""LogicalAnd"", ""LogicalNot""]) @@ -489,7 +531,8 @@ class DumpingCallbackTest( _, stack_frames = reader.read_graph_op_creation_stack_trace(op_digest) self._verifyStackFrames(stack_frames) - graph_exec_traces = reader.graph_execution_traces() + graph_exec_traces = [trace for trace in reader.graph_execution_traces() + if trace.op_type != ""Const""] executed_op_types = [digest.op_type for digest in graph_exec_traces] self.assertEqual( executed_op_types, @@ -902,10 +945,10 @@ class DumpingCallbackTest( reader.update() graph_exec_digests = reader.graph_execution_traces(digest=True) executed_op_types = [digest.op_type for digest in graph_exec_digests - if digest.op_type != ""Placeholder""] + if digest.op_type not in (""Const"", ""Placeholder"")] tensor_values = [reader.graph_execution_trace_to_tensor_value(digest) for digest in graph_exec_digests - if digest.op_type != ""Placeholder""] + if digest.op_type not in (""Const"", ""Placeholder"")] if tensor_dtypes == [dtypes.float32] and not op_regex: self.assertEqual(executed_op_types, [""Unique"", ""Sum""]) @@ -1003,7 +1046,8 @@ class DumpingCallbackTest( self.assertAllClose(tensor_values, [8.0]) graph_exec_traces = reader.graph_execution_traces() - executed_op_types = [trace.op_type for trace in graph_exec_traces] + executed_op_types = [trace.op_type for trace in graph_exec_traces + if trace.op_type != ""Const""] if tensor_debug_mode != ""CURT_HEALTH"": # Less outputs a boolean tensor, which is not tracked under CURT_HEALTH. # The Less op should have been executed 5 times. ",0,train e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks Details of the changes: - In the Python API of tensorflow, Const ops are created by calling `_create_op_internal()` from constant_op.py. This differs from how most other ops are created, and is similar to Placeholder ops, which are already instrumented by tfdbg2' op_callbacks. In this CL, we add a op_callback hook to the code in constant_op.py to allow instrumentation of Const ops. that makes that call. - In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op, so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`. - In `dumping_callback_test.py`, replace use of a deprecated Dataset API (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`) - Make other necessary changes to tfdbg2's tests to accommodate the Const ops which were previously not instrumented, but are now. - Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6 to avoid timeouts under the instrumented number of instrumented ops. 
PiperOrigin-RevId: 307723353 Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",constant_op.py,"@@ -28,6 +28,7 @@ from tensorflow.core.framework import types_pb2 from tensorflow.python.eager import context from tensorflow.python.eager import execute from tensorflow.python.framework import dtypes +from tensorflow.python.framework import op_callbacks from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util @@ -299,11 +300,17 @@ def _constant_impl( value, dtype=dtype, shape=shape, verify_shape=verify_shape, allow_broadcast=allow_broadcast)) dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype) + attrs = {""value"": tensor_value, ""dtype"": dtype_value} const_tensor = g._create_op_internal( # pylint: disable=protected-access - ""Const"", [], [dtype_value.type], - attrs={""value"": tensor_value, - ""dtype"": dtype_value}, - name=name).outputs[0] + ""Const"", [], [dtype_value.type], attrs=attrs, name=name).outputs[0] + + if op_callbacks.should_invoke_op_callbacks(): + # TODO(b/147670703): Once the special-op creation code paths + # are unified. Remove this `if` block. + callback_outputs = op_callbacks.invoke_op_callbacks( + ""Const"", tuple(), attrs, (const_tensor,), op_name=name, graph=g) + if callback_outputs is not None: + const_tensor, = callback_outputs return const_tensor ",0,train e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks Details of the changes: - In the Python API of tensorflow, Const ops are created by calling `_create_op_internal()` from constant_op.py. This differs from how most other ops are created, and is similar to Placeholder ops, which are already instrumented by tfdbg2' op_callbacks. In this CL, we add a op_callback hook to the code in constant_op.py to allow instrumentation of Const ops. that makes that call. - In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op, so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`. - In `dumping_callback_test.py`, replace use of a deprecated Dataset API (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`) - Make other necessary changes to tfdbg2's tests to accommodate the Const ops which were previously not instrumented, but are now. - Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6 to avoid timeouts under the instrumented number of instrumented ops. PiperOrigin-RevId: 307723353 Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",op_callbacks_test.py,"@@ -109,7 +109,8 @@ class _NumpyFunctionCallback(object): if compat.as_bytes(op_type) in (_ENTER_OP, _EXIT_OP, _IF_OP, _MERGE_OP, _NEXT_ITERATION_OP, _STATELESS_IF_OP, _SWITCH_OP, _WHILE_OP, _IDENTITY_OP, - _VAR_HANDLE_OP, _PLACEHOLDER_OP): + _VAR_HANDLE_OP, _PLACEHOLDER_OP, + _CONSTANT_OP): # TODO(cais): Overriding the output of StatelessIf, If and While ops # currently fails with error. Investigate (b/139668453). # Avoid instrumenting Identity ops as well, as they are inserted @@ -724,7 +725,7 @@ class OpCallbacksTest(test_util.TensorFlowTestCase): def testOverrideDTypeInFuncGraph(self): def to_float64(op_type, inputs, attrs, outputs, op_name=None, graph=None): del inputs, attrs, op_name, graph # Unused. 
- if op_type == ""Placeholder"": + if op_type in (""Const"", ""Placeholder""): return outputs else: return [math_ops.cast(output, dtypes.float64) for output in outputs] @@ -751,6 +752,17 @@ class OpCallbacksTest(test_util.TensorFlowTestCase): self.assertIsNone(w) self.assertEqual(instrument.eager_op_types, [_ADD_OP]) + def testOpCallbackCapturesConstTensors(self): + instrument = _NumpyFunctionCallback() + op_callbacks.add_op_callback(instrument.callback) + + @def_function.function + def times_two_plus_three(x): + return x * 2.0 + 3.0 + + self.assertAllClose(times_two_plus_three(constant_op.constant(10.0)), 23.0) + self.assertEqual(instrument.graph_op_types.count(b""Const""), 2) + @test_util.run_in_graph_and_eager_modes def testOpCallbackWorksWithGradientTape(self): instrument = _NumpyFunctionCallback() ",0,train e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks Details of the changes: - In the Python API of tensorflow, Const ops are created by calling `_create_op_internal()` from constant_op.py. This differs from how most other ops are created, and is similar to Placeholder ops, which are already instrumented by tfdbg2' op_callbacks. In this CL, we add a op_callback hook to the code in constant_op.py to allow instrumentation of Const ops. that makes that call. - In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op, so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`. - In `dumping_callback_test.py`, replace use of a deprecated Dataset API (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`) - Make other necessary changes to tfdbg2's tests to accommodate the Const ops which were previously not instrumented, but are now. - Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6 to avoid timeouts under the instrumented number of instrumented ops. PiperOrigin-RevId: 307723353 Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",tensor_util.py,"@@ -791,6 +791,10 @@ def _ConstantValue(tensor, partial): return np.not_equal(value1, value2) elif tensor.op.type == ""StopGradient"": return constant_value(tensor.op.inputs[0], partial) + elif tensor.op.type == ""Identity"": + return constant_value(tensor.op.inputs[0], partial) + elif tensor.op.type in (""CheckNumericsV2"", ""DebugIdentityV2""): + return constant_value(tensor.op.inputs[0], partial) else: return None ",0,train e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks Details of the changes: - In the Python API of tensorflow, Const ops are created by calling `_create_op_internal()` from constant_op.py. This differs from how most other ops are created, and is similar to Placeholder ops, which are already instrumented by tfdbg2' op_callbacks. In this CL, we add a op_callback hook to the code in constant_op.py to allow instrumentation of Const ops. that makes that call. - In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op, so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`. 
- In `dumping_callback_test.py`, replace use of a deprecated Dataset API (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`) - Make other necessary changes to tfdbg2's tests to accommodate the Const ops which were previously not instrumented, but are now. - Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6 to avoid timeouts under the instrumented number of instrumented ops. PiperOrigin-RevId: 307723353 Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",confusion_matrix_test.py,"@@ -188,7 +188,7 @@ class ConfusionMatrixTest(test.TestCase): def testLabelsTooLarge(self): labels = np.asarray([1, 1, 0, 3, 5], dtype=np.int32) predictions = np.asarray([2, 1, 0, 2, 2], dtype=np.int32) - with self.assertRaisesOpError(""`labels`.*x < y""): + with self.assertRaisesOpError(""`labels`.*out of bound""): self._testConfMatrix( labels=labels, predictions=predictions, num_classes=3, truth=None) @@ -203,7 +203,7 @@ class ConfusionMatrixTest(test.TestCase): def testPredictionsTooLarge(self): labels = np.asarray([1, 1, 0, 2, 2], dtype=np.int32) predictions = np.asarray([2, 1, 0, 3, 5], dtype=np.int32) - with self.assertRaisesOpError(""`predictions`.*x < y""): + with self.assertRaisesOpError(""`predictions`.*out of bound""): self._testConfMatrix( labels=labels, predictions=predictions, num_classes=3, truth=None) ",0,train 607ffbc56cf054a02b86d05e232cb6640e11519d,tensorflow/tensorflow,"Propagate value bounds into control flow. If an argument has to be resolved into constant and if the input is dynamic, propagate the bound into the control flow using SetBound API. PiperOrigin-RevId: 358048382 Change-Id: Ib49dac4f7a94206380a16d59cc2f550b21ac1b82",arg_op.cc,"@@ -13,12 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include ""tensorflow/compiler/tf2xla/literal_util.h"" #include ""tensorflow/compiler/tf2xla/type_util.h"" #include ""tensorflow/compiler/tf2xla/xla_compilation_device.h"" #include ""tensorflow/compiler/tf2xla/xla_compiler.h"" #include ""tensorflow/compiler/tf2xla/xla_helpers.h"" #include ""tensorflow/compiler/tf2xla/xla_op_kernel.h"" #include ""tensorflow/compiler/tf2xla/xla_op_registry.h"" +#include ""tensorflow/compiler/xla/client/xla_builder.h"" #include ""tensorflow/core/framework/kernel_def_builder.h"" #include ""tensorflow/core/lib/core/errors.h"" @@ -60,6 +62,17 @@ class XlaArgOp : public XlaOpKernel { errors::InvalidArgument(""Invalid/missing argument expression"")); if (ctx->expected_output_dtype(0) == DT_VARIANT) { ctx->SetTensorListOutput(0, arg.handle()); + } else if (arg.value_bound().has_value()) { + // The argument has a bound attached to it, call SetBound op on the + // argument. + xla::XlaBuilder* builder = ctx->builder(); + auto input_op = arg.AsXlaOp(builder); + xla::Literal bound = HostTensorToLiteral(*arg.value_bound()).ValueOrDie(); + ctx->SetOutput( + 0, xla::CustomCall(builder, ""SetBound"", {input_op}, + builder->GetShape(input_op).ValueOrDie(), """", + false, {}, &bound)); + return; } else { ctx->SetOutputExpression(0, arg); } ",0,train 607ffbc56cf054a02b86d05e232cb6640e11519d,tensorflow/tensorflow,"Propagate value bounds into control flow. If an argument has to be resolved into constant and if the input is dynamic, propagate the bound into the control flow using SetBound API. 
PiperOrigin-RevId: 358048382 Change-Id: Ib49dac4f7a94206380a16d59cc2f550b21ac1b82",if_while_utils.cc,"@@ -16,6 +16,8 @@ limitations under the License. #include ""tensorflow/compiler/tf2xla/kernels/if_while_utils.h"" #include ""tensorflow/compiler/tf2xla/const_analysis.h"" +#include ""tensorflow/compiler/tf2xla/literal_util.h"" +#include ""tensorflow/compiler/xla/literal.h"" namespace tensorflow { @@ -38,11 +40,28 @@ absl::InlinedVector ConvertCompileTimeConstArgumentsToConst( xla::StatusOr> maybe_constant = expression.ResolveConstant(ctx->compiler()->client()); if (maybe_constant.ok() && maybe_constant.ValueOrDie().has_value()) { - arg->kind = XlaCompiler::Argument::kConstant; - arg->type = expression.dtype(); - arg->constant_value = std::move(maybe_constant.ValueOrDie().value()); - arg->shape = expression.GetShape().ValueOrDie(); - resolved_constant_idxs.push_back(i); + xla::StatusOr values_are_dynamic = + expression.ResolveDynamism(ctx->compiler()->client()); + bool all_values_are_static = false; + if (!values_are_dynamic.ok()) { + // Conservatiely assume all values are dynamic. + all_values_are_static = true; + } else { + xla::Literal literal = + HostTensorToLiteral(values_are_dynamic.ValueOrDie()).ValueOrDie(); + all_values_are_static = literal.IsAll(0); + } + + if (all_values_are_static) { + arg->kind = XlaCompiler::Argument::kConstant; + arg->type = expression.dtype(); + arg->constant_value = std::move(maybe_constant.ValueOrDie().value()); + arg->shape = expression.GetShape().ValueOrDie(); + resolved_constant_idxs.push_back(i); + } else { + arg->value_bound.emplace( + std::move(maybe_constant.ValueOrDie().value())); + } } } } ",0,train 607ffbc56cf054a02b86d05e232cb6640e11519d,tensorflow/tensorflow,"Propagate value bounds into control flow. If an argument has to be resolved into constant and if the input is dynamic, propagate the bound into the control flow using SetBound API. PiperOrigin-RevId: 358048382 Change-Id: Ib49dac4f7a94206380a16d59cc2f550b21ac1b82",xla_argument.h,"@@ -16,6 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_TF2XLA_XLA_ARGUMENT_H_ #define TENSORFLOW_COMPILER_TF2XLA_XLA_ARGUMENT_H_ +#include + #include ""absl/types/span.h"" #include ""tensorflow/compiler/tf2xla/host_compute_metadata.pb.h"" #include ""tensorflow/compiler/tf2xla/xla_resource.h"" @@ -75,6 +77,9 @@ struct XlaArgument { // host-memory tensor. Tensor constant_value; + // The upper bounds of the value. + absl::optional value_bound; + // The name of this argument, used for debugging. string name; ",0,train 607ffbc56cf054a02b86d05e232cb6640e11519d,tensorflow/tensorflow,"Propagate value bounds into control flow. If an argument has to be resolved into constant and if the input is dynamic, propagate the bound into the control flow using SetBound API. PiperOrigin-RevId: 358048382 Change-Id: Ib49dac4f7a94206380a16d59cc2f550b21ac1b82",xla_compiler.cc,"@@ -1159,6 +1159,10 @@ Status XlaCompiler::BuildArguments( xla::Reshape(arg_handles[i], arg.DimensionSizes()), arg.type); } else { arg_expression = XlaExpression::XlaOp(arg_handles[i], arg.type); + if (arg.value_bound) { + // Propagate upper bound to arg_expression. + arg_expression.set_value_bound(arg.value_bound.value()); + } } break; case XlaCompiler::Argument::kTensorList: { ",0,train 607ffbc56cf054a02b86d05e232cb6640e11519d,tensorflow/tensorflow,"Propagate value bounds into control flow. If an argument has to be resolved into constant and if the input is dynamic, propagate the bound into the control flow using SetBound API. 
PiperOrigin-RevId: 358048382 Change-Id: Ib49dac4f7a94206380a16d59cc2f550b21ac1b82",xla_expression.cc,"@@ -170,7 +170,9 @@ xla::StatusOr> XlaExpression::ResolveConstant( TF_ASSIGN_OR_RETURN(bool is_constant, handle().builder()->IsConstant(handle())); - if (!is_constant) return {absl::nullopt}; + if (!is_constant) { + return {absl::nullopt}; + } if (!client) return errors::InvalidArgument(""client is required to resolve constant""); ",0,train 607ffbc56cf054a02b86d05e232cb6640e11519d,tensorflow/tensorflow,"Propagate value bounds into control flow. If an argument has to be resolved into constant and if the input is dynamic, propagate the bound into the control flow using SetBound API. PiperOrigin-RevId: 358048382 Change-Id: Ib49dac4f7a94206380a16d59cc2f550b21ac1b82",xla_expression.h,"@@ -94,6 +94,13 @@ class XlaExpression { return constant_value_; } + // Set the bound of the expression. + void set_value_bound(Tensor tensor) { + value_bound_.emplace(std::move(tensor)); + } + + // Return the bound of the expression, if available. + absl::optional value_bound() const { return value_bound_; } XlaResource* resource() const { return resource_; } // Returns a human-readable summary of the expression. @@ -138,6 +145,9 @@ class XlaExpression { // The value of the constant, if available. absl::optional constant_value_; + // The bound of the expression, if available. + absl::optional value_bound_; + // The resource, if kind_ == kResource. Not owned. XlaResource* resource_ = nullptr; }; ",0,train 674048cad145f8e4000aec4d1ec7f9854ad9c44c,tensorflow/tensorflow,Remove unnecessary checks,generic_layout_optimizer_transposer.cc,"@@ -1061,8 +1061,7 @@ Status DefaultLayoutAgnosticOpTransposer::TransposeNode( dst_format_3d); } if (!ShouldProcess(*context, *node) || (rank != 4 && rank != 5) || - !IsFanoutPortRankN(*node, 0, rank) || !IsAfterDstToSrcTransform(*context, - *node)) { + !IsAfterDstToSrcTransform(*context, *node)) { if (allow_5d) { context->AssignDeviceAndDataFormats(context->target_device, src_format, dst_format); ",0,train bf62fcec003636338386f5246103b90a9580181c,tensorflow/tensorflow,"Automated rollback of commit 23e33f871b2bf2879b40ebf3b883e104f30f389b. Revert #31450. PiperOrigin-RevId: 262675086",core.py,"@@ -26,7 +26,6 @@ import warnings import numpy as np from tensorflow.python.eager import context -from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -581,29 +580,9 @@ class Flatten(Layer): permutation.append(1) inputs = array_ops.transpose(inputs, perm=permutation) - input_shape = inputs.shape - if input_shape[1:].is_fully_defined(): - flattened_dim = tensor_shape.dimension_value( - np.prod(input_shape[1:], dtype=int)) - # Temporary fix for integer overflow issue. - if flattened_dim > np.iinfo(np.int32).max: - shape_dtype = dtypes.int64 - else: - shape_dtype = dtypes.int32 - outputs = array_ops.reshape( - inputs, constant_op.constant((-1, flattened_dim), dtype=shape_dtype)) - else: - batch_size = tensor_shape.dimension_value(inputs.shape[0]) - if batch_size: - # Temporary fix for integer overflow issue. 
- if batch_size > np.iinfo(np.int32).max: - shape_dtype = dtypes.int64 - else: - shape_dtype = dtypes.int32 - outputs = array_ops.reshape( - inputs, constant_op.constant((batch_size, -1), dtype=shape_dtype)) - else: - outputs = array_ops.reshape(inputs, (array_ops.shape(inputs)[0], -1)) + outputs = array_ops.reshape( + inputs, (tensor_shape.dimension_value(inputs.shape[0]) or + array_ops.shape(inputs)[0], -1)) if not context.executing_eagerly(): outputs.set_shape(self.compute_output_shape(inputs.shape)) return outputs ",0,test bf62fcec003636338386f5246103b90a9580181c,tensorflow/tensorflow,"Automated rollback of commit 23e33f871b2bf2879b40ebf3b883e104f30f389b. Revert #31450. PiperOrigin-RevId: 262675086",core_test.py,"@@ -556,20 +556,6 @@ class FlattenTest(test.TestCase): self.assertEqual(list(np_output.shape), [5, 6]) self.assertEqual(y.get_shape().as_list(), [5, None]) - @test_util.run_deprecated_v1 - def testFlattenLargeDim(self): - x = array_ops.placeholder(shape=(None, 21316, 21316, 80), dtype='float32') - y = core_layers.Flatten()(x) - self.assertEqual(y.shape.as_list(), [None, 21316 * 21316 * 80]) - - @test_util.run_deprecated_v1 - def testFlattenLargeBatchDim(self): - batch_size = np.iinfo(np.int32).max + 10 - x = array_ops.placeholder( - shape=(batch_size, None, None, 1), dtype='float32') - y = core_layers.Flatten()(x) - self.assertEqual(y.shape.as_list(), [batch_size, None]) - if __name__ == '__main__': test.main() ",0,test e8d4a3d079ec9c49c75e93978c5b9a3709a623fd,tensorflow/tensorflow,"Adds numerical correctness tests for all Keras modes and model types PiperOrigin-RevId: 225584709",correctness_test.py,"@@ -0,0 +1,147 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +""""""Tests for numerical correctness."""""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import numpy as np + +from tensorflow.python import keras +from tensorflow.python.keras import keras_parameterized +from tensorflow.python.keras import testing_utils +from tensorflow.python.platform import test + + +class Bias(keras.layers.Layer): + """"""Layer that add a bias to its inputs."""""" + + def build(self, input_shape): + self.bias = self.add_variable('bias', (1,), initializer='zeros') + + def call(self, inputs): + return inputs + self.bias + + +class MultiInputSubclassed(keras.Model): + """"""Subclassed Model that adds its inputs and then adds a bias."""""" + + def __init__(self): + super(MultiInputSubclassed, self).__init__() + self.add = keras.layers.Add() + self.bias = Bias() + + def call(self, inputs): + added = self.add(inputs) + return self.bias(added) + + +def multi_input_functional(): + """"""Functional Model that adds its inputs and then adds a bias."""""" + input_1 = keras.Input(shape=(1,)) + input_2 = keras.Input(shape=(1,)) + input_3 = keras.Input(shape=(1,)) + added = keras.layers.Add()([input_1, input_2, input_3]) + output = Bias()(added) + return keras.Model([input_1, input_2, input_3], output) + + +@keras_parameterized.run_with_all_model_types +@keras_parameterized.run_all_keras_modes +class SimpleBiasTest(keras_parameterized.TestCase): + + def _get_simple_bias_model(self): + model = testing_utils.get_model_from_layers([Bias()], input_shape=(1,)) + model.compile(keras.optimizer_v2.gradient_descent.SGD(0.1), 'mae') + return model + + def test_simple_bias_fit(self): + x = np.array([[0.], [1.], [2.]]) + y = np.array([[0.5], [2.], [3.5]]) + model = self._get_simple_bias_model() + + history = model.fit(x, y, batch_size=3, epochs=5) + self.assertAllClose(history.history['loss'], [1., 0.9, 0.8, 0.7, 0.6]) + + def test_simple_bias_evaluate(self): + x = np.array([[0.], [1.], [2.]]) + y = np.array([[1.], [3.], [5.]]) + model = self._get_simple_bias_model() + + loss = model.evaluate(x, y, batch_size=1) + self.assertAlmostEqual(loss, 2.) 
+ + def test_simple_bias_predict(self): + x = np.array([[0.], [1.], [2.]]) + model = self._get_simple_bias_model() + + pred = model.predict(x, batch_size=1) + self.assertAllClose(x, pred) + + +@keras_parameterized.run_all_keras_modes +class MultipleInputTest(keras_parameterized.TestCase): + + def _get_multiple_input_model(self, subclassed=True): + if subclassed: + model = MultiInputSubclassed() + else: + model = multi_input_functional() + model.compile(keras.optimizer_v2.gradient_descent.SGD(0.1), 'mae') + return model + + @parameterized.named_parameters(('subclassed', True), ('functional', False)) + def test_multiple_input_fit(self, subclassed): + x = [ + np.array([[1.], [2.], [3.]]), + np.array([[4.], [5.], [6.]]), + np.array([[7.], [8.], [9.]]) + ] + y = np.array([[12.5], [16.], [19.5]]) + + model = self._get_multiple_input_model(subclassed) + history = model.fit(x, y, batch_size=3, epochs=5) + self.assertAllClose(history.history['loss'], [1., 0.9, 0.8, 0.7, 0.6]) + + @parameterized.named_parameters(('subclassed', True), ('functional', False)) + def test_multiple_input_evaluate(self, subclassed): + x = [ + np.array([[1.], [2.], [3.]]), + np.array([[4.], [5.], [6.]]), + np.array([[7.], [8.], [9.]]) + ] + y = np.array([[13.], [17.], [21.]]) + + model = self._get_multiple_input_model(subclassed) + loss = model.evaluate(x, y, batch_size=3) + self.assertAlmostEqual(loss, 2.) + + @parameterized.named_parameters(('subclassed', True), ('functional', False)) + def test_multiple_input_predict(self, subclassed): + x = [ + np.array([[1.], [2.], [3.]]), + np.array([[4.], [5.], [6.]]), + np.array([[7.], [8.], [9.]]) + ] + + model = self._get_multiple_input_model(subclassed) + pred = model.predict(x, batch_size=1) + self.assertAllClose(pred, [[12.], [15.], [18.]]) + + +if __name__ == '__main__': + test.main() ",0,train e8d4a3d079ec9c49c75e93978c5b9a3709a623fd,tensorflow/tensorflow,"Adds numerical correctness tests for all Keras modes and model types PiperOrigin-RevId: 225584709",training_eager_test.py,"@@ -248,21 +248,6 @@ class CorrectnessTest(keras_parameterized.TestCase): layer(1.) # Plain-value inputs are only valid in eager mode. self.assertEqual(1, len(layer.losses)) - def test_predict_correctness(self): - i1 = keras.layers.Input(shape=(4, 5)) - i2 = keras.layers.Input(shape=(4, 5)) - i3 = keras.layers.Input(shape=(4, 5)) - o = keras.layers.add([i1, i2, i3]) - model = keras.models.Model([i1, i2, i3], o) - model.run_eagerly = True - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 4, 5)) - x3 = np.random.random((2, 4, 5)) - out = model.predict([x1, x2, x3]) - - self.assertAllClose(out, x1 + x2 + x3) - if __name__ == '__main__': ops.enable_eager_execution() ",0,train 7ee26f7e144849d07e985b0a1c8abf7bf36adb27,tensorflow/tensorflow,"Fix some ClangTidy warnings in third_party/tensorflow/core/common_runtime. Change: 153861629",constant_folding.cc,"@@ -43,7 +43,7 @@ namespace tensorflow { namespace { bool IsConstantFoldable(const Node* n, - std::function consider) { + const std::function& consider) { if (n->op_def().is_stateful()) { return false; } ",0,train 7ee26f7e144849d07e985b0a1c8abf7bf36adb27,tensorflow/tensorflow,"Fix some ClangTidy warnings in third_party/tensorflow/core/common_runtime. 
Change: 153861629",copy_tensor.cc,"@@ -71,7 +71,8 @@ void CopyTensor::ViaDMA(StringPiece edge_name, DeviceContext* send_dev_context, if (ri.sender_device_type == src_device_type && ri.receiver_device_type == dst_device_type) { ri.copy_function(send_dev_context, recv_dev_context, src, dst, - src_alloc_attr, dst_alloc_attr, input, output, done); + src_alloc_attr, dst_alloc_attr, input, output, + std::move(done)); return; } } ",0,train 7ee26f7e144849d07e985b0a1c8abf7bf36adb27,tensorflow/tensorflow,"Fix some ClangTidy warnings in third_party/tensorflow/core/common_runtime. Change: 153861629",executor.cc,"@@ -1434,7 +1434,7 @@ void ExecutorState::RunAsync(Executor::DoneCallback done) { } else { num_outstanding_ops_ = ready.size(); root_frame_->iterations[0]->outstanding_ops = ready.size(); - done_cb_ = done; + done_cb_ = std::move(done); // Schedule to run all the ready ops in thread pool. ScheduleReady(ready, nullptr); } @@ -2560,7 +2560,7 @@ bool ExecutorState::FrameState::CleanupIterations(const GraphView* gview, } void ExecutorImpl::RunAsync(const Args& args, DoneCallback done) { - (new ExecutorState(args, this))->RunAsync(done); + (new ExecutorState(args, this))->RunAsync(std::move(done)); } } // end namespace ",0,train 7ee26f7e144849d07e985b0a1c8abf7bf36adb27,tensorflow/tensorflow,"Fix some ClangTidy warnings in third_party/tensorflow/core/common_runtime. Change: 153861629",function.cc,"@@ -604,7 +604,7 @@ struct CustomCreatorSingleton { void Set(CustomKernelCreator cb) { mutex_lock l(mu); - custom_creator = cb; + custom_creator = std::move(cb); } CustomKernelCreator Get() { @@ -621,7 +621,7 @@ CustomCreatorSingleton* GetCustomCreatorSingleton() { } // end namespace void RegisterDefaultCustomKernelCreator(CustomKernelCreator cb) { - GetCustomCreatorSingleton()->Set(cb); + GetCustomCreatorSingleton()->Set(std::move(cb)); } FunctionLibraryRuntime* NewFunctionLibraryRuntime( @@ -631,7 +631,7 @@ FunctionLibraryRuntime* NewFunctionLibraryRuntime( CustomKernelCreator custom_kernel_creator) { return new FunctionLibraryRuntimeImpl(dmgr, env, device, graph_def_version, lib_def, optimizer_options, - custom_kernel_creator); + std::move(custom_kernel_creator)); } FunctionLibraryRuntime* NewFunctionLibraryRuntime( ",0,train 7ee26f7e144849d07e985b0a1c8abf7bf36adb27,tensorflow/tensorflow,"Fix some ClangTidy warnings in third_party/tensorflow/core/common_runtime. Change: 153861629",function_test.cc,"@@ -44,7 +44,7 @@ Status GetOpSig(const string& op, const OpDef** sig) { void FunctionTestSchedClosure(std::function fn) { static thread::ThreadPool* w = new thread::ThreadPool(Env::Default(), ""Test"", 8); - w->Schedule(fn); + w->Schedule(std::move(fn)); } void HasError(const Status& s, const string& substr) { @@ -654,7 +654,8 @@ namespace { bool DoNothing(Graph* g) { return false; } -string Optimize(std::function pass, const FunctionDef& fdef) { +string Optimize(const std::function& pass, + const FunctionDef& fdef) { InstantiationResult result; InstantiateAttrValueMap empty; TF_CHECK_OK(InstantiateFunction(fdef, empty, GetOpSig, &result)); ",0,train 7ee26f7e144849d07e985b0a1c8abf7bf36adb27,tensorflow/tensorflow,"Fix some ClangTidy warnings in third_party/tensorflow/core/common_runtime. 
Change: 153861629",rendezvous_mgr.cc,"@@ -106,7 +106,7 @@ void IntraProcessRendezvous::SameWorkerRecvDone( CopyTensor::ViaDMA(parsed.edge_name, send_args.device_context, recv_args.device_context, src_device, dst_device, send_args.alloc_attrs, recv_args.alloc_attrs, &in, out, - done); + std::move(done)); } void IntraProcessRendezvous::RecvAsync(const ParsedKey& parsed, @@ -132,7 +132,8 @@ void IntraProcessRendezvous::RecvAsync(const ParsedKey& parsed, }; if (status.ok() && in.IsInitialized()) { - SameWorkerRecvDone(parsed, send_args, recv_args, in, out, final_callback); + SameWorkerRecvDone(parsed, send_args, recv_args, in, out, + std::move(final_callback)); } else { final_callback(status); } ",0,train 771f93bd9e1a62036217e1958bb272682923d28c,tensorflow/tensorflow,"Update python package description to include python 3.9 As tf-nightly have python 3.9 available, this PR updates the python package description to include python 3.9 entry. Signed-off-by: Yong Tang ",setup.py,"@@ -343,6 +343,7 @@ setup( 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3 :: Only', 'Topic :: Scientific/Engineering', 'Topic :: Scientific/Engineering :: Mathematics', ",0,test 92976ad1f05fd2f4946837700855892e47f516ac,tensorflow/tensorflow,Minor change.,mkl_reshape_op.cc,"@@ -83,7 +83,7 @@ class MklReshapeOp : public OpKernel { TensorShape shape; int64 product = 1; int unknown_index = -1; - bool sizes_has_zero_dim; + bool sizes_has_zero_dim = false; switch (sizes.dtype()) { case DT_INT32: OP_REQUIRES_OK(context, ",0,train 4e329e4dd8e59b0d96122e2a8241bda9ba80ffb4,tensorflow/tensorflow,"Changed InputColocationExemptionRegistry::ops_ to gtl::FlatSet instead of set. PiperOrigin-RevId: 268973789",input_colocation_exemption_registry.cc,"@@ -27,8 +27,6 @@ InputColocationExemptionRegistry* InputColocationExemptionRegistry::Global() { return registry; } -const std::set& InputColocationExemptionRegistry::Get() { return ops_; } - void InputColocationExemptionRegistry::Register(const string& op) { auto it = ops_.find(op); if (it != ops_.end()) { ",0,train 4e329e4dd8e59b0d96122e2a8241bda9ba80ffb4,tensorflow/tensorflow,"Changed InputColocationExemptionRegistry::ops_ to gtl::FlatSet instead of set. PiperOrigin-RevId: 268973789",input_colocation_exemption_registry.h,"@@ -15,9 +15,9 @@ limitations under the License. #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_INPUT_COLOCATION_EXEMPTION_REGISTRY_H_ #define TENSORFLOW_CORE_COMMON_RUNTIME_INPUT_COLOCATION_EXEMPTION_REGISTRY_H_ -#include #include +#include ""tensorflow/core/lib/gtl/flatset.h"" #include ""tensorflow/core/platform/types.h"" namespace tensorflow { @@ -40,13 +40,13 @@ class InputColocationExemptionRegistry { static InputColocationExemptionRegistry* Global(); // Returns the set of ops exempt from the input colocation constraints. - const std::set& Get(); + const gtl::FlatSet& Get() { return ops_; } // Registers an op to be excluded from the input colocation constraints. void Register(const string& op); private: - std::set ops_; + gtl::FlatSet ops_; }; namespace input_colocation_exemption_registration { ",0,train e692dda4c8b199555e2fa32132a7784e0893c870,tensorflow/tensorflow,"Fixed a bug in CollectiveAllReduce that sometimes the variable names it sees are not complete and thus not unique, leading to same collective keys for different variables. 
PiperOrigin-RevId: 214117466",collective_all_reduce_strategy.py,"@@ -143,8 +143,10 @@ class CollectiveAllReduceStrategy(mirrored_strategy.MirroredStrategy): def _real_mirrored_creator(devices, *args, **kwargs): """"""Creates one MirroredVariable on the current worker."""""" index = {} + unique_var_name = ops.get_default_graph().unique_name( + kwargs[""name""], mark_as_used=False).rstrip(""/"") collective_instance_key = self._collective_keys.get_instance_key( - key_id=kwargs[""name""]) + key_id=unique_var_name) if ""initial_value"" not in kwargs: raise ValueError(""Initial value must be specified."") initial_value = kwargs[""initial_value""] @@ -188,6 +190,10 @@ class CollectiveAllReduceStrategy(mirrored_strategy.MirroredStrategy): with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT): v = next_creator(*args, **kwargs) + if i == 0: + actual_var_name = v.name.split("":"")[0] + assert unique_var_name == actual_var_name, ""%r vs %r"" % ( + unique_var_name, actual_var_name) assert not isinstance(v, values.DistributedVariable) index[d] = v return index ",0,train e692dda4c8b199555e2fa32132a7784e0893c870,tensorflow/tensorflow,"Fixed a bug in CollectiveAllReduce that sometimes the variable names it sees are not complete and thus not unique, leading to same collective keys for different variables. PiperOrigin-RevId: 214117466",collective_all_reduce_strategy_test.py,"@@ -26,6 +26,7 @@ from tensorflow.contrib.distribute.python import combinations from tensorflow.contrib.distribute.python import cross_tower_utils from tensorflow.contrib.distribute.python import multi_worker_test_base from tensorflow.core.protobuf import config_pb2 +from tensorflow.python import keras from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -34,9 +35,14 @@ from tensorflow.python.layers import core from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradients from tensorflow.python.ops import init_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import random_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables +from tensorflow.python.ops.losses import losses from tensorflow.python.platform import test +from tensorflow.python.training import adam +from tensorflow.python.training import training_util class CollectiveAllReduceStrategyTestBase( @@ -146,6 +152,56 @@ class CollectiveAllReduceStrategyTestBase( self.assertLess(error_after, error_before) return error_after < error_before + def _test_complex_model(self, task_type, task_id, num_gpus): + d, master_target = self._get_test_object(task_type, task_id, num_gpus) + + def model_fn(): + """"""Mnist model with synthetic input."""""" + data_format = 'channels_last' + input_shape = [28, 28, 1] + l = keras.layers + max_pool = l.MaxPooling2D((2, 2), (2, 2), + padding='same', + data_format=data_format) + model = keras.Sequential([ + l.Reshape(target_shape=input_shape, input_shape=(28 * 28,)), + l.Conv2D( + 32, + 5, + padding='same', + data_format=data_format, + activation=nn.relu), max_pool, + l.Conv2D( + 64, + 5, + padding='same', + data_format=data_format, + activation=nn.relu), max_pool, + l.Flatten(), + l.Dense(1024, activation=nn.relu), + l.Dropout(0.4), + l.Dense(10) + ]) + image = random_ops.random_uniform([2, 28, 28]) + label = random_ops.random_uniform([2, 1], maxval=10, dtype=dtypes.int32) + logits = model(image, training=True) + loss = 
losses.sparse_softmax_cross_entropy(labels=label, logits=logits) + optimizer = adam.AdamOptimizer(learning_rate=1e-4) + train_op = optimizer.minimize(loss, + training_util.get_or_create_global_step()) + return train_op + + with ops.Graph().as_default(), \ + self.test_session(config=self._sess_config, + target=master_target) as sess: + with d.scope(): + train_op = d.call_for_each_tower(model_fn) + train_op = d.group(d.unwrap(train_op)) + + sess.run(variables.global_variables_initializer()) + sess.run(train_op) + return True + def _test_variable_initialization(self, task_type, task_id, num_gpus): distribution, master_target = self._get_test_object(task_type, task_id, num_gpus) @@ -206,6 +262,14 @@ class DistributedCollectiveAllReduceStrategyTest( self._cluster_spec, num_gpus=num_gpus) + @combinations.generate( + combinations.combine(mode=['graph'], num_gpus=[0, 1, 2], required_gpus=1)) + def testComplexModel(self, num_gpus): + if context.num_gpus() < num_gpus: + return + self._run_between_graph_clients( + self._test_complex_model, self._cluster_spec, num_gpus=num_gpus) + class DistributedCollectiveAllReduceStrategyTestWithChief( CollectiveAllReduceStrategyTestBase, parameterized.TestCase): @@ -236,6 +300,14 @@ class DistributedCollectiveAllReduceStrategyTestWithChief( self._cluster_spec, num_gpus=num_gpus) + @combinations.generate( + combinations.combine(mode=['graph'], num_gpus=[0, 1, 2], required_gpus=1)) + def testComplexModel(self, num_gpus): + if context.num_gpus() < num_gpus: + return + self._run_between_graph_clients( + self._test_complex_model, self._cluster_spec, num_gpus=num_gpus) + class LocalCollectiveAllReduceStrategy( CollectiveAllReduceStrategyTestBase, parameterized.TestCase): @@ -246,6 +318,12 @@ class LocalCollectiveAllReduceStrategy( return self._test_minimize_loss_graph(None, None, num_gpus) + def testComplexModel(self, num_gpus=2): + # Collective ops doesn't support strategy with one device. + if context.num_gpus() < num_gpus: + return + self._test_complex_model(None, None, num_gpus) + if __name__ == '__main__': test.main() ",0,train 3f803a9421fddf10a30745fc145d565d9737bd40,tensorflow/tensorflow,Make add_n() handle a single IndexedSlices argument properly,math_ops.py,"@@ -2135,6 +2135,8 @@ def _as_indexed_slices_list(inputs, optimize=True): def add_n(inputs, name=None): """"""Adds all input tensors element-wise. + Converts `IndexedSlices` objects into dense tensors prior to adding. + Args: inputs: A list of `Tensor` or `IndexedSlices` objects, each with same shape and type. 
@@ -2157,7 +2159,7 @@ def add_n(inputs, name=None): if len(inputs) == 1: if isinstance(inputs[0], ops.IndexedSlices): - values = inputs[0].values + values = ops.convert_to_tensor(inputs[0]) else: values = inputs[0] if name: ",0,train 3f803a9421fddf10a30745fc145d565d9737bd40,tensorflow/tensorflow,Make add_n() handle a single IndexedSlices argument properly,math_ops_test.py,"@@ -359,6 +359,17 @@ class AddNTest(test_util.TensorFlowTestCase): [g.eval() for g in add_n_grad]) + def testIndexedSlices(self): + slc = tf.IndexedSlices(array_ops.constant([1, 2], shape=[1, 2]), + array_ops.constant([2]), array_ops.constant([2,2]) + slc_as_dense = np.array([[0, 0], [1, 2]]) + with self.test_session(use_gpu=True): + # add_n currently always converts IndexedSlices to dense + self.assertAllEqual(slc_as_dense, math_ops.add_n([slc]).eval()) + self.assertAllEqual(2 * slc_as_dense, math_ops.add_n([slc, slc]).eval()) + + + class DivAndModTest(test_util.TensorFlowTestCase): # TODO(aselle): Test more types before exposing new division operators. ",0,train ed6357cbd4f6e47ab87b219a0e0840739c92c970,tensorflow/tensorflow,"Added missing definition for the Packet16q16i. Fixed a couple of bugs in the implementation of max reductions for avx512",PacketMathAVX2.h,"@@ -11,6 +11,13 @@ typedef struct Packet32q8i { Packet32q8i(__m256i val) : val(val) {} } Packet32q8i; +typedef struct Packet16q16i { + __m256i val; + operator __m256i() const { return val; } + Packet16q16i(); + Packet16q16i(__m256i val) : val(val) {} +} Packet16q16i; + typedef struct Packet32q8u { __m256i val; operator __m256i() const { return val; } @@ -32,6 +39,13 @@ typedef struct Packet16q8u { Packet16q8u(__m128i val) : val(val) {} } Packet16q8u; +typedef struct Packet8q16i { + __m128i val; + operator __m128i() const { return val; } + Packet8q16i(); + Packet8q16i(__m128i val) : val(val) {} +} Packet8q16i; + typedef struct Packet8q32i { __m256i val; operator __m256i() const { return val; } @@ -92,6 +106,28 @@ struct packet_traits : default_packet_traits { }; }; template <> +struct packet_traits : default_packet_traits { + typedef Packet16q16i type; + typedef Packet8q16i half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size = 16, + }; + enum { + HasAdd = 0, + HasSub = 0, + HasMul = 0, + HasNegate = 0, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 1, + HasMax = 1, + HasConj = 0, + HasSetLinear = 0 + }; +}; +template <> struct packet_traits : default_packet_traits { typedef Packet8q32i type; typedef Packet4q32i half; @@ -122,6 +158,12 @@ struct unpacket_traits { enum { size = 32, alignment=Aligned32 }; }; template <> +struct unpacket_traits { + typedef QInt16 type; + typedef Packet8q16i half; + enum { size = 16, alignment=Aligned32 }; +}; +template <> struct unpacket_traits { typedef QUInt8 type; typedef Packet16q8u half; @@ -146,6 +188,11 @@ EIGEN_STRONG_INLINE Packet32q8u ploadu(const QUInt8* from) { reinterpret_cast(from)); } template <> +EIGEN_STRONG_INLINE Packet16q16i ploadu(const QInt16* from) { + EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256( + reinterpret_cast(from)); +} +template <> EIGEN_STRONG_INLINE Packet8q32i ploadu(const QInt32* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256( reinterpret_cast(from)); ",0,train ed6357cbd4f6e47ab87b219a0e0840739c92c970,tensorflow/tensorflow,"Added missing definition for the Packet16q16i. 
Fixed a couple of bugs in the implementation of max reductions for avx512",PacketMathAVX512.h,"@@ -457,7 +457,7 @@ EIGEN_STRONG_INLINE QInt16 predux_max(const Packet32q16i& a) { std::uint32_t w = pfirst( _mm_max_epi16(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1)))); - return std::min({ + return std::max({ static_cast(w >> 16), static_cast(w) }); @@ -493,7 +493,7 @@ EIGEN_STRONG_INLINE QUInt8 predux_max(const Packet64q8u& a) { std::uint32_t w = pfirst( _mm_max_epu8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1)))); - return std::min({ + return std::max({ static_cast(w >> 24), static_cast(w >> 16), static_cast(w >> 8), ",0,train b797bfb750504e03a38a988c44e3c52e902e87c4,tensorflow/tensorflow,"[HloOrdering] Make parameter always defined before other instructions. - Make parameter always defined before other instructions. - Add extra indentations to the predecessor field in ToString() method to make it clear. PiperOrigin-RevId: 215162840",hlo_ordering.cc,"@@ -92,14 +92,18 @@ bool HloOrdering::ExecutesBefore(const HloInstruction* a, } bool HloOrdering::IsDefinedBefore(const HloValue& a, const HloValue& b) const { - // If 'b' is an entry param then 'a' cannot be defined before 'b' because 'b' - // is live into the module. + // Entry parameter should always be defined before other instructions. const HloModule* module = b.defining_instruction()->parent()->parent(); if (b.defining_instruction()->parent() == module->entry_computation() && b.defining_instruction()->opcode() == HloOpcode::kParameter) { return false; } + if (a.defining_instruction()->parent() == module->entry_computation() && + a.defining_instruction()->opcode() == HloOpcode::kParameter) { + return true; + } + // Phi values require special handling. Because XLA does not have a phi // instruction, the definition instruction of the phis values are // placeholders: either the subcomputation parameter (body or condition) or @@ -316,7 +320,7 @@ string PredecessorHloOrdering::ToStringHelper(const string& name) const { for (auto predecessor : all) { if (predecessors_.at(computation) ->IsReachable(predecessor, instruction)) { - pieces.push_back(absl::StrFormat("" %s"", predecessor->name())); + pieces.push_back(absl::StrFormat("" %s"", predecessor->name())); } } } ",0,train b797bfb750504e03a38a988c44e3c52e902e87c4,tensorflow/tensorflow,"[HloOrdering] Make parameter always defined before other instructions. - Make parameter always defined before other instructions. - Add extra indentations to the predecessor field in ToString() method to make it clear. PiperOrigin-RevId: 215162840",hlo_ordering_test.cc,"@@ -174,6 +174,26 @@ TEST_F(HloOrderingTest, InstructionsInWhileComputations) { EXPECT_FALSE(ordering.ExecutesBefore(body_param, cond_param)); } +TEST_F(HloOrderingTest, ParametersDefinedBeforeOthers) { + // Entry parameter should always be defined before other instruction. 
+ auto module = CreateNewModule(); + const Shape scalar_shape = ShapeUtil::MakeShape(xla::F32, {}); + auto builder = HloComputation::Builder(TestName()); + auto constant = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(1.0))); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, ""param"")); + module->AddEntryComputation(builder.Build()); + TF_ASSERT_OK_AND_ASSIGN(auto dataflow, + HloDataflowAnalysis::Run(*module, /*ssa_form=*/true)); + + DependencyHloOrdering ordering(module.get()); + EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(param), + dataflow->GetValueDefinedAt(constant))); + EXPECT_TRUE(!ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(constant), + dataflow->GetValueDefinedAt(param))); +} + TEST_F(HloOrderingTest, ValuesInWhileComputations) { // Tests the ordering of values (defined by dataflow analysis) in the body and // condition of a while instruction. HLO code: ",0,train 0fdb21c045c7a7c085e2da3766747c8287a7962c,tensorflow/tensorflow,"Function for better work-group picking moved to util. Pooling is using this function for better performance. PiperOrigin-RevId: 242153291",pooling.cc,"@@ -27,6 +27,7 @@ limitations under the License. #include ""tensorflow/lite/delegates/gpu/common/shape.h"" #include ""tensorflow/lite/delegates/gpu/common/util.h"" #include ""tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"" +#include ""tensorflow/lite/delegates/gpu/metal/kernels/util.h"" namespace tflite { namespace gpu { @@ -40,8 +41,8 @@ std::string GetMaxPoolingCode(const HW& kernel_size) { constant int window_w = $0; constant int window_h = $1; struct uniforms { - int2 src_size; - int2 dst_size; + int4 src_size; + int4 dst_size; int2 stride; int2 offset; }; @@ -51,7 +52,8 @@ std::string GetMaxPoolingCode(const HW& kernel_size) { $$1 uint3 gid[[thread_position_in_grid]]) { if (static_cast(gid.x) >= params.dst_size.x || - static_cast(gid.y) >= params.dst_size.y) { + static_cast(gid.y) >= params.dst_size.y || + static_cast(gid.z) >= params.dst_size.z) { return; } @@ -84,8 +86,8 @@ std::string GetMaxPoolingIndicesCode(const HW& kernel_size) { constant int window_w = $0; constant int window_h = $1; struct uniforms { - int2 src_size; - int2 dst_size; + int4 src_size; + int4 dst_size; int2 stride; int2 offset; }; @@ -95,7 +97,8 @@ std::string GetMaxPoolingIndicesCode(const HW& kernel_size) { $$1 uint3 gid[[thread_position_in_grid]]) { if (static_cast(gid.x) >= params.dst_size.x || - static_cast(gid.y) >= params.dst_size.y) { + static_cast(gid.y) >= params.dst_size.y || + static_cast(gid.z) >= params.dst_size.z) { return; } @@ -147,8 +150,8 @@ std::string GetAveragePoolingCode(const HW& kernel_size) { constant int window_h = $1; constant float multiplier = $2; struct uniforms { - int2 src_size; - int2 dst_size; + int4 src_size; + int4 dst_size; int2 stride; int2 offset; }; @@ -158,7 +161,8 @@ std::string GetAveragePoolingCode(const HW& kernel_size) { uint tid[[thread_index_in_threadgroup]], uint3 gid[[thread_position_in_grid]]) { if (static_cast(gid.x) >= params.dst_size.x || - static_cast(gid.y) >= params.dst_size.y) { + static_cast(gid.y) >= params.dst_size.y || + static_cast(gid.z) >= params.dst_size.z) { return; } @@ -219,8 +223,12 @@ ComputeTaskDescriptorPtr PoolingInternal(int id, ValueId input_id, std::vector uniform_params = { dimension.w, dimension.h, + IntegralDivideRoundUp(dimension.c, 4), + dimension.w * dimension.h, output_dimension.w, output_dimension.h, + 
IntegralDivideRoundUp(dimension.c, 4), + output_dimension.w * output_dimension.h, params.strides.w, params.strides.h, params.padding.prepended.w, @@ -230,14 +238,14 @@ ComputeTaskDescriptorPtr PoolingInternal(int id, ValueId input_id, }}, }; - desc->resize_function = [input_id, - params](const std::map& buffers) { - const uint3 groups_size{16, 16, 1}; - const auto& src_shape = buffers.find(input_id)->second; - BHWC dst_shape = CalculateOutputShape(src_shape, params); - int groups_x = IntegralDivideRoundUp(dst_shape.w, groups_size.x); - int groups_y = IntegralDivideRoundUp(dst_shape.h, groups_size.y); - int groups_z = IntegralDivideRoundUp(dst_shape.c, 4); + desc->resize_function = [output_id](const std::map& buffers) { + BHWC dst_shape = buffers.find(output_id)->second; + const uint3 grid = + uint3(dst_shape.w, dst_shape.h, IntegralDivideRoundUp(dst_shape.c, 4)); + const uint3 groups_size = GetWorkGroupSizeForGrid(grid); + int groups_x = IntegralDivideRoundUp(grid.x, groups_size.x); + int groups_y = IntegralDivideRoundUp(grid.y, groups_size.y); + int groups_z = IntegralDivideRoundUp(grid.z, groups_size.z); return std::make_pair(groups_size, uint3{groups_x, groups_y, groups_z}); }; ",0,train 0fdb21c045c7a7c085e2da3766747c8287a7962c,tensorflow/tensorflow,"Function for better work-group picking moved to util. Pooling is using this function for better performance. PiperOrigin-RevId: 242153291",reshape.cc,"@@ -27,31 +27,12 @@ limitations under the License. #include ""tensorflow/lite/delegates/gpu/common/types.h"" #include ""tensorflow/lite/delegates/gpu/common/util.h"" #include ""tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"" +#include ""tensorflow/lite/delegates/gpu/metal/kernels/util.h"" namespace tflite { namespace gpu { namespace metal { namespace { - -uint GetBestSize(int grid_size) { - if (grid_size % 8 == 0 || grid_size % 8 >= 4 || grid_size >= 16) { - return 8; - } else if (grid_size % 4 == 0 || grid_size % 4 >= 2 || grid_size >= 8) { - return 4; - } else if (grid_size % 2 == 0 || grid_size >= 4) { - return 2; - } else { - return 1; - } -} - -uint3 GetWorkGroupSize(const BHWC& dst_shape) { - uint x_size = GetBestSize(dst_shape.w); - uint y_size = GetBestSize(dst_shape.h); - uint z_size = std::max(1u, 32u / (x_size * y_size)); - return {x_size, y_size, z_size}; -} - std::string GetReshapeCode() { std::string code = R""( #include @@ -177,11 +158,12 @@ std::vector Reshape(int id, ValueId input_id, }; desc->resize_function = [attr](const std::map& buffers) { - const uint3 groups_size = GetWorkGroupSize(attr.new_shape); - int groups_x = IntegralDivideRoundUp(attr.new_shape.w, groups_size.x); - int groups_y = IntegralDivideRoundUp(attr.new_shape.h, groups_size.y); - const int dst_layers = IntegralDivideRoundUp(attr.new_shape.c, 4); - int groups_z = IntegralDivideRoundUp(dst_layers, groups_size.z); + const uint3 grid = uint3(attr.new_shape.w, attr.new_shape.h, + IntegralDivideRoundUp(attr.new_shape.c, 4)); + const uint3 groups_size = GetWorkGroupSizeForGrid(grid); + int groups_x = IntegralDivideRoundUp(grid.x, groups_size.x); + int groups_y = IntegralDivideRoundUp(grid.y, groups_size.y); + int groups_z = IntegralDivideRoundUp(grid.z, groups_size.z); return std::make_pair(groups_size, uint3{groups_x, groups_y, groups_z}); }; @@ -235,11 +217,12 @@ std::vector Reshapex4(int id, ValueId input_id, }; desc->resize_function = [attr](const std::map& buffers) { - const uint3 groups_size = GetWorkGroupSize(attr.new_shape); - int groups_x = IntegralDivideRoundUp(attr.new_shape.w, 
groups_size.x); - int groups_y = IntegralDivideRoundUp(attr.new_shape.h, groups_size.y); - const int dst_layers = IntegralDivideRoundUp(attr.new_shape.c, 4); - int groups_z = IntegralDivideRoundUp(dst_layers, groups_size.z); + const uint3 grid = uint3(attr.new_shape.w, attr.new_shape.h, + IntegralDivideRoundUp(attr.new_shape.c, 4)); + const uint3 groups_size = GetWorkGroupSizeForGrid(grid); + int groups_x = IntegralDivideRoundUp(grid.x, groups_size.x); + int groups_y = IntegralDivideRoundUp(grid.y, groups_size.y); + int groups_z = IntegralDivideRoundUp(grid.z, groups_size.z); return std::make_pair(groups_size, uint3{groups_x, groups_y, groups_z}); }; ",0,train 0fdb21c045c7a7c085e2da3766747c8287a7962c,tensorflow/tensorflow,"Function for better work-group picking moved to util. Pooling is using this function for better performance. PiperOrigin-RevId: 242153291",util.cc,"@@ -0,0 +1,47 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""tensorflow/lite/delegates/gpu/metal/kernels/util.h"" + +namespace tflite { +namespace gpu { +namespace metal { +namespace { + +unsigned int GetOptimalSize(unsigned int grid_size) { + if (grid_size % 8 == 0 || grid_size % 8 >= 4 || grid_size >= 16) { + return 8; + } + if (grid_size % 4 == 0 || grid_size % 4 >= 2 || grid_size >= 8) { + return 4; + } + if (grid_size % 2 == 0 || grid_size >= 4) { + return 2; + } + return 1; +} + +} // namespace + +uint3 GetWorkGroupSizeForGrid(const uint3& grid_size) { + unsigned int x_size = GetOptimalSize(grid_size.x); + unsigned int y_size = GetOptimalSize(grid_size.y); + unsigned int z_size = std::max(1u, 32u / (x_size * y_size)); + return {x_size, y_size, z_size}; +} + +} // namespace metal +} // namespace gpu +} // namespace tflite ",0,train 0fdb21c045c7a7c085e2da3766747c8287a7962c,tensorflow/tensorflow,"Function for better work-group picking moved to util. Pooling is using this function for better performance. PiperOrigin-RevId: 242153291",util.h,"@@ -0,0 +1,34 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_UTIL_H_ +#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_UTIL_H_ + +#include ""tensorflow/lite/delegates/gpu/common/types.h"" + +namespace tflite { +namespace gpu { +namespace metal { + +// returns work-group size for grid that tries to cover grid optimaly +// If you use work-group size generated by this method you MUST check +// all three dimensions of thread on out of border in your kernel. +uint3 GetWorkGroupSizeForGrid(const uint3& grid_size); + +} // namespace metal +} // namespace gpu +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_UTIL_H_ ",0,train 6c0a2a5beda866e75a1f8546463651360eb725e1,tensorflow/tensorflow,"Remove `tensorflow/core/tfrt/runtime` dependency from XLIR. PiperOrigin-RevId: 428698704 Change-Id: Ief6451010d3d9f20199c4cd90860df85df417ba2",bef_thunk.cc,"@@ -35,7 +35,6 @@ limitations under the License. #include ""tensorflow/compiler/xla/service/gpu/xlir_ops.h"" #include ""tensorflow/core/lib/core/status.h"" #include ""tensorflow/core/platform/cpu_info.h"" -#include ""tensorflow/core/tfrt/runtime/work_queue_interface.h"" #include ""tensorflow/stream_executor/device_memory.h"" #include ""tensorflow/stream_executor/gpu/gpu_executor.h"" #include ""tensorflow/stream_executor/gpu/gpu_stream.h"" @@ -52,11 +51,13 @@ limitations under the License. #include ""tfrt/host_context/async_dispatch.h"" // from @tf_runtime #include ""tfrt/host_context/async_value_ref.h"" // from @tf_runtime #include ""tfrt/host_context/chain.h"" // from @tf_runtime +#include ""tfrt/host_context/concurrent_work_queue.h"" // from @tf_runtime #include ""tfrt/host_context/diagnostic.h"" // from @tf_runtime #include ""tfrt/host_context/execution_context.h"" // from @tf_runtime #include ""tfrt/host_context/function.h"" // from @tf_runtime #include ""tfrt/host_context/host_allocator.h"" // from @tf_runtime #include ""tfrt/host_context/host_context.h"" // from @tf_runtime +#include ""tfrt/host_context/kernel_registry.h"" // from @tf_runtime #include ""tfrt/host_context/resource_context.h"" // from @tf_runtime #include ""tfrt/support/error_util.h"" // from @tf_runtime @@ -67,10 +68,9 @@ bool IsBefThunkEnabled() { return true; } namespace { -struct CoreRuntimeAndWorkQueue { +struct MlirAndTfrtHostCtx { mlir::MLIRContext* mlir_ctx; - tfrt::CoreRuntime* core_runtime; - tensorflow::tfrt_stub::WorkQueueInterface* work_queue; + tfrt::HostContext* host_ctx; }; class BefThunk : public Thunk { @@ -204,35 +204,18 @@ static StatusOr GetThunkKind(mlir::Operation* op) { ""Operation is not supported by BefThunk.""); } -static StatusOr GetCoreRuntimeAndWorkQueue() { - static auto runtime_and_queue_or = - [&]() -> StatusOr { - // TODO(hanbinyoon): Make these configurable. - int num_threads = tensorflow::port::MaxParallelism(); - int num_blocking_threads = 16; - - // Create work queue. - auto work_queue = tensorflow::tfrt_stub::WrapDefaultWorkQueue( - tfrt::CreateMultiThreadedWorkQueue(num_threads, num_blocking_threads)); - if (work_queue == nullptr) { - return tensorflow::errors::Internal(""Failed to create TFRT work queue.""); - } - auto* work_queue_ptr = work_queue.get(); - auto* mlir_ctx = new mlir::MLIRContext; - - // Create core runtime. 
- auto expected_core_runtime = tfrt::CoreRuntime::Create( +static MlirAndTfrtHostCtx GetMlirAndTfrtHostCtx() { + static auto* mlir_ctx = new mlir::MLIRContext; + static auto* host_ctx = [&] { + auto* result = new tfrt::HostContext( tfrt::gpu::GetDiagHandler(mlir_ctx), tfrt::CreateMallocAllocator(), - std::move(work_queue), kDefaultHostDeviceName); - if (!expected_core_runtime) { - auto error = expected_core_runtime.takeError(); - return tensorflow::errors::Internal(llvm::toString(std::move(error))); - } - - return CoreRuntimeAndWorkQueue{mlir_ctx, expected_core_runtime->release(), - work_queue_ptr}; + // TODO(hanbinyoon): Make these configurable. + tfrt::CreateMultiThreadedWorkQueue(/*num_threads=*/1, + /*num_blocking_threads=*/16)); + tfrt::RegisterStaticKernels(result->GetMutableRegistry()); + return result; }(); - return runtime_and_queue_or; + return {mlir_ctx, host_ctx}; } // Creates a TFRT module that loads the GPU module and launches the target @@ -313,10 +296,9 @@ StatusOr> CreateBefThunk( auto module = CreateModule(op); TF_RETURN_IF_ERROR(RunLmhloGpuToTfrtConversionPipeline(*module)); - TF_ASSIGN_OR_RETURN(auto runtime_and_queue, GetCoreRuntimeAndWorkQueue()); - TF_ASSIGN_OR_RETURN( - auto bef_result, - ConvertToBef(*module, runtime_and_queue.core_runtime->GetHostContext())); + auto mlir_and_host_ctx = GetMlirAndTfrtHostCtx(); + TF_ASSIGN_OR_RETURN(auto bef_result, + ConvertToBef(*module, mlir_and_host_ctx.host_ctx)); return std::unique_ptr( new BefThunk(kind, thunk_info, std::move(buffers), @@ -341,10 +323,9 @@ StatusOr> CreateBefCollectivePermuteThunk( TF_RETURN_IF_ERROR(RunLmhloGpuToTfrtConversionPipeline(*module)); - TF_ASSIGN_OR_RETURN(auto runtime_and_queue, GetCoreRuntimeAndWorkQueue()); - TF_ASSIGN_OR_RETURN( - auto bef_result, - ConvertToBef(*module, runtime_and_queue.core_runtime->GetHostContext())); + auto mlir_and_host_ctx = GetMlirAndTfrtHostCtx(); + TF_ASSIGN_OR_RETURN(auto bef_result, + ConvertToBef(*module, mlir_and_host_ctx.host_ctx)); return std::unique_ptr( new BefThunk(kind, thunk_info, std::move(buffers), @@ -362,11 +343,9 @@ StatusOr> CreateBefKernelThunk( mlir::OwningOpRef tfrt_module = CreateTfrtKernelLaunchModule( &mlir_context, kernel_name, args.size(), launch_dimensions); - TF_ASSIGN_OR_RETURN(auto runtime_and_queue, GetCoreRuntimeAndWorkQueue()); - TF_ASSIGN_OR_RETURN( - auto bef_result, - ConvertToBef(*tfrt_module, - runtime_and_queue.core_runtime->GetHostContext())); + auto mlir_and_host_ctx = GetMlirAndTfrtHostCtx(); + TF_ASSIGN_OR_RETURN(auto bef_result, + ConvertToBef(*tfrt_module, mlir_and_host_ctx.host_ctx)); std::vector arg_buffers; for (auto arg : args) { @@ -469,9 +448,9 @@ Status BefThunk::ExecuteOnStream(const ExecuteParams& params) { tfrt::gpu::MakeBorrowedStream(gpu_context.first, stream->gpu_stream()); // Create execution context. 
- TF_ASSIGN_OR_RETURN(auto runtime_and_queue, GetCoreRuntimeAndWorkQueue()); + auto mlir_and_host_ctx = GetMlirAndTfrtHostCtx(); tfrt::RequestContextBuilder request_context_builder( - runtime_and_queue.core_runtime->GetHostContext(), gpu_context.second); + mlir_and_host_ctx.host_ctx, gpu_context.second); if (kind() == Thunk::kKernel) { absl::MutexLock lock(&mutex_); TF_RETURN_IF_ERROR( @@ -506,7 +485,7 @@ Status BefThunk::ExecuteOnStream(const ExecuteParams& params) { std::string diag_str; llvm::raw_string_ostream diag_os(diag_str); llvm::SourceMgr src_mgr; - mlir::SourceMgrDiagnosticHandler handler(src_mgr, runtime_and_queue.mlir_ctx, + mlir::SourceMgrDiagnosticHandler handler(src_mgr, mlir_and_host_ctx.mlir_ctx, diag_os); // Execute the function. ",0,test 6c0a2a5beda866e75a1f8546463651360eb725e1,tensorflow/tensorflow,"Remove `tensorflow/core/tfrt/runtime` dependency from XLIR. PiperOrigin-RevId: 428698704 Change-Id: Ief6451010d3d9f20199c4cd90860df85df417ba2",gpu_executable.cc,"@@ -56,7 +56,6 @@ limitations under the License. #include ""mlir/IR/Diagnostics.h"" // from @llvm-project #include ""tensorflow/compiler/mlir/utils/name_utils.h"" #include ""tensorflow/compiler/xla/service/gpu/xlir_ops.h"" -#include ""tensorflow/core/tfrt/runtime/work_queue_interface.h"" #include ""tensorflow/stream_executor/gpu/gpu_executor.h"" #include ""tensorflow/stream_executor/gpu/gpu_stream.h"" #include ""tfrt/gpu/gpu_executor.h"" // from @tf_runtime ",0,test 73f7f1ae9212634cc2c6e0c9c9c6c273c78b4309,tensorflow/tensorflow,"Support entry function name that doesn't match the module name. PiperOrigin-RevId: 403706067 Change-Id: I76ed606fc5dfb46af48f7a94a8bef762ab20d4cf",gpu_compiler.cc,"@@ -755,7 +755,8 @@ StatusOr> GpuCompiler::AssignBuffers( } #if BEF_EXECUTABLE -static StatusOr LowerToBef(mlir::ModuleOp mlir_module) { +static StatusOr LowerToBef(mlir::ModuleOp mlir_module, + std::string entry_function_name) { if (!mlir_module) { return tensorflow::errors::FailedPrecondition( ""No mlir module to lower to BEF.""); @@ -785,7 +786,7 @@ static StatusOr LowerToBef(mlir::ModuleOp mlir_module) { auto ptr = static_cast( tfrt::AlignedAlloc(tfrt::GetRequiredBefAlignment(), bef.size())); std::copy(bef.begin(), bef.end(), ptr); - return OwnedBefBuffer(ptr, {bef.size()}); + return OwnedBefBuffer(ptr, {entry_function_name, bef.size()}); } #endif // BEF_EXECUTABLE @@ -891,7 +892,8 @@ static Status CompileModuleToLlvmIrImpl( } #if BEF_EXECUTABLE - TF_ASSIGN_OR_RETURN(results->thunks_or_bef, LowerToBef(*mlir_module)); + TF_ASSIGN_OR_RETURN(results->thunks_or_bef, + LowerToBef(*mlir_module, entry_function.getName().str())); #else // BEF_EXECUTABLE results->thunks_or_bef = absl::make_unique(ir_emitter->ConsumeThunkSequence()); @@ -1040,7 +1042,7 @@ GpuCompiler::CompileToTargetBinary(const HloModuleConfig& module_config, } llvm::SplitModule( - *llvm_module.get(), + *llvm_module, std::max( 1, std::min(thread_pool->NumThreads(), num_functions)), [&](std::unique_ptr module) { ",0,train 73f7f1ae9212634cc2c6e0c9c9c6c273c78b4309,tensorflow/tensorflow,"Support entry function name that doesn't match the module name. 
PiperOrigin-RevId: 403706067 Change-Id: I76ed606fc5dfb46af48f7a94a8bef762ab20d4cf",gpu_executable.cc,"@@ -771,9 +771,9 @@ StatusOr GpuExecutable::ExecuteAsyncOnStreamImpl( return InternalError(""Failed to load BEF file.""); } - TF_RETURN_IF_ERROR(ExecuteBef(bef_file, module_name_, run_options, - buffer_allocations, allocations_.size(), - block_host_until_done)); + TF_RETURN_IF_ERROR(ExecuteBef( + bef_file, bef_buffer.get_deleter().entry_function_name, run_options, + buffer_allocations, allocations_.size(), block_host_until_done)); } else { return FailedPrecondition(""Expected BefBuffer is not supplied.""); } ",0,train 73f7f1ae9212634cc2c6e0c9c9c6c273c78b4309,tensorflow/tensorflow,"Support entry function name that doesn't match the module name. PiperOrigin-RevId: 403706067 Change-Id: I76ed606fc5dfb46af48f7a94a8bef762ab20d4cf",gpu_executable.h,"@@ -52,6 +52,7 @@ namespace gpu { class GpuExecutable : public Executable { struct BefBufferDeleter { void operator()(uint8_t* ptr) const; + std::string entry_function_name; size_t size; }; ",0,train 708090d48995456bfa66615398d8c56dadebe018,tensorflow/tensorflow,"Make TF_DeleteKernelBuilder not crash on nullptr. After this change, TF_DeleteKernelBuilder will comply with the conventions established in c_api.h, namely that *Delete* functions are safe with nullptr parameters. PiperOrigin-RevId: 230009727",kernels.cc,"@@ -48,9 +48,10 @@ TF_KernelBuilder* TF_NewKernelBuilder( } void TF_DeleteKernelBuilder(TF_KernelBuilder* builder) { - DCHECK_NE(builder, nullptr); - delete builder->cc_builder; - delete builder; + if (builder != nullptr) { + delete builder->cc_builder; + delete builder; + } } namespace tensorflow { ",0,train 708090d48995456bfa66615398d8c56dadebe018,tensorflow/tensorflow,"Make TF_DeleteKernelBuilder not crash on nullptr. After this change, TF_DeleteKernelBuilder will comply with the conventions established in c_api.h, namely that *Delete* functions are safe with nullptr parameters. PiperOrigin-RevId: 230009727",kernels_test.cc,"@@ -224,4 +224,8 @@ TEST(TestKernel, TestInputAndOutputCount) { } } +TEST(TestKernel, DeleteKernelBuilderIsOkOnNull) { + TF_DeleteKernelBuilder(nullptr); +} + } // namespace tensorflow ",0,train 1c850ad297ff2b3236a440893b1a3b1ebc8a8ca7,tensorflow/tensorflow,"Change signature of tf.image.crop_and_resize and tf.image.extract_image_patches for TF 2.0. PiperOrigin-RevId: 222270163",array_ops.py,"@@ -3201,3 +3201,48 @@ def searchsorted(sorted_sequence, quantize.__doc__ = gen_array_ops.quantize_v2.__doc__ + + +@tf_export(""image.extract_image_patches"", v1=[]) +def extract_image_patches_v2( + images, + sizes, + strides, + rates, + padding, + name=None): + # pylint: disable=line-too-long + r""""""Extract `patches` from `images` and put them in the \""depth\"" output dimension. + + Args: + images: A 4-D Tensor with shape `[batch, in_rows, in_cols, depth] + sizes: The size of the sliding window for each dimension of `images`. + strides: A 1-D Tensor of length 4. How far the centers of two consecutive + patches are in the images. Must be: `[1, stride_rows, stride_cols, 1]`. + rates: A 1-D Tensor of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. + This is the input stride, specifying how far two consecutive patch samples + are in the input. Equivalent to extracting patches with `patch_sizes_eff = + patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by subsampling + them spatially by a factor of `rates`. This is equivalent to `rate` in + dilated (a.k.a. Atrous) convolutions. 
+ padding: The type of padding algorithm to use. + We specify the size-related attributes as: ```python ksizes = [1, + ksize_rows, ksize_cols, 1] strides = [1, strides_rows, strides_cols, 1] + rates = [1, rates_rows, rates_cols, 1] + name: A name for the operation (optional). + + Returns: + A 4-D Tensor. Has the same type as `images`, and with shape `[batch, + out_rows, out_cols, ksize_rows * ksize_cols * depth]` containing image + patches with size `ksize_rows x ksize_cols x depth` vectorized in the + \""depth\"" dimension. Note `out_rows` and `out_cols` are the dimensions of + the output patches. + """""" + # pylint: enable=line-too-long + return gen_array_ops.extract_image_patches( + images, sizes, strides, rates, padding, name) + +extract_image_patches_deprecation = deprecation.deprecated_args( + None, ""ksizes is deprecated, use sizes instead"", ""ksizes"") +tf_export(v1=[""image.extract_image_patches"", ""extract_image_patches""])( + extract_image_patches_deprecation(gen_array_ops.extract_image_patches)) ",0,train 1c850ad297ff2b3236a440893b1a3b1ebc8a8ca7,tensorflow/tensorflow,"Change signature of tf.image.crop_and_resize and tf.image.extract_image_patches for TF 2.0. PiperOrigin-RevId: 222270163",image_ops_impl.py,"@@ -2861,3 +2861,72 @@ resize_nearest_neighbor_deprecation = deprecation.deprecated( 'instead.')) tf_export(v1=['image.resize_nearest_neighbor'])( resize_nearest_neighbor_deprecation(gen_image_ops.resize_nearest_neighbor)) + + +@tf_export('image.crop_and_resize', v1=[]) +def crop_and_resize_v2( + image, + boxes, + box_indices, + crop_size, + method='bilinear', + extrapolation_value=0, + name=None): + """"""Extracts crops from the input image tensor and resizes them. + + Extracts crops from the input image tensor and resizes them using bilinear + sampling or nearest neighbor sampling (possibly with aspect ratio change) to a + common output size specified by `crop_size`. This is more general than the + `crop_to_bounding_box` op which extracts a fixed size slice from the input + image and does not allow resizing or aspect ratio change. + + Returns a tensor with `crops` from the input `image` at positions defined at + the bounding box locations in `boxes`. The cropped boxes are all resized (with + bilinear or nearest neighbor interpolation) to a fixed + `size = [crop_height, crop_width]`. The result is a 4-D tensor + `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned. + In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical + results to using `tf.image.resize_bilinear()` or + `tf.image.resize_nearest_neighbor()`(depends on the `method` argument) with + `align_corners=True`. + + Args: + image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. + Both `image_height` and `image_width` need to be positive. + boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor + specifies the coordinates of a box in the `box_ind[i]` image and is + specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized + coordinate value of `y` is mapped to the image coordinate at `y * + (image_height - 1)`, so as the `[0, 1]` interval of normalized image + height is mapped to `[0, image_height - 1]` in image height coordinates. + We do allow `y1` > `y2`, in which case the sampled crop is an up-down + flipped version of the original image. The width dimension is treated + similarly. 
Normalized coordinates outside the `[0, 1]` range are allowed, + in which case we use `extrapolation_value` to extrapolate the input image + values. + box_indices: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, + batch)`. The value of `box_ind[i]` specifies the image that the `i`-th box + refers to. + crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. + All cropped image patches are resized to this size. The aspect ratio of + the image content is not preserved. Both `crop_height` and `crop_width` + need to be positive. + method: An optional string specifying the sampling method for resizing. It + can be either `""bilinear""` or `""nearest""` and default to `""bilinear""`. + Currently two sampling methods are supported: Bilinear and Nearest + Neighbor. + extrapolation_value: An optional `float`. Defaults to `0`. Value used for + extrapolation, when applicable. + name: A name for the operation (optional). + + Returns: + A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. + """""" + return gen_image_ops.crop_and_resize( + image, boxes, box_indices, crop_size, method, extrapolation_value, name) + + +crop_and_resize_deprecation = deprecation.deprecated_args( + None, 'box_ind is deprecated, use box_indices instead', 'box_ind') +tf_export(v1=['image.crop_and_resize'])( + crop_and_resize_deprecation(gen_image_ops.crop_and_resize)) ",0,train 1c850ad297ff2b3236a440893b1a3b1ebc8a8ca7,tensorflow/tensorflow,"Change signature of tf.image.crop_and_resize and tf.image.extract_image_patches for TF 2.0. PiperOrigin-RevId: 222270163",tf_upgrade_v2.py,"@@ -31,6 +31,15 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec): # Maps from a function name to a dictionary that describes how to # map from an old argument keyword to the new argument keyword. self.function_keyword_renames = { + ""tf.image.crop_and_resize"": { + ""box_ind"": ""box_indices"", + }, + ""tf.image.extract_image_patches"": { + ""ksizes"": ""sizes"", + }, + ""tf.extract_image_patches"": { + ""ksizes"": ""sizes"", + }, ""tf.expand_dims"": { ""dim"": ""axis"", }, ",0,train dfaa328f06e6af0e0a84a6035749bf8be62ee5e2,tensorflow/tensorflow,"Scalar / Trivial folding for mhlo.select This covers the case where the predicate is a splat or the on_true/on_false values are the same. PiperOrigin-RevId: 329622785 Change-Id: I5761e3260b7177f602fd3a4f999d193186071481",hlo_ops.cc,"@@ -1410,6 +1410,29 @@ static LogicalResult Verify(SelectOp op) { return success(); } +OpFoldResult SelectOp::fold(ArrayRef operands) { + if (on_true() == on_false()) { + return on_true(); + } + + auto predicate = operands[0].dyn_cast_or_null(); + if (!predicate) { + return {}; + } + + auto predicateTy = predicate.getType().cast(); + if (!predicateTy.getElementType().isInteger(1)) { + return {}; + } + + if (predicate.isSplat()) { + return predicate.getSplatValue().getBoolValue() ? on_true() + : on_false(); + } + + return {}; +} + // Makes it such that a SelectOp that is a non-root operation in a DRR infers // the return type based on operand type. LogicalResult SelectOp::inferReturnTypes( ",0,train 7dfe1e7c1b03ed55d60bccfcc4bd20ec3a981480,tensorflow/tensorflow,"Add a note to `tf.Print` explaining where it prints to. Change: 150493886",logging_ops.py,"@@ -42,6 +42,10 @@ def Print(input_, data, message=None, first_n=None, summarize=None, This is an identity op with the side effect of printing `data` when evaluating. + Note: This op prints to the standard error. 
It is not currently compatible + with jupyter notebook (printing to the notebook *server's* output, not into + the notebook). + Args: input_: A tensor passed through this op. data: A list of tensors to print out when op is evaluated. ",0,train 0dbe3bf9898ea80b95fc23e872458e0f9df306a4,tensorflow/tensorflow,"Fix error msg typo. PiperOrigin-RevId: 248223631",lite.py,"@@ -172,7 +172,7 @@ class TFLiteConverterBase(object): ""Provide an input generator for representative_dataset"") elif self._int8_target_required(): raise ValueError(""representative_dataset is required when specifying "" - ""TFLITE_BUILTINs_INT8 target."") + ""TFLITE_BUILTINS_INT8 target."") def _int8_target_required(self): return set([OpsSet.TFLITE_BUILTINS_INT8]) == set(self._target_ops) ",0,train 604489d05f36647dd8815452ce22b295881f834b,tensorflow/tensorflow,"Removes the `SetIsStateful` mark on op `StatelessRandomGetKeyCounter` (the mark causes problems in tf.data iterator checkpointing). Theoretically the op's output is device-dependent so it's kind of ""stateful"", but the V2 stateless RNG ops are also device-dependent (when alg=AUTO_SELECT) and not marked as stateful. We are using the same criterion here. Note that removing the ""stateful"" mark won't cause the op to be constant-folded away (like `StatelessRandomGetAlg`). PiperOrigin-RevId: 387358689 Change-Id: I1aa46d260b8684894aca7c9f69dbfd0b95cbbcd5",stateless_random_ops_v2.cc,"@@ -120,7 +120,6 @@ REGISTER_OP(""StatelessRandomGetKeyCounter"") .Output(""key: uint64"") .Output(""counter: uint64"") .Attr(""Tseed: {int32, int64} = DT_INT64"") - .SetIsStateful() // because outputs depend on device .SetShapeFn([](InferenceContext* c) { // Check seed shape ShapeHandle seed; ",0,train 7d7dce16b8e7aef53467d8eb08d4249ef6cd71fb,tensorflow/tensorflow,"Fix typo (#16509) * fix typos",hlo_parser.cc,"@@ -2173,7 +2173,7 @@ bool HloParser::ParseConvolutionDimensionNumbers( // // {[2:3:4], [5:6:7], [8:9]} // -// The the parsed result will be: +// The parsed result will be: // // {/*starts=*/{2, 5, 8}, /*limits=*/{3, 6, 9}, /*strides=*/{4, 7, 1}} // ",0,train e498730b1745f78d980a988b2551520d51aea340,tensorflow/tensorflow,"Fix configuration script and add nvinfer_plugin library, add initialization for the plugins in Converter constructor, and fix code formatting.",convert_graph.cc,"@@ -60,7 +60,6 @@ limitations under the License. #if GOOGLE_TENSORRT #include ""cuda/include/cuda_runtime_api.h"" #include ""tensorrt/include/NvInfer.h"" -#include ""tensorrt/include/NvInferPlugin.h"" namespace tensorflow { namespace tensorrt { namespace convert { @@ -842,26 +841,6 @@ Status ConvertAfterShapes(const ConversionParams& params) { LOG(INFO) << ""Number of TensorRT candidate segments: "" << initial_segments.size(); - // Check if plugins can be aaccessed. - int num_trt_plugins = 0; - nvinfer1::IPluginCreator* const* trt_plugin_creator_list = - getPluginRegistry()->getPluginCreatorList(&num_trt_plugins); - if (!trt_plugin_creator_list) { - LOG(WARNING) << ""Can not find any TensorRT plugins in registry.""; - } - else { - VLOG(1) << ""Found the following "" << num_trt_plugins << "" TensorRT plugins in registry:""; - for (int i = 0; i < num_trt_plugins; ++i) { - if (!trt_plugin_creator_list[i]) { - LOG(WARNING) << ""TensorRT plugin at index "" << i << - "" is not accessible (null pointer returned by getPluginCreatorList for this plugin)""; - } - else { - VLOG(1) << "" "" << trt_plugin_creator_list[i]->getPluginName(); - } - } - } - // Get the EngineInfo for each segment. 
std::unordered_map node_map; TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); ",0,train e498730b1745f78d980a988b2551520d51aea340,tensorflow/tensorflow,"Fix configuration script and add nvinfer_plugin library, add initialization for the plugins in Converter constructor, and fix code formatting.",convert_nodes.cc,"@@ -45,6 +45,7 @@ limitations under the License. #include ""tensorflow/core/lib/strings/str_util.h"" #include ""tensorflow/core/lib/strings/strcat.h"" #include ""tensorflow/core/platform/logging.h"" +#include ""tensorflow/core/platform/mutex.h"" #include ""tensorflow/core/platform/protobuf.h"" #include ""tensorflow/core/platform/tensor_coding.h"" #include ""tensorflow/core/platform/types.h"" @@ -52,6 +53,7 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT #include ""tensorrt/include/NvInfer.h"" +#include ""tensorrt/include/NvInferPlugin.h"" // Check if the types are equal. Cast to int first so that failure log message // would work! @@ -59,7 +61,7 @@ limitations under the License. #define TFTRT_INTERNAL_ERROR_AT_NODE(node) \ do { \ - return errors::Internal(""TFTRT::"", __FUNCTION__, \ + return errors::Internal(""TFTRT::"", __FUNCTION__, "":"", __LINE__, \ "" failed to add TRT layer, at: "", node); \ } while (0) @@ -970,11 +972,45 @@ Status TrtNodeValidator::ConvertConstToWeights( return status; } +static void InitializeTrtPlugins() { + static mutex plugin_mutex(LINKER_INITIALIZED); + static bool plugin_initialized = false; + mutex_lock lock(plugin_mutex); + if (!plugin_initialized) { + Logger trt_logger; + plugin_initialized = initLibNvInferPlugins(&trt_logger, """"); + if (!plugin_initialized) { + LOG(ERROR) << ""Failed to initialize TensorRT plugins, and conversion may "" + ""fail later.""; + } + + int num_trt_plugins = 0; + nvinfer1::IPluginCreator* const* trt_plugin_creator_list = + getPluginRegistry()->getPluginCreatorList(&num_trt_plugins); + if (!trt_plugin_creator_list) { + LOG(WARNING) << ""Can not find any TensorRT plugins in registry.""; + } else { + VLOG(1) << ""Found the following "" << num_trt_plugins + << "" TensorRT plugins in registry:""; + for (int i = 0; i < num_trt_plugins; ++i) { + if (!trt_plugin_creator_list[i]) { + LOG(WARNING) << ""TensorRT plugin at index "" << i + << "" is not accessible (null pointer returned by "" + ""getPluginCreatorList for this plugin)""; + } else { + VLOG(1) << "" "" << trt_plugin_creator_list[i]->getPluginName(); + } + } + } + } +} + Converter::Converter(nvinfer1::INetworkDefinition* trt_network, TrtPrecisionMode precision_mode, bool use_calibration) : trt_network_(trt_network), precision_mode_(precision_mode), use_calibration_(use_calibration) { + InitializeTrtPlugins(); this->RegisterOpConverters(); } @@ -3880,28 +3916,33 @@ Status ConvertTopK(OpConverterParams* params) { } #if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1) -tensorflow::Status ConvertCombinedNMS(OpConverterParams* params) { +Status ConvertCombinedNMS(OpConverterParams* params) { const auto& inputs = params->inputs; const auto& node_def = params->node_def; if (inputs.size() != 6) { - return tensorflow::errors::InvalidArgument( - ""Six inputs expected for CombinedNonMaxSuppression, at "", node_def.name()); + return errors::InvalidArgument( + ""Six inputs expected for CombinedNonMaxSuppression, at "", + node_def.name()); } if (!(inputs.at(0).is_tensor() || inputs.at(1).is_tensor())) { - return tensorflow::errors::Unimplemented( - ""CombinedNonMaxSuppression expects tensor for boxes and scores, at "", 
node_def.name()); + return errors::Unimplemented( + ""CombinedNonMaxSuppression expects tensor for boxes and scores, at "", + node_def.name()); } if (!(inputs.at(2).is_weights()) || !(inputs.at(3).is_weights()) || - (!inputs.at(4).is_weights()) || !(inputs.at(5).is_weights())) { - return tensorflow::errors::InvalidArgument( + (!inputs.at(4).is_weights()) || !(inputs.at(5).is_weights())) { + return errors::InvalidArgument( ""CombinedNonMaxSuppression expects weights for "" - ""max_output_size_per_class, max_total_size, iou_threshold, score_threshold, at "", + ""max_output_size_per_class, max_total_size, iou_threshold, "" + ""score_threshold, at "", node_def.name()); } - nvinfer1::ITensor* boxes_tensor = const_cast(inputs.at(0).tensor()); - nvinfer1::ITensor* scores_tensor = const_cast(inputs.at(1).tensor()); + nvinfer1::ITensor* boxes_tensor = + const_cast(inputs.at(0).tensor()); + nvinfer1::ITensor* scores_tensor = + const_cast(inputs.at(1).tensor()); TRT_ShapedWeights output_size_per_class = inputs.at(2).weights(); TRT_ShapedWeights total_size = inputs.at(3).weights(); TRT_ShapedWeights iou_threshold = inputs.at(4).weights(); @@ -3911,54 +3952,56 @@ tensorflow::Status ConvertCombinedNMS(OpConverterParams* params) { const auto boxes_dims = boxes_tensor->getDimensions(); const auto scores_dims = scores_tensor->getDimensions(); if (boxes_dims.nbDims != 3) { - return tensorflow::errors::InvalidArgument( + return errors::InvalidArgument( ""TensorRT BatchedNMS Plugin input boxes must be 3-D excluding batch "", - node_def.name()); + node_def.name()); } const int num_classes = scores_dims.d[1]; bool box_check = boxes_dims.d[1] == 1 || boxes_dims.d[1] == num_classes; if (!box_check) { - return tensorflow::errors::InvalidArgument( - ""TensorRT BatchedNMS Plugin third dimension of boxes must be either 1 or num_classes "", + return errors::InvalidArgument( + ""TensorRT BatchedNMS Plugin third dimension of boxes must be either 1 "" + ""or num_classes "", node_def.name()); } if (output_size_per_class.shape_.nbDims != 1) { - return tensorflow::errors::InvalidArgument( + return errors::InvalidArgument( ""TensorRT BatchedNMS Plugin max_output_size_per_class must be 0-D "", node_def.name()); } - int max_size_per_class = *(static_cast(const_cast( - output_size_per_class.GetValues()))); - if (max_size_per_class <=0) { - return tensorflow::errors::InvalidArgument( + int max_size_per_class = *( + static_cast(const_cast(output_size_per_class.GetValues()))); + if (max_size_per_class <= 0) { + return errors::InvalidArgument( ""TensorRT BatchedNMS Plugin max_output_size_per_class should be > 0"", node_def.name()); } if (total_size.shape_.nbDims != 1) { - return tensorflow::errors::InvalidArgument( + return errors::InvalidArgument( ""TensorRT BatchedNMS Plugin max_total_size must be 0-D "", - node_def.name()); + node_def.name()); } - int max_total_size = *(static_cast(const_cast( - total_size.GetValues()))); - if (max_total_size <=0) { - return tensorflow::errors::InvalidArgument( + int max_total_size = + *(static_cast(const_cast(total_size.GetValues()))); + if (max_total_size <= 0) { + return errors::InvalidArgument( ""TensorRT BatchedNMS Plugin max_total_size should be > 0"", node_def.name()); } if (iou_threshold.shape_.nbDims != 1) { - return tensorflow::errors::InvalidArgument( + return errors::InvalidArgument( ""TensorRT BatchedNMS Plugin iou_threshold must be 0-D "", node_def.name()); } - float iou_thresh = *(static_cast(const_cast(iou_threshold.GetValues()))); + float iou_thresh = + 
*(static_cast(const_cast(iou_threshold.GetValues()))); if (iou_thresh < 0.0 || iou_thresh > 1.0) { - return tensorflow::errors::InvalidArgument( + return errors::InvalidArgument( ""TensorRT BatchedNMS Plugin iou_threshold must be in [0, 1]"", node_def.name()); } if (score_threshold.shape_.nbDims != 1) { - return tensorflow::errors::InvalidArgument( + return errors::InvalidArgument( ""TensorRT BatchedNMS Plugin score_threshold must be 0-D "", node_def.name()); } @@ -3967,42 +4010,44 @@ tensorflow::Status ConvertCombinedNMS(OpConverterParams* params) { // Set plugin fields and the field collection TFAttrs attrs(node_def); - bool share_location = (boxes_dims.d[1] == 1); + bool share_location = (boxes_dims.d[1] == 1); const bool pad_per_class = attrs.get(""pad_per_class""); int topK; if (pad_per_class) { topK = std::min(max_size_per_class * num_classes, max_total_size); - } - else { + } else { topK = max_total_size; } const int keepTopK = topK; - float score_thresh = *(static_cast(const_cast(score_threshold.GetValues()))); + float score_thresh = + *(static_cast(const_cast(score_threshold.GetValues()))); const int background_id = -1; nvinfer1::PluginField fields[7] = { - nvinfer1::PluginField{""shareLocation"", &share_location, - nvinfer1::PluginFieldType::kINT32, 1}, - nvinfer1::PluginField{""backgroundLabelId"", &background_id, - nvinfer1::PluginFieldType::kINT32, 1}, - nvinfer1::PluginField{""numClasses"", &num_classes, - nvinfer1::PluginFieldType::kINT32, 1}, - nvinfer1::PluginField{""topK"", &topK, - nvinfer1::PluginFieldType::kINT32, 1}, - nvinfer1::PluginField{""keepTopK"", &keepTopK, - nvinfer1::PluginFieldType::kINT32, 1}, - nvinfer1::PluginField{""scoreThreshold"", &score_thresh, - nvinfer1::PluginFieldType::kFLOAT32, 1}, - nvinfer1::PluginField{""iouThreshold"", &iou_thresh, - nvinfer1::PluginFieldType::kFLOAT32, 1}, + nvinfer1::PluginField{""shareLocation"", &share_location, + nvinfer1::PluginFieldType::kINT32, 1}, + nvinfer1::PluginField{""backgroundLabelId"", &background_id, + nvinfer1::PluginFieldType::kINT32, 1}, + nvinfer1::PluginField{""numClasses"", &num_classes, + nvinfer1::PluginFieldType::kINT32, 1}, + nvinfer1::PluginField{""topK"", &topK, nvinfer1::PluginFieldType::kINT32, + 1}, + nvinfer1::PluginField{""keepTopK"", &keepTopK, + nvinfer1::PluginFieldType::kINT32, 1}, + nvinfer1::PluginField{""scoreThreshold"", &score_thresh, + nvinfer1::PluginFieldType::kFLOAT32, 1}, + nvinfer1::PluginField{""iouThreshold"", &iou_thresh, + nvinfer1::PluginFieldType::kFLOAT32, 1}, }; nvinfer1::PluginFieldCollection fc{7, fields}; // Get plugin creator - auto creator = getPluginRegistry()->getPluginCreator(""BatchedNMS_TRT"", ""1"", """"); + auto creator = + getPluginRegistry()->getPluginCreator(""BatchedNMS_TRT"", ""1"", """"); TFTRT_RETURN_ERROR_IF_NULLPTR(creator, node_def.name()); // Create plugin - nvinfer1::IPluginV2* plugin = creator->createPlugin(node_def.name().c_str(), &fc); + nvinfer1::IPluginV2* plugin = + creator->createPlugin(node_def.name().c_str(), &fc); TFTRT_RETURN_ERROR_IF_NULLPTR(plugin, node_def.name()); // Set plugin inputs @@ -4012,7 +4057,7 @@ tensorflow::Status ConvertCombinedNMS(OpConverterParams* params) { // Add plugin to network nvinfer1::IPluginV2Layer* layer = params->converter->network()->addPluginV2( - &plugin_inputs[0], int(plugin_inputs.size()), *plugin); + &plugin_inputs[0], int(plugin_inputs.size()), *plugin); TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); // Set plugin outputs @@ -4025,7 +4070,7 @@ tensorflow::Status 
ConvertCombinedNMS(OpConverterParams* params) { params->outputs->push_back(TRT_TensorOrWeights(output_nmsed_classes)); params->outputs->push_back(TRT_TensorOrWeights(output_num_detections)); - return tensorflow::Status::OK(); + return Status::OK(); } #endif // CombinedNonMaxSuppression ",0,train 98f38b608073e761d75227373b2b2c7d26c483e5,tensorflow/tensorflow,"Add support for parsing the ""gather"" HLO PiperOrigin-RevId: 187050345",hlo_parser.cc,"@@ -1049,9 +1049,40 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, HloInstruction::CreateDot(shape, operands[0], operands[1], dnum)); break; } - case HloOpcode::kGather: - // TODO(b/72710576): HLO parsing is not implemented for Gather. - return TokenError(""HLO parsing is not implemented for Gather""); + case HloOpcode::kGather: { + optional> output_window_dims; + attrs[""output_window_dims""] = { + /*required=*/true, AttrTy::kBracedInt64List, &output_window_dims}; + optional> elided_window_dims; + attrs[""elided_window_dims""] = { + /*required=*/true, AttrTy::kBracedInt64List, &elided_window_dims}; + optional> gather_dims_to_operand_dims; + attrs[""gather_dims_to_operand_dims""] = {/*required=*/true, + AttrTy::kBracedInt64List, + &gather_dims_to_operand_dims}; + optional index_vector_dim; + attrs[""index_vector_dim""] = {/*required=*/true, AttrTy::kInt64, + &index_vector_dim}; + optional> window_bounds; + attrs[""window_bounds""] = {/*required=*/true, AttrTy::kBracedInt64List, + &window_bounds}; + + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { + return false; + } + + GatherDimensionNumbers dim_numbers = HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/*output_window_dims, + /*elided_window_dims=*/*elided_window_dims, + /*gather_dims_to_operand_dims=*/*gather_dims_to_operand_dims, + /*index_vector_dim=*/*index_vector_dim); + + instruction = builder->AddInstruction(HloInstruction::CreateGather( + shape, /*operand=*/operands[0], /*gather_indices=*/operands[1], + dim_numbers, *window_bounds)); + break; + } case HloOpcode::kTrace: return TokenError(StrCat(""parsing not yet implemented for op: "", HloOpcodeString(opcode))); ",0,train 98f38b608073e761d75227373b2b2c7d26c483e5,tensorflow/tensorflow,"Add support for parsing the ""gather"" HLO PiperOrigin-RevId: 187050345",hlo_parser_test.cc,"@@ -716,6 +716,18 @@ ENTRY %sparse_f32_r1 () -> f32[9] { ROOT %foo = f32[9]sparse{10} constant(f32[9]{1: 2, 3: 4, 5: 6}) } +)"" +}, +{ +""gather"", +R""(HloModule StringifyGather + +ENTRY %Gather (input_tensor: f32[50,49,48,47,46], gather_indices: s64[10,9,8,7,5]) -> f32[10,9,8,7,30,29,28,27,26] { + %input_tensor = f32[50,49,48,47,46]{4,3,2,1,0} parameter(0) + %gather_indices = s64[10,9,8,7,5]{4,3,2,1,0} parameter(1) + ROOT %gather = f32[10,9,8,7,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} gather(f32[50,49,48,47,46]{4,3,2,1,0} %input_tensor, s64[10,9,8,7,5]{4,3,2,1,0} %gather_indices), output_window_dims={4,5,6,7,8}, elided_window_dims={}, gather_dims_to_operand_dims={0,1,2,3,4}, index_vector_dim=4, window_bounds={30,29,28,27,26} +} + )"" }, }); @@ -860,6 +872,18 @@ ENTRY dot { ROOT dot = f32[2,3]{1,0} dot(a, b), lhs_batch_dims={0}, lhs_contracting_dims={1}, rhs_contracting_dims={0} } +)"" +}, +{ +""gather"", +R""(HloModule gather + +ENTRY Gather { + input_tensor = f32[50,49,48,47,46]{4,3,2,1,0} parameter(0) + gather_indices = s64[10,9,8,7,5]{4,3,2,1,0} parameter(1) + ROOT gather = f32[10,9,8,7,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} gather(input_tensor, gather_indices), output_window_dims={4,5,6,7,8}, 
elided_window_dims={}, gather_dims_to_operand_dims={0,1,2,3,4}, index_vector_dim=4, window_bounds={30,29,28,27,26} +} + )"" }, }); ",0,train 6958623f7330f7268d05c6753959db3093638d37,tensorflow/tensorflow,"Add simple microbenchmarks for SparseDenseCwiseMul. Change: 121850503",sparse_dense_binary_op_shared_test.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include ""tensorflow/core/common_runtime/kernel_benchmark_testlib.h"" #include ""tensorflow/core/framework/allocator.h"" #include ""tensorflow/core/framework/fake_input.h"" #include ""tensorflow/core/framework/node_def_builder.h"" @@ -20,8 +21,11 @@ limitations under the License. #include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/tensor_testutil.h"" #include ""tensorflow/core/framework/types.h"" +#include ""tensorflow/core/graph/graph.h"" +#include ""tensorflow/core/graph/node_builder.h"" #include ""tensorflow/core/kernels/ops_testutil.h"" #include ""tensorflow/core/platform/test.h"" +#include ""tensorflow/core/platform/test_benchmark.h"" namespace tensorflow { @@ -194,6 +198,91 @@ TEST_F(SparseDenseCMulTest, BroadcastDense) { test::ExpectTensorEqual(expected, *GetOutput(0)); } +// Benchmarking code follows. + +static Graph* SparseMatCMulDenseMat(Graph* g, Node* sp_indices, Node* sp_vals, + Node* sp_shape, Node* dense) { + Node* ret; + TF_CHECK_OK( + NodeBuilder(g->NewName(""SparseDenseCwiseMul""), ""SparseDenseCwiseMul"") + .Input(sp_indices) + .Input(sp_vals) + .Input(sp_shape) + .Input(dense) + .Finalize(g, &ret)); + return g; +} + +static Node* MakeTensor(Graph* g, int B, int M, int N) { + Tensor data(DT_FLOAT, TensorShape({B, M, N})); + data.flat().setRandom(); + return test::graph::Constant(g, data); +} + +struct ST { + Node* indices; + Node* vals; + Node* shape; +}; + +static ST MakeSparseTensor(Graph* g, int B, int M, int N, int nnz_inner) { + const int total_nnz = B * M * nnz_inner; + const int kNumDims = 3; + + Tensor indices(DT_INT64, TensorShape({total_nnz, kNumDims})); + Tensor vals(DT_FLOAT, TensorShape({total_nnz})); + Tensor shape(DT_INT64, TensorShape({kNumDims})); + vals.flat().setRandom(); + test::FillValues(&shape, gtl::ArraySlice({B, M, N})); + auto indices_mat = indices.matrix(); + + int nnz_cnt = 0; + std::unordered_set picked; + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dist(0, N - 1); + + for (int i = 0; i < B; ++i) { + for (int j = 0; j < M; ++j) { + for (int k = 0; k < nnz_inner; ++k) { + indices_mat(nnz_cnt, 0) = i; + indices_mat(nnz_cnt, 1) = j; + + int inner = dist(gen); + while (picked.count(inner) == 1) { + inner = dist(gen); + } + picked.insert(inner); + indices_mat(nnz_cnt, 2) = inner; + + ++nnz_cnt; + } + } + } + + return ST{test::graph::Constant(g, indices), test::graph::Constant(g, vals), + test::graph::Constant(g, shape)}; +} + +// [8, 4, N{nnz}] cmul [8, 4, N] +static void BM_SparseMatCMulDenseMat(int iters, int N, int nnz_inner) { + Graph* g = new Graph(OpRegistry::Global()); + Node* dense = MakeTensor(g, 8, 4, N); + ST sp = MakeSparseTensor(g, 8, 4, N, nnz_inner); + + testing::ItemsProcessed(static_cast(iters * 8 * 4 * N * 2)); + test::Benchmark( + ""cpu"", SparseMatCMulDenseMat(g, sp.indices, sp.vals, sp.shape, dense)) + .Run(iters); +} +BENCHMARK(BM_SparseMatCMulDenseMat) + ->ArgPair(1 << 20, 1) + ->ArgPair(1 << 20, 8) + ->ArgPair(1 << 20, 32) + ->ArgPair(1 << 18, 1) + 
->ArgPair(1 << 18, 8) + ->ArgPair(1 << 18, 32); + } // namespace } // namespace tensorflow ",0,train 1df80e82acd1209f6cc95e68c9eedc59953d4a95,tensorflow/tensorflow,"Minor: default-construct RunOptions and RunOutputs in vanilla Run(). Change: 115714930",direct_session.cc,"@@ -252,8 +252,9 @@ Status DirectSession::Run(const NamedTensorList& inputs, const std::vector& output_names, const std::vector& target_nodes, std::vector* outputs) { - return RunWithOpts(kEmptyRunOptions, inputs, output_names, target_nodes, - outputs, &kEmptyRunOutputs); + RunOutputs run_outputs; + return RunWithOpts(RunOptions(), inputs, output_names, target_nodes, outputs, + &run_outputs); } Status DirectSession::RunWithOpts(const RunOptions& run_options, ",0,train 1df80e82acd1209f6cc95e68c9eedc59953d4a95,tensorflow/tensorflow,"Minor: default-construct RunOptions and RunOutputs in vanilla Run(). Change: 115714930",direct_session.h,"@@ -155,9 +155,6 @@ class DirectSession : public Session { Graph* graph = nullptr; }; - const RunOptions kEmptyRunOptions = RunOptions(); - RunOutputs kEmptyRunOutputs = RunOutputs(); - // Retrieves an already existing set of executors to run 'inputs' and // 'outputs', or creates and caches them for future use. ::tensorflow::Status GetOrCreateExecutors( ",0,train f118ff1538ac7aa8a628bba03fe66dc6811cc7fc,tensorflow/tensorflow,"Memoize HostCPU device using atomic pointer to save mutex lock PiperOrigin-RevId: 384759508 Change-Id: I3a53c8a2b1b6c0c9582dcd97a8edd09efc4b12dc",device_mgr.h,"@@ -162,7 +162,7 @@ class DynamicDeviceMgr : public DeviceMgr { std::unordered_map device_type_counts_ TF_GUARDED_BY(devices_mu_); - mutable Device* cpu_device_ TF_GUARDED_BY(devices_mu_); + mutable std::atomic cpu_device_; // memoize `HostCPU` result class DeviceCircularBuffer { public: ",0,train f118ff1538ac7aa8a628bba03fe66dc6811cc7fc,tensorflow/tensorflow,"Memoize HostCPU device using atomic pointer to save mutex lock PiperOrigin-RevId: 384759508 Change-Id: I3a53c8a2b1b6c0c9582dcd97a8edd09efc4b12dc",dynamic_device_mgr.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include #include #include @@ -222,17 +223,22 @@ Status DynamicDeviceMgr::RemoveDevicesByName( } Device* DynamicDeviceMgr::HostCPU() const { + Device* device = cpu_device_.load(std::memory_order_relaxed); + + // Host CPU device can't be removed, so if we found valid device once, we + // do not need to check that it is still in the device list. 
+ if (device != nullptr) return device; + mutex_lock l(devices_mu_); - if (cpu_device_ == nullptr) { - for (int i = 0; i < dynamic_devices_.size(); ++i) { - auto* d = dynamic_devices_[i].get(); - if (d->device_type() == DEVICE_CPU && d->parsed_name().id == 0) { - cpu_device_ = d; - break; - } + for (int i = 0; i < dynamic_devices_.size(); ++i) { + Device* d = dynamic_devices_[i].get(); + if (d->device_type() == DEVICE_CPU && d->parsed_name().id == 0) { + cpu_device_ = d; + break; } } - return cpu_device_; + + return cpu_device_.load(std::memory_order_relaxed); } } // namespace tensorflow ",0,train cb5c61a3e11a37fb39a246aaf8ed6d02dd9ae9ab,tensorflow/tensorflow,Refine LeakyRelu codes and update APIs.,pywrap_tfe_src.cc,"@@ -1730,6 +1730,7 @@ bool OpDoesntRequireOutput(const string& op_name) { ""SoftplusGrad"", ""Softsign"", ""ReluGrad"", + ""LeakyRelu"", ""LeakyReluGrad"", ""Conv2D"", ""DepthwiseConv2dNative"", @@ -1800,7 +1801,6 @@ bool OpDoesntRequireInput(const string& op_name) { ""BiasAdd"", ""Relu"", ""Relu6"", - ""LeakyRelu"", ""Elu"", ""Selu"", ""SparseSoftmaxCrossEntropyWithLogits"", ",0,train 2bfb700080d6b3aa5b4ec00c5928e87db0d56677,tensorflow/tensorflow,"Change gradients to be computed with respect to variable ref rather than snapshot. Variables may create another snapshot or their ref may be exposed via public API (e.g., var.op.outputs[0] or graph.as_graph_element(var) which happens fairly often inside libraries or collection serialization). On the other hand, tf.gradients() use convert_to_tensor() which returns a snapshot, and gradients were computed with respect to this particular snapshot, which makes the gradients incorrect. Change: 147800865",imperative_graph.py,"@@ -102,7 +102,7 @@ class ImperativeGraph(ops.Graph): # calling the original gradient function. def _imperative_op_grad(op, *grad): with self.replace_outputs(op): - return self._gradient_function_map[op](op, *grad) + return self._gradient_function_map[op.name](op, *grad) ops.RegisterGradient(self._imperative_op_type)(_imperative_op_grad) @@ -166,7 +166,7 @@ class ImperativeGraph(ops.Graph): """"""Replaces the outputs of `op` with values recorded in `_outputs_map`."""""" # pylint: disable=protected-access old_outputs = op._outputs - op._outputs = self._outputs_map[op] + op._outputs = self._outputs_map[op.name] yield op._outputs = old_outputs # pylint: enable=protected-access @@ -318,9 +318,9 @@ class ImperativeGraph(ops.Graph): for i, _ in enumerate(shapes): values[i].set_shape(shapes[i]) - self._outputs_map[orig_op] = values + self._outputs_map[orig_op.name] = values try: - self._gradient_function_map[orig_op] = ops.get_gradient_function( + self._gradient_function_map[orig_op.name] = ops.get_gradient_function( orig_op) except (KeyError, LookupError): pass ",0,test 2bfb700080d6b3aa5b4ec00c5928e87db0d56677,tensorflow/tensorflow,"Change gradients to be computed with respect to variable ref rather than snapshot. Variables may create another snapshot or their ref may be exposed via public API (e.g., var.op.outputs[0] or graph.as_graph_element(var) which happens fairly often inside libraries or collection serialization). On the other hand, tf.gradients() use convert_to_tensor() which returns a snapshot, and gradients were computed with respect to this particular snapshot, which makes the gradients incorrect. 
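The DynamicDeviceMgr::HostCPU hunk from commit f118ff15 above memoizes the host-CPU lookup behind an atomic pointer so the mutex is only taken until the device has been found once. A minimal standalone sketch of that pattern, using an invented DeviceRegistry/Device pair rather than the real TensorFlow classes, and assuming (as the original comment notes) that the cached device is never removed:

#include <atomic>
#include <mutex>
#include <vector>

struct Device { bool is_host_cpu = false; };

class DeviceRegistry {
 public:
  // Fast path: a relaxed atomic load skips the mutex once the pointer is set.
  // Slow path: scan the device list under the lock and cache the result.
  Device* HostCPU() const {
    Device* d = cpu_device_.load(std::memory_order_relaxed);
    if (d != nullptr) return d;

    std::lock_guard<std::mutex> lock(mu_);
    for (Device* candidate : devices_) {
      if (candidate->is_host_cpu) {
        cpu_device_.store(candidate, std::memory_order_relaxed);
        break;
      }
    }
    return cpu_device_.load(std::memory_order_relaxed);
  }

 private:
  mutable std::mutex mu_;
  std::vector<Device*> devices_;
  mutable std::atomic<Device*> cpu_device_{nullptr};
};

The caching is only sound because a registered host CPU is never deleted; if devices could be removed, the relaxed fast path would have to be dropped or guarded differently.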
Change: 147800865",imperative_test.py,"@@ -107,6 +107,17 @@ class ImperativeTest(test.TestCase): b = a + random_ops.random_uniform([], minval=0.1) self.assertGreaterEqual(b.value, a.value) + def testGradientThroughNewStep(self): + with imperative_mode.ImperativeMode(self._target) as mode: + x = constant_op.constant(np.random.rand(3)) + y = math_ops.tanh(x) + + with mode.new_step(): + z = constant_op.constant(np.random.rand(3)) + w = math_ops.multiply(y, z) + dx = gradients_impl.gradients(w, x) + self.assertAllClose(dx[0].value, z.value * (1.0 - y.value ** 2)) + def testEscape(self): """"""Makes sure that values don't escape a `new_step` scope."""""" with imperative_mode.ImperativeMode(self._target) as mode: ",0,test 2bfb700080d6b3aa5b4ec00c5928e87db0d56677,tensorflow/tensorflow,"Change gradients to be computed with respect to variable ref rather than snapshot. Variables may create another snapshot or their ref may be exposed via public API (e.g., var.op.outputs[0] or graph.as_graph_element(var) which happens fairly often inside libraries or collection serialization). On the other hand, tf.gradients() use convert_to_tensor() which returns a snapshot, and gradients were computed with respect to this particular snapshot, which makes the gradients incorrect. Change: 147800865",gradients_impl.py,"@@ -433,7 +433,8 @@ def gradients(ys, xs = [x.handle if isinstance(x, resource_variable_ops.ResourceVariable) else x for x in xs] - xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name=""x"") + xs = ops.internal_convert_n_to_tensor_or_indexed_slices(xs, name=""x"", + as_ref=True) grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops) # The approach we take here is as follows: Create a list of all ops in the ",0,test 2bfb700080d6b3aa5b4ec00c5928e87db0d56677,tensorflow/tensorflow,"Change gradients to be computed with respect to variable ref rather than snapshot. Variables may create another snapshot or their ref may be exposed via public API (e.g., var.op.outputs[0] or graph.as_graph_element(var) which happens fairly often inside libraries or collection serialization). On the other hand, tf.gradients() use convert_to_tensor() which returns a snapshot, and gradients were computed with respect to this particular snapshot, which makes the gradients incorrect. 
Change: 147800865",gradients_test.py,"@@ -44,6 +44,7 @@ from tensorflow.python.ops import nn_grad # pylint: disable=unused-import from tensorflow.python.ops import state_grad # pylint: disable=unused-import from tensorflow.python.ops import tensor_array_grad # pylint: disable=unused-import from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.ops import variables from tensorflow.python.ops.nn_ops import bias_add from tensorflow.python.platform import googletest @@ -311,6 +312,27 @@ class GradientsTest(test_util.TensorFlowTestCase): grad, = gradients.gradients(target, v) self.assertIsNone(grad) + def testVariableReadValueGradient(self): + with ops.Graph().as_default(): + init = constant_op.constant(100.0) + var = variables.Variable(init) + gradient = gradients.gradients(var.read_value(), var) + self.assertIsNotNone(gradient) + + def testVariableAsGraphElementGradient(self): + with ops.Graph().as_default() as graph: + init = constant_op.constant(100.0) + var = variables.Variable(init) + gradient = gradients.gradients(graph.as_graph_element(var), var) + self.assertIsNotNone(gradient) + + def testVariableRefGradient(self): + with ops.Graph().as_default(): + init = constant_op.constant(100.0) + var = variables.Variable(init) + gradient = gradients.gradients(var._ref(), var) + self.assertIsNotNone(gradient) + class FunctionGradientsTest(test_util.TensorFlowTestCase): ",0,test 9b6b179fe33a0daab4c6b4c7314f77e49825f999,tensorflow/tensorflow,"Make ControlFlowContext.AddInnerOp recursively propagate the inner op to the enclosing context by default. PiperOrigin-RevId: 170099939",control_flow_ops.py,"@@ -1496,7 +1496,8 @@ class ControlFlowContext(object): def AddInnerOp(self, op): """"""Notifies a scope about an operator added to an inner scope."""""" - pass + if self._outer_context: + self._outer_context.AddInnerOp(op) def GetControlPivot(self): """"""Returns the pivot node for this context, or None."""""" ",0,train 6629e3a9863d25a81764f3d3115c1bfc2a7d8e67,tensorflow/tensorflow,"Enabled model pruning by default PiperOrigin-RevId: 161234072",graph_rewriter.cc,"@@ -29,16 +29,7 @@ GraphRewriter::GraphRewriter(const GrapplerItem& item) { } for (auto& node : item.graph.node()) { - for (const auto& input : node.input()) { - int position = 0; - string input_node_name = ParseNodeName(input, &position); - if (position < 0) { - // This is a control edge - auto itr = nodes_.find(input_node_name); - CHECK(itr != nodes_.end()); - control_dependency_drivers_.insert(itr->second); - } - } + RecordControlDependencyDrivers(node); } } @@ -46,21 +37,9 @@ void GraphRewriter::ForwardInputs( const NodeDef& original_node, const std::unordered_set& nodes_to_delete, NodeDef* new_node) { - for (const auto& input : original_node.input()) { - string input_node_name = NodeName(input); - auto itr = nodes_.find(input_node_name); - if (itr == nodes_.end()) { - // Invalid input, preserve it as is. 
- *new_node->add_input() = input; - } - const NodeDef* input_node = itr->second; - if ((input_node->device().empty() || original_node.device().empty() || - input_node->device() == original_node.device()) && - nodes_to_delete.find(input_node) != nodes_to_delete.end()) { - ForwardInputs(*input_node, nodes_to_delete, new_node); - } else { - *new_node->add_input() = input; - } + ForwardInputsInternal(original_node, nodes_to_delete, new_node); + if (!new_node->name().empty()) { + optimized_nodes_[new_node->name()] = new_node; } } @@ -79,5 +58,50 @@ bool GraphRewriter::IsDrivenByControlDependency(const NodeDef& node) const { return false; } +void GraphRewriter::RecordControlDependencyDrivers(const NodeDef& node) { + for (const auto& input : node.input()) { + int position = 0; + string input_node_name = ParseNodeName(input, &position); + if (position < 0) { + // This is a control edge + auto itr = nodes_.find(input_node_name); + CHECK(itr != nodes_.end()); + control_dependency_drivers_.insert(itr->second); + } + } +} + +void GraphRewriter::ForwardInputsInternal( + const NodeDef& node, + const std::unordered_set& nodes_to_delete, + NodeDef* new_node) { + // To speed things up, use the optimized version of the node if + // available. + auto itr = optimized_nodes_.find(node.name()); + if (itr != optimized_nodes_.end()) { + for (const string& input : itr->second->input()) { + *new_node->add_input() = input; + } + return; + } + for (const auto& input : node.input()) { + string input_node_name = NodeName(input); + auto itr = nodes_.find(input_node_name); + if (itr == nodes_.end()) { + // Invalid input, preserve it as is. + *new_node->add_input() = input; + continue; + } + const NodeDef* input_node = itr->second; + if ((input_node->device().empty() || node.device().empty() || + input_node->device() == node.device()) && + nodes_to_delete.find(input_node) != nodes_to_delete.end()) { + ForwardInputsInternal(*input_node, nodes_to_delete, new_node); + } else { + *new_node->add_input() = input; + } + } +} + } // end namespace grappler } // end namespace tensorflow ",0,train 6629e3a9863d25a81764f3d3115c1bfc2a7d8e67,tensorflow/tensorflow,"Enabled model pruning by default PiperOrigin-RevId: 161234072",graph_rewriter.h,"@@ -48,7 +48,14 @@ class GraphRewriter { bool IsDrivenByControlDependency(const NodeDef& node) const; private: + void RecordControlDependencyDrivers(const NodeDef& node); + void ForwardInputsInternal( + const NodeDef& original_node, + const std::unordered_set& nodes_to_delete, + NodeDef* new_node); + std::unordered_map nodes_; + std::unordered_map optimized_nodes_; std::unordered_set control_dependency_drivers_; }; ",0,train 6629e3a9863d25a81764f3d3115c1bfc2a7d8e67,tensorflow/tensorflow,"Enabled model pruning by default PiperOrigin-RevId: 161234072",meta_optimizer.cc,"@@ -120,9 +120,9 @@ void MetaOptimizer::Feedback(Cluster* cluster, const GrapplerItem& item, } bool MetaOptimizerEnabled(const RewriterConfig& cfg) { - return cfg.optimize_tensor_layout() || cfg.constant_folding() || - cfg.auto_parallel().enable() || cfg.memory_optimization() > 0 || - !cfg.optimizers().empty(); + return !cfg.disable_model_pruning() || cfg.optimize_tensor_layout() || + cfg.constant_folding() || cfg.auto_parallel().enable() || + cfg.memory_optimization() > 0 || !cfg.optimizers().empty(); } Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg, ",0,train 6629e3a9863d25a81764f3d3115c1bfc2a7d8e67,tensorflow/tensorflow,"Enabled model pruning by default PiperOrigin-RevId: 161234072",model_pruner.cc,"@@ 
-63,6 +63,11 @@ Status ModelPruner::Optimize(Cluster* cluster, const GrapplerItem& item, } } + if (nodes_to_delete.empty()) { + *pruned_graph = item.graph; + return Status::OK(); + } + for (auto& node : item.graph.node()) { NodeDef* new_node = pruned_graph->add_node(); *new_node = node; ",0,train 05ca1e9bf3e7a04603eac921c4d95c5dbeca7dd6,tensorflow/tensorflow,"[Grappler] Re-enable DependencyOptimizer in FunctionTest.testControlFlowStrictness. PiperOrigin-RevId: 232351788",function_test.py,"@@ -497,8 +497,6 @@ class FunctionTest(test.TestCase): lambda y: AssertFail(y), [x]) # pylint: enable=unnecessary-lambda - rewriter_config = rewriter_config_pb2.RewriterConfig( - dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF) # Enables inlining. config = config_pb2.ConfigProto( graph_options=config_pb2.GraphOptions( @@ -506,8 +504,7 @@ class FunctionTest(test.TestCase): opt_level=config_pb2.OptimizerOptions.L0, do_common_subexpression_elimination=True, do_function_inlining=True, - do_constant_folding=True), - rewrite_options=rewriter_config)) + do_constant_folding=True))) with session.Session(config=config) as sess: # Since the 'False' branch is not taken, the assertion should not fire. ",0,train 16033c0b3484409a965acc0dd3054695145311a8,tensorflow/tensorflow,Python tf.config tf32 interface,config.py,"@@ -18,10 +18,36 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python import _pywrap_tf32_execution from tensorflow.python.eager import context from tensorflow.python.util import deprecation from tensorflow.python.util.tf_export import tf_export +def tensor_float32_execution_allowed(): + """"""Get if TensorFloat-32 operations are enabled on supported hardware. + + Returns: + True if TensorFloat-32 execution is enabled and False otherwise. + """""" + return _pywrap_tf32_execution.is_allowed() + +def allow_tensor_float_32_execution(allow): + """"""Allow use of TensorFloat-32 with float32 ops on supported hardware. + + TensorFloat-32 is a math mode introduced with the NVIDIA Ampere architecture. + TensorFloat-32 kernels take float32 inputs and produce float32 outputs. + Internally, the inputs are cast to a custom representation with 10-bit + mantissa (similar to float16) and 8-bit exponent (similar to float32) and are + executed using TensorCores with float32 accumulation. For more information, + see https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/. + + TensorFloat-32 execution is disabled by default, but this may change in a + future version. + + Args: + allow: whether to allow TensorFloat-32 execution + """""" + _pywrap_tf32_execution.allow(allow) @tf_export('config.threading.get_intra_op_parallelism_threads') def get_intra_op_parallelism_threads(): ",0,train 16033c0b3484409a965acc0dd3054695145311a8,tensorflow/tensorflow,Python tf.config tf32 interface,tf32.cc,"@@ -0,0 +1,22 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include ""pybind11/pybind11.h"" +#include ""tensorflow/core/platform/tf32_utils.h"" + +PYBIND11_MODULE(_pywrap_tf32_execution, m) { + m.def(""allow"", &tensorflow::allow_tf32_execution); + m.def(""is_allowed"", &tensorflow::tf32_execution_allowed); +} ",0,train 690a403686c341e03164b7e39fda6e9cec062296,tensorflow/tensorflow,"GPUToCUDA: attach CUBIN to the nested module rather than to the function Originally, we were attaching attributes containing CUBIN blobs to the kernel function called by `gpu.launch_func`. This kernel is now contained in a nested module that is used as a compilation unit. Attach compiled CUBIN blobs to the module rather than to the function since we were compiling the module. This also avoids duplication of the attribute on multiple kernels within the same module. PiperOrigin-RevId: 273497303",GPUToCUDAPass.h,"@@ -38,7 +38,8 @@ class LLVMDialect; template class OpPassBase; using OwnedCubin = std::unique_ptr>; -using CubinGenerator = std::function; +using CubinGenerator = + std::function; /// Creates a pass to convert kernel functions into CUBIN blobs. /// ",0,train 8df4f0ce0acc838dd252ae504d44676c35cb6a6b,tensorflow/tensorflow,"Fixed `Wrapper`'s `get_config` and `from_config`. * `get_config`: properly serialize the wrapped layer. This notably fixes issues when wrapping custom layers that have been registered using `tf.keras.utils.register_keras_serializable`. * `from_config`: properly copy input config to avoid side effects.",wrappers.py,"@@ -68,10 +68,7 @@ class Wrapper(Layer): def get_config(self): config = { - 'layer': { - 'class_name': self.layer.__class__.__name__, - 'config': self.layer.get_config() - } + 'layer': generic_utils.serialize_keras_object(self.layer) } base_config = super(Wrapper, self).get_config() return dict(list(base_config.items()) + list(config.items())) @@ -80,7 +77,7 @@ class Wrapper(Layer): def from_config(cls, config, custom_objects=None): from tensorflow.python.keras.layers import deserialize as deserialize_layer # pylint: disable=g-import-not-at-top # Avoid mutating the input dict - config = config.copy() + config = copy.deepcopy(config) layer = deserialize_layer( config.pop('layer'), custom_objects=custom_objects) return cls(layer, **config) ",0,test 2375810387068fb66ecba85c1fce0b0e4f5568b2,tensorflow/tensorflow,Eliminate the use of q_alpha. Use QuantizedMultiplier instead.,activations.cc,"@@ -76,11 +76,10 @@ struct LogSoftmaxOpData : public OpData { }; struct LeakyReluOpData : public OpData { - uint8_t q_alpha = 1; - uint8_t q_identity = 1; - uint8_t zero_point = 0; - int32_t output_multiplier = 0; - int output_shift = 0; + int32_t output_multiplier_alpha = 0; + int output_shift_alpha = 0; + int32_t output_multiplier_identity = 0; + int output_shift_identity = 0; }; struct PreluOpData : public OpData { @@ -367,26 +366,11 @@ TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) { if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { const auto* params = reinterpret_cast(node->builtin_data); - // Quantize the alpha with predetermined ZOOM_FACTOR and ZERO_POINT. - // Since in most cases 0 < alpha < 1, by setting ZOOM_FACTOR to be 200, - // quantized alpha can fit in INT8 range, and preserve most precision. 
- TF_LITE_ENSURE(context, params->alpha >= 0); - TF_LITE_ENSURE(context, params->alpha < 1); - static const uint8_t ZOOM_FACTOR = 200; - static const uint8_t ZERO_POINT = 0; - auto q_alpha = std::round(ZERO_POINT + params->alpha * ZOOM_FACTOR); - // Make sure quantized alpha is within INT8 range. - TF_LITE_ENSURE(context, q_alpha >= std::numeric_limits::min()); - TF_LITE_ENSURE(context, q_alpha <= std::numeric_limits::max()); - // q_alpha will be stored as uint8_t. It won't affect the input - data->q_alpha = q_alpha; - - // q_identity is used to make sure those>0 get correct value after dequantization. - data->q_identity = ZOOM_FACTOR; - data->zero_point = ZERO_POINT; - - double real_multiplier = input->params.scale / (output->params.scale * ZOOM_FACTOR); - QuantizeMultiplierSmallerThanOneExp(real_multiplier, &data->output_multiplier, &data->output_shift); + + double alpha_multiplier = input->params.scale * params->alpha / output->params.scale; + QuantizeMultiplier(alpha_multiplier, &data->output_multiplier_alpha, &data->output_shift_alpha); + double identity_multiplier = input->params.scale / output->params.scale; + QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity, &data->output_shift_identity); } return context->ResizeTensor(context, output, TfLiteIntArrayCopy(input->dims)); } @@ -1115,13 +1099,12 @@ TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) { } break; case kTfLiteUInt8: { - LeakyReluParamsQuant op_params{data->q_alpha, - data->q_identity, - data->zero_point, - input->params.zero_point, + LeakyReluParamsQuant op_params{input->params.zero_point, output->params.zero_point, - data->output_multiplier, - data->output_shift}; + data->output_multiplier_alpha, + data->output_shift_alpha, + data->output_multiplier_identity, + data->output_shift_identity}; reference_ops::QuantizeLeakyRelu( op_params, GetTensorShape(input), @@ -1132,21 +1115,20 @@ TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) { } break; case kTfLiteInt8: { - LeakyReluParamsQuant op_params{data->q_alpha, - data->q_identity, - data->zero_point, - input->params.zero_point, - output->params.zero_point, - data->output_multiplier, - data->output_shift}; - reference_ops::QuantizeLeakyRelu( - op_params, - GetTensorShape(input), - GetTensorData(input), - GetTensorShape(output), - GetTensorData(output)); - return kTfLiteOk; - } break; + LeakyReluParamsQuant op_params{input->params.zero_point, + output->params.zero_point, + data->output_multiplier_alpha, + data->output_shift_alpha, + data->output_multiplier_identity, + data->output_shift_identity}; + reference_ops::QuantizeLeakyRelu( + op_params, + GetTensorShape(input), + GetTensorData(input), + GetTensorShape(output), + GetTensorData(output)); + return kTfLiteOk; + } break; default: context->ReportError( context, ""Only float32, int8 and uint8 is supported currently, got %s."", ",0,train 2375810387068fb66ecba85c1fce0b0e4f5568b2,tensorflow/tensorflow,Eliminate the use of q_alpha. 
Use QuantizedMultiplier instead.,reference_ops.h,"@@ -270,20 +270,26 @@ inline void QuantizeLeakyRelu(const LeakyReluParamsQuant& params, const T* input_data, const RuntimeShape& output_shape, T* output_data) { - ruy::profiler::ScopeLabel label(""LeakyRelu (not fused)""); + ruy::profiler::ScopeLabel label(""Quantized LeakyRelu (not fused)""); const int flat_size = MatchingFlatSize(input_shape, output_shape); static const int32 quantized_min = std::numeric_limits::min(); static const int32 quantized_max = std::numeric_limits::max(); - static const int32 alpha_value = params.q_alpha - params.alpha_offset; - static const int32 identity_value = params.q_identity - params.alpha_offset; for (int i = 0; i < flat_size; ++i) { const int32 input_value = input_data[i] - params.input_offset; - auto q_mutliplier = (input_value >= 0) ? identity_value : alpha_value; - const int32 unclamped_output = - params.output_offset + MultiplyByQuantizedMultiplierSmallerThanOneExp( - input_value * q_mutliplier, - params.output_multiplier, - params.output_shift); + int32 unclamped_output; + if (input_value >= 0) { + unclamped_output = params.output_offset + + MultiplyByQuantizedMultiplier( + input_value, + params.output_multiplier_identity, + params.output_shift_identity); + } else { + unclamped_output = params.output_offset + + MultiplyByQuantizedMultiplier( + input_value, + params.output_multiplier_alpha, + params.output_shift_alpha); + } const T clamped_output = std::min(quantized_max, std::max(quantized_min, unclamped_output)); output_data[i] = static_cast(clamped_output); ",0,train 2375810387068fb66ecba85c1fce0b0e4f5568b2,tensorflow/tensorflow,Eliminate the use of q_alpha. Use QuantizedMultiplier instead.,types.h,"@@ -1085,13 +1085,12 @@ struct LeakyReluParams { struct LeakyReluParamsQuant { - uint8_t q_alpha; - uint8_t q_identity; - int32 alpha_offset; int32 input_offset; int32 output_offset; - int32 output_multiplier; - int output_shift; + int32 output_multiplier_alpha; + int output_shift_alpha; + int32 output_multiplier_identity; + int output_shift_identity; }; template ",0,train a6ee64cd216b3ac440262e1f4ec7872fe7026df6,tensorflow/tensorflow,"Conditionally allow changing a non-fusion computation root_instruction shape. PiperOrigin-RevId: 213191899",hlo_computation.cc,"@@ -279,11 +279,11 @@ Status HloComputation::RemoveInstruction(HloInstruction* instruction) { return Status::OK(); } -void HloComputation::set_root_instruction( - HloInstruction* new_root_instruction) { +void HloComputation::set_root_instruction(HloInstruction* new_root_instruction, + bool accept_different_shape) { // The shape of the root (ignoring layout) is an invariant of the computation // for non-fusion cases. - if (!IsFusionComputation()) { + if (!IsFusionComputation() && !accept_different_shape) { CHECK(ShapeUtil::Compatible(new_root_instruction->shape(), root_instruction_->shape())) << new_root_instruction->shape() << "" is incompatible with "" ",0,train a6ee64cd216b3ac440262e1f4ec7872fe7026df6,tensorflow/tensorflow,"Conditionally allow changing a non-fusion computation root_instruction shape. PiperOrigin-RevId: 213191899",hlo_computation.h,"@@ -134,9 +134,11 @@ class HloComputation { Status RemoveInstructionAndUnusedOperands(HloInstruction* instruction); // Set the root of the computation to the given instruction. The instruction - // must have already been added to the computation and have the same shape as - // the result of the computation for non fusion computations. 
- void set_root_instruction(HloInstruction* new_root_instruction); + // must have already been added to the computation. In addition it must have + // the same shape as the result of the computation for non fusion + // computations, except if accept_different_shape is set to true. + void set_root_instruction(HloInstruction* new_root_instruction, + bool accept_different_shape = false); // Return the root instruction of the computation. The root instruction is the // instruction which produces the output of the computation. ",0,train 31a492886cbc4f62494cbe08189ce72d8892c9c1,tensorflow/tensorflow,"Fix go_backwards/mask bug in recurrent_v2. - When explicitly assigning a GPU device, it complains an error, no argmax kernel for int32. Note that the use of argmax to juedge whether a mask is right padded or not, is not stable given that the argmax does not guarantee to return the lowest indices. - The behavior of go_backwards in V2 is in correct when cudnn is used. PiperOrigin-RevId: 254676863",gru_v2_test.py,"@@ -516,6 +516,31 @@ class GRUV2Test(keras_parameterized.TestCase): run_eagerly=testing_utils.should_run_eagerly()) model.fit(x, y, epochs=1, shuffle=False) + @test_util.run_v2_only + def test_explicit_device_with_go_backward_and_mask(self): + batch_size = 8 + timestep = 7 + masksteps = 5 + units = 4 + + inputs = np.random.randn(batch_size, timestep, units).astype(np.float32) + mask = np.ones((batch_size, timestep)).astype(np.bool) + mask[:, masksteps:] = 0 + + # Test for V1 behavior. + lstm_v1 = rnn_v1.GRU(units, return_sequences=True, go_backwards=True) + with test_util.device(use_gpu=True): + outputs_masked_v1 = lstm_v1(inputs, mask=constant_op.constant(mask)) + outputs_trimmed_v1 = lstm_v1(inputs[:, :masksteps]) + self.assertAllClose(outputs_masked_v1[:, -masksteps:], outputs_trimmed_v1) + + # Test for V2 behavior. + lstm = rnn.GRU(units, return_sequences=True, go_backwards=True) + with test_util.device(use_gpu=True): + outputs_masked = lstm(inputs, mask=constant_op.constant(mask)) + outputs_trimmed = lstm(inputs[:, :masksteps]) + self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed) + class GRULayerGradientTapeTest(test.TestCase): ",0,train 31a492886cbc4f62494cbe08189ce72d8892c9c1,tensorflow/tensorflow,"Fix go_backwards/mask bug in recurrent_v2. - When explicitly assigning a GPU device, it complains an error, no argmax kernel for int32. Note that the use of argmax to juedge whether a mask is right padded or not, is not stable given that the argmax does not guarantee to return the lowest indices. - The behavior of go_backwards in V2 is in correct when cudnn is used. PiperOrigin-RevId: 254676863",lstm_v2_test.py,"@@ -717,6 +717,31 @@ class LSTMV2Test(keras_parameterized.TestCase): model.evaluate(x, y) model.predict(x) + @test_util.run_v2_only + def test_explicit_device_with_go_backward_and_mask(self): + batch_size = 8 + timestep = 7 + masksteps = 5 + units = 4 + + inputs = np.random.randn(batch_size, timestep, units).astype(np.float32) + mask = np.ones((batch_size, timestep)).astype(np.bool) + mask[:, masksteps:] = 0 + + # Test for V1 behavior. + lstm_v1 = rnn_v1.LSTM(units, return_sequences=True, go_backwards=True) + with test_util.device(use_gpu=True): + outputs_masked_v1 = lstm_v1(inputs, mask=constant_op.constant(mask)) + outputs_trimmed_v1 = lstm_v1(inputs[:, :masksteps]) + self.assertAllClose(outputs_masked_v1[:, -masksteps:], outputs_trimmed_v1) + + # Test for V2 behavior. 
+ lstm = rnn.LSTM(units, return_sequences=True, go_backwards=True) + with test_util.device(use_gpu=True): + outputs_masked = lstm(inputs, mask=constant_op.constant(mask)) + outputs_trimmed = lstm(inputs[:, :masksteps]) + self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed) + @keras_parameterized.run_all_keras_modes(config=_config) class LSTMGraphRewriteTest(keras_parameterized.TestCase): ",0,train 31a492886cbc4f62494cbe08189ce72d8892c9c1,tensorflow/tensorflow,"Fix go_backwards/mask bug in recurrent_v2. - When explicitly assigning a GPU device, it complains an error, no argmax kernel for int32. Note that the use of argmax to juedge whether a mask is right padded or not, is not stable given that the argmax does not guarantee to return the lowest indices. - The behavior of go_backwards in V2 is in correct when cudnn is used. PiperOrigin-RevId: 254676863",recurrent_v2.py,"@@ -529,11 +529,20 @@ def cudnn_gru(inputs, init_h, kernel, recurrent_kernel, bias, mask, time_major, if mask is not None: sequence_length = calculate_sequence_by_mask(mask, time_major) if go_backwards: + # Three reversals are required. E.g., + # normal input = [1, 2, 3, 0, 0] # where 0 need to be masked + # reversed_input_to_cudnn = [3, 2, 1, 0, 0] + # output_from_cudnn = [6, 5, 4, 0, 0] + # expected_output = [0, 0, 6, 5 ,4] inputs = array_ops.reverse_sequence_v2(inputs, sequence_length, seq_axis=0, batch_axis=1) outputs, h, _, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv3( inputs, input_h=init_h, input_c=0, params=params, is_training=True, rnn_mode='gru', sequence_lengths=sequence_length) + if go_backwards: + outputs = array_ops.reverse_sequence_v2(outputs, sequence_length, + seq_axis=0, batch_axis=1) + outputs = array_ops.reverse(outputs, axis=[0]) else: if go_backwards: # Reverse axis 0 since the input is already convert to time major. @@ -1111,11 +1120,20 @@ def cudnn_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, mask, if mask is not None: sequence_length = calculate_sequence_by_mask(mask, time_major) if go_backwards: + # Three reversals are required. E.g., + # normal input = [1, 2, 3, 0, 0] # where 0 need to be masked + # reversed_input_to_cudnn = [3, 2, 1, 0, 0] + # output_from_cudnn = [6, 5, 4, 0, 0] + # expected_output = [0, 0, 6, 5 ,4] inputs = array_ops.reverse_sequence_v2(inputs, sequence_length, seq_axis=0, batch_axis=1) outputs, h, c, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv3( inputs, input_h=init_h, input_c=init_c, params=params, is_training=True, rnn_mode='lstm', sequence_lengths=sequence_length) + if go_backwards: + outputs = array_ops.reverse_sequence_v2(outputs, sequence_length, + seq_axis=0, batch_axis=1) + outputs = array_ops.reverse(outputs, axis=[0]) else: # # Fill the array with shape [batch] with value of max timesteps. # sequence_length = array_ops.fill([array_ops.shape(inputs)[1]], @@ -1206,19 +1224,13 @@ def is_sequence_right_padded(mask, time_major): Returns: boolean scalar tensor, whether the mask is strictly right padded. """""" - timestep_index = 0 if time_major else 1 - max_seq_length = array_ops.shape(mask)[timestep_index] - reversed_mask = math_ops.cast(array_ops.reverse(mask, axis=[timestep_index]), - dtypes.int32) - # Use the argmax to find the index of leading 1 in the reversed mask, which is - # the index of the last True value in the original mask. 
- index = math_ops.argmax(reversed_mask, axis=timestep_index, - output_type=dtypes.int32) - count_of_true = math_ops.reduce_sum(reversed_mask, axis=timestep_index) - # If the data is strictly right padded, then the - # ""index = max_seq_length - count_of_true"" should hold. - return math_ops.reduce_all( - math_ops.equal(index, max_seq_length - count_of_true)) + if time_major: + mask = array_ops.transpose(mask) + max_seq_length = array_ops.shape(mask)[1] + count_of_true = math_ops.reduce_sum(math_ops.cast(mask, dtypes.int32), axis=1) + right_padded_mask = array_ops.sequence_mask( + count_of_true, maxlen=max_seq_length) + return math_ops.reduce_all(math_ops.equal(mask, right_padded_mask)) def calculate_sequence_by_mask(mask, time_major): @@ -1228,10 +1240,10 @@ def calculate_sequence_by_mask(mask, time_major): any timestep that should be masked, the corresponding field will be False. Consider the following example: a = [[True, True, False, False], - [True, False, True, False]] + [True, True, True, False]] It is a (2, 4) tensor, and the corresponding sequence length result should be - 1D tensor with value [2, 3]. Note that for the second example, we need to find - the index of the last True value, which is 2 and sequence length is 3. + 1D tensor with value [2, 3]. Note that the masking tensor must be right + padded that could be checked by, e.g., `is_sequence_right_padded()`. Args: mask: Boolean tensor with shape [batch, timestep] or [timestep, batch] if @@ -1242,14 +1254,8 @@ def calculate_sequence_by_mask(mask, time_major): sequence_length: 1D int32 tensor. """""" timestep_index = 0 if time_major else 1 - max_seq_length = array_ops.shape(mask)[timestep_index] - reversed_mask = math_ops.cast(array_ops.reverse(mask, axis=[timestep_index]), - dtypes.int32) - # Use the argmax to find the index of leading 1 in the reversed mask, which is - # the index of the last True value in the original mask. - reversed_index = math_ops.argmax(reversed_mask, axis=timestep_index, - output_type=dtypes.int32) - return max_seq_length - reversed_index + return math_ops.reduce_sum(math_ops.cast(mask, dtypes.int32), + axis=timestep_index) def _generate_defun_backend(unique_api_name, preferred_device, func): ",0,train 38c7871ecf1a4ede109fa0ce870c5d8d5401df05,tensorflow/tensorflow,"Manually specify `np.object` in `RaggedTensor.numpy()` as numpy otherwise complains If given ragged rows, numpy now raises a `VisibleDeprecationWarning` if `np.object` is not manually specified. PiperOrigin-RevId: 393076916 Change-Id: Ifce4a6fef429f2997df877d477fab25d69f7c183",ragged_tensor.py,"@@ -2088,7 +2088,10 @@ class RaggedTensor(composite_tensor.CompositeTensor, # np.ndarray with dtype=object and rank=1. If they have uniform lengths, # they will be combined into a single np.ndarray with dtype=row.dtype and # rank=row.rank+1. - return np.array(rows) + # + # Manually set dtype as numpy now complains when given ragged rows. + dtype = np.object if any(len(row) != len(rows[0]) for row in rows) else None + return np.array(rows, dtype=dtype) def to_list(self): """"""Returns a nested Python `list` with the values for this `RaggedTensor`. 
",0,train 85d64fdd40eafc488649b806a85a7a168dc16510,tensorflow/tensorflow,"Apply clang-tidy fixes for llvm-else-after-return in map_mhlo_to_scalar_op.h (NFC) PiperOrigin-RevId: 434207416",map_mhlo_to_scalar_op.h,"@@ -477,7 +477,8 @@ inline Value MapMhloOpToStdScalarOp( targetType)) { return b->create(loc, result_types, args, mlir::None); - } else if (sourceType.isa() && targetType.isa()) { + } + if (sourceType.isa() && targetType.isa()) { FloatType src = sourceType.cast(); FloatType res = targetType.cast(); if (src.getWidth() > res.getWidth()) { @@ -884,7 +885,8 @@ inline Value MapMhloOpToStdScalarOp(Location loc, b->create<::mlir::arith::ShRSIOp>(loc, args[0], bitwidth_minus_one); Value or_op = b->create<::mlir::arith::OrIOp>(loc, ashr, one); return b->create<::mlir::arith::SelectOp>(loc, cmp, zero, or_op); - } else if (element_type.isa()) { + } + if (element_type.isa()) { return b->create<::mlir::complex::SignOp>(loc, element_type, args.front()); } return nullptr; ",0,train 1d89c2079931f401f0831e3d27b66dd942ae3388,tensorflow/tensorflow,Fixes typos and unnecessary import in example,text_classification_character_cnn.py,"@@ -29,7 +29,6 @@ from sklearn import metrics import pandas import tensorflow as tf -from tensorflow.models.rnn import rnn, rnn_cell import skflow ### Training data @@ -59,7 +58,7 @@ FILTER_SHAPE2 = [20, N_FILTERS] POOLING_WINDOW = 4 POOLING_STRIDE = 2 -def char_rnn_model(X, y): +def char_cnn_model(X, y): """"""Character level convolutional neural network model to predict classes."""""" byte_list = tf.reshape(skflow.ops.one_hot_matrix(X, 256), [-1, MAX_DOCUMENT_LENGTH, 256, 1]) @@ -82,7 +81,7 @@ def char_rnn_model(X, y): # Apply regular WX + B and classification. return skflow.models.logistic_regression(pool2, y) -classifier = skflow.TensorFlowEstimator(model_fn=char_rnn_model, n_classes=15, +classifier = skflow.TensorFlowEstimator(model_fn=char_cnn_model, n_classes=15, steps=100, optimizer='Adam', learning_rate=0.01, continue_training=True) # Continuesly train for 1000 steps & predict on test set. ",0,train 1d89c2079931f401f0831e3d27b66dd942ae3388,tensorflow/tensorflow,Fixes typos and unnecessary import in example,text_classification_cnn.py,"@@ -17,7 +17,6 @@ from sklearn import metrics import pandas import tensorflow as tf -from tensorflow.models.rnn import rnn, rnn_cell import skflow ### Training data ",0,train 52d21a8bf7b55b26498b203b902cd417cac1b040,tensorflow/tensorflow,"Remove stale TODOs in TFLite PiperOrigin-RevId: 369661250 Change-Id: Ib0ece62269524478dc68b7b38f4a7a8197db2abe",subgraph.cc,"@@ -774,8 +774,6 @@ TfLiteStatus Subgraph::AddNodeWithParameters( } node.builtin_data = builtin_data_deleter.release(); - // TODO(ycling): Filling `custom_initial_data` and `custom_initial_data_size` - // properly for nodes generated by ReplaceNodeSubsetsWithDelegateKernels. if (registration->builtin_code == BuiltinOperator_CUSTOM) { // When it's a CUSTOM op, the `custom_options` field in the Flatbuffer ",0,train 3296af253a0cc120175b88c383b27f02f16fb59b,tensorflow/tensorflow,"[XLA] Improve cost analysis for certain operations Fusion, map, select and scatter and reduce window were not correctly accounted for. This change makes it easier to analyze their performance in the HLO profile. Change: 145729113",hlo_cost_analysis.cc,"@@ -164,8 +164,10 @@ Status HloCostAnalysis::HandleMap( // Compute the cost of all elements for this Map operation. 
auto element_count = ShapeUtil::ElementsIn(map->shape()); - flop_count_ += element_count * visitor.flop_count(); transcendental_count_ += element_count * visitor.transcendental_count(); + auto hlo_flop_count = element_count * visitor.flop_count(); + hlo_to_flop_count_[map] = hlo_flop_count; + flop_count_ += hlo_flop_count; return Status::OK(); } @@ -180,7 +182,9 @@ Status HloCostAnalysis::HandleReduce( // Compute the cost of all elements for this Reduce operation. auto reduction_count = ShapeUtil::ElementsIn(arg->shape()) - ShapeUtil::ElementsIn(reduce->shape()); - flop_count_ += reduction_count * visitor.flop_count(); + auto hlo_flop_count = reduction_count * visitor.flop_count(); + hlo_to_flop_count_[reduce] = hlo_flop_count; + flop_count_ += hlo_flop_count; transcendental_count_ += reduction_count * visitor.transcendental_count(); return Status::OK(); } @@ -201,7 +205,9 @@ Status HloCostAnalysis::HandleReduceWindow(HloInstruction* reduce_window, for (const auto& dimension : window.dimensions()) { window_size *= dimension.size(); } - flop_count_ += output_size * (window_size - 1) * visitor.flop_count(); + auto hlo_flop_count = output_size * (window_size - 1) * visitor.flop_count(); + hlo_to_flop_count_[reduce_window] = hlo_flop_count; + flop_count_ += hlo_flop_count; transcendental_count_ += output_size * (window_size - 1) * visitor.transcendental_count(); return Status::OK(); @@ -225,9 +231,11 @@ Status HloCostAnalysis::HandleSelectAndScatter(HloInstruction* instruction) { for (const auto& dimension : instruction->window().dimensions()) { window_size *= dimension.size(); } - flop_count_ += + auto hlo_flop_count = source_element_count * ((window_size - 1) * select_visitor.flop_count() + scatter_visitor.flop_count()); + hlo_to_flop_count_[instruction] = hlo_flop_count; + flop_count_ += hlo_flop_count; transcendental_count_ += source_element_count * ((window_size - 1) * select_visitor.transcendental_count() + @@ -303,8 +311,37 @@ Status HloCostAnalysis::HandleRng(HloInstruction* random, } Status HloCostAnalysis::HandleFusion(HloInstruction* fusion) { - // Fusion instruction itself does not contribute to computation. - return fusion->fused_expression_root()->Accept(this); + switch (fusion->fusion_kind()) { + case HloInstruction::FusionKind::kLoop: + case HloInstruction::FusionKind::kInput: { + // Compute the cost of the fused expression. + HloInstruction* fused_expression_root = fusion->fused_expression_root(); + HloCostAnalysis visitor; + TF_RETURN_IF_ERROR(fused_expression_root->Accept(&visitor)); + + // Compute the cost of all elements for this Fusion operation. + auto element_count = ShapeUtil::ElementsIn(fusion->shape()); + transcendental_count_ += element_count * visitor.transcendental_count(); + auto hlo_flop_count = element_count * visitor.flop_count(); + hlo_to_flop_count_[fusion] = hlo_flop_count; + flop_count_ += hlo_flop_count; + return Status::OK(); + } + case HloInstruction::FusionKind::kTransposeDot: + case HloInstruction::FusionKind::kConvBackwardFilter: + case HloInstruction::FusionKind::kConvBackwardInput: { + // Compute the cost of the fused expression. + HloInstruction* fused_expression_root = fusion->fused_expression_root(); + HloCostAnalysis visitor; + TF_RETURN_IF_ERROR(fused_expression_root->Accept(&visitor)); + + // Attribute the cost of the fused expression to the fusion node. 
+ transcendental_count_ += visitor.transcendental_count(); + hlo_to_flop_count_[fusion] += visitor.flop_count(); + flop_count_ += visitor.flop_count(); + return Status::OK(); + } + } } Status HloCostAnalysis::HandleCall( ",0,train 3296af253a0cc120175b88c383b27f02f16fb59b,tensorflow/tensorflow,"[XLA] Improve cost analysis for certain operations Fusion, map, select and scatter and reduce window were not correctly accounted for. This change makes it easier to analyze their performance in the HLO profile. Change: 145729113",hlo_cost_analysis.h,"@@ -134,10 +134,10 @@ class HloCostAnalysis : public DfsHloVisitor { std::map hlo_to_flop_count_; // The number of floating point operations in the graph. - double flop_count_ = 0; + double flop_count_ = 0.0; // The number of transcendental operations in the graph. - double transcendental_count_ = 0; + double transcendental_count_ = 0.0; TF_DISALLOW_COPY_AND_ASSIGN(HloCostAnalysis); }; ",0,train 3296af253a0cc120175b88c383b27f02f16fb59b,tensorflow/tensorflow,"[XLA] Improve cost analysis for certain operations Fusion, map, select and scatter and reduce window were not correctly accounted for. This change makes it easier to analyze their performance in the HLO profile. Change: 145729113",hlo_cost_analysis_test.cc,"@@ -333,5 +333,52 @@ TEST_F(HloCostAnalysisTest, TotalOverflowsInt64) { EXPECT_GT(matmul_analysis.flop_count(), std::numeric_limits::max()); } +class FusionCostAnalysis : public ::testing::Test { + protected: + FusionCostAnalysis() = default; + + Shape r0f32_ = ShapeUtil::MakeShape(F32, {}); +}; + +TEST_F(FusionCostAnalysis, LoopFusion) { + // Fuse all instructions in complicated expression: + // + // add = Add(C1, C2) + // clamp = Clamp(C2, add, add) + // exp = Exp(add) + // mul = Mul(exp, C3) + // sub = Sub(mul, clamp) + // tuple = Tuple({sub, sub, mul, C1}) + auto c1 = HloInstruction::CreateConstant(LiteralUtil::CreateR0(1.1f)); + auto c2 = HloInstruction::CreateConstant(LiteralUtil::CreateR0(2.1f)); + auto c3 = HloInstruction::CreateConstant(LiteralUtil::CreateR0(9.0f)); + + auto add = + HloInstruction::CreateBinary(r0f32_, HloOpcode::kAdd, c1.get(), c2.get()); + auto clamp = HloInstruction::CreateTernary(r0f32_, HloOpcode::kClamp, + c2.get(), add.get(), add.get()); + auto exp = HloInstruction::CreateUnary(r0f32_, HloOpcode::kExp, add.get()); + auto mul = HloInstruction::CreateBinary(r0f32_, HloOpcode::kMultiply, + exp.get(), c3.get()); + auto sub = HloInstruction::CreateBinary(r0f32_, HloOpcode::kSubtract, + mul.get(), clamp.get()); + auto tuple = + HloInstruction::CreateTuple({sub.get(), sub.get(), mul.get(), c1.get()}); + + auto fusion = HloInstruction::CreateFusion( + r0f32_, HloInstruction::FusionKind::kLoop, tuple.get()); + fusion->FuseInstruction(sub.get()); + fusion->FuseInstruction(mul.get()); + fusion->FuseInstruction(exp.get()); + fusion->FuseInstruction(clamp.get()); + fusion->FuseInstruction(add.get()); + + HloCostAnalysis fusion_analysis; + ASSERT_IS_OK(fusion->Accept(&fusion_analysis)); + + EXPECT_EQ(fusion_analysis.flop_count(), 4); + EXPECT_EQ(fusion_analysis.transcendental_count(), 1); +} + } // namespace } // namespace xla ",0,train 9c405cb0e9475d82ce2b0bef04ad75e206be1267,tensorflow/tensorflow,enable bf16 for Erf,cwise_op_erf.cc,"@@ -17,7 +17,8 @@ limitations under the License. 
namespace tensorflow { -REGISTER3(UnaryOp, CPU, ""Erf"", functor::erf, float, Eigen::half, double); +REGISTER4(UnaryOp, CPU, ""Erf"", functor::erf, float, Eigen::half, double, + bfloat16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) ",0,test 9c405cb0e9475d82ce2b0bef04ad75e206be1267,tensorflow/tensorflow,enable bf16 for Erf,cwise_ops_unary_test.py,"@@ -436,6 +436,7 @@ class UnaryOpTest(test.TestCase): self._compareBoth(w, compute_f32(np.arccosh), math_ops.acosh) self._compareBoth(k, compute_f32(np.arctanh), math_ops.atanh, grad_tol=1e-2) + self._compareBoth(x, compute_f32(np.vectorize(math.erf)), math_ops.erf) @test.disable_with_predicate( pred=test.is_built_with_rocm, skip_message=""On ROCm this test fails"") ",0,test 068e68b762969fe37b08bf069bb97d0356af1949,tensorflow/tensorflow,"Use macros instead of functions for float16 buffer access. Significantly faster on Adreno devices. PiperOrigin-RevId: 272546757",object_accessor.cc,"@@ -577,24 +577,16 @@ std::string ObjectAccessor::GetObjectDeclarations() const { } std::string ObjectAccessor::GetFunctionsDeclarations() const { - std::string modifier = """"; - // Mali compiler does not want to compile a function without readonly - // modifier. See b/111601761 for the context. - if (is_mali_) { - modifier = ""readonly ""; - } - // If there is a single object SSBO with F16, then we need to output functions + // If there is a single object SSBO with F16, then we need to output macros // as well. for (const auto& o : name_to_object_) { if (o.second.data_type == DataType::FLOAT16 && o.second.object_type == ObjectType::BUFFER) { - return absl::StrCat(""vec4 Vec4FromHalf(in "", modifier, - ""uvec2 v) { return vec4(unpackHalf2x16(v.x), "" - ""unpackHalf2x16(v.y)); }\n"" - ""uvec2 Vec4ToHalf(in "", - modifier, - ""vec4 v) { return uvec2(packHalf2x16(v.xy), "" - ""packHalf2x16(v.zw)); }\n""); + return absl::StrCat( + ""#define Vec4FromHalf(v) vec4(unpackHalf2x16(v.x), "" + ""unpackHalf2x16(v.y))\n"", + ""#define Vec4ToHalf(v) uvec2(packHalf2x16(v.xy), "" + ""packHalf2x16(v.zw))""); } } return """"; ",0,test 425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks Note: This change may break clients who have custom TfLiteDelegate implementations; this API has been and remains experimental and subject to such changes. PiperOrigin-RevId: 208683190",context.h,"@@ -452,13 +452,15 @@ typedef struct _TfLiteDelegate { // Copy the data from delegate buffer handle to raw memory. // This can be null if the delegate doesn't use its own buffer. - TfLiteStatus (*CopyFromBufferHandle)(TfLiteDelegate* delegate, + TfLiteStatus (*CopyFromBufferHandle)(TfLiteContext* context, + TfLiteDelegate* delegate, TfLiteBufferHandle buffer_handle, void* data, size_t size); // Copy the data from raw memory to delegate buffer handle. // This can be null if the delegate doesn't use its own buffer. - TfLiteStatus (*CopyToBufferHandle)(TfLiteDelegate* delegate, + TfLiteStatus (*CopyToBufferHandle)(TfLiteContext* context, + TfLiteDelegate* delegate, TfLiteBufferHandle buffer_handle, void* data, size_t size); @@ -466,7 +468,7 @@ typedef struct _TfLiteDelegate { // this doesn't release the underlying resource (e.g. textures). The // resources are either owned by application layer or the delegate. // This can be null if the delegate doesn't use its own buffer. 
- void (*FreeBufferHandle)(TfLiteDelegate* delegate, + void (*FreeBufferHandle)(TfLiteContext* context, TfLiteDelegate* delegate, TfLiteBufferHandle* handle); } TfLiteDelegate; ",0,test 425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks Note: This change may break clients who have custom TfLiteDelegate implementations; this API has been and remains experimental and subject to such changes. PiperOrigin-RevId: 208683190",delegate.cc,"@@ -55,17 +55,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) { return kTfLiteOk; } -TfLiteStatus CopyFromBufferHandle(TfLiteDelegate* delegate, +TfLiteStatus CopyFromBufferHandle(TfLiteContext* context, + TfLiteDelegate* delegate, TfLiteBufferHandle buffer_handle, void* data, size_t size) { - // TODO(nupurgarg): Make BufferMap unique to each interpreter in order to - // support multiple interpreters using a single delegate. BufferMap* buffer_map = - reinterpret_cast(delegate->data_)->GetBufferMap(); + reinterpret_cast(delegate->data_)->GetBufferMap(context); - // TODO(nupurgarg): Use TfLiteContext's ReportError instead of fprinf. if (!buffer_map->HasTensor(buffer_handle)) { - fprintf(stderr, ""Invalid tensor index %d.\n"", buffer_handle); + context->ReportError(context, ""Invalid tensor index %d."", buffer_handle); return kTfLiteError; } @@ -73,7 +71,8 @@ TfLiteStatus CopyFromBufferHandle(TfLiteDelegate* delegate, tensorflow::StringPiece t_data = t.tensor_data(); if (size != t_data.size()) { - fprintf(stderr, ""Not enough space to store TensorFlow's aligned buffer.\n""); + context->ReportError( + context, ""Not enough space to store TensorFlow's aligned buffer.""); return kTfLiteError; } ",0,test 425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks Note: This change may break clients who have custom TfLiteDelegate implementations; this API has been and remains experimental and subject to such changes. PiperOrigin-RevId: 208683190",delegate.h,"@@ -26,8 +26,8 @@ namespace tflite { // executed by TensorFlow's runtime via Eager. // // The interpreter must be constructed after the EagerDelegate and destructed -// before the EagerDelegate. This delegate can only be used with one -// interpreter. +// before the EagerDelegate. This delegate may be used with multiple +// interpreters, but it is *not* thread-safe. // // Usage: // EagerDelegate delegate; ",0,test 425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks Note: This change may break clients who have custom TfLiteDelegate implementations; this API has been and remains experimental and subject to such changes. PiperOrigin-RevId: 208683190",delegate_data.h,"@@ -32,14 +32,18 @@ class DelegateData { // The EagerContext that is required for execution of Eager Ops. tensorflow::EagerContext* GetEagerContext() { return eager_context_.get(); } - // Map from TF Lite tensor index to TensorFlow tensor. - BufferMap* GetBufferMap() { return &buffer_map_; } + // Map from TF Lite tensor index to TensorFlow tensor for a given context. 
+ BufferMap* GetBufferMap(const TfLiteContext* context) { + return &buffer_map_[context]; + } private: explicit DelegateData(tensorflow::EagerContext* eager_context); std::unique_ptr eager_context_; - BufferMap buffer_map_; + // TODO(b/112439500): Clean up stale BufferMap instances after adding the + // necessary cleanup hook from a TfLiteContext to a TfLiteDelegate. + std::unordered_map buffer_map_; }; } // namespace eager ",0,test 425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks Note: This change may break clients who have custom TfLiteDelegate implementations; this API has been and remains experimental and subject to such changes. PiperOrigin-RevId: 208683190",delegate_data_test.cc,"@@ -16,6 +16,7 @@ limitations under the License. #include #include +#include ""tensorflow/contrib/lite/context.h"" #include ""tensorflow/contrib/lite/testing/util.h"" namespace tflite { @@ -29,8 +30,12 @@ TEST(DelegateDataTest, Basic) { // binary. EXPECT_TRUE(DelegateData::Create(&data).ok()); + TfLiteContext dummy_context1 = {}; + TfLiteContext dummy_context2 = {}; EXPECT_NE(data->GetEagerContext(), nullptr); - EXPECT_NE(data->GetBufferMap(), nullptr); + EXPECT_NE(data->GetBufferMap(&dummy_context1), nullptr); + EXPECT_NE(data->GetBufferMap(&dummy_context1), + data->GetBufferMap(&dummy_context2)); } } // namespace ",0,test 425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks Note: This change may break clients who have custom TfLiteDelegate implementations; this API has been and remains experimental and subject to such changes. PiperOrigin-RevId: 208683190",delegate_test.cc,"@@ -25,8 +25,6 @@ namespace { using ::testing::ContainsRegex; using ::testing::ElementsAre; -// TODO(nupurgarg): Add a test with multiple interpreters for one delegate. - class DelegateTest : public testing::EagerModelTest { public: DelegateTest() { @@ -139,6 +137,56 @@ TEST_F(DelegateTest, OnlyTFLite) { ASSERT_THAT(GetValues(2), ElementsAre(1.1f, 4.4f, 9.9f, 17.6f)); } +TEST_F(DelegateTest, MultipleInterpretersSameDelegate) { + // Build a graph, configure the delegate and set inputs. + { + AddTensors(9, {0, 3}, {8}, kTfLiteFloat32, {3}); + AddTfOp(testing::kUnpack, {0}, {1, 2}); + AddTfOp(testing::kUnpack, {3}, {4, 5}); + AddTfOp(testing::kAdd, {1, 4}, {6}); + AddTfOp(testing::kAdd, {2, 5}, {7}); + AddTfOp(testing::kMul, {6, 7}, {8}); + ConfigureDelegate(); + SetShape(0, {2, 2, 1}); + SetValues(0, {1.1f, 2.2f, 3.3f, 4.4f}); + SetShape(3, {2, 2, 1}); + SetValues(3, {1.1f, 2.2f, 3.3f, 4.4f}); + } + + // Create a new interpreter, inject into the test framework and build + // a different graph using the *same* delegate. + std::unique_ptr interpreter(new Interpreter(&error_reporter_)); + interpreter_.swap(interpreter); + { + AddTensors(10, {0}, {9}, kTfLiteFloat32, {3}); + AddTfOp(testing::kUnpack, {0}, {1, 2}); + AddTfOp(testing::kAdd, {1, 2}, {3}); + AddTfOp(testing::kUnpack, {3}, {4, 5}); + AddTfLiteMulOp({4, 5}, {6}); + AddTfOp(testing::kUnpack, {6}, {7, 8}); + AddTfOp(testing::kAdd, {7, 8}, {9}); + ConfigureDelegate(); + SetShape(0, {2, 2, 2, 1}); + SetValues(0, {3.0f, 1.0f, 0.5f, -1.0f, 0.0f, 1.0f, 1.5f, 3.0f}); + } + + // Swap back in the first interpreter and validate inference. 
+ interpreter_.swap(interpreter); + { + ASSERT_TRUE(Invoke()); + EXPECT_THAT(GetShape(8), ElementsAre(2, 1)); + EXPECT_THAT(GetValues(8), ElementsAre(14.52f, 38.72f)); + } + + // Swap in the second interpreter and validate inference. + interpreter_.swap(interpreter); + { + ASSERT_TRUE(Invoke()); + EXPECT_THAT(GetShape(9), ElementsAre(1)); + EXPECT_THAT(GetValues(9), ElementsAre(10.0f)); + } +} + } // namespace } // namespace eager } // namespace tflite ",0,test 425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks Note: This change may break clients who have custom TfLiteDelegate implementations; this API has been and remains experimental and subject to such changes. PiperOrigin-RevId: 208683190",kernel.cc,"@@ -150,8 +150,8 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) { op_data->eager_context = reinterpret_cast(params->delegate->data_) ->GetEagerContext(); - op_data->buffer_map = - reinterpret_cast(params->delegate->data_)->GetBufferMap(); + op_data->buffer_map = reinterpret_cast(params->delegate->data_) + ->GetBufferMap(context); CHECK(params->output_tensors); for (auto tensor_index : TfLiteIntArrayView(params->output_tensors)) { ",0,test 425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks Note: This change may break clients who have custom TfLiteDelegate implementations; this API has been and remains experimental and subject to such changes. PiperOrigin-RevId: 208683190",kernel_test.cc,"@@ -55,12 +55,14 @@ class KernelTest : public testing::EagerModelTest { delegate_.data_ = delegate_data_.get(); delegate_.FreeBufferHandle = nullptr; delegate_.Prepare = prepare_function; - delegate_.CopyFromBufferHandle = [](TfLiteDelegate* delegate, + delegate_.CopyFromBufferHandle = [](TfLiteContext* context, + TfLiteDelegate* delegate, TfLiteBufferHandle buffer_handle, void* data, size_t size) { auto* delegate_data = reinterpret_cast(delegate->data_); - tensorflow::StringPiece values = - delegate_data->GetBufferMap()->GetTensor(buffer_handle).tensor_data(); + tensorflow::StringPiece values = delegate_data->GetBufferMap(context) + ->GetTensor(buffer_handle) + .tensor_data(); memcpy(data, values.data(), values.size()); return kTfLiteOk; }; ",0,test 425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks Note: This change may break clients who have custom TfLiteDelegate implementations; this API has been and remains experimental and subject to such changes. 
PiperOrigin-RevId: 208683190",interpreter.cc,"@@ -157,7 +157,7 @@ Interpreter::~Interpreter() { TfLiteTensor* tensor = &context_.tensors[i]; if (tensor->buffer_handle != kTfLiteNullBufferHandle && tensor->delegate->FreeBufferHandle != nullptr) { - tensor->delegate->FreeBufferHandle(tensor->delegate, + tensor->delegate->FreeBufferHandle(&context_, tensor->delegate, &tensor->buffer_handle); } TfLiteTensorFree(tensor); @@ -988,7 +988,7 @@ TfLiteStatus Interpreter::SetBufferHandle(int tensor_index, tensor->delegate = delegate; if (tensor->buffer_handle != kTfLiteNullBufferHandle) { TF_LITE_ENSURE(&context_, tensor->delegate->FreeBufferHandle != nullptr); - tensor->delegate->FreeBufferHandle(tensor->delegate, + tensor->delegate->FreeBufferHandle(&context_, tensor->delegate, &tensor->buffer_handle); } tensor->buffer_handle = buffer_handle; ",0,test 425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks Note: This change may break clients who have custom TfLiteDelegate implementations; this API has been and remains experimental and subject to such changes. PiperOrigin-RevId: 208683190",interpreter.h,"@@ -350,7 +350,7 @@ class Interpreter { // This can be null if the delegate doesn't use its own buffer. TF_LITE_ENSURE(&context_, tensor->delegate->CopyFromBufferHandle != nullptr); - tensor->delegate->CopyFromBufferHandle(tensor->delegate, + tensor->delegate->CopyFromBufferHandle(&context_, tensor->delegate, tensor->buffer_handle, tensor->data.raw, tensor->bytes); tensor->data_is_stale = false; ",0,test 425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks Note: This change may break clients who have custom TfLiteDelegate implementations; this API has been and remains experimental and subject to such changes. PiperOrigin-RevId: 208683190",interpreter_test.cc,"@@ -1080,21 +1080,22 @@ class TestDelegate : public ::testing::Test { return kTfLiteOk; }; delegate_.CopyToBufferHandle = - [](TfLiteDelegate* delegate, TfLiteBufferHandle buffer_handle, - void* data, size_t size) -> TfLiteStatus { + [](TfLiteContext* context, TfLiteDelegate* delegate, + TfLiteBufferHandle buffer_handle, void* data, + size_t size) -> TfLiteStatus { // TODO(ycling): Implement tests to test buffer copying logic. return kTfLiteOk; }; delegate_.CopyFromBufferHandle = - [](TfLiteDelegate* delegate, TfLiteBufferHandle buffer_handle, - void* data, size_t size) -> TfLiteStatus { + [](TfLiteContext* context, TfLiteDelegate* delegate, + TfLiteBufferHandle buffer_handle, void* data, + size_t size) -> TfLiteStatus { // TODO(ycling): Implement tests to test buffer copying logic. return kTfLiteOk; }; - delegate_.FreeBufferHandle = [](TfLiteDelegate* delegate, - TfLiteBufferHandle* handle) { - *handle = kTfLiteNullBufferHandle; - }; + delegate_.FreeBufferHandle = + [](TfLiteContext* context, TfLiteDelegate* delegate, + TfLiteBufferHandle* handle) { *handle = kTfLiteNullBufferHandle; }; // Store type-punned data SimpleDelegate structure. delegate_.data_ = reinterpret_cast(this); } ",0,test 35a14f9ea24ea4d83fb6e279b7a2e03ac1c386eb,tensorflow/tensorflow,"Undo the removal of an import which was actually required for Ubuntu build. Fix usage of int64/uint64. PiperOrigin-RevId: 244229396",util.cc,"@@ -32,13 +32,15 @@ limitations under the License. 
#include ""tensorflow/core/platform/mutex.h"" #include ""tensorflow/core/platform/stacktrace.h"" +namespace xla { + namespace { tensorflow::mutex timer_stats_lock(tensorflow::LINKER_INITIALIZED); struct TimerStats { double cumulative_secs = 0; double max_secs = 0; - int64 times_called = 0; + uint64 times_called = 0; }; // Global mapping from timer IDs to timer statistics. @@ -46,8 +48,6 @@ auto& timers_stats GUARDED_BY(timer_stats_lock) = *new absl::flat_hash_map(); } // namespace -namespace xla { - Status WithLogBacktrace(const Status& status) { CHECK(!status.ok()); VLOG(1) << status.ToString(); ",0,test 1ed03f85921e36f20b0a27174a5b2d7f103c271d,tensorflow/tensorflow,"Update gmm_ops.py (#7614) * Update gmm_ops.py It should be int64 instead of int32 * Remove cast altogether * cast num_data into int64 As per ashish's comment",gmm_ops.py,"@@ -85,7 +85,7 @@ def _init_clusters_random(data, num_clusters, random_seed): maxval=math_ops.cast(num_data, dtypes.int64), seed=random_seed, dtype=dtypes.int64) - indices = math_ops.cast(indices, dtypes.int32) % num_data + indices = indices % math_ops.cast(num_data, dtypes.int64) clusters_init = embedding_lookup(data, indices, partition_strategy='div') return clusters_init ",0,train 8d42d1e0b8b00c04d34ea585d360fd54206f6cbb,tensorflow/tensorflow,fixes erroneous collection in contrib batch_norm,layers.py,"@@ -525,7 +525,7 @@ def batch_norm( if layer.beta: _add_variable_to_collections(layer.beta, variables_collections, 'beta') if layer.gamma: - _add_variable_to_collections(layer.beta, variables_collections, 'gamma') + _add_variable_to_collections(layer.gamma, variables_collections, 'gamma') if activation_fn is not None: outputs = activation_fn(outputs) ",0,train 8b0eee37b0e6d5a2161ef39a86e51ab2d7c9ebeb,tensorflow/tensorflow,"Annotate tf_device.return and tf.Yield with NoSideEffect and ReturnLike traits. Marking these ops as NoSideEffect will result in such terminators to not be considered as side effecting, in side effect analysis. PiperOrigin-RevId: 345323762 Change-Id: I82d216bdfd9a214a0f38d2888022e3aa00f69cc8",tf_device.h,"@@ -24,6 +24,8 @@ limitations under the License. #include ""mlir/IR/Dialect.h"" // from @llvm-project #include ""mlir/IR/OpDefinition.h"" // from @llvm-project #include ""mlir/IR/Value.h"" // from @llvm-project +#include ""mlir/Interfaces/ControlFlowInterfaces.h"" // from @llvm-project +#include ""mlir/Interfaces/SideEffectInterfaces.h"" // from @llvm-project namespace mlir { namespace tf_device { ",0,test 8b0eee37b0e6d5a2161ef39a86e51ab2d7c9ebeb,tensorflow/tensorflow,"Annotate tf_device.return and tf.Yield with NoSideEffect and ReturnLike traits. Marking these ops as NoSideEffect will result in such terminators to not be considered as side effecting, in side effect analysis. PiperOrigin-RevId: 345323762 Change-Id: I82d216bdfd9a214a0f38d2888022e3aa00f69cc8",tf_ops.h,"@@ -29,6 +29,7 @@ limitations under the License. 
#include ""mlir/IR/StandardTypes.h"" // from @llvm-project #include ""mlir/IR/TypeUtilities.h"" // from @llvm-project #include ""mlir/Interfaces/CallInterfaces.h"" // from @llvm-project +#include ""mlir/Interfaces/ControlFlowInterfaces.h"" // from @llvm-project #include ""mlir/Interfaces/DerivedAttributeOpInterface.h"" // from @llvm-project #include ""mlir/Interfaces/InferTypeOpInterface.h"" // from @llvm-project #include ""mlir/Interfaces/LoopLikeInterface.h"" // from @llvm-project ",0,test 8b0eee37b0e6d5a2161ef39a86e51ab2d7c9ebeb,tensorflow/tensorflow,"Annotate tf_device.return and tf.Yield with NoSideEffect and ReturnLike traits. Marking these ops as NoSideEffect will result in such terminators to not be considered as side effecting, in side effect analysis. PiperOrigin-RevId: 345323762 Change-Id: I82d216bdfd9a214a0f38d2888022e3aa00f69cc8",tf_ops_a_m.h,"@@ -26,6 +26,7 @@ limitations under the License. #include ""mlir/IR/StandardTypes.h"" // from @llvm-project #include ""mlir/IR/TypeUtilities.h"" // from @llvm-project #include ""mlir/Interfaces/CallInterfaces.h"" // from @llvm-project +#include ""mlir/Interfaces/ControlFlowInterfaces.h"" // from @llvm-project #include ""mlir/Interfaces/DerivedAttributeOpInterface.h"" // from @llvm-project #include ""mlir/Interfaces/InferTypeOpInterface.h"" // from @llvm-project #include ""mlir/Interfaces/LoopLikeInterface.h"" // from @llvm-project ",0,test 8b0eee37b0e6d5a2161ef39a86e51ab2d7c9ebeb,tensorflow/tensorflow,"Annotate tf_device.return and tf.Yield with NoSideEffect and ReturnLike traits. Marking these ops as NoSideEffect will result in such terminators to not be considered as side effecting, in side effect analysis. PiperOrigin-RevId: 345323762 Change-Id: I82d216bdfd9a214a0f38d2888022e3aa00f69cc8",tf_ops_n_z.h,"@@ -26,6 +26,7 @@ limitations under the License. #include ""mlir/IR/StandardTypes.h"" // from @llvm-project #include ""mlir/IR/TypeUtilities.h"" // from @llvm-project #include ""mlir/Interfaces/CallInterfaces.h"" // from @llvm-project +#include ""mlir/Interfaces/ControlFlowInterfaces.h"" // from @llvm-project #include ""mlir/Interfaces/DerivedAttributeOpInterface.h"" // from @llvm-project #include ""mlir/Interfaces/InferTypeOpInterface.h"" // from @llvm-project #include ""mlir/Interfaces/LoopLikeInterface.h"" // from @llvm-project ",0,test 0d9b07979d180d0a04e334b2ea3f3b4ca7790eba,tensorflow/tensorflow,"Retry ""PR #31106: speedup reduce op grads when keep_dims=True"" but now with forward compatibility guards to prevent breakages. PiperOrigin-RevId: 265151183",math_grad.py,"@@ -193,8 +193,16 @@ def _SumGrad(op, grad): return [array_ops.tile(grad, tile_scaling), None] input_shape = array_ops.shape(op.inputs[0]) - # TODO(apassos) remove this once device placement for eager ops makes more - # sense. + + if compat.forward_compatible(2019, 9, 23): + if not op.get_attr(""keep_dims""): + with ops.colocate_with(input_shape): + # TODO(apassos) remove this once device placement for eager ops makes + # more sense. + output_shape_kept_dims = math_ops.reduced_shape(input_shape, + op.inputs[1]) + grad = array_ops.reshape(grad, output_shape_kept_dims) + return [array_ops.broadcast_to(grad, input_shape), None] with ops.colocate_with(input_shape): output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims) @@ -205,10 +213,13 @@ def _SumGrad(op, grad): def _MinOrMaxGrad(op, grad): """"""Gradient for Min or Max. 
Amazingly it's precisely the same code."""""" input_shape = array_ops.shape(op.inputs[0]) - output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) y = op.outputs[0] - y = array_ops.reshape(y, output_shape_kept_dims) - grad = array_ops.reshape(grad, output_shape_kept_dims) + if not op.get_attr(""keep_dims""): + output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) + y = array_ops.reshape(y, output_shape_kept_dims) + grad = array_ops.reshape(grad, output_shape_kept_dims) + else: + output_shape_kept_dims = array_ops.shape(y) # Compute the number of selected (maximum or minimum) elements in each # reduction dimension. If there are multiple minimum or maximum elements @@ -263,11 +274,18 @@ def _ProdGrad(op, grad): # Reshape reduction indices for the case where the parameter is a scalar reduction_indices = array_ops.reshape(op.inputs[1], [-1]) - # Expand grad to full input shape - output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) - tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims) - grad = array_ops.reshape(grad, output_shape_kept_dims) - grad = array_ops.tile(grad, tile_scaling) + if compat.forward_compatible(2019, 9, 23): + # Expand grad to full input shape + if not op.get_attr(""keep_dims""): + output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) + grad = array_ops.reshape(grad, output_shape_kept_dims) + + grad = array_ops.broadcast_to(grad, input_shape) + else: + output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]) + tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims) + grad = array_ops.reshape(grad, output_shape_kept_dims) + grad = array_ops.tile(grad, tile_scaling) # Pack all reduced dimensions into a single one, so we can perform the # cumprod ops. If the reduction dims list is empty, it defaults to float32, ",0,train ae32e9096028d0d0d8fc4c007e4192ba36f80408,tensorflow/tensorflow,"Added support of Floor/FloorDiv/FloorMod in model builder. 
PiperOrigin-RevId: 364625346 Change-Id: I29df47dab26f2958b6dfdeb71d18277785b0cac5",model_builder.cc,"@@ -725,6 +725,7 @@ class ElementwiseOperationParser : public TFLiteOperationParser { case OperationType::COS: case OperationType::ELU: case OperationType::EXP: + case OperationType::FLOOR: case OperationType::LOG: case OperationType::NEG: case OperationType::RSQRT: @@ -742,6 +743,8 @@ class ElementwiseOperationParser : public TFLiteOperationParser { bool IsTwoArgumentOperation() const { switch (operation_type_) { case OperationType::DIV: + case OperationType::FLOOR_DIV: + case OperationType::FLOOR_MOD: case OperationType::MAXIMUM: case OperationType::MINIMUM: case OperationType::POW: @@ -756,6 +759,8 @@ class ElementwiseOperationParser : public TFLiteOperationParser { bool IsTwoArgumentOperationWithConst() const { switch (operation_type_) { case OperationType::DIV: + case OperationType::FLOOR_DIV: + case OperationType::FLOOR_MOD: case OperationType::MAXIMUM: case OperationType::MINIMUM: case OperationType::POW: @@ -2367,6 +2372,14 @@ std::unique_ptr NewOperationParser( return std::make_unique(OperationType::ELU); case kTfLiteBuiltinExp: return std::make_unique(OperationType::EXP); + case kTfLiteBuiltinFloor: + return std::make_unique(OperationType::FLOOR); + case kTfLiteBuiltinFloorDiv: + return std::make_unique( + OperationType::FLOOR_DIV); + case kTfLiteBuiltinFloorMod: + return std::make_unique( + OperationType::FLOOR_MOD); case kTfLiteBuiltinFullyConnected: return std::make_unique(); case kTfLiteBuiltinHardSwish: ",0,train f5b3248917c55e91f77df97dd86d1fe77eadb4e3,tensorflow/tensorflow,"Fix logistic_regression() summary name conflict. (#2446) Error when calling logistic_regression() more than once: Duplicate tag logistic_regression.X found in summary inputs",models.py,"@@ -137,8 +137,8 @@ def logistic_regression(X, uniform_unit_scaling_initialzer will be used. """""" with vs.variable_scope('logistic_regression'): - logging_ops.histogram_summary('logistic_regression.X', X) - logging_ops.histogram_summary('logistic_regression.y', y) + logging_ops.histogram_summary('%s.X' % vs.get_variable_scope().name, X) + logging_ops.histogram_summary('%s.y' % vs.get_variable_scope().name, y) # Set up the requested initialization. if init_mean is None: weights = vs.get_variable('weights', @@ -152,8 +152,8 @@ def logistic_regression(X, bias = vs.get_variable('bias', [y.get_shape()[-1]], initializer=init_ops.random_normal_initializer( init_mean, init_stddev)) - logging_ops.histogram_summary('logistic_regression.weights', weights) - logging_ops.histogram_summary('logistic_regression.bias', bias) + logging_ops.histogram_summary('%s.weights' % vs.get_variable_scope().name, weights) + logging_ops.histogram_summary('%s.bias' % vs.get_variable_scope().name, bias) # If no class weight provided, try to retrieve one from pre-defined # tensor name in the graph. if not class_weight: ",0,test b83bc10e831c44488d56220aec27117f8dc0cc3d,tensorflow/tensorflow,"Avoid divisions when the divisor is a power of two. 
PiperOrigin-RevId: 260738949",block_map.cc,"@@ -183,11 +183,11 @@ void MakeBlockMap(int rows, int cols, int depth, int kernel_rows, const int smallc = round_down_pot(cols >> num_blocks_of_cols_log2, kernel_cols); const int missr = - round_up_pot(rows - (smallr << num_blocks_of_rows_log2), kernel_rows) / - kernel_rows; + round_up_pot(rows - (smallr << num_blocks_of_rows_log2), kernel_rows) >> + floor_log2(kernel_rows); const int missc = - round_up_pot(cols - (smallc << num_blocks_of_cols_log2), kernel_cols) / - kernel_cols; + round_up_pot(cols - (smallc << num_blocks_of_cols_log2), kernel_cols) >> + floor_log2(kernel_cols); block_map->dims[Side::kLhs] = rows; block_map->dims[Side::kRhs] = cols; ",0,train 19c51a72a3199b1abbdd41a9b89a01c2aef31a78,tensorflow/tensorflow,"Eliminate pass through return values from tf_device.cluster op Values that are not defined in the cluster doesn't need to be returned from the cluster return op. Otherwise, any value with unsupported type will fail legalization in the phase 2. PiperOrigin-RevId: 366764596 Change-Id: I239b56dfb38cabdca6c487da992224822c2665cc",tf_device.cc,"@@ -677,6 +677,69 @@ bool ReplicateOp::WrapsSingleOp() { return BlockWrapsSingleOp(&GetBody()); } // Canonicalization patterns //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// tf_device.cluster +//===----------------------------------------------------------------------===// + +namespace { + +// Eliminates cluster op results that are not defined within the cluster and are +// defined outside. cluster op can be rewritten to remove those results. +static LogicalResult EliminatePassThroughResults(ClusterOp op, + PatternRewriter& rewriter) { + mlir::Block& body = op.GetBody(); + Operation* return_op = body.getTerminator(); + int num_results = return_op->getNumOperands(); + + // Values defined within the cluster. + llvm::SmallVector cluster_vals; + cluster_vals.reserve(num_results); + + // New results stores values to use while replacing the old cluster op. + llvm::SmallVector new_results; + new_results.reserve(num_results); + for (Value result : return_op->getOperands()) { + if (result.getParentBlock() == &body) { + // This result will be populated with the new result after rewriting the + // cluster op. + new_results.push_back(nullptr); + cluster_vals.push_back(result); + } else { + new_results.push_back(result); + } + } + + // Return failure if there are no pass through results and op is already + // canonical. + if (cluster_vals.size() == num_results) return failure(); + + // Rewrite return op in the cluster. + rewriter.setInsertionPoint(return_op); + auto new_return = + rewriter.replaceOpWithNewOp(return_op, cluster_vals); + + // Rewrite the cluster op. 
+ rewriter.setInsertionPoint(op); + auto new_op = rewriter.create( + op->getLoc(), new_return.getOperandTypes(), op->getOperands(), + op->getAttrs()); + rewriter.inlineRegionBefore(op.getBodyRegion(), new_op.getBodyRegion(), + new_op.getBodyRegion().end()); + + int idx = 0; + for (Value& result : new_results) { + if (result == nullptr) result = new_op.getResult(idx++); + } + rewriter.replaceOp(op, new_results); + return success(); +} +} // anonymous namespace + +void ClusterOp::getCanonicalizationPatterns(OwningRewritePatternList& results, + MLIRContext* context) { + results.insert(EliminatePassThroughResults); +} + //===----------------------------------------------------------------------===// // tf_device.launch //===----------------------------------------------------------------------===// ",0,test 82f4a53775ba338dbaed0c329959d9ed53d428e4,tensorflow/tensorflow,"Clarify error message when batch size not divisible by num_replicas PiperOrigin-RevId: 277162934 Change-Id: Ic65ae36bdb78f2e75ee14e69d24ac4005b6a7dde",keras_utils_test.py,"@@ -665,8 +665,8 @@ class TestDistributionStrategyWithStaticShapes(test.TestCase, def test_input_batch_size_not_divisible_by_num_replicas(self, distribution): with distribution.scope(): with self.assertRaisesRegexp( - ValueError, 'The `batch_size` argument value 5 cannot be divisible ' - 'by number of replicas 2'): + ValueError, r'The `batch_size` argument \(5\) must be divisible by ' + r'the number of replicas \(2\)'): keras.layers.Input(shape=(3,), batch_size=5, name='input') @combinations.generate( ",0,train 82f4a53775ba338dbaed0c329959d9ed53d428e4,tensorflow/tensorflow,"Clarify error message when batch size not divisible by num_replicas PiperOrigin-RevId: 277162934 Change-Id: Ic65ae36bdb78f2e75ee14e69d24ac4005b6a7dde",input_layer.py,"@@ -72,8 +72,8 @@ class InputLayer(base_layer.Layer): if strategy and batch_size is not None and \ distributed_training_utils.global_batch_size_supported(strategy): if batch_size % strategy.num_replicas_in_sync != 0: - raise ValueError('The `batch_size` argument value {} cannot be ' - 'divisible by number of replicas {}'.format( + raise ValueError('The `batch_size` argument ({}) must be divisible by ' + 'the number of replicas ({})'.format( batch_size, strategy.num_replicas_in_sync)) batch_size = batch_size // strategy.num_replicas_in_sync ",0,train 82f4a53775ba338dbaed0c329959d9ed53d428e4,tensorflow/tensorflow,"Clarify error message when batch size not divisible by num_replicas PiperOrigin-RevId: 277162934 Change-Id: Ic65ae36bdb78f2e75ee14e69d24ac4005b6a7dde",training.py,"@@ -1894,8 +1894,8 @@ class Model(network.Network): # Check `batch_size` argument is consistent with InputLayer. 
if batch_size is not None: if batch_size % num_splits_for_ds != 0: - raise ValueError('The `batch_size` argument value {} cannot be ' - 'divisible by number of replicas {}'.format( + raise ValueError('The `batch_size` argument ({}) must be divisible ' + 'the by number of replicas ({})'.format( batch_size, num_splits_for_ds)) per_replica_batch_size = batch_size // num_splits_for_ds ",0,train 478594457f185c4651120cf20453158a04cdbefe,tensorflow/tensorflow,Move code to a function to make the code more clear.,mark_for_compilation_pass.cc,"@@ -1076,42 +1076,9 @@ StatusOr IsIdentityDrivingConstsInLoop(Node* node) { return true; } -Status MarkForCompilationPassImpl::FindCompilationCandidates() { - OptimizerOptions opts; - std::unique_ptr pflr( - new ProcessFunctionLibraryRuntime(nullptr, env_, /*config=*/nullptr, - TF_GRAPH_DEF_VERSION, flib_def_, opts)); - FunctionLibraryRuntime* lib_runtime = - pflr->GetFLR(ProcessFunctionLibraryRuntime::kDefaultFLRDevice); - std::vector compile_time_const_nodes(graph_->num_node_ids(), false); - TF_RETURN_IF_ERROR(BackwardsConstAnalysis( - *graph_, /*compile_time_const_arg_indices=*/nullptr, - &compile_time_const_nodes, lib_runtime)); - - // Iterate over nodes in sorted order so that compiler fuel is deterministic. - // We can't simply pass op_nodes().begin() and op_nodes().end() to the - // std::vector constructor because they're not proper iterators, with - // iterator_traits defined and so on. - std::vector sorted_nodes; - for (Node* node : graph_->op_nodes()) { - sorted_nodes.push_back(node); - } - std::sort(sorted_nodes.begin(), sorted_nodes.end(), NodeComparatorID()); - - if (*debug_options_.fuel >= std::numeric_limits::max() / 2) { - // The assumption is that if fuel started out as INT64_MAX, it will forever - // stay greater than INT64_MAX / 2. 
- VLOG(2) << ""Starting fuel: infinity""; - } else { - VLOG(2) << ""Starting fuel: "" << *debug_options_.fuel; - } - - VLOG(2) << ""sorted_nodes.size() = "" << sorted_nodes.size(); - +std::unique_ptr> GetWhitelist() { MarkForCompilationPassFlags* flags = GetMarkForCompilationPassFlags(); - absl::flat_hash_set whitelist; - auto vall_ops = XlaOpRegistry::GetAllRegisteredOps(); - absl::flat_hash_set all_ops(vall_ops.begin(), vall_ops.end()); + auto whitelist = absl::WrapUnique(new absl::flat_hash_set()); for (auto s : absl::StrSplit(flags->tf_xla_supported_nodes, "","")) { bool fusible = s == ""FUSIBLE""; @@ -1119,7 +1086,7 @@ Status MarkForCompilationPassImpl::FindCompilationCandidates() { if (s == ""PW"" || fusible) { added = true; // Unary - whitelist.insert( + whitelist->insert( {""ComplexAbs"", ""Angle"", ""Conj"", ""Abs"", ""Acos"", ""Acosh"", ""Asin"", ""Atan"", ""Atanh"", ""Ceil"", ""Cos"", ""Cosh"", ""Sin"", ""Exp"", ""Expm1"", ""Floor"", ""IsFinite"", ""IsInf"", ""IsNan"", ""Inv"", ""Reciprocal"", ""Log"", @@ -1147,27 +1114,27 @@ Status MarkForCompilationPassImpl::FindCompilationCandidates() { } if (s == ""RED"" || fusible) { added = true; - whitelist.insert({""All"", ""Any"", ""Min"", ""Max"", ""Mean"", ""Prod"", ""Sum""}); + whitelist->insert({""All"", ""Any"", ""Min"", ""Max"", ""Mean"", ""Prod"", ""Sum""}); } if (s == ""PWRED"" || fusible) { added = true; - whitelist.insert({""ArgMax"", ""ArgMin"", ""DiagPart"", ""Softmax"", + whitelist->insert({""ArgMax"", ""ArgMin"", ""DiagPart"", ""Softmax"", ""SparseSoftmaxCrossEntropyWithLogits"", ""LogSoftmax""}); } if (s == ""REDUCEWINDOW"" || fusible) { added = true; - whitelist.insert({""MaxPoolV2"", ""MaxPool3D"", ""AvgPool"", ""AvgPool3D"", + whitelist->insert({""MaxPoolV2"", ""MaxPool3D"", ""AvgPool"", ""AvgPool3D"", ""MaxPoolGrad"", ""MaxPool3DGrad"", ""AvgPoolGrad"", ""AvgPool3DGrad"", ""MaxPoolGradGrad"", ""MaxPoolGradGradV2"", ""MaxPool3DGradGrad""}); } if (s == ""REDUCEWINDOPW"" || fusible) { added = true; - whitelist.insert({""LRN"", ""LRNGrad""}); + whitelist->insert({""LRN"", ""LRNGrad""}); } if (s == ""BN"" || fusible) { added = true; - whitelist.insert({""FusedBatchNorm"", ""FusedBatchNormV2"", + whitelist->insert({""FusedBatchNorm"", ""FusedBatchNormV2"", ""FusedBatchNormV3"", ""_FusedBatchNormEx"", ""FusedBatchNormGrad"", ""FusedBatchNormGradV2"", ""FusedBatchNormGradV3""}); @@ -1176,7 +1143,7 @@ Status MarkForCompilationPassImpl::FindCompilationCandidates() { // Fill => Broadcast // BroadcastTo => Broadcast + maybe Reshape added = true; - whitelist.insert({""BroadcastTo"", + whitelist->insert({""BroadcastTo"", ""ExpandDims"", ""Fill"", ""Max"", @@ -1222,21 +1189,61 @@ Status MarkForCompilationPassImpl::FindCompilationCandidates() { } if (!added && s.size() > 0) { - if (!all_ops.contains(string(s))) { - return errors::InvalidArgument( - ""The operation '"", s, - ""' passed to --tf_xla_supported_nodes is not supported by XLA.""); - } - whitelist.insert(string(s)); + whitelist->insert(string(s)); } } - if (VLOG_IS_ON(2) && whitelist.size() > 0) { - std::vector vwhitelist(whitelist.begin(), whitelist.end()); + if (VLOG_IS_ON(2) && whitelist->size() > 0) { + std::vector vwhitelist(whitelist->begin(), whitelist->end()); std::sort(vwhitelist.begin(), vwhitelist.end()); VLOG(2) << ""XLA clustering will only consider the following TF operations: "" << absl::StrJoin(vwhitelist, "" ""); } + return whitelist; +} + +Status MarkForCompilationPassImpl::FindCompilationCandidates() { + OptimizerOptions opts; + std::unique_ptr pflr( + new 
ProcessFunctionLibraryRuntime(nullptr, env_, /*config=*/nullptr, + TF_GRAPH_DEF_VERSION, flib_def_, opts)); + FunctionLibraryRuntime* lib_runtime = + pflr->GetFLR(ProcessFunctionLibraryRuntime::kDefaultFLRDevice); + std::vector compile_time_const_nodes(graph_->num_node_ids(), false); + TF_RETURN_IF_ERROR(BackwardsConstAnalysis( + *graph_, /*compile_time_const_arg_indices=*/nullptr, + &compile_time_const_nodes, lib_runtime)); + // Iterate over nodes in sorted order so that compiler fuel is deterministic. + // We can't simply pass op_nodes().begin() and op_nodes().end() to the + // std::vector constructor because they're not proper iterators, with + // iterator_traits defined and so on. + std::vector sorted_nodes; + for (Node* node : graph_->op_nodes()) { + sorted_nodes.push_back(node); + } + std::sort(sorted_nodes.begin(), sorted_nodes.end(), NodeComparatorID()); + + if (*debug_options_.fuel >= std::numeric_limits::max() / 2) { + // The assumption is that if fuel started out as INT64_MAX, it will forever + // stay greater than INT64_MAX / 2. + VLOG(2) << ""Starting fuel: infinity""; + } else { + VLOG(2) << ""Starting fuel: "" << *debug_options_.fuel; + } + + VLOG(2) << ""sorted_nodes.size() = "" << sorted_nodes.size(); + + auto whitelist = GetWhitelist(); + + auto vall_ops = XlaOpRegistry::GetAllRegisteredOps(); + absl::flat_hash_set all_ops(vall_ops.begin(), vall_ops.end()); + for (auto s = whitelist->begin(); s != whitelist->end(); ++s) { + if (!all_ops.contains(string(*s))) { + return errors::InvalidArgument( + ""The operation '"", *s, + ""' passed to --tf_xla_supported_nodes is not supported by XLA.""); + } + } for (Node* node : sorted_nodes) { if (*debug_options_.fuel <= 0) { @@ -1275,7 +1282,7 @@ Status MarkForCompilationPassImpl::FindCompilationCandidates() { continue; } - if (whitelist.size() > 0 && !whitelist.contains(node->def().op())) { + if (whitelist->size() > 0 && !whitelist->contains(node->def().op())) { VLOG(1) << ""Rejecting "" << node->name() << "" as is was not listed in --tf_xla_supported_nodes.""; continue; ",0,train 609a60b44bbf934b31a1dce4f0aa84e731b83c35,tensorflow/tensorflow,Refactor AutoCastVariable tests to rely on strategy_combinations,autocast_variable_test.py,"@@ -17,20 +17,23 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import contextlib import os from absl.testing import parameterized import numpy as np from tensorflow.python import tf2 +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import distribution_strategy_context as ds_context from tensorflow.python.distribute import mirrored_strategy +from tensorflow.python.distribute import strategy_combinations from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import indexed_slices from tensorflow.python.framework import ops -from tensorflow.python.keras import combinations from tensorflow.python.keras.mixed_precision.experimental import autocast_variable from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_v2 from tensorflow.python.ops import array_ops @@ -40,30 +43,17 @@ from tensorflow.python.platform import test from tensorflow.python.training import gradient_descent as gradient_descent_v1 from tensorflow.python.training.tracking import util as trackable_utils -TESTCASES = ({ - 'testcase_name': 
'base', - 'distribute': False -}, { - 'testcase_name': 'distribute', - 'distribute': True -}) - - -def get_distribute_scope(distribute): - - class DummyContextManager(object): - - def __enter__(self): - pass - - def __exit__(self, *args): - pass - - if distribute: - return mirrored_strategy.MirroredStrategy(['cpu:0']).scope() - else: - return DummyContextManager() +class DummyStrategy(object): + @contextlib.contextmanager + def scope(self): + yield +maybe_distribute = combinations.combine( + distribution=[ + combinations.NamedDistribution( + ""Dummy"", lambda: DummyStrategy(), required_gpus=None), + strategy_combinations.mirrored_strategy_with_cpu_1_and_2 + ]) def get_var(val, dtype, name=None): return variables.VariableV1(val, use_resource=True, dtype=dtype, name=name) @@ -71,10 +61,13 @@ def get_var(val, dtype, name=None): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) class AutoCastVariableTest(test.TestCase, parameterized.TestCase): + def setUp(self): + strategy_combinations.set_virtual_cpus_to_at_least(3) + super(AutoCastVariableTest, self).setUp() - @parameterized.named_parameters(*TESTCASES) - def test_read(self, distribute): - with get_distribute_scope(distribute): + @combinations.generate(maybe_distribute) + def test_read(self, distribution): + with distribution.scope(): x = get_var(1., dtypes.float32) x = autocast_variable.create_autocast_variable(x) self.evaluate(x.initializer) @@ -116,9 +109,9 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): self.assertEqual(x.sparse_read([0]).dtype, dtypes.float16) self.assertEqual(x.gather_nd([0]).dtype, dtypes.float16) - @parameterized.named_parameters(*TESTCASES) - def test_read_nested_scopes(self, distribute): - with get_distribute_scope(distribute): + @combinations.generate(maybe_distribute) + def test_read_nested_scopes(self, distribution): + with distribution.scope(): x = get_var(1., dtypes.float32) x = autocast_variable.create_autocast_variable(x) self.evaluate(x.initializer) @@ -136,9 +129,9 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): self.assertEqual(x.dtype, dtypes.float16) self.assertEqual(x.read_value().dtype, dtypes.float16) - @parameterized.named_parameters(*TESTCASES) - def test_dtype_is_not_string(self, distribute): - with get_distribute_scope(distribute): + @combinations.generate(maybe_distribute) + def test_dtype_is_not_string(self, distribution): + with distribution.scope(): x = get_var(1., dtypes.float32) x = autocast_variable.create_autocast_variable(x) self.assertEqual(x.dtype, dtypes.float32) @@ -153,13 +146,13 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): self.assertEqual(x.true_dtype, dtypes.float32) self.assertIsInstance(x.true_dtype, dtypes.DType) - @parameterized.named_parameters(*TESTCASES) - def test_method_delegations(self, distribute): + @combinations.generate(maybe_distribute) + def test_method_delegations(self, distribution): # Test AutoCastVariable correctly delegates Variable methods to the # underlying variable. - with self.test_session(), get_distribute_scope(distribute): + with self.test_session(), distribution.scope(): for read_dtype in (dtypes.float32, dtypes.float16): - if distribute: + if ds_context.has_strategy(): # MirroredVariable.assign will (incorrectly) return a Mirrored value # instead of a MirroredVariable. So we cannot properly wrap it in an # AutoCastVariable. 
@@ -183,14 +176,14 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): self.assertEqual(x.aggregation, x._variable.aggregation) self.assertEqual(self.evaluate(x.initialized_value()), 7) if not context.executing_eagerly(): - if not distribute: + if not ds_context.has_strategy(): # These functions are not supported for DistributedVariables x.load(9) self.assertEqual(x.eval(), 9) self.assertEqual(self.evaluate(x.initial_value), 7) self.assertEqual(x.op, x._variable.op) self.assertEqual(x.graph, x._variable.graph) - if not distribute: + if not ds_context.has_strategy(): # These attributes are not supported for DistributedVariables self.assertIsNone(x.constraint) self.assertEqual(x.initializer, x._variable.initializer) @@ -202,7 +195,7 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): self.assertEqual(x.shape, ()) self.assertEqual(x.get_shape(), ()) - if not distribute: + if not ds_context.has_strategy(): # Test scatter_* methods. These are not supported for # DistributedVariables x = get_var([7, 8], dtypes.float32) @@ -233,9 +226,9 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): self.assertAllEqual( evaluate(x.scatter_nd_update([[0], [1]], [1., 2.])), [1, 2]) - @parameterized.named_parameters(*TESTCASES) - def test_operator_overloads(self, distribute): - with get_distribute_scope(distribute): + @combinations.generate(maybe_distribute) + def test_operator_overloads(self, distribution): + with distribution.scope(): for read_dtype in (dtypes.float32, dtypes.float16): x = get_var(7., dtypes.float32) x = autocast_variable.create_autocast_variable(x) @@ -280,9 +273,9 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): self.assertAllEqual(x == [7., 8., 10.], [True, True, False]) self.assertAllEqual(x != [7., 8., 10.], [False, False, True]) - @parameterized.named_parameters(*TESTCASES) - def test_assign(self, distribute): - with get_distribute_scope(distribute): + @combinations.generate(maybe_distribute) + def test_assign(self, distribution): + with distribution.scope(): x = get_var(0., dtypes.float32) x = autocast_variable.create_autocast_variable(x) self.evaluate(x.initializer) @@ -318,18 +311,19 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): self.assertAllClose(3., self.evaluate(x.assign_sub(3.))) # Assign multiple times - assign = x.assign(1.) - self.assertAllClose(1., self.evaluate(assign)) - self.assertAllClose(0., self.evaluate(assign.assign(0.))) - assign_add = x.assign_add(3.) - self.assertAllClose(3., self.evaluate(assign_add)) - self.assertAllClose(3. * 3, - self.evaluate(x.assign_add(3.).assign_add(3.))) - self.assertAllClose(3. * 3, x) - assign_sub = x.assign_sub(3.) - self.assertAllClose(3. * 2, self.evaluate(assign_sub)) - self.assertAllClose(0., - self.evaluate(x.assign_sub(3.).assign_sub(3.))) + if not ds_context.has_strategy(): + assign = x.assign(1.) + self.assertAllClose(1., self.evaluate(assign)) + self.assertAllClose(0., self.evaluate(assign.assign(0.))) + assign_add = x.assign_add(3.) + self.assertAllClose(3., self.evaluate(assign_add)) + self.assertAllClose(3. * 3, + self.evaluate(x.assign_add(3.).assign_add(3.))) + self.assertAllClose(3. * 3, x) + assign_sub = x.assign_sub(3.) + self.assertAllClose(3. 
* 2, self.evaluate(assign_sub)) + self.assertAllClose(0., + self.evaluate(x.assign_sub(3.).assign_sub(3.))) # Assign with read_value=False self.assertIsNone(self.evaluate(x.assign(1., read_value=False))) @@ -355,9 +349,9 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): # assign still expect float32 value even if in float16 scope run_and_check() - @parameterized.named_parameters(*TESTCASES) - def test_assign_stays_in_true_dtype(self, distribute): - with get_distribute_scope(distribute): + @combinations.generate(maybe_distribute) + def test_assign_stays_in_true_dtype(self, distribution): + with distribution.scope(): x = get_var(1., dtypes.float32) x = autocast_variable.create_autocast_variable(x) self.evaluate(x.initializer) @@ -382,10 +376,10 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): self.assertEqual(1., self.evaluate(x.value())) self.assertEqual(1. + small_val, self.evaluate(x.value())) - @parameterized.named_parameters(*TESTCASES) - def test_checkpoint(self, distribute): + @combinations.generate(maybe_distribute) + def test_checkpoint(self, distribution): with self.test_session(): - with get_distribute_scope(distribute): + with distribution.scope(): x = get_var(1., dtypes.float32) x = autocast_variable.create_autocast_variable(x) self.evaluate(x.initializer) @@ -398,9 +392,9 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): checkpoint.restore(save_path).assert_consumed().run_restore_ops() self.assertEqual(self.evaluate(x), 123.) - @parameterized.named_parameters(*TESTCASES) - def test_invalid_wrapped_variable(self, distribute): - with get_distribute_scope(distribute): + @combinations.generate(maybe_distribute) + def test_invalid_wrapped_variable(self, distribution): + with distribution.scope(): # Wrap a non-variable with self.assertRaisesRegexp(ValueError, 'variable must be of type'): x = constant_op.constant([1.], dtype=dtypes.float32) @@ -443,7 +437,7 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): ) def test_repr_distributed(self): - with get_distribute_scope(distribute=True): + with mirrored_strategy.MirroredStrategy([""/cpu:1"", ""/cpu:2""]).scope(): x = get_var(1., dtypes.float32) x = autocast_variable.create_autocast_variable(x) self.assertRegexpMatches( ",0,train cd8dd009b31f80344bd69f8dc5b404b5189646a5,tensorflow/tensorflow,"Expose pipeline as command line option for TPU graph exporting pipeline This pipeline is ran post tf-tpu-bridge and tf-tpu-bridge-v1 during RunTPUBridge, enables running the equivalent of that function via tf-opt without specifying individual passes. 
PiperOrigin-RevId: 359855632 Change-Id: I6f1287f8dcbf0c368c15a626fa0bec537c814559",bridge.cc,"@@ -39,21 +39,8 @@ void EnableLogging(PassManager *pm) { } // namespace namespace TFTPU { -namespace { -void AddGraphExportLoweringPasses(OpPassManager &pm) { - auto add_pass = [&](std::unique_ptr pass) { - pm.addNestedPass(std::move(pass)); - pm.addPass(CreateBreakUpIslandsPass()); - }; - - add_pass(CreateFunctionalToExecutorDialectConversionPass()); - add_pass(TFDevice::CreateReplicateToIslandPass()); - add_pass(TFDevice::CreateParallelExecuteToIslandsPass()); - add_pass(TFDevice::CreateLaunchToDeviceAttributePass()); - pm.addNestedPass(CreateTPUDevicePropagationPass()); - pm.addPass(createSymbolDCEPass()); -} +namespace { tensorflow::Status RunTPUBridge( ModuleOp module, bool enable_logging, llvm::function_ref pipeline_builder) { @@ -68,7 +55,7 @@ tensorflow::Status RunTPUBridge( pipeline_builder(bridge); // Add set of passes to lower back to graph (from tf_executor). - AddGraphExportLoweringPasses(bridge); + TF::AddGraphExportLoweringPasses(bridge); // Run the bridge on the module, in case of failure, the `diag_handler` // converts MLIR errors emitted to the MLIRContext into a tensorflow::Status. @@ -166,6 +153,20 @@ tensorflow::Status TPUBridgeV1Compat(ModuleOp module, bool enable_logging) { namespace TF { +void AddGraphExportLoweringPasses(OpPassManager &pm) { + auto add_pass = [&](std::unique_ptr pass) { + pm.addNestedPass(std::move(pass)); + pm.addPass(CreateBreakUpIslandsPass()); + }; + + add_pass(CreateFunctionalToExecutorDialectConversionPass()); + add_pass(TFDevice::CreateReplicateToIslandPass()); + add_pass(TFDevice::CreateParallelExecuteToIslandsPass()); + add_pass(TFDevice::CreateLaunchToDeviceAttributePass()); + pm.addNestedPass(TFTPU::CreateTPUDevicePropagationPass()); + pm.addPass(createSymbolDCEPass()); +} + tensorflow::Status RunBridgeWithStandardPipeline(ModuleOp module, bool enable_logging, bool enable_inliner) { ",0,train cd8dd009b31f80344bd69f8dc5b404b5189646a5,tensorflow/tensorflow,"Expose pipeline as command line option for TPU graph exporting pipeline This pipeline is ran post tf-tpu-bridge and tf-tpu-bridge-v1 during RunTPUBridge, enables running the equivalent of that function via tf-opt without specifying individual passes. PiperOrigin-RevId: 359855632 Change-Id: I6f1287f8dcbf0c368c15a626fa0bec537c814559",bridge_pass.cc,"@@ -20,20 +20,32 @@ limitations under the License. #include ""tensorflow/compiler/mlir/tensorflow/transforms/passes.h"" #include ""tensorflow/compiler/mlir/tensorflow/utils/error_util.h"" +namespace mlir { +namespace TFTPU { +extern void AddGraphExportLoweringPasses(OpPassManager &pm); +} // namespace TFTPU +} // namespace mlir + namespace { -// Registers an existing pipeline builder function. +// Registers a pipeline builder function for TF TPU bridge. mlir::PassPipelineRegistration<> tpu_pipeline( ""tf-tpu-bridge"", ""Run all the passes involved in transforming the graph before execution so "" ""that it is suitable for targeting TPUs."", mlir::TFTPU::CreateTPUBridgePipeline); -// Registers an existing pipeline builder function. +// Registers a pipeline builder function for TF TPU V1 bridge. mlir::PassPipelineRegistration<> tpu_pipeline_v1( ""tf-tpu-bridge-v1"", ""Run all the passes involved in transforming a TensorFlow V1 graph before "" ""execution so that it is suitable for targeting TPUs."", mlir::TFTPU::CreateTPUBridgePipelineV1); +// Registers a pipeline builder function for TF Graph export. 
+mlir::PassPipelineRegistration<> tpu_export( + ""tf-graph-export"", + ""Run passes to prepare for exporting module back to TF Graph."", + mlir::TF::AddGraphExportLoweringPasses); + } // anonymous namespace ",0,train cd8dd009b31f80344bd69f8dc5b404b5189646a5,tensorflow/tensorflow,"Expose pipeline as command line option for TPU graph exporting pipeline This pipeline is ran post tf-tpu-bridge and tf-tpu-bridge-v1 during RunTPUBridge, enables running the equivalent of that function via tf-opt without specifying individual passes. PiperOrigin-RevId: 359855632 Change-Id: I6f1287f8dcbf0c368c15a626fa0bec537c814559",passes.h,"@@ -203,6 +203,10 @@ std::unique_ptr> CreateCrossHostTransferPass(); // will replicate the tf.Const op once for each device. std::unique_ptr> CreateConstantOpDeviceAssignmentPass(); +// Populates the supplied passmanager with the passes required to export +// to TensorFlow Graph. +void AddGraphExportLoweringPasses(OpPassManager& pm); + } // namespace TF namespace tf_executor { ",0,train 44acd839c57494860666c799afd24360f1df3bed,tensorflow/tensorflow,"Fix reported cuDNN default version during configuration. PiperOrigin-RevId: 215272308",configure.py,"@@ -884,7 +884,7 @@ def set_tf_cudnn_version(environ_cp): """"""Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION."""""" ask_cudnn_version = ( 'Please specify the cuDNN version you want to use. ' - '[Leave empty to default to cuDNN %s.0]: ') % _DEFAULT_CUDNN_VERSION + '[Leave empty to default to cuDNN %s]: ') % _DEFAULT_CUDNN_VERSION for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): tf_cudnn_version = get_from_env_or_user_or_default( ",0,train 48eb150c8e044e233b60c9c65681aaba00f083b6,tensorflow/tensorflow,"Fix the comment about where the weak declaration of AcquireXNNPACKDelegate could be found, and combined code sections w.r.t TFLITE_BUILD_WITH_XNNPACK_DELEGATE macro. PiperOrigin-RevId: 364983825 Change-Id: I58400bd0023b5e09a84f68872a6f9199d4edd0bb",tflite_with_xnnpack.cc,"@@ -18,7 +18,8 @@ limitations under the License. #include ""tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"" namespace tflite { -// Corresponding weak declaration found in lite/interpreter_builder.cc. +// Corresponding weak declaration found in lite/tflite_with_xnnpack_optional.cc +// when TFLITE_BUILD_WITH_XNNPACK_DELEGATE macro isn't defined. std::unique_ptr AcquireXNNPACKDelegate(int num_threads) { auto opts = TfLiteXNNPackDelegateOptionsDefault(); ",0,train 48eb150c8e044e233b60c9c65681aaba00f083b6,tensorflow/tensorflow,"Fix the comment about where the weak declaration of AcquireXNNPACKDelegate could be found, and combined code sections w.r.t TFLITE_BUILD_WITH_XNNPACK_DELEGATE macro. PiperOrigin-RevId: 364983825 Change-Id: I58400bd0023b5e09a84f68872a6f9199d4edd0bb",tflite_with_xnnpack_optional.cc,"@@ -28,16 +28,6 @@ namespace tflite { using TfLiteDelegatePtr = std::unique_ptr; -#ifndef TFLITE_BUILD_WITH_XNNPACK_DELEGATE -// Using weak symbols to create a delegate allows automatic injection of the -// delegate simply by adding it as a dependency. 
See the strong override in -// lite/tflite_with_xnnpack.cc, -TFLITE_ATTRIBUTE_WEAK TfLiteDelegatePtr -AcquireXNNPACKDelegate(int num_threads) { - return TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {}); -} -#endif - #ifdef TFLITE_BUILD_WITH_XNNPACK_DELEGATE TfLiteDelegatePtr MaybeCreateXNNPACKDelegate(int num_threads) { auto opts = TfLiteXNNPackDelegateOptionsDefault(); @@ -47,6 +37,14 @@ TfLiteDelegatePtr MaybeCreateXNNPACKDelegate(int num_threads) { TfLiteXNNPackDelegateDelete); } #else +// Using weak symbols to create a delegate allows automatic injection of the +// delegate simply by adding it as a dependency. See the strong override in +// lite/tflite_with_xnnpack.cc, +TFLITE_ATTRIBUTE_WEAK TfLiteDelegatePtr +AcquireXNNPACKDelegate(int num_threads) { + return TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {}); +} + TfLiteDelegatePtr MaybeCreateXNNPACKDelegate(int num_threads) { return AcquireXNNPACKDelegate(num_threads); } ",0,train b03209bbc26f8947cbd1d49e1a232c09e7dcd17a,tensorflow/tensorflow,"Bugfix to error message reporting when portpicker is not available. PiperOrigin-RevId: 156895715",test_util.py,"@@ -29,9 +29,11 @@ import threading import numpy as np import six +_portpicker_import_error = None try: import portpicker # pylint: disable=g-import-not-at-top -except ImportError as _portpicker_import_error: +except ImportError as _error: + _portpicker_import_error = _error portpicker = None # pylint: disable=g-import-not-at-top @@ -820,8 +822,8 @@ def create_local_cluster(num_workers, num_ps, protocol=""grpc""): Raises: ImportError: if portpicker module was not found at load time """""" - if not portpicker: - raise _portpicker_import_error + if _portpicker_import_error: + raise _portpicker_import_error # pylint: disable=raising-bad-type worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)] ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)] cluster_dict = { ",0,test f8ac6c10fd9660b0575dcdb2eb3bc1d6ac90f399,tensorflow/tensorflow,"Switch from all_reduce_merge_scope to num_packs; generate single pack. `MultiWorkerMirroredStrategy` packs gradients into chunks of size `all_reduce_merge_scope` via the `ScopedAllocator` Grappler optimizer so that we can convert many small all-reduces into fewer larger all-reduces. This change switches the knob from merge scope to `num_packs`, essentially identical to `MirroredStrategy`. We also set default value to 1, like `MirroredStrategy`. The optimal value of `num_packs` may be dependent on various factors, including the choice of all reduce implementation. PiperOrigin-RevId: 265724850",cross_device_ops.py,"@@ -1007,21 +1007,19 @@ class CollectiveAllReduce(CrossDeviceOps): def __init__(self, num_workers=1, num_gpus_per_worker=0, - all_reduce_merge_scope=32, + num_packs=1, collective_keys=None): """"""Initializes the object. Args: num_workers: number of workers in the between-graph replicated training. num_gpus_per_worker: number of GPUs per worker. - all_reduce_merge_scope: size of groups into which to partition consecutive - gradients grouped under a common 'allreduce' name scope. This is useful - for some optimization of collective ops. + num_packs: gradients will be packed into `num_packs` chunks. collective_keys: an optional CollectiveKey object. 
"""""" self._num_workers = num_workers self._num_gpus_per_worker = num_gpus_per_worker - self._all_reduce_merge_scope = all_reduce_merge_scope + self._num_packs = num_packs self._collective_keys = (collective_keys or cross_device_utils.CollectiveKeys()) super(CollectiveAllReduce, self).__init__() @@ -1075,21 +1073,31 @@ class CollectiveAllReduce(CrossDeviceOps): for t, v in value_destination_pairs ] - def _make_gradient_chunks(self, per_replica_values, all_reduce_merge_scope): + def _make_gradient_chunks(self, per_replica_values, num_packs): """"""Make `per_replica_values` into chunks."""""" - grouped_by_device = _group_value_by_device(per_replica_values) - - grouped_by_var = list(zip(*grouped_by_device)) - # grouped_by_var is grouped by variables and takes the following format: + chunked_by_device = _group_value_by_device(per_replica_values) + chunked_by_var = list(zip(*chunked_by_device)) + # chunked_by_var is chunked by variables and takes the following format: # [((grad0_gpu0, v0_gpu0), (grad0_gpu1, v0_gpu1), (grad0_gpu2, v0_gpu2) ..), # ((grad1_gpu0, v1_gpu0), (grad1_gpu1, v1_gpu1), (grad1_gpu0, v1_gpu2) ..), # ((grad2_gpu0, v2_gpu0), (grad2_gpu1, v2_gpu1), (grad2_gpu0, v2_gpu2) ..), # ... # ] + + # First n-1 chunks get `chunk_size` grads, last chunk gets leftover grads. + # This strategy can cause the last chunk to have larger size compared to the + # first n-1 chunks. Alternatively, we can increment chunk_size by 1 to get + # slightly larger first n-1 chunks and smaller last chunk. + # TODO(ayushd): compare different packing strategies. + chunk_size = len(chunked_by_var) // num_packs + leftover_size = len(chunked_by_var) - chunk_size * (num_packs - 1) + assert leftover_size > 0 chunked_gv = [ - grouped_by_var[x:x + all_reduce_merge_scope] - for x in range(0, len(grouped_by_var), all_reduce_merge_scope) + chunked_by_var[x:x + chunk_size] + for x in range(0, len(chunked_by_var) - leftover_size, chunk_size) ] + chunked_gv.append(chunked_by_var[-leftover_size:]) + return chunked_gv def _batch_all_reduce(self, reduce_op, per_replica_values): @@ -1115,11 +1123,13 @@ class CollectiveAllReduce(CrossDeviceOps): logging.INFO, ""Collective batch_all_reduce: %d all-reduces, "" ""num_workers = %d"" % (len(per_replica_values), self._num_workers), 10) - chunked_gv = self._make_gradient_chunks(per_replica_values, - self._all_reduce_merge_scope) + chunked_gv = self._make_gradient_chunks(per_replica_values, self._num_packs) reduced_gv_list = [] for chunk in chunked_gv: + # By placing all collective ops in a chunk under single name scope, we + # ensure they will be picked up by the `ScopedAllocator` grappler + # optimizer and packed into a single all-reduce. with ops.name_scope(""allreduce""): for grad_and_vars in chunk: # Gradients for the same variable but from different devices. @@ -1147,8 +1157,7 @@ class CollectiveAllReduce(CrossDeviceOps): ""%d all-reduces, num_workers = %d"" % (len(per_replica_values), self._num_workers), 10) - chunked_gv = self._make_gradient_chunks(per_replica_values, - self._all_reduce_merge_scope) + chunked_gv = self._make_gradient_chunks(per_replica_values, self._num_packs) reduced_gv_list = [] for chunk in chunked_gv: ",0,train e6dbfb7a221563336ed3c28178c6e908aa8a6943,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-01-26 PiperOrigin-RevId: 353817494 Change-Id: Id78484cbae933b3f44384241a4939fa5c94fa7d6",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. 
It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 1, 25) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 1, 26) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train c40c656e78203afa78797dad5529f1b0f1e69519,tensorflow/tensorflow,"Fix OpKernel access issue to coordination service when executing eager functions. PiperOrigin-RevId: 387614843 Change-Id: I7bddb8e75180878a5d2ac28c6f525f6c658d7007",executor.cc,"@@ -366,6 +366,7 @@ class ExecutorState { const ImmutableExecutorState& immutable_state_; ExecutorImpl::KernelStats* const kernel_stats_; CancellationManager* cancellation_manager_; + CoordinationServiceAgent* coordination_service_agent_; // If not null, use this device to schedule intra-op operation std::unique_ptr user_device_; Executor::Args::Runner runner_; @@ -413,6 +414,7 @@ ExecutorState::ExecutorState( immutable_state_(immutable_state), kernel_stats_(kernel_stats), cancellation_manager_(args.cancellation_manager), + coordination_service_agent_(args.coordination_service_agent), runner_(args.runner), sync_on_finish_(args.sync_on_finish), run_all_kernels_inline_(args.run_all_kernels_inline), @@ -706,6 +708,7 @@ void ExecutorState::Process(TaggedNode tagged_node, params.session_metadata = session_metadata_; params.tensor_store = tensor_store_; params.cancellation_manager = cancellation_manager_; + params.coordination_service_agent = coordination_service_agent_; params.call_frame = call_frame_; params.function_library = immutable_state_.params().function_library; params.resource_manager = device->resource_manager(); ",0,train c40c656e78203afa78797dad5529f1b0f1e69519,tensorflow/tensorflow,"Fix OpKernel access issue to coordination service when executing eager functions. PiperOrigin-RevId: 387614843 Change-Id: I7bddb8e75180878a5d2ac28c6f525f6c658d7007",op_kernel.h,"@@ -669,7 +669,7 @@ class OpKernelContext { bool* outputs_required_array = nullptr; // For access to distributed coordination service. - CoordinationServiceAgent* coordination_service_agent; + CoordinationServiceAgent* coordination_service_agent = nullptr; }; // params must outlive the OpKernelContext. ",0,train 448a16182065bd08a202d9057dd8ca541e67996c,tensorflow/tensorflow,"Prevent stack overflow when FunctionLib in GraphDef has a self-recursive function. It is likely that no recursivity is supported, but we should handle this separately. PiperOrigin-RevId: 414860329 Change-Id: I02a2270e86282b37362ddd485eeef16fb986a9e0",loader.cc,"@@ -25,6 +25,7 @@ limitations under the License. 
#include ""tensorflow/core/framework/attr_value.pb.h"" #include ""tensorflow/core/framework/function.pb.h"" #include ""tensorflow/core/framework/node_def.pb.h"" +#include ""tensorflow/core/framework/op_def.pb.h"" #include ""tensorflow/core/framework/tensor.pb.h"" #include ""tensorflow/core/lib/io/path.h"" #include ""tensorflow/core/lib/monitoring/counter.h"" @@ -99,6 +100,19 @@ static Status ValidateNode(const NodeDef& node) { return Status::OK(); } +static Status ValidateFunctionNotRecursive(const FunctionDef& function) { + const auto& function_name = function.signature().name(); + for (const auto& node : function.node_def()) { + if (node.op() == function_name) { + return errors::FailedPrecondition( + ""Function "", function_name, + "" is self recursive and TensorFlow does not support this scenario.""); + } + } + + return Status::OK(); +} + static Status ValidateSavedTensors(const GraphDef& graph_def) { for (const auto& node : graph_def.node()) { TF_RETURN_IF_ERROR(ValidateNode(node)); @@ -110,6 +124,10 @@ static Status ValidateSavedTensors(const GraphDef& graph_def) { for (const auto& node : function.node_def()) { TF_RETURN_IF_ERROR(ValidateNode(node)); } + + // Also check that there is no recursivity in the library + // TODO(mihaimaruseac): Do more than self-recursivity + TF_RETURN_IF_ERROR(ValidateFunctionNotRecursive(function)); } } ",0,test c07a6e6568b776037f052bc0d385a509ec2647aa,tensorflow/tensorflow,"Add Timestamp Op which returns the current timestamp during graph execution PiperOrigin-RevId: 186342760",logging_ops.cc,"@@ -90,4 +90,23 @@ class PrintOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name(""Print"").Device(DEVICE_CPU), PrintOp); +class TimestampOp : public OpKernel { + public: + explicit TimestampOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + TensorShape output_shape; // Default shape is 0 dim, 1 element + Tensor* output_tensor = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, output_shape, &output_tensor)); + + auto output_scalar = output_tensor->scalar(); + double now_us = static_cast(Env::Default()->NowMicros()); + double now_s = now_us / 1000000; + output_scalar() = now_s; + } +}; + +REGISTER_KERNEL_BUILDER(Name(""Timestamp"").Device(DEVICE_CPU), TimestampOp); + } // end namespace tensorflow ",0,train c07a6e6568b776037f052bc0d385a509ec2647aa,tensorflow/tensorflow,"Add Timestamp Op which returns the current timestamp during graph execution PiperOrigin-RevId: 186342760",logging_ops_test.cc,"@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include +#include + #include ""tensorflow/core/framework/fake_input.h"" #include ""tensorflow/core/framework/node_def_builder.h"" #include ""tensorflow/core/framework/tensor.h"" @@ -96,5 +99,27 @@ TEST_F(PrintingGraphTest, FirstNSuccess) { test::ExpectTensorEqual(expected, *GetOutput(0)); } +class TimestampTest : public OpsTestBase { + protected: + Status Init() { + TF_CHECK_OK(NodeDefBuilder(""op"", ""Timestamp"").Finalize(node_def())); + return InitOp(); + } +}; + +TEST_F(TimestampTest, WaitAtLeast) { + TF_ASSERT_OK(Init()); + TF_ASSERT_OK(RunOpKernel()); + double ts1 = *((*GetOutput(0)).flat().data()); + + // wait 1 second + std::this_thread::sleep_for(std::chrono::seconds(1)); + + TF_ASSERT_OK(RunOpKernel()); + double ts2 = *((*GetOutput(0)).flat().data()); + + EXPECT_LE(1.0, ts2 - ts1); +} + } // end namespace } // end namespace tensorflow ",0,train c07a6e6568b776037f052bc0d385a509ec2647aa,tensorflow/tensorflow,"Add Timestamp Op which returns the current timestamp during graph execution PiperOrigin-RevId: 186342760",logging_ops.cc,"@@ -111,4 +111,9 @@ REGISTER_OP(""MergeSummary"") .Attr(""N : int >= 1"") .SetShapeFn(shape_inference::ScalarShape); +REGISTER_OP(""Timestamp"") + .Output(""ts: float64"") + .SetIsStateful() + .SetShapeFn(shape_inference::ScalarShape); + } // end namespace tensorflow ",0,train c07a6e6568b776037f052bc0d385a509ec2647aa,tensorflow/tensorflow,"Add Timestamp Op which returns the current timestamp during graph execution PiperOrigin-RevId: 186342760",control_flow_ops.py,"@@ -44,6 +44,7 @@ See the @{$python/control_flow_ops} guide. @@add_check_numerics_ops @@Assert @@Print +@@timestamp """""" # pylint: disable=g-bad-name from __future__ import absolute_import ",0,train c07a6e6568b776037f052bc0d385a509ec2647aa,tensorflow/tensorflow,"Add Timestamp Op which returns the current timestamp during graph execution PiperOrigin-RevId: 186342760",logging_ops.py,"@@ -356,3 +356,4 @@ ops.NotDifferentiable(""AudioSummary"") ops.NotDifferentiable(""AudioSummaryV2"") ops.NotDifferentiable(""MergeSummary"") ops.NotDifferentiable(""ScalarSummary"") +ops.NotDifferentiable(""Timestamp"") ",0,train c07a6e6568b776037f052bc0d385a509ec2647aa,tensorflow/tensorflow,"Add Timestamp Op which returns the current timestamp during graph execution PiperOrigin-RevId: 186342760",standard_ops.py,"@@ -60,6 +60,7 @@ from tensorflow.python.ops.io_ops import * from tensorflow.python.ops.linalg_ops import * from tensorflow.python.ops.logging_ops import Print from tensorflow.python.ops.logging_ops import get_summary_op +from tensorflow.python.ops.logging_ops import timestamp from tensorflow.python.ops.lookup_ops import initialize_all_tables from tensorflow.python.ops.lookup_ops import tables_initializer from tensorflow.python.ops.manip_ops import * @@ -232,7 +233,7 @@ _allowed_symbols_clip_ops = [ ""global_norm"", ] -_allowed_symbols_image_ops = [ +_allowed_symbols_logging_ops = [ # Documented in training.py. # We are not importing training.py to avoid complex dependencies. 
""audio_summary"", @@ -262,8 +263,8 @@ _allowed_symbols = (_allowed_symbols_array_ops + _allowed_symbols_clip_ops + _allowed_symbols_control_flow_ops + _allowed_symbols_functional_ops + - _allowed_symbols_image_ops + _allowed_symbols_gradients + + _allowed_symbols_logging_ops + _allowed_symbols_math_ops + _allowed_symbols_variable_scope_ops + _allowed_symbols_misc + ",0,train a0a4d37e44419edc582c069ecd2de15b6d0c19ac,tensorflow/tensorflow,"[XLA] Add support to specify boundary nodes in interactive_grahviz tool. The enhanced command is ` [] [/ ...]`. The boundary nodes are optional. This is useful in cases where one wants to furhter prune a graph when using a large .",hlo_graph_dumper.cc,"@@ -1281,8 +1281,9 @@ namespace { // Gets a NodeFilter that includes roughly all instructions whose distance from // root is <= radius. -NodeFilter MakeNodeRadiusAroundFilter(const HloInstruction* root, - int64 radius) { +NodeFilter MakeNodeRadiusAroundFilter( + const HloInstruction* root, int64 radius, + const std::set* boundary) { // First, find the neighborhood of nodes with distance from root <= radius. // These nodes are our initial set of ""normal"" nodes. absl::flat_hash_map nodes; @@ -1298,6 +1299,9 @@ NodeFilter MakeNodeRadiusAroundFilter(const HloInstruction* root, if (depth == radius) { continue; } + if (boundary->count(instr) != 0) { + continue; + } // Traverse into instr's operands. // @@ -1513,11 +1517,12 @@ string DumpGraph(const HloComputation& computation, const string& label, } string DumpNeighborhoodAround(const HloInstruction& node, int radius, + const std::set* boundary, bool show_backend_config) { auto debug_options = node.GetModule()->config().debug_options(); string label = StrCat(""Neighborhood of "", radius, "" nodes around "", node.name()); - NodeFilter filter = MakeNodeRadiusAroundFilter(&node, radius); + NodeFilter filter = MakeNodeRadiusAroundFilter(&node, radius, boundary); string graph = HloDotDumper(node.parent(), label, debug_options, show_backend_config, /*profile=*/nullptr, filter) ",0,train a0a4d37e44419edc582c069ecd2de15b6d0c19ac,tensorflow/tensorflow,"[XLA] Add support to specify boundary nodes in interactive_grahviz tool. The enhanced command is ` [] [/ ...]`. The boundary nodes are optional. This is useful in cases where one wants to furhter prune a graph when using a large .",hlo_graph_dumper.h,"@@ -63,8 +63,13 @@ string DumpGraph(const HloComputation& computation, const string& label, // The number of nodes dumped is controlled by the radius parameter, which // (roughly) corresponds to the max distance a node may be from the primary node // before it's omitted from the graph. -string DumpNeighborhoodAround(const HloInstruction& node, int radius, - bool show_backend_config = false); +// +// The optional boundary parameter specifies the set of boundary nodes which +// will be omitted when they are within the radius. +string DumpNeighborhoodAround( + const HloInstruction& node, int radius, + const std::set* boundary = nullptr, + bool show_backend_config = false); // Dumps nodes on any of the paths from `from` to `to`. If there are more than // max_nodes on all paths, restricts to the max_nodes nodes on the shortest ",0,train a0a4d37e44419edc582c069ecd2de15b6d0c19ac,tensorflow/tensorflow,"[XLA] Add support to specify boundary nodes in interactive_grahviz tool. The enhanced command is ` [] [/ ...]`. The boundary nodes are optional. 
This is useful in cases where one wants to furhter prune a graph when using a large .",interactive_graphviz.cc,"@@ -139,9 +139,10 @@ HloComputation* FindComputation(const HloModule& module, // Print a help message describing the various available commands. void DoHelpCommand() { std::cout << R""(Commands: - [] - Renders a neighborhood of nodes around . If - is not provided, the default value is )"" + [] [/ +] + Renders a neighborhood of nodes around , without going + beyond the optional boundary instructions. If is not provided, + the default value is )"" << kDefaultWidth << R""(. allpaths [] Renders a subset of all paths from one instruction to the other. Either @@ -457,12 +458,6 @@ void DoAllPathsCommand(const Options& opts, const HloModule& module, // Plot a given instruction neighborhood or computation with graphviz. void DoPlotCommand(const Options& opts, const HloModule& module, const std::vector& tokens) { - if (tokens.size() > 2) { - std::cerr << R""(Illegal input. Enter e.g. ""%fusion.1 42"" or ""%fusion.1"".)"" - << std::endl; - return; - } - string node_name = tokens[0]; // Find the node with the given name. @@ -475,16 +470,43 @@ void DoPlotCommand(const Options& opts, const HloModule& module, } uint64 graph_width = kDefaultWidth; - if (tokens.size() == 2) { + std::set boundary; + if (tokens.size() >= 2) { if (comp) { std::cerr << ""Can only use graph-size parameter with instructions, but "" << node_name << "" is a computation."" << std::endl; return; } + + int bound_index = tokens.size(); if (!absl::SimpleAtoi(tokens[1], &graph_width)) { - std::cerr << ""Can't parse '"" << tokens[1] << ""' as an integer."" - << std::endl; - return; + if (tokens[1] != ""/"") { + std::cerr << ""Can't parse '"" << tokens[1] << ""' as an integer."" + << std::endl; + return; + } + graph_width = kDefaultWidth; + bound_index = 2; + } else { + if (tokens.size() > 2) { + if (tokens[2] != ""/"") { + std::cerr << ""Expect a /, but get a '"" << tokens[1] << ""'."" + << std::endl; + return; + } + bound_index = 3; + } + } + while (bound_index < tokens.size()) { + string bnode_name = tokens[bound_index]; + const HloInstruction* binstr = FindInstruction(module, bnode_name); + if (!binstr) { + std::cerr << ""Couldn't find HloInstruction named "" << node_name << ""."" + << std::endl; + return; + } + boundary.insert(binstr); + bound_index++; } } @@ -496,7 +518,9 @@ void DoPlotCommand(const Options& opts, const HloModule& module, /*show_backend_config=*/show_backend_config)); } else { DisplayGraphHandle(opts, hlo_graph_dumper::DumpNeighborhoodAround( - *instr, graph_width, /*show_backend_config=*/show_backend_config)); + *instr, graph_width, + /*boundary=*/&boundary, + /*show_backend_config=*/show_backend_config)); } } ",0,train 1ba89338bdb4afb85ae56e64b47acc93a3a28703,tensorflow/tensorflow,"Fixing a subtle bug where in some cases the post cancellation work wasn't being done correctly. This is the scenario in which FunctionBufferingResource::Cancel() got called while buffering was being done, but then the buffer filled up in which case FillBuffer() wasn't ever called and the Cancel() method would get stuck waiting on a notification from the condition variable leading to timeouts. This CL fixes this by making sure FillBuffer() got called one last time in this case. Tested by running contrib/data/python/kernel_tests:prefetching_ops_test 500 times and ran contrib/distribute/python:values_test 500 times with no timeouts. 
PiperOrigin-RevId: 191007895",prefetching_kernels.cc,"@@ -224,6 +224,13 @@ class FunctionBufferingResource : public ResourceBase { if (buffer_.size() < buffer_size_ && !end_of_sequence_) { restart_buffering = true; } else { + // When the buffer is full, we don't want to call + // FillBuffer() unless we're in cancellation phase in which + // case FillBuffer() will do the final cleanup post + // cancellation. + if (cancelled_) { + restart_buffering = true; + } is_buffering_ = false; } } ",0,train e5249d6dddc469e68c09b3af32a9adfdffdb5ef1,tensorflow/tensorflow,"List all removed stable ops in the error message, not just the first. Change: 121309048",op_compatibility_lib.cc,"@@ -21,6 +21,7 @@ limitations under the License. #include ""tensorflow/core/lib/core/errors.h"" #include ""tensorflow/core/lib/core/status.h"" #include ""tensorflow/core/lib/io/path.h"" +#include ""tensorflow/core/lib/strings/str_util.h"" #include ""tensorflow/core/lib/strings/strcat.h"" #include ""tensorflow/core/platform/protobuf.h"" @@ -54,19 +55,26 @@ Status OpCompatibilityLib::ValidateCompatible(Env* env, int* changed_ops, if (stable_ops_ != nullptr) { printf(""Verifying no stable ops have been removed...\n""); + std::vector removed; // We rely on stable_ops_ and op_list_ being in sorted order. auto iter = stable_ops_->begin(); for (int cur = 0; iter != stable_ops_->end() && cur < op_list_.op_size(); ++cur) { const string& op_name = op_list_.op(cur).name(); - if (op_name > *iter) { - return errors::InvalidArgument(""Error, stable op removed: "", *iter); - } else if (op_name == *iter) { + while (op_name > *iter) { + removed.push_back(*iter); ++iter; } + if (op_name == *iter) { + ++iter; + } + } + for (; iter != stable_ops_->end(); ++iter) { + removed.push_back(*iter); } - if (iter != stable_ops_->end()) { - return errors::InvalidArgument(""Error, stable op removed: "", *iter); + if (!removed.empty()) { + return errors::InvalidArgument(""Error, stable op(s) removed: "", + str_util::Join(removed, "", "")); } } ",0,train 80ee15562599e4042675705936ccc24db2e74e9d,tensorflow/tensorflow,"Fix AlignedVector uniform value constructor - The old constructor didn't work as expected because Eigen::half has a template constructor that accepts anything (which means std::is_constructible always returns true) but its implementation can still fail for some types.",gpu_kernel_helper.h,"@@ -214,15 +214,18 @@ class alignas(alignof(T) * N) AlignedVector { AlignedVector() = default; - // Explicitly construct with uniform value. - // Note: This emulates an explicit constructor of T, so that - // AlignedVector(args...) works whenever T(args...) does. - template < - typename... Args, - typename std::enable_if::value, - int>::type = 0> - __host__ __device__ explicit AlignedVector(Args&&... args) { - value_type uniform(std::forward(args)...); + // Uniform initialization. + __host__ __device__ explicit AlignedVector(value_type uniform) { + UNROLL_ON_DEVICE for (int i = 0; i < kSize; ++i) { values_[i] = uniform; } + } + // Uniform initialization with explicit conversion. + // Note: This is required for T=Eigen::half because it only supports explicit + // conversions from other types and its template constructor is too relaxed + // to be able to use std::is_constructible. 
+ template ::value, + int>::type = 0> + __host__ __device__ explicit AlignedVector(U uniform_u) { + value_type uniform(uniform_u); UNROLL_ON_DEVICE for (int i = 0; i < kSize; ++i) { values_[i] = uniform; } } ",0,train 88d8e664ce36476bde41aa032815ab66c0b08f59,tensorflow/tensorflow,"Add tf.acos unranked kernel and test PiperOrigin-RevId: 352533456 Change-Id: I04a0ac4cee8a01cc3da4c29e28d792fd59bc662b",cwise_op_acos.cc,"@@ -19,7 +19,10 @@ namespace tensorflow { REGISTER2(UnaryOp, CPU, ""Acos"", functor::acos, float, double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED) REGISTER2(UnaryOp, GPU, ""Acos"", functor::acos, float, double); #endif +#endif } // namespace tensorflow ",0,train 88d8e664ce36476bde41aa032815ab66c0b08f59,tensorflow/tensorflow,"Add tf.acos unranked kernel and test PiperOrigin-RevId: 352533456 Change-Id: I04a0ac4cee8a01cc3da4c29e28d792fd59bc662b",cwise_op_gpu_acos.cu.cc,"@@ -19,7 +19,10 @@ limitations under the License. namespace tensorflow { namespace functor { +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED) DEFINE_UNARY2(acos, float, double); +#endif } // namespace functor } // namespace tensorflow ",0,train 88d8e664ce36476bde41aa032815ab66c0b08f59,tensorflow/tensorflow,"Add tf.acos unranked kernel and test PiperOrigin-RevId: 352533456 Change-Id: I04a0ac4cee8a01cc3da4c29e28d792fd59bc662b",gpu_op_acos.cc,"@@ -0,0 +1,24 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" +#include ""tensorflow/core/kernels/mlir_generated/gpu_ops_base.h"" + +namespace tensorflow { + +GENERATE_AND_REGISTER_UNARY_KERNEL(Acos, f32, DT_FLOAT, float); +GENERATE_AND_REGISTER_UNARY_KERNEL(Acos, f64, DT_DOUBLE, double); + +} // namespace tensorflow ",0,train 88d8e664ce36476bde41aa032815ab66c0b08f59,tensorflow/tensorflow,"Add tf.acos unranked kernel and test PiperOrigin-RevId: 352533456 Change-Id: I04a0ac4cee8a01cc3da4c29e28d792fd59bc662b",gpu_unary_ops_test.cc,"@@ -180,6 +180,17 @@ GENERATE_DEFAULT_TEST_WITH_SPECIFIC_INPUT_VALUES( Abs, DT_INT64, DT_INT64, test::NearZeroAndExtremeInput(), std::abs, test::GpuOpsTestConfig().ExpectStrictlyEqual()) +/// Test `tf.Acos`. + +// Test only values in the function domain. The othweise returned nan value +// fails comparison for equality. +GENERATE_DEFAULT_TEST_WITH_SPECIFIC_INPUT_VALUES( + Acos, DT_FLOAT, DT_FLOAT, test::DefaultInputBetweenZeroAndOne(), + std::acos, test::GpuOpsTestConfig().ExpectStrictlyEqual()) +GENERATE_DEFAULT_TEST_WITH_SPECIFIC_INPUT_VALUES( + Acos, DT_DOUBLE, DT_DOUBLE, test::DefaultInputBetweenZeroAndOne(), + std::acos, test::GpuOpsTestConfig().ExpectStrictlyEqual()) + /// Test `tf.Asin`. // Test only values in the function domain. 
The othweise returned nan value ",0,train 982bd0d982b5907c05ffa4699566d0b3056734be,tensorflow/tensorflow,"Add fake quant ops to mobile build targets. Change: 138526951",fake_quant_ops_functor.h,"@@ -24,6 +24,15 @@ limitations under the License. #include ""tensorflow/core/framework/tensor_types.h"" #include ""tensorflow/core/platform/types.h"" +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float StdRound(float input) { +// On Android, std::round() isn't present, just round(). +#if defined(__ANDROID__) + return round(input); +#else + return std::round(input); +#endif +} + namespace tensorflow { static constexpr int kSteps = 255; @@ -45,7 +54,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void Nudge(const float min, } else if (zero_point_from_min > kStepsFloat) { return static_cast(kSteps); } else { - return static_cast(std::round(zero_point_from_min)); + return static_cast(StdRound(zero_point_from_min)); } }(); @@ -53,21 +62,25 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void Nudge(const float min, *nudged_max = (kStepsFloat - nudged_zero_point) * (*scale); } -template using ConstScalar = - typename tensorflow::TTypes::ConstScalar; -template using Scalar = typename tensorflow::TTypes::Scalar; -template using ConstVec = typename tensorflow::TTypes::ConstVec; -template using Vec = typename tensorflow::TTypes::Vec; -template using ConstFlat = - typename tensorflow::TTypes::ConstFlat; -template using Flat = typename tensorflow::TTypes::Flat; +template +using ConstScalar = typename tensorflow::TTypes::ConstScalar; +template +using Scalar = typename tensorflow::TTypes::Scalar; +template +using ConstVec = typename tensorflow::TTypes::ConstVec; +template +using Vec = typename tensorflow::TTypes::Vec; +template +using ConstFlat = typename tensorflow::TTypes::ConstFlat; +template +using Flat = typename tensorflow::TTypes::Flat; // Functor called by FakeQuantWithMinMaxArgsOp to do the work. Compiles both // for CPU and GPU. 
template struct FakeQuantWithMinMaxArgsFunctor { - void operator()(const Device& d, ConstFlat inputs, - const float min, const float max, Flat outputs) { + void operator()(const Device& d, ConstFlat inputs, const float min, + const float max, Flat outputs) { eigen_assert(min <= 0.0f && ""min should be <= 0.0""); eigen_assert(max >= 0.0f && ""max should be >= 0.0""); eigen_assert(min < max && ""min should be < max""); @@ -78,8 +91,9 @@ struct FakeQuantWithMinMaxArgsFunctor { auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min); auto clamped_shifted = clamped - nudged_min; - outputs.device(d) = (clamped_shifted * inv_nudged_scale + 0.5f).floor() * - nudged_scale + nudged_min; + outputs.device(d) = + (clamped_shifted * inv_nudged_scale + 0.5f).floor() * nudged_scale + + nudged_min; } }; @@ -97,8 +111,9 @@ struct FakeQuantWithMinMaxArgsGradientFunctor { float nudged_min, nudged_max, nudged_scale; Nudge(min, max, &nudged_min, &nudged_max, &nudged_scale); - auto between_nudged_min_max = (inputs >= nudged_min && inputs <= nudged_max) - .select(inputs.constant(1.0f), inputs.constant(0.0f)); + auto between_nudged_min_max = + (inputs >= nudged_min && inputs <= nudged_max) + .select(inputs.constant(1.0f), inputs.constant(0.0f)); backprops.device(d) = gradients * between_nudged_min_max; } }; @@ -129,7 +144,8 @@ struct FakeQuantWithMinMaxVarsFunctor { const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min); const auto clamped_shifted = clamped - nudged_min; outputs.device(d) = (clamped_shifted / nudged_scale_repl + 0.5f).floor() * - nudged_scale_repl + nudged_min; + nudged_scale_repl + + nudged_min; } }; @@ -137,9 +153,9 @@ struct FakeQuantWithMinMaxVarsFunctor { // both for CPU and GPU. template struct FakeQuantWithMinMaxVarsGradientFunctor { - void operator()(const Device& d, - ConstFlat gradients, ConstFlat inputs, - ConstScalar min, ConstScalar max, + void operator()(const Device& d, ConstFlat gradients, + ConstFlat inputs, ConstScalar min, + ConstScalar max, #ifndef FAKE_QUANT_NO_DEBUG Scalar check_min_max, #endif @@ -158,16 +174,19 @@ struct FakeQuantWithMinMaxVarsGradientFunctor { float nudged_min, nudged_max, nudged_scale; Nudge(min(), max(), &nudged_min, &nudged_max, &nudged_scale); - const auto between_min_max = (inputs >= nudged_min && inputs <= nudged_max) - .select(inputs.constant(1.0f), inputs.constant(0.0f)); + const auto between_min_max = + (inputs >= nudged_min && inputs <= nudged_max) + .select(inputs.constant(1.0f), inputs.constant(0.0f)); backprops_wrt_input.device(d) = gradients * between_min_max; - const auto below_min = (inputs < nudged_min) - .select(inputs.constant(1.0f), inputs.constant(0.0f)); + const auto below_min = + (inputs < nudged_min) + .select(inputs.constant(1.0f), inputs.constant(0.0f)); backprop_wrt_min.device(d) = (gradients * below_min).sum(); - const auto above_max = (inputs > nudged_max) - .select(inputs.constant(1.0f), inputs.constant(0.0f)); + const auto above_max = + (inputs > nudged_max) + .select(inputs.constant(1.0f), inputs.constant(0.0f)); backprop_wrt_max.device(d) = (gradients * above_max).sum(); } }; @@ -180,8 +199,8 @@ using Index = typename tensorflow::TTypes::ConstTensor::Index; // Already verified: inputs, outputs, min, max are of shape [d]. 
template struct FakeQuant1WithMinMaxVarsPerChannelFunctor { - void operator()(const Device& d, ConstVec inputs, - ConstVec min, ConstVec max, + void operator()(const Device& d, ConstVec inputs, ConstVec min, + ConstVec max, #ifndef FAKE_QUANT_NO_DEBUG Scalar check_min_max, #endif @@ -202,8 +221,8 @@ struct FakeQuant1WithMinMaxVarsPerChannelFunctor { std::max(std::min(inputs(i), nudged_max), nudged_min); const float clamped_shifted = clamped - nudged_min; - outputs(i) = std::round(clamped_shifted / nudged_scale) * nudged_scale + - nudged_min; + outputs(i) = + StdRound(clamped_shifted / nudged_scale) * nudged_scale + nudged_min; } } }; @@ -213,8 +232,8 @@ struct FakeQuant1WithMinMaxVarsPerChannelFunctor { template struct FakeQuant2WithMinMaxVarsPerChannelFunctor { void operator()(const Device& d, const Index batch_size, const Index depth, - ConstFlat inputs, - ConstVec min, ConstVec max, + ConstFlat inputs, ConstVec min, + ConstVec max, #ifndef FAKE_QUANT_NO_DEBUG Scalar check_min_max, #endif @@ -233,13 +252,13 @@ struct FakeQuant2WithMinMaxVarsPerChannelFunctor { for (Index i = 0; i < min.size(); ++i) { float nudged_min, nudged_max, nudged_scale; Nudge(min(i), max(i), &nudged_min, &nudged_max, &nudged_scale); - const auto clamped = inputs_restored.chip<1>(i) - .cwiseMin(nudged_max).cwiseMax(nudged_min); + const auto clamped = + inputs_restored.chip<1>(i).cwiseMin(nudged_max).cwiseMax(nudged_min); const auto clamped_shifted = clamped - nudged_min; outputs.reshape(restored).chip<1>(i).device(d) = (clamped_shifted / nudged_scale + 0.5f).floor() * nudged_scale + - nudged_min; + nudged_min; } } }; @@ -249,8 +268,7 @@ struct FakeQuant2WithMinMaxVarsPerChannelFunctor { template struct FakeQuant4WithMinMaxVarsPerChannelFunctor { void operator()(const Device& d, const Index batch_size, const Index height, - const Index width, const Index depth, - ConstFlat inputs, + const Index width, const Index depth, ConstFlat inputs, ConstVec min, ConstVec max, #ifndef FAKE_QUANT_NO_DEBUG Scalar check_min_max, @@ -270,13 +288,13 @@ struct FakeQuant4WithMinMaxVarsPerChannelFunctor { for (Index i = 0; i < min.size(); ++i) { float nudged_min, nudged_max, nudged_scale; Nudge(min(i), max(i), &nudged_min, &nudged_max, &nudged_scale); - const auto clamped = inputs_restored.chip<3>(i) - .cwiseMin(nudged_max).cwiseMax(nudged_min); + const auto clamped = + inputs_restored.chip<3>(i).cwiseMin(nudged_max).cwiseMax(nudged_min); const auto clamped_shifted = clamped - nudged_min; outputs.reshape(restored).chip<3>(i).device(d) = (clamped_shifted / nudged_scale + 0.5f).floor() * nudged_scale + - nudged_min; + nudged_min; } } }; @@ -288,9 +306,9 @@ struct FakeQuant4WithMinMaxVarsPerChannelFunctor { // backprop_wrt_min, backprop_wrt_max are of shape [d]. 
template struct FakeQuant1WithMinMaxVarsPerChannelGradientFunctor { - void operator()(const Device& d, - ConstVec gradients, ConstVec inputs, - ConstVec min, ConstVec max, + void operator()(const Device& d, ConstVec gradients, + ConstVec inputs, ConstVec min, + ConstVec max, #ifndef FAKE_QUANT_NO_DEBUG Scalar check_min_max, #endif @@ -332,8 +350,8 @@ struct FakeQuant2WithMinMaxVarsPerChannelGradientFunctor { #ifndef FAKE_QUANT_NO_DEBUG Scalar check_min_max, #endif - Flat backprops_wrt_input, - Vec backprop_wrt_min, Vec backprop_wrt_max) { + Flat backprops_wrt_input, Vec backprop_wrt_min, + Vec backprop_wrt_max) { #ifndef FAKE_QUANT_NO_DEBUG check_min_max.device(d) = (min <= 0.0f).all(); eigen_assert(check_min_max() && ""min should be <= 0.0 coeff-wise""); @@ -358,14 +376,16 @@ struct FakeQuant2WithMinMaxVarsPerChannelGradientFunctor { backprops_wrt_input.reshape(restored).chip<1>(i).device(d) = gradients_chip * between_min_max; - const auto below_min = (inputs_chip < nudged_min) - .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); + const auto below_min = + (inputs_chip < nudged_min) + .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); Eigen::DSizes reduce(0); backprop_wrt_min.chip<0>(i).device(d) = (gradients_chip * below_min).sum(reduce); - const auto above_max = (inputs_chip > nudged_max) - .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); + const auto above_max = + (inputs_chip > nudged_max) + .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); backprop_wrt_max.chip<0>(i).device(d) = (gradients_chip * above_max).sum(reduce); } @@ -383,8 +403,8 @@ struct FakeQuant4WithMinMaxVarsPerChannelGradientFunctor { #ifndef FAKE_QUANT_NO_DEBUG Scalar check_min_max, #endif - Flat backprops_wrt_input, - Vec backprop_wrt_min, Vec backprop_wrt_max) { + Flat backprops_wrt_input, Vec backprop_wrt_min, + Vec backprop_wrt_max) { #ifndef FAKE_QUANT_NO_DEBUG check_min_max.device(d) = (min <= 0.0f).all(); eigen_assert(check_min_max() && ""min should be <= 0.0 coeff-wise""); @@ -409,14 +429,16 @@ struct FakeQuant4WithMinMaxVarsPerChannelGradientFunctor { backprops_wrt_input.reshape(restored).chip<3>(i).device(d) = gradients_chip * between_min_max; - const auto below_min = (inputs_chip < nudged_min) - .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); + const auto below_min = + (inputs_chip < nudged_min) + .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); Eigen::DSizes reduce(0, 1, 2); backprop_wrt_min.chip<0>(i).device(d) = (gradients_chip * below_min).sum(reduce); - const auto above_max = (inputs_chip > nudged_max) - .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); + const auto above_max = + (inputs_chip > nudged_max) + .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); backprop_wrt_max.chip<0>(i).device(d) = (gradients_chip * above_max).sum(reduce); } ",0,train 63a49c712edd3b2ee990a9f98b766b24190d3ccb,tensorflow/tensorflow,"Adds support for Eager delegate to tflite_diff. PiperOrigin-RevId: 208752057",generate_testspec.cc,"@@ -114,7 +114,13 @@ bool GenerateTestSpecFromTensorflowModel( // different set. std::vector input_values = GenerateInputValues(input_layer, input_layer_type, input_layer_shape); - if (input_values.empty()) return false; + if (input_values.empty()) { + std::cerr << ""Unable to generate input values for the TensorFlow model. 
"" + ""Make sure the correct values are defined for "" + ""input_layer, input_layer_type, and input_layer_shape."" + << std::endl; + return false; + } // Run TensorFlow. for (int j = 0; j < input_values.size(); j++) { ",0,train 63a49c712edd3b2ee990a9f98b766b24190d3ccb,tensorflow/tensorflow,"Adds support for Eager delegate to tflite_diff. PiperOrigin-RevId: 208752057",tf_driver.cc,"@@ -179,7 +179,9 @@ void TfDriver::Invoke() { auto status = session_->Run({input_tensors_.begin(), input_tensors_.end()}, output_names_, {}, &output_tensors_); if (!status.ok()) { - Invalidate(""Failed to run input data on graph""); + Invalidate( + ""Failed to run input data on graph. Make sure the correct value is "" + ""defined for the input and output arrays.""); } } ",0,train 63a49c712edd3b2ee990a9f98b766b24190d3ccb,tensorflow/tensorflow,"Adds support for Eager delegate to tflite_diff. PiperOrigin-RevId: 208752057",tflite_diff_flags.h,"@@ -33,6 +33,7 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) { string input_layer_shape; string output_layer; int32_t num_runs_per_pass = 100; + string delegate; } values; std::vector flags = { @@ -42,18 +43,21 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) { ""Path of tensorflow lite model.""), tensorflow::Flag(""input_layer"", &values.input_layer, ""Names of input tensors, separated by comma. Example: "" - ""input_1,input_2""), + ""input_1,input_2.""), tensorflow::Flag(""input_layer_type"", &values.input_layer_type, ""Data types of input tensors, separated by comma. "" - ""Example: float,int""), + ""Example: float,int.""), tensorflow::Flag( ""input_layer_shape"", &values.input_layer_shape, - ""Shapes of input tensors, separated by colon. Example: 1,3,4,1:2""), + ""Shapes of input tensors, separated by colon. Example: 1,3,4,1:2.""), tensorflow::Flag(""output_layer"", &values.output_layer, - ""Names of output tensors, separated by comma. Example "" - ""output_1,output_2""), + ""Names of output tensors, separated by comma. Example: "" + ""output_1,output_2.""), tensorflow::Flag(""num_runs_per_pass"", &values.num_runs_per_pass, - ""Number of full runs in each pass.""), + ""[optional] Number of full runs in each pass.""), + tensorflow::Flag(""delegate"", &values.delegate, + ""[optional] Delegate to use for executing ops. Must be "" + ""`{\""\"", EAGER}`""), }; bool no_inputs = *argc == 1; @@ -61,6 +65,14 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) { if (!success || no_inputs || (*argc == 2 && !strcmp(argv[1], ""--helpfull""))) { fprintf(stderr, ""%s"", tensorflow::Flags::Usage(argv[0], flags).c_str()); return {}; + } else if (values.tensorflow_model.empty() || values.tflite_model.empty() || + values.input_layer.empty() || values.input_layer_type.empty() || + values.input_layer_shape.empty() || values.output_layer.empty()) { + fprintf(stderr, ""%s"", tensorflow::Flags::Usage(argv[0], flags).c_str()); + return {}; + } else if (!(values.delegate == """" || values.delegate == ""EAGER"")) { + fprintf(stderr, ""%s"", tensorflow::Flags::Usage(argv[0], flags).c_str()); + return {}; } return {values.tensorflow_model, @@ -69,7 +81,8 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) { Split(values.input_layer_type, "",""), Split(values.input_layer_shape, "":""), Split(values.output_layer, "",""), - values.num_runs_per_pass}; + values.num_runs_per_pass, + values.delegate}; } } // namespace testing ",0,train 63a49c712edd3b2ee990a9f98b766b24190d3ccb,tensorflow/tensorflow,"Adds support for Eager delegate to tflite_diff. 
PiperOrigin-RevId: 208752057",tflite_diff_util.cc,"@@ -33,7 +33,7 @@ bool RunDiffTest(const DiffOptions& options, int num_invocations) { options.input_layer_shape, options.output_layer)) { return false; } - TfLiteDriver tflite_driver(/*use_nnapi=*/true); + TfLiteDriver tflite_driver(/*use_nnapi=*/true, options.delegate); tflite_driver.LoadModel(options.tflite_model); return tflite::testing::ParseAndRunTests(&tflite_stream, &tflite_driver); } ",0,train 63a49c712edd3b2ee990a9f98b766b24190d3ccb,tensorflow/tensorflow,"Adds support for Eager delegate to tflite_diff. PiperOrigin-RevId: 208752057",tflite_diff_util.h,"@@ -44,6 +44,9 @@ struct DiffOptions { // each of the passes. The first pass has a single inference, while the // second pass does multiple inferences back to back. int num_runs_per_pass; + // Path to the delegate library to be loaded in order to execute ops. Must be + // `{"""", EAGER}`. + string delegate; }; // Run a single TensorFLow Lite diff test with a given options. ",0,train 63a49c712edd3b2ee990a9f98b766b24190d3ccb,tensorflow/tensorflow,"Adds support for Eager delegate to tflite_diff. PiperOrigin-RevId: 208752057",tflite_driver.cc,"@@ -17,6 +17,7 @@ limitations under the License. #include #include ""tensorflow/contrib/lite/builtin_op_data.h"" +#include ""tensorflow/contrib/lite/delegates/eager/delegate.h"" #include ""tensorflow/contrib/lite/testing/split.h"" namespace tflite { @@ -135,7 +136,13 @@ class TfLiteDriver::Expectation { size_t num_elements_; }; -TfLiteDriver::TfLiteDriver(bool use_nnapi) : use_nnapi_(use_nnapi) {} +TfLiteDriver::TfLiteDriver(bool use_nnapi, const string& delegate_name) + : use_nnapi_(use_nnapi) { + if (delegate_name == ""EAGER"") { + delegate_.reset(new EagerDelegate()); + } +} + TfLiteDriver::~TfLiteDriver() {} void TfLiteDriver::AllocateTensors() { @@ -165,6 +172,13 @@ void TfLiteDriver::LoadModel(const string& bin_file_path) { } interpreter_->UseNNAPI(use_nnapi_); + if (delegate_) { + if (delegate_->Apply(interpreter_.get()) != kTfLiteOk) { + Invalidate(""Unable to the build graph using the delegate""); + return; + } + } + must_allocate_tensors_ = true; } ",0,train 63a49c712edd3b2ee990a9f98b766b24190d3ccb,tensorflow/tensorflow,"Adds support for Eager delegate to tflite_diff. PiperOrigin-RevId: 208752057",tflite_driver.h,"@@ -17,6 +17,7 @@ limitations under the License. #include +#include ""tensorflow/contrib/lite/delegates/eager/delegate.h"" #include ""tensorflow/contrib/lite/interpreter.h"" #include ""tensorflow/contrib/lite/kernels/register.h"" #include ""tensorflow/contrib/lite/model.h"" @@ -28,7 +29,7 @@ namespace testing { // A test runner that feeds inputs into TF Lite and verifies its outputs. class TfLiteDriver : public TestRunner { public: - explicit TfLiteDriver(bool use_nnapi); + explicit TfLiteDriver(bool use_nnapi, const string& delegate = """"); ~TfLiteDriver() override; void LoadModel(const string& bin_file_path) override; @@ -52,6 +53,7 @@ class TfLiteDriver : public TestRunner { class Expectation; + std::unique_ptr delegate_; bool use_nnapi_ = false; std::unique_ptr model_; std::unique_ptr interpreter_; ",0,train 10fb2155fb720f9e0e70d9e48a934383b4b42c91,tensorflow/tensorflow,"Revert ""modify docstring"" This reverts commit 5f2e0240ee7977042e41d9c29c349a7b14301290.",loader_impl.py,"@@ -73,7 +73,7 @@ def parse_saved_model(export_dir): """"""Reads the savedmodel.pb or savedmodel.pbtxt file containing `SavedModel`. Args: - export_dir: String or Pathlike, path to the directory containing the SavedModel file. 
+ export_dir: Directory containing the SavedModel file. Returns: A `SavedModel` protocol buffer. ",0,train 60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a PiperOrigin-RevId: 235649273",api.py,"@@ -28,7 +28,6 @@ from enum import Enum # pylint:disable=g-bad-import-order import numpy as np -import six # pylint:enable=g-bad-import-order @@ -182,18 +181,18 @@ def _call_unconverted(f, args, kwargs): Returns: The return value of f(*args, **kwargs). """""" - if inspect_utils.istfmethodtarget(f): - return f.__self__.call(args, kwargs) - - return f(*args, **kwargs) + # TODO(mdan): This may be inconsistent in certain situations. + # If the function had already been annotated with @tf.function, it + # may be bound to the incorrect object. It's unclear if those situations + # are possible, but if they happen, we need to check if f is bound + # to a shim like WeakrefSelf and unpack it. + if tf_inspect.ismethod(f) and args: + f_self = inspect_utils.getmethodself(f) + if args[0] is f_self: + args = args[1:] -def _is_known_loaded_type(f, module_name, entity_name): - if tf_inspect.ismethod(f): - f = six.get_unbound_function(f) - return (module_name in sys.modules and - hasattr(sys.modules[module_name], entity_name) and - isinstance(f, getattr(sys.modules[module_name], entity_name))) + return f(*args, **kwargs) def converted_call(f, owner, options, args, kwargs): @@ -220,12 +219,13 @@ def converted_call(f, owner, options, args, kwargs): return py_builtins.overload_of(f)(*args, **kwargs) # TODO(b/122265385): Remove this bypass. - if (_is_known_loaded_type(f, 'wrapt', 'FunctionWrapper') or - _is_known_loaded_type(f, 'wrapt', 'BoundFunctionWrapper')): + if ('wrapt' in sys.modules and + hasattr(sys.modules['wrapt'], 'FunctionWrapper') and + isinstance(f, sys.modules['wrapt'].FunctionWrapper)): logging.warn( 'Entity {} appears to be decorated by wrapt, which is not yet supported' ' by AutoGraph. The function will be called without transformation.' - ' You may however apply AutoGraph before the decorator.'.format(f)) + ' You may however apply AutoGraph before the decorator.'.format(f), 1) logging.log(2, 'Permanently whitelisted: %s: wrapt decorated', f) return _call_unconverted(f, args, kwargs) ",0,test 60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a PiperOrigin-RevId: 235649273",api_test.py,"@@ -402,6 +402,21 @@ class ApiTest(test.TestCase): self.evaluate(variables.global_variables_initializer()) self.assertAllEqual([[0.0, 0.0]], self.evaluate(x)) + def test_converted_call_whitelisted_method_extra_self(self): + + opts = converter.ConversionOptions() + + model = sequential.Sequential([ + core.Dense(2) + ]) + + x = api.converted_call(model.call, None, opts, + (model, constant_op.constant([[0.0]])), + {'training': True}) + + self.evaluate(variables.global_variables_initializer()) + self.assertAllEqual([[0.0, 0.0]], self.evaluate(x)) + def test_converted_call_whitelisted_method_via_owner(self): opts = converter.ConversionOptions() ",0,test 60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a PiperOrigin-RevId: 235649273",inspect_utils.py,"@@ -191,12 +191,9 @@ def getdefiningclass(m, owner_class): return owner_class -def istfmethodtarget(m): - """"""Tests whether an object is a `function.TfMethodTarget`."""""" - # See eager.function.TfMethodTarget for more details. 
- return (hasattr(m, '__self__') and - hasattr(m.__self__, 'weakrefself_target__') and - hasattr(m.__self__, 'weakrefself_func__')) +def isweakrefself(m): + """"""Tests whether an object is a ""weakref self"" wrapper, see getmethodself."""""" + return hasattr(m, '__self__') and hasattr(m.__self__, 'ag_self_weakref__') def getmethodself(m): @@ -209,8 +206,8 @@ def getmethodself(m): # A fallback allowing methods to be actually bound to a type different # than __self__. This is useful when a strong reference from the method # to the object is not desired, for example when caching is involved. - if istfmethodtarget(m): - return m.__self__.target + if isweakrefself(m): + return m.__self__.ag_self_weakref__() return m.__self__ ",0,test 60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a PiperOrigin-RevId: 235649273",inspect_utils_test.py,"@@ -28,7 +28,6 @@ import six from tensorflow.python import lib from tensorflow.python.autograph.pyct import inspect_utils -from tensorflow.python.eager import function from tensorflow.python.framework import constant_op from tensorflow.python.platform import test @@ -359,13 +358,15 @@ class InspectUtilsTest(test.TestCase): def test_getmethodclass_weakref_mechanism(self): test_obj = TestClass() + class WeakrefWrapper(object): + + def __init__(self): + self.ag_self_weakref__ = weakref.ref(test_obj) + def test_fn(self): return self - bound_method = types.MethodType( - test_fn, - function.TfMethodTarget( - weakref.ref(test_obj), test_obj.member_function)) + bound_method = types.MethodType(test_fn, WeakrefWrapper()) self.assertEqual(inspect_utils.getmethodclass(bound_method), TestClass) def test_getmethodclass_no_bool_conversion(self): ",0,test 60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a PiperOrigin-RevId: 235649273",dataset_ops.py,"@@ -2366,8 +2366,6 @@ class StructuredFunctionWrapper(object): else: defun_kwargs.update({""func_name"": func_name}) - # TODO(b/124254153): Enable autograph once the overhead is low enough. - # TODO(mdan): Make sure autograph recurses into _wrapper_helper when on. @eager_function.defun_with_attributes( input_signature=[ tensor_spec.TensorSpec(input_shape, input_type) # pylint: disable=g-complex-comprehension @@ -2375,7 +2373,6 @@ class StructuredFunctionWrapper(object): self._input_structure._flat_shapes, self._input_structure._flat_types) ], - autograph=False, attributes=defun_kwargs) def wrapper_fn(*args): # pylint: disable=missing-docstring ret = _wrapper_helper(*args) ",0,test 60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a PiperOrigin-RevId: 235649273",multi_device_iterator_ops.py,"@@ -42,15 +42,13 @@ class _PerDeviceGenerator(dataset_ops.DatasetV2): gen_dataset_ops.multi_device_iterator_to_string_handle( multi_device_iterator_resource)) - # TODO(b/124254153): Enable autograph once the overhead is low enough. - @function.defun(autograph=False) # Pure graph code. + @function.defun() def _init_func(): return multi_device_iterator_string_handle init_func_concrete = _init_func._get_concrete_function_internal() # pylint: disable=protected-access - # TODO(b/124254153): Enable autograph once the overhead is low enough. - @function.defun(autograph=False) # Pure graph code. 
+ @function.defun() def _remote_init_func(): return functional_ops.remote_call( target=source_device, @@ -61,10 +59,7 @@ class _PerDeviceGenerator(dataset_ops.DatasetV2): self._init_func = _remote_init_func._get_concrete_function_internal() # pylint: disable=protected-access self._init_captured_args = self._init_func.captured_inputs - # TODO(b/124254153): Enable autograph once the overhead is low enough. - @function.defun( - input_signature=[tensor_spec.TensorSpec([], dtypes.string)], - autograph=False) # Pure graph code. + @function.defun(input_signature=[tensor_spec.TensorSpec([], dtypes.string)]) def _next_func(string_handle): # pylint: disable=protected-access multi_device_iterator = ( @@ -81,11 +76,9 @@ class _PerDeviceGenerator(dataset_ops.DatasetV2): next_func_concrete = _next_func._get_concrete_function_internal() # pylint: disable=protected-access - # TODO(b/124254153): Enable autograph once the overhead is low enough. @function.defun_with_attributes( input_signature=[tensor_spec.TensorSpec([], dtypes.string)], - attributes={""experimental_ints_on_device"": True}, - autograph=False) # Pure graph code. + attributes={""experimental_ints_on_device"": True}) def _remote_next_func(string_handle): return functional_ops.remote_call( target=source_device, @@ -101,19 +94,13 @@ class _PerDeviceGenerator(dataset_ops.DatasetV2): if arg == incarnation_id: self._incarnation_id_index = i - # TODO(b/124254153): Enable autograph once the overhead is low enough. - @function.defun( - input_signature=[tensor_spec.TensorSpec([], dtypes.string)], - autograph=False) # Pure graph code. + @function.defun(input_signature=[tensor_spec.TensorSpec([], dtypes.string)]) def _finalize_func(unused_string_handle): return array_ops.constant(0, dtypes.int64) finalize_func_concrete = _finalize_func._get_concrete_function_internal() # pylint: disable=protected-access - # TODO(b/124254153): Enable autograph once the overhead is low enough. - @function.defun( - input_signature=[tensor_spec.TensorSpec([], dtypes.string)], - autograph=False) # Pure graph code. + @function.defun(input_signature=[tensor_spec.TensorSpec([], dtypes.string)]) def _remote_finalize_func(string_handle): return functional_ops.remote_call( target=source_device, ",0,test 60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a PiperOrigin-RevId: 235649273",def_function.py,"@@ -514,7 +514,7 @@ class Function(object): """"""Make and call a `ConcreteFunction` which initializes variables."""""" # Note: using defun here avoids an infinite recursion. - @function_lib.defun(autograph=False) # Pure graph code. 
+ @function_lib.defun def initialize_variables(): for v, init in initializer_map.items(): with ops.init_scope(): ",0,test 60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a PiperOrigin-RevId: 235649273",function.py,"@@ -61,7 +61,6 @@ from tensorflow.python.util import tf_inspect FORWARD_FUNCTION_ATTRIBUTE_NAME = ""forward_function_name"" BACKWARD_FUNCTION_ATTRIBUTE_NAME = ""backward_function_name"" - class CacheKey( collections.namedtuple(""CacheKey"", [ ""input_signature"", ""parent_graph"", ""device_functions"", @@ -2020,23 +2019,13 @@ def defun_with_attributes(func=None, # When a method is bound to objects of this type, it allows AutoGraph to -# recover a weak reference the original method's self pointer, so that it can -# execute it consistent with class_method_to_instance_method's -# bound_method_wrapper. +# recover a weak reference the original method's self pointer. This uses the +# mechanism from pyct.inspect_utils.getmethodclass. # TODO(b/119246461): This is not pretty. Use a descriptor instead? -class TfMethodTarget(object): - """"""Binding target for methods replaced by function and defun."""""" - - def __init__(self, target, original_python_function): - self.weakrefself_target__ = target - self.weakrefself_func__ = weakref.ref(original_python_function) - - @property - def target(self): - return self.weakrefself_target__() +class _WeakrefSelf(object): - def call(self, args, kwargs): - return self.weakrefself_func__()(*args, **kwargs) + def __init__(self, target): + self.ag_self_weakref__ = target def class_method_to_instance_method(original_function, instance): @@ -2045,9 +2034,8 @@ def class_method_to_instance_method(original_function, instance): # Note: while we could bind to a weakref proxy instead, that causes the # bound method to be unhashable. - bound_method = types_lib.MethodType( - original_function.python_function, - TfMethodTarget(weak_instance, original_function.python_function)) + bound_method = types_lib.MethodType(original_function.python_function, + _WeakrefSelf(weak_instance)) # original_function is expected to be of one of the two `Function` types # (defined either in function.py or def_function.py). @@ -2065,7 +2053,6 @@ def class_method_to_instance_method(original_function, instance): if wrapped_fn is strong_bound_method_wrapper.__original_wrapped__: # If __wrapped__ was not replaced, then call original_function. - # TODO(mdan): For better consistency, use the wrapper's call(). wrapped_fn = original_function.python_function if tf_inspect.ismethod(wrapped_fn): wrapped_fn = six.get_unbound_function(wrapped_fn) ",0,test 60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a PiperOrigin-RevId: 235649273",func_graph.py,"@@ -679,8 +679,7 @@ def func_graph_from_py_func(name, # Wrapping around a decorator allows checks like tf_inspect.getargspec # to be accurate. 
converted_func = tf_decorator.make_decorator(original_func, wrapper) - python_func = tf_decorator.rewrap(python_func, original_func, - converted_func) + tf_decorator.rewrap(python_func, original_func, converted_func) func_outputs = python_func(*func_args, **func_kwargs) ",0,test 60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a PiperOrigin-RevId: 235649273",tf_decorator.py,"@@ -138,10 +138,6 @@ def rewrap(decorator_func, previous_target, new_target): decorator_func: Callable returned by `wrap`. previous_target: Callable that needs to be replaced. new_target: Callable to replace previous_target with. - - Returns: - The updated decorator. If decorator_func is not a tf_decorator, new_target - is returned. """""" # Because the process mutates the decorator, we only need to alter the # innermost function that wraps previous_target. @@ -154,15 +150,9 @@ def rewrap(decorator_func, previous_target, new_target): if target.decorated_target is previous_target: break cur = target.decorated_target - assert cur is not None - # If decorator_func is not a decorator, new_target replaces it directly. if innermost_decorator is None: - # Consistency check. The caller should always pass the result of - # tf_decorator.unwrap as previous_target. If decorator_func is not a - # decorator, that will have returned decorator_func itself. - assert decorator_func is previous_target - return new_target + return target.decorated_target = new_target @@ -178,8 +168,6 @@ def rewrap(decorator_func, previous_target, new_target): else: innermost_decorator.__wrapped__ = new_target - return decorator_func - def unwrap(maybe_tf_decorator): """"""Unwraps an object into a list of TFDecorators and a final target. ",0,test 547bd9c88b1a86f0543fff3460e2d4d1c8009cb4,tensorflow/tensorflow,"[TF:TRT] Limit the number of times that a warning message is printed out. Add LOG_FIRST_FEW_WARNING_WITH_PREFIX for only printing out the first five occurences of a warning message. Use the new macro to replace the use of LOG_WARNING_WITH_PREFIX in the TRTEngineOp runtime. This can avoid repeating the same warning message at each inference step. PiperOrigin-RevId: 339068142 Change-Id: Ibb3cc172fcd23f76df6cec67085233b347263668",trt_engine_op.cc,"@@ -60,6 +60,9 @@ using absl::StrCat; using ::nvinfer1::IRuntime; using ::stream_executor::port::StatusOr; +#define LOG_FIRST_FEW_WARNING_WITH_PREFIX \ + LOG_FIRST_N(WARNING, 5) << ""TF-TRT Warning: "" + // A helper class to call done() when destructed for asynchronous execution. // Helps simultaneous execution of native and TRT engines. @@ -584,9 +587,10 @@ void TRTEngineOp::ComputeAsync(OpKernelContext* ctx, Status verify_input_shape_status = VerifyInputShapes(input_concrete_shapes); // TODO(bixia): Fix the segmentation. 
if (!verify_input_shape_status.ok()) { - LOG_FIRST_N(WARNING, 5) << ""Running native segment for"" << name() - << "" due to failure in verifying input shapes: "" - << verify_input_shape_status.error_message(); + LOG_FIRST_FEW_WARNING_WITH_PREFIX + << ""Running native segment for"" << name() + << "" due to failure in verifying input shapes: "" + << verify_input_shape_status.error_message(); ExecuteNativeSegment(ctx, helper); return; } @@ -625,7 +629,7 @@ void TRTEngineOp::ComputeAsync(OpKernelContext* ctx, return true; }; if (!engine_context->cuda_engine) { - LOG_WARNING_WITH_PREFIX + LOG_FIRST_FEW_WARNING_WITH_PREFIX << ""Engine retrieval for input shapes: "" << TensorShapeUtils::ShapeListString(input_concrete_shapes) << "" failed. Running native segment for "" << name(); @@ -636,8 +640,9 @@ void TRTEngineOp::ComputeAsync(OpKernelContext* ctx, } Status stat = ExecuteTrtEngine(ctx, engine_context, trt_context_idx); if (!stat.ok()) { - LOG_WARNING_WITH_PREFIX << ""Failed to execute engine: "" << stat - << "" Retrying with native segment for "" << name(); + LOG_FIRST_FEW_WARNING_WITH_PREFIX << ""Failed to execute engine: "" << stat + << "" Retrying with native segment for "" + << name(); if (!may_execute_native_segment()) { return; } @@ -755,9 +760,10 @@ StatusOr> TRTEngineOp::BuildEngine( calibrator, &engine, use_calibration, use_implicit_batch_, nullptr, &cache_resource->profiles_); if (!status.ok()) { - LOG_WARNING_WITH_PREFIX << ""Engine creation for "" << name() << "" failed. "" - << ""The native segment will be used instead. "" - << ""Reason: "" << status; + LOG_FIRST_FEW_WARNING_WITH_PREFIX + << ""Engine creation for "" << name() << "" failed. "" + << ""The native segment will be used instead. "" + << ""Reason: "" << status; // Store an empty engine in the cache for these input shapes so we don't try // to build the same failing engine again. cache_resource->cache_.emplace(input_concrete_shapes, @@ -822,9 +828,9 @@ StatusOr> TRTEngineOp::GetEngine( FunctionDefToGraphDef(func_handle_, lib, &segment_graph_def_); } if (!status.ok()) { - LOG_WARNING_WITH_PREFIX << ""Getting segment graph for "" << name() - << "" failed. "" - << ""Reason: "" << status; + LOG_FIRST_FEW_WARNING_WITH_PREFIX << ""Getting segment graph for "" + << name() << "" failed. "" + << ""Reason: "" << status; } } auto result = BuildEngine(input_concrete_shapes, batch_size, @@ -883,7 +889,7 @@ StatusOr> TRTEngineOp::GetEngine( // If cache does not have a compatible engine then create a new engine. if (engine_contexts == nullptr) { if (!allow_build_at_runtime_) { - LOG_WARNING_WITH_PREFIX + LOG_FIRST_FEW_WARNING_WITH_PREFIX << ""Found no engine in cache matching input shapes. "" << ""Not building a new engine because "" << ""allow_build_at_runtime=False. 
"" ",0,train 484f0e5fd96c850c5a1ba87b8a6b8b23b11582e0,tensorflow/tensorflow,"Support folding TF::TransposeOp when perm is a constant instead of TF::ConstOp PiperOrigin-RevId: 328149666 Change-Id: I0c5561152383f12126ab9568c0facc4c3043c6a3",tf_ops_n_z.cc,"@@ -1939,11 +1939,9 @@ void TransposeOp::build(OpBuilder &builder, OperationState &result, Value x, namespace { OpFoldResult FoldIdentityTranspose(TransposeOp op) { - auto const_perm = dyn_cast_or_null(op.perm().getDefiningOp()); - if (!const_perm) return {}; - - auto const_value = const_perm.value(); - const auto elements = const_value.getValues(); + DenseIntElementsAttr perm; + if (!matchPattern(op.perm(), m_Constant(&perm))) return {}; + const auto elements = perm.getValues(); for (auto it : llvm::enumerate(elements)) { if (it.index() != it.value()) return {}; @@ -1966,14 +1964,14 @@ OpFoldResult FoldCancellableTranspose(TransposeOp op) { if (!transpose) return {}; // Permutations defined by constant operations. - auto perm0 = dyn_cast_or_null(op.perm().getDefiningOp()); - auto perm1 = dyn_cast_or_null(transpose.perm().getDefiningOp()); - if (!perm0 || !perm1) return {}; + DenseIntElementsAttr perm0; + DenseIntElementsAttr perm1; + if (!matchPattern(op.perm(), m_Constant(&perm0)) || + !matchPattern(transpose.perm(), m_Constant(&perm1))) + return {}; // With permutation indices that cancel each other - auto perm0_value = perm0.value().cast(); - auto perm1_value = perm1.value().cast(); - if (!AreCancellablePermutations(perm0_value, perm1_value)) return {}; + if (!AreCancellablePermutations(perm0, perm1)) return {}; return transpose.x(); } ",0,train d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D(with global address) to Read/Write so as global_address can be calculated based on 3D so and 4D. PiperOrigin-RevId: 272023638",conv_powervr.cc,"@@ -274,9 +274,9 @@ std::string GenerateConvPowerVR1x1( if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) { if (op_def.precision == CalculationsPrecision::F32_F16) { c += "" src"" + id + "" = "" + - src_tensor.ReadAsFloat3D(""src_a_"" + id) + multiplier + "";\n""; + src_tensor.ReadAsFloat(""src_a_"" + id) + multiplier + "";\n""; } else { - c += "" src"" + id + "" = "" + src_tensor.Read3D(""src_a_"" + id) + + c += "" src"" + id + "" = "" + src_tensor.Read(""src_a_"" + id) + multiplier + "";\n""; } } ",0,train d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D(with global address) to Read/Write so as global_address can be calculated based on 3D so and 4D. 
PiperOrigin-RevId: 272023638",conv_texture.cc,"@@ -151,10 +151,10 @@ std::string GenerateConvCode( } c += "" for (int s = 0; s < src_size.w; ++s) {\n""; if (is_image_buffer) { - c += "" FLT4 src0 = "" + src_tensor.Read3D(""addr_0"") + "";\n""; - c += "" FLT4 src1 = "" + src_tensor.Read3D(""addr_1"") + "";\n""; - c += "" FLT4 src2 = "" + src_tensor.Read3D(""addr_2"") + "";\n""; - c += "" FLT4 src3 = "" + src_tensor.Read3D(""addr_3"") + "";\n""; + c += "" FLT4 src0 = "" + src_tensor.Read(""addr_0"") + "";\n""; + c += "" FLT4 src1 = "" + src_tensor.Read(""addr_1"") + "";\n""; + c += "" FLT4 src2 = "" + src_tensor.Read(""addr_2"") + "";\n""; + c += "" FLT4 src3 = "" + src_tensor.Read(""addr_3"") + "";\n""; } std::string fc0 = ""(int2)(Z, "" + f_y + "")""; std::string fc1 = ""(int2)(Z + 1, "" + f_y + "")""; ",0,train d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D(with global address) to Read/Write so as global_address can be calculated based on 3D so and 4D. PiperOrigin-RevId: 272023638",convolution_transposed_3x3_thin.cc,"@@ -103,10 +103,10 @@ std::string GenerateConvolutionTransposedCode( c += "" c1 = select(-1, c1, x_in);\n""; c += "" c2 = select(-1, c2, y_in);\n""; c += "" c3 = select(-1, c3, x_in && y_in);\n""; - c += "" FLT4 src0 = "" + src_tensor.Read3D(""c0"") + "";\n""; - c += "" FLT4 src1 = "" + src_tensor.Read3D(""c1"") + "";\n""; - c += "" FLT4 src2 = "" + src_tensor.Read3D(""c2"") + "";\n""; - c += "" FLT4 src3 = "" + src_tensor.Read3D(""c3"") + "";\n""; + c += "" FLT4 src0 = "" + src_tensor.Read(""c0"") + "";\n""; + c += "" FLT4 src1 = "" + src_tensor.Read(""c1"") + "";\n""; + c += "" FLT4 src2 = "" + src_tensor.Read(""c2"") + "";\n""; + c += "" FLT4 src3 = "" + src_tensor.Read(""c3"") + "";\n""; } else { const auto mode = GetFastestZeroMode(device); c += "" FLT4 src0 = "" + src_tensor.Read3D(""X"", ""Y"", z, mode) + "";\n""; ",0,train d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D(with global address) to Read/Write so as global_address can be calculated based on 3D so and 4D. PiperOrigin-RevId: 272023638",max_unpooling.cc,"@@ -62,16 +62,15 @@ std::string GetMaxUnoolingKernelCode( code += "" FLT4 src = (FLT4)(0.0f);\n""; code += "" int4 ind = (int4)(0);\n""; code += "" if (!outside) {\n""; - code += - "" src = "" + src.Read3D(""src_adr"", TextureAddressMode::DONT_CARE) + - "";\n""; + code += "" src = "" + src.Read(""src_adr"", TextureAddressMode::DONT_CARE) + + "";\n""; code += "" ind = convert_int4("" + - src_ind.Read3D(""src_adr"", TextureAddressMode::DONT_CARE) + "");\n""; + src_ind.Read(""src_adr"", TextureAddressMode::DONT_CARE) + "");\n""; code += "" }\n""; } else { - code += "" FLT4 src = "" + src.Read3D(""src_adr"", address_mode) + "";\n""; + code += "" FLT4 src = "" + src.Read(""src_adr"", address_mode) + "";\n""; code += "" int4 ind = convert_int4("" + - src_ind.Read3D(""src_adr"", address_mode) + "");\n""; + src_ind.Read(""src_adr"", address_mode) + "");\n""; } code += "" int t_x = X - (src_x * stride.x - padding.x);\n""; code += "" int t_y = Y - (src_y * stride.y - padding.y);\n""; ",0,train d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D(with global address) to Read/Write so as global_address can be calculated based on 3D so and 4D. 
PiperOrigin-RevId: 272023638",pooling.cc,"@@ -154,12 +154,11 @@ std::string GetMaxPoolingKernelCode( code += "" };\n""; code += "" }\n""; code += "" }\n""; - code += "" "" + dst_tensor.GetAddress(""address"", ""X"", ""Y"", ""Z"") + ""\n""; const LinkingContext context{""maximum"", ""X"", ""Y"", ""Z""}; code += PostProcess(linked_operations, context); - code += "" "" + dst_tensor.Write3D(""maximum"", ""address""); + code += "" "" + dst_tensor.Write3D(""maximum"", ""X"", ""Y"", ""Z""); if (output_indices) { - code += "" "" + indices_tensor.Write3D(""indexes"", ""address""); + code += "" "" + indices_tensor.Write3D(""indexes"", ""X"", ""Y"", ""Z""); } code += ""}\n""; ",0,train d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D(with global address) to Read/Write so as global_address can be calculated based on 3D so and 4D. PiperOrigin-RevId: 272023638",softmax.cc,"@@ -61,10 +61,10 @@ std::string GetSoftmaxKernelCode( code += "" sum += dot(mask, exp(t));\n""; code += "" }\n""; code += "" for (int d = 0; d < size.w; ++d) {\n""; - code += "" "" + src_tensor.GetAddress(""address"", ""X"", ""Y"", ""d"") + ""\n""; - code += "" float4 t = "" + - src_tensor.ReadAsFloat3D(""address"", TextureAddressMode::DONT_CARE) + - "";\n""; + code += + "" float4 t = "" + + src_tensor.ReadAsFloat3D(""X"", ""Y"", ""d"", TextureAddressMode::DONT_CARE) + + "";\n""; code += "" t = exp(t) / sum;\n""; code += "" FLT4 result = TO_FLT4(t);\n""; const LinkingContext context{""result"", ""X"", ""Y"", ""d""}; ",0,train d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D(with global address) to Read/Write so as global_address can be calculated based on 3D so and 4D. PiperOrigin-RevId: 272023638",softmax1x1.cc,"@@ -83,13 +83,13 @@ std::string GetSoftmaxKernelCode( code += "" do {\n""; code += "" int z = offset + tid;\n""; code += "" if (z < size.x) {\n""; - code += "" "" + dst_tensor.GetAddress(""address"", ""0"", ""0"", ""z"") + ""\n""; - code += "" FLT4 value = TO_FLT4(exp("" + - src_tensor.ReadAsFloat3D(""address"", TextureAddressMode::DONT_CARE) + - "") * sum);\n""; + code += + "" FLT4 value = TO_FLT4(exp("" + + src_tensor.ReadAsFloat3D(""0"", ""0"", ""z"", TextureAddressMode::DONT_CARE) + + "") * sum);\n""; const LinkingContext context{""value"", ""0"", ""0"", ""z""}; code += PostProcess(linked_operations, context); - code += "" "" + dst_tensor.Write3D(""value"", ""address""); + code += "" "" + dst_tensor.Write3D(""value"", ""0"", ""0"", ""z""); code += "" offset += 32;\n""; code += "" }\n""; code += "" s++;\n""; ",0,train d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D(with global address) to Read/Write so as global_address can be calculated based on 3D so and 4D. 
PiperOrigin-RevId: 272023638",util.cc,"@@ -182,31 +182,21 @@ std::string TensorCodeGenerator::Read3D(const std::string& x, const std::string& y, const std::string& z, TextureAddressMode address_mode) const { - return Read3D(GetGlobalAddressNoDeclaration(x, y, z), address_mode); + return Read(GetGlobalAddressNoDeclaration(x, y, z), address_mode); } std::string TensorCodeGenerator::Read4D(const std::string& x, const std::string& y, const std::string& z, const std::string& b) const { - return Read3D(GetGlobalAddressNoDeclaration(x, y, z, b), - TextureAddressMode::DONT_CARE); + return Read(GetGlobalAddressNoDeclaration(x, y, z, b), + TextureAddressMode::DONT_CARE); } std::string TensorCodeGenerator::ReadAsFloat3D( const std::string& x, const std::string& y, const std::string& z, TextureAddressMode address_mode) const { - return ReadAsFloat3D(GetGlobalAddressNoDeclaration(x, y, z), address_mode); -} - -std::string TensorCodeGenerator::Read3D(const std::string& global_address, - TextureAddressMode address_mode) const { - return ReadGlobalFLT4(global_address, address_mode); -} - -std::string TensorCodeGenerator::ReadAsFloat3D( - const std::string& global_address, TextureAddressMode address_mode) const { - return ReadGlobalFloat4(global_address, address_mode); + return ReadAsFloat(GetGlobalAddressNoDeclaration(x, y, z), address_mode); } std::string TensorCodeGenerator::GetAddress(const std::string& var_name, @@ -265,12 +255,7 @@ std::string TensorCodeGenerator::Write3D(const std::string& var_name, const std::string& x, const std::string& y, const std::string& z) const { - return Write3D(var_name, GetGlobalAddressNoDeclaration(x, y, z)); -} - -std::string TensorCodeGenerator::Write3D( - const std::string& var_name, const std::string& global_address) const { - return WriteGlobalFLT4(var_name, global_address); + return Write(var_name, GetGlobalAddressNoDeclaration(x, y, z)); } std::string TensorCodeGenerator::Write4D(const std::string& var_name, @@ -278,11 +263,11 @@ std::string TensorCodeGenerator::Write4D(const std::string& var_name, const std::string& y, const std::string& z, const std::string& b) const { - return WriteGlobalFLT4(var_name, GetGlobalAddressNoDeclaration(x, y, z, b)); + return Write(var_name, GetGlobalAddressNoDeclaration(x, y, z, b)); } -std::string TensorCodeGenerator::ReadGlobalFLT4( - const std::string& global_address, TextureAddressMode address_mode) const { +std::string TensorCodeGenerator::Read(const std::string& global_address, + TextureAddressMode address_mode) const { switch (descriptor_.storage_type) { case TensorStorageType::BUFFER: return absl::StrCat(tensor_name_, ""["", global_address, ""]""); @@ -301,7 +286,7 @@ std::string TensorCodeGenerator::ReadGlobalFLT4( } } -std::string TensorCodeGenerator::ReadGlobalFloat4( +std::string TensorCodeGenerator::ReadAsFloat( const std::string& global_address, TextureAddressMode address_mode) const { switch (descriptor_.storage_type) { case TensorStorageType::BUFFER: @@ -322,7 +307,7 @@ std::string TensorCodeGenerator::ReadGlobalFloat4( } } -std::string TensorCodeGenerator::WriteGlobalFLT4( +std::string TensorCodeGenerator::Write( const std::string& var_name, const std::string& global_address) const { switch (descriptor_.storage_type) { case TensorStorageType::BUFFER: ",0,train d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D(with global address) to Read/Write so as global_address can be calculated based on 3D so and 4D. 
PiperOrigin-RevId: 272023638",util.h,"@@ -87,16 +87,16 @@ class TensorCodeGenerator { const std::string& y, const std::string& z, const std::string& b) const; - std::string Read3D( + std::string Read( const std::string& global_address, TextureAddressMode address_mode = TextureAddressMode::ZERO) const; // Optimization for textures, so as in opencl we can use read_imagef for any // texture type. - std::string ReadAsFloat3D( + std::string ReadAsFloat( const std::string& global_address, TextureAddressMode address_mode = TextureAddressMode::ZERO) const; - std::string Write3D(const std::string& var_name, - const std::string& global_address) const; + std::string Write(const std::string& var_name, + const std::string& global_address) const; private: std::string GetGlobalAddressNoDeclaration(const std::string& x, @@ -107,15 +107,6 @@ class TensorCodeGenerator { const std::string& z, const std::string& b) const; - std::string ReadGlobalFLT4(const std::string& global_address, - TextureAddressMode address_mode) const; - - std::string ReadGlobalFloat4(const std::string& global_address, - TextureAddressMode address_mode) const; - - std::string WriteGlobalFLT4(const std::string& var_name, - const std::string& global_address) const; - std::string tensor_name_; std::string uniform_size_name_; TensorDescriptor descriptor_; ",0,train f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_allocator_retry.cc,"@@ -14,10 +14,10 @@ limitations under the License. ==============================================================================*/ #include ""tensorflow/core/common_runtime/gpu/gpu_allocator_retry.h"" +#include ""tensorflow/core/platform/env.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/mutex.h"" -#include ""tensorflow/core/platform/port.h"" -#include ""tensorflow/core/public/env.h"" +#include ""tensorflow/core/platform/types.h"" namespace tensorflow { ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_allocator_retry.h,"@@ -16,9 +16,9 @@ limitations under the License. #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ALLOCATOR_RETRY_H_ #define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ALLOCATOR_RETRY_H_ +#include ""tensorflow/core/platform/env.h"" #include ""tensorflow/core/platform/mutex.h"" -#include ""tensorflow/core/platform/port.h"" -#include ""tensorflow/core/public/env.h"" +#include ""tensorflow/core/platform/types.h"" namespace tensorflow { ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_allocator_retry_test.cc,"@@ -17,12 +17,12 @@ limitations under the License. 
#include #include ""tensorflow/core/lib/core/notification.h"" +#include ""tensorflow/core/platform/env.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/mutex.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/test.h"" #include ""tensorflow/core/platform/thread_annotations.h"" -#include ""tensorflow/core/public/env.h"" +#include ""tensorflow/core/platform/types.h"" namespace tensorflow { namespace { ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_bfc_allocator.cc,"@@ -24,8 +24,8 @@ limitations under the License. #include ""tensorflow/core/lib/strings/strcat.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/mutex.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/stream_executor.h"" +#include ""tensorflow/core/platform/types.h"" namespace gpu = ::perftools::gputools; ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_bfc_allocator.h,"@@ -27,9 +27,9 @@ limitations under the License. #include ""tensorflow/core/lib/strings/strcat.h"" #include ""tensorflow/core/platform/macros.h"" #include ""tensorflow/core/platform/mutex.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/stream_executor.h"" #include ""tensorflow/core/platform/thread_annotations.h"" +#include ""tensorflow/core/platform/types.h"" namespace tensorflow { ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_bfc_allocator_test.cc,"@@ -24,9 +24,9 @@ limitations under the License. #include ""tensorflow/core/lib/gtl/inlined_vector.h"" #include ""tensorflow/core/lib/random/simple_philox.h"" #include ""tensorflow/core/platform/logging.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/stream_executor.h"" #include ""tensorflow/core/platform/test.h"" +#include ""tensorflow/core/platform/types.h"" namespace gpu = ::perftools::gputools; ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_debug_allocator.h,"@@ -22,8 +22,8 @@ limitations under the License. #include ""tensorflow/core/common_runtime/gpu/visitable_allocator.h"" #include ""tensorflow/core/platform/macros.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/stream_executor.h"" +#include ""tensorflow/core/platform/types.h"" namespace tensorflow { ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_debug_allocator_test.cc,"@@ -24,9 +24,9 @@ limitations under the License. 
#include ""tensorflow/core/common_runtime/gpu/gpu_init.h"" #include ""tensorflow/core/lib/gtl/inlined_vector.h"" #include ""tensorflow/core/platform/logging.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/stream_executor.h"" #include ""tensorflow/core/platform/test.h"" +#include ""tensorflow/core/platform/types.h"" namespace gpu = ::perftools::gputools; ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_device.cc,"@@ -38,20 +38,20 @@ limitations under the License. #include ""tensorflow/core/framework/allocator.h"" #include ""tensorflow/core/framework/device_base.h"" #include ""tensorflow/core/framework/op_kernel.h"" +#include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/types.h"" #include ""tensorflow/core/graph/types.h"" +#include ""tensorflow/core/lib/core/status.h"" #include ""tensorflow/core/lib/gtl/stl_util.h"" #include ""tensorflow/core/lib/strings/numbers.h"" #include ""tensorflow/core/lib/strings/strcat.h"" #include ""tensorflow/core/platform/cuda.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/macros.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/stream_executor.h"" #include ""tensorflow/core/platform/tracing.h"" +#include ""tensorflow/core/platform/types.h"" #include ""tensorflow/core/public/session_options.h"" -#include ""tensorflow/core/public/status.h"" -#include ""tensorflow/core/public/tensor.h"" #include ""tensorflow/core/util/device_name_utils.h"" namespace gpu = ::perftools::gputools; ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_device.h,"@@ -29,12 +29,12 @@ limitations under the License. #include ""tensorflow/core/framework/allocator.h"" #include ""tensorflow/core/framework/device_base.h"" #include ""tensorflow/core/framework/op_kernel.h"" +#include ""tensorflow/core/framework/tensor.h"" +#include ""tensorflow/core/lib/core/status.h"" #include ""tensorflow/core/platform/mutex.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/stream_executor.h"" +#include ""tensorflow/core/platform/types.h"" #include ""tensorflow/core/public/session_options.h"" -#include ""tensorflow/core/public/status.h"" -#include ""tensorflow/core/public/tensor.h"" namespace tensorflow { ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_event_mgr.h,"@@ -18,15 +18,15 @@ limitations under the License. 
#include #include +#include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/tensor_reference.h"" #include ""tensorflow/core/lib/core/notification.h"" #include ""tensorflow/core/lib/core/threadpool.h"" #include ""tensorflow/core/lib/gtl/inlined_vector.h"" #include ""tensorflow/core/platform/mutex.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/stream_executor.h"" #include ""tensorflow/core/platform/thread_annotations.h"" -#include ""tensorflow/core/public/tensor.h"" +#include ""tensorflow/core/platform/types.h"" namespace perftools { namespace gputools { ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_init.cc,"@@ -21,8 +21,8 @@ limitations under the License. #include ""tensorflow/core/lib/strings/numbers.h"" #include ""tensorflow/core/lib/strings/strcat.h"" #include ""tensorflow/core/platform/logging.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/stream_executor.h"" +#include ""tensorflow/core/platform/types.h"" namespace gpu = ::perftools::gputools; ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_region_allocator.cc,"@@ -25,8 +25,8 @@ limitations under the License. #include ""tensorflow/core/lib/strings/strcat.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/mutex.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/stream_executor.h"" +#include ""tensorflow/core/platform/types.h"" // If true, the CUDA gpu manager checks that all allocated memory // through the GPU memory pool implementation has been freed. ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_region_allocator.h,"@@ -26,9 +26,9 @@ limitations under the License. #include ""tensorflow/core/lib/strings/strcat.h"" #include ""tensorflow/core/platform/macros.h"" #include ""tensorflow/core/platform/mutex.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/stream_executor.h"" #include ""tensorflow/core/platform/thread_annotations.h"" +#include ""tensorflow/core/platform/types.h"" namespace tensorflow { ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_region_allocator_test.cc,"@@ -23,9 +23,9 @@ limitations under the License. #include ""tensorflow/core/common_runtime/gpu/gpu_init.h"" #include ""tensorflow/core/lib/gtl/inlined_vector.h"" #include ""tensorflow/core/platform/logging.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/stream_executor.h"" #include ""tensorflow/core/platform/test.h"" +#include ""tensorflow/core/platform/types.h"" namespace gpu = ::perftools::gputools; ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_stream_util.h,"@@ -19,7 +19,7 @@ limitations under the License. 
#include #include ""tensorflow/core/graph/graph.h"" -#include ""tensorflow/core/public/status.h"" +#include ""tensorflow/core/lib/core/status.h"" namespace tensorflow { namespace gpu_stream_util { ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_util.cc,"@@ -21,6 +21,7 @@ limitations under the License. #include ""tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"" #include ""tensorflow/core/common_runtime/gpu/process_state.h"" #include ""tensorflow/core/common_runtime/gpu_device_context.h"" +#include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/tensor_reference.h"" #include ""tensorflow/core/framework/types.h"" #include ""tensorflow/core/lib/core/errors.h"" @@ -34,7 +35,6 @@ limitations under the License. #include ""tensorflow/core/platform/stream_executor.h"" #include ""tensorflow/core/platform/tensor_coding.h"" #include ""tensorflow/core/platform/tracing.h"" -#include ""tensorflow/core/public/tensor.h"" #include ""tensorflow/core/util/util.h"" // If this need to be runtime configurable, consider adding options to ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_util.h,"@@ -18,9 +18,9 @@ limitations under the License. #include ""tensorflow/core/common_runtime/device.h"" #include ""tensorflow/core/common_runtime/dma_helper.h"" +#include ""tensorflow/core/framework/tensor.h"" +#include ""tensorflow/core/lib/core/status.h"" #include ""tensorflow/core/platform/stream_executor.h"" -#include ""tensorflow/core/public/status.h"" -#include ""tensorflow/core/public/tensor.h"" namespace tensorflow { ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",gpu_util_platform_specific.cc,"@@ -16,9 +16,9 @@ limitations under the License. #include ""tensorflow/core/common_runtime/gpu/gpu_util.h"" #include ""tensorflow/core/common_runtime/device.h"" #include ""tensorflow/core/common_runtime/gpu_device_context.h"" +#include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/types.h"" #include ""tensorflow/core/platform/stream_executor.h"" -#include ""tensorflow/core/public/tensor.h"" namespace tensorflow { ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",pool_allocator.cc,"@@ -24,7 +24,7 @@ limitations under the License. #include ""tensorflow/core/lib/strings/numbers.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/mutex.h"" -#include ""tensorflow/core/platform/port.h"" +#include ""tensorflow/core/platform/types.h"" namespace tensorflow { ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",pool_allocator.h,"@@ -30,8 +30,8 @@ limitations under the License. 
#include ""tensorflow/core/platform/macros.h"" #include ""tensorflow/core/platform/mem.h"" #include ""tensorflow/core/platform/mutex.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/stream_executor.h"" +#include ""tensorflow/core/platform/types.h"" namespace tensorflow { ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",process_state.cc,"@@ -25,8 +25,8 @@ limitations under the License. #include ""tensorflow/core/lib/strings/strcat.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/mutex.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/stream_executor.h"" +#include ""tensorflow/core/platform/types.h"" // If these flags need to be runtime configurable, consider adding // options to ConfigProto. ",0,test f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for tensorflow/core/ files and build targets. Change: 113080048",process_state.h,"@@ -22,8 +22,8 @@ limitations under the License. #include ""tensorflow/core/framework/allocator.h"" #include ""tensorflow/core/platform/mutex.h"" -#include ""tensorflow/core/platform/port.h"" #include ""tensorflow/core/platform/thread_annotations.h"" +#include ""tensorflow/core/platform/types.h"" namespace tensorflow { ",0,test d0819763b64c693b33b0b8ba454f80b30c9f0590,tensorflow/tensorflow,"Add test case for GitHub issue 53300. Signed-off-by: Yong Tang ",stack_op_test.py,"@@ -288,6 +288,16 @@ class StackOpTest(test.TestCase): c = array_ops.stack(xs) self.assertAllEqual(self.evaluate(c), data) + def testZeroDimUnmatch(self): + # Test case for GitHub issue 53300. + # Error message is `Shapes of all inputs must match` in eager mode, + # and `Shapes ...` in graph mode. Below is to capture both: + with self.assertRaisesRegex((errors.InvalidArgumentError, ValueError), + r""Shapes""): + with self.session(): + t = [array_ops.zeros([0, 3]), array_ops.zeros([1, 3])] + self.evaluate(array_ops.stack(t)) + class AutomaticStackingTest(test.TestCase): ",0,train 19673dfff5231471524cdcf257c0f5f5790696c4,tensorflow/tensorflow,"[RunHandler] Respect the operation timeout in `RunHandlerPool::Get()`. PiperOrigin-RevId: 289754304 Change-Id: I4be1bf1a2799899f27240de580779b83b627e976",direct_session.cc,"@@ -584,11 +584,20 @@ Status DirectSession::RunInternal( } } + const int64 call_timeout = run_options.timeout_in_ms() > 0 + ? run_options.timeout_in_ms() + : operation_timeout_in_ms_; + std::unique_ptr handler; if (ShouldUseRunHandlerPool(run_options) && run_options.experimental().use_run_handler_pool()) { VLOG(1) << ""Using RunHandler to scheduler inter-op closures.""; - handler = GetOrCreateRunHandlerPool(options_)->Get(step_id); + handler = GetOrCreateRunHandlerPool(options_)->Get(step_id, call_timeout); + if (!handler) { + return errors::DeadlineExceeded( + ""Could not obtain RunHandler for request after waiting for "", + call_timeout, ""ms.""); + } } auto* handler_ptr = handler.get(); @@ -607,9 +616,6 @@ Status DirectSession::RunInternal( } // Start parallel Executors. - const int64 call_timeout = run_options.timeout_in_ms() > 0 - ? 
run_options.timeout_in_ms() - : operation_timeout_in_ms_; const bool can_execute_synchronously = pool == nullptr && call_timeout == 0; Executor::Args args; ",0,test 19673dfff5231471524cdcf257c0f5f5790696c4,tensorflow/tensorflow,"[RunHandler] Respect the operation timeout in `RunHandlerPool::Get()`. PiperOrigin-RevId: 289754304 Change-Id: I4be1bf1a2799899f27240de580779b83b627e976",run_handler.cc,"@@ -879,7 +879,12 @@ class RunHandlerPool::Impl { return run_handler_thread_pool_.get(); } - std::unique_ptr Get(int64 step_id) LOCKS_EXCLUDED(mu_) { + bool has_free_handler() EXCLUSIVE_LOCKS_REQUIRED(mu_) { + return !free_handlers_.empty(); + } + + std::unique_ptr Get(int64 step_id, int64 timeout_in_ms) + LOCKS_EXCLUDED(mu_) { std::unique_ptr> thread_work_sources; uint64 version; @@ -894,8 +899,10 @@ class RunHandlerPool::Impl { ""#""); }, profiler::TraceMeLevel::kInfo); - while (free_handlers_.empty()) { - one_handler_free_.wait(l); + if (!mu_.AwaitWithDeadline( + Condition(this, &Impl::has_free_handler), + EnvTime::NowNanos() + timeout_in_ms * 1000 * 1000)) { + return nullptr; } } // Remove the last entry from free_handlers_ and add to the end of @@ -992,7 +999,6 @@ class RunHandlerPool::Impl { LogInfo(); } RecomputePoolStats(num_active_requests, version, *thread_work_sources); - one_handler_free_.notify_one(); } private: @@ -1022,7 +1028,6 @@ class RunHandlerPool::Impl { histogram::Histogram time_hist_ GUARDED_BY(mu_); int64 iterations_ GUARDED_BY(mu_); - condition_variable one_handler_free_; mutex mu_; int64 version_ GUARDED_BY(mu_); const std::vector sub_thread_pool_end_request_percentage_; @@ -1130,8 +1135,9 @@ RunHandlerPool::RunHandlerPool(int num_inter_op_threads, RunHandlerPool::~RunHandlerPool() {} -std::unique_ptr RunHandlerPool::Get(int64 step_id) { - return impl_->Get(step_id); +std::unique_ptr RunHandlerPool::Get(int64 step_id, + int64 timeout_in_ms) { + return impl_->Get(step_id, timeout_in_ms); } RunHandler::RunHandler(Impl* impl) : impl_(impl) {} ",0,test 19673dfff5231471524cdcf257c0f5f5790696c4,tensorflow/tensorflow,"[RunHandler] Respect the operation timeout in `RunHandlerPool::Get()`. PiperOrigin-RevId: 289754304 Change-Id: I4be1bf1a2799899f27240de580779b83b627e976",run_handler.h,"@@ -62,7 +62,7 @@ class RunHandlerPool { // unique_ptr is destroyed. // // Will block unless there is an inactive handler. - std::unique_ptr Get(int64 step_id = 0); + std::unique_ptr Get(int64 step_id = 0, int64 timeout_in_ms = 0); private: class Impl; ",0,test 6eff291a056d06f8c159485f81228f685b6f719c,tensorflow/tensorflow,"Use python tracer to control TraceMe in python launguage. This should have better performance than go through pybind11. PiperOrigin-RevId: 315547199 Change-Id: I64c4d9f5dce6a23fbeed7fcde10c7a8e839494a4",python_tracer.cc,"@@ -23,7 +23,6 @@ limitations under the License. #include ""tensorflow/core/profiler/profiler_options.pb.h"" #include ""tensorflow/core/profiler/protobuf/xplane.pb.h"" #include ""tensorflow/core/protobuf/config.pb.h"" -#include ""tensorflow/core/util/env_var.h"" #include ""tensorflow/python/profiler/internal/python_hooks.h"" namespace tensorflow { @@ -34,7 +33,8 @@ namespace { // the events to TraceMeRecorder. class PythonTracer : public ProfilerInterface { public: - explicit PythonTracer() = default; + explicit PythonTracer(const PythonHooksOptions& options) + : options_(options) {} ~PythonTracer() override; // Starts recording TraceMes. 
@@ -51,6 +51,7 @@ class PythonTracer : public ProfilerInterface { private: bool recording_ = false; + const PythonHooksOptions options_; TF_DISALLOW_COPY_AND_ASSIGN(PythonTracer); }; @@ -66,7 +67,7 @@ Status PythonTracer::Start() { } VLOG(1) << __FUNCTION__; recording_ = true; - PythonHooks::GetSingleton()->Start(); + PythonHooks::GetSingleton()->Start(options_); return Status::OK(); } @@ -75,7 +76,7 @@ Status PythonTracer::Stop() { return errors::Internal(""TraceMeRecorder not started""); } VLOG(1) << __FUNCTION__; - PythonHooks::GetSingleton()->Stop(); + PythonHooks::GetSingleton()->Stop(options_); recording_ = false; return Status::OK(); } @@ -105,18 +106,15 @@ Status PythonTracer::CollectData(XSpace* space) { // Not in anonymous namespace for testing purposes. std::unique_ptr CreatePythonTracer( const ProfileOptions& options) { - if (options.python_tracer_level() == 0) return nullptr; - // This ProfilerInterface rely on TraceMeRecorder to be active. - if (options.host_tracer_level() == 0) return nullptr; - return absl::make_unique(); + PythonHooksOptions pyhooks_options; + pyhooks_options.enable_trace_python_function = + options.python_tracer_level() && options.host_tracer_level(); + pyhooks_options.enable_python_traceme = options.host_tracer_level() != 0; + return absl::make_unique(pyhooks_options); } auto register_python_tracer_factory = [] { - bool enable; - TF_CHECK_OK(ReadBoolFromEnvVar(""TF_ENABLE_OSS_PYTHON_TRACER"", true, &enable)); - if (enable) { - RegisterProfilerFactory(&CreatePythonTracer); - } + RegisterProfilerFactory(&CreatePythonTracer); return 0; }(); ",0,test 6eff291a056d06f8c159485f81228f685b6f719c,tensorflow/tensorflow,"Use python tracer to control TraceMe in python launguage. This should have better performance than go through pybind11. PiperOrigin-RevId: 315547199 Change-Id: I64c4d9f5dce6a23fbeed7fcde10c7a8e839494a4",python_hooks.cc,"@@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/ #include ""tensorflow/python/profiler/internal/python_hooks.h"" +#include ""absl/strings/string_view.h"" #include ""absl/strings/strip.h"" #include ""tensorflow/core/platform/path.h"" @@ -44,16 +45,30 @@ PythonHooks* PythonHooks::GetSingleton() { return singleton; } -void PythonHooks::Start() { - PyGILState_STATE gil_state = PyGILState_Ensure(); - SetProfilerInAllThreads(); - PyGILState_Release(gil_state); +void PythonHooks::Start(const PythonHooksOptions& option) { + if (option.enable_python_traceme || option.enable_trace_python_function) { + PyGILState_STATE gil_state = PyGILState_Ensure(); + if (option.enable_trace_python_function) { + SetProfilerInAllThreads(); + } + if (option.enable_python_traceme) { + EnableTraceMe(true); + } + PyGILState_Release(gil_state); + } } -void PythonHooks::Stop() { - PyGILState_STATE gil_state = PyGILState_Ensure(); - ClearProfilerInAllThreads(); - PyGILState_Release(gil_state); +void PythonHooks::Stop(const PythonHooksOptions& option) { + if (option.enable_python_traceme || option.enable_trace_python_function) { + PyGILState_STATE gil_state = PyGILState_Ensure(); + if (option.enable_trace_python_function) { + ClearProfilerInAllThreads(); + } + if (option.enable_python_traceme) { + EnableTraceMe(false); + } + PyGILState_Release(gil_state); + } } void PythonHooks::Finalize() { tracemes_.clear(); } @@ -180,5 +195,12 @@ void PythonHooks::ClearProfilerInAllThreads() { ThreadingSetProfile(py::none()); } +void PythonHooks::EnableTraceMe(bool enable) { + const char* kModuleName = + ""tensorflow.python.profiler.internal._pywrap_traceme""; + auto trace_module = py::module::import(kModuleName); + trace_module.attr(""enabled"") = enable; +} + } // namespace profiler } // namespace tensorflow ",0,test 6eff291a056d06f8c159485f81228f685b6f719c,tensorflow/tensorflow,"Use python tracer to control TraceMe in python launguage. This should have better performance than go through pybind11. PiperOrigin-RevId: 315547199 Change-Id: I64c4d9f5dce6a23fbeed7fcde10c7a8e839494a4",python_hooks.h,"@@ -30,19 +30,26 @@ namespace profiler { namespace py = ::pybind11; +struct PythonHooksOptions { + bool enable_trace_python_function = false; + bool enable_python_traceme = true; +}; + // Singleton for tracing python function calls. class PythonHooks { public: static PythonHooks* GetSingleton(); - void Start(); - void Stop(); + void Start(const PythonHooksOptions& option); + void Stop(const PythonHooksOptions& option); void Finalize(); void ProfileSlow(const py::object& frame, const string& event, const py::object& arg); void ProfileFast(PyFrameObject* frame, int what, PyObject* arg); private: + void EnableTraceMe(bool enable); + void SetProfilerInAllThreads(); void ClearProfilerInAllThreads(); ",0,test 6eff291a056d06f8c159485f81228f685b6f719c,tensorflow/tensorflow,"Use python tracer to control TraceMe in python launguage. This should have better performance than go through pybind11. PiperOrigin-RevId: 315547199 Change-Id: I64c4d9f5dce6a23fbeed7fcde10c7a8e839494a4",traceme_wrapper.cc,"@@ -23,8 +23,10 @@ namespace py = ::pybind11; using ::tensorflow::profiler::TraceMeWrapper; PYBIND11_MODULE(_pywrap_traceme, m) { + // This variable will be modified by PythonHooks::Start/Stop(). such + // arrangement will reduce the number of calls through pybind11. 
+ m.attr(""enabled"") = py::bool_(false); py::class_(m, ""TraceMe"", py::module_local()) .def(py::init()) - .def(""SetMetadata"", &TraceMeWrapper::SetMetadata) - .def_static(""IsEnabled"", &TraceMeWrapper::IsEnabled); + .def(""SetMetadata"", &TraceMeWrapper::SetMetadata); }; ",0,test 6eff291a056d06f8c159485f81228f685b6f719c,tensorflow/tensorflow,"Use python tracer to control TraceMe in python launguage. This should have better performance than go through pybind11. PiperOrigin-RevId: 315547199 Change-Id: I64c4d9f5dce6a23fbeed7fcde10c7a8e839494a4",trace.py,"@@ -72,7 +72,7 @@ class Trace(object): The example above uses the keyword argument ""step_num"" to specify the training step being traced. """""" - if _pywrap_traceme.TraceMe.IsEnabled(): + if _pywrap_traceme.enabled: # Creating _pywrap_traceme.TraceMe starts the clock. self._traceme = _pywrap_traceme.TraceMe(name, **kwargs) else: ",0,test 2bccece3856992080f7902d4434c8df973901e99,tensorflow/tensorflow,"Add missing space in error message. Change: 135981721",tensor_shape.cc,"@@ -33,7 +33,7 @@ static void AppendTo(const TensorShape& s, gtl::InlinedVector* vals) { } void TensorShape::CheckDimsEqual(int NDIMS) const { - CHECK_EQ(NDIMS, dims()) << ""Asking for tensor of "" << NDIMS << ""dimensions"" + CHECK_EQ(NDIMS, dims()) << ""Asking for tensor of "" << NDIMS << "" dimensions"" << "" from a tensor of "" << dims() << "" dimensions""; } ",0,train 1427bfc12ec5a3a2c6a4ffd57fc5b465d3eedfae,tensorflow/tensorflow,"Update gradient_checker_v2 to use a step size in the finite difference approximation that is exactly representable as a binary floating point number. This is an old trick that in some cases avoids polluting the finite difference approximation with rounding errors that cause false negatives in gradient tests. PiperOrigin-RevId: 343348502 Change-Id: I3539ae7de7105177c5a1b9144b491f36369344f4",relu_op_test.py,"@@ -19,9 +19,7 @@ from __future__ import division from __future__ import print_function import numpy as np -from six.moves import xrange # pylint: disable=redefined-builtin -from tensorflow.python import tf2 from tensorflow.python.eager import backprop from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -29,7 +27,6 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import gradient_checker_v2 -from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables @@ -117,45 +114,19 @@ class ReluTest(test.TestCase): order=""F"") err = gradient_checker_v2.max_error(*gradient_checker_v2.compute_gradient( nn_ops.relu, [x], delta=1.0 / 1024)) - self.assertLess(err, 1e-4) + self.assertLess(err, 1e-6) - # The gradient for fp16 is inaccurate due to the low-precision. - # We compare the fp16 analytical gradient against their fp32 counterpart. + # The gradient test for ReLU is a bit tricky as the derivative is not well + # defined at around zero and we want to avoid that in terms of input values. 
def testGradientFloat16(self): - - def grad(x): - with backprop.GradientTape() as tape: - tape.watch(x) - y = nn_ops.l2_loss(nn_ops.relu(x)) - return tape.gradient(y, x) - - def f(): - with test_util.use_gpu(): - # Randomly construct a 1D shape from [1, 40) - shape = random_ops.random_uniform([1], - minval=1, - maxval=40, - dtype=dtypes.int32) - x32 = random_ops.random_uniform(shape, minval=-1, maxval=1) - x16 = math_ops.cast(x32, dtype=dtypes.float16) - return grad(x32), grad(x16) - - # We're going to ensure that the fp16 and fp32 gradients - # are ""close"" to each other for ~100 random values. - # - # In TensorFlow 1.x, invoking f() (without eager execution enabled) - # would construct a graph. Instead of construct a graph with O(100) nodes, - # we construct a single graph to be executed ~100 times in a Session. - if not tf2.enabled(): - d32_tensor, d16_tensor = f() - with self.cached_session() as sess: - f = lambda: sess.run([d32_tensor, d16_tensor]) - - # Repeat the experiment for 100 times. All tensor shapes and its tensor - # values are randomly generated for each run. - for _ in xrange(100): - d32, d16 = f() - self.assertAllClose(d32, d16, atol=3e-4) + with self.cached_session(): + x = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float16, + order=""F"") + err = gradient_checker_v2.max_error( + *gradient_checker_v2.compute_gradient(nn_ops.relu, [x])) + self.assertLess(err, 1e-6) def testGradientFloat64(self): with self.cached_session(): @@ -165,7 +136,7 @@ class ReluTest(test.TestCase): order=""F"") err = gradient_checker_v2.max_error(*gradient_checker_v2.compute_gradient( nn_ops.relu, [x], delta=1.0 / 1024)) - self.assertLess(err, 1e-10) + self.assertLess(err, 1e-15) def testGradGradFloat32(self): with self.cached_session(): ",0,train 1427bfc12ec5a3a2c6a4ffd57fc5b465d3eedfae,tensorflow/tensorflow,"Update gradient_checker_v2 to use a step size in the finite difference approximation that is exactly representable as a binary floating point number. This is an old trick that in some cases avoids polluting the finite difference approximation with rounding errors that cause false negatives in gradient tests. PiperOrigin-RevId: 343348502 Change-Id: I3539ae7de7105177c5a1b9144b491f36369344f4",gradient_checker_v2.py,"@@ -292,7 +292,7 @@ def _compute_gradient_list(f, xs, delta): @tf_export(""test.compute_gradient"", v1=[]) -def compute_gradient(f, x, delta=1e-3): +def compute_gradient(f, x, delta=None): """"""Computes the theoretical and numeric Jacobian of `f`. With y = f(x), computes the theoretical and numeric Jacobian dy/dx. @@ -329,6 +329,12 @@ def compute_gradient(f, x, delta=1e-3): raise ValueError( ""`x` must be a list or tuple of values convertible to a Tensor "" ""(arguments to `f`), not a %s"" % type(x)) + if delta is None: + # By default, we use a step size for the central finite difference + # approximation that is exactly representable as a binary floating + # point number, since this reduces the amount of noise due to rounding + # in the approximation of some functions. + delta = 1.0 / 1024 return _compute_gradient_list(f, x, delta) ",0,train 3f56b1402409ad4efb8dd931d5b1b7bdc713597e,tensorflow/tensorflow,"Log initialization and warmup time to proto results in benchmark tool. PiperOrigin-RevId: 172792563",graph_compiler.cc,"@@ -38,7 +38,6 @@ limitations under the License. 
#include ""tensorflow/core/graph/algorithm.h"" #include ""tensorflow/core/graph/graph_constructor.h"" #include ""tensorflow/core/graph/node_builder.h"" -#include ""tensorflow/core/lib/gtl/cleanup.h"" #include ""tensorflow/core/lib/hash/hash.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/public/version.h"" @@ -85,20 +84,9 @@ Status PrepareArguments(XlaOpKernelContext* ctx, Graph* graph, } } // namespace Status GraphCompiler::Compile() { - // Maintain a mapping from node id to node outputs. - using NodeOutputs = std::vector; - std::vector output_registry(graph_->num_node_ids()); - auto output_registry_cleanup = gtl::MakeCleanup([&output_registry] { - for (const NodeOutputs& outputs : output_registry) { - for (const TensorValue& value : outputs) { - CHECK(!value.is_ref()); - delete value.tensor; - } - } - }); - - // XLA requires determinism, generate a stable ordering from DFS. + OutputRegistry output_registry(graph_->num_node_ids()); std::vector topo_sorted_nodes; + // XLA requires determinism, generate a stable ordering from DFS. GetReversePostOrder(*graph_, &topo_sorted_nodes, /*stable_comparator=*/NodeComparatorName()); @@ -106,6 +94,7 @@ Status GraphCompiler::Compile() { PartiallySetupParams(¶ms); for (Node* n : topo_sorted_nodes) { + NodeOutputs node_outputs; OpKernel* op_kernel_raw = nullptr; Status s = flib_->CreateKernel(n->def(), &op_kernel_raw); // Transfer ownership of the kernel to a local smart pointer. @@ -133,9 +122,9 @@ Status GraphCompiler::Compile() { if (e->IsControlEdge()) continue; Node* src = e->src(); TF_RET_CHECK(src->id() < output_registry.size()); - const NodeOutputs& src_outputs = output_registry[src->id()]; + const NodeOutputs& outputs = output_registry[src->id()]; - tensor_inputs_[e->dst_input()] = src_outputs[e->src_output()]; + tensor_inputs_[e->dst_input()] = outputs.values[e->src_output()]; } OpKernelContext op_context(¶ms, n->num_outputs()); @@ -149,15 +138,15 @@ Status GraphCompiler::Compile() { // Set up outputs. Also check if outputs from the previous computation is // valid. - NodeOutputs& outputs = output_registry[n->id()]; - outputs.resize(n->num_outputs()); for (int o = 0; o < n->num_outputs(); ++o) { - outputs[o] = op_context.release_output(o); - if (*op_context.is_output_dead() || outputs[o].tensor == nullptr) { + const auto tensor_val = op_context.release_output(o); + if (*op_context.is_output_dead() || tensor_val.tensor == nullptr) { return errors::Internal(""Missing xla_context "", o, ""-th output from "", (*op_context.is_output_dead() ? ""(dead)"" : """"), SummarizeNode(*n)); } + // Set up outputs + output_registry[n->id()].values.push_back(tensor_val); } } return Status::OK(); ",0,train 3f56b1402409ad4efb8dd931d5b1b7bdc713597e,tensorflow/tensorflow,"Log initialization and warmup time to proto results in benchmark tool. PiperOrigin-RevId: 172792563",graph_compiler.h,"@@ -69,6 +69,23 @@ class GraphCompiler { Status Compile(); private: + // NodeOutputs is a wrapper over TensorValues that represents outputs of a + // node. + struct NodeOutputs { + ~NodeOutputs() { + for (auto& v : values) { + CHECK(!v.is_ref()); + delete v.tensor; + } + } + + // Output values of this node. + std::vector values; + }; + + // A mapping from node id to node output. + using OutputRegistry = std::vector; + // Partially sets params. This partially set params can be reused // across multple nodes visit. 
void PartiallySetupParams(OpKernelContext::Params* params); ",0,train 3f8dcd3e288f213001eace4aea0f22cfb1b65946,tensorflow/tensorflow,"Allow exceptions in code that will be staged. Consider all exceptions to be exiting the CFG, with no explicit support for exception-based control-flow. It may incidentally work to use exception-based control flow in code that is never staged. PiperOrigin-RevId: 232719435",side_effect_guards.py,"@@ -125,6 +125,10 @@ class SideEffectGuardTransformer(converter.Base): node.orelse = self._visit_and_reindent(node.orelse) return node + # TODO(b/123995141) Remove once ExceptionHandlers are in the CFG + def visit_ExceptHandler(self, node): + return node + def visit_Expr(self, node): self.generic_visit(node) if isinstance(node.value, gast.Call): ",0,train 3f8dcd3e288f213001eace4aea0f22cfb1b65946,tensorflow/tensorflow,"Allow exceptions in code that will be staged. Consider all exceptions to be exiting the CFG, with no explicit support for exception-based control-flow. It may incidentally work to use exception-based control flow in code that is never staged. PiperOrigin-RevId: 232719435",cfg.py,"@@ -393,6 +393,8 @@ class GraphBuilder(object): def _connect_jump_to_finally_sections(self, node): """"""Connects a jump node to the finally sections protecting it."""""" cursor = set((node,)) + if node not in self.finally_sections: + return cursor for guard_section_id in self.finally_sections[node]: guard_begin, guard_ends = self.finally_section_subgraphs[guard_section_id] self._connect_nodes(cursor, guard_begin) @@ -620,10 +622,10 @@ class AstToCfg(gast.NodeVisitor): leaving_node = self.lexical_scopes.pop() assert node == leaving_node - def _get_enclosing_scopes(self, include, stop_at): + def _get_enclosing_finally_scopes(self, stop_at): included = [] for node in reversed(self.lexical_scopes): - if isinstance(node, include): + if isinstance(node, gast.Try) and node.finalbody: included.append(node) if isinstance(node, stop_at): return node, included @@ -635,10 +637,8 @@ class AstToCfg(gast.NodeVisitor): def _process_exit_statement(self, node, *exits_nodes_of_type): # Note: this is safe because we process functions separately. - try_node, guards = self._get_enclosing_scopes( - include=(gast.Try,), - stop_at=tuple(exits_nodes_of_type), - ) + try_node, guards = self._get_enclosing_finally_scopes( + tuple(exits_nodes_of_type)) if try_node is None: raise ValueError( '%s that is not enclosed by any of %s' % (node, exits_nodes_of_type)) @@ -646,10 +646,8 @@ class AstToCfg(gast.NodeVisitor): def _process_continue_statement(self, node, *loops_to_nodes_of_type): # Note: this is safe because we process functions separately. 
- try_node, guards = self._get_enclosing_scopes( - include=(gast.Try,), - stop_at=tuple(loops_to_nodes_of_type), - ) + try_node, guards = self._get_enclosing_finally_scopes( + tuple(loops_to_nodes_of_type)) if try_node is None: raise ValueError('%s that is not enclosed by any of %s' % (node, loops_to_nodes_of_type)) @@ -698,10 +696,7 @@ class AstToCfg(gast.NodeVisitor): self._process_basic_statement(node) def visit_Raise(self, node): - try_node, guards = self._get_enclosing_scopes( - include=(gast.Try,), - stop_at=(gast.FunctionDef,), - ) + try_node, guards = self._get_enclosing_finally_scopes((gast.FunctionDef,)) if try_node is None: raise ValueError('%s that is not enclosed by any FunctionDef' % node) self.builder.add_error_node(node, guards) @@ -797,16 +792,13 @@ class AstToCfg(gast.NodeVisitor): for stmt in node.orelse: self.visit(stmt) - if node.handlers: - # TODO(mdan): Should we still support bare try/except? Might be confusing. - raise NotImplementedError('exceptions are not yet supported') - self._exit_lexical_scope(node) - self.builder.enter_finally_section(node) - for stmt in node.finalbody: - self.visit(stmt) - self.builder.exit_finally_section(node) + if node.finalbody: + self.builder.enter_finally_section(node) + for stmt in node.finalbody: + self.visit(stmt) + self.builder.exit_finally_section(node) def visit_With(self, node): # TODO(mdan): Mark the context manager's exit call as exit guard. ",0,train 3f8dcd3e288f213001eace4aea0f22cfb1b65946,tensorflow/tensorflow,"Allow exceptions in code that will be staged. Consider all exceptions to be exiting the CFG, with no explicit support for exception-based control-flow. It may incidentally work to use exception-based control flow in code that is never staged. PiperOrigin-RevId: 232719435",liveness.py,"@@ -219,6 +219,10 @@ class Annotator(transformer.Base): frozenset(self.current_analyzer.out[cfg_node])) return node + def visit_ExceptHandler(self, node): + # TODO(b/123995141) Add Exception Handlers to the CFG + return node + def resolve(node, source_info, graphs): """"""Resolves the live symbols at the exit of control flow statements. ",0,train 3f8dcd3e288f213001eace4aea0f22cfb1b65946,tensorflow/tensorflow,"Allow exceptions in code that will be staged. Consider all exceptions to be exiting the CFG, with no explicit support for exception-based control-flow. It may incidentally work to use exception-based control flow in code that is never staged. PiperOrigin-RevId: 232719435",reaching_definitions.py,"@@ -223,6 +223,10 @@ class TreeAnnotator(transformer.Base): def visit_global(self, node): raise NotImplementedError() + def visit_ExceptHandler(self, node): + # TODO(b/123995141) Add Exception Handlers to the CFG + return node + def visit_Name(self, node): if self.current_analyzer is None: # Names may appear outside function defs - for example in class @@ -232,7 +236,8 @@ class TreeAnnotator(transformer.Base): analyzer = self.current_analyzer cfg_node = self.current_cfg_node - assert cfg_node is not None, 'name node outside of any statement?' + assert cfg_node is not None, ('name node, %s, outside of any statement?' + % node.id) qn = anno.getanno(node, anno.Basic.QN) if isinstance(node.ctx, gast.Load): ",0,train 2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched. Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. 
Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty. PiperOrigin-RevId: 368873433 Change-Id: Ie98b41206a134af693034215aeb902206398551e",cpu_device.cc,"@@ -51,7 +51,8 @@ StatusOr> GetCpuClient(bool asynchronous) { TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor, platform->GetExecutor(config)); auto device_state = absl::make_unique( - executor, client, LocalDeviceState::kSynchronous, asynchronous, + executor, client, LocalDeviceState::kSynchronous, + /*max_inflight_computations=*/32, /*allow_event_reuse=*/false, /*use_callback_stream=*/false); auto device = absl::make_unique(i, std::move(device_state)); devices.push_back(std::move(device)); ",0,train 2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched. Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty. PiperOrigin-RevId: 368873433 Change-Id: Ie98b41206a134af693034215aeb902206398551e",gpu_device.cc,"@@ -212,7 +212,7 @@ StatusOr>> BuildLocalDeviceStates( xla_client->backend().stream_executor(i).ValueOrDie(); addressable_devices.push_back(absl::make_unique( executor, xla_client, LocalDeviceState::kComputeSynchronized, - asynchronous, + /*max_inflight_computations=*/32, /*allow_event_reuse=*/true, /*use_callback_stream=*/true)); } return std::move(addressable_devices); ",0,train 2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched. Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty. PiperOrigin-RevId: 368873433 Change-Id: Ie98b41206a134af693034215aeb902206398551e",interpreter_device.cc,"@@ -45,7 +45,8 @@ StatusOr> GetInterpreterClient() { se::StreamExecutor* executor = client->backend().stream_executor(0).ValueOrDie(); auto device_state = absl::make_unique( - executor, client, LocalDeviceState::kSynchronous, /*asynchronous=*/false, + executor, client, LocalDeviceState::kSynchronous, + /*max_inflight_computations=*/1, /*allow_event_reuse=*/false, /*use_callback_stream=*/false); auto device = absl::make_unique(0, std::move(device_state)); ",0,train 2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched. Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty. 
PiperOrigin-RevId: 368873433 Change-Id: Ie98b41206a134af693034215aeb902206398551e",local_device_state.cc,"@@ -31,11 +31,13 @@ namespace xla { LocalDeviceState::LocalDeviceState(se::StreamExecutor* executor, LocalClient* client, AllocationModel allocation_model, - bool asynchronous, bool allow_event_reuse, + int max_inflight_computations, + bool allow_event_reuse, bool use_callback_stream) : allocation_model_(allocation_model), event_pool_(allow_event_reuse), - compute_semaphore_(/*capacity=*/asynchronous ? 32 : 1), + compute_semaphore_( + /*capacity=*/max_inflight_computations), executor_(executor), client_(client), prng_seed_generator_(prng_seed_device_()), ",0,train 2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched. Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty. PiperOrigin-RevId: 368873433 Change-Id: Ie98b41206a134af693034215aeb902206398551e",local_device_state.h,"@@ -89,8 +89,9 @@ class LocalDeviceState { // If asynchronous is false, the host will synchronize to the device after // each execution or transfer. This is intended for debugging only. LocalDeviceState(se::StreamExecutor* executor, LocalClient* client, - AllocationModel allocation_model, bool asynchronous, - bool allow_event_reuse, bool use_callback_stream); + AllocationModel allocation_model, + int max_inflight_computations, bool allow_event_reuse, + bool use_callback_stream); virtual ~LocalDeviceState(); se::StreamExecutor* executor() const { return executor_; } ",0,train 2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched. Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty. 
PiperOrigin-RevId: 368873433 Change-Id: Ie98b41206a134af693034215aeb902206398551e",tpu_client.cc,"@@ -49,7 +49,7 @@ namespace { class TpuDeviceState : public LocalDeviceState { public: TpuDeviceState(se::StreamExecutor* executor, LocalClient* client, - bool asynchronous); + int max_inflight_computations); Status ThenMemcpyDeviceToDevice(se::Stream* transfer_stream, se::Stream* dst_stream, @@ -58,9 +58,10 @@ class TpuDeviceState : public LocalDeviceState { }; TpuDeviceState::TpuDeviceState(se::StreamExecutor* executor, - LocalClient* client, bool asynchronous) + LocalClient* client, + int max_inflight_computations) : LocalDeviceState(executor, client, LocalDeviceState::kAsynchronous, - asynchronous, + max_inflight_computations, /*allow_event_reuse=*/false, /*use_callback_stream=*/true) {} @@ -194,7 +195,7 @@ StatusOr>> GetTpuDevices( } // namespace StatusOr> GetTpuClient( - bool asynchronous, absl::Duration init_retry_timeout) { + int max_inflight_computations, absl::Duration init_retry_timeout) { tf_tpu::TpuPlatformInterface* platform = tf_tpu::TpuPlatformInterface::GetRegisteredPlatform( /*initialize_platform=*/true, /*num_tries=*/1); @@ -230,8 +231,8 @@ StatusOr> GetTpuClient( for (int i = 0; i < client->device_count(); ++i) { se::StreamExecutor* executor = client->backend().stream_executor(i).ValueOrDie(); - local_device_states.push_back( - absl::make_unique(executor, client, asynchronous)); + local_device_states.push_back(absl::make_unique( + executor, client, max_inflight_computations)); } TF_ASSIGN_OR_RETURN(auto devices, ",0,train 2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched. Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty. PiperOrigin-RevId: 368873433 Change-Id: Ie98b41206a134af693034215aeb902206398551e",tpu_client.h,"@@ -53,7 +53,7 @@ class PjRtTpuDevice : public PjRtStreamExecutorDevice { }; StatusOr> GetTpuClient( - bool asynchronous, + int max_inflight_computations, absl::Duration init_retry_timeout = absl::ZeroDuration()); } // namespace xla ",0,train 2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched. Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty. PiperOrigin-RevId: 368873433 Change-Id: Ie98b41206a134af693034215aeb902206398551e",outfeed_receiver_test.cc,"@@ -90,7 +90,8 @@ StatusOr> GetCpuClientWithNonLocalDevice() { TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor, platform->GetExecutor(config)); auto device_state = absl::make_unique( - executor, client, LocalDeviceState::kSynchronous, /*asynchronous=*/true, + executor, client, LocalDeviceState::kSynchronous, + /*max_inflight_computations=*/32, /*allow_event_reuse=*/false, /*use_callback_stream=*/false); std::vector> devices; ",0,train 2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched. 
Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty. PiperOrigin-RevId: 368873433 Change-Id: Ie98b41206a134af693034215aeb902206398551e",xla.cc,"@@ -262,12 +262,12 @@ PYBIND11_MODULE(xla_extension, m) { py::arg(""distributed_client"") = nullptr, py::arg(""node_id"") = 0); m.def( ""get_tpu_client"", - [](bool asynchronous) -> StatusOr> { + [](int max_inflight_computations) -> StatusOr> { TF_ASSIGN_OR_RETURN(std::shared_ptr client, - GetTpuClient(asynchronous)); + GetTpuClient(max_inflight_computations)); return std::make_shared(std::move(client)); }, - py::arg(""asynchronous"") = true); + py::arg(""max_inflight_computations"") = 32); TF_CHECK_OK(PyBuffer::RegisterTypes(m)); ",0,train 2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched. Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty. PiperOrigin-RevId: 368873433 Change-Id: Ie98b41206a134af693034215aeb902206398551e",xla_client.py,"@@ -96,7 +96,7 @@ def _gpu_backend_factory(distributed_client=None, node_id=0): def _tpu_backend_factory(): - return _xla.get_tpu_client(asynchronous=True) + return _xla.get_tpu_client(max_inflight_computations=32) # Backend factories, keyed by user-visible name, in increasing priority order. ",0,train 5b04fe0d14fef00df44a53b8e5dcd8fa4705a6f9,tensorflow/tensorflow,"Add a few more transpose benchmarks. 
PiperOrigin-RevId: 407128419 Change-Id: Ie44a8fccf0fb81a56478a7fe7664c134a4b01d91",transpose_test.cc,"@@ -438,12 +438,34 @@ static std::vector BenchmarkCases() { return std::vector{ TransposeTestCase(/*dims=*/{256, 256}, /*permutation=*/{1, 0}), + TransposeTestCase(/*dims=*/{512, 512}, + /*permutation=*/{1, 0}), + TransposeTestCase(/*dims=*/{1024, 1024}, + /*permutation=*/{1, 0}), + TransposeTestCase(/*dims=*/{256, 256, 256}, + /*permutation=*/{0, 2, 1}), + TransposeTestCase(/*dims=*/{256, 256, 256}, + /*permutation=*/{1, 0, 2}), + TransposeTestCase(/*dims=*/{256, 256, 256}, + /*permutation=*/{1, 2, 0}), + TransposeTestCase(/*dims=*/{256, 256, 256}, + /*permutation=*/{2, 0, 1}), + TransposeTestCase(/*dims=*/{256, 256, 256}, + /*permutation=*/{2, 1, 0}), + TransposeTestCase(/*dims=*/{512, 512, 512}, + /*permutation=*/{0, 2, 1}), + TransposeTestCase(/*dims=*/{512, 512, 512}, + /*permutation=*/{1, 0, 2}), + TransposeTestCase(/*dims=*/{512, 512, 512}, + /*permutation=*/{1, 2, 0}), + TransposeTestCase(/*dims=*/{512, 512, 512}, + /*permutation=*/{2, 0, 1}), + TransposeTestCase(/*dims=*/{512, 512, 512}, + /*permutation=*/{2, 1, 0}), TransposeTestCase(/*dims=*/{64, 224, 224, 3}, /*permutation=*/{1, 2, 3, 0}), TransposeTestCase(/*dims=*/{256, 64, 64, 3}, /*permutation=*/{1, 3, 2, 0}), - TransposeTestCase(/*dims=*/{1024, 1024}, - /*permutation=*/{1, 0}), }; } @@ -488,7 +510,6 @@ void BM_Transpose(const TransposeTestCase& bm, int parallelism, plan->Execute(input.data(), output.data(), [&](std::function fn) { threadpool.Schedule(std::move(fn)); }); - tensorflow::testing::DoNotOptimize(output); } } @@ -515,9 +536,10 @@ static void* benchmarks = []() { for (const auto& benchmark_case : benchmark_cases) { for (const auto& variant : variants) { for (int num_threads : std::get<2>(variant)) { - std::string name = absl::StrCat( - std::get<0>(variant), ""_"", absl::StrJoin(benchmark_case.dims, ""_""), - ""_perm_"", absl::StrJoin(benchmark_case.permutation, ""_"")); + std::string name = + absl::StrCat(std::get<0>(variant), ""_threads_"", num_threads, ""_"", + absl::StrJoin(benchmark_case.dims, ""_""), ""_perm_"", + absl::StrJoin(benchmark_case.permutation, ""_"")); TransposeTestCase testcase = benchmark_case; BenchmarkFn fn = std::get<1>(variant); ",0,train 7483a659271621e79ca13867a6268aedac0e87f9,tensorflow/tensorflow,"[XLA] Fix Broadcast implementation in HloEvaluator to handle the special case of scalar broadcast to be consistent with other backends. Also add a test for scalar broadcast. PiperOrigin-RevId: 164781786",hlo_evaluator.cc,"@@ -177,6 +177,29 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { parent_->GetEvaluatedLiteralFor(broadcast->operand(0)); std::vector broadcast_indices( ShapeUtil::Rank(broadcast->operand(0)->shape()), 0); + + // Special case for broadcasting scalars: ignore broadcast dimension and + // broadcast to whatever the output dimension is. + // TODO(b/64533549): Remove the need of this once this bug is resolved. 
+ if (ShapeUtil::IsScalar(operand_to_broadcast.shape())) { + return output->Populate( + [&](tensorflow::gtl::ArraySlice multi_index) { + return operand_to_broadcast.Get({}); + }); + } + + TF_RET_CHECK(broadcast->dimensions().size() == + ShapeUtil::Rank(operand_to_broadcast.shape())) + << ""broadcast dimensions is of size: "" << broadcast->dimensions().size() + << "" and rank of operand_to_broadcast is: "" + << ShapeUtil::Rank(operand_to_broadcast.shape()); + // Checks that operand's dimensions are the same as the broadcast's + // dimensions along the dimensions to be broadcasted. + for (int64 i = 0; i < broadcast->dimensions().size(); ++i) { + TF_RET_CHECK(broadcast->shape().dimensions(broadcast->dimensions(i)) == + operand_to_broadcast.shape().dimensions(i)); + } + return output->Populate( [&](tensorflow::gtl::ArraySlice multi_index) { for (int64 i = 0; i < broadcast->dimensions().size(); ++i) { @@ -184,7 +207,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { } return operand_to_broadcast.Get(broadcast_indices); }); - } + }; Status HandleCeil(HloInstruction* ceil, HloInstruction* operand) override { TF_ASSIGN_OR_RETURN(parent_->evaluated_[ceil], ",0,train 7483a659271621e79ca13867a6268aedac0e87f9,tensorflow/tensorflow,"[XLA] Fix Broadcast implementation in HloEvaluator to handle the special case of scalar broadcast to be consistent with other backends. Also add a test for scalar broadcast. PiperOrigin-RevId: 164781786",hlo_evaluator_test.cc,"@@ -311,6 +311,27 @@ TEST_F(HloEvaluatorTest, DoesBroadcast) { LiteralTestUtil::ExpectEqual(*result, *output_literal); } +TEST_F(HloEvaluatorTest, DoesBroadcastScalar) { + HloComputation::Builder b(TestName()); + auto input_literal = Literal::CreateR0(111); + auto output_literal = Literal::CreateR2( + {{111, 111}, {111, 111}, {111, 111}, {111, 111}, {111, 111}, {111, 111}}); + + HloInstruction* literal_instruction = b.AddInstruction( + HloInstruction::CreateConstant(std::move(input_literal))); + // Broadcast dimension is ignored in the case of scalars. + b.AddInstruction(HloInstruction::CreateBroadcast( + output_literal->shape(), literal_instruction, + /*broadcast_dimensions=*/{1})); + HloModule module(TestName()); + auto computation = module.AddEntryComputation(b.Build()); + + std::unique_ptr result = + evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie(); + + LiteralTestUtil::ExpectEqual(*result, *output_literal); +} + TEST_F(HloEvaluatorTest, ConvertWithSameLayout) { HloComputation::Builder b(TestName()); ",0,train 563a0184e11c1b960853a66532fc4780af828333,tensorflow/tensorflow,"Legalize TensorFlow XlaGather and CollectivePermute ops to HLO XlaGather requires constant slice_sizes operand and CollectivePermute requires constant source_target_pairs operand as these are attributes in the corresponding MHLO dialect ops. PiperOrigin-RevId: 326960529 Change-Id: I0a7c2eaa81b39c0f01993b1d789c678157b55a9a",legalize_tf.cc,"@@ -50,6 +50,7 @@ limitations under the License. 
#include ""tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h"" #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"" #include ""tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.h"" +#include ""tensorflow/compiler/mlir/xla/attribute_importer.h"" #include ""tensorflow/compiler/mlir/xla/transforms/passes.h"" #include ""tensorflow/compiler/xla/client/lib/conv_grad_size_util.h"" #include ""tensorflow/compiler/xla/client/padding.h"" @@ -1065,6 +1066,21 @@ static void BuildSortComparisonBody(llvm::ArrayRef element_types, builder->create(loc, compare); } +//===----------------------------------------------------------------------===// +// XlaGather op utilities. +//===----------------------------------------------------------------------===// + +bool HasValidGatherDims(StringAttr attr) { + ::xla::GatherDimensionNumbers dims; + return dims.ParseFromString(attr.getValue().str()); +} + +GatherDimensionNumbers GetGatherDimNumsAttr(StringAttr attr, Builder *builder) { + ::xla::GatherDimensionNumbers dims; + if (!dims.ParseFromString(attr.getValue().str())) return {}; + return ::xla::ConvertGatherDimensionNumbers(dims, builder); +} + //===----------------------------------------------------------------------===// // Op converters. //===----------------------------------------------------------------------===// ",0,train 43a7963cd58f04d0ceac097a859c51d3f760db8b,tensorflow/tensorflow,"Put the dynamic libraries into the platlib (platform specific) instead of the purelib (default). This allows auditwheel to audit the wheel without errors. PiperOrigin-RevId: 251828232",setup.py,"@@ -139,6 +139,7 @@ class InstallCommand(InstallCommandBase): ret = InstallCommandBase.finalize_options(self) self.install_headers = os.path.join(self.install_purelib, 'tensorflow', 'include') + self.install_lib = self.install_platlib return ret ",0,train 9b44cbafe53d1409e6d7d1086284109824495abf,tensorflow/tensorflow,"Add logging to indicate which checkpoint is being restored. Change: 145817772",saver.py,"@@ -1435,6 +1435,7 @@ class Saver(object): """""" if self._is_empty: return + logging.info(""Restoring parameters from %s"", save_path) sess.run(self.saver_def.restore_op_name, {self.saver_def.filename_tensor_name: save_path}) ",0,test cc9bdb70b88c76f293f27e29e91cb7739ba3fdc4,tensorflow/tensorflow,"Optimizing code and adding from http to https (#15714) * Removing extra space, preventing double declared ""if"" statement and from http to https * Optimizing code for reducing if statement * Replacing in above line * Reverting my changes back for 'else'. * Reverting back changes as discussed * ""No new line at end of file"" indication of lint.",while_op.cc,"@@ -39,7 +39,7 @@ Status MakeXlaCompilerArgumentsFromInputs( *has_uninitialized_vars = false; *has_tensor_arrays = false; for (int i = 0; i < ctx->num_inputs(); ++i) { - VLOG(2) << "" Input "" << i + VLOG(2) << "" Input "" << i << "" type: "" << DataTypeString(ctx->input_type(i)) << "" shape: "" << ctx->InputShape(i).DebugString(); XlaCompiler::Argument& arg = (*args)[i]; ",0,test cc9bdb70b88c76f293f27e29e91cb7739ba3fdc4,tensorflow/tensorflow,"Optimizing code and adding from http to https (#15714) * Removing extra space, preventing double declared ""if"" statement and from http to https * Optimizing code for reducing if statement * Replacing in above line * Reverting my changes back for 'else'. 
* Reverting back changes as discussed * ""No new line at end of file"" indication of lint.",model.cc,"@@ -80,8 +80,7 @@ FlatBufferModel::FlatBufferModel(const char* filename, bool mmap_file, } else { allocation_ = new FileCopyAllocation(filename, error_reporter); } - if (!allocation_->valid()) return; - if (!CheckModelIdentifier()) return; + if (!allocation_->valid() || !CheckModelIdentifier()) return; model_ = VerifyAndGetModel(allocation_->base(), allocation_->bytes()); } ",0,test c28ca27b96b3a141922523c005c71af51cc61906,tensorflow/tensorflow,"Fixing a TensorFlow control flow bug. Calling `nest.map_structure` with a lambda that does not return (i.e. only for its side-effect) will fail on structures that contain composite tensors because the `map_structure` implementation will try to reconstruct the composite tensors from the return values of the lambda, which will be None. PiperOrigin-RevId: 284197904 Change-Id: I9b3e43bbd28712281839eaf77b2e4280db7c585c",control_flow_ops.py,"@@ -749,10 +749,10 @@ class ControlFlowContext(object): def ExitResult(self, result): """"""Make a list of tensors available in the outer context."""""" if self._outer_context: - nest.map_structure( - lambda x: self._outer_context.AddName(x.name), - result, - expand_composites=True) + def fn(x): + self._outer_context.AddName(x.name) + return x + nest.map_structure(fn, result, expand_composites=True) def GetWhileContext(self): """"""Return the while context containing this context."""""" ",0,train 7687debbf63e31375d960d663373da8d469f2d2e,tensorflow/tensorflow,"Add unit-test for questions: - http://stackoverflow.com/q/45109305 - #10766 PiperOrigin-RevId: 162026912",mvn_diag_test.py,"@@ -24,7 +24,12 @@ from tensorflow.contrib import distributions from tensorflow.contrib.distributions.python.ops import bijectors from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -233,6 +238,43 @@ class MultivariateNormalDiagTest(test.TestCase): self.assertAllClose(mu, samps.mean(axis=0), atol=0.1) self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.1) + def testMultivariateNormalDiagNegLogLikelihood(self): + num_draws = 50 + dims = 3 + with self.test_session() as sess: + x_pl = array_ops.placeholder(dtype=dtypes.float32, + shape=[None, dims], + name=""x"") + mu_var = variable_scope.get_variable( + name=""mu"", + shape=[dims], + dtype=dtypes.float32, + initializer=init_ops.constant_initializer(1.)) + sess.run([variables.global_variables_initializer()]) + + mvn = ds.MultivariateNormalDiag( + loc=mu_var, + scale_diag=array_ops.ones(shape=[dims], dtype=dtypes.float32)) + + # Typically you'd use `mvn.log_prob(x_pl)` which is always at least as + # numerically stable as `tf.log(mvn.prob(x_pl))`. However in this test + # we're testing a bug specific to `prob` and not `log_prob`; + # http://stackoverflow.com/q/45109305. (The underlying issue was not + # related to `Distributions` but that `reduce_prod` didn't correctly + # handle negative indexes.) 
+ neg_log_likelihood = -math_ops.reduce_sum(math_ops.log(mvn.prob(x_pl))) + grad_neg_log_likelihood = gradients_impl.gradients( + neg_log_likelihood, variables.trainable_variables()) + + x = np.zeros([num_draws, dims], dtype=np.float32) + grad_neg_log_likelihood_ = sess.run( + grad_neg_log_likelihood, + feed_dict={x_pl: x}) + self.assertEqual(1, len(grad_neg_log_likelihood_)) + self.assertAllClose(grad_neg_log_likelihood_[0], + np.tile(num_draws, dims), + rtol=1e-6, atol=0.) + if __name__ == ""__main__"": test.main() ",0,test bea42a1b32d7faf2effcafc5ef1d6d1e4436ae31,tensorflow/tensorflow,"allow metadata tf_op override level0's tf_op. PiperOrigin-RevId: 347040091 Change-Id: I13bed86c9c5b5acd7d95a3c4db96a279755f6a76",cupti_collector.cc,"@@ -224,27 +224,25 @@ struct PerDeviceCollector { std::vector annotation_stack = ParseAnnotationStack(event.annotation); + if (!annotation_stack.empty()) { + xevent.AddStatValue( + *plane->GetOrCreateStatMetadata(GetStatTypeStr(StatType::kTfOp)), + *plane->GetOrCreateStatMetadata(annotation_stack.begin()->name)); + } // If multiple metadata have the same key name, show the values from the top // of the stack (innermost annotation). Concatenate the values from // ""hlo_op"". absl::flat_hash_set key_set; - std::vector hlo_op_names; + for (auto annotation = annotation_stack.rbegin(); annotation != annotation_stack.rend(); ++annotation) { for (const Annotation::Metadata& metadata : annotation->metadata) { - if (metadata.key == ""tf_op"") { - continue; // ignored, obtained from HLO proto via DebugInfoMap - } else if (key_set.insert(metadata.key).second) { + if (key_set.insert(metadata.key).second) { xevent.ParseAndAddStatValue( *plane->GetOrCreateStatMetadata(metadata.key), metadata.value); } } } - if (!annotation_stack.empty()) { - xevent.AddStatValue( - *plane->GetOrCreateStatMetadata(GetStatTypeStr(StatType::kTfOp)), - *plane->GetOrCreateStatMetadata(annotation_stack.begin()->name)); - } } absl::optional GetDeviceAttribute(CUdevice device, ",0,test bea42a1b32d7faf2effcafc5ef1d6d1e4436ae31,tensorflow/tensorflow,"allow metadata tf_op override level0's tf_op. PiperOrigin-RevId: 347040091 Change-Id: I13bed86c9c5b5acd7d95a3c4db96a279755f6a76",traceme_encode.h,"@@ -133,6 +133,12 @@ TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeOp( absl::string_view op_name, absl::string_view op_type) { return absl::StrCat(op_name, "":"", op_type); } + +TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeOp(const char* op_name, + const char* op_type) { + return absl::StrCat(op_name, "":"", op_type); +} + TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeOp( std::string&& op_name, absl::string_view op_type) { absl::StrAppend(&op_name, "":"", op_type); ",0,test f8c008aa1833eab6c5ef4523e1bff2f2769c8ac0,tensorflow/tensorflow,Further fixes to test case,check_ops_test.py,"@@ -240,44 +240,6 @@ First 2 elements of y: out = array_ops.identity(larry) self.evaluate(out) - def test_error_message_eager(self): - expected_error_msg_full = r""""""Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'This is the error message.' -b'Condition x != y did not hold for every single element:' -b'x (shape=(2, 3) dtype=float32) = ' -0.0, 1.0, 2.0, 3.0, 4.0, 5.0 -b'y (shape=(2, 3) dtype=float32) = ' -0.0, 1.0, 2.0, 3.0, 4.0, 5.0"""""" - expected_error_msg_default = r""""""Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'This is the error message.' 
-b'Condition x != y did not hold for every single element:' -b'x (shape=(2, 3) dtype=float32) = ' -0.0, 1.0, 2.0, ... -b'y (shape=(2, 3) dtype=float32) = ' -0.0, 1.0, 2.0, ..."""""" - expected_error_msg_short = r""""""Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'This is the error message.' -b'Condition x != y did not hold for every single element:' -b'x (shape=(2, 3) dtype=float32) = ' -0.0, 1.0, ... -b'y (shape=(2, 3) dtype=float32) = ' -0.0, 1.0, ..."""""" - with context.eager_mode(): - t = constant_op.constant(np.array(range(6)), shape=[2,3], dtype=np.float32) - with self.assertRaisesRegexp(errors.InvalidArgumentError, - expected_error_msg_full): - check_ops.assert_none_equal(t, t, message=""This is the error message."", - summarize=10) - with self.assertRaisesRegexp(errors.InvalidArgumentError, - expected_error_msg_full): - check_ops.assert_equal(t, t, message=""This is the error message."", - summarize=-1) - with self.assertRaisesRegexp(errors.InvalidArgumentError, - expected_error_msg_default): - check_ops.assert_equal(t, t, message=""This is the error message."") - with self.assertRaisesRegexp(errors.InvalidArgumentError, - expected_error_msg_short): - check_ops.assert_equal(t, t, message=""This is the error message."", - summarize=2) - - class AssertNoneEqualTest(test.TestCase): @@ -340,6 +302,43 @@ class AssertNoneEqualTest(test.TestCase): x = check_ops.assert_none_equal(t1, t2) assert x is None + def test_error_message_eager(self): + expected_error_msg_full = r""""""Expected 'tf.Tensor\(False, shape=\(\), dtype=bool\)' to be true. Summarized data: b'This is the error message.' +b'Condition x != y did not hold for every single element:' +b'x \(shape=\(2, 3\) dtype=float32\) = ' +0.0, 1.0, 2.0, 3.0, 4.0, 5.0 +b'y \(shape=\(2, 3\) dtype=float32\) = ' +0.0, 1.0, 2.0, 3.0, 4.0, 5.0"""""" + expected_error_msg_default = r""""""Expected 'tf.Tensor\(False, shape=\(\), dtype=bool\)' to be true. Summarized data: b'This is the error message.' +b'Condition x != y did not hold for every single element:' +b'x \(shape=\(2, 3\) dtype=float32\) = ' +0.0, 1.0, 2.0, ... +b'y \(shape=\(2, 3\) dtype=float32\) = ' +0.0, 1.0, 2.0, ..."""""" + expected_error_msg_short = r""""""Expected 'tf.Tensor\(False, shape=\(\), dtype=bool\)' to be true. Summarized data: b'This is the error message.' +b'Condition x != y did not hold for every single element:' +b'x \(shape=\(2, 3\) dtype=float32\) = ' +0.0, 1.0, ... +b'y \(shape=\(2, 3\) dtype=float32\) = ' +0.0, 1.0, ..."""""" + with context.eager_mode(): + t = constant_op.constant(np.array(range(6)), shape=[2,3], dtype=np.float32) + with self.assertRaisesRegexp(errors.InvalidArgumentError, + expected_error_msg_full): + check_ops.assert_none_equal(t, t, message=""This is the error message."", + summarize=10) + with self.assertRaisesRegexp(errors.InvalidArgumentError, + expected_error_msg_full): + check_ops.assert_equal(t, t, message=""This is the error message."", + summarize=-1) + with self.assertRaisesRegexp(errors.InvalidArgumentError, + expected_error_msg_default): + check_ops.assert_equal(t, t, message=""This is the error message."") + with self.assertRaisesRegexp(errors.InvalidArgumentError, + expected_error_msg_short): + check_ops.assert_equal(t, t, message=""This is the error message."", + summarize=2) + class AssertAllCloseTest(test.TestCase): ",0,train 2ba26da2794775576d50003414460ebcf92bf945,tensorflow/tensorflow,"Improving error messages for more python/ops/... 
files PiperOrigin-RevId: 397769627 Change-Id: Ifef986322f9deaac7c8163d2c6a02b67b9625670",losses_impl.py,"@@ -70,7 +70,8 @@ class Reduction(object): @classmethod def validate(cls, key): if key not in cls.all(): - raise ValueError(""Invalid Reduction Key %s."" % key) + raise ValueError(f""Invalid Reduction Key {key}. Key should be one of "" + f""{cls.all()}."") def _safe_mean(losses, num_present): @@ -256,9 +257,9 @@ def absolute_difference( @end_compatibility """""" if labels is None: - raise ValueError(""labels must not be None."") + raise ValueError(""Argument `labels` must not be None."") if predictions is None: - raise ValueError(""predictions must not be None."") + raise ValueError(""Argument `predictions` must not be None."") with ops.name_scope(scope, ""absolute_difference"", (predictions, labels, weights)) as scope: predictions = math_ops.cast(predictions, dtype=dtypes.float32) @@ -309,11 +310,11 @@ def cosine_distance( """""" axis = deprecated_argument_lookup(""axis"", axis, ""dim"", dim) if axis is None: - raise ValueError(""You must specify 'axis'."") + raise ValueError(""You must specify argument `axis`."") if labels is None: - raise ValueError(""labels must not be None."") + raise ValueError(""Argument `labels` must not be None."") if predictions is None: - raise ValueError(""predictions must not be None."") + raise ValueError(""Argument `predictions` must not be None."") with ops.name_scope(scope, ""cosine_distance_loss"", (predictions, labels, weights)) as scope: predictions = math_ops.cast(predictions, dtype=dtypes.float32) @@ -361,9 +362,9 @@ def hinge_loss(labels, logits, weights=1.0, scope=None, @end_compatibility """""" if labels is None: - raise ValueError(""labels must not be None."") + raise ValueError(""Argument `labels` must not be None."") if logits is None: - raise ValueError(""logits must not be None."") + raise ValueError(""Argument `logits` must not be None."") with ops.name_scope(scope, ""hinge_loss"", (logits, labels, weights)) as scope: logits = math_ops.cast(logits, dtype=dtypes.float32) labels = math_ops.cast(labels, dtype=dtypes.float32) @@ -428,9 +429,9 @@ def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None, @end_compatibility """""" if labels is None: - raise ValueError(""labels must not be None."") + raise ValueError(""Argument `labels` must not be None."") if predictions is None: - raise ValueError(""predictions must not be None."") + raise ValueError(""Argument `predictions` must not be None."") with ops.name_scope(scope, ""huber_loss"", (predictions, labels, weights)) as scope: predictions = math_ops.cast(predictions, dtype=dtypes.float32) @@ -495,9 +496,9 @@ def log_loss(labels, predictions, weights=1.0, epsilon=1e-7, scope=None, @end_compatibility """""" if labels is None: - raise ValueError(""labels must not be None."") + raise ValueError(""Argument `labels` must not be None."") if predictions is None: - raise ValueError(""predictions must not be None."") + raise ValueError(""Argument `predictions` must not be None."") with ops.name_scope(scope, ""log_loss"", (predictions, labels, weights)) as scope: predictions = math_ops.cast(predictions, dtype=dtypes.float32) @@ -564,9 +565,9 @@ def mean_pairwise_squared_error( @end_compatibility """""" if labels is None: - raise ValueError(""labels must not be None."") + raise ValueError(""Argument `labels` must not be None."") if predictions is None: - raise ValueError(""predictions must not be None."") + raise ValueError(""Argument `predictions` must not be None."") with 
ops.name_scope(scope, ""mean_pairwise_squared_error"", (predictions, labels, weights)) as scope: weights = math_ops.cast(weights, dtype=dtypes.float32) @@ -757,9 +758,9 @@ def mean_squared_error( @end_compatibility """""" if labels is None: - raise ValueError(""labels must not be None."") + raise ValueError(""Argument `labels` must not be None."") if predictions is None: - raise ValueError(""predictions must not be None."") + raise ValueError(""Argument `predictions` must not be None."") with ops.name_scope(scope, ""mean_squared_error"", (predictions, labels, weights)) as scope: predictions = math_ops.cast(predictions, dtype=dtypes.float32) @@ -816,9 +817,9 @@ def sigmoid_cross_entropy( @end_compatibility """""" if multi_class_labels is None: - raise ValueError(""multi_class_labels must not be None."") + raise ValueError(""Argument `multi_class_labels` must not be None."") if logits is None: - raise ValueError(""logits must not be None."") + raise ValueError(""Argument `logits` must not be None."") with ops.name_scope(scope, ""sigmoid_cross_entropy_loss"", (logits, multi_class_labels, weights)) as scope: logits = ops.convert_to_tensor(logits) @@ -969,9 +970,9 @@ def softmax_cross_entropy( @end_compatibility """""" if onehot_labels is None: - raise ValueError(""onehot_labels must not be None."") + raise ValueError(""Argument `onehot_labels` must not be None."") if logits is None: - raise ValueError(""logits must not be None."") + raise ValueError(""Argument `logits` must not be None."") with ops.name_scope(scope, ""softmax_cross_entropy_loss"", (logits, onehot_labels, weights)) as scope: logits = ops.convert_to_tensor(logits) @@ -1087,9 +1088,9 @@ def sparse_softmax_cross_entropy( @end_compatibility """""" if labels is None: - raise ValueError(""labels must not be None."") + raise ValueError(""Argument `labels` must not be None."") if logits is None: - raise ValueError(""logits must not be None."") + raise ValueError(""Argument `logits` must not be None."") with ops.name_scope(scope, ""sparse_softmax_cross_entropy_loss"", (logits, labels, weights)) as scope: # As documented above in Args, labels contain class IDs and logits contains ",0,train 2ba26da2794775576d50003414460ebcf92bf945,tensorflow/tensorflow,"Improving error messages for more python/ops/... files PiperOrigin-RevId: 397769627 Change-Id: Ifef986322f9deaac7c8163d2c6a02b67b9625670",util.py,"@@ -160,7 +160,8 @@ def check_per_example_loss_rank(per_example_loss): if loss_rank == 0: raise ValueError( ""Invalid value passed for `per_example_loss`. Expected a tensor with "" - ""at least rank 1, received: {}"".format(per_example_loss)) + f""at least rank 1. Received per_example_loss={per_example_loss} with "" + f""rank {loss_rank}"") yield else: # Handle dynamic rank. ",0,train 2ba26da2794775576d50003414460ebcf92bf945,tensorflow/tensorflow,"Improving error messages for more python/ops/... files PiperOrigin-RevId: 397769627 Change-Id: Ifef986322f9deaac7c8163d2c6a02b67b9625670",nn_loss_scaling_utilities_test.py,"@@ -151,7 +151,7 @@ class LossUtilitiesTest(test_lib.TestCase, parameterized.TestCase): # Static rank with self.assertRaisesRegex( ValueError, ""Invalid value passed for `per_example_loss`. "" - ""Expected a tensor with at least rank 1,""): + ""Expected a tensor with at least rank 1.""): nn_impl.compute_average_loss(per_example_loss) with context.graph_mode(): ",0,train 2ba26da2794775576d50003414460ebcf92bf945,tensorflow/tensorflow,"Improving error messages for more python/ops/... 
files PiperOrigin-RevId: 397769627 Change-Id: Ifef986322f9deaac7c8163d2c6a02b67b9625670",control_flow_ops.py,"@@ -72,9 +72,9 @@ def for_loop(loop_fn, loop_fn_dtypes, iters, parallel_iterations=None): fn_output = nest.flatten(loop_fn(i)) if len(fn_output) != len(flat_loop_fn_dtypes): raise ValueError( - ""Number of expected outputs, %d, does not match the number of "" - ""actual outputs, %d, from loop_fn"" % (len(flat_loop_fn_dtypes), - len(fn_output))) + f""Number of expected outputs {len(flat_loop_fn_dtypes)}, does not "" + f""match the number of actual outputs {len(fn_output)} from loop_fn: "" + f""{loop_fn} with output {fn_output}."") outputs = [] del is_none_list[:] is_none_list.extend(x is None for x in fn_output) @@ -222,10 +222,9 @@ def _composite_to_tensors(value, is_batched=False): if _should_expand_composite(value): spec = value._type_spec if not isinstance(spec, type_spec.BatchableTypeSpec): - raise ValueError(""CompositeTensor instance {} returned from "" + raise ValueError(f""CompositeTensor instance {value} returned from "" ""parallel_for or vectorized_map loop body must provide "" - ""a `BatchableTypeSpec` (saw: {})."".format( - value, spec)) + f""a `BatchableTypeSpec` (saw: {spec})."") if is_batched: return spec._to_batched_tensor_list(value) return spec._to_tensor_list(value) @@ -258,7 +257,7 @@ def _loop_fn_has_config(loop_fn): else: loop_class = tf_decorator.unwrap(loop_fn)[1] if not hasattr(loop_class, ""__call__""): - raise ValueError(""loop_fn object did not have a __call__ method"") + raise ValueError(""`loop_fn` object did not have a __call__ method"") argspec = tf_inspect.getargspec(loop_class.__call__) return PFOR_CONFIG_ARG in argspec.args @@ -309,9 +308,12 @@ def _pfor_impl(loop_fn, iters = ops.convert_to_tensor(iters) if parallel_iterations is not None: if parallel_iterations < 1: - raise ValueError(""parallel_iterations must be None or a positive integer"") + raise ValueError( + ""Argument `parallel_iterations` must be None or a positive integer. "" + f""Received: {parallel_iterations}."") if parallel_iterations == 1: - raise ValueError(""Found parallel_iterations == 1. Use for_loop instead."") + raise ValueError( + ""Found `parallel_iterations == 1`. Use `for_loop` instead."") if iters_value is not None and iters_value < parallel_iterations: parallel_iterations = None if parallel_iterations is None: @@ -325,8 +327,8 @@ def _pfor_impl(loop_fn, flattened_output_tensors.append(output) else: if pfor_config is not None and pfor_config._has_reductions(): # pylint: disable=protected-access - raise ValueError(""Setting parallel_iterations currently unsupported if"" - "" reductions across iterations are performed."") + raise ValueError(""Setting `parallel_iterations` currently unsupported if "" + ""reductions across iterations are performed."") num_tiled_iterations = iters // parallel_iterations num_remaining_iterations = iters % parallel_iterations # TODO(agarwal): Avoid calling loop_fn twice. Generate the loop body inside ",0,train 2ba26da2794775576d50003414460ebcf92bf945,tensorflow/tensorflow,"Improving error messages for more python/ops/... 
files PiperOrigin-RevId: 397769627 Change-Id: Ifef986322f9deaac7c8163d2c6a02b67b9625670",control_flow_ops_test.py,"@@ -133,7 +133,7 @@ class PForTest(PForTestCase): lambda i: 1, dtypes.int32, 8, parallel_iterations=0) def test_parallel_iterations_one(self): - with self.assertRaisesRegex(ValueError, ""Use for_loop instead""): + with self.assertRaisesRegex(ValueError, ""Use `for_loop` instead""): pfor_control_flow_ops.pfor(lambda i: 1, 8, parallel_iterations=1) def test_vectorized_map(self): @@ -330,7 +330,7 @@ class ReductionTest(PForTestCase): return pfor_config.reduce_sum(x_i) with self.assertRaisesRegex(ValueError, - ""parallel_iterations currently unsupported""): + ""`parallel_iterations` currently unsupported""): pfor_control_flow_ops.pfor(loop_fn, 8, parallel_iterations=2) def test_var_loop_len(self): ",0,train d8ef9b091839b4edfbb3826af920c996e3b1982a,tensorflow/tensorflow,removing some nonsense in AddNodeWithParameters,subgraph.cc,"@@ -808,13 +808,9 @@ TfLiteStatus Subgraph::AddNodeWithParameters( int new_node_index = nodes_and_registration_.size(); if (node_index) *node_index = new_node_index; - nodes_and_registration_.resize(nodes_and_registration_.size() + 1); + nodes_and_registration_.emplace_back(); auto& node_and_reg = nodes_and_registration_.back(); TfLiteNode& node = node_and_reg.first; - if (node.inputs) TfLiteIntArrayFree(node.inputs); - if (node.outputs) TfLiteIntArrayFree(node.outputs); - if (node.intermediates) TfLiteIntArrayFree(node.intermediates); - if (node.temporaries) TfLiteIntArrayFree(node.temporaries); // NOTE, here we are not using move semantics yet, since our internal // representation isn't std::vector, but in the future we would like to avoid ",0,train 2ba2e6b25891e63c425c7f239ac75bcb7f8f9bda,tensorflow/tensorflow,"Change signature of tf.count_nonzero for TF 2.0. PiperOrigin-RevId: 221536352",math_ops.py,"@@ -1408,7 +1408,7 @@ def reduce_sum(input_tensor, name=name)) -@tf_export(""math.count_nonzero"", ""count_nonzero"") +@tf_export(v1=[""math.count_nonzero"", ""count_nonzero""]) @deprecation.deprecated_args( None, ""keep_dims is deprecated, use keepdims instead"", ""keep_dims"") def count_nonzero(input_tensor, @@ -1469,20 +1469,79 @@ def count_nonzero(input_tensor, """""" keepdims = deprecation.deprecated_argument_lookup(""keepdims"", keepdims, ""keep_dims"", keep_dims) + axis = deprecation.deprecated_argument_lookup( + ""axis"", axis, + ""reduction_indices"", reduction_indices + ) if keepdims is None: keepdims = False - with ops.name_scope(name, ""count_nonzero"", [input_tensor]): - input_tensor = ops.convert_to_tensor(input_tensor, name=""input_tensor"") + return count_nonzero_v2(input_tensor, axis, keepdims, dtype, name) + + +@tf_export(""math.count_nonzero"", v1=[]) +def count_nonzero_v2(input, # pylint: disable=redefined-builtin + axis=None, + keepdims=None, + dtype=dtypes.int64, + name=None): + """"""Computes number of nonzero elements across dimensions of a tensor. + + Reduces `input` along the dimensions given in `axis`. + Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each + entry in `axis`. If `keepdims` is true, the reduced dimensions + are retained with length 1. + + If `axis` has no entries, all dimensions are reduced, and a + tensor with a single element is returned. + + **NOTE** Floating point comparison to zero is done by exact floating point + equality check. Small values are **not** rounded to zero for purposes of + the nonzero check. 
+ + For example: + + ```python + x = tf.constant([[0, 1, 0], [1, 1, 0]]) + tf.count_nonzero(x) # 3 + tf.count_nonzero(x, 0) # [1, 2, 0] + tf.count_nonzero(x, 1) # [1, 2] + tf.count_nonzero(x, 1, keepdims=True) # [[1], [2]] + tf.count_nonzero(x, [0, 1]) # 3 + ``` + + **NOTE** Strings are compared against zero-length empty string `""""`. Any + string with a size greater than zero is already considered as nonzero. + + For example: + ```python + x = tf.constant(["""", ""a"", "" "", ""b"", """"]) + tf.count_nonzero(x) # 3, with ""a"", "" "", and ""b"" as nonzero strings. + ``` + + Args: + input: The tensor to reduce. Should be of numeric type, `bool`, + or `string`. + axis: The dimensions to reduce. If `None` (the default), + reduces all dimensions. Must be in the range + `[-rank(input), rank(input))`. + keepdims: If true, retains reduced dimensions with length 1. + dtype: The output dtype; defaults to `tf.int64`. + name: A name for the operation (optional). + + Returns: + The reduced tensor (number of nonzero values). + """""" + with ops.name_scope(name, ""count_nonzero"", [input]): + input = ops.convert_to_tensor(input, name=""input"") # A scalar of 'zero' is enough as `not_equal` will broadcast. - zero = array_ops.zeros([], dtype=input_tensor.dtype) + zero = array_ops.zeros([], dtype=input.dtype) return cast( reduce_sum( # int64 reduction happens on GPU - to_int64(gen_math_ops.not_equal(input_tensor, zero)), + to_int64(gen_math_ops.not_equal(input, zero)), axis=axis, - keepdims=keepdims, - reduction_indices=reduction_indices), + keepdims=keepdims), dtype=dtype) ",0,test 2ba2e6b25891e63c425c7f239ac75bcb7f8f9bda,tensorflow/tensorflow,"Change signature of tf.count_nonzero for TF 2.0. PiperOrigin-RevId: 221536352",tf_upgrade_v2.py,"@@ -37,9 +37,14 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec): ""tf.convert_to_tensor"": { ""preferred_dtype"": ""dtype_hint"" }, + ""tf.math.count_nonzero"": { + ""input_tensor"": ""input"", + ""keep_dims"": ""keepdims"", + ""reduction_indices"": ""axis"", + }, ""tf.nn.pool"": { ""dilation_rate"": ""dilations"" - } + }, } # Mapping from function to the new name of the function ",0,test 2ba2e6b25891e63c425c7f239ac75bcb7f8f9bda,tensorflow/tensorflow,"Change signature of tf.count_nonzero for TF 2.0. PiperOrigin-RevId: 221536352",tf_upgrade_v2_test.py,"@@ -110,6 +110,18 @@ class TestUpgrade(test_util.TensorFlowTestCase): % ""tf.estimator.LinearClassifier""]) self.assertIn(""loss_reduction has been changed"", report) + def testCountNonZeroChanges(self): + text = ( + ""tf.math.count_nonzero(input_tensor=input, dtype=dtype, name=name, "" + ""reduction_indices=axis, keep_dims=keepdims)\n"" + ) + _, unused_report, unused_errors, new_text = self._upgrade(text) + expected_text = ( + ""tf.math.count_nonzero(input=input, dtype=dtype, name=name, "" + ""axis=axis, keepdims=keepdims)\n"" + ) + self.assertEqual(new_text, expected_text) + class TestUpgradeFiles(test_util.TensorFlowTestCase): ",0,test 1284047dca0dd58745a31cd2fd68da3173c7e120,tensorflow/tensorflow,"* Don't copy on-host and on-device shapes locally. * Use ForEachMutableElement rather than the iterators, as it is much quicker. There is still room for improvement; ForEachMutableElement is linear in the number of nodes in the shape tree but we want to be linear in the number of nodes in the sub shape tree. But I feel this is a good enough improvement. 
PiperOrigin-RevId: 195384423",xla_launch_util.cc,"@@ -77,16 +77,16 @@ Status XlaAllocator::Deallocate(int device_ordinal, se::DeviceMemoryBase* mem) { return Status::OK(); } -namespace { +namespace internal { // Return the 'index''th subtree of the given ShapedBuffer as a // ScopedShapedBuffer. The returned ScopedShapedBuffer takes ownership of the // subtree, and sets the input's buffer pointers to nullptr for the subtree. ScopedShapedBuffer ExtractSubShapedBuffer( ShapedBuffer* shaped_buffer, int index, xla::DeviceMemoryAllocator* allocator) { - xla::Shape on_host_shape = xla::ShapeUtil::GetTupleElementShape( + const xla::Shape& on_host_shape = xla::ShapeUtil::GetTupleElementShape( shaped_buffer->on_host_shape(), index); - xla::Shape on_device_shape = xla::ShapeUtil::GetTupleElementShape( + const xla::Shape& on_device_shape = xla::ShapeUtil::GetTupleElementShape( shaped_buffer->on_device_shape(), index); ShapedBuffer sub_shaped_buffer(on_host_shape, on_device_shape, @@ -98,14 +98,18 @@ ScopedShapedBuffer ExtractSubShapedBuffer( sub_shape_tree.CopySubtreeFrom(shape_tree, /*source_base_index=*/{index}, /*target_base_index=*/{}); - for (auto& index_to_buffer : shape_tree) { - if (!index_to_buffer.first.empty() && index_to_buffer.first[0] == index) { - index_to_buffer.second = se::DeviceMemoryBase(nullptr, 0); - } - } + shape_tree.ForEachMutableElement( + [index](const xla::ShapeIndex& shape_index, + tensorflow::se::DeviceMemoryBase* data) { + // shape_index is empty for the root node. Ignore that. + if (!shape_index.empty() && shape_index[0] == index) { + *data = tensorflow::se::DeviceMemoryBase(nullptr, 0); + } + }); return ScopedShapedBuffer(std::move(sub_shaped_buffer), allocator); } -} // namespace +} // namespace internal +using internal::ExtractSubShapedBuffer; XlaComputationLaunchContext::XlaComputationLaunchContext( int64 num_resource_args, xla::LocalClient* client, ",0,test 1284047dca0dd58745a31cd2fd68da3173c7e120,tensorflow/tensorflow,"* Don't copy on-host and on-device shapes locally. * Use ForEachMutableElement rather than the iterators, as it is much quicker. There is still room for improvement; ForEachMutableElement is linear in the number of nodes in the shape tree but we want to be linear in the number of nodes in the sub shape tree. But I feel this is a good enough improvement. PiperOrigin-RevId: 195384423",xla_launch_util.h,"@@ -140,6 +140,17 @@ class XlaTensorBuffer : public TensorBuffer { Allocator* allocator_; }; +// Exposed in this header file for microbenchmarking purposes, but this is an +// internal implementation detail. +namespace internal { +// Return the 'index''th subtree of the given ShapedBuffer as a +// ScopedShapedBuffer. The returned ScopedShapedBuffer takes ownership of the +// subtree, and sets the input's buffer pointers to nullptr for the subtree. +xla::ScopedShapedBuffer ExtractSubShapedBuffer( + xla::ShapedBuffer* shaped_buffer, int index, + xla::DeviceMemoryAllocator* allocator); +} // namespace internal + } // namespace tensorflow #endif ",0,test 1284047dca0dd58745a31cd2fd68da3173c7e120,tensorflow/tensorflow,"* Don't copy on-host and on-device shapes locally. * Use ForEachMutableElement rather than the iterators, as it is much quicker. There is still room for improvement; ForEachMutableElement is linear in the number of nodes in the shape tree but we want to be linear in the number of nodes in the sub shape tree. But I feel this is a good enough improvement. 
PiperOrigin-RevId: 195384423",xla_launch_util_test.cc,"@@ -0,0 +1,64 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Contains microbenchmarks for performance critical functions in +// xla_launch_util.cc. + +#include ""tensorflow/compiler/jit/xla_launch_util.h"" +#include ""tensorflow/compiler/tf2xla/shape_util.h"" +#include ""tensorflow/core/platform/test.h"" +#include ""tensorflow/core/platform/test_benchmark.h"" + +// Test ExtractSubBuffer with different depths (depth of ShapeTree) and fan-outs +// (cardinality of each non-leaf node's children). +void BM_ExtractSubBuffer(int iters, int depth, int fan_out) { + tensorflow::testing::StopTiming(); + xla::Shape shape = xla::ShapeUtil::MakeShape(xla::F32, {32, 64, 128}); + for (int i = 0; i < depth; ++i) { + std::vector shapes(fan_out, shape); + shape = xla::ShapeUtil::MakeTupleShape(shapes); + } + xla::ShapedBuffer shaped_buffer(shape, shape, /*platform=*/nullptr, + /*device_ordinal=*/0); + tensorflow::testing::StartTiming(); + for (int i = 0; i < iters; ++i) { + // Extract a buffer from approximately the middle of the first level of the + // tree. + tensorflow::internal::ExtractSubShapedBuffer(&shaped_buffer, + /*index=*/fan_out / 2, + /*allocator=*/nullptr) + .release(); + } +} + +BENCHMARK(BM_ExtractSubBuffer) + ->ArgPair(1, 4) + ->ArgPair(1, 8) + ->ArgPair(1, 32) + ->ArgPair(1, 64) + ->ArgPair(1, 128) + ->ArgPair(1, 256) + ->ArgPair(1, 512) + ->ArgPair(2, 4) + ->ArgPair(2, 8) + ->ArgPair(2, 32) + ->ArgPair(2, 64) + ->ArgPair(2, 128); + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + tensorflow::testing::RunBenchmarks(); + return RUN_ALL_TESTS(); +} ",0,test 6381a7b127bd276a3817a93e5423b15a06c33419,tensorflow/tensorflow,"[tf.data] Add a check for ram_budget == 0 to avoid division by 0 exception when ram_budget is not set. PiperOrigin-RevId: 410071934 Change-Id: Ida9fb401ba24367e48066c8a899962877429c3da",model.cc,"@@ -46,6 +46,9 @@ bool AreAllParametersMax(const Model::ModelParameters& parameters) { // Records the ram usage of hill climbing algorithm. void RecordAutotuneRamUsage(int64 ram_budget, double max_buffered_bytes) { + if (ram_budget == 0) { + return; + } const auto memory_info = port::GetMemoryInfo(); // Records ratio of memory used since RootDataset was created over the ram // budget. ",0,train eb4577c283452c601afcaa07da3e21722b826df7,tensorflow/tensorflow,"Initialize alloc_fns with 0s Change: 150117973",grpc_server_lib.cc,"@@ -15,6 +15,7 @@ limitations under the License. 
#include ""tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h"" +#include #include #include @@ -330,6 +331,7 @@ class GrpcServerRegistrar { public: GrpcServerRegistrar() { gpr_allocation_functions alloc_fns; + memset(&alloc_fns, 0, sizeof(alloc_fns)); alloc_fns.malloc_fn = port::Malloc; alloc_fns.realloc_fn = port::Realloc; alloc_fns.free_fn = port::Free; ",0,train 7f37ded0367a87bc4ed7e83679ce941c0bac13b7,tensorflow/tensorflow,"Adding an environment variable to control whether to use Cudnn with AvgPool: TF_AVGPOOL_USE_CUDNN. The default is false for now. Change: 127123594",avgpooling_op.cc,"@@ -39,6 +39,7 @@ limitations under the License. #if GOOGLE_CUDA #include ""tensorflow/core/kernels/maxpooling_op_gpu.h"" #include ""tensorflow/core/kernels/pooling_ops_common_gpu.h"" +#include ""tensorflow/core/util/use_cudnn.h"" #endif // GOOGLE_CUDA namespace tensorflow { @@ -150,7 +151,7 @@ class AvgPoolingOp : public UnaryOp { TensorShape output_shape = params.forward_output_shape(); - if (data_format_ == FORMAT_NCHW) { + if (internal::AvgPoolUseCudnn() || data_format_ == FORMAT_NCHW) { DnnPoolingOp::Compute( context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, stride_, padding_, data_format_, tensor_in, output_shape); ",0,train 7f37ded0367a87bc4ed7e83679ce941c0bac13b7,tensorflow/tensorflow,"Adding an environment variable to control whether to use Cudnn with AvgPool: TF_AVGPOOL_USE_CUDNN. The default is false for now. Change: 127123594",use_cudnn.cc,"@@ -40,4 +40,11 @@ bool CudnnUseAutotune() { return ReadBoolFromEnvVar(""TF_CUDNN_USE_AUTOTUNE"", true); } +namespace internal { + +bool AvgPoolUseCudnn() { + return ReadBoolFromEnvVar(""TF_AVGPOOL_USE_CUDNN"", false); +} + +} // namespace internal } // namespace tensorflow ",0,train 7f37ded0367a87bc4ed7e83679ce941c0bac13b7,tensorflow/tensorflow,"Adding an environment variable to control whether to use Cudnn with AvgPool: TF_AVGPOOL_USE_CUDNN. The default is false for now. Change: 127123594",use_cudnn.h,"@@ -23,6 +23,12 @@ namespace tensorflow { bool CanUseCudnn(); bool CudnnUseAutotune(); +namespace internal { + +// This function is for transition only. And it may go away at any time. +bool AvgPoolUseCudnn(); + +} // namespace internal } // namespace tensorflow #endif // TENSORFLOW_UTIL_USE_CUDNN_H_ ",0,train a18853eb0e18b47952bab3ba5df582b0f8b3516d,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-06-25 PiperOrigin-RevId: 318231043 Change-Id: Ic8bf82284920a04fe9d1589753905c69bbf8b8e4",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 6, 24) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 6, 25) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train e4c94c279e1f7990d23eab8bdc29ad1ed6277916,tensorflow/tensorflow,"Simplify control flow in ops/functional_ops.py when creating PartitionedCall The current branching is confusing: reviewers glancing at the code assume that the first branch corresponds to the eager mode, and miss `or len(tout)`. Handling two separate cases in this branch requires a correlated if- at the end, which can only take the empty branch in eager mode. 
Moreover, the logic is subtly incorrect: the user-provided `config` is not considered in the second branch. PiperOrigin-RevId: 286595076 Change-Id: I29fb3533f19923a8b63fe1cd7ea94848a74ac6ec",functional_ops.py,"@@ -1125,7 +1125,7 @@ def partitioned_call(args, if executor_type is None: executor_type = """" - if executing_eagerly or len(tout): + if executing_eagerly: if f.stateful_ops: outputs = gen_functional_ops.stateful_partitioned_call( args=args, @@ -1158,8 +1158,7 @@ def partitioned_call(args, # When running in graph mode, the graph and function graphs are optimized # (i.e. run through grappler) per the session options, so we can disable any # eager-specific rewriting. - config_proto = attr_value_pb2.AttrValue( - s=function_utils.get_disabled_rewriter_config()) + config_proto = attr_value_pb2.AttrValue(s=config) graph = ops.get_default_graph() f.add_to_graph(graph) @@ -1168,7 +1167,7 @@ def partitioned_call(args, op_name, args, tout, - name=""PartitionedFunctionCall"", + name=op_name, attrs={ ""Tin"": tin_attr, ""Tout"": tout_attr, ",0,train e4c94c279e1f7990d23eab8bdc29ad1ed6277916,tensorflow/tensorflow,"Simplify control flow in ops/functional_ops.py when creating PartitionedCall The current branching is confusing: reviewers glancing at the code assume that the first branch corresponds to the eager mode, and miss `or len(tout)`. Handling two separate cases in this branch requires a correlated if- at the end, which can only take the empty branch in eager mode. Moreover, the logic is subtly incorrect: the user-provided `config` is not considered in the second branch. PiperOrigin-RevId: 286595076 Change-Id: I29fb3533f19923a8b63fe1cd7ea94848a74ac6ec",utils_test.py,"@@ -57,7 +57,7 @@ class UtilsTest(test.TestCase): x = constant_op.constant(1, name=""x"") y = constant_op.constant(2, name=""y"") init_op_info = utils.build_tensor_info_from_op(my_init_fn(x, y)) - self.assertEqual(""PartitionedFunctionCall"", init_op_info.name) + self.assertEqual(""PartitionedCall"", init_op_info.name) self.assertEqual(types_pb2.DT_INVALID, init_op_info.dtype) self.assertEqual(0, len(init_op_info.tensor_shape.dim)) ",0,train 20ab9fbeba8652b17e87e956f4cbf2c457128fdf,tensorflow/tensorflow,"Add code path in estimator to use the new distributed strategy api PiperOrigin-RevId: 202214880",tpu_strategy.py,"@@ -47,7 +47,12 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy): return self._call_dataset_fn(dataset_fn) # TODO(priyag): Deal with OutOfRange errors. - def run_steps_on_dataset(self, fn, iterator, iterations): + # TODO(sourabhbajaj): Remove the initial_values parameter + def _run_steps_on_dataset(self, fn, iterator, iterations, + initial_values=None): + if initial_values is None: + initial_values = [] + # Enqueue ops shapes = nest.flatten(iterator.output_shapes) if any([not s.is_fully_defined() for s in shapes]): @@ -93,23 +98,35 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy): return nest.pack_sequence_as(iterator.output_shapes, dequeued) # Wrap `fn` for repeat. - run_fn = lambda: fn(dequeue_fn()) + def run_fn(*args, **kwargs): + del args, kwargs + return fn(dequeue_fn()) # Repeat + # TODO(sourabhbajaj): The input to while loop should be based on the output + # type of the step_fn def iterate_on_tpu(): - return tpu.repeat(iterations, run_fn, []) + return tpu.repeat(iterations, run_fn, initial_values) # Re-write and distribute computation. + # TODO(sourabhbajaj): Convert the output to perDevice variable and + # implement support for that in reduce. 
tpu_result = tpu.batch_parallel( iterate_on_tpu, [], num_shards=self._num_cores_per_host) - return control_flow_ops.group(tpu_result, enqueue_ops) + return control_flow_ops.group(tpu_result, enqueue_ops), tpu_result def _call_for_each_tower(self, fn, *args, **kwargs): kwargs.pop('run_concurrently', None) with one_device_strategy._OneDeviceTowerContext(self): # pylint: disable=protected-access return fn(*args, **kwargs) + def get_initialization_ops(self): + return [tpu.initialize_system()] + + def get_finalize_ops(self): + return [tpu.shutdown_system()] + def _reduce(self, method_string, value, destinations): del destinations # TPU is graph mode only. Rely on implicit Send/Recv. if method_string == 'mean': ",0,train 20ab9fbeba8652b17e87e956f4cbf2c457128fdf,tensorflow/tensorflow,"Add code path in estimator to use the new distributed strategy api PiperOrigin-RevId: 202214880",estimator.py,"@@ -71,6 +71,7 @@ from tensorflow.python.util.tf_export import estimator_export _VALID_MODEL_FN_ARGS = set( ['features', 'labels', 'mode', 'params', 'self', 'config']) +_INITIAL_TRAINING_LOSS = 1e7 @estimator_export('estimator.Estimator') @@ -1183,25 +1184,76 @@ class Estimator(object): Loss from training """""" self._distribution.configure(self._session_config) + + # TODO(sourabhbajaj): Remove this hack once we migrate the other strategies + # to use the new API + is_tpu_strategy = self._distribution.__class__.__name__ == 'TPUStrategy' + worker_hooks = [] with ops.Graph().as_default() as g: with self._distribution.scope(): random_seed.set_random_seed(self._config.tf_random_seed) - features, labels, input_hooks = ( - self._get_features_and_labels_from_input_fn( - input_fn, model_fn_lib.ModeKeys.TRAIN)) - worker_hooks.extend(input_hooks) - global_step_tensor = self._create_and_assert_global_step(g) - # we want to add to the global collection in the main thread not the - # tower threads. - ops.add_to_collection(training_util.GLOBAL_STEP_READ_KEY, - self._distribution.read_var(global_step_tensor)) - grouped_estimator_spec = self._distribution.call_for_each_tower( - self._call_model_fn, - features, - labels, # although this will be None it seems - model_fn_lib.ModeKeys.TRAIN, - self.config) + + if is_tpu_strategy: + # Create the iterator for run_on_dataset function + # TODO(sourabhbajaj): refactor this out to call a function on the + # strategy + dataset = self._distribution.distribute_dataset( + lambda: self._call_input_fn(input_fn, # pylint: disable=g-long-lambda + model_fn_lib.ModeKeys.TRAIN)) + iterator = dataset.make_initializable_iterator() + worker_hooks.append( + estimator_util._DatasetInitializerHook(iterator)) # pylint: disable=protected-access + + global_step_tensor = self._create_and_assert_global_step(g) + # we want to add to the global collection in the main thread not the + # tower threads. 
+ ops.add_to_collection(training_util.GLOBAL_STEP_READ_KEY, + self._distribution.read_var(global_step_tensor)) + + # TODO(sourabhbajaj): Remove this once the context input to step_fn + # is implemented + estimator_spec_wrapper = {} + + # Create a step_fn from the train_op of grouped_estimator_spec + def step_fn(inputs): + """"""A single step that is passed to run_on_dataset."""""" + features, labels = inputs + estimator_spec = self._distribution.call_for_each_tower( + self._call_model_fn, + features, + labels, + model_fn_lib.ModeKeys.TRAIN, + self.config) + estimator_spec_wrapper['grouped_estimator_spec'] = estimator_spec + with ops.control_dependencies([estimator_spec.train_op]): + return array_ops.identity(estimator_spec.loss) + + # Create new train_op post graph rewrites + # TODO(sourabhbajaj): Make sure train_steps and tpu_iterations + # work correctly. Currently hardcoded at 2 + distributed_train_op, tpu_result = \ + self._distribution._run_steps_on_dataset( # pylint: disable=protected-access + step_fn, iterator, 2, [_INITIAL_TRAINING_LOSS]) + + grouped_estimator_spec = estimator_spec_wrapper[ + 'grouped_estimator_spec'] + else: + features, labels, input_hooks = ( + self._get_features_and_labels_from_input_fn( + input_fn, model_fn_lib.ModeKeys.TRAIN)) + worker_hooks.extend(input_hooks) + global_step_tensor = self._create_and_assert_global_step(g) + # we want to add to the global collection in the main thread not the + # tower threads. + ops.add_to_collection(training_util.GLOBAL_STEP_READ_KEY, + self._distribution.read_var(global_step_tensor)) + grouped_estimator_spec = self._distribution.call_for_each_tower( + self._call_model_fn, + features, + labels, # although this will be None it seems + model_fn_lib.ModeKeys.TRAIN, + self.config) # TODO(anjalisridhar): Figure out how to resolve the following scaffold # parameters: init_feed_dict, init_fn. @@ -1287,13 +1339,28 @@ class Estimator(object): training_chief_hooks = get_hooks_from_the_first_device( grouped_estimator_spec.training_chief_hooks) + # TODO(sourabhbajaj): Merge the two code paths once we can + # handle per device variables correctly in reduce and can output + # the loss scaler. 
+ if is_tpu_strategy: + loss = self._distribution.unwrap( + self._distribution.reduce(distribute_lib.get_loss_reduction(), + tpu_result[0])[0])[0] + worker_hooks.append( + estimator_util.StrategyInitFinalizeHook( + self._distribution.get_initialization_ops, + self._distribution.get_finalize_ops)) + else: + loss = self._distribution.unwrap( + self._distribution.reduce(distribute_lib.get_loss_reduction(), + grouped_estimator_spec.loss, + destinations='/device:CPU:0'))[0] + distributed_train_op = grouped_estimator_spec.train_op + estimator_spec = model_fn_lib.EstimatorSpec( mode=grouped_estimator_spec.mode, - loss=self._distribution.unwrap( - self._distribution.reduce(distribute_lib.get_loss_reduction(), - grouped_estimator_spec.loss, - destinations='/device:CPU:0'))[0], - train_op=self._distribution.group(grouped_estimator_spec.train_op), + loss=loss, + train_op=self._distribution.group(distributed_train_op), training_hooks=training_hooks, training_chief_hooks=training_chief_hooks, scaffold=scaffold) ",0,train 20ab9fbeba8652b17e87e956f4cbf2c457128fdf,tensorflow/tensorflow,"Add code path in estimator to use the new distributed strategy api PiperOrigin-RevId: 202214880",util.py,"@@ -22,6 +22,7 @@ from __future__ import print_function import os import time +from tensorflow.core.protobuf import config_pb2 from tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import training @@ -129,3 +130,24 @@ class _DatasetInitializerHook(training.SessionRunHook): def after_create_session(self, session, coord): del coord session.run(self._initializer) + + +class StrategyInitFinalizeHook(training.SessionRunHook): + """"""Creates a SessionRunHook that initializes and shutsdown devices."""""" + + def __init__(self, initialization_fn, finalize_fn): + self._initialization_fn = initialization_fn + self._finalize_fn = finalize_fn + + def begin(self): + self._init_ops = self._initialization_fn() + self._finalize_ops = self._finalize_fn() + + def after_create_session(self, session, coord): + logging.info('Initialize system') + session.run(self._init_ops, + options=config_pb2.RunOptions(timeout_in_ms=5 * 60 * 1000)) + + def end(self, session): + logging.info('Finalize system.') + session.run(self._finalize_ops) ",0,train cf549e5f36d122e91c774bd7da44519c9f7792ff,tensorflow/tensorflow,"Update Softmax interface for fixed point int8 Op/kernel. Use 1/256 as scale and -128 as zero point for output. PiperOrigin-RevId: 232406765",activations.cc,"@@ -60,9 +60,9 @@ namespace { TfLiteStatus CheckOutputQuantParams(TfLiteContext* context, const TfLiteTensor* input, const TfLiteTensor* output) { + TF_LITE_ENSURE(context, output->params.scale == 1. / 256); if (input->type == kTfLiteUInt8) { TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); - TF_LITE_ENSURE(context, output->params.scale == 1. / 256); } else { TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128); } ",0,train cf549e5f36d122e91c774bd7da44519c9f7792ff,tensorflow/tensorflow,"Update Softmax interface for fixed point int8 Op/kernel. Use 1/256 as scale and -128 as zero point for output. 
PiperOrigin-RevId: 232406765",subgraph_quantizer.cc,"@@ -325,6 +325,27 @@ TfLiteStatus SubgraphQuantizer::AsymmetricQuantizeSingleInputOutputOp( return kTfLiteOk; } +TfLiteStatus SubgraphQuantizer::AsymmetricQuantizeSoftmax( + BuiltinOperator op_code, OperatorT* op) { + TF_LITE_ENSURE_EQ(this->error_reporter_, op->inputs.size(), 1); + TF_LITE_ENSURE_EQ(this->error_reporter_, op->outputs.size(), 1); + + if (IsSubgraphInput(op->inputs[0])) { + TF_LITE_ENSURE_STATUS(AsymmetricQuantizeTensor(op_code, op->inputs[0])); + } + + auto output_tensor = subgraph_->tensors[op->outputs[0]].get(); + if (output_tensor->type != TensorType_FLOAT32) { + return kTfLiteOk; + } + + // Softmax output is hardcoded to have 1/256 as scale and -128 as zero point. + output_tensor->type = TensorType_INT8; + output_tensor->quantization->scale = {1.0f / 256.0f}; + output_tensor->quantization->zero_point = {-128}; + return kTfLiteOk; +} + bool SubgraphQuantizer::IsSubgraphInput(int32_t tensor_idx) const { return std::find(subgraph_->inputs.begin(), subgraph_->inputs.end(), tensor_idx) != subgraph_->inputs.end(); @@ -342,8 +363,9 @@ TfLiteStatus SubgraphQuantizer::QuantizeOperator(int op_idx) { case BuiltinOperator_MAX_POOL_2D: return PropagateMinMaxForAvgAndMaxPool(op_code, op); case BuiltinOperator_SQUEEZE: - case BuiltinOperator_SOFTMAX: return AsymmetricQuantizeSingleInputOutputOp(op_code, op); + case BuiltinOperator_SOFTMAX: + return AsymmetricQuantizeSoftmax(op_code, op); default: return kTfLiteError; } ",0,train cf549e5f36d122e91c774bd7da44519c9f7792ff,tensorflow/tensorflow,"Update Softmax interface for fixed point int8 Op/kernel. Use 1/256 as scale and -128 as zero point for output. PiperOrigin-RevId: 232406765",subgraph_quantizer.h,"@@ -51,6 +51,12 @@ class SubgraphQuantizer { TfLiteStatus AsymmetricQuantizeSingleInputOutputOp(BuiltinOperator op_code, OperatorT* op); + // Asymmetric quantizes inputs and outputs of an Softmax Op. + // Input is quantized with the min-max range and output is hardcoded to have + // 1/256 as scale and -128 as zero point. + TfLiteStatus AsymmetricQuantizeSoftmax(BuiltinOperator op_code, + OperatorT* op); + TfLiteStatus AsymmetricQuantizeTensor(BuiltinOperator op_code, int32_t tensor_idx); ",0,train cf549e5f36d122e91c774bd7da44519c9f7792ff,tensorflow/tensorflow,"Update Softmax interface for fixed point int8 Op/kernel. Use 1/256 as scale and -128 as zero point for output. PiperOrigin-RevId: 232406765",subgraph_quantizer_test.cc,"@@ -291,6 +291,7 @@ TEST(SubgraphQuantizerTest, VerifySoftmaxQuantization) { ASSERT_EQ(op->outputs.size(), 1); auto float_graph = readonly_model->subgraphs()->Get(0); + // Verify input. ASSERT_EQ(float_graph->tensors()->Get(op->inputs[0])->type(), TensorType_FLOAT32); ASSERT_EQ(float_graph->tensors()->Get(op->outputs[0])->type(), @@ -306,12 +307,18 @@ TEST(SubgraphQuantizerTest, VerifySoftmaxQuantization) { VerifyAsymmetricQuantizationScale(*float_input_quant_params, *input_quant_params); + // Verify output. 
auto float_output_quant_params = float_graph->tensors()->Get(op->outputs[0])->quantization(); auto output_quant_params = subgraph->tensors[op->outputs[0]]->quantization.get(); - VerifyAsymmetricQuantizationScale(*float_output_quant_params, - *output_quant_params); + ASSERT_EQ(float_output_quant_params->min()->size(), 1); + ASSERT_EQ(float_output_quant_params->max()->size(), 1); + + ASSERT_EQ(output_quant_params->scale.size(), 1); + ASSERT_EQ(output_quant_params->zero_point.size(), 1); + ASSERT_EQ(1.0f / 256.0f, output_quant_params->scale[0]); + ASSERT_EQ(-128, output_quant_params->zero_point[0]); } TEST(SubgraphQuantizerTest, VerifyAvgPoolQuantization) { ",0,train 589deaa9fb5cb1d1b5bddf07538729abbbbee996,tensorflow/tensorflow,"Extracts the 'simplify squeeze node' optimization into its own method. PiperOrigin-RevId: 197968452",constant_folding.cc,"@@ -1937,22 +1937,8 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, } } - if (use_shape_info && IsSqueeze(*node) && - !properties->GetInputProperties(node->name()).empty()) { - // https://www.tensorflow.org/api_docs/python/tf/squeeze mentions it's - // error to squeeze a dimension that is not 1, so we only need to check - // whether the input has > 1 size for each dimension. - const auto& shape = properties->GetInputProperties(node->name())[0].shape(); - // The node is replaceable iff - // unknown_rank == false && (dim_size == 0 || all dims have size > 1) - bool replaceable = !shape.unknown_rank(); - for (int j = 0; replaceable && j < shape.dim_size(); ++j) { - replaceable &= shape.dim(j).size() > 1; - } - if (replaceable) { - ReplaceOperationWithIdentity(0, *properties, node, optimized_graph); - return Status::OK(); - } + if (SimplifySqueeze(*properties, use_shape_info, optimized_graph, node)) { + return Status::OK(); } if (SimplifyPack(optimized_graph, node)) { @@ -2024,6 +2010,30 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node, return Status::OK(); } +bool ConstantFolding::SimplifySqueeze(const GraphProperties& properties, + bool use_shape_info, + GraphDef* optimized_graph, + NodeDef* node) { + if (use_shape_info && IsSqueeze(*node) && + !properties.GetInputProperties(node->name()).empty()) { + // https://www.tensorflow.org/api_docs/python/tf/squeeze mentions it's + // error to squeeze a dimension that is not 1, so we only need to check + // whether the input has > 1 size for each dimension. + const auto& shape = properties.GetInputProperties(node->name())[0].shape(); + // The node is replaceable iff + // unknown_rank == false && (dim_size == 0 || all dims have size > 1) + bool replaceable = !shape.unknown_rank(); + for (int j = 0; replaceable && j < shape.dim_size(); ++j) { + replaceable &= shape.dim(j).size() > 1; + } + if (replaceable) { + ReplaceOperationWithIdentity(0, properties, node, optimized_graph); + return true; + } + } + return false; +} + bool ConstantFolding::SimplifyPack(GraphDef* optimized_graph, NodeDef* node) { if (IsPack(*node) && NumNonControlInputs(*node) == 1 && !OptimizedNodeExists(*node, ""_const_axis"")) { ",0,train 589deaa9fb5cb1d1b5bddf07538729abbbbee996,tensorflow/tensorflow,"Extracts the 'simplify squeeze node' optimization into its own method. PiperOrigin-RevId: 197968452",constant_folding.h,"@@ -170,6 +170,10 @@ class ConstantFolding : public GraphOptimizer { // Simplifies Pack operation if applicable. bool SimplifyPack(GraphDef* optimized_graph, NodeDef* node); + // Simplifies a Squeeze operation to an Identity operation if applicable. 
+ bool SimplifySqueeze(const GraphProperties& properties, bool use_shape_info, + GraphDef* optimized_graph, NodeDef* node); + // Points to an externally provided device or to owned_device_; RewriterConfig::Toggle opt_level_; DeviceBase* cpu_device_; ",0,train ecbb8b1ccac295537827dfe1ca25ddb03ca5f22b,tensorflow/tensorflow,"Add helper function for Xor in HLO. RELNOTES: n/a PiperOrigin-RevId: 188119450",computation_builder.cc,"@@ -868,6 +868,14 @@ ComputationDataHandle ComputationBuilder::Or( return BinaryOp(BINOP_OR, lhs, rhs, broadcast_dimensions); } +// TODO(b/65209188): Create a dedicated lowering for Xor +ComputationDataHandle ComputationBuilder::Xor( + const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, + tensorflow::gtl::ArraySlice broadcast_dimensions) { + return Or(And(Not(lhs), rhs, broadcast_dimensions), + And(lhs, Not(rhs), broadcast_dimensions)); +} + ComputationDataHandle ComputationBuilder::Not( const ComputationDataHandle& operand) { return UnaryOp(UNOP_NOT, operand); ",0,train ecbb8b1ccac295537827dfe1ca25ddb03ca5f22b,tensorflow/tensorflow,"Add helper function for Xor in HLO. RELNOTES: n/a PiperOrigin-RevId: 188119450",computation_builder.h,"@@ -512,6 +512,10 @@ class ComputationBuilder { const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions = {}); + ComputationDataHandle Xor( + const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, + tensorflow::gtl::ArraySlice broadcast_dimensions = {}); + ComputationDataHandle Not(const ComputationDataHandle& operand); ComputationDataHandle ShiftLeft( ",0,train d2d9df08a6b3f64977e22881cc59b0011f58b9df,tensorflow/tensorflow,Finish the port to upstream TF from TF 2.4,ir_emitter_unnested.cc,"@@ -1806,6 +1806,20 @@ StatusOr IrEmitterUnnested::GetMlirEmitterInput( return input; } +bool IsRowMajor(mlir::Operation* op) { + if (auto attr = mlir::GetLayoutFromMlirHlo(op)) { + std::vector minor_to_major; + absl::c_transform( + attr, std::back_inserter(minor_to_major), + std::function(&llvm::APInt::getZExtValue)); + bool ret = std::is_sorted(minor_to_major.begin(), + minor_to_major.end(), std::greater()); + return ret; + } + // It is row major by default. + return true; +} + // TODO(timshen): update the comment once the HandleFusion code path deleted. // // This is migrated from IrEmitter::HandleFusion() with IrEmitterUnnested as the @@ -1882,18 +1896,23 @@ Status IrEmitterUnnested::EmitLoopFusionFromMlir( }(); bool row_optimized = fusion.getFusionResults().size() == 1 && // Not tested with MOF. - absl::c_all_of(GetHloOperands(fusion), [](const mlir::Value& op) { + absl::c_all_of(GetHloOperands(fusion), [](const mlir::Value& value) { // Only tested when the inputs are row-major. So only enable that case. // Maybe it would works if only the inner dimensions is contiguous. - return true;//TODO: LayoutUtil::IsMonotonicWithDim0Major(instr->shape().layout()); + if (auto op = value.getDefiningOp()) { + return IsRowMajor(value.getDefiningOp()); + } + // Reuse TypeToShape to not duplicate the layout convertion code. + return LayoutUtil::IsMonotonicWithDim0Major(TypeToShape(value.getType()).layout()); }) && // Only tested when the output is row-major. 
- //LayoutUtil::IsMonotonicWithDim0Major(hlo.shape().layout()); - true; + absl::c_all_of(GetOutputOps(fusion), IsRowMajor); + bool some_row_broadcasting = false; for (mlir::Operation& op : fusion.region().front()) { if (mlir::isa(op) ) { + mlir::lmhlo::TerminatorOp, mlir::mhlo::ReturnOp, + mlir::mhlo::ConstOp, mlir::lmhlo::ConstOp>(op) ) { continue; } HloOpcode opcode = *MhloToHloOpcode(&op); @@ -1901,10 +1920,10 @@ Status IrEmitterUnnested::EmitLoopFusionFromMlir( continue; } - if (auto broadcast = mlir::dyn_cast(op)) { + if (auto broadcast = mlir::dyn_cast(op)) { std::vector broadcast_dimensions; - if (broadcast.broadcast_sizes().size() > 0) { - for (const llvm::APInt& int_value : broadcast.broadcast_sizes()) { + if (broadcast.broadcast_dimensions().size() > 0) { + for (const llvm::APInt& int_value : broadcast.broadcast_dimensions()) { broadcast_dimensions.push_back(int_value.getSExtValue()); } } @@ -1915,12 +1934,13 @@ Status IrEmitterUnnested::EmitLoopFusionFromMlir( continue; } if (broadcast_dimensions.size() == 1 && - broadcast_dimensions.back() != (rank - 1)) { + broadcast_dimensions.back() == (rank - 1)) { some_row_broadcasting = true; + continue; } } row_optimized = false; - VLOG(3) << ""Row vectorization not enabled due to this op: "" << HloOpcodeString(opcode); + VLOG(2) << ""Row vectorization not enabled due to this op: "" << MlirToString(&op); break; } // Trigger only when there is a row broadcasting. ",0,test 06eea697f10cd0004b6d68dda49a74bd3a7870f6,tensorflow/tensorflow,"Replace TraceMe for EagerCopyToDevice with TraceMe recording loops calling it. PiperOrigin-RevId: 257290089",execute.cc,"@@ -214,6 +214,8 @@ Status ValidateInputTypeAndPlacement( EagerContext* ctx, EagerOperation* op, const core::RefCountPtr& kernel, RunMetadata* run_metadata) { + profiler::TraceMe activity(""ValidateInputTypeAndPlacement"", + profiler::TraceMeLevel::kInfo); if (kernel->num_inputs() != op->Inputs().size()) { return errors::InvalidArgument(""expected "", kernel->num_inputs(), "" inputs, got "", op->Inputs().size()); @@ -487,6 +489,8 @@ Status EagerLocalExecute(EagerOperation* op, std::unordered_map input_resource_variable_dtypes_and_shapes; if (is_multi_device_function) { + profiler::TraceMe activity(""EagerCopyToDeviceAndAddCacheKey"", + profiler::TraceMeLevel::kInfo); input_dev_ptrs.reserve(op->Inputs().size()); // All inputs need to be on local devices. // TODO(b/122851476): This is a limitation of the current code base (but @@ -807,34 +811,38 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, eager::Operation* remote_op = request->add_queue()->mutable_operation(); - for (int i = 0; i < op->Inputs().size(); i++) { - tensorflow::TensorHandle* input = op->Inputs()[i]; - tensorflow::Device* input_device = input->device(); - if (op->Device() != input_device && - // If the expected and actual devices are on the same task, don't - // explicitly copy, and instead depend on the copy to happen locally - // when the op is executed on the device. - !ctx->OnSameTask(op->Device(), input_device)) { - tensorflow::Device* remote_cpu_device; - TF_RETURN_IF_ERROR( - ctx->CPUDeviceOnTask(op->Device(), &remote_cpu_device)); - // TODO(b/110044833): It's possible the same tensor gets copied to the - // remote device repeatedly. - // Always copy to the remote CPU so that the actual device can be - // correctly determined after the kernel is selected/instantiated, since - // the op might have its inputs on host memory. 
- TensorHandle* handle = nullptr; - TF_RETURN_IF_ERROR( - MaybeCopyInputToExpectedDevice(op, op->Device(), i, remote_cpu_device, - /* run_metadata= */ nullptr, &handle)); - op->UpdateInput(i, handle); - input = handle; - input_device = remote_cpu_device; - // Unref handle since it has a ref as an input now - handle->Unref(); - } + { + profiler::TraceMe activity(""CopyInputToExpectedDevice"", + profiler::TraceMeLevel::kInfo); + for (int i = 0; i < op->Inputs().size(); i++) { + tensorflow::TensorHandle* input = op->Inputs()[i]; + tensorflow::Device* input_device = input->device(); + if (op->Device() != input_device && + // If the expected and actual devices are on the same task, don't + // explicitly copy, and instead depend on the copy to happen locally + // when the op is executed on the device. + !ctx->OnSameTask(op->Device(), input_device)) { + tensorflow::Device* remote_cpu_device; + TF_RETURN_IF_ERROR( + ctx->CPUDeviceOnTask(op->Device(), &remote_cpu_device)); + // TODO(b/110044833): It's possible the same tensor gets copied to the + // remote device repeatedly. + // Always copy to the remote CPU so that the actual device can be + // correctly determined after the kernel is selected/instantiated, since + // the op might have its inputs on host memory. + TensorHandle* handle = nullptr; + TF_RETURN_IF_ERROR(MaybeCopyInputToExpectedDevice( + op, op->Device(), i, remote_cpu_device, + /* run_metadata= */ nullptr, &handle)); + op->UpdateInput(i, handle); + input = handle; + input_device = remote_cpu_device; + // Unref handle since it has a ref as an input now + handle->Unref(); + } - TF_RETURN_IF_ERROR(AddRemoteInput(remote_op, input, input_device)); + TF_RETURN_IF_ERROR(AddRemoteInput(remote_op, input, input_device)); + } } PrepareRemoteOp(remote_op, op); @@ -1259,8 +1267,7 @@ Status ExecuteSend(EagerContext* ctx, Device* device, TensorHandle* h, } else { eager::EagerClient* eager_client; uint64 context_id = ctx->GetContextId(); - TF_RETURN_IF_ERROR( - ctx->GetClient(device, &eager_client)); + TF_RETURN_IF_ERROR(ctx->GetClient(device, &eager_client)); std::unique_ptr request(new eager::EnqueueRequest); eager::EnqueueResponse response; @@ -1328,8 +1335,7 @@ Status ExecuteRecv(EagerContext* ctx, Device* device, DataType dtype, } else { eager::EagerClient* eager_client; uint64 context_id = ctx->GetContextId(); - TF_RETURN_IF_ERROR( - ctx->GetClient(device, &eager_client)); + TF_RETURN_IF_ERROR(ctx->GetClient(device, &eager_client)); std::unique_ptr request(new eager::EnqueueRequest); eager::EnqueueResponse response; @@ -1385,8 +1391,6 @@ string GetUniqueWireID() { Status EagerCopyToDevice(TensorHandle* h, EagerContext* ctx, Device* device, bool mirror, TensorHandle** result) { - profiler::TraceMe activity(""EagerCopyToDevice"", - profiler::TraceMeLevel::kInfo); Device* send_device = h->DeviceOrHostCPU(ctx); bool sender_is_local = ctx->IsLocal(send_device); ",0,train d82c766b445eea2a52a3f7b6dba05356d8b03648,tensorflow/tensorflow,"Avoid adding the same edge multiple times which will cause checking failure in Graph::ToGraphDefSubRange(). 
PiperOrigin-RevId: 239905587",convert_graph.cc,"@@ -611,17 +611,18 @@ Status CreateTRTNode(const ConversionParams& params, UpdateToEngineNode(infos, pos, *engine_nodes, /*is_input_edge=*/false, conn.outside_node_name, &output_node, &port); } - VLOG(1) << ""Updating "" << engine_node->name() << "":"" << conn.port_number - << "" to "" << output_node->name() << "":"" << port; if (conn.is_control_edge()) { + VLOG(1) << ""Updating control edge from "" << engine_node->name() << "" to "" + << output_node->name(); QCHECK_EQ(Graph::kControlSlot, port); graph->AddControlEdge(engine_node, output_node); } else { - auto new_edge = - graph->AddEdge(engine_node, conn.port_number, output_node, port); - QCHECK(new_edge) << ""Adding a new edge failed "" << engine_node->name() - << "":"" << conn.port_number << "" -> "" - << output_node->name() << "":"" << conn.outside_port; + VLOG(1) << ""Updating data edge from "" << engine_node->name() << "":"" + << conn.port_number << "" to "" << output_node->name() << "":"" + << port; + // Use UpdateEdge() to avoid adding the same edge multiple times. + TF_CHECK_OK( + graph->UpdateEdge(engine_node, conn.port_number, output_node, port)); } } return Status::OK(); ",0,test d82c766b445eea2a52a3f7b6dba05356d8b03648,tensorflow/tensorflow,"Avoid adding the same edge multiple times which will cause checking failure in Graph::ToGraphDefSubRange(). PiperOrigin-RevId: 239905587",convert_graph_test.cc,"@@ -17,6 +17,7 @@ limitations under the License. #include #include +#include ""tensorflow/cc/framework/ops.h"" #include ""tensorflow/cc/framework/scope.h"" #include ""tensorflow/cc/ops/standard_ops.h"" #include ""tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h"" @@ -222,6 +223,76 @@ TEST(ConvertGraphTest, GetDeviceAndAllocator) { } } +class ConvertAfterShapesTest : public ::testing::Test { + public: + Status RunConvertAfterShape(Scope s, GraphDef* output_graph_def) { + // Create GraphProperties. + grappler::GrapplerItem item; + TF_EXPECT_OK(s.ToGraphDef(&item.graph)); + grappler::GraphProperties graph_properties(item); + TF_EXPECT_OK(graph_properties.InferStatically(true)); + + // Construct ConversionParams. + const std::vector output_names{""output""}; + ConversionParams params; + params.input_graph_def = &item.graph; + params.output_names = &output_names; + params.max_workspace_size_bytes = 8 << 20; + params.output_graph_def = output_graph_def; + params.minimum_segment_size = 2; + params.graph_properties = &graph_properties; + params.use_calibration = false; + + return ConvertAfterShapes(params); + } +}; + +TEST_F(ConvertAfterShapesTest, DirectlyConnectedEngines) { + // Create the graph. There will be two TRTEngineOps after the conversion, and + // the upstream TRTEngineOp will have two output connections from the same + // node:port inside the op to the downstream TRTEngineOp. Then, if it adds the + // downstream TRTEngineOp first, when adding the upstream op it'll need to + // update the same output connection twice. This test ensures the correctness + // of the conversion under such condition. + Scope s = Scope::NewRootScope(); + auto input = ops::Placeholder(s.WithOpName(""input""), DT_FLOAT, + ops::Placeholder::Shape({2, 1})); + // We purposefully choose the name of the root node of each segment, so it'll + // process the segment in the downstream first, then, when it tries to update + // the edge between the two TRTEngineOps, it'll try to add the same edge + // multiple times. 
+ auto segment_root_1 = ops::Identity(s.WithOpName(""segment_root_b""), input); + auto add1 = ops::Add(s.WithOpName(""add1""), segment_root_1, segment_root_1); + // Add incompatible reshapes that change the batch dimension. + auto incompatible = + ops::Reshape(s.WithOpName(""reshape1""), add1, Input({1, 2})); + incompatible = + ops::Reshape(s.WithOpName(""reshape2""), incompatible, Input({2, 1})); + + auto add2 = ops::Add(s.WithOpName(""add2""), incompatible, add1); + auto segment_root_2 = ops::Identity(s.WithOpName(""segment_root_a""), add1); + auto add3 = ops::Add(s.WithOpName(""add3""), add2, segment_root_2); + ops::Identity(s.WithOpName(""output""), add3); + + GraphDef output_graph_def; + TF_EXPECT_OK(RunConvertAfterShape(s, &output_graph_def)); + + int num_trt_ops = 0; + for (const NodeDef& node : output_graph_def.node()) { + if (node.name() == ""TRTEngineOp_1"") { + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ(""input"", node.input(0)); + ++num_trt_ops; + } else if (node.name() == ""TRTEngineOp_0"") { + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ(""TRTEngineOp_1"", node.input(0)); + EXPECT_EQ(""reshape2"", node.input(1)); + ++num_trt_ops; + } + } + EXPECT_EQ(2, num_trt_ops); +} + } // namespace convert } // namespace tensorrt } // namespace tensorflow ",0,test 24cbb2a8ce16da1157e6ac64a7d6391bead6b633,tensorflow/tensorflow,"Allow use of configure for gpu remote execution (i.e., no checks on local cuda) PiperOrigin-RevId: 167995887",configure.py,"@@ -995,7 +995,8 @@ def main(): set_computecpp_toolkit_path(environ_cp) set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False) - if environ_cp.get('TF_NEED_CUDA') == '1': + if (environ_cp.get('TF_NEED_CUDA') == '1' and + 'TF_CUDA_CONFIG_REPO' not in environ_cp): set_tf_cuda_version(environ_cp) set_tf_cunn_version(environ_cp) set_tf_cuda_compute_capabilities(environ_cp) ",0,train 1166a62a552ae43301b38ddfd8d6aa8aac6a2824,tensorflow/tensorflow,"Revert ""fix gpu_device.cc sanity"" This reverts commit 0816dc52e439d190df5950860438e3dd05c76f41.",gpu_device.cc,"@@ -34,7 +34,6 @@ limitations under the License. #include #include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" - #include ""tensorflow/core/common_runtime/device_factory.h"" #include ""tensorflow/core/common_runtime/gpu/gpu_device.h"" #include ""tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"" @@ -611,7 +610,7 @@ Status BaseGPUDevice::MaybeCopyTensorToGPU( return err; } - auto wrapped_done = [ to, copy, done = std::move(done) ](const Status& s) { + auto wrapped_done = [to, copy, done = std::move(done)](const Status& s) { if (s.ok()) { *to = std::move(*copy); } @@ -651,7 +650,7 @@ Status BaseGPUDevice::MakeTensorFromProto(const TensorProto& tensor_proto, std::list notifications; Status copy_status; auto copier = [this, &alloc_attrs, ¬ifications, ©_status]( - const Tensor& from, Tensor* to) { + const Tensor& from, Tensor* to) { // Copier isn't run in a multithreaded environment, so we don't // have to worry about the notifications list being modified in parallel. notifications.emplace_back(); @@ -742,8 +741,8 @@ Status ParseVisibleDeviceList(const string& visible_device_list, if (!strings::safe_strto32(platform_gpu_id_str, &platform_gpu_id)) { return errors::InvalidArgument( ""Could not parse entry in 'visible_device_list': '"", - platform_gpu_id_str, ""'. visible_device_list = "", - visible_device_list); + platform_gpu_id_str, + ""'. 
visible_device_list = "", visible_device_list); } if (platform_gpu_id < 0 || platform_gpu_id >= gpu_manager->VisibleDeviceCount()) { @@ -1038,32 +1037,32 @@ Status BaseGPUDeviceFactory::CreateDevices( #if GOOGLE_CUDA err = cudaSetDevice(platform_gpu_id.value()); if (err != cudaSuccess) { - return errors::Internal(""cudaSetDevice() on GPU:"", - platform_gpu_id.value(), "" failed. Status: "", - cudaGetErrorString(err)); + return errors::Internal( + ""cudaSetDevice() on GPU:"", platform_gpu_id.value(), + "" failed. Status: "", cudaGetErrorString(err)); } err = cudaFree(nullptr); if (err != cudaSuccess) { return errors::Internal(""CUDA runtime implicit initialization on GPU:"", - platform_gpu_id.value(), "" failed. Status: "", - cudaGetErrorString(err)); + platform_gpu_id.value(), + "" failed. Status: "", cudaGetErrorString(err)); } #elif TENSORFLOW_USE_ROCM err = hipSetDevice(platform_gpu_id.value()); if (err != hipSuccess) { - return errors::Internal(""hipSetDevice() on GPU:"", - platform_gpu_id.value(), "" failed. Status: "", - hipGetErrorString(err)); + return errors::Internal( + ""hipSetDevice() on GPU:"", platform_gpu_id.value(), + "" failed. Status: "", hipGetErrorString(err)); } err = hipFree(nullptr); if (err != hipSuccess) { return errors::Internal(""ROCm runtime implicit initialization on GPU:"", - platform_gpu_id.value(), "" failed. Status: "", - hipGetErrorString(err)); + platform_gpu_id.value(), + "" failed. Status: "", hipGetErrorString(err)); } #endif } -// Reset to the original device. + // Reset to the original device. #if GOOGLE_CUDA err = cudaSetDevice(original_device); if (err != cudaSuccess) { @@ -1174,13 +1173,15 @@ static string GetShortDeviceDescription(PlatformGpuId platform_gpu_id, cc_minor = 0; } // LINT.IfChange - return strings::StrCat(""device: "", platform_gpu_id.value(), "", name: "", - desc.name(), "", pci bus id: "", desc.pci_bus_id(), + return strings::StrCat(""device: "", platform_gpu_id.value(), + "", name: "", desc.name(), + "", pci bus id: "", desc.pci_bus_id(), "", compute capability: "", cc_major, ""."", cc_minor); -// LINT.ThenChange(//tensorflow/python/platform/test.py) + // LINT.ThenChange(//tensorflow/python/platform/test.py) #elif TENSORFLOW_USE_ROCM - return strings::StrCat(""device: "", platform_gpu_id.value(), "", name: "", - desc.name(), "", pci bus id: "", desc.pci_bus_id()); + return strings::StrCat(""device: "", platform_gpu_id.value(), + "", name: "", desc.name(), + "", pci bus id: "", desc.pci_bus_id()); #endif } @@ -1419,8 +1420,8 @@ struct CudaVersion { // Initialize from version_name in the form of ""3.5"" explicit CudaVersion(const std::string& version_name) { size_t dot_pos = version_name.find('.'); - CHECK(dot_pos != string::npos) << ""Illegal version name: ["" << version_name - << ""]""; + CHECK(dot_pos != string::npos) + << ""Illegal version name: ["" << version_name << ""]""; string major_str = version_name.substr(0, dot_pos); CHECK(strings::safe_strto32(major_str, &major_part)) << ""Illegal version name: ["" << version_name << ""]""; @@ -1445,8 +1446,7 @@ struct CudaVersion { }; std::vector supported_cuda_compute_capabilities = { - TF_CUDA_CAPABILITIES, -}; + TF_CUDA_CAPABILITIES,}; std::vector GetSupportedCudaComputeCapabilities() { auto cuda_caps = supported_cuda_compute_capabilities; @@ -1792,10 +1792,10 @@ void GPUKernelTracker::RecordTerminated(uint64 queued_count) { VLOG(2) << this << "" RecordTerminated queued_count="" << queued_count << "" first_available_="" << first_available_ << "" last_completed_="" << 
last_completed_ - << "" num_pending_="" << num_pending_ - << "" LC="" << ((last_completed_ >= 0) - ? pending_kernels_[last_completed_].queued_count - : -1); + << "" num_pending_="" << num_pending_ << "" LC="" + << ((last_completed_ >= 0) + ? pending_kernels_[last_completed_].queued_count + : -1); DCHECK_NE(first_available_, last_completed_); DCHECK_GT(num_pending_, 0); // Starting just past the last completed entry, find the entry with ",0,train 70cd042611d8edf8cb4d7c55994b9d80c5386205,tensorflow/tensorflow,Set output when side input exists,common_shape_fns.cc,"@@ -1285,6 +1285,12 @@ Status FusedBatchNormGradShape(shape_inference::InferenceContext* c) { Status FusedBatchNormGradExShape(shape_inference::InferenceContext* c) { TF_RETURN_IF_ERROR(FusedBatchNormGradShape(c)); + int num_side_inputs; + TF_RETURN_IF_ERROR(c->GetAttr(""num_side_inputs"", &num_side_inputs)); + if (num_side_inputs == 0) { + return Status::OK(); + } + string data_format_str; TF_RETURN_IF_ERROR(c->GetAttr(""data_format"", &data_format_str)); TensorFormat data_format; ",0,train f26ea84fdbb13fff6b7979231db95dd20438645d,tensorflow/tensorflow,"[tf2xla] Remove MakeLinspaceTensor MakeLinspaceTensor is now unused. PiperOrigin-RevId: 234124584",xla_helpers.cc,"@@ -81,16 +81,6 @@ xla::XlaOp XlaHelpers::FloatLiteral(xla::XlaBuilder* b, DataType data_type, return Status::OK(); } -template -static Tensor MakeLinspaceTensor(const TensorShape& shape, int64 depth) { - Tensor linspace(DataTypeToEnum::v(), shape); - auto linspace_flat = linspace.flat(); - for (int64 i = 0; i < depth; ++i) { - linspace_flat(i) = i; - } - return linspace; -} - Status XlaHelpers::OneHot(xla::XlaBuilder* builder, int64 depth, int axis, DataType index_type, const TensorShape& indices_shape, const xla::XlaOp& indices, const xla::XlaOp& on_value, ",0,train a5923d823e088a9723e445cce9248d5fc59f1b30,tensorflow/tensorflow,"Allow StreamExecutor commands to return status types other than the TensorFlow status type. Change: 116793254",gpu_util.cc,"@@ -388,7 +388,7 @@ string GPUUtil::MemoryDebugString(const Device* device, Tensor* tensor) { string buf; buf.resize(num_bytes); DeviceMemoryBase gpu_ptr(ptr, num_bytes); - Status s = dev_info->stream->parent()->SynchronousMemcpyD2H( + auto s = dev_info->stream->parent()->SynchronousMemcpyD2H( gpu_ptr, num_bytes, gtl::string_as_array(&buf)); strings::StrAppend(&ret, PrintMemory(gtl::string_as_array(&buf), num_bytes)); ",0,train 5cfb7593f2141d5885734104bc2891995532ea18,tensorflow/tensorflow,Update bcast.h,bcast.h,"@@ -139,7 +139,7 @@ BCastList::BCastList(const BCastList::Vec (&x)[N], if (x[i] != x[0]) { all_equal = false; } - int x_i_size = x[i].size(); + const int x_i_size = x[i].size(); if (x_i_size > largest_rank) { largest_rank = x[i].size(); } ",0,train f9ccd02fb44b1272798058cb80cb31e0d12a1881,tensorflow/tensorflow,"Removing duplicate `six` requirement in setup.py. 
PiperOrigin-RevId: 275549971 Change-Id: I7f42d74c4833009c0542c1a09d688ccd63083183",setup.py,"@@ -60,7 +60,6 @@ REQUIRED_PACKAGES = [ 'keras_preprocessing >= 1.0.5', 'numpy >= 1.16.0, < 2.0', 'opt_einsum >= 2.3.2', - 'six >= 1.10.0', 'protobuf >= 3.6.1', 'tensorboard >= 2.0.0, < 2.1.0', 'tensorflow_estimator >= 2.0.0, < 2.1.0', ",0,test 9d17630338fb0cadd4bb347eb4993102cb77bb03,tensorflow/tensorflow,"Lower tolerace of eigvalsh for float32/complex64 PiperOrigin-RevId: 274723451",linear_operator_test_util.py,"@@ -434,8 +434,8 @@ def _test_eigvalsh(use_placeholder, shapes_info, dtype): atol = self._atol[dtype] # pylint: disable=protected-access rtol = self._rtol[dtype] # pylint: disable=protected-access if dtype == dtypes.float32 or dtype == dtypes.complex64: - atol = 1e-5 - rtol = 1e-5 + atol = 1e-4 + rtol = 1e-4 self.assertAllClose(op_eigvals_v, mat_eigvals_v, atol=atol, rtol=rtol) return test_eigvalsh ",0,test 1acc02f4689f0a5ac5ecd5bc1a1fa3b5236fd56c,tensorflow/tensorflow,"Let variables initialized from checkpoints answer "".initialized_value()"" correctly. PiperOrigin-RevId: 186741832",checkpoint_utils.py,"@@ -293,6 +293,8 @@ def _set_checkpoint_initializer(variable, restore_op = io_ops.restore_v2( ckpt_file, [tensor_name], [slice_spec], [base_type], name=name)[0] variable._initializer_op = state_ops.assign(variable, restore_op) # pylint:disable=protected-access + restore_op.set_shape(variable.shape) + variable._initial_value = restore_op # pylint:disable=protected-access def _set_variable_or_list_initializer(variable_or_list, ckpt_file, ",0,train 1acc02f4689f0a5ac5ecd5bc1a1fa3b5236fd56c,tensorflow/tensorflow,"Let variables initialized from checkpoints answer "".initialized_value()"" correctly. PiperOrigin-RevId: 186741832",checkpoint_utils_test.py,"@@ -145,6 +145,36 @@ class CheckpointsTest(test.TestCase): # Check that tensors are not explicitly in the graph. self.assertLess(len(str(session.graph.as_graph_def())), 29000) + def testInitialValueComesFromCheckpoint(self): + checkpoint_dir = self.get_temp_dir() + with self.test_session() as session: + v1, _, _, _ = _create_checkpoints(session, checkpoint_dir) + + # New graph and session. + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as session: + with variable_scope.variable_scope( + ""some_scope"", initializer=init_ops.zeros_initializer()): + my1 = variable_scope.get_variable(""my1"", [1, 10]) + + # At this point, my1.initialized_value() will add ops that reference + # the zeros initializer of my1. + before = variables.Variable(my1.initialized_value(), name=""before"") + + checkpoint_utils.init_from_checkpoint(checkpoint_dir, {""var1"": my1}) + + # At this point, my1.initialized_value() will add ops that reference + # the newly set initializer of my1. + after = variables.Variable(my1.initialized_value(), name=""after"") + + session.run(variables.global_variables_initializer()) + self.assertAllEqual(session.run(my1), v1) + self.assertAllEqual(session.run(my1.initialized_value()), v1) + self.assertAllClose(session.run(before), [[0.0] * 10]) + self.assertAllClose(session.run(after), v1) + with self.assertRaises(AssertionError): + self.assertAllClose(session.run(before), session.run(after)) + def testInitWithScopeDoesNotCaptureSuffixes(self): checkpoint_dir = self.get_temp_dir() with self.test_session() as session: ",0,train 63b5e80e5c99f847d7ab63087ded0b4fc1854d3a,tensorflow/tensorflow,"Use strategy._gather in test_util.gather We now have strategy._gather MWMS, so we can remove the naive implementation in test_util. 
PiperOrigin-RevId: 334518762 Change-Id: I7b1d97ff3937b8ca9f5c0287d007bb386efa9e3e",test_util.py,"@@ -21,10 +21,7 @@ from __future__ import print_function import functools from tensorflow.python.distribute import collective_all_reduce_strategy -from tensorflow.python.distribute import cross_device_utils -from tensorflow.python.distribute import distribute_utils from tensorflow.python.distribute import values -from tensorflow.python.eager import def_function from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.util import nest @@ -57,16 +54,5 @@ def _gather(strategy, value): return array_ops.stack(value._values) assert len(strategy.extended.worker_devices) == len(value._values) inputs = [array_ops.expand_dims_v2(v, axis=0) for v in value._values] - collective_keys = strategy.extended._collective_keys - devices = strategy.extended.worker_devices - group_size = strategy.num_replicas_in_sync - - @def_function.function - def gather_fn(): - gathered = cross_device_utils.build_collective_gather( - inputs, devices, group_size, collective_keys, axis=0) - return distribute_utils.update_regroup( - strategy.extended, gathered, group=True) - - return gather_fn() + return strategy._gather(values.PerReplica(inputs), axis=0) # pylint: enable=protected-access ",0,train 1b3142988270fd69c9aa4fc933ae96fc92fbcbd7,tensorflow/tensorflow,"[XLA:Python] Delete deprecated overloads that accept device ordinals instead of device objects. PiperOrigin-RevId: 285251896 Change-Id: I89b3f4ddf806d9bfe4d3d56554dde7056e09205a",xla.cc,"@@ -442,30 +442,6 @@ PYBIND11_MODULE(xla_extension, m) { std::move(leaves), tree.shape, std::move(py_buffer_ref), std::move(client), device->local_device_ordinal()); }) - // TODO(skyewm): get rid of this overload once everyone passes Device - .def_static( - ""from_python"", - [](const pybind11::object& argument, - std::shared_ptr client, - int device_ordinal) -> StatusOr> { - GlobalPyRefManager()->CollectGarbage(); - TF_ASSIGN_OR_RETURN(PythonBufferTree tree, - GetPythonBufferTree(argument)); - std::shared_ptr py_buffer_ref = - GlobalPyRefManager()->ManageReferences( - absl::MakeSpan(tree.arrays)); - tree.arrays.clear(); - - std::vector leaves; - leaves.insert(leaves.end(), - std::make_move_iterator(tree.leaves.begin()), - std::make_move_iterator(tree.leaves.end())); - - py::gil_scoped_release gil_release; - return PyLocalBuffer::FromLiterals( - std::move(leaves), tree.shape, std::move(py_buffer_ref), - std::move(client), device_ordinal); - }) .def_static(""make_tuple"", [](const std::vector buffers, std::shared_ptr client, @@ -481,8 +457,6 @@ PYBIND11_MODULE(xla_extension, m) { return PyLocalBuffer::MakeTuple( buffers, client, device->local_device_ordinal()); }) - // TODO(skyewm): get rid of this overload once everyone passes Device - .def_static(""make_tuple"", &PyLocalBuffer::MakeTuple) .def(""copy_to_device"", [](PyLocalBuffer* buffer, std::shared_ptr dst_device) { CHECK(dst_device != nullptr); @@ -490,13 +464,6 @@ PYBIND11_MODULE(xla_extension, m) { py::gil_scoped_release gil_release; return buffer->CopyToDevice(dst_device->local_device_ordinal()); }) - // TODO(skyewm): get rid of this overload once everyone passes Device - .def(""copy_to_device"", - [](PyLocalBuffer* buffer, int dst_device_ordinal) { - GlobalPyRefManager()->CollectGarbage(); - py::gil_scoped_release gil_release; - return buffer->CopyToDevice(dst_device_ordinal); - }) .def(""delete"", &PyLocalBuffer::Delete) .def(""destructure"", &PyLocalBuffer::DestructureTuple) 
.def(""block_host_until_ready"", @@ -522,8 +489,6 @@ PYBIND11_MODULE(xla_extension, m) { [](PyLocalBuffer* buffer) -> std::shared_ptr { return buffer->client()->local_devices()[buffer->device_ordinal()]; }) - // TODO(skyewm): get rid of `device_ordinal` once everything uses `device` - .def(""device_ordinal"", &PyLocalBuffer::device_ordinal) .def(""platform"", &PyLocalBuffer::platform_name) .def(""is_deleted"", [](const PyLocalBuffer& buffer) { @@ -546,15 +511,6 @@ PYBIND11_MODULE(xla_extension, m) { .def_static(""Compile"", &PyLocalExecutable::Compile, py::call_guard()) .def(""local_devices"", &PyLocalExecutable::local_devices) - // TODO(skyewm): get rid of this once everything uses `local_devices` - .def(""DeviceOrdinals"", - [](const PyLocalExecutable& executable) { - std::vector device_ordinals; - for (std::shared_ptr device : executable.local_devices()) { - device_ordinals.push_back(device->local_device_ordinal()); - } - return device_ordinals; - }) .def(""SizeOfGeneratedCodeInBytes"", &PyLocalExecutable::SizeOfGeneratedCodeInBytes) .def(""Delete"", &PyLocalExecutable::Delete) ",0,test 1b3142988270fd69c9aa4fc933ae96fc92fbcbd7,tensorflow/tensorflow,"[XLA:Python] Delete deprecated overloads that accept device ordinals instead of device objects. PiperOrigin-RevId: 285251896 Change-Id: I89b3f4ddf806d9bfe4d3d56554dde7056e09205a",xla_client.py,"@@ -597,7 +597,7 @@ class Computation(object): # An Executable is a C++ class that duck types with the following API: # class Executable(object): -# def DeviceOrdinals(self) -> [int]: +# def local_devices(self) -> [Device]: # def Execute(self, arguments : [Buffer]) -> Buffer: # """"""Execute on one replica with Buffer arguments and return value."""""" # @@ -627,7 +627,7 @@ def execute_with_python_values(executable, arguments=(), backend=None): def put(arg): return Buffer.from_pyval( - arg, device=executable.DeviceOrdinals()[0], backend=backend) + arg, device=executable.local_devices()[0], backend=backend) arguments = [put(arg) for arg in arguments] return executable.Execute(arguments).to_py() @@ -646,9 +646,9 @@ def execute_with_python_values_replicated(executable, arguments, backend=None): A list of python values, one per replica. """""" backend = backend or get_local_backend() - device_ordinals = executable.DeviceOrdinals() + devices = executable.local_devices() # pylint: disable=g-complex-comprehension - flat_args = [(arg, device_ordinals[replica]) + flat_args = [(arg, devices[replica]) for replica, replica_args in enumerate(arguments) for arg in replica_args] flat_arg_buffers = [ ",0,test 1b3142988270fd69c9aa4fc933ae96fc92fbcbd7,tensorflow/tensorflow,"[XLA:Python] Delete deprecated overloads that accept device ordinals instead of device objects. PiperOrigin-RevId: 285251896 Change-Id: I89b3f4ddf806d9bfe4d3d56554dde7056e09205a",xla_client_test.py,"@@ -530,7 +530,8 @@ class BufferTest(ComputationTest): ) b0 = xla_client.Buffer.from_pyval(t[0]) b1 = xla_client.Buffer.from_pyval(t[1]) - btup = xla_client.Buffer.make_tuple([b0, b1], device=0) + device = xla_client.get_local_backend().local_devices()[0] + btup = xla_client.Buffer.make_tuple([b0, b1], device=device) pieces = btup.destructure() self.assertLen(pieces, 2) array0, array1 = pieces @@ -576,15 +577,6 @@ class BufferTest(ComputationTest): self.assertEqual(buf.device(), device) np.testing.assert_equal(x, buf.to_py()) - def testInvalidDevice(self): - t = np.array(1.) 
- with self.assertRaisesRegexp( - RuntimeError, - r""PyLocalBuffer::FromLiterals got bad device_ordinal: 100 "" - r""\(num_local_devices=\d+\)""): - # TODO(skyewm): figure out how to test this with a Device - xla_client.Buffer.from_pyval(t, device=100) - class SingleOpTest(ComputationTest): """"""Tests for single ops. ",0,test 7817f10ec7ed4622d305fdee298042347ee55da7,tensorflow/tensorflow,"Give a better error message that sparse tensor is not supported. Change: 125206796",control_flow_grad.py,"@@ -197,10 +197,13 @@ def _EnterGrad(op, grad): return grad if op.get_attr(""is_constant""): # Add a gradient accumulator for each loop invariant. - if isinstance(grad, ops.IndexedSlices): + if isinstance(grad, ops.Tensor): + result = grad_ctxt.AddBackPropAccumulator(grad) + elif isinstance(grad, ops.IndexedSlices): result = grad_ctxt.AddBackPropIndexedSlicesAccumulator(grad) else: - result = grad_ctxt.AddBackPropAccumulator(grad) + # TODO(yuanbyu, lukasr): Add support for SparseTensor. + raise TypeError(""Type %s not supported"" % type(grad)) else: result = exit(grad) grad_ctxt.ExitResult([result]) ",0,test 1a5364efe43f76ab72a1f3651df394d6b121c915,tensorflow/tensorflow,"Fix incorrect documentation for MaxPool3D, MaxPoolGrad3D, AvgPoolGrad3D The channel size should be zero rather than the depth. Fixes #2573 (GitHub) Change: 124261261",nn_ops.cc,"@@ -496,7 +496,7 @@ REGISTER_OP(""AvgPool3D"") Performs 3D average pooling on the input. ksize: 1-D tensor of length 5. The size of the window for each dimension of - the input tensor. Must have `ksize[0] = ksize[1] = 1`. + the input tensor. Must have `ksize[0] = ksize[4] = 1`. strides: 1-D tensor of length 5. The stride of the sliding window for each dimension of `input`. Must have `strides[0] = strides[4] = 1`. padding: The type of padding algorithm to use. @@ -516,7 +516,7 @@ REGISTER_OP(""AvgPool3DGrad"") Computes gradients of average pooling function. ksize: 1-D tensor of length 5. The size of the window for each dimension of - the input tensor. Must have `ksize[0] = ksize[1] = 1`. + the input tensor. Must have `ksize[0] = ksize[4] = 1`. strides: 1-D tensor of length 5. The stride of the sliding window for each dimension of `input`. Must have `strides[0] = strides[4] = 1`. padding: The type of padding algorithm to use. @@ -538,7 +538,7 @@ REGISTER_OP(""MaxPool3D"") Performs 3D max pooling on the input. ksize: 1-D tensor of length 5. The size of the window for each dimension of - the input tensor. Must have `ksize[0] = ksize[1] = 1`. + the input tensor. Must have `ksize[0] = ksize[4] = 1`. strides: 1-D tensor of length 5. The stride of the sliding window for each dimension of `input`. Must have `strides[0] = strides[4] = 1`. padding: The type of padding algorithm to use. @@ -559,7 +559,7 @@ REGISTER_OP(""MaxPool3DGrad"") Computes gradients of max pooling function. ksize: 1-D tensor of length 5. The size of the window for each dimension of - the input tensor. Must have `ksize[0] = ksize[1] = 1`. + the input tensor. Must have `ksize[0] = ksize[4] = 1`. strides: 1-D tensor of length 5. The stride of the sliding window for each dimension of `input`. Must have `strides[0] = strides[4] = 1`. padding: The type of padding algorithm to use. ",0,train 2eecf54f925fe4cb28aff39e4cb90924fad88196,tensorflow/tensorflow,updating testcases to work correctly with ROCm,direct_session_test.cc,"@@ -51,9 +51,11 @@ limitations under the License. 
#include ""tensorflow/core/public/session_options.h"" #include ""tensorflow/core/util/device_name_utils.h"" -#ifdef GOOGLE_CUDA +#if GOOGLE_CUDA #include ""third_party/gpus/cuda/include/cuda.h"" #include ""third_party/gpus/cuda/include/cuda_runtime_api.h"" +#elif TENSORFLOW_USE_ROCM +#include ""rocm/include/hip/hip_runtime.h"" #endif // GOOGLE_CUDA namespace tensorflow { @@ -2089,6 +2091,12 @@ bool IsCUDATensor(const Tensor& t) { if (err == cudaErrorInvalidValue) return false; CHECK_EQ(cudaSuccess, err) << cudaGetErrorString(err); return (attributes.memoryType == cudaMemoryTypeDevice); +#elif TENSORFLOW_USE_ROCM + hipPointerAttribute_t attributes; + hipError_t err = hipPointerGetAttributes(&attributes, t.tensor_data().data()); + if (err == hipErrorInvalidValue) return false; + CHECK_EQ(hipSuccess, err) << hipGetErrorString(err); + return (attributes.memoryType == hipMemoryTypeDevice); #else return false; #endif ",0,train 2eecf54f925fe4cb28aff39e4cb90924fad88196,tensorflow/tensorflow,updating testcases to work correctly with ROCm,process_function_library_runtime_test.cc,"@@ -33,9 +33,11 @@ limitations under the License. #include ""tensorflow/core/public/session_options.h"" #include ""tensorflow/core/public/version.h"" -#ifdef GOOGLE_CUDA +#if GOOGLE_CUDA #include ""third_party/gpus/cuda/include/cuda.h"" #include ""third_party/gpus/cuda/include/cuda_runtime_api.h"" +#elif TENSORFLOW_USE_ROCM +#include ""rocm/include/hip/hip_runtime.h"" #endif // GOOGLE_CUDA namespace tensorflow { @@ -122,7 +124,7 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test { } Tensor GPUToCPU(const Tensor& device_tensor) { -#ifdef GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM CHECK(gpu_device_); CHECK(gpu_device_->tensorflow_gpu_device_info() != nullptr); DeviceContext* device_context = @@ -146,7 +148,7 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test { } Tensor CPUToGPU(const Tensor& cpu_tensor) { -#ifdef GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM CHECK(gpu_device_); CHECK(gpu_device_->tensorflow_gpu_device_info() != nullptr); DeviceContext* device_context = @@ -461,6 +463,12 @@ bool IsCUDATensor(const Tensor& t) { if (err == cudaErrorInvalidValue) return false; CHECK_EQ(cudaSuccess, err) << cudaGetErrorString(err); return (attributes.memoryType == cudaMemoryTypeDevice); +#elif TENSORFLOW_USE_ROCM + hipPointerAttribute_t attributes; + hipError_t err = hipPointerGetAttributes(&attributes, t.tensor_data().data()); + if (err == hipErrorInvalidValue) return false; + CHECK_EQ(hipSuccess, err) << hipGetErrorString(err); + return (attributes.memoryType == hipMemoryTypeDevice); #else CHECK(false) << ""IsCUDATensor should not be called when CUDA is not available""; ",0,train 2eecf54f925fe4cb28aff39e4cb90924fad88196,tensorflow/tensorflow,updating testcases to work correctly with ROCm,utils_test.cc,"@@ -40,6 +40,18 @@ TEST(UtilsTest, GetLocalGPUInfo) { properties = GetLocalGPUInfo(PlatformGpuId(0)); EXPECT_EQ(""GPU"", properties.type()); EXPECT_EQ(""NVIDIA"", properties.vendor()); +#elif TENSORFLOW_USE_ROCM + LOG(INFO) << ""ROCm is enabled.""; + DeviceProperties properties; + + // Invalid platform GPU ID. + properties = GetLocalGPUInfo(PlatformGpuId(100)); + EXPECT_EQ(""UNKNOWN"", properties.type()); + + // Succeed when a valid platform GPU id was inserted. 
+ properties = GetLocalGPUInfo(PlatformGpuId(0)); + EXPECT_EQ(""GPU"", properties.type()); + EXPECT_EQ(""Advanced Micro Devices, Inc"", properties.vendor()); #else LOG(INFO) << ""CUDA is not enabled.""; DeviceProperties properties; @@ -73,6 +85,8 @@ TEST(UtilsTest, GetDeviceInfo) { EXPECT_EQ(""GPU"", properties.type()); #if GOOGLE_CUDA EXPECT_EQ(""NVIDIA"", properties.vendor()); +#elif TENSORFLOW_USE_ROCM + EXPECT_EQ(""Advanced Micro Devices, Inc"", properties.vendor()); #endif // TF to platform GPU id mapping entry doesn't exist. @@ -81,7 +95,7 @@ TEST(UtilsTest, GetDeviceInfo) { properties = GetDeviceInfo(device); EXPECT_EQ(""UNKNOWN"", properties.type()); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM // Invalid platform GPU id. TF_ASSERT_OK( GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId(0), PlatformGpuId(100))); @@ -94,7 +108,11 @@ TEST(UtilsTest, GetDeviceInfo) { device.id = 1; properties = GetDeviceInfo(device); EXPECT_EQ(""GPU"", properties.type()); +#if GOOGLE_CUDA EXPECT_EQ(""NVIDIA"", properties.vendor()); +#elif TENSORFLOW_USE_ROCM + EXPECT_EQ(""Advanced Micro Devices, Inc"", properties.vendor()); +#endif #endif } ",0,train 2eecf54f925fe4cb28aff39e4cb90924fad88196,tensorflow/tensorflow,updating testcases to work correctly with ROCm,pin_to_host_optimizer_test.cc,"@@ -203,7 +203,7 @@ TEST_F(PinToHostOptimizerTest, Identity) { // If CUDA, then there is a GPU kernel registration that is pinned to Host // memory. Consequently, `b` will be mapped to Host correct if there is // a GPU kernel registered. -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM EXPECT_EQ(node.device(), ""/device:CPU:0""); #else EXPECT_TRUE(node.device().empty()); ",0,train bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation PiperOrigin-RevId: 378549445 Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_launch_util.cc,"@@ -57,10 +57,19 @@ se::Platform::Id XlaPlatformInfoFromDevice(DeviceBase* device_base) { } // anonymous namespace -VariableInfo::VariableInfo(int index, absl::string_view name, Var* var) - : index_(index), name_(name), var_(var) {} +VariableInfo::VariableInfo( + int index, absl::string_view name, Var* var, + const absl::optional& definition_stack_trace) + : index_(index), + name_(name), + var_(var), + definition_stack_trace_(definition_stack_trace) {} + VariableInfo::VariableInfo(VariableInfo&& other) - : index_(other.index_), var_(other.var_), lock_held_(other.lock_held_) { + : index_(other.index_), + var_(other.var_), + definition_stack_trace_(other.definition_stack_trace_), + lock_held_(other.lock_held_) { other.index_ = -1; other.var_ = nullptr; } @@ -69,6 +78,7 @@ VariableInfo& VariableInfo::operator=(VariableInfo&& other) { index_ = other.index_; var_ = other.var_; lock_held_ = other.lock_held_; + definition_stack_trace_ = other.definition_stack_trace_; other.index_ = -1; other.var_ = nullptr; @@ -100,21 +110,8 @@ Status GetVariableInfosFromInputs(ResourceMgr* rm, DeviceBase* dev, Var* variable = nullptr; ResourceHandle handle = inputs[var_idx]->flat()(0); if (handle.device() != dev->attributes().name()) { - std::string definition_location = [&]() -> std::string { - if (handle.definition_stack_trace()) { - std::vector stack_frames = - handle.definition_stack_trace()->ToStackFrames( - {}, IsInternalFrameForFilename, - /*reverse_traversal=*/true, - /*limit=*/1); - if (!stack_frames.empty()) { - const StackFrame& last_frame = stack_frames[0]; - return 
absl::StrCat("" (defined @ "", last_frame.file_name, "":"", - last_frame.line_number, "")""); - } - } - return """"; - }(); + std::string definition_location = + DefinitionLocationMsg(handle.definition_stack_trace()); return errors::InvalidArgument(""Trying to access resource "", handle.name(), definition_location, "" located in device "", handle.device(), @@ -126,7 +123,8 @@ Status GetVariableInfosFromInputs(ResourceMgr* rm, DeviceBase* dev, *ptr = new Var(DT_INVALID); return Status::OK(); })); - result->emplace_back(var_idx, handle.name(), variable); + result->emplace_back(var_idx, handle.name(), variable, + handle.definition_stack_trace()); } return Status::OK(); } @@ -445,7 +443,8 @@ StatusOr> GatherVariableInfo( const ResourceHandle handle = HandleFromInput(ctx, actual_input_index); TF_ASSIGN_OR_RETURN(Var * variable, GetOrCreateResourceVar(ctx, handle, write)); - out.emplace_back(actual_input_index, handle.name(), variable); + out.emplace_back(actual_input_index, handle.name(), variable, + handle.definition_stack_trace()); } return std::move(out); } @@ -647,6 +646,7 @@ XlaComputationLaunchContext::BuildXlaCompilerArguments( arg.name = std::string(variable.name()); arg.kind = XlaCompiler::Argument::kResource; arg.resource_kind = XlaResource::kVariable; + arg.definition_stack_trace = variable.definition_stack_trace(); if (variable.var() && variable.var()->is_initialized) { const Tensor* value = variable.var()->tensor(); arg.type = value->dtype(); ",0,test bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation PiperOrigin-RevId: 378549445 Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_launch_util.h,"@@ -44,7 +44,9 @@ using ResourceVarsSnapshot = absl::flat_hash_map>; // refcount on destruction. class VariableInfo { public: - explicit VariableInfo(int index, absl::string_view name, Var* var); + explicit VariableInfo(int index, absl::string_view name, Var* var, + const absl::optional& + definition_stack_trace = absl::nullopt); VariableInfo(VariableInfo&& other); VariableInfo& operator=(VariableInfo&& other); @@ -68,12 +70,17 @@ class VariableInfo { bool lock_held() const { return lock_held_; } void set_lock_held() { lock_held_ = true; } + const absl::optional& definition_stack_trace() const { + return definition_stack_trace_; + } + ~VariableInfo(); private: int index_; std::string name_; Var* var_; + absl::optional definition_stack_trace_; // We can't use a optional here because it confuses the compiler's // thread safety analysis. Instead we use a boolean flag and release the lock ",0,test bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation PiperOrigin-RevId: 378549445 Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_argument.h,"@@ -122,6 +122,7 @@ struct XlaArgument { // When true, xla_compiler should input/output alias this arg to prevent // unnecessary HBM usage. bool requires_broadcast = false; + absl::optional definition_stack_trace; }; // Returns true if any of `args` is an uninitialized resource variable. 
",0,test bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation PiperOrigin-RevId: 378549445 Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_compiler.cc,"@@ -987,7 +987,8 @@ Status XlaCompiler::BuildArguments( absl::get(arg.shape), xla::XlaOp(), /*max_array_size=*/arg.max_array_size, /*tensor_array_gradients=*/arg.tensor_array_gradients, - /*tensor_array_multiple_writes_aggregate=*/true)); + /*tensor_array_multiple_writes_aggregate=*/true, + arg.definition_stack_trace)); arg_expression = arg.kind == XlaCompiler::Argument::kResource ? XlaExpression::Resource(resource) ",0,test bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation PiperOrigin-RevId: 378549445 Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_helpers.cc,"@@ -285,4 +285,20 @@ StatusOr> ResolveDeviceAssignment( return {{out}}; } +std::string DefinitionLocationMsg( + const absl::optional& stack_trace) { + if (stack_trace) { + std::vector stack_frames = + stack_trace->ToStackFrames({}, IsInternalFrameForFilename, + /*reverse_traversal=*/true, + /*limit=*/1); + if (!stack_frames.empty()) { + const StackFrame& last_frame = stack_frames[0]; + return absl::StrCat("" (defined @ "", last_frame.file_name, "":"", + last_frame.line_number, "")""); + } + } + return """"; +} + } // end namespace tensorflow ",0,test bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation PiperOrigin-RevId: 378549445 Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_helpers.h,"@@ -188,6 +188,11 @@ StatusOr> ResolveDeviceAssignment( const absl::optional& collective_reduce_info); +// Generate a message with a definition location based on a provided stack +// trace, or an empty one if the stack trace is empty. 
+std::string DefinitionLocationMsg( + const absl::optional& stack_trace); + } // end namespace tensorflow #endif // TENSORFLOW_COMPILER_TF2XLA_XLA_HELPERS_H_ ",0,test bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation PiperOrigin-RevId: 378549445 Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_resource.cc,"@@ -59,11 +59,12 @@ namespace tensorflow { /*tensor_array_multiple_writes_aggregate=*/false); } -XlaResource::XlaResource(Kind kind, int arg_num, string name, DataType type, - TensorShape shape, const xla::XlaOp& initial_value, - int64 max_array_size, - const std::set& tensor_array_gradients, - bool tensor_array_multiple_writes_aggregate) +XlaResource::XlaResource( + Kind kind, int arg_num, string name, DataType type, TensorShape shape, + xla::XlaOp initial_value, int64 max_array_size, + const std::set& tensor_array_gradients, + bool tensor_array_multiple_writes_aggregate, + const absl::optional& definition_stack_trace) : kind_(kind), arg_num_(arg_num), name_(std::move(name)), @@ -73,7 +74,8 @@ XlaResource::XlaResource(Kind kind, int arg_num, string name, DataType type, initial_value_(initial_value), max_array_size_(max_array_size), tensor_array_multiple_writes_aggregate_( - tensor_array_multiple_writes_aggregate) { + tensor_array_multiple_writes_aggregate), + definition_stack_trace_(definition_stack_trace) { CHECK(kind_ != kInvalid); for (const string& gradient : tensor_array_gradients) { @@ -87,22 +89,25 @@ XlaResource::XlaResource(Kind kind, int arg_num, string name, DataType type, Status XlaResource::SetTypeAndShape(DataType type, const TensorShape& shape) { if (type == DT_INVALID) { - return errors::InvalidArgument(""Attempted to set type of resource '"", name_, - ""'' to an invalid type""); + return errors::InvalidArgument( + ""Attempted to set type of resource '"", name_, ""'' to an invalid type"", + DefinitionLocationMsg(definition_stack_trace_)); } if (initialized() && type_ != type) { - return errors::Unimplemented(""Type of resource "", name_, - "" cannot be changed after initialization: "" - ""old type was "", - DataTypeString(type_), "", new type is "", - DataTypeString(type)); + return errors::InvalidArgument( + ""Type of resource "", name_, + "" cannot be changed after initialization: "" + ""old type was "", + DataTypeString(type_), "", new type is "", DataTypeString(type), + DefinitionLocationMsg(definition_stack_trace_)); } if (initialized() && shape_ != shape) { - return errors::Unimplemented(""Shape of resource "", name_, - "" cannot be changed after initialization: "" - ""old shape was "", - shape_.DebugString(), "", new shape is "", - shape.DebugString()); + return errors::InvalidArgument( + ""Shape of resource "", name_, + "" cannot be changed after initialization: "" + ""old shape was "", + shape_.DebugString(), "", new shape is "", shape.DebugString(), + DefinitionLocationMsg(definition_stack_trace_)); } type_ = type; shape_ = shape; ",0,test bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation PiperOrigin-RevId: 378549445 Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_resource.h,"@@ -24,6 +24,7 @@ limitations under the License. 
#include ""tensorflow/core/framework/tensor_shape.h"" #include ""tensorflow/core/framework/types.pb.h"" #include ""tensorflow/core/lib/core/status.h"" +#include ""tensorflow/core/util/managed_stack_trace.h"" namespace tensorflow { @@ -48,10 +49,11 @@ class XlaResource { int64 max_array_size); XlaResource(Kind kind, int arg_num, string name, DataType type, - TensorShape shape, const xla::XlaOp& initial_value, - int64 max_array_size, + TensorShape shape, xla::XlaOp initial_value, int64 max_array_size, const std::set& tensor_array_gradients, - bool tensor_array_multiple_writes_aggregate); + bool tensor_array_multiple_writes_aggregate, + const absl::optional& definition_stack_trace = + absl::nullopt); XlaResource(const XlaResource&) = delete; XlaResource(XlaResource&&) = delete; @@ -182,6 +184,8 @@ class XlaResource { std::map> tensor_array_gradients_; bool is_overwritten_ = false; + + absl::optional definition_stack_trace_; }; } // namespace tensorflow ",0,test bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation PiperOrigin-RevId: 378549445 Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",def_function_xla_jit_test.py,"@@ -1066,6 +1066,24 @@ class DefFunctionTest(xla_test.XLATestCase): v = variables.Variable([[2.]]) self.assertAllClose(f(v), constant_op.constant([[0.5]])) + @test_util.disable_mlir_bridge('TODO(b/190444466): MLIR bridge seems to ' + 'ignore resource assignments') + def testErrMsgAssignWrongShape(self): + with ops.device('device:{}:0'.format(self.device)): + + v = variables.Variable([3.1, 3.2]) + + @def_function.function(jit_compile=True) + def f(samples): + v.assign(array_ops.zeros(samples)) # assignment + + with self.assertRaisesRegex(errors.InvalidArgumentError, + '@ .+def_function_xla_jit_test.py'): + f(constant_op.constant(6)) + + with self.assertRaisesRegex(errors.InvalidArgumentError, 'assignment'): + f(constant_op.constant(6)) + if __name__ == '__main__': ops.enable_eager_execution() ",0,test bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation PiperOrigin-RevId: 378549445 Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",stack_trace.h,"@@ -152,8 +152,14 @@ inline std::vector ManagedStackTraceToStackFrames( int id, const StackTraceMap& mapper, const StackTraceFilter& filtered, bool reverse_traversal, int limit) { PyGILState_STATE gstate = PyGILState_Ensure(); - std::vector result = stack_trace_manager->Get(id)->ToStackFrames( - mapper, filtered, reverse_traversal, limit); + StackTrace* stack_trace = stack_trace_manager->Get(id); + if (!stack_trace) { + // Must have evicted the stack trace by now. Do best effort. + return {}; + } + + std::vector result = + stack_trace->ToStackFrames(mapper, filtered, reverse_traversal, limit); PyGILState_Release(gstate); return result; } ",0,test 2a3d40e362a9ea67570d4e8ca467e49845443962,tensorflow/tensorflow,"[XLA:CPU] Switch dynamic_ops_test to use Parameter (instead of Constant) for large test values (also reduces test timeout to medium/moderate). Reduces avg runtime of DynamicUpdateSliceTest.R3ContiguousLarger (over 1000 runs) from 60sec to 10sec. 
Change: 146524027",client_library_test_base.h,"@@ -239,6 +239,18 @@ class ClientLibraryTestBase : public ::testing::Test { const string& name, ComputationBuilder* builder, ComputationDataHandle* data_handle); + // Create a parameter instruction that wraps the given constant array + // ""array_3d"" and then stores to ""data_handle"" the global handle for that + // parameter. + // + // ""parameter_number"" is the parameter number. + // ""name"" is the name of the parameter instruction. + template + std::unique_ptr CreateR3Parameter( + const Array3D& array_3d, int64 parameter_number, + const string& name, ComputationBuilder* builder, + ComputationDataHandle* data_handle); + Client* client_; ExecutionOptions execution_options_; }; @@ -382,6 +394,18 @@ std::unique_ptr ClientLibraryTestBase::CreateR2Parameter( return data; } +template +std::unique_ptr ClientLibraryTestBase::CreateR3Parameter( + const Array3D& array_3d, int64 parameter_number, + const string& name, ComputationBuilder* builder, + ComputationDataHandle* data_handle) { + std::unique_ptr literal = LiteralUtil::CreateR3FromArray3D(array_3d); + std::unique_ptr data = + client_->TransferToServer(*literal).ConsumeValueOrDie(); + *data_handle = builder->Parameter(parameter_number, literal->shape(), name); + return data; +} + template std::vector ClientLibraryTestBase::CreatePseudorandomR1( const int width, NativeT min_value, NativeT max_value, uint32 seed) { ",0,train 2a3d40e362a9ea67570d4e8ca467e49845443962,tensorflow/tensorflow,"[XLA:CPU] Switch dynamic_ops_test to use Parameter (instead of Constant) for large test values (also reduces test timeout to medium/moderate). Reduces avg runtime of DynamicUpdateSliceTest.R3ContiguousLarger (over 1000 runs) from 60sec to 10sec. Change: 146524027",dynamic_ops_test.cc,"@@ -350,13 +350,20 @@ class DynamicUpdateSliceTest : public ClientLibraryTestBase { // Build dynamic slice computation. ComputationBuilder builder(client_, TestName()); + // Initialize and transfer input parameter. + ComputationDataHandle input; + std::unique_ptr input_data = CreateR3Parameter( + input_values, 0, ""input_values"", &builder, &input); + // Initialize and transfer update parameter. + ComputationDataHandle update; + std::unique_ptr update_data = CreateR3Parameter( + update_values, 1, ""update_values"", &builder, &update); auto starts = builder.ConstantR1({index, 0, 0}); - auto input = builder.ConstantR3FromArray3D(input_values); - auto update = builder.ConstantR3FromArray3D(update_values); builder.DynamicUpdateSlice(input, update, starts); // Run computation and compare against expected values. - ComputeAndCompareR3(&builder, expected_values, {}, + ComputeAndCompareR3(&builder, expected_values, + {input_data.get(), update_data.get()}, ErrorSpec(0.000001)); } ",0,train ffca44e327e02a38bdaab56b8c5ebd6f4b2ab69c,tensorflow/tensorflow,"Fix tf.nn.log_softmax documentation. (Missing `log` in normalization.) Change: 136520720",nn_ops.py,"@@ -1103,7 +1103,7 @@ def _softmax(logits, compute_op, dim=-1, name=None): def softmax(logits, dim=-1, name=None): - """"""Computes log softmax activations. + """"""Computes softmax activations. For each batch `i` and class `j` we have @@ -1130,7 +1130,7 @@ def log_softmax(logits, dim=-1, name=None): For each batch `i` and class `j` we have - logsoftmax = logits - reduce_sum(exp(logits), dim) + logsoftmax = logits - log(reduce_sum(exp(logits), dim)) Args: logits: A non-empty `Tensor`. 
Must be one of the following types: `half`, ",0,test 77c0bb432292f53148a1c2e36e8ec643ba994d23,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-04-10 PiperOrigin-RevId: 305845881 Change-Id: Idbaae458d48d6751d19452ae3d2ac42d58d91864",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 4, 9) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 4, 10) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting The operations allow for a limited form of broadcasting which allows some operands to be scalars. As such they are neither strictly `Elementwise`, nor `Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise` though. PiperOrigin-RevId: 379719961 Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",chlo_ops.h,"@@ -53,10 +53,6 @@ namespace mlir { namespace chlo { namespace OpTrait { -template -class BroadcastingElementwise - : public mlir::OpTrait::TraitBase {}; - template class Broadcasting : public mlir::OpTrait::TraitBase {}; ",0,train ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting The operations allow for a limited form of broadcasting which allows some operands to be scalars. As such they are neither strictly `Elementwise`, nor `Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise` though. PiperOrigin-RevId: 379719961 Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",hlo_ops.h,"@@ -34,6 +34,7 @@ limitations under the License. #include ""mlir/Interfaces/SideEffectInterfaces.h"" // clang-format off +#include ""mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.h"" #include ""mlir-hlo/Dialect/mhlo/IR/hlo_ops_base_structs.h"" #include ""mlir-hlo/Dialect/mhlo/IR/hlo_ops_base_enums.h"" #include ""mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h"" ",0,train ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting The operations allow for a limited form of broadcasting which allows some operands to be scalars. As such they are neither strictly `Elementwise`, nor `Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise` though. PiperOrigin-RevId: 379719961 Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",hlo_ops_base.h,"@@ -0,0 +1,33 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_IR_HLO_OPS_BASE_H_ +#define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_IR_HLO_OPS_BASE_H_ + +#include ""mlir/IR/OpDefinition.h"" + +namespace mlir { +namespace mhlo { +namespace OpTrait { + +template +class BroadcastingElementwise + : public mlir::OpTrait::TraitBase {}; + +} // namespace OpTrait +} // namespace mhlo +} // namespace mlir + +#endif ",0,train ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting The operations allow for a limited form of broadcasting which allows some operands to be scalars. As such they are neither strictly `Elementwise`, nor `Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise` though. PiperOrigin-RevId: 379719961 Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",hlo_ops.cc,"@@ -1561,7 +1561,8 @@ class DynamicReshapeOpSameShapeOpResult LogicalResult matchAndRewrite(DynamicReshapeOp op, PatternRewriter& rewriter) const override { Operation* def_op = op.operand().getDefiningOp(); - if (!def_op || !def_op->hasTrait()) { + if (!def_op || + !def_op->hasTrait()) { return failure(); } Operation* input_def_op = def_op->getOperand(0).getDefiningOp(); @@ -2098,7 +2099,7 @@ Operation* ReduceWindowOp::getReductionOp(int result_index) { if (arg0_num == result_index && arg1_num == other_arg_index) return compute_op; if (arg0_num == other_arg_index && arg1_num == result_index && - compute_op->hasTrait()) + compute_op->hasTrait()) return compute_op; return nullptr; } ",0,train ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting The operations allow for a limited form of broadcasting which allows some operands to be scalars. As such they are neither strictly `Elementwise`, nor `Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise` though. PiperOrigin-RevId: 379719961 Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",broadcast_propagation.cc,"@@ -177,8 +177,8 @@ struct MoveElementwiseOpsIntoAssumingOpPattern : public RewritePattern { LogicalResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) const override { // Apply to all elementwise and broadcasting elementwise operations. - if (!op->hasTrait() && - !op->hasTrait()) + if (!op->hasTrait() && + !op->hasTrait()) return failure(); return MoveIntoAssumingOpMatchAndRewrite(op, rewriter); @@ -336,8 +336,8 @@ struct EarlyBroadcastInDimOpPattern PatternRewriter &rewriter) const override { Operation *producer_op = bcast_op.operand().getDefiningOp(); if (!producer_op || - !producer_op->hasTrait() || - !producer_op->hasTrait()) { + !producer_op->hasTrait() || + !producer_op->hasTrait()) { return failure(); } ",0,train ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting The operations allow for a limited form of broadcasting which allows some operands to be scalars. As such they are neither strictly `Elementwise`, nor `Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise` though. 
PiperOrigin-RevId: 379719961 Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",rank_specialization.cc,"@@ -66,9 +66,9 @@ namespace { bool IsClusterable(Operation *op) { if (!llvm::isa(op)) return false; if (op->getNumOperands() == 0) return false; - return (op->hasTrait() && - op->hasTrait()) || - (op->hasTrait() && + return (op->hasTrait() && + op->hasTrait()) || + (op->hasTrait() && op->hasTrait()); } @@ -729,7 +729,7 @@ SmallVector, 4> FindNonScalarShapeEquivalences( for (Value v : vs.drop_front()) eqs.unionSets(repr, v); }; for (Operation &nested_op : op.getBody()->without_terminator()) { - if (nested_op.hasTrait()) { + if (nested_op.hasTrait()) { union_sets(nested_op.getOperands()); union_sets(nested_op.getResults()); if (!nested_op.getOperands().empty() && !nested_op.getResults().empty()) ",0,train ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting The operations allow for a limited form of broadcasting which allows some operands to be scalars. As such they are neither strictly `Elementwise`, nor `Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise` though. PiperOrigin-RevId: 379719961 Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",sink_constants_to_control_flow.cc,"@@ -65,7 +65,7 @@ class SinkConstantsToControlFlowPass visitUsedValuesDefinedAbove({*region}, [&](OpOperand* use) { Value constant = use->get(); auto op = constant.getDefiningOp(); - if (!op || !op->hasTrait()) return; + if (!op || !op->hasTrait()) return; auto map_entry = sunk_constant.try_emplace(constant, nullptr); if (!map_entry.second) { // This constant has already been cloned into the region, reuse it. ",0,train ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting The operations allow for a limited form of broadcasting which allows some operands to be scalars. As such they are neither strictly `Elementwise`, nor `Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise` though. PiperOrigin-RevId: 379719961 Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",legalize_tf.cc,"@@ -3418,9 +3418,9 @@ static void BroadcastBatchMatMulV2Operands(Value lhs, Value rhs, Location loc, // The last two dimensions are the matrix row/col dimensions. Don't broadcast // them. SmallVector result_batch_shape_compile_time_extents; - OpTrait::util::getBroadcastedShape(lhs_type.getShape().drop_back(2), - rhs_type.getShape().drop_back(2), - result_batch_shape_compile_time_extents); + mlir::OpTrait::util::getBroadcastedShape( + lhs_type.getShape().drop_back(2), rhs_type.getShape().drop_back(2), + result_batch_shape_compile_time_extents); auto result_batch_shape = rewriter->create( loc, shape_type, lhs_splitted.head(), rhs_splitted.head(), /*error=*/nullptr); ",0,train 8ff33271ea4de89e6ff662fe8e479c1fcf56fe77,tensorflow/tensorflow,"Dump the computation's SessionModule as part of the tf_compile rule. 
PiperOrigin-RevId: 172946149",compile.cc,"@@ -97,11 +97,11 @@ Status CompileGraph(const GraphDef& graph_def, const tf2xla::Config& config, TF_RETURN_IF_ERROR(ConvertGraphDefToXla(graph_def, config, client, &computation, &compile_result->has_context_arg)); - if (!flags.debug_dir.empty()) { + if (!flags.out_session_module.empty()) { TF_ASSIGN_OR_RETURN(std::unique_ptr module, computation.Snapshot()); - string file = io::JoinPath(flags.debug_dir, ""tfcompile_xla_module.pb""); - TF_RETURN_IF_ERROR(WriteBinaryProto(Env::Default(), file, *module)); + TF_RETURN_IF_ERROR( + WriteBinaryProto(Env::Default(), flags.out_session_module, *module)); } xla::cpu::CpuAotCompilationOptions aot_opts( flags.target_triple, flags.target_cpu, flags.target_features, ",0,test 8ff33271ea4de89e6ff662fe8e479c1fcf56fe77,tensorflow/tensorflow,"Dump the computation's SessionModule as part of the tf_compile rule. PiperOrigin-RevId: 172946149",flags.cc,"@@ -33,9 +33,6 @@ void AppendMainFlags(std::vector* flag_list, MainFlags* flags) { ""fetch nodes will be dumped to stdout in a comma-separated list. "" ""Typically used to format arguments for other tools, e.g. "" ""freeze_graph.""}, - {""debug_dir"", &flags->debug_dir, - ""Specifies a directory to dump debugging information, including "" - ""rewritten graphs and the XLA HLO module.""}, // Flags controlling the XLA ahead-of-time compilation, that correspond to // the fields of xla::cpu::CpuAotCompilationOptions. // @@ -64,6 +61,8 @@ void AppendMainFlags(std::vector* flag_list, MainFlags* flags) { ""namespaces are given, within the global namespace.""}, {""out_object"", &flags->out_object, ""Output object file name.""}, {""out_header"", &flags->out_header, ""Output header file name.""}, + {""out_session_module"", &flags->out_session_module, + ""Output session module proto.""}, {""gen_name_to_index"", &flags->gen_name_to_index, ""Generate name-to-index data for Lookup{Arg,Result}Index methods.""}, {""gen_program_shape"", &flags->gen_program_shape, ",0,test 8ff33271ea4de89e6ff662fe8e479c1fcf56fe77,tensorflow/tensorflow,"Dump the computation's SessionModule as part of the tf_compile rule. PiperOrigin-RevId: 172946149",flags.h,"@@ -29,7 +29,6 @@ struct MainFlags { string graph; string config; bool dump_fetch_nodes = false; - string debug_dir; string target_triple; string target_cpu; string target_features; @@ -37,6 +36,7 @@ struct MainFlags { string cpp_class; string out_object; string out_header; + string out_session_module; // C++ codegen options bool gen_name_to_index = false; ",0,test 63b9442ce3cf07fc990f62b307c54694b2baa4bb,tensorflow/tensorflow,"Fix the issue that customized Sonnet module doesn't work on TF2.0 TPU. The following changes are included: 1. In TPUMirroredVariable creator, get the initial value from kwargs[""initial_value""] directly instead of from the first variable value. 2. Don't lift nodes with ""_tpu_replicate"" attributes and ""TPUReplicateMetadata"" node out of the function, which may otherwise trigger failures in TF-XLA bridge. 3. Fix a bug in TensorHandle that the resource type and shape cache is enabled even when the look up is not successful. What happens in the sonnet case is the resource shape lookup is before the resource creation, then the cache will remember the NotFound status and causes failures for future lookups. 4. Mark TPUReplicatedInput node as no differentiable. 
PiperOrigin-RevId: 252153260",tensor_handle.cc,"@@ -400,8 +400,14 @@ Status TensorHandle::GetResourceVariableDtypeAndShape( mutex_lock l(ctx_mutex_); resource_dtype_and_shape_status_ = GetResourceVariableDtypeAndShapeInternal( tensor_, resource_device_, &resource_dtype_and_shape_); - resource_dtype_and_shape_initialized_ = true; - *result = resource_dtype_and_shape_; + + // TODO(endlessroad): the resource variable shape may be partially known at + // creation time, and it can be changed later. We may not want the cache in + // this case. + if (resource_dtype_and_shape_status_.ok()) { + resource_dtype_and_shape_initialized_ = true; + *result = resource_dtype_and_shape_; + } return resource_dtype_and_shape_status_; } ",0,train 63b9442ce3cf07fc990f62b307c54694b2baa4bb,tensorflow/tensorflow,"Fix the issue that customized Sonnet module doesn't work on TF2.0 TPU. The following changes are included: 1. In TPUMirroredVariable creator, get the initial value from kwargs[""initial_value""] directly instead of from the first variable value. 2. Don't lift nodes with ""_tpu_replicate"" attributes and ""TPUReplicateMetadata"" node out of the function, which may otherwise trigger failures in TF-XLA bridge. 3. Fix a bug in TensorHandle that the resource type and shape cache is enabled even when the look up is not successful. What happens in the sonnet case is the resource shape lookup is before the resource creation, then the cache will remember the NotFound status and causes failures for future lookups. 4. Mark TPUReplicatedInput node as no differentiable. PiperOrigin-RevId: 252153260",tpu_strategy.py,"@@ -438,6 +438,15 @@ class TPUExtended(distribute_lib.StrategyExtendedV1): value_list = [] for i, d in enumerate(devices): with ops.device(d): + if i == 0: + initial_value = kwargs[""initial_value""] + # TODO(b/134779280): Remove initialization scope once the + # ""Tensor-typed variable initializers must either be wrapped in an "" + # ""init_scope or callable"" error is fixed. + with ops.init_scope(): + initial_value = initial_value() if callable( + initial_value) else initial_value + if i > 0: # Give replicas meaningful distinct names: var0name = value_list[0].name.split("":"")[0] @@ -445,22 +454,11 @@ class TPUExtended(distribute_lib.StrategyExtendedV1): # ensure that we ignore the name scope and instead use the given # name as the absolute name of the variable. kwargs[""name""] = ""%s/replica_%d/"" % (var0name, i) - # Initialize replicas with the same value: - def initial_value_fn(): - return array_ops.identity(initial_value) + kwargs[""initial_value""] = initial_value - kwargs[""initial_value""] = initial_value_fn with context.device_policy(context.DEVICE_PLACEMENT_SILENT): v = next_creator(*args, **kwargs) - if i == 0: - # To avoid incorrectly nested device scopes, we exit out of - # existing control flow scopes and function building graphs. - # TODO(b/132997073): Remove initialization scope once nested - # device scope issue has been fixed. - with ops.init_scope(): - initial_value = ( - v.value() if ops.executing_eagerly_outside_functions() else - v.initial_value) + assert not isinstance(v, values.TPUMirroredVariable) value_list.append(v) return value_list ",0,train 63b9442ce3cf07fc990f62b307c54694b2baa4bb,tensorflow/tensorflow,"Fix the issue that customized Sonnet module doesn't work on TF2.0 TPU. The following changes are included: 1. In TPUMirroredVariable creator, get the initial value from kwargs[""initial_value""] directly instead of from the first variable value. 2. 
Don't lift nodes with ""_tpu_replicate"" attributes and ""TPUReplicateMetadata"" node out of the function, which may otherwise trigger failures in TF-XLA bridge. 3. Fix a bug in TensorHandle that the resource type and shape cache is enabled even when the look up is not successful. What happens in the sonnet case is the resource shape lookup is before the resource creation, then the cache will remember the NotFound status and causes failures for future lookups. 4. Mark TPUReplicatedInput node as no differentiable. PiperOrigin-RevId: 252153260",lift_to_graph.py,"@@ -204,13 +204,20 @@ def _copy_non_source(op, graph, op_map): old_graph_op=original_control_input)) else: copied_control_inputs.append(copied_control_input) + + # Don't copy over nodes with _tpu_replicate attribute. This attributed is used + # to signal that the op was built inside a tpu_replicate context; if we're + # lifting it to another graph we're similarly lifting it into another context. with ops.control_dependencies(copied_control_inputs), ops.device(op.device): copied_op = graph.create_op( op_type=op.type, inputs=copied_inputs, dtypes=[x.dtype for x in op.outputs], - attrs={key: value for key, value in op.node_def.attr.items() - if not key.startswith(""_class"")}, # b/128981532. + attrs={ + key: value for key, value in op.node_def.attr.items() + if not key.startswith(""_class"") and + not key.startswith(""_tpu_replicate"") + }, # b/128981532. name=op.name) op_map[op] = copied_op for i, o in enumerate(op.outputs): @@ -339,6 +346,10 @@ def lift_to_graph(init_tensors, graph, sources=None, marked_ops.add(op) ops_to_copy.append(op) for inp in _graph_inputs(op): + # Don't lift the TPUReplicateMetadata nodes out of the function, because + # it has no registered kernels. + if inp.name == ""TPUReplicateMetadata"": + continue unvisited_ops.add(inp) if (all(x in marked_ops for x in op_outputs[inp]) and inp not in sources): @@ -403,6 +414,10 @@ def lift_to_graph(init_tensors, graph, sources=None, mutation.copied_op._update_input( mutation.input_index, op_map[mutation.old_graph_tensor]) for mutation in control_mutations: + # Don't lift the TPUReplicateMetadata nodes out of the function, because + # it has no registered kernels. + if mutation.old_graph_op.name == ""TPUReplicateMetadata"": + continue mutation.copied_op._add_control_input(op_map[mutation.old_graph_op]) # pylint: enable=protected-access ",0,train 63b9442ce3cf07fc990f62b307c54694b2baa4bb,tensorflow/tensorflow,"Fix the issue that customized Sonnet module doesn't work on TF2.0 TPU. The following changes are included: 1. In TPUMirroredVariable creator, get the initial value from kwargs[""initial_value""] directly instead of from the first variable value. 2. Don't lift nodes with ""_tpu_replicate"" attributes and ""TPUReplicateMetadata"" node out of the function, which may otherwise trigger failures in TF-XLA bridge. 3. Fix a bug in TensorHandle that the resource type and shape cache is enabled even when the look up is not successful. What happens in the sonnet case is the resource shape lookup is before the resource creation, then the cache will remember the NotFound status and causes failures for future lookups. 4. Mark TPUReplicatedInput node as no differentiable. PiperOrigin-RevId: 252153260",tpu.py,"@@ -42,6 +42,7 @@ from tensorflow.python.util import compat from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export +ops.NotDifferentiable(""TPUReplicatedInput"") # Operations that indicate some error in the users graph, e.g. 
a placeholder # that's introduced outside of the infeed. ",0,train a8dbd3a680b06f617baef53dfad47a10e642f0ca,tensorflow/tensorflow,"Do not override `steps_per_epoch` if it is not None. Currently if you pass a tf.keras.preprocessing.image.ImageDataGenerator to fit, with a `steps_per_epoch`, your manual `steps_per_epoch` is overridden with the real value. Fix it to only override the value if the `steps_per_epoch` was not manually set. PiperOrigin-RevId: 229774626",training_generator.py,"@@ -410,7 +410,9 @@ def convert_to_generator_like(data, and may be `None` or `[None]`. batch_size: Used when creating a generator out of tuples of NumPy arrays or EagerTensors. - steps_per_epoch: Steps of the generator to run each epoch. + steps_per_epoch: Steps of the generator to run each epoch. If `None` the + number of steps will be read from the data (for + `keras.utils.data_utils.Sequence` types). epochs: Total number of epochs to run. shuffle: Whether the data should be shuffled. @@ -431,7 +433,8 @@ def convert_to_generator_like(data, if data_utils.is_generator_or_sequence(data) or isinstance( data, iterator_ops.EagerIterator): if isinstance(data, data_utils.Sequence): - steps_per_epoch = len(data) + if steps_per_epoch is None: + steps_per_epoch = len(data) return data, steps_per_epoch if isinstance(data, dataset_ops.DatasetV2): return dataset_ops.make_one_shot_iterator(data), steps_per_epoch ",0,test ede138563636c4db03fa915efed5e4627f099da5,tensorflow/tensorflow,"Extend space to batch to apply to larger batch sizes PiperOrigin-RevId: 335657911 Change-Id: I22a9f7f978b7d64bf09654771036d044d3d3ef41",space_to_batch_converter.cc,"@@ -52,7 +52,7 @@ class ConvolutionVisitor : public DfsHloVisitorWithDefault { Status HandleConvolution(HloInstruction* convolution) override; // Runs the visitor on a computation. - static bool Run(HloComputation* computation); + static bool Run(int64 limit_on_batch_size, HloComputation* computation); // Returns whether any convolution ops were rewritten. const bool changed() const { return changed_; } @@ -60,18 +60,23 @@ class ConvolutionVisitor : public DfsHloVisitorWithDefault { ~ConvolutionVisitor() override = default; private: - explicit ConvolutionVisitor(HloComputation* computation) - : computation_(computation) {} + explicit ConvolutionVisitor(int64 limit_on_batch_size, + HloComputation* computation) + : computation_(computation), limit_on_batch_size_(limit_on_batch_size) {} // Current HloComputation instance the ConvolutionVisitor is traversing. HloComputation* computation_; // Whether rewrite has occurred. bool changed_ = false; + + // Limit on batch size to apply this technique on. + int64 limit_on_batch_size_; }; -bool ConvolutionVisitor::Run(HloComputation* computation) { - ConvolutionVisitor visitor(computation); +bool ConvolutionVisitor::Run(int64 limit_on_batch_size, + HloComputation* computation) { + ConvolutionVisitor visitor(limit_on_batch_size, computation); TF_CHECK_OK(computation->Accept(&visitor)); return visitor.changed_; } @@ -93,11 +98,18 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { constexpr int64 kLowLimitForSplitCount = 4; constexpr int64 kHighLimitForSplitCount = 24; + // Batch in batch_group_count has different semantics (it isn't true batch). + // Consider supporting this case in future if needed. 
+ if (convolution->batch_group_count() != 1) { + return Status::OK(); + } + if (convolution->window().dimensions(kChosenSpatialDim).window_dilation() != 1) { return Status::OK(); } + // TODO(b/168316428): Support base dilations. if (convolution->window().dimensions(kChosenSpatialDim).base_dilation() != 1) { return Status::OK(); @@ -108,8 +120,7 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { const int64 old_batch_size = convolution->operand(0)->shape().dimensions(activations_batch_dim); - // TODO(b/168316428): Only doing this for batch 1 currently. Extend later. - if (old_batch_size != 1) { + if (old_batch_size > limit_on_batch_size_) { return Status::OK(); } @@ -261,11 +272,20 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { // -2 low padding and +2 high padding) to create shape B. Then, we select // between A and B such that halo regions are placed into A at the right // locations. + + // The benefit of the above mentioned scheme is that it allows for batch + // growth. Here are some examples of the size increases it causes for a 3x3 + // kernel. + // with batch=1, [1,16] -> [4,4] -> [4,6] -> [1,24] growth of 8. + // with batch=2, [2,16] -> [8,4] -> [8,6] -> [1,48] growth of 16. + // with batch=3, [3,16] -> [12,4] -> [12,6] -> [1,72] growth of 24. + std::vector reshape_dimensions( activations->shape().dimensions().begin(), activations->shape().dimensions().end()); + reshape_dimensions[spatial_dimension_to_split] = spatial_split_size; - reshape_dimensions[activations_batch_dim] = num_splits; + reshape_dimensions[activations_batch_dim] = num_splits * old_batch_size; TF_ASSIGN_OR_RETURN(HloInstruction * batch_increased_reshape, MakeReshapeHlo(reshape_dimensions, activations)); @@ -337,11 +357,19 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { TF_ASSIGN_OR_RETURN(HloInstruction * select, MakeSelectHlo(shape_mask, straightened_activations, rotated_activations, convolution)); - VLOG(1) << ""Select generated""; + VLOG(1) << ""Select generated"" << select->ToString(); // Increase batch size for one last time. - TF_ASSIGN_OR_RETURN( - activations, MakeReshapeHlo(pad_applied->shape().dimensions(), select)); + std::vector combined_batch_dimensions( + pad_applied->shape().dimensions().begin(), + pad_applied->shape().dimensions().end()); + + combined_batch_dimensions[activations_batch_dim] = + old_batch_size * num_splits; + TF_ASSIGN_OR_RETURN(activations, + MakeReshapeHlo(combined_batch_dimensions, select)); + + VLOG(1) << ""Batch merge done "" << activations->ToString(); // Now, we rewrite the convolution with a larger batch. 
const auto& activations_shape = activations->shape(); @@ -385,28 +413,35 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { VLOG(1) << ""new_conv "" << new_conv->ToString(); + const int64 output_split_spatial_dim = + new_dim_numbers.output_spatial_dimensions(kChosenSpatialDim); + const int64 output_batch_dim = new_dim_numbers.output_batch_dimension(); + Shape new_shape = new_conv->shape(); - const int64 new_batch_size = - new_shape.dimensions(new_dim_numbers.output_batch_dimension()); - const int64 new_spatial_dim_size = new_shape.dimensions( - new_dim_numbers.output_spatial_dimensions(kChosenSpatialDim)); - new_shape.set_dimensions( - new_dim_numbers.output_spatial_dimensions(kChosenSpatialDim), - new_batch_size * new_spatial_dim_size); - new_shape.set_dimensions(new_dim_numbers.output_batch_dimension(), - old_batch_size); + const int64 new_batch_size = new_shape.dimensions(output_batch_dim); + const int64 new_spatial_dim_size = + new_shape.dimensions(output_split_spatial_dim); + + CHECK_EQ(new_batch_size % old_batch_size, 0); + + const int64 output_split_batch_size = new_batch_size / old_batch_size; + + std::vector new_dimensions(new_conv->shape().dimensions().begin(), + new_conv->shape().dimensions().end()); + new_dimensions[output_split_spatial_dim] = + output_split_batch_size * new_spatial_dim_size; + new_dimensions[new_dim_numbers.output_batch_dimension()] = old_batch_size; // Reshape the output of the new conv into the old convolutions shape. TF_ASSIGN_OR_RETURN(HloInstruction * reshape, - MakeReshapeHlo(new_shape, new_conv)); + MakeReshapeHlo(new_dimensions, new_conv)); convolution->SetupDerivedInstruction(reshape); std::vector start_indices(rank, 0), - end_indices(new_shape.dimensions().begin(), new_shape.dimensions().end()), + end_indices(new_dimensions.begin(), new_dimensions.end()), strides(rank, 1); - end_indices[new_dim_numbers.output_spatial_dimensions(kChosenSpatialDim)] = - convolution->shape().dimensions( - dim_numbers.output_spatial_dimensions(kChosenSpatialDim)); + end_indices[output_split_spatial_dim] = convolution->shape().dimensions( + dim_numbers.output_spatial_dimensions(kChosenSpatialDim)); // This slicing is getting rid of the padding we added to evenly divide space. TF_ASSIGN_OR_RETURN( @@ -431,7 +466,7 @@ StatusOr ConvolutionSpaceToBatchConverter::Run(HloModule* module) { module->ToString()); bool changed = false; for (auto* comp : module->MakeNonfusionComputations()) { - if (ConvolutionVisitor::Run(comp)) { + if (ConvolutionVisitor::Run(limit_on_batch_size_, comp)) { changed = true; } } ",0,train ede138563636c4db03fa915efed5e4627f099da5,tensorflow/tensorflow,"Extend space to batch to apply to larger batch sizes PiperOrigin-RevId: 335657911 Change-Id: I22a9f7f978b7d64bf09654771036d044d3d3ef41",space_to_batch_converter.h,"@@ -26,7 +26,8 @@ namespace xla { // batch. class ConvolutionSpaceToBatchConverter : public HloModulePass { public: - ConvolutionSpaceToBatchConverter() = default; + explicit ConvolutionSpaceToBatchConverter(int64 limit_on_batch_size = 1) + : limit_on_batch_size_(limit_on_batch_size) {} absl::string_view name() const override { return ""convolution-space-to-batch-converter""; @@ -35,6 +36,8 @@ class ConvolutionSpaceToBatchConverter : public HloModulePass { // Run convolution rewriting on the given computation. Returns whether the // computation was changed. 
StatusOr Run(HloModule* module) override; + + int64 limit_on_batch_size_; }; } // namespace xla ",0,train ede138563636c4db03fa915efed5e4627f099da5,tensorflow/tensorflow,"Extend space to batch to apply to larger batch sizes PiperOrigin-RevId: 335657911 Change-Id: I22a9f7f978b7d64bf09654771036d044d3d3ef41",space_to_batch_converter_test.cc,"@@ -65,31 +65,42 @@ ENTRY computation { TEST_F(ConvolutionSpaceToBatchConverterTest, SimpleBatch2) { string hlo_string = R""( - HloModule module -ENTRY computation { - %p0 = bf16[2,258,258,32] parameter(0) - %p1 = bf16[3,3,32,32] parameter(1) - ROOT %convolution = bf16[2,256,256,32] convolution(%p0, %p1), window={size=3x3}, - dim_labels=b01f_01io->b01f -} + ENTRY computation { + %p0 = bf16[2,258,258,32] parameter(0) + %p1 = bf16[3,3,32,32] parameter(1) + ROOT %convolution = bf16[2,256,256,32] convolution(%p0, %p1), window={size=3x3}, + dim_labels=b01f_01io->b01f + } )""; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(hlo_string)); - ConvolutionSpaceToBatchConverter converter; - ASSERT_FALSE(converter.Run(module.get()).ValueOrDie()); + ConvolutionSpaceToBatchConverter converter(/*limit_on_batch_size=*/2); + ASSERT_TRUE(converter.Run(module.get()).ValueOrDie()); + auto computation = module->entry_computation(); + HloInstruction* root = computation->root_instruction(); + EXPECT_THAT(root, op::Transpose()); + EXPECT_THAT(root->operand(0), op::Slice()); + auto reshape = root->operand(0)->operand(0); + EXPECT_THAT(reshape, op::Reshape()); + EXPECT_THAT(reshape->operand(0), op::Convolution()); + const int64 batch_dim = reshape->operand(0) + ->convolution_dimension_numbers() + .output_batch_dimension(); + // Verify that the transform has increased the batch size. + EXPECT_GT(reshape->operand(0)->shape().dimensions(batch_dim), 1); } -TEST_F(ConvolutionSpaceToBatchConverterTest, Batch1WithStrideAndPad) { +TEST_F(ConvolutionSpaceToBatchConverterTest, Batch4WithStrideAndPad) { string hlo_string = R""( HloModule module ENTRY computation { - %p0 = bf16[1,224,224,3]{3,2,1,0} parameter(0) + %p0 = bf16[4,224,224,3]{3,2,1,0} parameter(0) %p1 = bf16[7,7,3,64]{3,2,1,0} parameter(1) - ROOT %convolution.3 = bf16[1,112,112,64]{3,2,1,0} convolution(%p0, %p1), + ROOT %convolution.3 = bf16[4,112,112,64]{3,2,1,0} convolution(%p0, %p1), window={size=7x7 stride=2x2 pad=3_3x3_3}, dim_labels=b01f_01io->b01f } )""; @@ -97,7 +108,7 @@ TEST_F(ConvolutionSpaceToBatchConverterTest, Batch1WithStrideAndPad) { ParseAndReturnVerifiedModule(hlo_string)); auto computation = module->entry_computation(); - ConvolutionSpaceToBatchConverter converter; + ConvolutionSpaceToBatchConverter converter(/*limit_on_batch_size=*/4); ASSERT_TRUE(converter.Run(module.get()).ValueOrDie()); HloInstruction* root = computation->root_instruction(); EXPECT_THAT(root, op::Transpose()); @@ -109,7 +120,7 @@ TEST_F(ConvolutionSpaceToBatchConverterTest, Batch1WithStrideAndPad) { ->convolution_dimension_numbers() .output_batch_dimension(); - EXPECT_GT(reshape->operand(0)->shape().dimensions(batch_dim), 1); + EXPECT_GT(reshape->operand(0)->shape().dimensions(batch_dim), 4); } TEST_F(ConvolutionSpaceToBatchConverterTest, Batch1WithKernelDilation) { ",0,train 4590f82223df65d7c0b1b2642408b0c8d17e1083,tensorflow/tensorflow,"Change callers of tf.image.per_image_whitening() to use tf.image.per_image_standardization(). Once these changes are submitted, per_image_whitening() can be removed. 
Change: 137714408",cifar10_input.py,"@@ -179,7 +179,7 @@ def distorted_inputs(data_dir, batch_size): lower=0.2, upper=1.8) # Subtract off the mean and divide by the variance of the pixels. - float_image = tf.image.per_image_whitening(distorted_image) + float_image = tf.image.per_image_standardization(distorted_image) # Ensure that the random shuffling has good mixing properties. min_fraction_of_examples_in_queue = 0.4 @@ -234,7 +234,7 @@ def inputs(eval_data, data_dir, batch_size): width, height) # Subtract off the mean and divide by the variance of the pixels. - float_image = tf.image.per_image_whitening(resized_image) + float_image = tf.image.per_image_standardization(resized_image) # Ensure that the random shuffling has good mixing properties. min_fraction_of_examples_in_queue = 0.4 ",0,train 4590f82223df65d7c0b1b2642408b0c8d17e1083,tensorflow/tensorflow,"Change callers of tf.image.per_image_whitening() to use tf.image.per_image_standardization(). Once these changes are submitted, per_image_whitening() can be removed. Change: 137714408",image_ops_test.py,"@@ -581,14 +581,14 @@ class PerImageWhiteningTest(test_util.TensorFlowTestCase): with self.test_session(use_gpu=True): x = constant_op.constant(x_np, shape=x_shape) - y = image_ops.per_image_whitening(x) + y = image_ops.per_image_standardization(x) y_tf = y.eval() self.assertAllClose(y_tf, y_np, atol=1e-4) def testUniformImage(self): im_np = np.ones([19, 19, 3]).astype(np.float32) * 249 im = constant_op.constant(im_np) - whiten = image_ops.per_image_whitening(im) + whiten = image_ops.per_image_standardization(im) with self.test_session(use_gpu=True): whiten_np = whiten.eval() self.assertFalse(np.any(np.isnan(whiten_np))) ",0,train bf3e812d313010246c480d81cc815fe0c92e5d70,tensorflow/tensorflow,"Add peer failure and recovery test cases with coordination service enabled. Conditionally disable MWMS health check when coordination service is enabled. PiperOrigin-RevId: 391344263 Change-Id: I0a3344eafe8fc02cd407b5d7a01017ec9a9d92fb",collective_all_reduce_strategy.py,"@@ -451,6 +451,11 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended): config_proto = copy.deepcopy(context.context().config) config_proto = self._update_config_proto(config_proto) + # If coordination service is enabled, use its internal heartbeat to detect + # peer failures instead of the Python-level health check. + if config_proto.experimental.coordination_service: + self._enable_check_health = False + if hasattr(cluster_resolver, ""port""): port = cluster_resolver.port else: ",0,train bf3e812d313010246c480d81cc815fe0c92e5d70,tensorflow/tensorflow,"Add peer failure and recovery test cases with coordination service enabled. Conditionally disable MWMS health check when coordination service is enabled. PiperOrigin-RevId: 391344263 Change-Id: I0a3344eafe8fc02cd407b5d7a01017ec9a9d92fb",mwms_peer_failure_test.py,"@@ -30,8 +30,11 @@ from tensorflow.python.distribute import collective_all_reduce_strategy as mwms_ from tensorflow.python.distribute import multi_process_runner from tensorflow.python.distribute import multi_worker_test_base from tensorflow.python.distribute import test_util +from tensorflow.python.eager import context from tensorflow.python.eager import test +COORDINATION_SERVICE = None +RPC_PROTOCOL = ""grpc"" # Put it in top level so it executes in the child processes as well. mwms_lib.CollectiveAllReduceExtended._enable_check_health = True @@ -77,6 +80,7 @@ class PeerFailureTest(test.TestCase): # the first replica to all replicas. 
def worker_fn(): + context.context().enable_coordination_service(COORDINATION_SERVICE) strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() with strategy.scope(): tf.Variable(1.) @@ -87,7 +91,8 @@ class PeerFailureTest(test.TestCase): return v.read_value().numpy() cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2) - mpr = multi_process_runner.MultiProcessRunner(worker_fn, cluster_spec) + mpr = multi_process_runner.MultiProcessRunner( + worker_fn, cluster_spec, rpc_layer=RPC_PROTOCOL) mpr.start() # TODO(b/151232436): Always raise UnavailableError when a peer fails. with self.assertRaises( @@ -111,6 +116,7 @@ class PeerFailureTest(test.TestCase): # not aware of the failures of the receiving party. def worker_fn(): + context.context().enable_coordination_service(COORDINATION_SERVICE) strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() value = tf.identity([1.]) strategy.reduce(""sum"", value, axis=None) @@ -120,7 +126,8 @@ class PeerFailureTest(test.TestCase): strategy.reduce(""sum"", value, axis=None) cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2) - mpr = multi_process_runner.MultiProcessRunner(worker_fn, cluster_spec) + mpr = multi_process_runner.MultiProcessRunner( + worker_fn, cluster_spec, rpc_layer=RPC_PROTOCOL) mpr.start() # TODO(b/151232436): Always raise UnavailableError when a peer fails. with self.assertRaises( @@ -136,6 +143,7 @@ class PeerFailureRecoverTest(test.TestCase): # See PeerFailureTest.test_creating_variable def worker_fn(attempts): + context.context().enable_coordination_service(COORDINATION_SERVICE) strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() task_id, attempt = get_attempt(strategy, attempts) with strategy.scope(): @@ -149,7 +157,11 @@ class PeerFailureRecoverTest(test.TestCase): cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2) attempts = multi_process_runner.manager().dict() mpr = multi_process_runner.MultiProcessRunner( - worker_fn, cluster_spec, args=(attempts,), auto_restart=True) + worker_fn, + cluster_spec, + rpc_layer=RPC_PROTOCOL, + args=(attempts,), + auto_restart=True) mpr.start() results = mpr.join(timeout=90).return_value self.assertEqual(results[0], results[1]) @@ -158,6 +170,7 @@ class PeerFailureRecoverTest(test.TestCase): # See PeerFailureTest.test_reduce_small_tensor def worker_fn(attempts): + context.context().enable_coordination_service(COORDINATION_SERVICE) strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() task_id, attempt = get_attempt(strategy, attempts) value = tf.identity([1.]) @@ -170,7 +183,11 @@ class PeerFailureRecoverTest(test.TestCase): cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2) attempts = multi_process_runner.manager().dict() mpr = multi_process_runner.MultiProcessRunner( - worker_fn, cluster_spec, args=(attempts,), auto_restart=True) + worker_fn, + cluster_spec, + rpc_layer=RPC_PROTOCOL, + args=(attempts,), + auto_restart=True) mpr.start() results = mpr.join(timeout=90).return_value self.assertAllEqual(results, [[2.], [2.]]) @@ -189,6 +206,7 @@ class PeerFailureRecoverTest(test.TestCase): mwms_lib.CollectiveAllReduceExtended._check_alive_interval = 30 mwms_lib.CollectiveAllReduceExtended._check_alive_initial_timeout = 30 + context.context().enable_coordination_service(COORDINATION_SERVICE) strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() task_id, attempt = get_attempt(strategy, attempts) @@ -209,7 +227,11 @@ class 
PeerFailureRecoverTest(test.TestCase): cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2) attempts = multi_process_runner.manager().dict() mpr = multi_process_runner.MultiProcessRunner( - worker_fn, cluster_spec, args=(attempts,), auto_restart=True) + worker_fn, + cluster_spec, + rpc_layer=RPC_PROTOCOL, + args=(attempts,), + auto_restart=True) mpr.start() mpr.join(timeout=90) ",0,train bf3e812d313010246c480d81cc815fe0c92e5d70,tensorflow/tensorflow,"Add peer failure and recovery test cases with coordination service enabled. Conditionally disable MWMS health check when coordination service is enabled. PiperOrigin-RevId: 391344263 Change-Id: I0a3344eafe8fc02cd407b5d7a01017ec9a9d92fb",context.py,"@@ -506,6 +506,10 @@ class Context(object): device_list = pywrap_tfe.TFE_ContextListDevices(self._context_handle) try: self._num_gpus = 0 + current_job, current_task = None, None + server_def = self._server_def or self._collective_ops_server_def + if server_def is not None: + current_job, current_task = server_def.job_name, server_def.task_index for i in range(pywrap_tfe.TF_DeviceListCount(device_list)): dev_name = pywrap_tfe.TF_DeviceListName(device_list, i) context_devices.append(pydev.canonical_name(dev_name)) @@ -517,7 +521,8 @@ class Context(object): logical_devices.append( LogicalDevice(name=spec.to_string(), device_type=spec.device_type)) dev_type = pywrap_tfe.TF_DeviceListType(device_list, i) - if dev_type == ""GPU"": + if (dev_type == ""GPU"" and spec.job == current_job and + spec.task == current_task): self._num_gpus += 1 finally: ",0,train 42318e0e71123b9e776f85fb2c397b3cbda3d596,tensorflow/tensorflow,"Minor fixes to SpaceToDepth and DepthToSpace error strings. Change: 145747120",depthtospace_op.cc,"@@ -59,7 +59,7 @@ class DepthToSpaceOp : public OpKernel { static const int kRequiredDims = 4; OP_REQUIRES(context, kRequiredDims == dims, errors::InvalidArgument(""Input rank should be: "", kRequiredDims, - ""instead of: "", dims)); + "" instead of: "", dims)); const int batch_size = input.dim_size(0); const int input_height = input.dim_size(1); @@ -72,7 +72,7 @@ class DepthToSpaceOp : public OpKernel { OP_REQUIRES( context, input_depth % block_size_sq == 0, errors::InvalidArgument(""Input depth dimension "", input_depth, - ""should be divisible by: "", block_size_sq)); + "" should be divisible by: "", block_size_sq)); const int output_depth = input_depth / block_size_sq; const int output_width = input_width * block_size_; ",0,train 42318e0e71123b9e776f85fb2c397b3cbda3d596,tensorflow/tensorflow,"Minor fixes to SpaceToDepth and DepthToSpace error strings. Change: 145747120",spacetodepth_op.cc,"@@ -59,7 +59,7 @@ class SpaceToDepthOp : public OpKernel { static const int kRequiredDims = 4; OP_REQUIRES(context, kRequiredDims == dims, errors::InvalidArgument(""Input rank should be: "", kRequiredDims, - ""instead of: "", dims)); + "" instead of: "", dims)); const int batch_size = input.dim_size(0); const int height = input.dim_size(1); @@ -67,11 +67,11 @@ class SpaceToDepthOp : public OpKernel { const int input_depth = input.dim_size(3); // Both width and height must be divisible by block_size. 
- OP_REQUIRES( - context, (width % block_size_) == 0 && (height % block_size_) == 0, - errors::InvalidArgument(""Image width "", width, "" and height "", height, - ""should be divisible by block_size: "", - block_size_)); + OP_REQUIRES(context, + (width % block_size_) == 0 && (height % block_size_) == 0, + errors::InvalidArgument( + ""Image width "", width, "" and height "", height, + "" should be divisible by block_size: "", block_size_)); const int block_size_sq = block_size_ * block_size_; ",0,train 498565a68898b4ce0a696e24fc36a52792141631,tensorflow/tensorflow,"Reset the inputs to ShapeRefiner::RunShapeFn so that it behaves the same every time it's called. To properly handle queues that have populated by several enqueue ops, merge the shapes of the inputs to all the enqueue ops before calling InferenceContext::set_output_handle_shape(). This ensures that we detect incorrect queue setups (where the 2 enqueue ops might generate tensors with incompatible shapes), and that we take all the known shape information instead of that of just one of the enqueue ops. Change: 154866747",shape_refiner.cc,"@@ -468,6 +468,8 @@ Status ShapeRefiner::RunShapeFn(const Node* node, std::vector input_tensors_as_shapes; // Run the shape inference function, and return if there was an error. + c->set_input_tensors(input_tensors); + c->set_input_tensors_as_shapes(input_tensors_as_shapes); if (op_reg_data->shape_inference_fn) { TF_RETURN_IF_ERROR(c->Run(op_reg_data->shape_inference_fn)); } else { ",0,train 498565a68898b4ce0a696e24fc36a52792141631,tensorflow/tensorflow,"Reset the inputs to ShapeRefiner::RunShapeFn so that it behaves the same every time it's called. To properly handle queues that have populated by several enqueue ops, merge the shapes of the inputs to all the enqueue ops before calling InferenceContext::set_output_handle_shape(). This ensures that we detect incorrect queue setups (where the 2 enqueue ops might generate tensors with incompatible shapes), and that we take all the known shape information instead of that of just one of the enqueue ops. Change: 154866747",graph_properties.cc,"@@ -60,8 +60,9 @@ Status GraphProperties::InferStatically() { if (!qctx) { continue; } - shape_inference::ShapeHandle data_shp = qctx->output_handle_shape(0); - if (qctx->FullyDefined(data_shp)) { + DataType queue_type = qctx->output_handle_dtype(0); + shape_inference::ShapeHandle queue_shp = qctx->output_handle_shape(0); + if (qctx->FullyDefined(queue_shp) && queue_type != DT_INVALID) { continue; } @@ -73,16 +74,20 @@ Status GraphProperties::InferStatically() { if (node->type_string().find(""Enqueue"") != std::string::npos) { if (ctx->num_inputs() == 2) { const DataType dtype = node->input_type(1); - shape_inference::ShapeHandle shp = ctx->input(1); - shape_inference::ShapeHandle refined; - TF_RETURN_IF_ERROR(qctx->Merge(shp, data_shp, &refined)); - if (qctx->set_output_handle_shape(0, refined) || - qctx->set_output_handle_dtype(0, dtype)) { - new_shapes.push(qnode); + if (queue_type == DT_INVALID) { + queue_type = dtype; + } else { + CHECK_EQ(queue_type, dtype); } + shape_inference::ShapeHandle shp = ctx->input(1); + TF_RETURN_IF_ERROR(qctx->Merge(queue_shp, shp, &queue_shp)); } } } + if (qctx->set_output_handle_dtype(0, queue_type) || + qctx->set_output_handle_shape(0, queue_shp)) { + new_shapes.push(qnode); + } } // Propagate the shapes in the transitive fan-out of the queue. 
done = new_shapes.empty(); ",0,train 498565a68898b4ce0a696e24fc36a52792141631,tensorflow/tensorflow,"Reset the inputs to ShapeRefiner::RunShapeFn so that it behaves the same every time it's called. To properly handle queues that have populated by several enqueue ops, merge the shapes of the inputs to all the enqueue ops before calling InferenceContext::set_output_handle_shape(). This ensures that we detect incorrect queue setups (where the 2 enqueue ops might generate tensors with incompatible shapes), and that we take all the known shape information instead of that of just one of the enqueue ops. Change: 154866747",graph_properties_test.cc,"@@ -177,6 +177,19 @@ TEST_F(GraphPropertiesTest, Queues) { auto dequeue2 = ops::QueueDequeue(root.WithOpName(""Dequeue2""), q2, {DataType::DT_FLOAT}); + auto q3 = + ops::RandomShuffleQueue(root.WithOpName(""Queue3""), {DataType::DT_FLOAT}); + auto dequeue3 = + ops::QueueDequeue(root.WithOpName(""Dequeue3""), q3, {DataType::DT_FLOAT}); + + auto q4 = + ops::RandomShuffleQueue(root.WithOpName(""Queue4""), {DataType::DT_FLOAT}); + auto enqueue4 = ops::QueueEnqueue(root.WithOpName(""Enqueue4""), q4, {square2}); + auto enqueue4_2 = + ops::QueueEnqueue(root.WithOpName(""Enqueue4_2""), q4, {dequeue3[0]}); + auto dequeue4 = + ops::QueueDequeue(root.WithOpName(""Dequeue4""), q4, {DataType::DT_FLOAT}); + GrapplerItem item; TF_CHECK_OK(root.ToGraphDef(&item.graph)); @@ -200,6 +213,18 @@ TEST_F(GraphPropertiesTest, Queues) { EXPECT_EQ(2, prop2.shape().dim_size()); EXPECT_EQ(3, prop2.shape().dim(0).size()); EXPECT_EQ(7, prop2.shape().dim(1).size()); + + // The dequeue3 op shape is unknown. The square2 op shape is known. Verify + // that we merge the 2 properly to determine the shape of the data coming out + // of the queue. + const auto props4 = properties.GetOutputProperties(""Dequeue4""); + EXPECT_EQ(1, props4.size()); + const OpInfo::TensorProperties& prop4 = props4[0]; + EXPECT_EQ(DT_FLOAT, prop4.dtype()); + EXPECT_FALSE(prop4.shape().unknown_rank()); + EXPECT_EQ(2, prop4.shape().dim_size()); + EXPECT_EQ(3, prop4.shape().dim(0).size()); + EXPECT_EQ(7, prop4.shape().dim(1).size()); } } // namespace ",0,train 07bba62974b2e9bc39c3161be5fcdcb9b793757f,tensorflow/tensorflow,"Adding checks for control flow nodes when calculating device colocation within the graph. In addition to the prior fix to colocation_graph.cc, this fixes the issue underlying some skipped GPU-enabled tests in placement_test.py; this commit re-enables those skipped placements tests. PiperOrigin-RevId: 395838212 Change-Id: I47e99b471196be5167a075b5ee17e1bd5504e92c",colocation_graph.cc,"@@ -819,11 +819,12 @@ Status ColocationGraph::AddHostOnlyDataTypesConstraints() { }; auto enter = [&](Node* n) -> void { + // TODO(b/199443424): Replace this logic with propagated type information. if (data::DatasetOpKernel::IsDatasetOp(n->op_def())) { // NOTE: Datasets are expected to live on the host. This code should be // updated if that changes. Under this assumption, however, we must // locate some ops on the host when the input is a dataset variant. - if (node->IsRetval() || node->IsIdentity()) { + if (node->IsRetval() || node->IsIdentity() || node->IsControlFlow()) { is_host_data_type = true; } } else { ",0,test 07bba62974b2e9bc39c3161be5fcdcb9b793757f,tensorflow/tensorflow,"Adding checks for control flow nodes when calculating device colocation within the graph. 
In addition to the prior fix to colocation_graph.cc, this fixes the issue underlying some skipped GPU-enabled tests in placement_test.py; this commit re-enables those skipped placements tests. PiperOrigin-RevId: 395838212 Change-Id: I47e99b471196be5167a075b5ee17e1bd5504e92c",placement_test.py,"@@ -66,7 +66,6 @@ class PlacementTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testWhile(self): - self.skipTest(""b/166625126"") @def_function.function def f(): @@ -121,7 +120,6 @@ class PlacementTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testCond(self): - self.skipTest(""b/166625126"") # Ideally, placer should avoid cross-device copies even when the cond op # has no placement constraints. @def_function.function @@ -141,7 +139,6 @@ class PlacementTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testId(self): - self.skipTest(""b/166625126"") # Ideally, placer should know that Identity(dataset) should be on the same # device as the dataset. @def_function.function ",0,test f8a98002491b7cd5f04ec7def6fa7dc30a66215a,tensorflow/tensorflow,"Reenable test. PiperOrigin-RevId: 155894188",bias_op_test.py,"@@ -184,11 +184,8 @@ class BiasAddTest(test.TestCase): if dtype == dtypes.float64: threshold = 1e-10 self.assertAllClose(tensor_jacob_t, tensor_jacob_n, threshold, threshold) - # TODO(annarev): Re-add assertion for float16, float32 dtypes and NCHW - # once we figure out why this check started failing with cuda mavx. - if dtype == dtypes.float64 or data_format != ""NCHW"": - self.assertAllClose(bias_jacob_t, bias_jacob_n, threshold, threshold) - self.assertAllClose(grad_jacob_t, grad_jacob_n, threshold, threshold) + self.assertAllClose(bias_jacob_t, bias_jacob_n, threshold, threshold) + self.assertAllClose(grad_jacob_t, grad_jacob_n, threshold, threshold) def testGradientTensor(self): for (data_format, use_gpu) in GetTestConfigs(): ",0,test eb03daf8c03cc5c7737afaa1123347976cc1eb35,tensorflow/tensorflow,"Removed _keras_mask from EagerTensor It does not have to always be part of an EagerTensor and could instead be stored in a __dict__. Note that as a side-effect * an EagerTensor with a _keras_mask always has a materialized __dict__ and consumes ~280 bytes more; * EagerTensor._keras_mask lookup is slightly less efficient. PiperOrigin-RevId: 245298840",pywrap_tensor.cc,"@@ -283,9 +283,6 @@ typedef struct EagerTensor { // cycles, and hence don't provide GC support for it. PyObject* handle_data; - // This stores `_keras_mask` object and is set by Tensorflow layers. - PyObject* keras_mask; - // This stores `_tensor_shape`, a cached `TensorShape` object, and is set the // first time that `_EagerTensorBase`'s `shape` property is called. PyObject* tensor_shape; @@ -349,8 +346,6 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) { Py_INCREF(Py_None); self->handle_data = Py_None; Py_INCREF(Py_None); - self->keras_mask = Py_None; - Py_INCREF(Py_None); self->tensor_shape = Py_None; self->status = TF_NewStatus(); self->dict = nullptr; @@ -498,7 +493,6 @@ void EagerTensor_dealloc(EagerTensor* self) { TF_DeleteStatus(self->status); Py_DECREF(self->handle_data); - Py_DECREF(self->keras_mask); Py_DECREF(self->tensor_shape); // If an attribute dictionary has been created, release it. 
Note that this // is only ever created by CPython's attribute setting methods; we don't @@ -593,19 +587,6 @@ static int EagerTensor_settensor_handle(EagerTensor* self, PyObject* value, return 0; } -static PyObject* EagerTensor_keras_mask(EagerTensor* self, void* unused) { - Py_INCREF(self->keras_mask); - return self->keras_mask; -} - -static int EagerTensor_setkeras_mask(EagerTensor* self, PyObject* value, - void* unused) { - Py_DECREF(self->keras_mask); - Py_INCREF(value); - self->keras_mask = value; - return 0; -} - static PyObject* EagerTensor_tensor_shape(EagerTensor* self, void* unused) { Py_INCREF(self->tensor_shape); return self->tensor_shape; @@ -697,9 +678,6 @@ static PyGetSetDef EagerTensor_getseters[] = { {const_cast(""_handle_data""), (getter)EagerTensor_tensor_handle, (setter)EagerTensor_settensor_handle, const_cast(""_tensor_handle""), nullptr}, - {const_cast(""_keras_mask""), (getter)EagerTensor_keras_mask, - (setter)EagerTensor_setkeras_mask, const_cast(""_keras_mask""), - nullptr}, {const_cast(""_tensor_shape""), (getter)EagerTensor_tensor_shape, (setter)EagerTensor_settensor_shape, const_cast(""_tensor_shape""), nullptr}, @@ -824,8 +802,6 @@ PyObject* EagerTensorFromHandle(TFE_TensorHandle* handle) { Py_INCREF(Py_None); t->handle_data = Py_None; Py_INCREF(Py_None); - t->keras_mask = Py_None; - Py_INCREF(Py_None); t->tensor_shape = Py_None; t->handle = handle; t->status = TF_NewStatus(); ",0,train eb03daf8c03cc5c7737afaa1123347976cc1eb35,tensorflow/tensorflow,"Removed _keras_mask from EagerTensor It does not have to always be part of an EagerTensor and could instead be stored in a __dict__. Note that as a side-effect * an EagerTensor with a _keras_mask always has a materialized __dict__ and consumes ~280 bytes more; * EagerTensor._keras_mask lookup is slightly less efficient. PiperOrigin-RevId: 245298840",network.py,"@@ -862,8 +862,9 @@ class Network(base_layer.Layer): if 'training' in argspec: kwargs.setdefault('training', training) if 'mask' in argspec: - computed_masks = nest.map_structure(lambda t: t._keras_mask, - computed_tensors) + computed_masks = nest.map_structure( + lambda t: getattr(t, '_keras_mask', None), + computed_tensors) kwargs.setdefault('mask', computed_masks) # Compute outputs. ",0,train eb03daf8c03cc5c7737afaa1123347976cc1eb35,tensorflow/tensorflow,"Removed _keras_mask from EagerTensor It does not have to always be part of an EagerTensor and could instead be stored in a __dict__. Note that as a side-effect * an EagerTensor with a _keras_mask always has a materialized __dict__ and consumes ~280 bytes more; * EagerTensor._keras_mask lookup is slightly less efficient. PiperOrigin-RevId: 245298840",training_eager.py,"@@ -120,8 +120,7 @@ def _model_loss(model, outs = model(inputs, **kwargs) outs = nest.flatten(outs) - # `None` by default for `EagerTensors`. - masks = [t._keras_mask for t in outs] + masks = [getattr(t, '_keras_mask', None) for t in outs] targets = nest.flatten(targets) # Used to keep track of individual output losses. ",0,train c25692039b954bb5aa891a26ff1744aab137eee7,tensorflow/tensorflow,"[TF:Profiler] Fix use-after-free bug introduced by previous change. PiperOrigin-RevId: 295191859 Change-Id: I36584af1414c1b4e424f558fd7a4aec46d26cf50",profiler_server.cc,"@@ -21,7 +21,6 @@ limitations under the License. 
#include ""grpcpp/grpcpp.h"" #include ""absl/strings/str_cat.h"" #include ""tensorflow/core/platform/env.h"" -#include ""tensorflow/core/profiler/profiler_service.grpc.pb.h"" #include ""tensorflow/core/profiler/rpc/profiler_service_impl.h"" #include ""tensorflow/core/util/ptr_util.h"" @@ -29,11 +28,10 @@ namespace tensorflow { void ProfilerServer::StartProfilerServer(int32 port) { string server_address = absl::StrCat(""0.0.0.0:"", port); - std::unique_ptr service = - CreateProfilerService(); + service_ = CreateProfilerService(); ::grpc::ServerBuilder builder; builder.AddListeningPort(server_address, ::grpc::InsecureServerCredentials()); - builder.RegisterService(service.get()); + builder.RegisterService(service_.get()); server_ = builder.BuildAndStart(); LOG(INFO) << ""Profiling Server listening on "" << server_address; } ",0,train c25692039b954bb5aa891a26ff1744aab137eee7,tensorflow/tensorflow,"[TF:Profiler] Fix use-after-free bug introduced by previous change. PiperOrigin-RevId: 295191859 Change-Id: I36584af1414c1b4e424f558fd7a4aec46d26cf50",profiler_server.h,"@@ -19,11 +19,10 @@ limitations under the License. #include ""grpcpp/grpcpp.h"" #include ""tensorflow/core/platform/types.h"" +#include ""tensorflow/core/profiler/profiler_service.grpc.pb.h"" namespace tensorflow { -class Thread; - class ProfilerServer { public: ~ProfilerServer(); @@ -31,6 +30,7 @@ class ProfilerServer { void StartProfilerServer(int32 port); private: + std::unique_ptr service_; std::unique_ptr<::grpc::Server> server_; }; ",0,train c87d12a5e9bc4c568bd310c2266f1f28264e20fb,tensorflow/tensorflow,"Introduce TraceMeProducer and TraceMeConsumer. PiperOrigin-RevId: 312209299 Change-Id: I304049413d332b17e141e3f85486f9676e2f859a",connected_traceme.h,"@@ -0,0 +1,122 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_PROFILER_LIB_CONNECTED_TRACEME_H_ +#define TENSORFLOW_CORE_PROFILER_LIB_CONNECTED_TRACEME_H_ + +#include + +#include ""absl/strings/string_view.h"" +#include ""absl/types/optional.h"" +#include ""tensorflow/core/profiler/lib/traceme.h"" +#include ""tensorflow/core/profiler/lib/traceme_encode.h"" + +namespace tensorflow { +namespace profiler { + +/* + * TraceMeProducer and TraceMeConsumer are used to correlate TraceMe events on + * different threads. TraceMeProducer generates the context information to be + * passed to TraceMeConsumer, which consists of the context id and optionally + * the context name. They may be provided by the user. Then, the events of the + * same context information can be correlated during the analysis. + * + * Example Usages: + * (1) Using the user-provided context name and id. The user is responsible for + * providing the same context name and id to TraceMeProducer and + * TraceMeConsumer. + * [Producer Thread] + * // user_context_id is provided by the user. 
+ * TraceMeProducer producer( + * [&] { return TraceMeEncode(""op_dispatch"", {{""op_type"", ""matmul""}}); }, + * ""executor_context"", user_context_id); + * [Consumer Thread] + * // user_context_id is provided by the user. + * TraceMeConsumer consumer( + * [&] { return ""op_execute""; }, user_context_id, ""executor_context""); + * + * (2) Using the user-provided context name and generic id. The user is + * responsible for passing the TraceMeProducer's context id to + * TraceMeConsumer as well as providing the same context name to + * TraceMeProducer and TraceMeConsumer. + * [Producer Thread] + * TraceMeProducer producer( + * [&] { return TraceMeEncode(""op_dispatch"", {{""op_type"", ""matmul""}}); }, + * ""executor_context""); + * context_id = producer.GetContextId(); + * // Pass context_id to the consumer thread. + * [Consumer Thread] + * // context_id is passed from the producer thread. + * TraceMeConsumer consumer( + * [&] { return ""op_execute""; }, context_id, ""executor_context""); + * + * (3) Using the generic context information. The user is responsible for + * passing the TraceMeProducer's context id to TraceMeConsumer. + * [Producer Thread] + * TraceMeProducer producer( + * [&] { return TraceMeEncode(""op_dispatch"", {{""op_type"", ""matmul""}}); }); + * context_id = producer.GetContextId(); + * // Pass context_id to the consumer thread. + * [Consumer Thread] + * // context_id is passed from the producer thread. + * TraceMeConsumer consumer([&] { return ""op_execute""; }, context_id); + */ +class TraceMeProducer { + public: + template + explicit TraceMeProducer(NameT name, absl::string_view context_name = """", + absl::optional context_id = absl::nullopt, + int level = 2) + : trace_me_(name, level) { + trace_me_.AppendMetadata([&] { + context_id_ = + context_id.has_value() ? *context_id : TraceMe::NewActivityId(); + if (context_name.empty()) { + return TraceMeEncode({{""$p"", context_id_}}); + } else { + return TraceMeEncode({{""$pn"", context_name}, {""$p"", context_id_}}); + } + }); + } + + uint64 GetContextId() const { return context_id_; } + + private: + TraceMe trace_me_; + uint64 context_id_ = 0; +}; + +class TraceMeConsumer { + public: + template + TraceMeConsumer(NameT name, uint64 context_id, + absl::string_view context_name = """", int level = 2) + : trace_me_(name, level) { + trace_me_.AppendMetadata([&] { + if (context_name.empty()) { + return TraceMeEncode({{""$c"", context_id}}); + } else { + return TraceMeEncode({{""$cn"", context_name}, {""$c"", context_id}}); + } + }); + } + + private: + TraceMe trace_me_; +}; + +} // namespace profiler +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PROFILER_LIB_CONNECTED_TRACEME_H_ ",0,train c87d12a5e9bc4c568bd310c2266f1f28264e20fb,tensorflow/tensorflow,"Introduce TraceMeProducer and TraceMeConsumer. PiperOrigin-RevId: 312209299 Change-Id: I304049413d332b17e141e3f85486f9676e2f859a",traceme.h,"@@ -248,6 +248,14 @@ class TraceMe { #endif } + static uint64 NewActivityId() { +#if !defined(IS_MOBILE_PLATFORM) + return TraceMeRecorder::NewActivityId(); +#else + return 0; +#endif + } + private: // Activity ID or start time used when tracing is disabled. constexpr static uint64 kUntracedActivity = 0; ",0,train c87d12a5e9bc4c568bd310c2266f1f28264e20fb,tensorflow/tensorflow,"Introduce TraceMeProducer and TraceMeConsumer. 
PiperOrigin-RevId: 312209299 Change-Id: I304049413d332b17e141e3f85486f9676e2f859a",xplane_schema.cc,"@@ -147,6 +147,11 @@ const StatTypeMap& GetStatTypeMap() { {""region_type"", kRegionType}, {""data_type"", kDataType}, {""shape"", kTensorShapes}, + // Schema related. + {""$pn"", kProducerContextName}, + {""$cn"", kConsumerContextName}, + {""$p"", kProducerId}, + {""$c"", kConsumerId}, // Device trace arguments. {""device_id"", kDeviceId}, {""context_id"", kContextId}, ",0,train c87d12a5e9bc4c568bd310c2266f1f28264e20fb,tensorflow/tensorflow,"Introduce TraceMeProducer and TraceMeConsumer. PiperOrigin-RevId: 312209299 Change-Id: I304049413d332b17e141e3f85486f9676e2f859a",xplane_schema.h,"@@ -139,6 +139,11 @@ enum StatType { kRegionType, kDataType, kTensorShapes, + // Schema related. + kProducerContextName, + kConsumerContextName, + kProducerId, + kConsumerId, // Device trace arguments. kDeviceId, kContextId, ",0,train a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite. PiperOrigin-RevId: 414377528 Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",tf_tfl_flatbuffer_helpers.cc,"@@ -269,11 +269,6 @@ Status PopulateQuantizationSpecs( quant_specs->inference_type = tensorflow::DT_QINT8; quant_specs->inference_input_type = tensorflow::DT_QINT8; } - } else { - // This flag is incompatible with post_training_quantize() as only - // QAT models can provide the desired range. - quant_specs->disable_infer_tensor_range = - toco_flags.disable_infer_tensor_range(); } // Add information about half-precision support if fp16 quantization applies. ",0,train a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite. PiperOrigin-RevId: 414377528 Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",quantization_utils.h,"@@ -520,8 +520,9 @@ struct ConvertUnsignedToSigned : public OpRewritePattern { int num_bits = qtype.getStorageTypeIntegralWidth(); if (num_bits == 8) { // If storage is 8-bit, trained num bits may be less than 8 so check here. - num_bits = - static_cast(std::ceil(std::log2(qtype.getStorageTypeMax()))); + const double range = static_cast(qtype.getStorageTypeMax() - + qtype.getStorageTypeMin()); + num_bits = static_cast(std::ceil(std::log2(range))); } // This is a positive value, and will be applied on zero points and fixed // point ranges. ",0,train a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite. PiperOrigin-RevId: 414377528 Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",tf_tfl_passes.cc,"@@ -277,11 +277,10 @@ void AddPostVariableFreezingTFToTFLConversionPasses( pass_manager->nest(), layout_optimization_options); // Prepare for TFLite dialect, rerun canonicalization, and then legalize to // the TFLite dialect. - pass_manager->addNestedPass( - mlir::TFL::CreatePrepareTFPass(pass_config.unfold_batch_matmul, - /*allow_bf16_and_f16_type_legalization=*/ - !pass_config.runtime_verification, - toco_flags.use_fake_quant_num_bits())); + pass_manager->addNestedPass(mlir::TFL::CreatePrepareTFPass( + pass_config.unfold_batch_matmul, + /*allow_bf16_and_f16_type_legalization=*/!pass_config + .runtime_verification)); pass_manager->addNestedPass(mlir::createCanonicalizerPass()); if (pass_config.shape_inference) { // Add a shape inference pass to optimize away the unnecessary casts. 
",0,train a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite. PiperOrigin-RevId: 414377528 Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",quantize.cc,"@@ -254,8 +254,7 @@ void QuantizePass::runOnFunction() { // TODO(b/202451048): separate full and weight-only post-training dynamic // range quantization - if (quant_specs.weight_quantization || enable_dynamic_range_quantization || - quant_specs.disable_infer_tensor_range) { + if (quant_specs.weight_quantization || enable_dynamic_range_quantization) { patterns.insert(ctx, quant_params); } else { patterns.insert( ",0,train a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite. PiperOrigin-RevId: 414377528 Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",convert.py,"@@ -450,8 +450,6 @@ def build_toco_flags(inference_type=dtypes.float32, disable_per_channel_quantization=False, enable_mlir_dynamic_range_quantizer=False, tf_quantization_mode=None, - disable_infer_tensor_range=False, - use_fake_quant_num_bits=False, **_): """"""Build the TOCO flags object from params."""""" toco = _toco_flags_pb2.TocoFlags() @@ -500,8 +498,6 @@ def build_toco_flags(inference_type=dtypes.float32, toco.enable_mlir_dynamic_range_quantizer = enable_mlir_dynamic_range_quantizer if tf_quantization_mode: toco.tf_quantization_mode = tf_quantization_mode - toco.disable_infer_tensor_range = disable_infer_tensor_range - toco.use_fake_quant_num_bits = use_fake_quant_num_bits return toco @@ -541,9 +537,7 @@ def build_toco_convert_protos(input_tensors, supported_backends=None, disable_per_channel_quantization=False, enable_mlir_dynamic_range_quantizer=False, - tf_quantization_mode=None, - disable_infer_tensor_range=False, - use_fake_quant_num_bits=False): + tf_quantization_mode=None): """"""Builds protocol buffers describing a conversion of a model using TOCO. Typically this is to convert from TensorFlow GraphDef to TFLite, in which @@ -643,9 +637,7 @@ def build_toco_convert_protos(input_tensors, If false, the old TOCO dynamic range quantizer is used. tf_quantization_mode: Indicates the mode of TF Quantization when the output model is used for TF Quantization. - disable_infer_tensor_range: Disable infering tensor ranges. - use_fake_quant_num_bits: Allow quantization parameters to be calculated from - num_bits attribute. + Returns: model_flags, toco_flags, debug_info: three protocol buffers describing the conversion process and debug information. @@ -683,9 +675,7 @@ def build_toco_convert_protos(input_tensors, supported_backends=supported_backends, disable_per_channel_quantization=disable_per_channel_quantization, enable_mlir_dynamic_range_quantizer=enable_mlir_dynamic_range_quantizer, - tf_quantization_mode=tf_quantization_mode, - disable_infer_tensor_range=disable_infer_tensor_range, - use_fake_quant_num_bits=use_fake_quant_num_bits) + tf_quantization_mode=tf_quantization_mode) model = _model_flags_pb2.ModelFlags() model.change_concat_input_ranges = change_concat_input_ranges for idx, input_tensor in enumerate(input_tensors): ",0,train a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite. 
PiperOrigin-RevId: 414377528 Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",lite.py,"@@ -236,8 +236,7 @@ class QuantizationMode(object): representative_dataset, graph_def, disable_per_channel=False, - experimental_new_dynamic_range_quantizer=False, - experimental_low_bit_qat=False): + experimental_new_dynamic_range_quantizer=False): self._optimizations = optimizations for deprecated_optimization in [ Optimize.OPTIMIZE_FOR_SIZE, Optimize.OPTIMIZE_FOR_LATENCY @@ -256,9 +255,6 @@ class QuantizationMode(object): self._enable_new_dynamic_range_quantizer = ( experimental_new_dynamic_range_quantizer) - # Allow training with lower than 8 bit weights to be converted - # to constants with trained scale. - self._experimental_low_bit_qat = experimental_low_bit_qat # TODO(b/162537905): Refactor the following quantization functions - # re-organize and refactor for better readability. @@ -289,12 +285,10 @@ class QuantizationMode(object): def is_integer_quantize(self): return (self.is_post_training_integer_quantize() or - self.is_training_time_int8_allow_float() or - self.is_training_time_low_bit_allow_float()) + self.is_training_time_int8_allow_float()) def is_training_time_int8_allow_float(self): - return (not self.is_training_time_low_bit_allow_float() and - self.any_optimization_enabled() and + return (self.any_optimization_enabled() and self.contains_training_quant_op()) def is_bfloat16_inference_allowed(self): @@ -333,11 +327,6 @@ class QuantizationMode(object): self.post_training_dynamic_range_int8() or self.post_training_fp16()) - def is_training_time_low_bit_allow_float(self): - return (self.any_optimization_enabled() and - self.contains_training_quant_op() and - self._experimental_low_bit_qat) - def activations_type(self): if self.is_integer_quantize(): if self._is_int16x8_target_required(): @@ -351,15 +340,12 @@ class QuantizationMode(object): """"""Flags to the converter."""""" if self.is_integer_quantize(): - is_low_bit_qat = self.is_training_time_low_bit_allow_float() return { - ""inference_type"": (inference_ty if inference_ty is not None else - self.activations_type()), + ""inference_type"": ( + inference_ty if inference_ty else self.activations_type()), ""inference_input_type"": _dtypes.float32, ""post_training_quantize"": False, # disable dynamic range quantization - ""quantize_to_float16"": False, # disable float16 quantization - ""disable_infer_tensor_range"": is_low_bit_qat, - ""use_fake_quant_num_bits"": is_low_bit_qat, + ""quantize_to_float16"": False # disable float16 quantization } elif self.post_training_dynamic_range_int8(): return { @@ -388,8 +374,7 @@ class QuantizationMode(object): # Note this might still trigger (uint8) quantization to be compatible with # TOCO. return { - ""inference_type"": ( - inference_ty if inference_ty is not None else _dtypes.float32), + ""inference_type"": inference_ty if inference_ty else _dtypes.float32, ""inference_input_type"": inference_input_ty, ""post_training_quantize"": False, # enable dynamic range quantization ""quantize_to_float16"": False, # disable float16 quantization @@ -504,8 +489,6 @@ class TFLiteConverterBase(object): # by default and remove the flag once feature parity with the old quantizer # is verified. self._experimental_new_dynamic_range_quantizer = False - # Experimental flag to enable low-bit QAT in 8 bit. - self._experimental_low_bit_qat = False def _grappler_config(self, optimizers=None): """"""Creates a tf.compat.v1.ConfigProto for configuring Grappler. 
@@ -685,8 +668,7 @@ class TFLiteConverterBase(object): quant_mode = QuantizationMode( self.optimizations, self.target_spec, self.representative_dataset, graph_def, self._experimental_disable_per_channel, - self._experimental_new_dynamic_range_quantizer, - self._experimental_low_bit_qat) + self._experimental_new_dynamic_range_quantizer) converter_kwargs.update({ ""optimization_default"": quant_mode.any_optimization_enabled(), @@ -698,8 +680,6 @@ class TFLiteConverterBase(object): quant_mode.is_post_training_integer_quantize(), ""optimization_qat"": quant_mode.is_training_time_int8_allow_float(), - ""optimization_low_bit_qat"": - quant_mode.is_training_time_low_bit_allow_float(), ""optimization_sparsify"": self._sparsify_model(), ""activations_type"": @@ -885,8 +865,7 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): self._quant_mode = QuantizationMode( self.optimizations, self.target_spec, self.representative_dataset, graph_def, self._experimental_disable_per_channel, - self._experimental_new_dynamic_range_quantizer, - self._experimental_low_bit_qat) + self._experimental_new_dynamic_range_quantizer) self._validate_inference_input_output_types(self._quant_mode) if not self._is_unknown_shapes_allowed(): @@ -1060,8 +1039,7 @@ class TFLiteSavedModelConverterV2(TFLiteConverterBaseV2): quant_mode = QuantizationMode( self.optimizations, self.target_spec, self.representative_dataset, graph_def, self._experimental_disable_per_channel, - self._experimental_new_dynamic_range_quantizer, - self._experimental_low_bit_qat) + self._experimental_new_dynamic_range_quantizer) self._validate_inference_input_output_types(quant_mode) converter_kwargs = { @@ -1883,8 +1861,7 @@ class TFLiteConverterBaseV1(TFLiteConverterBase): quant_mode = QuantizationMode( self.optimizations, self.target_spec, self.representative_dataset, self._graph_def, self._experimental_disable_per_channel, - self._experimental_new_dynamic_range_quantizer, - self._experimental_low_bit_qat) + self._experimental_new_dynamic_range_quantizer) optimized_graph = self._optimize_tf_model(self._graph_def, self._input_tensors, ",0,train a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite. PiperOrigin-RevId: 414377528 Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",lite_v2_test.py,"@@ -17,7 +17,6 @@ import ctypes import functools -import itertools import os import sys @@ -2388,67 +2387,6 @@ class FromKerasModelTest(lite_v2_test_util.ModelTest): # quantization. 
self.assertEqual(np.int8, quantized_weight['dtype']) - @parameterized.named_parameters([ - ('{}BitWeightOnly={}LowBit={}'.format(num_bits, weight_only, low_bit), - num_bits, weight_only, low_bit) for num_bits, weight_only, low_bit - in itertools.product((2, 4, 6), (True, False), (True, False))]) - @test_util.run_v2_only - def testQATLowBitKerasModel(self, num_bits, weight_only, low_bit): - bit_max = (1 << (num_bits - 1)) - 1 - bit_min = -bit_max - tf_input_shape = (5, 5, 3) - tflite_input_shape = (1,) + tf_input_shape - model, input_name, output_name = (self._createV2QATLowBitKerasModel( - tf_input_shape, weight_only, num_bits, bit_min, bit_max)) - input_data = np.linspace( - 0, 6, np.prod(tflite_input_shape)).reshape(tflite_input_shape) - tf_result = model(input_data) - - converter = tf.lite.TFLiteConverter.from_keras_model(model) - converter.optimizations = [tf.lite.Optimize.DEFAULT] - if low_bit: - converter._experimental_low_bit_qat = True - tflite_model = converter.convert() - - result = self._evaluateTFLiteModelUsingSignatureDef( - tflite_model, 'serving_default', - {input_name: input_data.astype(np.float32)})[output_name] - self.assertAllClose( - [np.linalg.norm(result - tf_result.numpy().astype(np.float32))], [0.0]) - interpreter = tf.lite.Interpreter(model_content=tflite_model) - interpreter.allocate_tensors() - num_8bit_activations = 0 - num_8bit_weights = 0 - kernel_name = ('model/conv_wrapper/Conv2D;model/conv_wrapper/' - 'FakeQuantWithMinMaxVarsPerChannel') - - for detail in interpreter.get_tensor_details(): - if (detail['dtype'] == np.int8 and detail['name'] and - detail['name'] == kernel_name): - num_8bit_weights += 1 - weights = interpreter.get_tensor(detail['index']) - if low_bit: - self.assertFalse((bit_min > weights).any() or - (weights > bit_max).any()) - else: - self.assertTrue((bit_min > weights).any() or - (weights > bit_max).any()) - self.assertIn('scales', detail['quantization_parameters']) - if low_bit and detail['quantization_parameters']['scales']: - self.assertAllClose( - detail['quantization_parameters']['scales'], [1.0]) - elif detail['dtype'] == np.int8 and detail['name']: - self.assertFalse(weight_only) - self.assertIn('scales', detail['quantization_parameters']) - if detail['quantization_parameters']['scales']: - self.assertAllClose( - detail['quantization_parameters']['scales'], [6/255]) - num_8bit_activations += 1 - - self.assertEqual(num_8bit_weights, 0 if weight_only and not low_bit else 1) - # 3 activations with full integer: conv_input, conv_output, reshape_output - self.assertEqual(num_8bit_activations, 0 if weight_only else 3) - class FromJaxModelTest(lite_v2_test_util.ModelTest): ",0,train a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite. PiperOrigin-RevId: 414377528 Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",lite_v2_test_util.py,"@@ -20,7 +20,6 @@ import os from absl.testing import parameterized import numpy as np from six.moves import zip -import tensorflow as tf from tensorflow.lite.python.interpreter import Interpreter from tensorflow.python.eager import def_function @@ -207,43 +206,3 @@ class ModelTest(test_util.TensorFlowTestCase, parameterized.TestCase): # the name of this test file. 
self.assertIn('lite_v2_test.py', file_names) self.assertNotIn('lite_test.py', file_names) - - def _createV2QATLowBitKerasModel(self, shape, weight_only, num_bits, bit_min, - bit_max): - """"""Creates a simple QAT num_bits-Weight Keras Model."""""" - input_name = 'input' - output_name = 'scores' - - class ConvWrapper(tf.keras.layers.Wrapper): - """"""A Wrapper for simulating QAT on Conv2D layers."""""" - - def build(self, input_shape): - if not self.layer.built: - self.layer.build(input_shape) - self.quantized_weights = self.layer.kernel - - def call(self, inputs): - self.layer.kernel = ( - tf.quantization.fake_quant_with_min_max_vars_per_channel( - self.quantized_weights, min=[bit_min], max=[bit_max], - num_bits=num_bits, narrow_range=True)) - if not weight_only: - quant_inputs = tf.quantization.fake_quant_with_min_max_vars( - inputs, min=0, max=6, num_bits=8) - outputs = self.layer.call(quant_inputs) - return tf.quantization.fake_quant_with_min_max_vars( - outputs, min=0, max=6, num_bits=8) - return self.layer.call(inputs) - - input_tensor = tf.keras.layers.Input(shape, name=input_name) - kernel_shape = (shape[-1], 3, 3, 1) - # Ensure constant weights contains the min and max. - initial_weights = np.linspace( - bit_min, bit_max, np.prod(kernel_shape)).reshape(kernel_shape) - test_initializer = tf.constant_initializer(initial_weights) - x = ConvWrapper(tf.keras.layers.Conv2D( - 1, (3, 3), kernel_initializer=test_initializer, - activation='relu6'))(input_tensor) - scores = tf.keras.layers.Flatten(name=output_name)(x) - model = tf.keras.Model(input_tensor, scores) - return model, input_name, output_name ",0,train 0c79d3cdee83af0cf3bd7a9c7522ea88981159a6,tensorflow/tensorflow,"Fix apparent typo in an exception message that refers to the input shape instead of the output shape. PiperOrigin-RevId: 300148776 Change-Id: Idd3d40455e248688cb73d9d32f1ad11ef5fb389c",rebatch_dataset_test.py,"@@ -92,7 +92,8 @@ class RebatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): def testScalarInputError(self): dataset = dataset_ops.Dataset.range(1024) distribute._RebatchDataset(dataset.batch(4), num_replicas=4) - with self.assertRaisesRegexp(ValueError, ""at least one dimension""): + with self.assertRaisesRegexp(ValueError, (""You can fix the issue "" + ""by adding the `batch`"")): distribute._RebatchDataset(dataset, num_replicas=4) @combinations.generate( ",0,train 0c79d3cdee83af0cf3bd7a9c7522ea88981159a6,tensorflow/tensorflow,"Fix apparent typo in an exception message that refers to the input shape instead of the output shape. PiperOrigin-RevId: 300148776 Change-Id: Idd3d40455e248688cb73d9d32f1ad11ef5fb389c",distribute.py,"@@ -92,8 +92,10 @@ class _RebatchDataset(dataset_ops.UnaryDataset): return None if len(output_shape) < 1: - raise ValueError(""Input shape should have at least one dimension. "" - ""Perhaps your input dataset is not batched?"") + raise ValueError(""Expected a dataset whose elements have rank >= 1 "" + ""but found a dataset whose elements are scalars. "" + ""You can fix the issue by adding the `batch` "" + ""transformation to the dataset."") output_dims = [d.value for d in output_shape.dims] if output_dims[0] is not None and output_dims[0] % num_replicas == 0: ",0,train 55a311cb735689a431c6aa9a6c765c5c5c034ede,tensorflow/tensorflow,"Add RISC Conv Op register. PiperOrigin-RevId: 341415820 Change-Id: Ibd5f4c939e22be2af61e434e6927898b74e523a5",risc_conv_op.cc,"@@ -0,0 +1,50 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""tensorflow/core/framework/common_shape_fns.h"" +#include ""tensorflow/core/framework/op.h"" +#include ""tensorflow/core/framework/op_kernel.h"" +#include ""tensorflow/core/framework/register_types.h"" +#include ""tensorflow/core/framework/shape_inference.h"" + +namespace tensorflow { +namespace risc { +namespace experimental { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +template +class RiscConvOp : public OpKernel { + public: + explicit RiscConvOp(OpKernelConstruction* context) : OpKernel(context) { + // TODO(b/171294012): Implement RiscConv op. + } + + void Compute(OpKernelContext* context) override { + // TODO(b/171294012): Implement RiscConv op. + } +}; + +#define REGISTER_CPU(T) \ + REGISTER_KERNEL_BUILDER( \ + Name(""RiscConv"").Device(DEVICE_CPU).TypeConstraint(""T""), \ + RiscConvOp); + +REGISTER_CPU(float); +REGISTER_CPU(double); + +} // namespace experimental +} // namespace risc +} // namespace tensorflow ",0,test 55a311cb735689a431c6aa9a6c765c5c5c034ede,tensorflow/tensorflow,"Add RISC Conv Op register. PiperOrigin-RevId: 341415820 Change-Id: Ibd5f4c939e22be2af61e434e6927898b74e523a5",risc_ops.cc,"@@ -30,4 +30,15 @@ REGISTER_OP(""RiscAdd"") .SetIsAggregate() .SetIsCommutative(); +// TODO(b/171294012): change shape function. +REGISTER_OP(""RiscConv"") + .Input(""input: T"") + .Input(""filter: T"") + .Output(""output: T"") + .Attr(""T: {float, double}"") + .Attr(""strides: list(int)"") + .Attr(GetConvnetDataFormatAttrString()) + .SetShapeFn(shape_inference::UnknownShape) + .Attr(""dilations: list(int) = [1, 1, 1, 1]""); + } // namespace tensorflow ",0,test 55a311cb735689a431c6aa9a6c765c5c5c034ede,tensorflow/tensorflow,"Add RISC Conv Op register. PiperOrigin-RevId: 341415820 Change-Id: Ibd5f4c939e22be2af61e434e6927898b74e523a5",risc_grad.py,"@@ -28,3 +28,10 @@ def _RiscAddGrad(_, grad): # pylint: disable=unused-argument # TODO(b/171294012): Implement gradient of RISC with RISC ops. return None, None + + +@ops.RegisterGradient(""RiscConv"") +def _RiscConvGrad(_, grad): + # pylint: disable=unused-argument + # TODO(b/171294012): Implement gradient of RISC with RISC ops. + return None, None ",0,test 55a311cb735689a431c6aa9a6c765c5c5c034ede,tensorflow/tensorflow,"Add RISC Conv Op register. 
PiperOrigin-RevId: 341415820 Change-Id: Ibd5f4c939e22be2af61e434e6927898b74e523a5",risc_ops.py,"@@ -30,5 +30,20 @@ from tensorflow.python.ops.risc_ops_gen import * def risc_add( input_lhs, input_rhs, - name=""RISC_ADD""): + name='RISC_ADD'): return gen_risc_ops.risc_add(input_lhs, input_rhs, name=name) + + +def risc_conv(x, + kernel, + strides, + data_format='NHWC', + dilations=None, + name='RISC_CONV'): + return gen_risc_ops.risc_conv( + x, + kernel, + strides, + data_format=data_format, + dilations=dilations, + name=name) ",0,test 76e45e9c05c794fd35966bbabbc9d0c7a900f6cc,tensorflow/tensorflow,"sort host plane by index of xline (which is already sorted by name) PiperOrigin-RevId: 316135421 Change-Id: Ie8d3999724c129326346a2b902d4b2d5308372b2",trace_events_to_json.cc,"@@ -50,11 +50,13 @@ void AddResourceMetadata(uint32 device_id, AppendEscapedName(json, resource.name()); absl::StrAppend(json, ""}},""); } + uint32 sort_index = + resource.sort_index() ? resource.sort_index() : resource_id; absl::StrAppendFormat( json, R""({""ph"":""M"",""pid"":%u,""tid"":%u,)"" R""(""name"":""thread_sort_index"",""args"":{""sort_index"":%u}},)"", - device_id, resource_id, resource_id); + device_id, resource_id, sort_index); } } ",0,train 76e45e9c05c794fd35966bbabbc9d0c7a900f6cc,tensorflow/tensorflow,"sort host plane by index of xline (which is already sorted by name) PiperOrigin-RevId: 316135421 Change-Id: Ie8d3999724c129326346a2b902d4b2d5308372b2",xplane_to_trace_events.cc,"@@ -40,10 +40,18 @@ Device BuildDeviceAndResource(const XPlaneVisitor& plane) { Device device; device.set_name(std::string(plane.Name())); device.set_device_id(plane.Id()); + + bool sort_by_ordinal = plane.Name() == kHostThreads; + int ordinal = 0; plane.ForEachLine([&](const XLineVisitor& line) { Resource resource; resource.set_resource_id(line.Id()); - resource.set_name(std::string(line.Name())); + resource.set_name(std::string(line.DisplayName())); + if (sort_by_ordinal) { + // When sort_index is absent (i.e. 0), resource id will be used. + // Therefore sort_index starts with 1. + resource.set_sort_index(++ordinal); + } (*device.mutable_resources())[line.Id()] = resource; }); return device; ",0,train 91c0e9a4f6efc2068c886ed2b7eaa5dbf888a3ba,tensorflow/tensorflow,"Add common activation functions to keras activation globals so they can be deserialized by `tf.keras.activation.deserialize`. PiperOrigin-RevId: 368509629 Change-Id: Ie85b50a03f2334cbf943de589297bd7d47b01422",activations.py,"@@ -507,14 +507,6 @@ def serialize(activation): return serialize_keras_object(activation) -# Add additional globals so that deserialize can find these common activation -# functions -leaky_relu = nn.leaky_relu -log_softmax = nn.log_softmax -relu6 = nn.relu6 -silu = nn.swish - - @keras_export('keras.activations.deserialize') @dispatch.add_dispatch_support def deserialize(name, custom_objects=None): ",0,train 91c0e9a4f6efc2068c886ed2b7eaa5dbf888a3ba,tensorflow/tensorflow,"Add common activation functions to keras activation globals so they can be deserialized by `tf.keras.activation.deserialize`. 
PiperOrigin-RevId: 368509629 Change-Id: Ie85b50a03f2334cbf943de589297bd7d47b01422",activations_test.py,"@@ -39,7 +39,7 @@ class KerasActivationsTest(test.TestCase, parameterized.TestCase): def test_serialization(self): all_activations = [ 'softmax', 'relu', 'elu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear', - 'softplus', 'softsign', 'selu', 'gelu', 'relu6' + 'softplus', 'softsign', 'selu', 'gelu' ] for name in all_activations: fn = activations.get(name) ",0,train aae44380256166127ece6f5010d4656556f5c60d,tensorflow/tensorflow,Remove duplicate import,py_func_test.py,"@@ -27,7 +27,6 @@ import tensorflow as tf from tensorflow.python.framework import errors from tensorflow.python.ops import script_ops -from six.moves import xrange class PyOpTest(tf.test.TestCase): ",0,train 1051c377051b2ee24a495318737358d9ccf7280f,tensorflow/tensorflow,Copy ben's changes,convert_nodes.cc,"@@ -62,14 +62,14 @@ limitations under the License. #define TFTRT_RETURN_ERROR_IF_FALSE(status, node) \ do { \ - if (status == false) { \ + if ((status) == false) { \ TFTRT_INTERNAL_ERROR_AT_NODE(node); \ } \ } while (0) #define TFTRT_RETURN_ERROR_IF_NULLPTR(ptr, node) \ do { \ - if (ptr == nullptr) { \ + if ((ptr) == nullptr) { \ TFTRT_INTERNAL_ERROR_AT_NODE(node); \ } \ } while (0) @@ -1577,12 +1577,14 @@ tensorflow::Status ConvertConv2DHelper(OpConverterParams* params, int group) { const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); TFAttrs attrs(node_def); + int c_index = 1; int h_index = 2; int w_index = 3; auto data_format = attrs.get(""data_format""); if (data_format == ""NHWC"") { TF_RETURN_IF_ERROR(params->converter->TransposeTensor( const_cast(tensor), {0, 3, 1, 2}, &tensor)); + c_index = 3; h_index = 1; w_index = 2; // TODO(jie): transpose it @@ -1618,14 +1620,30 @@ tensorflow::Status ConvertConv2DHelper(OpConverterParams* params, int group) { << tf_stride[3]; const nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]); + auto tf_dilations = attrs.get>(""dilations""); + if ((int)tf_dilations.size() != 4) { + return tensorflow::errors::InvalidArgument( + ""Convolution dilations field must specify 4 dimensions "" + + node_def.name()); + } + if (tf_dilations[0] != 1 || tf_dilations[c_index] != 1) { + return tensorflow::errors::Unimplemented( + ""Dilation rate must be 1 for batch and channel dimensions, at "", + node_def.name()); + } + nvinfer1::DimsHW dilation(tf_dilations[h_index], tf_dilations[w_index]); + std::vector> padding; // TODO(jie): padding. if (attrs.get(""padding"") == ""SAME"") { // This is NCHW tensor with no batch dimension. 
// 1 -> h // 2 -> w + nvinfer1::DimsHW effective_kernel_size = kernel_size; + effective_kernel_size.h() += (kernel_size.h() - 1) * (dilation.h() - 1); + effective_kernel_size.w() += (kernel_size.w() - 1) * (dilation.w() - 1); padding = CreateSamePadding( - stride, kernel_size, + stride, effective_kernel_size, {static_cast(tensor_dim.d[1]), static_cast(tensor_dim.d[2])}); } else { padding = {{0, 0}, {0, 0}}; @@ -1659,6 +1677,7 @@ tensorflow::Status ConvertConv2DHelper(OpConverterParams* params, int group) { layer->setPadding({padding[0].first, padding[1].first}); layer->setName(node_def.name().c_str()); layer->setNbGroups(num_groups); + layer->setDilation(dilation); const nvinfer1::ITensor* output_tensor = layer->getOutput(0); VLOG(2) << ""TENSOR out: "" << DebugString(output_tensor->getDimensions()); VLOG(2) << ""data_format: "" << data_format; ",0,train 1051c377051b2ee24a495318737358d9ccf7280f,tensorflow/tensorflow,Copy ben's changes,conv2d_test.py,"@@ -0,0 +1,172 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""""""Model script to test TF-TensorRT integration."""""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.tensorrt.test import tf_trt_integration_test_base as trt_test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops +from tensorflow.python.ops import gen_nn_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.platform import test + + +def conv2d_layer(inputs, filters, kernel_size, strides=(1, 1), padding='valid', + data_format='channels_last', dilation_rate=(1, 1), name=None): + dtype = inputs.dtype + c_axis = -1 if data_format == 'channels_last' else 1 + nchan = inputs.shape[c_axis] + weights_shape = (kernel_size[0], kernel_size[1], nchan, filters) + weights = constant_op.constant(np.random.randn(*weights_shape), dtype=dtype) + padding = padding.upper() + if data_format == 'channels_last': + strides = [1] + list(strides) + [1] + dilations = [1] + list(dilation_rate) + [1] + data_format = 'NHWC' + else: + strides = [1, 1] + list(strides) + dilations = [1, 1] + list(dilation_rate) + data_format = 'NCHW' + return gen_nn_ops.conv2d(inputs, weights, strides=strides, padding=padding, + dilations=dilations, data_format=data_format) + +def div_round_up(n, d): + return (n - 1) // d + 1 + +class Conv2DNCHWTest(trt_test.TfTrtIntegrationTestBase): + + def GetParams(self): + """"""Testing conversion of Conv2D (data_format=NCHW) in TF-TRT conversion."""""" + np.random.seed(1234) + dtype = dtypes.float32 + input_name = ""input"" + n, c, h, w = 13, 3, 7, 11 + num_filters = 5 + input_dims = 
[n, c, h, w] + output_name = ""output"" + g = ops.Graph() + with g.as_default(): + inp = array_ops.placeholder( + dtype=dtype, shape=[None] + input_dims[1:], name=input_name) + with g.device(""/GPU:0""): + results = [] + for kernel_size in [(3, 3), (3, 2)]: + for dilation_rate in [(1, 1), (2, 3)]: + result = conv2d_layer(inp, num_filters, kernel_size, + dilation_rate=dilation_rate, padding='same', + data_format='channels_first') + results.append(result) + output = sum(results) + output = array_ops.identity(output, name=output_name) + return trt_test.TfTrtIntegrationTestParams( + gdef=g.as_graph_def(), + input_names=[input_name], + input_dims=[input_dims], + output_names=[output_name], + expected_output_dims=[(n, num_filters, h, w)]) + + def ExpectedEnginesToBuild(self, run_params): + """"""Return the expected engines to build."""""" + return [""my_trt_op_0""] + + +class Conv2DStridedNCHWTest(trt_test.TfTrtIntegrationTestBase): + + def GetParams(self): + """"""Testing conversion of strided Conv2D (data_format=NCHW) in TF-TRT + conversion."""""" + np.random.seed(1234) + dtype = dtypes.float32 + input_name = ""input"" + n, c, h, w = 13, 3, 7, 11 + num_filters = 5 + input_dims = [n, c, h, w] + output_name = ""output"" + g = ops.Graph() + with g.as_default(): + inp = array_ops.placeholder( + dtype=dtype, shape=[None] + input_dims[1:], name=input_name) + with g.device(""/GPU:0""): + output = inp + output = conv2d_layer(output, num_filters, (3, 2), strides=(2, 2), + padding='same', data_format='channels_first') + h = div_round_up(h, 2) + w = div_round_up(w, 2) + output = conv2d_layer(output, num_filters, (3, 3), strides=(2, 2), + dilation_rate=(2, 3), padding='same', + data_format='channels_first') + h = div_round_up(h, 2) + w = div_round_up(w, 2) + output = array_ops.identity(output, name=output_name) + return trt_test.TfTrtIntegrationTestParams( + gdef=g.as_graph_def(), + input_names=[input_name], + input_dims=[input_dims], + output_names=[output_name], + expected_output_dims=[(n, num_filters, h, w)]) + + def ExpectedEnginesToBuild(self, run_params): + """"""Return the expected engines to build."""""" + return [""my_trt_op_0""] + + +class Conv2DNHWCTest(trt_test.TfTrtIntegrationTestBase): + + def GetParams(self): + """"""Testing conversion of Conv2D (data_format=NHWC) in TF-TRT conversion."""""" + np.random.seed(1234) + dtype = dtypes.float32 + input_name = ""input"" + n, h, w, c = 13, 7, 11, 3 + num_filters = 5 + input_dims = [n, h, w, c] + output_name = ""output"" + g = ops.Graph() + with g.as_default(): + inp = array_ops.placeholder( + dtype=dtype, shape=[None] + input_dims[1:], name=input_name) + with g.device(""/GPU:0""): + results = [] + for kernel_size in [(3, 3), (3, 2)]: + for dilation_rate in [(1, 1), (2, 3)]: + result = conv2d_layer(inp, num_filters, kernel_size, + dilation_rate=dilation_rate, padding='same', + data_format='channels_last') + results.append(result) + output = sum(results) + output = array_ops.identity(output, name=output_name) + return trt_test.TfTrtIntegrationTestParams( + gdef=g.as_graph_def(), + input_names=[input_name], + input_dims=[input_dims], + output_names=[output_name], + expected_output_dims=[(n, h, w, num_filters)]) + + def ExpectedEnginesToBuild(self, run_params): + """"""Return the expected engines to build."""""" + return [""my_trt_op_0""] + + +if __name__ == ""__main__"": + test.main() ",0,train 23900d46dadaad68bafdaae5c5acb1c9fd093f9a,tensorflow/tensorflow,"[XLA] Emit less IR for tuple-select For the number of tuple elements n, this used to emit n 
selects, 2n loads and n stores. Instead emit one select on the address and a memcpy. PiperOrigin-RevId: 240358129",tuple_ops.cc,"@@ -50,23 +50,12 @@ void EmitTupleSelect(const IrArray& select, const IrArray& pred, VLOG(2) << "" pred_value: "" << DumpToString(*pred_value); VLOG(2) << "" pred_cond: "" << DumpToString(*pred_cond); - for (int i = 0; i < ShapeUtil::TupleElementCount(select.GetShape()); ++i) { - llvm::Value* const element_index[] = {b->getInt64(0), b->getInt64(i)}; - llvm::Value* on_true_element_address = - b->CreateInBoundsGEP(on_true, element_index); - llvm::Value* on_true_element = b->CreateLoad( - on_true_element_address, ""on_true_element_"" + llvm::Twine(i)); - llvm::Value* on_false_element_address = - b->CreateInBoundsGEP(on_false, element_index); - llvm::Value* on_false_element = b->CreateLoad( - on_false_element_address, ""on_false_element_"" + llvm::Twine(i)); - - llvm::Value* output_element_address = - b->CreateInBoundsGEP(select.GetBasePointer(), element_index); - b->CreateStore(b->CreateSelect(pred_cond, on_true_element, on_false_element, - ""select_output_element_"" + llvm::Twine(i)), - output_element_address); - } + llvm::Value* src = b->CreateSelect(pred_cond, on_true, on_false); + llvm::Value* dst = select.GetBasePointer(); + int64 table_size = ShapeUtil::ByteSizeOfTupleIndexTable( + select.GetShape(), module->getDataLayout().getPointerSize()); + b->CreateMemCpy(dst, /*DstAlign=*/1, src, /*SrcAlign=*/1, + b->getInt64(table_size)); } void EmitTuple(const IrArray& tuple, absl::Span operands, ",0,train 932d1242b419290ff19189c44f9dc40c7f799b20,tensorflow/tensorflow,"Package the ptxas arguments that were used for compilation with the ptx of a fat binary. This was hardwired to --compile-only, which is wrong. PiperOrigin-RevId: 362285857 Change-Id: I185ce7154842b9e0959084d53c9aeacd56a0050a",gpu_kernel_to_blob_pass.cc,"@@ -222,8 +222,7 @@ class GpuKernelToBlobPass // TODO(b/169870789): Revisit the use of fatbins. // Bundle cubin and PTX images into a single fatbin. - return tensorflow::se::BundleGpuAsm(images, - gpu_asm_opts.preferred_cuda_dir); + return tensorflow::se::BundleGpuAsm(images, gpu_asm_opts); #endif return InternalError( ",0,train 932d1242b419290ff19189c44f9dc40c7f799b20,tensorflow/tensorflow,"Package the ptxas arguments that were used for compilation with the ptx of a fat binary. This was hardwired to --compile-only, which is wrong. 
PiperOrigin-RevId: 362285857 Change-Id: I185ce7154842b9e0959084d53c9aeacd56a0050a",asm_compiler.cc,"@@ -195,6 +195,15 @@ static void LogPtxasTooOld(const std::string& ptxas_path, int cc_major, } } +static void AppendArgsFromOptions(GpuAsmOpts options, + std::vector& args) { + if (options.disable_gpuasm_optimizations) { + args.push_back(""-O0""); + } + args.insert(args.end(), options.extra_flags.begin(), + options.extra_flags.end()); +} + port::StatusOr> CompileGpuAsm(int cc_major, int cc_minor, const char* ptx_contents, GpuAsmOpts options) { @@ -234,11 +243,7 @@ port::StatusOr> CompileGpuAsm(int cc_major, int cc_minor, if (VLOG_IS_ON(2)) { ptxas_args.push_back(""-v""); } - if (options.disable_gpuasm_optimizations) { - ptxas_args.push_back(""-O0""); - } - ptxas_args.insert(ptxas_args.end(), options.extra_flags.begin(), - options.extra_flags.end()); + AppendArgsFromOptions(options, ptxas_args); if (VLOG_IS_ON(3)) { VLOG(3) << absl::StrJoin(ptxas_args, "" ""); } @@ -283,9 +288,9 @@ port::StatusOr> CompileGpuAsm(int cc_major, int cc_minor, } port::StatusOr> BundleGpuAsm( - std::vector images, const std::string preferred_cuda_dir) { + std::vector images, GpuAsmOpts options) { std::string fatbinary_path = - findCudaExecutable(""fatbinary"", preferred_cuda_dir); + findCudaExecutable(""fatbinary"", options.preferred_cuda_dir); // Write images to temporary files. std::vector image_paths; @@ -319,11 +324,19 @@ port::StatusOr> BundleGpuAsm( tensorflow::Env::Default()->DeleteFile(result_path).IgnoreError(); }); + // Compute the ptxas options that were used to produce the cubins. + std::vector ptxas_options; + AppendArgsFromOptions(options, ptxas_options); + // Invoke fatbinary and collect its output. tensorflow::SubProcess fatbinary; std::vector fatbinary_args = { - fatbinary_path, ""--64"", ""--cmdline=--compile-only"", - ""--link"", ""--compress-all"", absl::StrCat(""--create="", result_path)}; + fatbinary_path, ""--64"", ""--link"", ""--compress-all"", + absl::StrCat(""--create="", result_path)}; + if (!ptxas_options.empty()) { + auto command_line = absl::StrJoin(ptxas_options, "" ""); + fatbinary_args.push_back(absl::StrFormat(""--cmdline=%s"", command_line)); + } assert(images.size() == image_paths.size()); for (int i = 0; i < images.size(); i++) { fatbinary_args.push_back(absl::StrFormat( ",0,train 932d1242b419290ff19189c44f9dc40c7f799b20,tensorflow/tensorflow,"Package the ptxas arguments that were used for compilation with the ptx of a fat binary. This was hardwired to --compile-only, which is wrong. PiperOrigin-RevId: 362285857 Change-Id: I185ce7154842b9e0959084d53c9aeacd56a0050a",asm_compiler.h,"@@ -63,7 +63,7 @@ struct CubinOrPTXImage { // Bundles the GPU machine code (cubins) and PTX if requested and returns the // resulting binary (i.e. a fatbin) as a byte array. port::StatusOr> BundleGpuAsm( - std::vector images, const std::string preferred_cuda_dir); + std::vector images, GpuAsmOpts options); struct HsacoImage { std::string gfx_arch; ",0,train 403e51018b3c47cd5989d6b50776e235221fade4,tensorflow/tensorflow,"[XLA] Factor out repeated LatestNonGteAncestorAndIndex helper. PiperOrigin-RevId: 171620470",ir_emitter.cc,"@@ -2102,19 +2102,6 @@ Status IrEmitter::HandleDynamicSlice(HloInstruction* dynamic_slice, namespace { -// Returns the first non-GetTupleElement ancestor instruction of 'hlo'. -// If the first non-GTE ancestor is tuple-shaped, populates 'index' with the -// (possibly nested) tuple indices used on the path from ancestor to 'hlo'. 
-const HloInstruction* LatestNonGteAncestorAndIndex(const HloInstruction* hlo, - ShapeIndex* index) { - if (hlo->opcode() == HloOpcode::kGetTupleElement) { - const auto* operand = LatestNonGteAncestorAndIndex(hlo->operand(0), index); - index->push_back(hlo->tuple_index()); - return operand; - } - return hlo; -} - // Checks if we can emit code for DynamicUpdateSlice to update data in-place. // Returns true if operand 0 of DynamicUpdateSlice and its output buffer // share the same buffer allocation. @@ -2126,9 +2113,10 @@ bool CanUpdateDynamicSliceInPlace(const BufferAssignment& assignment, // Walk DynamicUpdateSlice operand(0) to parameter and get its // associated operand. See if it shares an allocation with this operand. + HloInstruction* operand; ShapeIndex index; - auto* operand = - LatestNonGteAncestorAndIndex(dynamic_update_slice->operand(0), &index); + std::tie(operand, index) = + dynamic_update_slice->mutable_operand(0)->LatestNonGteAncestorAndIndex(); if (operand->opcode() != HloOpcode::kParameter) { return false; } ",0,train 403e51018b3c47cd5989d6b50776e235221fade4,tensorflow/tensorflow,"[XLA] Factor out repeated LatestNonGteAncestorAndIndex helper. PiperOrigin-RevId: 171620470",hlo_to_ir_bindings.cc,"@@ -67,7 +67,7 @@ void HloToIrBindings::EmitBasePointersForHlos( // Lookup allocation GetTupleElement operand. const BufferAllocation::Slice slice = buffer_assignment_ - ->GetUniqueTopLevelSlice(LatestNonGteAncestor(non_io_hlo)) + ->GetUniqueTopLevelSlice(non_io_hlo->LatestNonGteAncestor()) .ConsumeValueOrDie(); // We are not in a nested context, so check non-thread-local allocation. CHECK(!slice.allocation()->is_thread_local()); ",0,train 403e51018b3c47cd5989d6b50776e235221fade4,tensorflow/tensorflow,"[XLA] Factor out repeated LatestNonGteAncestorAndIndex helper. PiperOrigin-RevId: 171620470",ir_emission_utils.cc,"@@ -214,12 +214,5 @@ llvm::Value* EmitShuffleDown(llvm::Value* value, llvm::Value* offset, value->getType()); } -const HloInstruction* LatestNonGteAncestor(const HloInstruction* hlo) { - while (hlo->opcode() == HloOpcode::kGetTupleElement) { - hlo = hlo->operand(0); - } - return hlo; -} - } // namespace gpu } // namespace xla ",0,train 403e51018b3c47cd5989d6b50776e235221fade4,tensorflow/tensorflow,"[XLA] Factor out repeated LatestNonGteAncestorAndIndex helper. PiperOrigin-RevId: 171620470",ir_emission_utils.h,"@@ -53,10 +53,6 @@ llvm::Value* EmitPrintf(tensorflow::StringPiece fmt, llvm::Value* EmitShuffleDown(llvm::Value* value, llvm::Value* offset, llvm::IRBuilder<>* builder); -// Resolves GetTupleElement instruction operands starting with 'hlo'. -// Returns the first ancestor instruction which is not a GetTupleElement. -const HloInstruction* LatestNonGteAncestor(const HloInstruction* hlo); - } // namespace gpu } // namespace xla ",0,train 403e51018b3c47cd5989d6b50776e235221fade4,tensorflow/tensorflow,"[XLA] Factor out repeated LatestNonGteAncestorAndIndex helper. PiperOrigin-RevId: 171620470",ir_emitter_unnested.cc,"@@ -254,27 +254,11 @@ Status IrEmitterUnnested::HandleConvolution(HloInstruction* convolution, rhs_instruction, window); } -namespace { - -// Returns the first non-GetTupleElement ancestor instruction of 'hlo'. -// If the first non-GTE ancestor is tuple-shaped, populates 'index' with the -// (possibly nested) tuple indices used on the path from ancestor to 'hlo'. 
-const HloInstruction* LatestNonGteAncestorAndIndex(const HloInstruction* hlo, - ShapeIndex* index) { - if (hlo->opcode() == HloOpcode::kGetTupleElement) { - const auto* operand = LatestNonGteAncestorAndIndex(hlo->operand(0), index); - index->push_back(hlo->tuple_index()); - return operand; - } - return hlo; -} - // Checks if we can emit code for DynamicUpdateSlice to update data in-place. // Returns true if operand 0 of DynamicUpdateSlice and its output buffer // share the same buffer allocation. -// Returns false otherwise. -bool CanUpdateDynamicSliceInPlace(const BufferAssignment& assignment, - HloInstruction* fusion) { +static bool CanUpdateDynamicSliceInPlace(const BufferAssignment& assignment, + HloInstruction* fusion) { CHECK_EQ(HloOpcode::kFusion, fusion->opcode()); HloInstruction* fused_root = fusion->fused_expression_root(); if (fused_root->opcode() != HloOpcode::kDynamicUpdateSlice) { @@ -282,9 +266,10 @@ bool CanUpdateDynamicSliceInPlace(const BufferAssignment& assignment, } // Walk DynamicUpdateSlice operand(0) to fused parameter and get its // associated operand. See if it shares an allocation with this operand. + HloInstruction* fusion_operand; ShapeIndex index; - auto* fusion_operand = - LatestNonGteAncestorAndIndex(fused_root->operand(0), &index); + std::tie(fusion_operand, index) = + fused_root->mutable_operand(0)->LatestNonGteAncestorAndIndex(); if (fusion_operand->opcode() != HloOpcode::kParameter) { return false; } @@ -292,8 +277,6 @@ bool CanUpdateDynamicSliceInPlace(const BufferAssignment& assignment, return assignment.SharesSliceAtIndex(fusion, {}, operand, index); } -} // namespace - Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { HloInstruction* root = fusion->fused_expression_root(); // HandleFusion specializes reduction from a multi-dimensional array to a 1D @@ -386,7 +369,7 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { TF_RETURN_IF_ERROR(root->Accept(&fused_emitter)); // Recursively lookup 'fusion_operand' for DynamicUpdateSlice operand 0. - auto* fusion_operand = LatestNonGteAncestor(root->operand(0)); + auto* fusion_operand = root->operand(0)->LatestNonGteAncestor(); CHECK_EQ(HloOpcode::kParameter, fusion_operand->opcode()); // Operand(0) the input array which shares an allocation with the output. @@ -1625,7 +1608,7 @@ llvm::Function* IrEmitterUnnested::EmitBasePointersForHloAndItsOperands( // with their operand buffer in 'io_hlos' and 'non_io_hlos' below. std::vector non_io_hlos; for (const HloInstruction* operand : hlo.operands()) { - const HloInstruction* to_lookup = LatestNonGteAncestor(operand); + const HloInstruction* to_lookup = operand->LatestNonGteAncestor(); if (buffer_assignment.HasTopLevelAllocation(to_lookup) && buffer_assignment.GetUniqueTopLevelSlice(to_lookup) .ConsumeValueOrDie() @@ -1665,7 +1648,7 @@ std::unique_ptr IrEmitterUnnested::BuildKernelThunk( std::vector io_buffers; io_buffers.reserve(io_hlos.size()); for (const HloInstruction* io_hlo : io_hlos) { - io_buffers.push_back(GetAllocationSlice(*LatestNonGteAncestor(io_hlo))); + io_buffers.push_back(GetAllocationSlice(*io_hlo->LatestNonGteAncestor())); } // Create a KernelThunk that launches the kernel that implements ""inst"". ",0,train 403e51018b3c47cd5989d6b50776e235221fade4,tensorflow/tensorflow,"[XLA] Factor out repeated LatestNonGteAncestorAndIndex helper. 
PiperOrigin-RevId: 171620470",hlo_instruction.cc,"@@ -1131,6 +1131,29 @@ std::unique_ptr HloInstruction::CloneFusionWithNewOperands( return new_instruction; } +std::pair +HloInstruction::LatestNonGteAncestorAndIndex() const { + const HloInstruction* hlo = this; + ShapeIndex index; + while (hlo->opcode() == HloOpcode::kGetTupleElement) { + index.push_back(hlo->tuple_index()); + hlo = hlo->operand(0); + } + + // We built up index in the reverse order from what we want. + std::reverse(index.begin(), index.end()); + + return {hlo, index}; +} + +const HloInstruction* HloInstruction::LatestNonGteAncestor() const { + const HloInstruction* hlo = this; + while (hlo->opcode() == HloOpcode::kGetTupleElement) { + hlo = hlo->operand(0); + } + return hlo; +} + const Literal& HloInstruction::literal() const { CHECK_EQ(HloOpcode::kConstant, opcode_); return *literal_; ",0,train 403e51018b3c47cd5989d6b50776e235221fade4,tensorflow/tensorflow,"[XLA] Factor out repeated LatestNonGteAncestorAndIndex helper. PiperOrigin-RevId: 171620470",hlo_instruction.h,"@@ -508,6 +508,26 @@ class HloInstruction { // Precondition: opcode() == HloOpcode::kGetTupleElement int64 tuple_index() const; + // Returns the first non-GetTupleElement ancestor instruction of 'hlo'. + // If the first non-GTE ancestor is tuple-shaped, populates 'index' with the + // (possibly nested) tuple indices used on the path from ancestor to 'hlo'. + std::pair LatestNonGteAncestorAndIndex() + const; + + std::pair LatestNonGteAncestorAndIndex() { + auto rv = + const_cast(this)->LatestNonGteAncestorAndIndex(); + return {const_cast(rv.first), rv.second}; + } + + // Same as LatestNonGteAncestorAndIndex, but just returns the HloInstruction. + const HloInstruction* LatestNonGteAncestor() const; + + HloInstruction* LatestNonGteAncestor() { + return const_cast( + const_cast(this)->LatestNonGteAncestor()); + } + // Gets/sets the to_apply HloComputation for Call, Map, Reduce, etc. // The setter should only be called by HloModule or HloComputation methods. // ",0,train 5e0a1a375a795ceb0b8fac65b0a04bfdb124986e,tensorflow/tensorflow,"TensorSpec equality is more useful if it's shape-list-equality and not requires that the shapes are equal. 
PiperOrigin-RevId: 232554706",def_function_test.py,"@@ -268,7 +268,8 @@ class DefFunctionTest(test.TestCase): self.assertAllClose(4., concrete(constant_op.constant(2.))) signature_args, _ = concrete.structured_input_signature self.assertEqual(signature_args, - (tensor_spec.TensorSpec(None, dtypes.float32),)) + (tensor_spec.TensorSpec( + None, dtypes.float32, name='x'),)) def test_serialization_signature_cache(self): @@ -288,10 +289,10 @@ class DefFunctionTest(test.TestCase): self.assertEqual( signatures_args, - set(((tensor_spec.TensorSpec([1, 2], dtypes.float32), - tensor_spec.TensorSpec([1], dtypes.float32)), - (tensor_spec.TensorSpec([1, 3], dtypes.int32), - tensor_spec.TensorSpec([1], dtypes.int32))))) + set(((tensor_spec.TensorSpec([1, 2], dtypes.float32, name='x'), + tensor_spec.TensorSpec([1], dtypes.float32, name='y')), + (tensor_spec.TensorSpec([1, 3], dtypes.int32, name='x'), + tensor_spec.TensorSpec([1], dtypes.int32, name='y'))))) @test_util.assert_no_garbage_created def testFunctionReferenceCycles(self): @@ -373,6 +374,18 @@ class DefFunctionTest(test.TestCase): self.assertAllEqual(add(v, v), 2.0) + def testShapeCache(self): + @def_function.function + def func(x): + return 2 * x + + func_a = func.get_concrete_function( + tensor_spec.TensorSpec([None], dtypes.int32)) + func_b = func.get_concrete_function( + tensor_spec.TensorSpec([None], dtypes.int32)) + + self.assertIs(func_a, func_b) + def testInitializationInNestedCall(self): v_holder = [] ",0,train 5e0a1a375a795ceb0b8fac65b0a04bfdb124986e,tensorflow/tensorflow,"TensorSpec equality is more useful if it's shape-list-equality and not requires that the shapes are equal. PiperOrigin-RevId: 232554706",tensor_spec.py,"@@ -108,7 +108,9 @@ class TensorSpec(object): return hash((self._shape_tuple, self.dtype)) def __eq__(self, other): - return self.shape == other.shape and self.dtype == other.dtype + return (self._shape_tuple == other._shape_tuple # pylint: disable=protected-access + and self.dtype == other.dtype + and self._name == other._name) # pylint: disable=protected-access def __ne__(self, other): return not self == other ",0,train 5e0a1a375a795ceb0b8fac65b0a04bfdb124986e,tensorflow/tensorflow,"TensorSpec equality is more useful if it's shape-list-equality and not requires that the shapes are equal. PiperOrigin-RevId: 232554706",tensor_array_ops.py,"@@ -825,7 +825,7 @@ class _EagerTensorArray(object): if self._infer_shape: if self._element_shape is None: self._element_shape = value.shape - elif self._element_shape != value.shape: + elif not self._element_shape.is_compatible_with(value.shape): raise ValueError(""Incompatible shape for value (%s), expected (%s)"" % (value.shape.as_list(), self._element_shape.as_list())) ",0,train 8cf8afefdb4c240f74a05e24246c8cd2dcce9d54,tensorflow/tensorflow,"Internal Change. PiperOrigin-RevId: 211519679",__init__.py,"@@ -21,6 +21,14 @@ from __future__ import print_function import os +from tensorflow.python.tools import component_api_helper +component_api_helper.package_hook( + parent_package_str=( + ""tensorflow.contrib""), + child_package_str=( + ""tensorflow_estimator.contrib.estimator"")) +del component_api_helper + # Add projects here, they will show up under tf.contrib. from tensorflow.contrib import autograph from tensorflow.contrib import batching ",0,train 8cf8afefdb4c240f74a05e24246c8cd2dcce9d54,tensorflow/tensorflow,"Internal Change. 
PiperOrigin-RevId: 211519679",__init__.py,"@@ -48,6 +48,13 @@ import numpy as np from tensorflow.python import pywrap_tensorflow +from tensorflow.python.tools import component_api_helper +component_api_helper.package_hook( + parent_package_str='tensorflow.python', + child_package_str=( + 'tensorflow_estimator.python.estimator')) +del component_api_helper + # Protocol buffers from tensorflow.core.framework.graph_pb2 import * from tensorflow.core.framework.node_def_pb2 import * ",0,train 8cf8afefdb4c240f74a05e24246c8cd2dcce9d54,tensorflow/tensorflow,"Internal Change. PiperOrigin-RevId: 211519679",component_api_helper.py,"@@ -67,7 +67,7 @@ def package_hook(parent_package_str, child_package_str, error_msg=None): """""" child_pkg_path = [os.path.join(os.path.dirname(child_pkg.__file__), "".."")] try: - parent_pkg.__path__ += child_pkg_path + parent_pkg.__path__ = child_pkg_path + parent_pkg.__path__ except AttributeError: parent_pkg.__path__ = child_pkg_path ",0,train e8e1631f8e46bc2588f08f89fe2892d3cb2f8035,tensorflow/tensorflow,"Make sure trackables are initialized when _trackable_children is called PiperOrigin-RevId: 425543411 Change-Id: I53fa60d7a3ecffd73af7ba6245fad901b67fb88a",base.py,"@@ -1461,6 +1461,7 @@ class Trackable(object): Returns: Dictionary mapping names to child trackables. """""" + self._maybe_initialize_trackable() # TODO(kathywu): Migrate `_checkpoint_dependencies` overrides to # `_trackable_children`. if save_type == SaveType.CHECKPOINT: ",0,train f568deff5697891d6c6ca0d09490359cf96fe7a1,tensorflow/tensorflow,Fix an error in 'Adding a New Op' example code (#5846),zero_out_2_test.py,"@@ -31,6 +31,11 @@ class ZeroOut2Test(tf.test.TestCase): result = zero_out_op_2.zero_out([5, 4, 3, 2, 1]) self.assertAllEqual(result.eval(), [5, 0, 0, 0, 0]) + def test_2d(self): + with self.test_session(): + result = zero_out_op_2.zero_out([[6, 5, 4], [3, 2, 1]]) + self.assertAllEqual(result.eval(), [[6, 0, 0], [0, 0, 0]]) + def test_grad(self): with self.test_session(): shape = (5,) @@ -39,6 +44,14 @@ class ZeroOut2Test(tf.test.TestCase): err = tf.test.compute_gradient_error(x, shape, y, shape) self.assertLess(err, 1e-4) + def test_grad_2d(self): + with self.test_session(): + shape = (2, 3) + x = tf.constant([[6, 5, 4], [3, 2, 1]], dtype=tf.float32) + y = zero_out_op_2.zero_out(x) + err = tf.test.compute_gradient_error(x, shape, y, shape) + self.assertLess(err, 1e-4) + if __name__ == '__main__': tf.test.main() ",0,train f568deff5697891d6c6ca0d09490359cf96fe7a1,tensorflow/tensorflow,Fix an error in 'Adding a New Op' example code (#5846),zero_out_grad_2.py,"@@ -40,5 +40,5 @@ def _zero_out_grad(op, grad): shape = array_ops.shape(to_zero) index = array_ops.zeros_like(shape) first_grad = array_ops.reshape(grad, [-1])[0] - to_zero_grad = sparse_ops.sparse_to_dense(index, shape, first_grad, 0) + to_zero_grad = sparse_ops.sparse_to_dense([index], shape, first_grad, 0) return [to_zero_grad] # List of one Tensor, since we have one input ",0,train 4b161717ae7766111a7625b9110b730c4176ec03,tensorflow/tensorflow,Use deserialize-helper for deserializing dict activations,activations.py,"@@ -458,11 +458,10 @@ def get(identifier): if isinstance(identifier, six.string_types): identifier = str(identifier) return deserialize(identifier) + elif isinstance(identifier, dict): + return deserialize(identifier) elif callable(identifier): return identifier - elif isinstance(identifier, dict): - return deserialize_keras_object( - identifier, printable_module_name='activation') else: raise TypeError( 'Could not interpret 
activation function identifier: {}'.format( ",0,train f74ae6121e7dcaeca8a57af23f195d9de3e524da,tensorflow/tensorflow,"Add tests for collective ops + scoped allocator in while loop. PiperOrigin-RevId: 263032410",collective_ops_test.py,"@@ -25,9 +25,11 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import kernels from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import collective_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -110,6 +112,63 @@ class CollectiveOpTest(test.TestCase): set_graph_key=False, communication_hint='nccl') + def _testWhile(self, num_vars, num_iterations, key_base): + group_size = 2 + group_key = 1 + instances = [(key_base + i) for i in range(num_vars)] + devices = ['CPU:{}'.format(i) for i in range(group_size)] + + config = config_pb2.ConfigProto(device_count={'CPU': group_size}) + rewrite_options = config.graph_options.rewrite_options + rewrite_options.scoped_allocator_optimization = ( + rewriter_config_pb2.RewriterConfig.ON) + del rewrite_options.scoped_allocator_opts.enable_op[:] + rewrite_options.scoped_allocator_opts.enable_op.append('CollectiveReduce') + + with self.session(config=config) as sess: + loop_vars = [] + for device in devices: + with ops.device(device): + loop_vars.append( + [variables.VariableV1((1 << i) * 1.) for i in range(num_vars)]) + # This variable controls number of iterations. + loop_vars.append(variables.VariableV1(0.)) + def loop_body(dev0_tensors, dev1_tensors, loop_tensor): + return_ops = [] + for i in range(len(devices)): + device = devices[i] + device_tensors = dev0_tensors if i == 0 else dev1_tensors + with ops.device(device): + device_collectives = [] + for j in range(num_vars): + # TODO(ayushd): figure out why identity is necessary to get the + # right device on the input here with TF2_BEHAVIOR=1. + input_tensor = array_ops.identity(device_tensors[j]) + collective_op = collective_ops.all_reduce( + input_tensor, group_size, group_key, instances[j], + 'Add', 'Id') + device_collectives.append(collective_op) + return_ops.append(device_collectives) + return_ops.append(math_ops.add(loop_tensor, 1.)) + return return_ops + # Run until last variable exceeds number of iterations. + loop_cond = lambda d0, d1, i: math_ops.less(i, num_iterations) + sess.run(variables.global_variables_initializer()) + results = sess.run(control_flow_ops.while_loop(loop_cond, loop_body, + loop_vars)) + self.assertEqual(results[:-1], [ + [((1 << (num_iterations + v)) * 1.) for v in range(num_vars)] + for _ in range(group_size)]) + + @test_util.run_deprecated_v1 + def testSimpleWhile(self): + self._testWhile(num_vars=1, num_iterations=4, key_base=20) + + @test_util.run_deprecated_v1 + def testWhileMultipleAllReduce(self): + self.skipTest('Temporarily disabled') # TODO(b/135686041): re-enable + self._testWhile(num_vars=2, num_iterations=4, key_base=20) + @test_util.run_deprecated_v1 def testWhileWithScopedAllocator(self): group_size = 2 ",0,train e45d52e03932b6aca5ce8aac136b1b688fe2a47a,tensorflow/tensorflow,"Change MaybeFuseActivation to only support a single output - that is its only use-case. 
PiperOrigin-RevId: 322419198 Change-Id: I4307683446795de77be4b0d3dc06396cfa3347c4",model_builder.cc,"@@ -129,16 +129,15 @@ absl::Status IsActivationSupported(TfLiteFusedActivation fused_activation) { // that will have identical output as the given node. New operation node will // depend on the given node output. absl::Status MaybeFuseActivation(TfLiteFusedActivation fused_activation, - const std::vector& output_indices, GraphFloat32* graph, Node* node) { - if (fused_activation == kTfLiteActNone) { - return absl::OkStatus(); - } const auto outputs = graph->FindOutputs(node->id); - if (outputs.empty()) { - return absl::InternalError(""Empty outputs in fused node""); + if (outputs.size() != 1) { + return absl::InternalError(""Number of outputs != 1""); } switch (fused_activation) { + case kTfLiteActNone: + // Nothing to do here + return absl::OkStatus(); case kTfLiteActRelu: case kTfLiteActReluN1To1: case kTfLiteActRelu6: { @@ -146,36 +145,24 @@ absl::Status MaybeFuseActivation(TfLiteFusedActivation fused_activation, attr.clip = fused_activation == kTfLiteActRelu ? 0.0f : (fused_activation == kTfLiteActReluN1To1 ? 1.0f : 6.0f); - for (auto index : output_indices) { - Node* activation_node; - RETURN_IF_ERROR( - NewPassthroughNode(graph, node, outputs[index], &activation_node)); - activation_node->operation.type = ToString(OperationType::RELU); - activation_node->operation.attributes = attr; - } - break; + Node* activation_node; + RETURN_IF_ERROR( + NewPassthroughNode(graph, node, outputs[0], &activation_node)); + activation_node->operation.type = ToString(OperationType::RELU); + activation_node->operation.attributes = attr; + return absl::OkStatus(); + } + case kTfLiteActTanh: { + Node* activation_node; + RETURN_IF_ERROR( + NewPassthroughNode(graph, node, outputs[0], &activation_node)); + activation_node->operation.type = ToString(OperationType::TANH); + return absl::OkStatus(); } - case kTfLiteActTanh: - for (auto index : output_indices) { - Node* activation_node; - RETURN_IF_ERROR( - NewPassthroughNode(graph, node, outputs[index], &activation_node)); - activation_node->operation.type = ToString(OperationType::TANH); - } - break; default: return absl::NotFoundError( absl::StrCat(""Unsupported fused activation: "", fused_activation)); } - return absl::OkStatus(); -} - -absl::Status MaybeFuseActivationToTheSingleOutput( - TfLiteFusedActivation fused_activation, GraphFloat32* graph, Node* node) { - if (graph->FindOutputs(node->id).size() != 1) { - return absl::InternalError(""Number of outputs exceeds 1""); - } - return MaybeFuseActivation(fused_activation, {0}, graph, node); } HW ToHW(int32_t h, int32_t w) { return HW(h > 0 ? h : 1, w > 0 ? 
w : 1); } @@ -389,8 +376,7 @@ class AddOperationParser : public TFLiteOperationParser { node->operation.attributes = std::move(attr); const TfLiteAddParams* tf_options; RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options)); - return MaybeFuseActivationToTheSingleOutput(tf_options->activation, graph, - node); + return MaybeFuseActivation(tf_options->activation, graph, node); } }; @@ -463,8 +449,7 @@ class ConcatenationOperationParser : public TFLiteOperationParser { } const TfLiteConcatenationParams* tf_options; RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options)); - RETURN_IF_ERROR(MaybeFuseActivationToTheSingleOutput(tf_options->activation, - graph, node)); + RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, graph, node)); node->operation.attributes = attr; return absl::OkStatus(); } @@ -566,8 +551,7 @@ class Conv2DOperationParser : public TFLiteOperationParser { tf_options->dilation_width_factor); UpdatePadding(tf_options->padding, graph->FindInputs(node->id)[0]->tensor.shape, &attr); - RETURN_IF_ERROR(MaybeFuseActivationToTheSingleOutput(tf_options->activation, - graph, node)); + RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, graph, node)); node->operation.attributes = std::move(attr); return absl::OkStatus(); } @@ -684,8 +668,7 @@ class DepthwiseConvolutionOperationParser : public TFLiteOperationParser { std::max(1, tf_options->dilation_width_factor)); UpdatePadding(tf_options->padding, graph->FindInputs(node->id)[0]->tensor.shape, &attr); - RETURN_IF_ERROR(MaybeFuseActivationToTheSingleOutput(tf_options->activation, - graph, node)); + RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, graph, node)); const int depth_multiplier = tf_options->depth_multiplier; if (depth_multiplier != 1) { const TfLiteTensor* input = reader->GetInputTensor(0); @@ -850,8 +833,7 @@ class ElementwiseOperationParser : public TFLiteOperationParser { } if (activation) { - RETURN_IF_ERROR( - MaybeFuseActivationToTheSingleOutput(activation, graph, node)); + RETURN_IF_ERROR(MaybeFuseActivation(activation, graph, node)); } } else if (IsTwoArgumentOperationWithConst()) { RETURN_IF_ERROR(reader->VerifyInputsConstsOutputs(tflite_node, @@ -997,8 +979,7 @@ class FullyConnectedOperationParser : public TFLiteOperationParser { conv->operation.type = ToString(OperationType::FULLY_CONNECTED); conv->operation.attributes = std::move(attr); absl::Status result = reader->AddOutputs(conv); - RETURN_IF_ERROR(MaybeFuseActivationToTheSingleOutput(tf_options->activation, - graph, conv)); + RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, graph, conv)); return result; } @@ -1252,8 +1233,7 @@ class MulOperationParser : public TFLiteOperationParser { const TfLiteMulParams* tf_options; RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options)); - return MaybeFuseActivationToTheSingleOutput(tf_options->activation, graph, - node); + return MaybeFuseActivation(tf_options->activation, graph, node); } private: @@ -1454,9 +1434,7 @@ class Pooling2DOperationParser : public TFLiteOperationParser { RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options)); } - std::vector max_tensor_id{0}; - RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, max_tensor_id, - graph, node)); + RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, graph, node)); // Second output is optional. 
It is not required, it but must be added after // MaybeAddFusedActivation function is called reader->AddOutput(node, 1).IgnoreError(); ",0,train 44697e33251ac74fcd3b136dfbfa1daad4fe4bfb,tensorflow/tensorflow,"Add checks for cases where no tensors are being profiled in the graph. PiperOrigin-RevId: 424651575 Change-Id: Ibef450ba692d8e5678105006e14be40e3d5ee281",tensor_tracer.py,"@@ -1408,6 +1408,11 @@ class TensorTracer(object): else: return tensor + # Check if there are graph operations being profiled. + if not tensor_trace_order.traced_tensors: + logging.warn('Inspect mode has no tensors in the cache to check.') + return control_flow_ops.no_op + # Check if the cache includes any nan or inf if self._parameters.trace_mode == tensor_tracer_flags.TRACE_MODE_NAN_INF: # Cache has 1s or 0s if the mode is NaN_INF @@ -1571,6 +1576,9 @@ class TensorTracer(object): """""" # Add a dependency to op and tensor fetches to make sure that all tracing # ops are executed before flushing trace results. + if not tensor_trace_order.traced_tensors: + logging.warn('No tensor values being traced. No flush cache op added.') + return tensor_fetches with ops.control_dependencies(op_fetches + [tensor.op for tensor in tensor_fetches]): flush_cache_op = self._generate_flush_cache_op( ",0,train e10dcf4fe7c480dcdfdff744e35f68683741cc59,tensorflow/tensorflow,"Add _placeholder_value for DistributedVariableTraceType PiperOrigin-RevId: 436575035",values.py,"@@ -511,25 +511,28 @@ class DistributedVarOp(object): return hash((self.name, self.graph, tuple(self.traceback), self.type)) +# TODO(b/209081027): Remove this once Variable is a CompositeTensor. class DistributedVariableTraceType(trace.TraceType): - """"""Class outlining the Tracing Protocol for DistributedVariable."""""" + """"""TraceType of DistributedVariable objects."""""" - def __init__(self, shape, dtype): - self.components = (tuple(shape.as_list()), dtype) + def __init__(self, distributed_variable): + self.distributed_variable = distributed_variable + self.components = (tuple(distributed_variable.shape.as_list()), + distributed_variable.dtype) def is_subtype_of(self, other): return self == other def most_specific_common_supertype(self, others): - return None + return self if all(self == other for other in others) else None + + def _placeholder_value(self): + return self.distributed_variable def __hash__(self) -> int: return hash(self.components) def __eq__(self, other) -> bool: - if not isinstance(other, trace.TraceType): - return NotImplemented - if not isinstance(other, DistributedVariableTraceType): return False @@ -929,7 +932,7 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, self, sparse_delta, use_locking=use_locking, name=name) def __tf_tracing_type__(self, _): - return DistributedVariableTraceType(self.shape, self.dtype) + return DistributedVariableTraceType(self) def _gather_saveables_for_checkpoint(self): """"""Overrides Trackable method. 
",0,train a72ee2f74061cdd72f1197eed4c90a8216d39d74,tensorflow/tensorflow,"Fast-path to VarHandleOp PiperOrigin-RevId: 195744374",resource_mgr.h,"@@ -338,6 +338,9 @@ class ResourceHandleOp : public OpKernel { private: string container_; string name_; + mutex mutex_; + Tensor resource_ GUARDED_BY(mutex_); + std::atomic initialized_{false}; }; // Registers a kernel for an op which produces a handle to a resource of the @@ -511,10 +514,17 @@ ResourceHandleOp::ResourceHandleOp(OpKernelConstruction* context) template void ResourceHandleOp::Compute(OpKernelContext* ctx) { - Tensor* output = nullptr; - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &output)); - output->scalar()() = - MakeResourceHandle(ctx, container_, name_); + if (!initialized_.load()) { + mutex_lock ml(mutex_); + AllocatorAttributes attr; + attr.set_on_host(true); + OP_REQUIRES_OK(ctx, ctx->allocate_temp(DT_RESOURCE, TensorShape({}), + &resource_, attr)); + resource_.scalar()() = + MakeResourceHandle(ctx, container_, name_); + initialized_.store(true); + } + ctx->set_output(0, resource_); } } // end namespace tensorflow ",0,test 05dfc24e863c6fb7e7bd3552443a819f66b12dff,tensorflow/tensorflow,"DepthwiseConv, NEON 3x3 kernel, implement templated rounding method. PiperOrigin-RevId: 242878081",depthwiseconv_quantized_test.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ #include + #include #include #include @@ -137,14 +138,28 @@ inline void DispatchDepthwiseConv( << "" output_height = "" << output_shape.Dims(1); // Call kernel optimized for depthwise convolutions using 3x3 filters. - optimized_ops::depthwise_conv::DepthwiseConv3x3Filter( - params, input_shape, input_data, filter_shape, filter_data, - bias_shape, bias_data, output_shape, output_data, /*thread_start=*/0, - /*thread_end=*/output_shape.Dims(1), /*thread_dim=*/1); - return; -#else - break; + switch (test_param.output_rounding) { + case DepthwiseConvOutputRounding::kAwayFromZero: + optimized_ops::depthwise_conv::DepthwiseConv3x3Filter< + DepthwiseConvOutputRounding::kAwayFromZero>( + params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, + /*thread_start=*/0, + /*thread_end=*/output_shape.Dims(1), /*thread_dim=*/1); + return; + case DepthwiseConvOutputRounding::kUpward: + optimized_ops::depthwise_conv::DepthwiseConv3x3Filter< + DepthwiseConvOutputRounding::kAwayFromZero>( + params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, + /*thread_start=*/0, + /*thread_end=*/output_shape.Dims(1), /*thread_dim=*/1); + return; + default: + break; + } #endif + break; } case DepthwiseConvImplementation::kUseNeon3x3DotProduct: { #if defined(__ARM_FEATURE_DOTPROD) && !defined(GOOGLE_L4T) ",0,train 05dfc24e863c6fb7e7bd3552443a819f66b12dff,tensorflow/tensorflow,"DepthwiseConv, NEON 3x3 kernel, implement templated rounding method. 
PiperOrigin-RevId: 242878081",depthwiseconv_uint8.h,"@@ -2016,7 +2016,8 @@ inline void DepthwiseConvWithRounding( dilation_width_factor, dilation_height_factor, pad_width, pad_height, depth_multiplier, output_shape, output_shift)) { gemmlowp::ScopedProfilingLabel specialized_label(""DepthwiseConv/8bit/3x3""); - depthwise_conv::DepthwiseConv3x3Filter( + depthwise_conv::DepthwiseConv3x3Filter< + DepthwiseConvOutputRounding::kAwayFromZero>( params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, output_shape, output_data, thread_start, thread_end, thread_dim); ",0,train 05dfc24e863c6fb7e7bd3552443a819f66b12dff,tensorflow/tensorflow,"DepthwiseConv, NEON 3x3 kernel, implement templated rounding method. PiperOrigin-RevId: 242878081",depthwiseconv_uint8_3x3_filter.h,"@@ -574,11 +574,13 @@ static_assert(offsetof(DepthwiseConvDotProdParams, four_over_stride) == #endif // __ARM_FEATURE_DOTPROD && !GOOGLE_L4T #if defined(__aarch64__) && !defined(GOOGLE_L4T) -template +template struct DepthwiseConvWindow {}; template <> -struct DepthwiseConvWindow<8, 1, 1> { +struct DepthwiseConvWindow { public: static inline void Run(const uint8* input_ptr, const uint8* filter_ptr, const int32* bias_ptr, uint8* output_ptr, @@ -1512,7 +1514,8 @@ struct DepthwiseConvWindow<8, 1, 1> { }; template <> -struct DepthwiseConvWindow<8, 2, 2> { +struct DepthwiseConvWindow { static inline void Run(const uint8* input_ptr, const uint8* filter_ptr, const int32* bias_ptr, uint8* output_ptr, int64_t input_depth, int64_t input_row_size, @@ -2546,11 +2549,13 @@ struct DepthwiseConvWindow<8, 2, 2> { enum class EdgeType { kCorner, kHorizontal, kVertical, kCenter }; -template +template struct DepthwiseConvPartial {}; template <> -struct DepthwiseConvPartial { +struct DepthwiseConvPartial { static inline void Run(const uint8* input_ptr, const uint8* filter_ptr, const int32* bias_ptr, uint8* output_ptr, const DepthwiseConvParams* params_ptr) { @@ -2663,7 +2668,8 @@ struct DepthwiseConvPartial { }; template <> -struct DepthwiseConvPartial { +struct DepthwiseConvPartial { static inline void Run(const uint8* input_ptr, const uint8* filter_ptr, const int32* bias_ptr, uint8* output_ptr, const DepthwiseConvParams* params_ptr) { @@ -2828,7 +2834,8 @@ struct DepthwiseConvPartial { }; template <> -struct DepthwiseConvPartial { +struct DepthwiseConvPartial { static inline void Run(const uint8* input_ptr, const uint8* filter_ptr, const int32* bias_ptr, uint8* output_ptr, const DepthwiseConvParams* params_ptr) { @@ -3027,7 +3034,8 @@ struct DepthwiseConvPartial { }; template <> -struct DepthwiseConvPartial { +struct DepthwiseConvPartial { static inline void Run(const uint8* input_ptr, const uint8* filter_ptr, const int32* bias_ptr, uint8* output_ptr, const DepthwiseConvParams* params_ptr) { @@ -3287,7 +3295,8 @@ struct ShuffleParams { input_height(get_shuffle_input_size(stride_height, output_height)) {} }; -template +template struct DepthwiseConvThroughDepth { // Runs the DepthwiseConvWindow kernels through the depth dimension from // |start_depth| to |end_depth|. 
Keep this not inlined to maintain a small @@ -3299,7 +3308,7 @@ struct DepthwiseConvThroughDepth { int64_t input_depth, int64_t input_row_size, int32 output_window_height, int32 output_window_width, const DepthwiseConvParams& params) { for (; start_depth <= end_depth - 8; start_depth += 8) { - DepthwiseConvWindow<8, kStrideWidth, kStrideHeight>::Run( + DepthwiseConvWindow::Run( input_ptr, filter_ptr, bias_ptr, output_ptr, input_depth, input_row_size, output_window_height, output_window_width, ¶ms); input_ptr += 8; @@ -3310,9 +3319,11 @@ struct DepthwiseConvThroughDepth { } }; -template +template struct DepthwiseConvMultiRow { - using ConvKernel = DepthwiseConvThroughDepth; + using ConvKernel = + DepthwiseConvThroughDepth; static inline void Run(const uint8* input_data, int32 start_x, int32 end_x, const uint8* filter_data, const int32* bias_data, @@ -3411,6 +3422,7 @@ struct DepthwiseConvMultiRow { // * Corner edges. // * Horizontal edges. // * Vertical edges. +template inline void DepthwiseConvHandlePadding(const uint8* input_data, const uint8* filter_data, const int32* bias_data, @@ -3419,7 +3431,7 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data, if (params.input_width == 1 && params.input_height == 1) { const uint8* filter_ptr = filter_data + params.filter_row_size + params.output_depth; - DepthwiseConvPartial::Run( + DepthwiseConvPartial::Run( input_data, filter_ptr, bias_data, output_data, ¶ms); return; } @@ -3435,7 +3447,7 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data, filter_data + params.filter_row_size + params.output_depth; uint8* output_ptr = output_data; - DepthwiseConvPartial::Run( + DepthwiseConvPartial::Run( input_ptr, filter_ptr, bias_data, output_ptr, ¶ms); input_ptr += (params.stride_width - 1) * params.input_depth; @@ -3444,13 +3456,13 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data, for (int32 out_x = out_x_start_corner + 1; out_x < out_x_end_corner; out_x++) { - DepthwiseConvPartial::Run( + DepthwiseConvPartial::Run( input_ptr, filter_ptr, bias_data, output_ptr, ¶ms); input_ptr += params.stride_width * params.input_depth; output_ptr += params.output_depth; } - DepthwiseConvPartial::Run( + DepthwiseConvPartial::Run( input_ptr, filter_ptr, bias_data, output_ptr, ¶ms); // Handle left side. @@ -3460,7 +3472,7 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data, for (int32 out_y = out_y_start_corner + 1; out_y < out_y_end_corner; out_y++) { - DepthwiseConvPartial::Run( + DepthwiseConvPartial::Run( input_ptr, filter_ptr, bias_data, output_ptr, ¶ms); input_ptr += params.stride_width * params.input_row_size; output_ptr += params.output_row_size; @@ -3475,7 +3487,7 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data, for (int32 out_y = out_y_start_corner + 1; out_y < out_y_end_corner; out_y++) { - DepthwiseConvPartial::Run( + DepthwiseConvPartial::Run( input_ptr, filter_ptr, bias_data, output_ptr, ¶ms); input_ptr += params.stride_width * params.input_row_size; output_ptr += params.output_row_size; @@ -3487,7 +3499,7 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data, output_ptr = output_data + (params.output_height - 1) * params.output_row_size; - DepthwiseConvPartial::Run( + DepthwiseConvPartial::Run( input_ptr, filter_ptr, bias_data, output_ptr, ¶ms); input_ptr += (params.stride_width == 1) ? 
0 : params.input_depth; @@ -3496,13 +3508,13 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data, for (int32 out_x = out_x_start_corner + 1; out_x < out_x_end_corner; out_x++) { - DepthwiseConvPartial::Run( + DepthwiseConvPartial::Run( input_ptr, filter_ptr, bias_data, output_ptr, &params); input_ptr += params.stride_width * params.input_depth; output_ptr += params.output_depth; } - DepthwiseConvPartial::Run( + DepthwiseConvPartial::Run( input_ptr, filter_ptr, bias_data, output_ptr, &params); } @@ -3568,6 +3580,7 @@ inline bool Fast3x3FilterKernelSupported( return supported; } +template inline void DepthwiseConv3x3Filter( const DepthwiseParams& rt_params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& filter_shape, @@ -3645,10 +3658,12 @@ inline void DepthwiseConv3x3Filter( eight_row_shuffle_params = ShuffleParams(2, 8, 2, 2); } - using conv_multirow_func_t = decltype(&DepthwiseConvMultiRow<1, 1>::Run); - conv_multirow_func_t conv_multirow_func = DepthwiseConvMultiRow<1, 1>::Run; + using conv_multirow_func_t = + decltype(&DepthwiseConvMultiRow::Run); + conv_multirow_func_t conv_multirow_func = + DepthwiseConvMultiRow::Run; if (stride_width == 2) { - conv_multirow_func = DepthwiseConvMultiRow<2, 2>::Run; + conv_multirow_func = DepthwiseConvMultiRow::Run; } // Allocate maximum memory needed for shuffled input. @@ -3689,8 +3704,8 @@ inline void DepthwiseConv3x3Filter( int32 end_y = row_end; if (pad_width == 1 && pad_height == 1) { - DepthwiseConvHandlePadding(input_ptr, filter_data, bias_data, output_ptr, - params); + DepthwiseConvHandlePadding( + input_ptr, filter_data, bias_data, output_ptr, params); // Update extents now that the edges have been handled. out_x = 1; ",0,train e5dcaf921cf9feefd42b2ab176590c696b3b0285,tensorflow/tensorflow,"Fix #15900 (#16154) - Added `save_checkpoint_steps` attribute to `MonitoredTrainingSession`. If both `save_checkpoint_steps` and `save_checkpoint_secs` are both `None` then default saver is disabled. Default is `save_checkpoint_secs=600` - Added `test_save_checkpoint_steps` - Updated golden file",monitored_session.py,"@@ -281,13 +281,14 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name scaffold=None, hooks=None, chief_only_hooks=None, - save_checkpoint_secs=600, + save_checkpoint_secs=USE_DEFAULT, save_summaries_steps=USE_DEFAULT, save_summaries_secs=USE_DEFAULT, config=None, stop_grace_period_secs=120, log_step_count_steps=100, - max_wait_secs=7200): + max_wait_secs=7200, + save_checkpoint_steps=USE_DEFAULT): """"""Creates a `MonitoredSession` for training. For a chief, this utility sets proper session initializer/restorer. It also @@ -310,8 +311,10 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name chief_only_hooks: list of `SessionRunHook` objects. Activate these hooks if `is_chief==True`, ignore otherwise. save_checkpoint_secs: The frequency, in seconds, that a checkpoint is saved - using a default checkpoint saver. If `save_checkpoint_secs` is set to - `None`, then the default checkpoint saver isn't used. + using a default checkpoint saver. If both `save_checkpoint_steps` and + `save_checkpoint_secs` are set to `None`, then the default checkpoint + saver isn't used. If both are provided, then only `save_checkpoint_secs` + is used. Default 600. save_summaries_steps: The frequency, in number of global steps, that the summaries are written to disk using a default summary saver.
If both `save_summaries_steps` and `save_summaries_secs` are set to `None`, then @@ -330,6 +333,11 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name become available. This should be kept relatively short to help detect incorrect code, but sometimes may need to be increased if the chief takes a while to start up. + save_checkpoint_steps: The frequency, in number of global steps, that a + checkpoint is saved using a default checkpoint saver. If both + `save_checkpoint_steps` and `save_checkpoint_secs` are set to `None`, then + the default checkpoint saver isn't used. If both are provided, then only + `save_checkpoint_secs` is used. Default not enabled. Returns: A `MonitoredSession` object. @@ -342,6 +350,15 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name elif save_summaries_steps == USE_DEFAULT: save_summaries_steps = None + if save_checkpoint_steps == USE_DEFAULT and \ + save_checkpoint_secs == USE_DEFAULT: + save_checkpoint_steps = None + save_checkpoint_secs = 600 + elif save_checkpoint_secs == USE_DEFAULT: + save_checkpoint_secs = None + elif save_checkpoint_steps == USE_DEFAULT: + save_checkpoint_steps = None + scaffold = scaffold or Scaffold() if not is_chief: session_creator = WorkerSessionCreator( @@ -374,9 +391,13 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name save_steps=save_summaries_steps, save_secs=save_summaries_secs, output_dir=checkpoint_dir)) - if save_checkpoint_secs and save_checkpoint_secs > 0: + if (save_checkpoint_secs and save_checkpoint_secs > 0) or ( + save_checkpoint_steps and save_checkpoint_steps > 0): all_hooks.append(basic_session_run_hooks.CheckpointSaverHook( - checkpoint_dir, save_secs=save_checkpoint_secs, scaffold=scaffold)) + checkpoint_dir, + save_steps=save_checkpoint_steps, + save_secs=save_checkpoint_secs, + scaffold=scaffold)) if hooks: all_hooks.extend(hooks) ",0,train e5dcaf921cf9feefd42b2ab176590c696b3b0285,tensorflow/tensorflow,"Fix #15900 (#16154) - Added `save_checkpoint_steps` attribute to `MonitoredTrainingSession`. If both `save_checkpoint_steps` and `save_checkpoint_secs` are both `None` then default saver is disabled. Default is `save_checkpoint_secs=600` - Added `test_save_checkpoint_steps` - Updated golden file",monitored_session_test.py,"@@ -282,6 +282,42 @@ class MonitoredTrainingSessionTest(test.TestCase): is_chief=True, checkpoint_dir=logdir) as session: self.assertEqual(2, session.run(gstep)) + def test_save_checkpoint_steps(self): + logdir = _test_dir(self.get_temp_dir(), 'test_save_checkpoint_steps') + with ops.Graph().as_default(): + gstep = variables_lib.get_or_create_global_step() + new_gstep = state_ops.assign_add(gstep, 1) + with monitored_session.MonitoredTrainingSession( + is_chief=True, + checkpoint_dir=logdir, + save_checkpoint_steps=100, + log_step_count_steps=10) as session: + for _ in range(100): + session.run(new_gstep) + # A restart will find the checkpoint and recover automatically. 
+ with monitored_session.MonitoredTrainingSession( + is_chief=True, checkpoint_dir=logdir) as session: + self.assertEqual(100, session.run(gstep)) + + def test_save_checkpoint_secs(self): + logdir = _test_dir(self.get_temp_dir(), 'test_save_checkpoint_secs') + with ops.Graph().as_default(): + gstep = variables_lib.get_or_create_global_step() + new_gstep = state_ops.assign_add(gstep, 1) + with monitored_session.MonitoredTrainingSession( + is_chief=True, + checkpoint_dir=logdir, + save_checkpoint_secs=0.1, + log_step_count_steps=10) as session: + session.run(new_gstep) + time.sleep(0.2) + for _ in range(10): + session.run(new_gstep) + # A restart will find the checkpoint and recover automatically. + with monitored_session.MonitoredTrainingSession( + is_chief=True, checkpoint_dir=logdir) as session: + self.assertEqual(11, session.run(gstep)) + def test_summaries_steps(self): logdir = _test_dir(self.get_temp_dir(), 'test_summaries_steps') with ops.Graph().as_default(): ",0,train 46f86abb7bd15989b88f69e6027d867718675789,tensorflow/tensorflow,"[XLA] Fix a bug in SplitF64ToF32 Overflows in SplitF64ToF32 could result in non-finite lower components which, when reconstructed, would result in NaN. PiperOrigin-RevId: 343889763 Change-Id: Ie4dffd64738a22c4bc2377a40e4760cfe776e95a",util.cc,"@@ -367,15 +367,15 @@ string SanitizeFileName(string file_name) { // precision, Numerische Mathematik, vol. 18, pp. 224–242, 1971. std::pair SplitF64ToF32(double x) { const float x_f32 = static_cast(x); - // Early return if x is an infinity or NaN. - if (!std::isfinite(x)) { - return std::make_pair(x_f32, 0.0f); - } - // Only values within the range of F32 are supported, unless it is infinity. - // Small values with large negative exponents would be rounded to zero. + // Early return if x is an infinity or NaN. if (!std::isfinite(x_f32)) { - LOG(WARNING) << ""Out of range F64 constant detected: "" << x; + // Only values within the range of F32 are supported, unless it is infinity. + // Small values with large negative exponents would be rounded to zero. + if (std::isfinite(x)) { + LOG(WARNING) << ""Out of range F64 constant detected: "" << x; + } + return std::make_pair(x_f32, 0.0f); } // The high float is simply the double rounded to the nearest float. Because ",0,train 46f86abb7bd15989b88f69e6027d867718675789,tensorflow/tensorflow,"[XLA] Fix a bug in SplitF64ToF32 Overflows in SplitF64ToF32 could result in non-finite lower components which, when reconstructed, would result in NaN. PiperOrigin-RevId: 343889763 Change-Id: Ie4dffd64738a22c4bc2377a40e4760cfe776e95a",util_test.cc,"@@ -126,5 +126,13 @@ TEST(UtilTest, RoundTripFpToString) { ""-nan""); } +TEST(UtilTest, SplitF64ToF32) { + // Overflowing the F32 exponent in SplitF64ToF32 should result in a pair of + // [∞,0]. + EXPECT_EQ(SplitF64ToF32(std::numeric_limits::max()).first, + std::numeric_limits::infinity()); + EXPECT_EQ(SplitF64ToF32(std::numeric_limits::max()).second, 0.0f); +} + } // namespace } // namespace xla ",0,train b5358aa31cf6f7e80def9fbaea0513d04b2891b8,tensorflow/tensorflow,"Do not accumulate loop invariants in while_v2. 
PiperOrigin-RevId: 258199946",while_v2_test.py,"@@ -225,15 +225,6 @@ class WhileV2Test(test.TestCase, parameterized.TestCase): train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) train_op.append(outputs[0]) - def GetOptimizedGraph(): - mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph()) - config = config_pb2.ConfigProto() - config.graph_options.rewrite_options.CopyFrom( - rewriter_config_pb2.RewriterConfig( - constant_folding=rewriter_config_pb2.RewriterConfig.OFF, - memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL)) - return tf_optimizer.OptimizeGraph(config, mg) - g = GetOptimizedGraph() # TODO(b/136034023): while_v2 adds an extra loop_counter which is not pruned # away, causing an extra Enter node. @@ -267,6 +258,30 @@ class WhileV2Test(test.TestCase, parameterized.TestCase): def testPruningV2(self): self._testPruning() + @parameterized.named_parameters( + (""V1"", control_flow_ops.while_loop, ""StackPushV2""), + (""V2"", while_loop_v2, ""TensorListPushBack""), + ) + @test_util.run_deprecated_v1 + def testDoNotAccumulateInvariants(self, while_loop_fn, push_op): + # Tests that loop invariants, i.e., tensors that are ""captured"" by the + # while loop and not passed as loop variables are not accumulated in + # gradient computation. + v = constant_op.constant(5.0, name=""v"") + + r = while_loop_fn( + lambda _: True, lambda x: v * x, [1.0], maximum_iterations=5) + + output = gradients_impl.gradients(r, v)[0] + train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) + train_op.append(output) + + g = GetOptimizedGraph() + # The gradient for v * x requires the value of both v and x. Since v is a + # loop invariant it is not accumulated so we have just one accumulator for + # x. + self.assertLen([n for n in g.node if n.op == push_op], 1) + @test_util.run_deprecated_v1 def testCaptureExternalTensorInCond(self): x = constant_op.constant(2.) @@ -522,5 +537,15 @@ def ScalarShape(): return ops.convert_to_tensor([], dtype=dtypes.int32) +def GetOptimizedGraph(): + mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph()) + config = config_pb2.ConfigProto() + config.graph_options.rewrite_options.CopyFrom( + rewriter_config_pb2.RewriterConfig( + constant_folding=rewriter_config_pb2.RewriterConfig.OFF, + memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL)) + return tf_optimizer.OptimizeGraph(config, mg) + + if __name__ == ""__main__"": test.main() ",0,train b5358aa31cf6f7e80def9fbaea0513d04b2891b8,tensorflow/tensorflow,"Do not accumulate loop invariants in while_v2. PiperOrigin-RevId: 258199946",while_v2.py,"@@ -470,6 +470,16 @@ def _create_grad_func(ys, xs, grads, cond_graph, body_graph, name, while_op, counter = constant_op.constant( 0, dtype=total_iters.dtype, name=""grad_counter"") + # Build frozen sets so that we do not have linear time lookups in + # `_is_loop_invariant`. Note: `body_graph.inputs` and `body_graph.outputs` + # may get updated during gradient computation because we add accumulators to + # the forward op. However, those are not loop invariants so wouldn't affect + # the output of `_is_loop_invariant`. Also we would never attempt to capture + # those accumulators so `_is_loop_invariant` should never receive those new + # tensors as args. + body_graph_inputs = frozenset(body_graph.inputs) + body_graph_outputs = frozenset(body_graph.outputs) + args = [counter, maximum_iterations, total_iters] + list(grads) # Note: The returned function does not have `args` in the list of # `external_captures`. 
@@ -478,18 +488,28 @@ def _create_grad_func(ys, xs, grads, cond_graph, body_graph, name, while_op, lambda *args: _grad_fn(ys, xs, args, body_graph), args, {}, func_graph=_WhileBodyGradFuncGraph(name, cond_graph, body_graph, - maximum_iterations, while_op)) - - # Add the popped accumulators to the list of outputs. - for internal_capture in grad_func_graph.internal_captures: + maximum_iterations, while_op, + body_graph_inputs, body_graph_outputs)) + + # Update the list of outputs with tensors corresponding to the captured + # tensors. We capture 3 types of tensors when building the grad fn: + # 1. Accumulators for forward graph intermediates which are not loop + # invariants. The outputs corresponding to these are populated in + # `popped_tensor_lists` by `_WhileBodyGradFuncGraph`. + # 2. Resources, which are output as is. + # 3. Forward graph loop invariants, which are output as is. + for external_capture, internal_capture in grad_func_graph.captures.items(): if internal_capture in grad_func_graph.popped_tensor_lists: new_output = grad_func_graph.popped_tensor_lists[internal_capture] - elif internal_capture.dtype == dtypes.resource: + elif (internal_capture.dtype == dtypes.resource or _is_loop_invariant( + external_capture, body_graph_inputs, body_graph_outputs)): new_output = internal_capture else: - raise ValueError(""Tensor %s is in list of internal_captures but is"" - "" neither a resource nor is in popped_tensor_lists."" % - str(internal_capture)) + raise ValueError(""Tensor %s which captures %s is in list of "" + ""internal_captures but is not a resource, is not in "" + ""popped_tensor_lists and does not capture a loop "" + ""invariant."" % + (str(internal_capture), str(external_capture))) grad_func_graph.outputs.append(new_output) grad_func_graph.structured_outputs.append(new_output) @@ -562,7 +582,7 @@ def _resolve_grad_captures(body_graph, body_grad_graph, while_op): # graph or a captured resource variable (note that input gradients are # regular non-captured inputs). if t.graph == body_graph: - # Captured accumulator + # Captured accumulator or loop invariant. t = while_op.outputs[t.graph.outputs.index(t)] # Note: We rely on the capturing logic of the gradient While op graph to # correctly capture the tensors in `body_graph.outer_graph`. Both cond_v2 @@ -715,7 +735,8 @@ class _WhileBodyGradFuncGraph(util.WhileBodyFuncGraph): """""" def __init__(self, name, forward_cond_graph, forward_body_graph, - maximum_iterations, forward_while_op): + maximum_iterations, forward_while_op, body_graph_inputs, + body_graph_outputs): super(_WhileBodyGradFuncGraph, self).__init__(name) self.empty_tensor_lists = [] self.popped_tensor_lists = {} @@ -725,6 +746,11 @@ class _WhileBodyGradFuncGraph(util.WhileBodyFuncGraph): self._forward_cond_graph = forward_cond_graph self._maximum_iterations = maximum_iterations self._forward_while_op = forward_while_op + # Only for use in `_is_loop_invariant`. These are not updated when + # additional tensors are added to `forward_body_graph.inputs` and + # `forward_body_graph.outputs` in `_capture_helper`. + self._forward_graph_inputs = body_graph_inputs + self._forward_graph_outputs = body_graph_outputs # Dict from forward intermediate tensor to its indirectly captured tensor # in this graph. Indirect capturing happens in two ways: # 1. 
For non-resource tensors we capture their accumulators from the forward @@ -781,6 +807,15 @@ class _WhileBodyGradFuncGraph(util.WhileBodyFuncGraph): if tensor.dtype == dtypes.resource: return self._resource_capture_helper(tensor) + # No need to accumulate loop invariants. Capture them directly. + # The captured tensor gets resolved to the corresponding while output in + # `_resolve_grad_captures`. + if _is_loop_invariant(tensor, self._forward_graph_inputs, + self._forward_graph_outputs): + captured_tensor = super(_WhileBodyGradFuncGraph, + self)._capture_helper(tensor, name) + return captured_tensor + # Create or find an existing accumulator output for `tensor` in the forward # graph, and fetch from this accumulator in the gradient graph to get the # raw intermediate value. @@ -956,4 +991,8 @@ def _build_accumulator_name(tensor): # Tensor name may be of the form ""pow/y:0"". Name scope does not allow "":"". return ""{}/accumulator"".format(tensor.name).replace("":"", ""_"") + +def _is_loop_invariant(tensor, inputs, outputs): + return tensor in inputs and tensor in outputs + # pylint: enable=protected-access ",0,train 604988b5d4e8cec6564db6502e6e40eefac8fc67,tensorflow/tensorflow,"Add operator overloads to AutoCastVariable. The code was copied from DistributionStrategy at https://github.com/tensorflow/tensorflow/blob/81acfa851ecf413df02c6bdf4795630524f2f859/tensorflow/python/distribute/values.py#L401 with slight modifications. PiperOrigin-RevId: 256469842",autocast_variable.py,"@@ -148,8 +148,63 @@ class AutoCastVariable(trackable.Trackable): """"""Pass resource_variable_ops.is_resource_variable check."""""" pass - # TODO(reedwm): Define operator overloads. - + # Operator overloads: + # Note we only overload operators that support floating-point types, as + # non-float variables cannot be wrapped with an AutoCastVariable. 
+ + # pylint: disable=multiple-statements + def __add__(self, o): return self.value() + o + def __radd__(self, o): return o + self.value() + def __sub__(self, o): return self.value() - o + def __rsub__(self, o): return o - self.value() + def __mul__(self, o): return self.value() * o + def __rmul__(self, o): return o * self.value() + def __truediv__(self, o): return self.value() / o + def __rtruediv__(self, o): return o / self.value() + def __floordiv__(self, o): return self.value() // o + + def __rfloordiv__(self, o): return o // self.value() + def __mod__(self, o): return self.value() % o + def __rmod__(self, o): return o % self.value() + def __lt__(self, o): return self.value() < o + def __le__(self, o): return self.value() <= o + def __gt__(self, o): return self.value() > o + def __ge__(self, o): return self.value() >= o + def __getitem__(self, o): return self.value()[o] + def __pow__(self, o, modulo=None): return pow(self.value(), o, modulo) + def __rpow__(self, o): return pow(o, self.value()) + def __neg__(self): return -self.value() + def __abs__(self): return abs(self.value()) + + def __div__(self, o): + try: + return self.value().__div__(o) + except AttributeError: + # See https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented + + def __rdiv__(self, o): + try: + return self.value().__rdiv__(o) + except AttributeError: + # See https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented + + def __matmul__(self, o): + try: + return self.value().__matmul__(o) + except AttributeError: + # See https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented + + def __rmatmul__(self, o): + try: + return self.value().__rmatmul__(o) + except AttributeError: + # See https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented + + # pylint: enable=multiple-statements ops.register_tensor_conversion_function( AutoCastVariable, AutoCastVariable._dense_var_to_tensor) # pylint:disable=protected-access ",0,train 604988b5d4e8cec6564db6502e6e40eefac8fc67,tensorflow/tensorflow,"Add operator overloads to AutoCastVariable. The code was copied from DistributionStrategy at https://github.com/tensorflow/tensorflow/blob/81acfa851ecf413df02c6bdf4795630524f2f859/tensorflow/python/distribute/values.py#L401 with slight modifications. PiperOrigin-RevId: 256469842",autocast_variable_test.py,"@@ -97,30 +97,46 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters(*TESTCASES) def test_operator_overloads(self, distribute): with get_distribute_scope(distribute): - x = get_var(1., dtypes.float32) - x = get_autocast_var(x, distribute) - self.evaluate(x.initializer) - - v1 = constant_op.constant(2., dtype=dtypes.float32) - v2 = constant_op.constant(2., dtype=dtypes.float16) - - # Because autocast variables do not yet define operator overloads, the - # operator is defined by the non-variable tensor - - # Test variable as the LHS. Currently, this is not supported with - # distributed autocast variables - if not distribute: - self.assertEqual(self.evaluate(x + v1), 3.) - - x._read_dtype = dtypes.float16 - self.assertEqual(self.evaluate(x + v2), 3.) - - # Test variable as the RHS - x._read_dtype = dtypes.float32 - self.assertEqual(self.evaluate(v1 + x), 3.) - - x._read_dtype = dtypes.float16 - self.assertEqual(self.evaluate(v2 + x), 3.) 
+ for read_dtype in (dtypes.float32, dtypes.float16): + x = get_var(7., dtypes.float32) + x = get_autocast_var(x, distribute) + x._read_dtype = read_dtype + self.evaluate(x.initializer) + self.assertAlmostEqual(8, self.evaluate(x + 1)) + self.assertAlmostEqual(10, self.evaluate(3 + x)) + self.assertAlmostEqual(14, self.evaluate(x + x)) + self.assertAlmostEqual(5, self.evaluate(x - 2)) + self.assertAlmostEqual(6, self.evaluate(13 - x)) + self.assertAlmostEqual(0, self.evaluate(x - x)) + self.assertAlmostEqual(14, self.evaluate(x * 2)) + self.assertAlmostEqual(21, self.evaluate(3 * x)) + self.assertAlmostEqual(49, self.evaluate(x * x)) + self.assertAlmostEqual(3.5, self.evaluate(x / 2)) + self.assertAlmostEqual(1.5, self.evaluate(10.5 / x)) + self.assertAlmostEqual(3, self.evaluate(x // 2)) + self.assertAlmostEqual(2, self.evaluate(15 // x)) + if read_dtype == dtypes.float32: + # The ""mod"" operator does not support float16 + self.assertAlmostEqual(1, self.evaluate(x % 2)) + self.assertAlmostEqual(2, self.evaluate(16 % x)) + self.assertTrue(self.evaluate(x < 12)) + self.assertTrue(self.evaluate(x <= 12)) + self.assertFalse(self.evaluate(x > 12)) + self.assertFalse(self.evaluate(x >= 12)) + self.assertFalse(self.evaluate(12 < x)) + self.assertFalse(self.evaluate(12 <= x)) + self.assertTrue(self.evaluate(12 > x)) + self.assertTrue(self.evaluate(12 >= x)) + self.assertAlmostEqual(343, self.evaluate(pow(x, 3)), places=4) + self.assertAlmostEqual(128, self.evaluate(pow(2, x)), places=4) + self.assertAlmostEqual(-7, self.evaluate(-x)) + self.assertAlmostEqual(7, self.evaluate(abs(x))) + + x = get_var([7, 8, 9], dtypes.float32) + x = get_autocast_var(x, distribute) + x._read_dtype = read_dtype + self.evaluate(x.initializer) + self.assertEqual(self.evaluate(x[1]), 8) @parameterized.named_parameters(*TESTCASES) def test_assign(self, distribute): ",0,train 0ebd45086c7a9e412c4102f42004b6c02578fc49,tensorflow/tensorflow,fix mistype,generic_utils_test.py,"@@ -313,8 +313,8 @@ class SerializeKerasObjectTest(test.TestCase): def test_serialize_type_object_initializer(self): layer = keras.layers.Dense( 1, - kernel_initializer=keras.initializer.ones, - bias_initializer=keras.initializer.zeros) + kernel_initializer=keras.initializers.ones, + bias_initializer=keras.initializers.zeros) config = keras.layers.serialize(layer) self.assertEqual( config['config']['bias_initializer'], ",0,train 952c2ab177ece6e1d2ddeb3d59f0f5d617532dae,tensorflow/tensorflow,"MultiWorkerTutorialTest: Add the model saving and loading parts. The tutorial will use the same pieces of code to demonstrate how to save and load (with MWMS) in a multi-worker environment. 
PiperOrigin-RevId: 319902843 Change-Id: If3e00764c0b5f545da0e003f9c14a88be48a1ff6",multi_worker_tutorial_test.py,"@@ -33,6 +33,7 @@ from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util from tensorflow.python.keras.datasets import mnist from tensorflow.python.keras.optimizer_v2 import gradient_descent +from tensorflow.python.lib.io import file_io from tensorflow.python.platform import test from tensorflow.python.util import nest @@ -104,7 +105,7 @@ class MultiWorkerTutorialTest(parameterized.TestCase, test.TestCase): num_workers = 4 - def proc_func(): + def proc_func(model_path): global_batch_size = per_worker_batch_size * num_workers strategy = collective_all_reduce_strategy.CollectiveAllReduceStrategy() with strategy.scope(): @@ -127,10 +128,47 @@ class MultiWorkerTutorialTest(parameterized.TestCase, test.TestCase): steps_per_epoch=20, callbacks=callbacks) + def _is_chief(task_type, task_id): + return task_type == 'chief' or (task_type == 'worker' and task_id == 0) + + def _get_temp_dir(dirpath, task_id): + base_dirpath = 'workertemp_' + str(task_id) + temp_dir = os.path.join(dirpath, base_dirpath) + file_io.recursive_create_dir_v2(temp_dir) + return temp_dir + + def write_filepath(filepath, task_type, task_id): + dirpath = os.path.dirname(filepath) + base = os.path.basename(filepath) + if not _is_chief(task_type, task_id): + dirpath = _get_temp_dir(dirpath, task_id) + return os.path.join(dirpath, base) + + task_type, task_id = (strategy.cluster_resolver.task_type, + strategy.cluster_resolver.task_id) + write_model_path = write_filepath(model_path, task_type, task_id) + + multi_worker_model.save(write_model_path) + if not _is_chief(task_type, task_id): + file_io.delete_recursively_v2(os.path.dirname(write_model_path)) + + # Make sure chief finishes saving before non-chief's assertions. + multi_process_runner.barrier().wait() + + if not file_io.file_exists(model_path): + raise RuntimeError() + if file_io.file_exists(write_model_path) != _is_chief(task_type, task_id): + raise RuntimeError() + + loaded_model = keras.saving.save.load_model(model_path) + loaded_model.fit(multi_worker_dataset, epochs=2, steps_per_epoch=20) + + model_path = os.path.join(self.get_temp_dir(), 'ckpt.tf') with test_util.skip_if_error(self, errors_impl.UnavailableError): mpr_result = multi_process_runner.run( proc_func, multi_worker_test_base.create_cluster_spec(num_workers=num_workers), + args=(model_path,), list_stdout=True) def extract_accuracy(worker_id, input_string): ",0,train b949f47bb463ea38ddaa6f6fc0a3f94e05e8b646,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@d65c32fb41b0 Updates LLVM usage to match [d65c32fb41b0](https://github.com/llvm/llvm-project/commit/d65c32fb41b0) PiperOrigin-RevId: 375537865 Change-Id: I91152346f59e770926a2f8ace1c3241094ce1f90",dot_op_emitter.cc,"@@ -24,8 +24,7 @@ limitations under the License. 
#include ""llvm/IR/Module.h"" #include ""llvm/IR/Value.h"" #include ""mlir/Dialect/Linalg/Transforms/CodegenStrategy.h"" // from @llvm-project -#include ""mlir/Dialect/StandardOps/EDSC/Intrinsics.h"" // from @llvm-project -#include ""mlir/EDSC/Builders.h"" // from @llvm-project +#include ""mlir/Dialect/StandardOps/Utils/Utils.h"" // from @llvm-project #include ""mlir/IR/Builders.h"" // from @llvm-project #include ""mlir/IR/BuiltinOps.h"" // from @llvm-project #include ""mlir/IR/MLIRContext.h"" // from @llvm-project @@ -305,26 +304,6 @@ Status DotOpEmitter::EmitLinalgMatmul() { llvm::SmallVector iteratorTypes( parallel_exprs.size(), toString(mlir::IteratorType::Parallel)); iteratorTypes.push_back(toString(mlir::IteratorType::Reduction)); - /// Helper struct to build simple arithmetic quantities with minimal - /// type inference support. - /// TODO: reuse the core abstraction once it is in a reusable location. - struct ArithBuilder { - ArithBuilder(mlir::OpBuilder& b, mlir::Location loc) - : b(b), loc(loc) {} - mlir::Value add(mlir::Value lhs, mlir::Value rhs) { - if (lhs.getType().isa()) - return b.create(loc, lhs, rhs); - return b.create(loc, lhs, rhs); - } - mlir::Value mul(mlir::Value lhs, mlir::Value rhs) { - if (lhs.getType().isa()) - return b.create(loc, lhs, rhs); - return b.create(loc, lhs, rhs); - } - - mlir::OpBuilder& b; - mlir::Location loc; - }; builder->create( function.getLoc(), /*inputs=*/mlir::ValueRange{b, c}, @@ -334,7 +313,7 @@ Status DotOpEmitter::EmitLinalgMatmul() { {b_exprs, c_exprs, parallel_exprs}), /*iteratorTypes=*/iteratorTypes, [](mlir::OpBuilder& b, mlir::Location loc, mlir::ValueRange args) { - ArithBuilder ab(b, loc); + mlir::ArithBuilder ab(b, loc); mlir::Value mul = ab.mul(args[0], args[1]); mlir::Value add = ab.add(mul, args[2]); b.create(loc, add); ",0,train eb03410ac0614ad25a7b0b487b7554affaa12ceb,tensorflow/tensorflow,added double braces to initialization,expected_output_data.h,"@@ -16,6 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_NETWORK_TESTER_EXPECTED_OUTPUT_DATA_H_ #define TENSORFLOW_LITE_MICRO_EXAMPLES_NETWORK_TESTER_EXPECTED_OUTPUT_DATA_H_ -static unsigned char expected_output_data[1][4] = {6, 8, 14, 16}; +static unsigned char expected_output_data[1][4] = {{6, 8, 14, 16}}; #endif // TENSORFLOW_LITE_MICRO_EXAMPLES_NETWORK_TESTER_EXPECTED_OUTPUT_DATA_H_ ",0,train 3bfd77c175160ca0d4edded4d7861c1f8abfa929,tensorflow/tensorflow,"[TF:XLA] Re-enable ResourceApplyAddSign of 16 bit floats This was blocked on an LLVM bug that was resolved in LLVM r336227. I had to increase the fp tolerance to make the test pass. x: array([ 0.925781, 1.927734], dtype=float16) y: array([ 0.927734, 1.927734], dtype=float16) PiperOrigin-RevId: 203808682",addsign_test.py,"@@ -64,9 +64,6 @@ class AddSignTest(xla_test.XLATestCase): alpha=1.0, beta=0.9): for dtype in self.float_types: - # TODO(b/111123982): remove once the bug is fixed. - if dtype == dtypes.float16: - continue with self.test_session(), self.test_scope(): # Initialize variables for numpy implementation. 
m0, m1 = 0.0, 0.0 @@ -128,7 +125,8 @@ class AddSignTest(xla_test.XLATestCase): ) # Validate updated params - self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType( + var0_np, var0.eval(), half_rtol=1e-2) self.assertAllCloseAccordingToType(var1_np, var1.eval()) def testDense(self): ",0,train 3bfd77c175160ca0d4edded4d7861c1f8abfa929,tensorflow/tensorflow,"[TF:XLA] Re-enable ResourceApplyAddSign of 16 bit floats This was blocked on an LLVM bug that was resolved in LLVM r336227. I had to increase the fp tolerance to make the test pass. x: array([ 0.925781, 1.927734], dtype=float16) y: array([ 0.927734, 1.927734], dtype=float16) PiperOrigin-RevId: 203808682",training_ops.cc,"@@ -719,9 +719,7 @@ class ResourceApplyAddSign : public ResourceApplySignBase { return alpha + decay; } }; -// TODO(b/111123982): Use kFloatTypes once the bug is fixed. -REGISTER_XLA_OP(Name(""ResourceApplyAddSign"") - .TypeConstraint(""T"", {DT_FLOAT, DT_DOUBLE, DT_BFLOAT16}), +REGISTER_XLA_OP(Name(""ResourceApplyAddSign"").TypeConstraint(""T"", kFloatTypes), ResourceApplyAddSign); class ResourceApplyPowerSign : public ResourceApplySignBase { ",0,train e436475e805d259b5359f64aebba89a5b83e4aee,tensorflow/tensorflow,"[FLR] Use the correct `Options` when populating `Executor::Args`. My recent change modified how `Executor::Args` was populated and missed the fact that we rewrite the `Options` to add in a created `Rendezvous` object in some cases. This change correctly uses the rewritten `Options` in both cases. PiperOrigin-RevId: 220218606",function.cc,"@@ -1028,9 +1028,9 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle, } Executor::Args exec_args; - ExecutorArgsFromOptions(opts, frame, &exec_args); + ExecutorArgsFromOptions(run_opts, frame, &exec_args); - bool allow_dead_tensors = opts.allow_dead_tensors; + bool allow_dead_tensors = run_opts.allow_dead_tensors; item->exec->RunAsync( // Executor args exec_args, @@ -1085,7 +1085,7 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle, DCHECK(run_opts.runner != nullptr); Executor::Args exec_args; - ExecutorArgsFromOptions(opts, frame, &exec_args); + ExecutorArgsFromOptions(run_opts, frame, &exec_args); item->exec->RunAsync(exec_args, std::move(done)); } ",0,train 80347abfbff60817c86b6de62709f6d3d3da00ce,tensorflow/tensorflow,"Add 'tf_saved_model.under_construction' attribute in tf_saved_model dialect This change will introduce a new module attribute, `tf_saved_model.under_construction`, which indicates that the given IR hasn't completed the transformation. 1) This attribute will be added before promoting variables to tf.resource_name arguments. 2) Skip the test for allowing saved_model attributes only in the exported function if there is a `tf_saved_model.under_construction`. 3) This attribute will be removed when lifting variables. 
PiperOrigin-RevId: 319455102 Change-Id: Id1950898a0abc73c3b0ad0a5f25405ffb50958d9",tf_saved_model.cc,"@@ -298,6 +298,7 @@ static LogicalResult VerifySavedModelModule( LogicalResult VerifyExportedFunc(FuncOp func) { bool reached_bound_inputs = false; + auto module = func.getParentOfType(); for (int i = 0, e = func.getNumArguments(); i < e; i++) { if (func.getArgAttr(i, ""tf_saved_model.bound_input"")) { reached_bound_inputs = true; @@ -312,7 +313,9 @@ LogicalResult VerifyExportedFunc(FuncOp func) { continue; } if (func.getArgAttr(i, ""tf.resource_name"")) { - continue; + if (module.getAttr(""tf_saved_model.under_construction"")) continue; + return func.emitError() << ""'tf.resource_name' attribute is not allowed "" + ""unless it is being under construction""; } return func.emitError() << ""all arguments should have 'tf_saved_model.index_path', "" @@ -371,6 +374,9 @@ LogicalResult TensorFlowSavedModelDialect::verifyOperationAttribute( } return VerifySavedModelModule(module, this); } + if (named_attr.first == ""tf_saved_model.under_construction"") { + return success(); + } return op->emitError() << ""unknown tf_saved_model dialect attribute '"" << named_attr.first << ""'""; ",0,train 80347abfbff60817c86b6de62709f6d3d3da00ce,tensorflow/tensorflow,"Add 'tf_saved_model.under_construction' attribute in tf_saved_model dialect This change will introduce a new module attribute, `tf_saved_model.under_construction`, which indicates that the given IR hasn't completed the transformation. 1) This attribute will be added before promoting variables to tf.resource_name arguments. 2) Skip the test for allowing saved_model attributes only in the exported function if there is a `tf_saved_model.under_construction`. 3) This attribute will be removed when lifting variables. PiperOrigin-RevId: 319455102 Change-Id: Id1950898a0abc73c3b0ad0a5f25405ffb50958d9",import_model.cc,"@@ -3399,9 +3399,11 @@ SavedModelSignatureDefImporter::ConvertSignatures() { mlir::OpBuilder builder(module_->getBodyRegion()); module_->setAttr(""tf_saved_model.semantics"", builder.getUnitAttr()); + module_->setAttr(""tf_saved_model.under_construction"", builder.getUnitAttr()); TF_RETURN_IF_ERROR(ExecutorDialectToFunctional()); TF_RETURN_IF_ERROR(RemoveVariablesInSessionInitializer()); TF_RETURN_IF_ERROR(LiftVariables()); + module_->removeAttr(""tf_saved_model.under_construction""); SortSavedModelModule(*module_); MarkSavedModelFunctionVisibility(*module_); ",0,train a286fad85e6ee8b8692a7db7ac5a5e5968f9e740,tensorflow/tensorflow,"Remove incorrect comment PiperOrigin-RevId: 245500211",mark_for_compilation_pass.cc,"@@ -1062,7 +1062,6 @@ StatusOr MarkForCompilationPassImpl::TryToContractEdge(Cluster* from, StatusOr MarkForCompilationPassImpl::TryToContractEdgesFrom( Cluster* cluster_from) { bool changed = false; - // Needs to be RPO because of shape consumer opt for (int to : cycles_graph_.Successors(cluster_from->cycles_graph_node_id())) { iteration_count_++; ",0,train e23aafa44fb804814d8d513a048ba5429e360e8c,tensorflow/tensorflow,"Use determinisitc, default-seeded random inputs for convolution autotuning. PiperOrigin-RevId: 251338008",cudnn_conv_algorithm_picker.cc,"@@ -266,8 +266,8 @@ void InitializeTypedBuffer(se::Stream* stream, se::DeviceMemory buffer, static std::vector* host_buffer = [] { // Use a large prime number to fragment the accesses. auto* ret = new std::vector(10069); - std::random_device rd; - std::mt19937 gen(rd()); + // Default-seeded random numbers. 
+ std::mt19937 gen; for (auto& element : *ret) { using RandomType = typename std::conditional::value, float, ",0,train ca3addea1a508bdc6bc1ab2fc2f574fd69734877,tensorflow/tensorflow,"Fixes asan errors introduced due to cl/259085857 PiperOrigin-RevId: 259233558",object_detection_average_precision_stage.cc,"@@ -57,26 +57,26 @@ TfLiteStatus ObjectDetectionAveragePrecisionStage::Init() { } TfLiteStatus ObjectDetectionAveragePrecisionStage::Run() { - for (int i = 0; i < ground_truth_objects_->objects_size(); ++i) { - const int class_id = ground_truth_objects_->objects(i).class_id(); + for (int i = 0; i < ground_truth_objects_.objects_size(); ++i) { + const int class_id = ground_truth_objects_.objects(i).class_id(); if (class_id >= num_classes_) { LOG(ERROR) << ""Encountered invalid class ID: "" << class_id; return kTfLiteError; } ground_truth_object_vectors_[class_id].push_back(ConvertProtoToDetection( - ground_truth_objects_->objects(i), current_image_index_)); + ground_truth_objects_.objects(i), current_image_index_)); } - for (int i = 0; i < predicted_objects_->objects_size(); ++i) { - const int class_id = predicted_objects_->objects(i).class_id(); + for (int i = 0; i < predicted_objects_.objects_size(); ++i) { + const int class_id = predicted_objects_.objects(i).class_id(); if (class_id >= num_classes_) { LOG(ERROR) << ""Encountered invalid class ID: "" << class_id; return kTfLiteError; } predicted_object_vectors_[class_id].push_back(ConvertProtoToDetection( - predicted_objects_->objects(i), current_image_index_)); + predicted_objects_.objects(i), current_image_index_)); } current_image_index_++; ",0,train ca3addea1a508bdc6bc1ab2fc2f574fd69734877,tensorflow/tensorflow,"Fixes asan errors introduced due to cl/259085857 PiperOrigin-RevId: 259233558",object_detection_average_precision_stage.h,"@@ -42,17 +42,16 @@ class ObjectDetectionAveragePrecisionStage : public EvaluationStage { EvaluationStageMetrics LatestMetrics() override; // Call before Run(). - // Both protos must outlive the call to Run(). void SetEvalInputs(const ObjectDetectionResult& predicted_objects, const ObjectDetectionResult& ground_truth_objects) { - predicted_objects_ = &predicted_objects; - ground_truth_objects_ = &ground_truth_objects; + predicted_objects_ = predicted_objects; + ground_truth_objects_ = ground_truth_objects; } private: int num_classes_ = -1; - const ObjectDetectionResult* predicted_objects_; - const ObjectDetectionResult* ground_truth_objects_; + ObjectDetectionResult predicted_objects_; + ObjectDetectionResult ground_truth_objects_; int current_image_index_ = 0; // One inner vector per class for ground truth objects. ",0,train b2f092894012e9c2612cb46c332140f28d91ced2,tensorflow/tensorflow,"Add DeviceIndex xla op. DeviceIndex op: given a list of device names, this operation returns the index of the device this op runs. In the case of XLA, we are not executing on any device, we return the length of the list. 
PiperOrigin-RevId: 317740778 Change-Id: I0679aa0adc5508b83502eee0d2044584577ed5b4",mark_for_compilation_pass.cc,"@@ -1837,7 +1837,7 @@ absl::flat_hash_map>* GetWhitelistTable() { ""ConcatOffset"", ""Const"", ""MirrorPad"", ""Pack"", ""Pad"", ""PadV2"", ""Reverse"", ""ReverseV2"", ""ReverseSequence"", ""Slice"", ""Split"", ""SplitV"", ""StridedSlice"", ""StridedSliceGrad"", ""ResourceStridedSliceAssign"", - ""Tile"", ""Transpose"", ""InvertPermutation"", ""Unpack""}}}; + ""Tile"", ""Transpose"", ""InvertPermutation"", ""Unpack"", ""DeviceIndex""}}}; // clang-format on return result; } ",0,train b2f092894012e9c2612cb46c332140f28d91ced2,tensorflow/tensorflow,"Add DeviceIndex xla op. DeviceIndex op: given a list of device names, this operation returns the index of the device this op runs. In the case of XLA, we are not executing on any device, we return the length of the list. PiperOrigin-RevId: 317740778 Change-Id: I0679aa0adc5508b83502eee0d2044584577ed5b4",device_index_op.cc,"@@ -0,0 +1,51 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""absl/container/flat_hash_map.h"" +#include ""absl/strings/string_view.h"" +#include ""tensorflow/compiler/tf2xla/xla_helpers.h"" +#include ""tensorflow/compiler/tf2xla/xla_op_kernel.h"" +#include ""tensorflow/compiler/tf2xla/xla_op_registry.h"" +#include ""tensorflow/compiler/xla/client/client_library.h"" +#include ""tensorflow/compiler/xla/client/lib/arithmetic.h"" +#include ""tensorflow/compiler/xla/client/lib/constants.h"" +#include ""tensorflow/compiler/xla/client/lib/math.h"" +#include ""tensorflow/compiler/xla/client/xla_builder.h"" +#include ""tensorflow/core/framework/kernel_def_builder.h"" + +namespace tensorflow { +namespace { + +class DeviceIndexOp : public XlaOpKernel { + public: + explicit DeviceIndexOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr(""device_names"", &device_names_)); + } + + void Compile(XlaOpKernelContext* ctx) override { + // When compiling we are not executing on any physical device, so we return + // a sentinel value (size of the list of devices). + ctx->SetOutput( + 0, xla::ConstantR0(ctx->builder(), device_names_.size())); + } + + private: + std::vector device_names_; +}; + +REGISTER_XLA_OP(Name(""DeviceIndex""), DeviceIndexOp); + +} // namespace +} // namespace tensorflow ",0,train b2f092894012e9c2612cb46c332140f28d91ced2,tensorflow/tensorflow,"Add DeviceIndex xla op. DeviceIndex op: given a list of device names, this operation returns the index of the device this op runs. In the case of XLA, we are not executing on any device, we return the length of the list. 
PiperOrigin-RevId: 317740778 Change-Id: I0679aa0adc5508b83502eee0d2044584577ed5b4",control_flow_ops_test.py,"@@ -1274,6 +1274,26 @@ class ExecuteFnForDeviceTest(test_util.TensorFlowTestCase): self.assertEqual(6., self.evaluate(result)) self.assertEqual([2.], self.evaluate(grad)) + def testCompile(self): + if not test_util.is_gpu_available(): + return + + def cpu_fn(x): + return x + x + + def gpu_fn(x): + return x * x + + @def_function.function(experimental_compile=True) + def flexible_defun(a): + branches = {""CPU"": lambda: cpu_fn(a), ""GPU"": lambda: gpu_fn(a)} + return control_flow_ops.execute_fn_for_device(branches, lambda: cpu_fn(a)) + + # Always execute the default branch in xla compilation case. + a = array_ops.constant(3.) + r = flexible_defun(a) + self.assertEqual(6., self.evaluate(r)) + def testFallBack(self): def default_fn(x): ",0,train 65336d57f39903865f4f48ca5c5f791a87918f3d,tensorflow/tensorflow,"PFor: Support TensorLists in the while_loop converter when the condition is pfor-loop-variant Since they use internal stacking, they need to be accumulated differently. PiperOrigin-RevId: 382784481 Change-Id: I1628178d61e0f7a9158b0ee57d37244e45d93297",control_flow_ops_test.py,"@@ -1370,7 +1370,6 @@ class TensorListTest(PForTestCase): self._test_loop_fn(loop_fn, 2) - @test_util.enable_control_flow_v2 def test_tensor_list_reserve_while_loop(self): # Here a loop invariant TensorList is captured by a while_loop, which then # performs loop dependent operations on it, resulting in a loop variant @@ -1378,6 +1377,8 @@ class TensorListTest(PForTestCase): # while_loop. # We handle this particular case by forcing vectorization of # TensorListReserve operation. + v2_enabled = control_flow_v2_toggles.control_flow_v2_enabled() + control_flow_v2_toggles.enable_control_flow_v2() def loop_fn(i): handle = list_ops.tensor_list_reserve([], 2, dtypes.int32) @@ -1387,32 +1388,8 @@ class TensorListTest(PForTestCase): return list_ops.tensor_list_stack(out_handle, dtypes.int32) self._test_loop_fn(loop_fn, 2) - - @test_util.enable_control_flow_v2 - def test_tensor_list_while_loop_stacked_cond_stacked_list(self): - - def loop_fn(i): - handle = list_ops.tensor_list_from_tensor([20, 21, 22, 23, i], []) - _, out_handle = control_flow_ops.while_loop( - lambda j, _: j < i, - lambda j, h: (j + 1, list_ops.tensor_list_set_item(h, j, i)), - (0, handle)) - return list_ops.tensor_list_stack(out_handle, dtypes.int32) - - self._test_loop_fn(loop_fn, 5) - - @test_util.enable_control_flow_v2 - def test_tensor_list_while_loop_stacked_cond_unstacked_list(self): - - def loop_fn(i): - handle = list_ops.tensor_list_from_tensor([20, 21, 22, 23, 24], []) - _, out_handle = control_flow_ops.while_loop( - lambda j, _: j < i, - lambda j, h: (j + 1, list_ops.tensor_list_set_item(h, j, i)), - (0, handle)) - return list_ops.tensor_list_stack(out_handle, dtypes.int32) - - self._test_loop_fn(loop_fn, 5) + if not v2_enabled: + control_flow_v2_toggles.disable_control_flow_v2() def test_tensor_list_addn_already_stacked(self): ",0,test 65336d57f39903865f4f48ca5c5f791a87918f3d,tensorflow/tensorflow,"PFor: Support TensorLists in the while_loop converter when the condition is pfor-loop-variant Since they use internal stacking, they need to be accumulated differently. 
PiperOrigin-RevId: 382784481 Change-Id: I1628178d61e0f7a9158b0ee57d37244e45d93297",pfor.py,"@@ -88,23 +88,6 @@ def _variant_handle_data(t): return handle_data.shape_and_type -def _variant_type_id(t): - """"""Returns the full_type_pb2 type of `t`, or None if it is not available."""""" - if t.dtype != dtypes.variant: - return None - shapes_and_types = _variant_handle_data(t) - if shapes_and_types is None or not shapes_and_types: - # TODO(b/169968286): Identify all variant tensors (e.g. maps) and we can - # make this an error instead of assuming TensorLists have handle data. - return None # Presumed not a TensorList/Optional - return shapes_and_types[0].type.type_id - - -_INTERNAL_STACKING_TYPE_IDS = ( - full_type_pb2.TFT_ARRAY, - full_type_pb2.TFT_OPTIONAL) - - def _is_variant_with_internal_stacking(t): """"""Identifies variant tensors which pfor always maintains as scalars. @@ -116,8 +99,15 @@ def _is_variant_with_internal_stacking(t): Returns: True if `t` is a TensorList/Optional, False not, None if unknown. """""" - type_id = _variant_type_id(t) - return type_id in _INTERNAL_STACKING_TYPE_IDS + if t.dtype != dtypes.variant: + return False + shapes_and_types = _variant_handle_data(t) + if shapes_and_types is None or not shapes_and_types: + # TODO(b/169968286): Identify all variant tensors (e.g. maps) and we can + # make this an error instead of assuming TensorLists have handle data. + return None # Presumed not a TensorList/Optional + type_id = shapes_and_types[0].type.type_id + return type_id in (full_type_pb2.TFT_ARRAY, full_type_pb2.TFT_OPTIONAL) def _parse_variant_shapes_and_types(t): @@ -4536,60 +4526,11 @@ class WhileV2(object): with ops.name_scope(""while_init""): for inp in self._pfor_input.inputs: inputs.append(inp.t) - variant_type_id = _variant_type_id(inp.t) - if variant_type_id in _INTERNAL_STACKING_TYPE_IDS: - if variant_type_id != full_type_pb2.TFT_ARRAY: - raise NotImplementedError( - (""While loop conversion is only supported for TensorLists. Got "" - ""another variant {}, probably an optional. Please file a bug."") - .format(inp.t)) - # For TensorLists, the input format is: - # - # List[user_list_len, Tensor[loop_len, ...]] - # - # rather than the usual - # - # Tensor[loop_len, ...] - # - # The body of the loop will take and return lists in this ""internal - # vectorization"" format, so we want to keep it that way as much as - # possible. We'll accumulate finished iterations (only relevant for - # pfor-loop-variant while_loop conditions) in an accumulator with - # type: - # - # List[user_list_len, List[loop_len, Tensor[...]]] - # - # This means that each while_loop iteration, we'll iterate over the - # length of the TensorList, dividing done/remaining pfor loop indices - # and scattering the done indices into the inner nested list of the - # accumulator. 
- element_shape = list_ops.tensor_list_element_shape( - inp.t, dtypes.int32)[1:] - dtype = _parse_variant_shapes_and_types(inp.t)[0].dtype - - def _init_loop_body(index, output_ta): - output_ta = output_ta.write( - index, - list_ops.tensor_list_reserve(element_shape, loop_len, dtype)) - return index + 1, output_ta - - length = list_ops.tensor_list_length(inp.t) - output_ta = tensor_array_ops.TensorArray( - inp.t.dtype, # Variant; this is a nested TensorList - size=length, - dynamic_size=True, - infer_shape=False) - _, output_ta = control_flow_ops.while_loop( - lambda index, _: index < length, - _init_loop_body, - [0, output_ta]) - else: - output_ta = tensor_array_ops.TensorArray( + output_tas.append(tensor_array_ops.TensorArray( inp.t.dtype, size=loop_len, dynamic_size=False, - infer_shape=True) - output_tas.append(output_ta) + infer_shape=True)) # See documentation for __call__ for the structure of init_values. indices = ( math_ops.range(self._pfor.loop_len_vector[0]) @@ -4617,51 +4558,21 @@ class WhileV2(object): new_output_tas = [] for i, (inp, stacked) in enumerate(zip(inputs, inputs_stacked)): pass_through = i in self._body_pass_through_indices - if not pass_through and _variant_type_id(inp) == full_type_pb2.TFT_ARRAY: - shape_and_type = _parse_variant_shapes_and_types(inp)[0] - element_shape = list_ops.tensor_list_element_shape(inp, dtypes.int32) - user_list_len = list_ops.tensor_list_length(inp) - - def _split_vectorized_ta_element(index, new_inp, new_out_ta): - elem = list_ops.tensor_list_get_item(inp, index, shape_and_type.dtype, - element_shape) - if stacked: - done_elem, new_elem = data_flow_ops.dynamic_partition( - elem, conditions_int, 2) - new_inp = list_ops.tensor_list_set_item(new_inp, index, new_elem) - else: - done_elem = _stack(elem, [array_ops.size(done_indices)]).t - done_accum = new_out_ta.read(index) - done_accum = list_ops.tensor_list_scatter( - tensor=done_elem, indices=done_indices, input_handle=done_accum) - new_out_ta = new_out_ta.write(index, done_accum) - return index + 1, new_inp, new_out_ta - - length = list_ops.tensor_list_length(inp) - new_inp = list_ops.tensor_list_reserve( - tensor_shape.TensorShape([None]) - + tensor_shape.TensorShape(shape_and_type.shape)[1:], - user_list_len, shape_and_type.dtype) - _, new_inp, out_ta = control_flow_ops.while_loop( - lambda index, unused_new_inp, unused_new_out_ta: index < length, - _split_vectorized_ta_element, - [0, new_inp, output_tas[i]]) + # Partition the inputs. + if stacked: + done_inp, new_inp = data_flow_ops.dynamic_partition( + inp, conditions_int, 2) else: - # Partition the inputs. - if stacked: - done_inp, new_inp = data_flow_ops.dynamic_partition( - inp, conditions_int, 2) - else: - if not pass_through: - done_inp = _stack(inp, [array_ops.size(done_indices)]).t - new_inp = inp - - out_ta = output_tas[i] if not pass_through: - # Note that done_indices can be empty. done_inp should also be empty - # in that case. - out_ta = out_ta.scatter(done_indices, done_inp) + done_inp = _stack(inp, [array_ops.size(done_indices)]).t + new_inp = inp + new_inputs.append(new_inp) + out_ta = output_tas[i] + if not pass_through: + # Note that done_indices can be empty. done_inp should also be empty + # in that case. 
+ out_ta = out_ta.scatter(done_indices, done_inp) new_output_tas.append(out_ta) assert len(new_output_tas) == len(output_tas) @@ -4862,37 +4773,7 @@ class WhileV2(object): outputs.append(init_values[i + 2]) else: ta = output_tas[i] - if _variant_type_id(inp) == full_type_pb2.TFT_ARRAY: - shape_and_type = _parse_variant_shapes_and_types(inp)[0] - length = list_ops.tensor_list_length(inp) - - # We have been accumulating values in a: - # - # List[user_list_len, List[loop_len, Tensor[...]]] - # - # We want to return an output in the same format as the input: - # - # List[user_list_len, Tensor[loop_len, ...]] - # - # So we need to loop over the list and stack its contents. - def _stack_loop_body(index, output_list): - current_value = ta.read(index) - output_list = list_ops.tensor_list_set_item( - output_list, index, - list_ops.tensor_list_stack( - current_value, shape_and_type.dtype)) - return index + 1, output_list - - output_list = list_ops.tensor_list_reserve( - tensor_shape.TensorShape(shape_and_type.shape), length, - shape_and_type.dtype) - _, output_list = control_flow_ops.while_loop( - lambda index, _: index < length, - _stack_loop_body, - [0, output_list]) - outputs.append(output_list) - else: - outputs.append(ta.stack()) + outputs.append(ta.stack()) else: outputs.append(inp) return outputs ",0,test 17b7d69ad4bfe3e51c4cee2a10fa24bd9048ec27,tensorflow/tensorflow,Removed Depricated API from the file.,vector_diffeomixture.py,"@@ -1060,5 +1060,5 @@ def softmax(x, axis, name=None): if axis_ is not None: axis = np.int(ndims + axis_ if axis_ < 0 else axis_) else: - axis = array_ops.where(axis < 0, ndims + axis, axis) + axis = array_ops.where_v2(axis < 0, ndims + axis, axis) return nn_ops.softmax(x, axis=axis) ",0,test a9429e942a261948f146f9b4a9fbaeab8598dadc,tensorflow/tensorflow,"Fix resize_bilinear type propagation This operator supports more than just float32 outputs. PiperOrigin-RevId: 259411764",propagate_array_data_types.cc,"@@ -55,7 +55,6 @@ void SetDataTypeForAllOutputs(Model* model, Operator* op, // Do the actual output data types propagation. switch (op->type) { case OperatorType::kDequantize: - case OperatorType::kResizeBilinear: // These operators unconditionally produce float outputs SetDataTypeForAllOutputs(model, op, ArrayDataType::kFloat); break; ",0,train bfaaefa9ecbbbc797f5af60f3d87f6a3c3ac7a09,tensorflow/tensorflow,"Update APIs for TPU Cluster Resolver to remove the custom API definition and instead use a standard definition file stored in GCS. PiperOrigin-RevId: 170960877",tpu_cluster_resolver.py,"@@ -39,7 +39,6 @@ class TPUClusterResolver(ClusterResolver): """""" def __init__(self, - api_definition, project, zone, tpu_names, @@ -52,8 +51,6 @@ class TPUClusterResolver(ClusterResolver): for the IP addresses and ports of each Cloud TPU listed. Args: - api_definition: (Alpha only) A copy of the JSON API definitions for - Cloud TPUs. This will be removed once Cloud TPU enters beta. project: Name of the GCP project containing Cloud TPUs zone: Zone where the TPUs are located tpu_names: A list of names of the target Cloud TPUs. 
@@ -83,11 +80,13 @@ class TPUClusterResolver(ClusterResolver): raise ImportError('googleapiclient must be installed before using the ' 'TPU cluster resolver') - # TODO(frankchn): Remove once Cloud TPU API Definitions are public and - # replace with discovery.build('tpu', 'v1') - self._service = discovery.build_from_document( - api_definition, - credentials=self._credentials) + # TODO(b/67375680): Remove custom URL once TPU APIs are finalized + self._service = discovery.build( + 'tpu', + 'v1', + credentials=self._credentials, + discoveryServiceUrl='https://storage.googleapis.com' + '/tpu-api-definition/v1alpha1.json') else: self._service = service ",0,train 4002c6c2bc4946a162236f3357ec54b4ab8b4e1e,tensorflow/tensorflow,"Improve documentation for TFRecordDataset. This adds a longer description and includes the example used for TFRecordWriter (https://www.tensorflow.org/api_docs/python/tf/io/TFRecordWriter) PiperOrigin-RevId: 363734980 Change-Id: I97e616bbcabae0e64b016d34bfbdba6996bd9ebc",readers.py,"@@ -213,7 +213,48 @@ class TextLineDatasetV1(dataset_ops.DatasetV1Adapter): class _TFRecordDataset(dataset_ops.DatasetSource): - """"""A `Dataset` comprising records from one or more TFRecord files."""""" + """"""A `Dataset` comprising records from one or more TFRecord files. + + This dataset loads TFRecords from file as bytes, exactly as they were written. + `TFRecordDataset` does not do any parsing or decoding on its own. Parsing and + decoding can be done by applying `Dataset.map` transformations after the + `TFRecordDataset`. + + A minimal example is given below: + + >>> import tempfile + >>> example_path = os.path.join(tempfile.gettempdir(), ""example.tfrecords"") + >>> np.random.seed(0) + + >>> # Write the records to a file. + ... with tf.io.TFRecordWriter(example_path) as file_writer: + ... for _ in range(4): + ... x, y = np.random.random(), np.random.random() + ... + ... record_bytes = tf.train.Example(features=tf.train.Features(feature={ + ... ""x"": tf.train.Feature(float_list=tf.train.FloatList(value=[x])), + ... ""y"": tf.train.Feature(float_list=tf.train.FloatList(value=[y])), + ... })).SerializeToString() + ... file_writer.write(record_bytes) + + >>> # Read the data back out. + >>> def decode_fn(record_bytes): + ... return tf.io.parse_single_example( + ... # Data + ... record_bytes, + ... + ... # Schema + ... {""x"": tf.io.FixedLenFeature([], dtype=tf.float32), + ... ""y"": tf.io.FixedLenFeature([], dtype=tf.float32)} + ... ) + + >>> for batch in tf.data.TFRecordDataset([example_path]).map(decode_fn): + ... print(""x = {x:.4f}, y = {y:.4f}"".format(**batch)) + x = 0.5488, y = 0.7152 + x = 0.6028, y = 0.5449 + x = 0.4237, y = 0.6459 + x = 0.4376, y = 0.8918 + """""" def __init__(self, filenames, compression_type=None, buffer_size=None): """"""Creates a `TFRecordDataset`. 
",0,train 591eaa548218c664632b6ee1fe0732650094116d,tensorflow/tensorflow,"Account for the fact that the equivallent of the IsVariableInitialized op is VarIsInitializedOp when working on ResourceVariables PiperOrigin-RevId: 161272800",variables.py,"@@ -736,7 +736,7 @@ class Variable(object): return initial_value elif isinstance(initial_value, ops.Operation): if initial_value.node_def.op in [ - ""IsVariableInitialized"", ""ReadVariableOp"" + ""IsVariableInitialized"", ""VarIsInitializedOp"", ""ReadVariableOp"" ]: return initial_value if initial_value.node_def.op in [""Variable"", ""VariableV2"", ""VarHandleOp""]: ",0,test 9406c024d4d31a5a3db13bc5d4b2b0b19a311afb,tensorflow/tensorflow,"Remove padding_inital_value(s) for optimizers. The padding segment has been removed in favor of another technique to handle padding. PiperOrigin-RevId: 384520602 Change-Id: Id64e5d9d618666c7f6d407795a2299b58c24c133",tpu_embedding_optimization_parameters_utils.cc,"@@ -15,6 +15,9 @@ limitations under the License. #include ""tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.h"" +#include +#include + #include ""tensorflow/compiler/xla/service/hlo.pb.h"" #include ""tensorflow/compiler/xla/service/hlo_opcode.h"" #include ""tensorflow/compiler/xla/xla_data.pb.h"" @@ -26,7 +29,7 @@ limitations under the License. namespace tensorflow { namespace tpu { -string GetOptimizationAlgorithmName(OptimizationAlgorithm alg) { +std::string GetOptimizationAlgorithmName(OptimizationAlgorithm alg) { switch (alg) { case OptimizationAlgorithm::kAdagrad: return ""Adagrad""; @@ -66,7 +69,7 @@ string GetOptimizationAlgorithmName(OptimizationAlgorithm alg) { return ""*** Not set ***""; } -string GetOptimizationAlgorithmFriendlyName(OptimizationAlgorithm alg) { +std::string GetOptimizationAlgorithmFriendlyName(OptimizationAlgorithm alg) { switch (alg) { case OptimizationAlgorithm::kAdagrad: return ""Adagrad""; @@ -194,21 +197,6 @@ Status GetGradientAccumulationSupport(const OptimizationParameters& params, return Status::OK(); } -namespace { -// Make a normal state variable specification. Please refer to -// //tensorflow/core/protobuf/tpu/optimization_parameters.proto -// (StateVariableSpecification message) for instructions on how to set the -// padding_initial_value field. 
-StateVariableSpecification MakeStandardStateVariableSpecification( - const string& name, double padding_initial_value) { - StateVariableSpecification result; - result.set_name(name); - result.mutable_user_defined()->set_padding_initial_value( - padding_initial_value); - return result; -} -} // namespace - Status UseGradientAccumulation(const OptimizationParameters& params, bool* use_gradient_accumulation) { GradientAccumulationSupport support; @@ -264,112 +252,104 @@ Status GetOptimizationAlgorithmStateVariables( TF_RETURN_IF_ERROR( UseGradientAccumulation(params, &use_gradient_accumulation)); - auto add_state_variable = [&](const std::string& name, float value) { - state_variables->push_back( - MakeStandardStateVariableSpecification(name, value)); + auto add_state_variable = [&](const std::string& name) { + StateVariableSpecification spec; + spec.set_name(name); + (void)spec.mutable_user_defined(); + state_variables->push_back(spec); }; switch (params.parameters_case()) { case OptimizationAlgorithm::kAdagrad: { - add_state_variable(""parameters"", 0.0); - add_state_variable(""accumulators"", 0.1); + add_state_variable(""parameters""); + add_state_variable(""accumulators""); break; } case OptimizationAlgorithm::kBoundedAdagrad: { - add_state_variable(""parameters"", 0.0); - add_state_variable(""accumulators"", 0.1); + add_state_variable(""parameters""); + add_state_variable(""accumulators""); break; } case OptimizationAlgorithm::kStochasticGradientDescent: { - add_state_variable(""parameters"", 0.0); + add_state_variable(""parameters""); break; } case OptimizationAlgorithm::kFtrl: { - add_state_variable(""parameters"", 0.0); - add_state_variable(""accumulators"", 0.1); - add_state_variable(""linears"", 0.0); + add_state_variable(""parameters""); + add_state_variable(""accumulators""); + add_state_variable(""linears""); break; } case OptimizationAlgorithm::kAdam: { - add_state_variable(""parameters"", 0.0); - add_state_variable(""momenta"", 0.0); - add_state_variable(""velocities"", 0.0); + add_state_variable(""parameters""); + add_state_variable(""momenta""); + add_state_variable(""velocities""); break; } case OptimizationAlgorithm::kMomentum: { - add_state_variable(""parameters"", 0.0); - add_state_variable(""momenta"", 0.0); + add_state_variable(""parameters""); + add_state_variable(""momenta""); break; } case OptimizationAlgorithm::kRmsProp: { - add_state_variable(""parameters"", 0.0); - add_state_variable(""ms"", 1.0); - add_state_variable(""mom"", 0.0); + add_state_variable(""parameters""); + add_state_variable(""ms""); + add_state_variable(""mom""); break; } case OptimizationAlgorithm::kCenteredRmsProp: { - add_state_variable(""parameters"", 0.0); - add_state_variable(""ms"", 1.0); - add_state_variable(""mom"", 0.0); - add_state_variable(""mg"", 0.0); + add_state_variable(""parameters""); + add_state_variable(""ms""); + add_state_variable(""mom""); + add_state_variable(""mg""); break; } case OptimizationAlgorithm::kMdlAdagradLight: { - add_state_variable(""parameters"", 0.0); - add_state_variable(""accumulators"", 0.1); - add_state_variable(""weights"", 0.0); - add_state_variable(""benefits"", 0.0); + add_state_variable(""parameters""); + add_state_variable(""accumulators""); + add_state_variable(""weights""); + add_state_variable(""benefits""); break; } case OptimizationAlgorithm::kAdadelta: { - add_state_variable(""parameters"", 0.0); - add_state_variable(""accumulators"", 0.0); - add_state_variable(""updates"", 0.0); + add_state_variable(""parameters""); + 
add_state_variable(""accumulators""); + add_state_variable(""updates""); break; } case OptimizationAlgorithm::kProximalAdagrad: { - add_state_variable(""parameters"", 0.0); - add_state_variable(""accumulators"", 0.1); + add_state_variable(""parameters""); + add_state_variable(""accumulators""); break; } case OptimizationAlgorithm::kOnlineYogi: { - add_state_variable(""parameters"", 0.0); - add_state_variable(""vs"", 0.1); - add_state_variable(""linears"", 0.0); + add_state_variable(""parameters""); + add_state_variable(""vs""); + add_state_variable(""linears""); break; } case OptimizationAlgorithm::kProximalYogi: { - add_state_variable(""parameters"", 0.0); - add_state_variable(""v"", 0.1); - add_state_variable(""m"", 0.0); + add_state_variable(""parameters""); + add_state_variable(""v""); + add_state_variable(""m""); break; } case OptimizationAlgorithm::kFrequencyEstimator: { - add_state_variable(""parameters"", 0.0); - add_state_variable(""last_hit_step"", 0); + add_state_variable(""parameters""); + add_state_variable(""last_hit_step""); break; } case OptimizationAlgorithm::kUserDefinedProgram: { - add_state_variable(""parameters"", - params.user_defined_program().padding_values(0)); + add_state_variable(""parameters""); int num_slots = -1; TF_RETURN_IF_ERROR(GetBaseAuxiliaryParameterCount(params, &num_slots)); - if (num_slots + 1 != - params.user_defined_program().padding_values_size()) { - return errors::InvalidArgument( - absl::StrCat(""Number of slots "", num_slots + 1, - "" does not agree with the number of padding values "", - params.user_defined_program().padding_values_size(), - "" specified for "", params.ShortDebugString(), ""."")); - } for (int i = 0; i < num_slots; ++i) { - add_state_variable(absl::StrCat(""Slot_"", i), - params.user_defined_program().padding_values(i + 1)); + add_state_variable(absl::StrCat(""Slot_"", i)); } break; } case OptimizationAlgorithm::kAssign: { - add_state_variable(""parameters"", 0.0); + add_state_variable(""parameters""); break; } case OptimizationAlgorithm::PARAMETERS_NOT_SET: { ",0,train b4c37a452d2ed1d1c29ceb70127c4ef6434c44ca,tensorflow/tensorflow,"Teach the conditinal simplifier about sharding. PiperOrigin-RevId: 193510638",conditional_simplifier.cc,"@@ -69,7 +69,7 @@ static StatusOr TryRemoveConditional(HloInstruction* conditional) { conditional->shape(), {conditional->mutable_operand(2)}, conditional->false_computation())); } - + conditional->SetupDerivedInstruction(call_op); TF_RETURN_IF_ERROR(computation->ReplaceInstruction(conditional, call_op)); TF_RETURN_IF_ERROR(CallInliner::Inline(call_op).status()); ",0,train 7ad011b7c542f7183af78f09a47a9673f8457954,tensorflow/tensorflow,"Allow registering concrete functions directly, rather than through PolymorphicFunction. PiperOrigin-RevId: 217735452",function.py,"@@ -1409,8 +1409,35 @@ class PolymorphicFunction(object): ] +def register_concrete(func): + """"""Register a concrete function into the graph. + + Args: + func: A graph function. + """""" + graph = ops.get_default_graph() + + # There are two situations for the actual call of a defun: + # 1. If none of the input args are resource variables or watch by any tape, + # it will run the _inference_function of concrete_func for forward pass, and + # the gradient will be generated by standard mechanism. + # 2. Otherwise, defun will create two functions, one for forward pass, and the + # backward pass will be created via tape. + # When registering the function, we put both cases into graph. 
+ # pylint: disable=protected-access + func._inference_function.add_to_graph(graph) + + if func._backward_graph_function is None: + func._construct_backprop_function() + forward_function = func._forward_function + backward_function = func._backward_graph_function._inference_function + forward_function.add_to_graph(graph) + backward_function.add_to_graph(graph) + # pylint: enable=protected-access + + def register(func, *args, **kwargs): - """"""Register the defun function into the graph. + """"""Register a specialization of a PolymorphicFunction into the graph. This won't actually call the function with the inputs, and only put the function definition into graph. Register function with different input param @@ -1434,26 +1461,7 @@ def register(func, *args, **kwargs): raise ValueError(""Only defun function is allowed to be registered. "" ""Got type: %s"" % type(func)) concrete_func = func.get_concrete_function(*args, **kwargs) - graph = ops.get_default_graph() - - # There are two situations for the actual call of a defun: - # 1. If none of the input args are resource variables or watch by any tape, - # it will run the _inference_function of concrete_func for forward pass, and - # the gradient will be generated by standard mechanism. - # 2. Otherwise, defun will create two functions, one for forward pass, and the - # backward pass will be created via tape. - # When registering the function, we put both cases into graph. - # pylint: disable=protected-access - concrete_func._inference_function.add_to_graph(graph) - - if concrete_func._backward_graph_function is None: - concrete_func._construct_backprop_function() - forward_function = concrete_func._forward_function - backward_function = concrete_func._backward_graph_function._inference_function - forward_function.add_to_graph(graph) - backward_function.add_to_graph(graph) - # pylint: enable=protected-access - + register_concrete(concrete_func) return concrete_func ",0,train 7ad011b7c542f7183af78f09a47a9673f8457954,tensorflow/tensorflow,"Allow registering concrete functions directly, rather than through PolymorphicFunction. 
PiperOrigin-RevId: 217735452",function_test.py,"@@ -88,7 +88,7 @@ class DefunnedMiniModel(MiniModel): @test_util.with_c_shapes -class FunctionTest(test.TestCase): +class FunctionTest(test.TestCase, parameterized.TestCase): def testBasic(self): matmul = function.defun(math_ops.matmul) @@ -2149,7 +2149,7 @@ class FunctionTest(test.TestCase): t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]]) add(t, t) - def testRegisterFunction(self): + def testRegisterPolymorphicFunction(self): @function.defun def add(x, y): return math_ops.add(x, y) @@ -2211,6 +2211,65 @@ class FunctionTest(test.TestCase): self.assertEquals(captured_function_names[i], functions[i].definition.signature.name) + @parameterized.named_parameters( + dict(testcase_name='Defun', + function_decorator=function.defun), + dict(testcase_name='DefFunction', + function_decorator=def_function.function)) + def testRegisterConcreteFunction(self, function_decorator): + @function_decorator + def py_add(x, y): + return math_ops.add(x, y) + + py_add(array_ops.ones([]), array_ops.ones([])) + add = py_add.get_concrete_function( + tensor_spec.TensorSpec(None, dtypes.float32), + tensor_spec.TensorSpec(None, dtypes.float32)) + + @function_decorator + def py_composite(x, y): + return x, add(x, y) + + py_composite(array_ops.ones([]), array_ops.ones([])) + composite = py_composite.get_concrete_function( + tensor_spec.TensorSpec(None, dtypes.float32), + tensor_spec.TensorSpec(None, dtypes.float32)) + + with context.graph_mode(), self.cached_session(): + with ops.get_default_graph().as_default(): + t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]]) + function.register_concrete(composite) + + graph = ops.get_default_graph() + # pylint: disable=protected-access + self.assertEqual(len(graph._functions), 6) + # two sets of functions, each of them are (inference, forward, backward) + functions = list(graph._functions.values()) + captured_function_names = [ + f.definition.signature.name for f in functions + ] + expected_func_name_regex = [ + '.*inference.*py_composite.*', + '.*inference.*py_add.*', + '.*forward.*py_composite.*', + '.*forward.*py_add.*', + '.*inference.*backward.*py_composite.*', + '.*inference.*backward.*py_add.*', + ] + for expected, found in zip( + expected_func_name_regex, + captured_function_names): + self.assertRegexpMatches(found, expected) + + composite_t, composite_double = composite(t, t) + double = add(t, t) + self.assertAllEqual([[2, 4], [6, 8]], self.evaluate(double)) + self.assertAllEqual([[2, 4], [6, 8]], self.evaluate(composite_double)) + self.assertAllEqual([[1, 2], [3, 4]], self.evaluate(composite_t)) + # Make sure the pre registered function is used, and no other function + # is added. + self.assertEqual(len(graph._functions), 6) + def testRegisterFunctionWithInputSignature(self): def matmul(x, y): return math_ops.matmul(x, y) ",0,train 2a71aacb81d5e3282bbcbcd4712803f02800bf6f,tensorflow/tensorflow,"Exclude IDLE op from metrics db used for ""Overview Page | Top 10 TF operations"" analysis. 
PiperOrigin-RevId: 295038203 Change-Id: Ie2a46b36600f929a292aac9a5d0a8175c5acc934",op_stats_to_overview_page.cc,"@@ -144,8 +144,8 @@ OverviewPageRecommendation ComputeGenericRecommendation( OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) { OverviewPageAnalysis analysis; - OpMetricsDb metrics_db = - CreateTfMetricsDbFromHloMetricsDb(op_stats.device_op_metrics_db()); + OpMetricsDb metrics_db = CreateTfMetricsDbFromHloMetricsDb( + op_stats.device_op_metrics_db(), /*with_idle=*/false); uint64 total_device_time_ps = metrics_db.total_time_ps(); constexpr int kNumTopOpsShown = 10; double device_cumulative_fraction = 0.0; ",0,train 2a71aacb81d5e3282bbcbcd4712803f02800bf6f,tensorflow/tensorflow,"Exclude IDLE op from metrics db used for ""Overview Page | Top 10 TF operations"" analysis. PiperOrigin-RevId: 295038203 Change-Id: Ie2a46b36600f929a292aac9a5d0a8175c5acc934",op_metrics_db_utils.cc,"@@ -90,8 +90,8 @@ void AddIdleOp(OpMetricsDb* db) { metrics->set_self_time_ps(idle_time_ps); } -OpMetricsDb CreateTfMetricsDbFromHloMetricsDb( - const OpMetricsDb& hlo_metrics_db) { +OpMetricsDb CreateTfMetricsDbFromHloMetricsDb(const OpMetricsDb& hlo_metrics_db, + bool with_idle) { OpMetricsDb tf_op_metrics_db; DeviceTfOpMetricsDbBuilder builder(&tf_op_metrics_db); for (const auto& hlo_op_metrics : hlo_metrics_db.metrics_db()) { @@ -101,11 +101,18 @@ OpMetricsDb CreateTfMetricsDbFromHloMetricsDb( hlo_op_metrics); } else { DCHECK_EQ(hlo_op_metrics.name(), ""IDLE""); - builder.UpdateTfOpMetricsWithHloOpMetrics(""IDLE"", ""IDLE"", hlo_op_metrics); + if (with_idle) { + builder.UpdateTfOpMetricsWithHloOpMetrics(""IDLE"", ""IDLE"", + hlo_op_metrics); + } } } tf_op_metrics_db.set_total_op_time_ps(hlo_metrics_db.total_op_time_ps()); - tf_op_metrics_db.set_total_time_ps(hlo_metrics_db.total_time_ps()); + + tf_op_metrics_db.set_total_time_ps(with_idle + ? hlo_metrics_db.total_time_ps() + : hlo_metrics_db.total_op_time_ps()); + return tf_op_metrics_db; } } // namespace profiler ",0,train 2a71aacb81d5e3282bbcbcd4712803f02800bf6f,tensorflow/tensorflow,"Exclude IDLE op from metrics db used for ""Overview Page | Top 10 TF operations"" analysis. PiperOrigin-RevId: 295038203 Change-Id: Ie2a46b36600f929a292aac9a5d0a8175c5acc934",op_metrics_db_utils.h,"@@ -68,8 +68,8 @@ uint64 IdleTimePs(const OpMetricsDb& metrics_db); void AddIdleOp(OpMetricsDb* db); // Converts from Hlo-op metrics to Tf-op metrics. -OpMetricsDb CreateTfMetricsDbFromHloMetricsDb( - const OpMetricsDb& hlo_metrics_db); +OpMetricsDb CreateTfMetricsDbFromHloMetricsDb(const OpMetricsDb& hlo_metrics_db, + bool with_idle = true); } // namespace profiler } // namespace tensorflow ",0,train 293a2be20bef4cd9edc2b53d828d091f1f49e855,tensorflow/tensorflow,"Convert absl string_view constants to const, rather than constexpr. MSVC fails to compile such variables with: error C2131: expression did not evaluate to a constant PiperOrigin-RevId: 293408611 Change-Id: Ie42a085fce1540ed2da7739b6875e5f2e4e5411a",call_inliner_test.cc,"@@ -208,7 +208,7 @@ TEST_F(CallInlinerTest, CallToOutfeedComputationIsInlined) { } TEST_F(CallInlinerTest, InlineSingleUseCalleesOnly) { - constexpr absl::string_view hlo_string = R""( + const absl::string_view hlo_string = R""( HloModule inline_module a { ",0,test 293a2be20bef4cd9edc2b53d828d091f1f49e855,tensorflow/tensorflow,"Convert absl string_view constants to const, rather than constexpr. 
MSVC fails to compile such variables with: error C2131: expression did not evaluate to a constant PiperOrigin-RevId: 293408611 Change-Id: Ie42a085fce1540ed2da7739b6875e5f2e4e5411a",hlo_evaluator_test.cc,"@@ -4094,7 +4094,7 @@ ENTRY main { TEST_P(HloEvaluatorBf16Test, Bitcast) { // Regression test for b/114735354. - constexpr absl::string_view hlo_text_base = R""( + const absl::string_view hlo_text_base = R""( HloModule Bitcast ENTRY main { @@ -4121,7 +4121,7 @@ ENTRY main { // Check that s32 under/overflow doesn't trigger a ubsan failure. TEST_F(HloEvaluatorTest, Int32Overflow) { - constexpr absl::string_view hlo_text = R""( + const absl::string_view hlo_text = R""( HloModule Test ENTRY main { @@ -4150,7 +4150,7 @@ ENTRY main { } TEST_F(HloEvaluatorTest, GetDimensionSize) { - constexpr absl::string_view hlo_text = R""( + const absl::string_view hlo_text = R""( HloModule Test ENTRY main { @@ -4184,7 +4184,7 @@ ENTRY main { // Check that we get a useful error if we pass inputs of the wrong shape. TEST_F(HloEvaluatorTest, EvaluateWithWrongInputShapes) { - constexpr absl::string_view hlo_text = R""( + const absl::string_view hlo_text = R""( HloModule Test ENTRY main { @@ -4211,7 +4211,7 @@ ENTRY main { // Check that we get a useful error if we pass too many or too few inputs. TEST_F(HloEvaluatorTest, EvaluateWithWrongNumberOfInputs) { - constexpr absl::string_view hlo_text = R""( + const absl::string_view hlo_text = R""( HloModule Test ENTRY main { @@ -4233,7 +4233,7 @@ ENTRY main { } TEST_F(HloEvaluatorTest, PreserveFusionInputLayout) { - constexpr absl::string_view hlo_text = R""( + const absl::string_view hlo_text = R""( HloModule FusionInputLayout fused_computation { @@ -4255,7 +4255,7 @@ TEST_F(HloEvaluatorTest, PreserveFusionInputLayout) { } TEST_F(HloEvaluatorTest, PreserveFusionOutputLayout) { - constexpr absl::string_view hlo_text = R""( + const absl::string_view hlo_text = R""( HloModule FusionOutputLayout fused_computation { @@ -4276,7 +4276,7 @@ TEST_F(HloEvaluatorTest, PreserveFusionOutputLayout) { } TEST_F(HloEvaluatorTest, PreserveMOFusionOutputLayout) { - constexpr absl::string_view hlo_text = R""( + const absl::string_view hlo_text = R""( HloModule MOFusionOutputLayout fused_computation { @@ -4301,7 +4301,7 @@ TEST_F(HloEvaluatorTest, PreserveMOFusionOutputLayout) { // Tests that custom_calls fail to evaluate when no handler is specified. TEST_F(HloEvaluatorTest, EvaluateCustomCall_NoHandler) { - constexpr absl::string_view hlo_text = R""( + const absl::string_view hlo_text = R""( HloModule EvaluateCustomCall_NoHandler ENTRY kernel_entry { parameter.0 = u32[2,2]{1,0} parameter(0) @@ -4318,7 +4318,7 @@ TEST_F(HloEvaluatorTest, EvaluateCustomCall_NoHandler) { // Tests when a custom_call handler returns an error. TEST_F(HloEvaluatorTest, EvaluateCustomCall_HandlerError) { - constexpr absl::string_view hlo_text = R""( + const absl::string_view hlo_text = R""( HloModule EvaluateCustomCall_HandlerError ENTRY kernel_entry { parameter.0 = u32[2,2]{1,0} parameter(0) @@ -4342,7 +4342,7 @@ TEST_F(HloEvaluatorTest, EvaluateCustomCall_HandlerError) { // We sum the operands so that we can verify the operand and output literals // are properly mapped for access. 
TEST_F(HloEvaluatorTest, EvaluateCustomCall_ManyInputs) { - constexpr absl::string_view hlo_text = R""( + const absl::string_view hlo_text = R""( HloModule EvaluateCustomCall_ManyInputs ENTRY kernel_entry { parameter.0 = u32[1]{0} parameter(0) @@ -4378,7 +4378,7 @@ TEST_F(HloEvaluatorTest, EvaluateCustomCall_ManyInputs) { } TEST_F(HloEvaluatorTest, IsFiniteF16) { - constexpr absl::string_view hlo_text = R""( + const absl::string_view hlo_text = R""( HloModule test ENTRY IsFiniteTest { @@ -4395,7 +4395,7 @@ TEST_F(HloEvaluatorTest, IsFiniteF16) { } TEST_F(HloEvaluatorTest, IsFiniteBf16) { - constexpr absl::string_view hlo_text = R""( + const absl::string_view hlo_text = R""( HloModule test ENTRY IsFiniteTest { @@ -4414,7 +4414,7 @@ TEST_F(HloEvaluatorTest, IsFiniteBf16) { // Check that evaluating `f32[, 0] iota` doesn't oom (it's an empty // array!). TEST_F(HloEvaluatorTest, ZeroSizedIotaWithHugeDimension) { - constexpr absl::string_view hlo_text = R""( + const absl::string_view hlo_text = R""( HloModule test ENTRY t { ROOT i = f32[1000000000000, 0] iota(), iota_dimension=0 @@ -4427,7 +4427,7 @@ TEST_F(HloEvaluatorTest, ZeroSizedIotaWithHugeDimension) { } TEST_F(HloEvaluatorTest, CopyStartCopyDone) { - constexpr absl::string_view hlo_text = R""( + const absl::string_view hlo_text = R""( HloModule test ENTRY CopyStartCopyDone { init = f32[] constant(42.0) ",0,test d432c1fb460dd3de37312cbb69d3f4fbdc5508c6,tensorflow/tensorflow,Update comments.,parallel_loop_emitter.cc,"@@ -68,6 +68,7 @@ ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock(absl::string_view loop_name, // ... // } // The part between [] are added only if blockDim.y > 1. + // blockIdx.y and gridDim.y are always 1. // Per the PTX documentation: // ""It is guaranteed that [...] 0 <= %ctaid.x < %nctaid.x"" @@ -76,7 +77,8 @@ ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock(absl::string_view loop_name, if (launch_dimensions_.thread_counts_per_block().y > 1) { // When blockDim.y > 1, then we are in the small row case. Each // blockDim.x do exatly to one row and blockDim.y map to some - // consecutive row. + // consecutive row. This prevents too small block size that isn't + // efficient. CHECK(launch_config_.row_vectorized); CHECK_EQ(shape_.dimensions().back(), launch_dimensions_.thread_counts_per_block().x * ",0,train bed506e3a160402f4f93aa8fdfc4bb8b270a3953,tensorflow/tensorflow,"[XLA] Fix race condition in RefcountingHashMap Quoting the bug from jlebar@: > Suppose the refcount of entry for key K goes to 0. Then before the deleter is run, someone touches map[K], thus causing the refcount of this entry to go back to 1. Then the deleter runs, deleting the object. Boom. PiperOrigin-RevId: 289194684 Change-Id: I3a1d9a8294d45eb1c554ee511328fc5a9d0b1e20",refcounting_hash_map.h,"@@ -63,16 +63,22 @@ class RefcountingHashMap { std::shared_ptr operator[](const K& key) { absl::MutexLock lock(&mu_); auto it = map_.find(key); - if (it == map_.end()) { - // Create entry in the map and then set its value, so the value can - // contain a pointer back into the map. - it = map_.emplace(key, std::weak_ptr()).first; - std::shared_ptr value(value_factory_(key).release(), - Deleter{&it->first, this}); - it->second = value; // Set the weak ptr to the shared ptr. - return value; + // We ensure that the entry has not expired in case deleter was running when + // we have entered this block. 
+ if (it != map_.end()) { + if (std::shared_ptr value = it->second.lock()) { + return value; + } + map_.erase(it); } - return it->second.lock(); + + // Create entry in the map and then set its value, so the value can + // contain a pointer back into the map. + it = map_.emplace(key, std::weak_ptr()).first; + std::shared_ptr value(value_factory_(key).release(), + Deleter{&it->first, this}); + it->second = value; // Set the weak ptr to the shared ptr. + return value; } // Runs a function over every key/value in the map. @@ -99,9 +105,9 @@ class RefcountingHashMap { delete v; absl::MutexLock lock(&parent->mu_); auto it = parent->map_.find(*key); - CHECK(it != parent->map_.end()); - CHECK(it->second.expired()); - parent->map_.erase(it); + if (it != parent->map_.end() && it->second.expired()) { + parent->map_.erase(it); + } } }; ",0,train e46f394c1744ac93ed3a73a333c47809ff6198a7,tensorflow/tensorflow,"[XLA:GPU] Simplify Cusolver rewriter to not require a StreamExecutor or an allocator. PiperOrigin-RevId: 249255382",cusolver_context.cc,"@@ -91,12 +91,14 @@ StatusOr CusolverContext::Create(se::Stream* stream) { TF_RETURN_IF_ERROR(CusolverStatusToStatus(cusolverDnCreate(&handle))); CusolverContext context(stream, handle); - // StreamExecutor really should just expose the Cuda stream to clients... - const cudaStream_t* cuda_stream = - CHECK_NOTNULL(reinterpret_cast( - stream->implementation()->GpuStreamMemberHack())); - TF_RETURN_IF_ERROR( - CusolverStatusToStatus(cusolverDnSetStream(handle, *cuda_stream))); + if (stream) { + // StreamExecutor really should just expose the Cuda stream to clients... + const cudaStream_t* cuda_stream = + CHECK_NOTNULL(reinterpret_cast( + stream->implementation()->GpuStreamMemberHack())); + TF_RETURN_IF_ERROR( + CusolverStatusToStatus(cusolverDnSetStream(handle, *cuda_stream))); + } return std::move(context); } @@ -131,17 +133,40 @@ CusolverContext::~CusolverContext() { #define DN_SOLVER_FN(method, type_prefix) cusolverDn##type_prefix##method -#define POTRF_BUFFER_SIZE_INSTANCE(T, type_prefix) \ - StatusOr CusolverContext::PotrfBufferSize( \ - se::blas::UpperLower uplo, int n, se::DeviceMemory A, int lda) { \ - int size = -1; \ - TF_RETURN_IF_ERROR(CusolverStatusToStatus(DN_SOLVER_FN( \ - potrf_bufferSize, type_prefix)(handle(), CUDABlasUpperLower(uplo), n, \ - ToDevicePointer(A), lda, &size))); \ - return size; \ +// Note: NVidia have promised that it is safe to pass 'nullptr' as the argument +// buffers to cuSolver buffer size methods and this will be a documented +// behavior in a future cuSolver release. 
+StatusOr CusolverContext::PotrfBufferSize(PrimitiveType type, + se::blas::UpperLower uplo, + int n, int lda) { + int size = -1; + switch (type) { + case F32: { + TF_RETURN_IF_ERROR(CusolverStatusToStatus(cusolverDnSpotrf_bufferSize( + handle(), CUDABlasUpperLower(uplo), n, /*A=*/nullptr, lda, &size))); + break; + } + case F64: { + TF_RETURN_IF_ERROR(CusolverStatusToStatus(cusolverDnDpotrf_bufferSize( + handle(), CUDABlasUpperLower(uplo), n, /*A=*/nullptr, lda, &size))); + break; + } + case C64: { + TF_RETURN_IF_ERROR(CusolverStatusToStatus(cusolverDnCpotrf_bufferSize( + handle(), CUDABlasUpperLower(uplo), n, /*A=*/nullptr, lda, &size))); + break; + } + case C128: { + TF_RETURN_IF_ERROR(CusolverStatusToStatus(cusolverDnZpotrf_bufferSize( + handle(), CUDABlasUpperLower(uplo), n, /*A=*/nullptr, lda, &size))); + break; + } + default: + return InvalidArgument(""Invalid type for cholesky decomposition: %s"", + PrimitiveType_Name(type)); } - -CALL_LAPACK_TYPES(POTRF_BUFFER_SIZE_INSTANCE); + return size; +} #define POTRF_INSTANCE(T, type_prefix) \ Status CusolverContext::Potrf( \ ",0,train e46f394c1744ac93ed3a73a333c47809ff6198a7,tensorflow/tensorflow,"[XLA:GPU] Simplify Cusolver rewriter to not require a StreamExecutor or an allocator. PiperOrigin-RevId: 249255382",cusolver_context.h,"@@ -32,6 +32,8 @@ namespace gpu { class CusolverContext { public: + // stream may be nullptr, in which case the context can only be used for + // buffer size queries. static StatusOr Create(se::Stream* stream); CusolverContext() = default; ~CusolverContext(); @@ -63,17 +65,9 @@ class CusolverContext { se::DeviceMemory> workspace); // Returns the size of the `workspace` required by Potrf, in number of - // elements of size T. - StatusOr PotrfBufferSize(se::blas::UpperLower uplo, int n, - se::DeviceMemory dev_A, int lda); - StatusOr PotrfBufferSize(se::blas::UpperLower uplo, int n, - se::DeviceMemory dev_A, int lda); - StatusOr PotrfBufferSize(se::blas::UpperLower uplo, int n, - se::DeviceMemory> dev_A, - int lda); - StatusOr PotrfBufferSize(se::blas::UpperLower uplo, int n, - se::DeviceMemory> dev_A, - int lda); + // elements of `type`. + StatusOr PotrfBufferSize(PrimitiveType type, se::blas::UpperLower uplo, + int n, int lda); private: CusolverContext(se::Stream* stream, cusolverDnHandle_t handle); ",0,train e46f394c1744ac93ed3a73a333c47809ff6198a7,tensorflow/tensorflow,"[XLA:GPU] Simplify Cusolver rewriter to not require a StreamExecutor or an allocator. PiperOrigin-RevId: 249255382",cusolver_rewriter.cc,"@@ -23,7 +23,6 @@ limitations under the License. #include ""tensorflow/compiler/xla/literal.h"" #include ""tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h"" #include ""tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"" -#include ""tensorflow/compiler/xla/service/gpu/scratch_allocator.h"" #include ""tensorflow/compiler/xla/service/hlo_computation.h"" #include ""tensorflow/compiler/xla/service/hlo_instruction.h"" #include ""tensorflow/compiler/xla/service/hlo_opcode.h"" @@ -31,7 +30,6 @@ limitations under the License. 
#include ""tensorflow/compiler/xla/xla_data.pb.h"" #include ""tensorflow/core/lib/core/status.h"" #include ""tensorflow/core/platform/logging.h"" -#include ""tensorflow/core/platform/stream_executor_no_cuda.h"" #include ""tensorflow/stream_executor/blas.h"" namespace xla { @@ -48,7 +46,6 @@ void SetFortranLayout(Shape* shape) { } StatusOr CreateCholesky(CusolverContext* context, - ScratchAllocator* allocator, HloInstruction* operand, const CholeskyOptions& options, const OpMetadata& metadata) { @@ -67,39 +64,8 @@ StatusOr CreateCholesky(CusolverContext* context, se::blas::UpperLower uplo = options.lower() ? se::blas::UpperLower::kLower : se::blas::UpperLower::kUpper; int64 workspace_size; // Number of elements of size a_shape.element_type() - switch (a_shape.element_type()) { - case F32: { - TF_ASSIGN_OR_RETURN(auto a, - allocator->Allocate(context->stream(), n * n)); - TF_ASSIGN_OR_RETURN(workspace_size, - context->PotrfBufferSize(uplo, n, a, n)); - break; - } - case F64: { - TF_ASSIGN_OR_RETURN( - auto a, allocator->Allocate(context->stream(), n * n)); - TF_ASSIGN_OR_RETURN(workspace_size, - context->PotrfBufferSize(uplo, n, a, n)); - break; - } - case C64: { - TF_ASSIGN_OR_RETURN(auto a, allocator->Allocate>( - context->stream(), n * n)); - TF_ASSIGN_OR_RETURN(workspace_size, - context->PotrfBufferSize(uplo, n, a, n)); - break; - } - case C128: { - TF_ASSIGN_OR_RETURN(auto a, allocator->Allocate>( - context->stream(), n * n)); - TF_ASSIGN_OR_RETURN(workspace_size, - context->PotrfBufferSize(uplo, n, a, n)); - break; - } - default: - return InvalidArgument(""Invalid type for cholesky decomposition: %s"", - a_shape.ToString()); - } + TF_ASSIGN_OR_RETURN(workspace_size, context->PotrfBufferSize( + a_shape.element_type(), uplo, n, n)); // TODO(phawkins): Ideally we would relax this constraint. What we actually // want is that: @@ -131,7 +97,6 @@ StatusOr CreateCholesky(CusolverContext* context, // Tries to rewrite a single convolution into a call to cudnn. StatusOr RunOnInstruction(CusolverContext* context, - ScratchAllocator* allocator, HloInstruction* instruction) { if (instruction->opcode() != HloOpcode::kCholesky) { return false; @@ -139,7 +104,7 @@ StatusOr RunOnInstruction(CusolverContext* context, TF_ASSIGN_OR_RETURN( HloInstruction * custom_call, - CreateCholesky(context, allocator, instruction->mutable_operand(0), + CreateCholesky(context, instruction->mutable_operand(0), instruction->cholesky_options(), instruction->metadata())); VLOG(1) << ""Replacing "" << instruction->ToString() << "" with "" @@ -167,41 +132,18 @@ StatusOr CusolverRewriter::RunOnComputation(HloComputation* computation) { return false; } - // Create a stream for us to do our work on. We don't really need to do any - // work, just allocate memory, but that's the cuSolver API. - se::Stream stream{stream_exec_}; - stream.Init(); - const auto device_ordinal = stream_exec_->device_ordinal(); - - // allocator either points to this->allocator_ or, if that's null, to a - // se::StreamExecutorMemoryAllocator for stream_exec_. 
- se::DeviceMemoryAllocator* allocator; - absl::optional se_allocator; - if (allocator_ != nullptr) { - allocator = allocator_; - } else { - se_allocator.emplace(stream_exec_->platform(), - absl::Span({stream_exec_})); - allocator = &*se_allocator; - } - ScratchAllocator scratch_allocator(device_ordinal, allocator); - TF_ASSIGN_OR_RETURN(CusolverContext context, - CusolverContext::Create(&stream)); + CusolverContext::Create(/*stream=*/nullptr)); bool changed = false; for (HloInstruction* instruction : cusolver_calls) { - TF_ASSIGN_OR_RETURN( - bool result, - RunOnInstruction(&context, &scratch_allocator, instruction)); + TF_ASSIGN_OR_RETURN(bool result, RunOnInstruction(&context, instruction)); changed |= result; } return changed; } -CusolverRewriter::CusolverRewriter(se::StreamExecutor* stream_exec, - se::DeviceMemoryAllocator* allocator) - : stream_exec_(stream_exec), allocator_(allocator) {} +CusolverRewriter::CusolverRewriter() = default; StatusOr CusolverRewriter::Run(HloModule* module) { bool changed = false; ",0,train e46f394c1744ac93ed3a73a333c47809ff6198a7,tensorflow/tensorflow,"[XLA:GPU] Simplify Cusolver rewriter to not require a StreamExecutor or an allocator. PiperOrigin-RevId: 249255382",cusolver_rewriter.h,"@@ -29,17 +29,13 @@ namespace gpu { // Rewrites Cholesky calls into CustomCall HLOs that call into cuSolver. class CusolverRewriter : public HloModulePass { public: - CusolverRewriter(se::StreamExecutor* stream_exec, - se::DeviceMemoryAllocator* allocator); + CusolverRewriter(); absl::string_view name() const override { return ""cusolver-rewriter""; } StatusOr Run(HloModule* module) override; private: StatusOr RunOnComputation(HloComputation* computation); - - se::StreamExecutor* stream_exec_; // never null - se::DeviceMemoryAllocator* allocator_; // may be null }; } // namespace gpu ",0,train e46f394c1744ac93ed3a73a333c47809ff6198a7,tensorflow/tensorflow,"[XLA:GPU] Simplify Cusolver rewriter to not require a StreamExecutor or an allocator. PiperOrigin-RevId: 249255382",nvptx_compiler.cc,"@@ -266,7 +266,7 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, HloPassPipeline pipeline(""conv_canonicalization""); pipeline.AddInvariantChecker(/*layout_sensitive=*/false, /*allow_mixed_precision=*/false); - pipeline.AddPass(stream_exec, device_allocator); + pipeline.AddPass(); pipeline.AddPass(); pipeline.AddPass(); pipeline.AddPass(); ",0,train 83765f2c27c1ed3da86e78f033ba73a87d7cf6b7,tensorflow/tensorflow,VLOG(2) accepted nodes in segmenter,segment.cc,"@@ -450,6 +450,12 @@ tensorflow::Status SegmentGraph( num_unsupported_ops++; node = nullptr; } + else { + VLOG(2) << ""Accepted as a TF-TRT candidate, "" + << ""(Op type: "" << node->tf_node()->type_string() << ""), "" + << ""(Op name: "" << node->name() << ""), "" + << ""(Reason: "" << status << "")""; + } } node_segments.emplace_back(node); } ",0,train 1dc984a4f49cd4181a00d361b567a2c7c11c6650,tensorflow/tensorflow,Avoid unnecessary creation of RuntimeShape,space_to_batch_nd.h,"@@ -42,9 +42,13 @@ inline void SpaceToBatchND(const SpaceToBatchParams& params, // Extends the input/output shape from 3D to 4D if needed, NHC -> NH1C. const RuntimeShape input1_shape = - RuntimeShape::ExtendedShape(4, unextended_input1_shape); + (unextended_input1_shape.DimensionsCount() == 4) + ? 
unextended_input1_shape + : RuntimeShape::ExtendedShape(4, unextended_input1_shape); const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); + (unextended_output_shape.DimensionsCount() == 4) + ? unextended_output_shape + : RuntimeShape::ExtendedShape(4, unextended_output_shape); const int depth = input1_shape.Dims(3); const int input_width = input1_shape.Dims(2); ",0,train b98a1f31bca1e773ee215f2c32aa0509843c1247,tensorflow/tensorflow,"Propagate NaNs for floating point min/max operations. PiperOrigin-RevId: 187395444",hlo_evaluator.cc,"@@ -613,14 +613,25 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { return Status::OK(); } - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> + template ::value>::type* = + nullptr> + Status HandleMaximum(HloInstruction* maximum) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[maximum], + ElementWiseBinaryOp(maximum, [](ElementwiseT lhs, ElementwiseT rhs) { + return std::max(lhs, rhs); + })); + return Status::OK(); + } + + template ::value>::type* = nullptr> Status HandleMaximum(HloInstruction* maximum) { TF_ASSIGN_OR_RETURN( parent_->evaluated_[maximum], ElementWiseBinaryOp(maximum, [](ElementwiseT lhs, ElementwiseT rhs) { - return std::fmax(lhs, rhs); + return ((lhs >= rhs) || std::isnan(lhs)) ? lhs : rhs; })); return Status::OK(); } @@ -636,18 +647,30 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { return HandleMaximum(maximum); } - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> + template ::value>::type* = + nullptr> Status HandleMinimum(HloInstruction* minimum) { TF_ASSIGN_OR_RETURN(parent_->evaluated_[minimum], ElementWiseBinaryOp(minimum, [](ElementwiseT lhs_el, ElementwiseT rhs_el) { - return std::fmin(lhs_el, rhs_el); + return std::min(lhs_el, rhs_el); })); return Status::OK(); } + template ::value>::type* = nullptr> + Status HandleMinimum(HloInstruction* minimum) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[minimum], + ElementWiseBinaryOp(minimum, [](ElementwiseT lhs_el, + ElementwiseT rhs_el) { + return ((lhs_el <= rhs_el) || std::isnan(lhs_el)) ? lhs_el : rhs_el; + })); + return Status::OK(); + } + template < typename NativeT, typename std::enable_if::value>::type* = nullptr> ",0,train b98a1f31bca1e773ee215f2c32aa0509843c1247,tensorflow/tensorflow,"Propagate NaNs for floating point min/max operations. 
PiperOrigin-RevId: 187395444",llvm_util.cc,"@@ -106,8 +106,10 @@ llvm::Value* EmitFloatMax(llvm::Value* lhs_value, llvm::Value* rhs_value, auto cmp = ir_builder->CreateFCmpUGE(lhs_value, rhs_value); return ir_builder->CreateSelect(cmp, lhs_value, rhs_value); } else { - return EmitCallToIntrinsic(llvm::Intrinsic::maxnum, {lhs_value, rhs_value}, - {lhs_value->getType()}, ir_builder); + auto cmp_ge = ir_builder->CreateFCmpOGE(lhs_value, rhs_value); + auto lhs_is_nan = ir_builder->CreateFCmpUNE(lhs_value, lhs_value); + auto sel_lhs = ir_builder->CreateOr(cmp_ge, lhs_is_nan); + return ir_builder->CreateSelect(sel_lhs, lhs_value, rhs_value); } } @@ -117,8 +119,10 @@ llvm::Value* EmitFloatMin(llvm::Value* lhs_value, llvm::Value* rhs_value, auto cmp = ir_builder->CreateFCmpULE(lhs_value, rhs_value); return ir_builder->CreateSelect(cmp, lhs_value, rhs_value); } else { - return EmitCallToIntrinsic(llvm::Intrinsic::minnum, {lhs_value, rhs_value}, - {lhs_value->getType()}, ir_builder); + auto cmp_le = ir_builder->CreateFCmpOLE(lhs_value, rhs_value); + auto lhs_is_nan = ir_builder->CreateFCmpUNE(lhs_value, lhs_value); + auto sel_lhs = ir_builder->CreateOr(cmp_le, lhs_is_nan); + return ir_builder->CreateSelect(sel_lhs, lhs_value, rhs_value); } } ",0,train b98a1f31bca1e773ee215f2c32aa0509843c1247,tensorflow/tensorflow,"Propagate NaNs for floating point min/max operations. PiperOrigin-RevId: 187395444",array_elementwise_ops_test.cc,"@@ -1648,33 +1648,15 @@ XLA_TEST_F(ArrayElementwiseOpTest, SquareIn4DZeroElements) { ComputeAndCompareR4(&builder, expected, {}, error_spec_); } -// GPU backend emits nvvm intrinsic for fmin and fmax, whose semantics is NOT -// such -// * fmin(NaN, x) = x -// * fmax(NaN, x) = x -// so we only test NAN on CPU. -// -// TODO(b/28180546): Make this compile in a way that is consistent -// among backends. XLA_TEST_F(ArrayElementwiseOpTest, MinF32s) { ComputationBuilder builder(client_, TestName()); -#if !defined(XLA_TEST_BACKEND_CPU) - auto lhs = builder.ConstantR1({1.0f, 1.0f, 2.25f}); - auto rhs = builder.ConstantR1({2.0f, -5.0f, 1.0f}); -#else SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0f, 1.0f, 2.25f, NAN, 6.0f}); auto rhs = builder.ConstantR1({2.0f, -5.0f, 1.0f, 10.0f, NAN}); -#endif auto minimum = builder.Min(lhs, rhs); - ComputeAndCompareR1(&builder, -#if !defined(XLA_TEST_BACKEND_CPU) - {1.0f, -5.0f, 1.0f}, -#else - {1.0f, -5.0f, 1.0f, 10.0f, 6.0f}, -#endif - {}, error_spec_); + ComputeAndCompareR1(&builder, {1.0f, -5.0f, 1.0f, NAN, NAN}, {}, + error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MinZeroElementF32s) { @@ -1685,50 +1667,26 @@ XLA_TEST_F(ArrayElementwiseOpTest, MinZeroElementF32s) { ComputeAndCompareR1(&builder, {}, {}, error_spec_); } -// TODO(b/28180546): Make this compile in a way that is consistent -// among backends. See comment on MinF32s test above. 
XLA_TEST_F(ArrayElementwiseOpTest, MinF64s) { ComputationBuilder builder(client_, TestName()); -#if !defined(XLA_TEST_BACKEND_CPU) - auto lhs = builder.ConstantR1({1.0, 1.0, 2.25}); - auto rhs = builder.ConstantR1({2.0, -5.0, 1.0}); -#else SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0, 1.0, 2.25, NAN, 6.0}); auto rhs = builder.ConstantR1({2.0, -5.0, 1.0, 10.0, NAN}); -#endif auto minimum = builder.Min(lhs, rhs); - ComputeAndCompareR1(&builder, -#if !defined(XLA_TEST_BACKEND_CPU) - {1.0, -5.0, 1.0}, -#else - {1.0, -5.0, 1.0, 10.0, 6.0}, -#endif - {}, error_spec_); + ComputeAndCompareR1(&builder, {1.0, -5.0, 1.0, NAN, NAN}, {}, + error_spec_); } -// TODO(b/28180546): Make this compile in a way that is consistent -// among backends. See comment on MinF32s test above. XLA_TEST_F(ArrayElementwiseOpTest, MaxF32s) { ComputationBuilder builder(client_, TestName()); -#if !defined(XLA_TEST_BACKEND_CPU) - auto lhs = builder.ConstantR1({1.0f, 1.0f, 2.25f}); - auto rhs = builder.ConstantR1({2.0f, -5.0f, 1.0f}); -#else SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0f, 1.0f, 2.25f, NAN, 6.0f}); auto rhs = builder.ConstantR1({2.0f, -5.0f, 1.0f, 10.0f, NAN}); -#endif auto maximum = builder.Max(lhs, rhs); - ComputeAndCompareR1(&builder, -#if !defined(XLA_TEST_BACKEND_CPU) - {2.0f, 1.0f, 2.25f}, -#else - {2.0f, 1.0f, 2.25f, 10.0f, 6.0f}, -#endif - {}, error_spec_); + ComputeAndCompareR1(&builder, {2.0f, 1.0f, 2.25f, NAN, NAN}, {}, + error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MaxZeroElementF32s) { @@ -1739,27 +1697,15 @@ XLA_TEST_F(ArrayElementwiseOpTest, MaxZeroElementF32s) { ComputeAndCompareR1(&builder, {}, {}, error_spec_); } -// TODO(b/28180546): Make this compile in a way that is consistent -// among backends. See comment on MinF32s test above. XLA_TEST_F(ArrayElementwiseOpTest, MaxF64s) { ComputationBuilder builder(client_, TestName()); -#if !defined(XLA_TEST_BACKEND_CPU) - auto lhs = builder.ConstantR1({1.0, 1.0, 2.25}); - auto rhs = builder.ConstantR1({2.0, -5.0, 1.0}); -#else SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0, 1.0, 2.25, NAN, 6.0}); auto rhs = builder.ConstantR1({2.0, -5.0, 1.0, 10.0, NAN}); -#endif auto maximum = builder.Max(lhs, rhs); - ComputeAndCompareR1(&builder, -#if !defined(XLA_TEST_BACKEND_CPU) - {2.0, 1.0, 2.25}, -#else - {2.0, 1.0, 2.25, 10.0, 6.0}, -#endif - {}, error_spec_); + ComputeAndCompareR1(&builder, {2.0, 1.0, 2.25, NAN, NAN}, {}, + error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MaxS32s) { ",0,train b98a1f31bca1e773ee215f2c32aa0509843c1247,tensorflow/tensorflow,"Propagate NaNs for floating point min/max operations. 
PiperOrigin-RevId: 187395444",scalar_computations_test.cc,"@@ -860,6 +860,12 @@ XLA_TEST_F(ScalarComputationsTest, MinF32Below) { TestMinMax(-100.1f, 3.1f, -100.1f, &ComputationBuilder::Min); } +XLA_TEST_F(ScalarComputationsTest, MinPropagatesNan) { + SetFastMathDisabled(true); + TestMinMax(NAN, 3.1f, NAN, &ComputationBuilder::Min); + TestMinMax(-3.1f, NAN, NAN, &ComputationBuilder::Min); +} + XLA_TEST_F(ScalarComputationsTest, MaxF32Above) { TestMinMax(10.1f, 3.1f, 10.1f, &ComputationBuilder::Max); } @@ -868,6 +874,12 @@ XLA_TEST_F(ScalarComputationsTest, MaxF32Below) { TestMinMax(-100.1f, 3.1f, 3.1f, &ComputationBuilder::Max); } +XLA_TEST_F(ScalarComputationsTest, MaxPropagatesNan) { + SetFastMathDisabled(true); + TestMinMax(NAN, 3.1f, NAN, &ComputationBuilder::Max); + TestMinMax(-3.1f, NAN, NAN, &ComputationBuilder::Max); +} + XLA_TEST_F(ScalarComputationsTest, ComplicatedArithmeticExpressionF32) { // Compute the expression (1 * (3 - 1) * (7 + 0) - 4) / 20. ComputationBuilder b(client_, TestName()); ",0,train c102dc94c70dfa58cc1aa847492bba8307e731d0,tensorflow/tensorflow,"Remove redundant code. PiperOrigin-RevId: 351484243 Change-Id: If3a9eead0ceb1859c65627c13efeeb5b9dc8676c",stream_executor.cc,"@@ -801,10 +801,6 @@ port::Status InitStreamExecutorPlugin(SEInitPluginFn init_fn) { TF_RETURN_IF_ERROR(tensorflow::StatusFromTF_Status(c_status.get())); TF_RETURN_IF_ERROR(ValidateSPTimerFns(timer_fns)); - platform_fns.create_timer_fns(&platform, &timer_fns, c_status.get()); - TF_RETURN_IF_ERROR(tensorflow::StatusFromTF_Status(c_status.get())); - TF_RETURN_IF_ERROR(ValidateSPTimerFns(timer_fns)); - // Register new platform std::string platform_name = std::string(platform.name); std::unique_ptr cplatform( ",0,train 2197c067c79d1464eb957764f8edd120693dcdf6,tensorflow/tensorflow,"Fix setup.py for Eigen headers Change: 114173506",setup.py,"@@ -99,18 +99,21 @@ class InstallHeaders(Command): # directories for -I install_dir = re.sub('/google/protobuf/src', '', install_dir) - # Copy eigen code into tensorflow/include and - # tensorflow/include/external/eigen_archive/eigen-eigen-. + # Copy eigen code into tensorflow/include, + # tensorflow/include/external/eigen_archive/eigen-eigen-, + # and tensorflow/include/eigen-eigen-. # A symlink would do, but the wheel file that gets created ignores # symlink within the directory hierarchy. # NOTE(keveman): Figure out how to customize bdist_wheel package so # we can do the symlink. 
if re.search(r'(external/eigen_archive/eigen-eigen-\w+)', install_dir): - extra_dir = re.sub(r'/external/eigen_archive/eigen-eigen-\w+', '', - install_dir) - if not os.path.exists(extra_dir): - self.mkpath(extra_dir) - self.copy_file(header, extra_dir) + extra_dirs = [re.sub('/external/eigen_archive', '', install_dir), + re.sub(r'external/eigen_archive/eigen-eigen-\w+', '', + install_dir)] + for extra_dir in extra_dirs: + if not os.path.exists(extra_dir): + self.mkpath(extra_dir) + self.copy_file(header, extra_dir) if not os.path.exists(install_dir): self.mkpath(install_dir) ",0,train 4a4605f50fc63871f995a5f487c9b66e399ac674,tensorflow/tensorflow,"Initialize TPU inside MWMS PiperOrigin-RevId: 401877668 Change-Id: I3e19d194e94ac617bf3ab5a131898030f9ffd67c",collective_all_reduce_strategy.py,"@@ -43,6 +43,7 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import collective_ops from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.tpu import tpu_strategy_util from tensorflow.python.training.tracking import base from tensorflow.python.util import deprecation from tensorflow.python.util.tf_export import tf_export @@ -504,6 +505,8 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended): # some cases. local_devices, local_device_type = self._initialize_local_devices( cluster_resolver, self._worker_device) + if local_device_type == ""TPU"": + tpu_strategy_util.initialize_tpu_system() self._collective_keys = cross_device_utils.CollectiveKeys( group_key_start=1 + self._collective_key_base) ",0,train 4a4605f50fc63871f995a5f487c9b66e399ac674,tensorflow/tensorflow,"Initialize TPU inside MWMS PiperOrigin-RevId: 401877668 Change-Id: I3e19d194e94ac617bf3ab5a131898030f9ffd67c",cross_device_utils.py,"@@ -265,10 +265,11 @@ class CollectiveReplicaLauncher(object): self._group_size = group_size self._collective_keys = collective_keys self._device = device - # Created lazily in _get_ordering_token to avoid creating tensors on TPUs - # before the user has a chance to call initialize_system. - self._ordering_token = None - self._ordering_token_init_lock = threading.Lock() + if self._use_ordering_token(): + with ops.init_scope(), ops.device(device): + self._ordering_token = resource_variable_ops.ResourceVariable(0.) + else: + self._ordering_token = None def _control_input(self, control_input): if control_input is not None and not self._use_ordering_token(): @@ -319,14 +320,8 @@ class CollectiveReplicaLauncher(object): self._device) def _get_ordering_token(self, communication_hint): - if self._use_ordering_token(): - with self._ordering_token_init_lock: - if self._ordering_token is None: - with ops.init_scope(), ops.device(self._device): - self._ordering_token = resource_variable_ops.ResourceVariable(0.) - if communication_hint == 'NCCL': - return self._ordering_token.handle - + if self._use_ordering_token() and communication_hint == 'NCCL': + return self._ordering_token.handle return None def can_order_nccl(self): ",0,train ac91ebc9bec9eb9b0ade27ccd470a547b180ec8b,tensorflow/tensorflow,"Don't clear+resize outputs vector twice in ExecutorState. 
Change: 121507010",executor.cc,"@@ -1006,7 +1006,6 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) { Entry* input_tensors = GetInputTensors(input_frame, input_iter); Entry* first_input = input_tensors + item.input_start; outputs.clear(); - outputs.resize(item.num_outputs); TensorReferenceVector accessed_tensors; DeviceContext* device_context = nullptr; @@ -1014,7 +1013,9 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) { // transfer node. For transfer nodes, we need to propagate the ""dead"" // bit even when the node is dead. bool launched_asynchronously = false; - if (!tagged_node.is_dead || IsTransferNode(node)) { + if (tagged_node.is_dead && !IsTransferNode(node)) { + outputs.resize(item.num_outputs); + } else { // Prepares inputs. bool is_input_dead = false; s = PrepareInputs(item, first_input, &inputs, &input_device_contexts, @@ -1230,7 +1231,7 @@ Status ExecutorState::ProcessOutputs(const NodeItem& item, OpKernelContext* ctx, EntryVector* outputs, NodeExecStats* stats) { const Node* node = item.node; - outputs->clear(); + DCHECK_EQ(0, outputs->size()); outputs->resize(item.num_outputs); Status s = ctx->status(); ",0,train ebf554ff77bc46bfdd9b424bc44b62f803100b33,tensorflow/tensorflow,"Make adaptive SDCA the default. PiperOrigin-RevId: 188380039",sdca_ops_test.py,"@@ -270,14 +270,14 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op = lr.minimize() - def Minimize(): + def minimize(): with self._single_threaded_test_session(): for _ in range(_MAX_ITERATIONS): - train_op.run() + train_op.run() # pylint: disable=cell-var-from-loop threads = [] for _ in range(num_loss_partitions): - threads.append(threading.Thread(target=Minimize)) + threads.append(threading.Thread(target=minimize)) threads[-1].start() for t in threads: @@ -395,7 +395,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): predicted_labels = get_binary_predictions_for_logistic(predictions) self.assertAllClose([0, 1, 1, 1], predicted_labels.eval()) self.assertAllClose( - 0.01, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2) + 0.0, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2) def testFractionalExampleLabel(self): # Setup test data with 1 positive, and 1 mostly-negative example. @@ -407,7 +407,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): make_example_proto({ 'age': [1], 'gender': [1] - }, 1), + }, 0.9), ] example_weights = [1.0, 1.0] for num_shards in _SHARD_NUMBERS: ",0,train ebf554ff77bc46bfdd9b424bc44b62f803100b33,tensorflow/tensorflow,"Make adaptive SDCA the default. PiperOrigin-RevId: 188380039",sdca_internal.cc,"@@ -226,7 +226,7 @@ const ExampleStatistics Example::ComputeWxAndWeightedExampleNorm( } // Examples contains all the training examples that SDCA uses for a mini-batch. -Status Examples::SampleAdaptativeProbabilities( +Status Examples::SampleAdaptiveProbabilities( const int num_loss_partitions, const Regularizations& regularization, const ModelWeights& model_weights, const TTypes::Matrix example_state_data, ",0,train ebf554ff77bc46bfdd9b424bc44b62f803100b33,tensorflow/tensorflow,"Make adaptive SDCA the default. PiperOrigin-RevId: 188380039",sdca_internal.h,"@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_KERNELS_SDCA_INTERNAL_H_ -#define TENSORFLOW_KERNELS_SDCA_INTERNAL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_SDCA_INTERNAL_H_ +#define TENSORFLOW_CORE_KERNELS_SDCA_INTERNAL_H_ #define EIGEN_USE_THREADS @@ -75,7 +75,7 @@ struct ExampleStatistics { class Regularizations { public: - Regularizations(){}; + Regularizations() {} // Initialize() must be called immediately after construction. Status Initialize(OpKernelConstruction* const context) { @@ -199,7 +199,7 @@ class FeatureWeightsDenseStorage { FeatureWeightsDenseStorage(const TTypes::Matrix nominals, TTypes::Matrix deltas) : nominals_(nominals), deltas_(deltas) { - CHECK(deltas.rank() > 1); + CHECK_GT(deltas.rank(), 1); } // Check if a feature index is with-in the bounds. @@ -322,15 +322,15 @@ class Examples { return examples_.at(example_index); } - int sampled_index(const int id, const bool adaptative) const { - if (adaptative) return sampled_index_[id]; + int sampled_index(const int id, const bool adaptive) const { + if (adaptive) return sampled_index_[id]; return id; } // Adaptive SDCA in the current implementation only works for // binary classification, where the input argument for num_weight_vectors // is 1. - Status SampleAdaptativeProbabilities( + Status SampleAdaptiveProbabilities( const int num_loss_partitions, const Regularizations& regularization, const ModelWeights& model_weights, const TTypes::Matrix example_state_data, @@ -378,7 +378,7 @@ class Examples { // All examples in the batch. std::vector examples_; - // Adaptative sampling variables + // Adaptive sampling variables. std::vector probabilities_; std::vector sampled_index_; std::vector sampled_count_; @@ -391,4 +391,4 @@ class Examples { } // namespace sdca } // namespace tensorflow -#endif // TENSORFLOW_KERNELS_SDCA_INTERNAL_H_ +#endif // TENSORFLOW_CORE_KERNELS_SDCA_INTERNAL_H_ ",0,train ebf554ff77bc46bfdd9b424bc44b62f803100b33,tensorflow/tensorflow,"Make adaptive SDCA the default. PiperOrigin-RevId: 188380039",sdca_ops.cc,"@@ -80,7 +80,7 @@ struct ComputeOptions { context, false, errors::InvalidArgument(""Unsupported loss type: "", loss_type)); } - OP_REQUIRES_OK(context, context->GetAttr(""adaptative"", &adaptative)); + OP_REQUIRES_OK(context, context->GetAttr(""adaptative"", &adaptive)); OP_REQUIRES_OK( context, context->GetAttr(""num_sparse_features"", &num_sparse_features)); OP_REQUIRES_OK(context, context->GetAttr(""num_sparse_features_with_values"", @@ -113,7 +113,7 @@ struct ComputeOptions { int num_dense_features = 0; int num_inner_iterations = 0; int num_loss_partitions = 0; - bool adaptative = false; + bool adaptive = true; Regularizations regularizations; }; @@ -147,9 +147,9 @@ void DoCompute(const ComputeOptions& options, OpKernelContext* const context) { OP_REQUIRES_OK(context, context->set_output(""out_example_state_data"", mutable_example_state_data_t)); - if (options.adaptative) { + if (options.adaptive) { OP_REQUIRES_OK(context, - examples.SampleAdaptativeProbabilities( + examples.SampleAdaptiveProbabilities( options.num_loss_partitions, options.regularizations, model_weights, example_state_data, options.loss_updater, /*num_weight_vectors =*/1)); @@ -163,7 +163,7 @@ void DoCompute(const ComputeOptions& options, OpKernelContext* const context) { // num_examples which is an int. 
for (int id = static_cast(begin); id < end; ++id) { const int64 example_index = - examples.sampled_index(++atomic_index, options.adaptative); + examples.sampled_index(++atomic_index, options.adaptive); const Example& example = examples.example(example_index); const float dual = example_state_data(example_index, 0); const float example_weight = example.example_weight(); ",0,train 720a3a15764546619a38da655c23ba6e1cd9200d,tensorflow/tensorflow,"Lower ReluGrad to HLO. PiperOrigin-RevId: 273772952",hlo_ops.cc,"@@ -702,5 +702,31 @@ static LogicalResult Verify(TransposeOp op) { return success(); } +//===----------------------------------------------------------------------===// +// CompareOp +//===----------------------------------------------------------------------===// + +void CompareOp::build(Builder* builder, OperationState& result, Value* lhs, + Value* rhs, DenseIntElementsAttr broadcast_dimensions, + StringAttr comparison_direction) { + build(builder, result, + InferOutputTypes(builder, lhs, rhs, broadcast_dimensions, + comparison_direction), + lhs, rhs, broadcast_dimensions, comparison_direction); +} + +Type CompareOp::InferOutputTypes(Builder* builder, Value* lhs, Value* rhs, + DenseIntElementsAttr broadcast_dimensions, + StringAttr comparison_direction) { + if (!lhs->getType().isa() || !rhs->getType().isa()) + return builder->getTensorType(builder->getI1Type()); + // TODO(parkers): When binary ops support broadcasting shape inference, reuse + // that logic. + auto lhs_type = lhs->getType().cast(); + auto rhs_type = rhs->getType().cast(); + if (lhs_type != rhs_type) return builder->getTensorType(builder->getI1Type()); + return builder->getTensorType(lhs_type.getShape(), builder->getI1Type()); +} + #define GET_OP_CLASSES #include ""tensorflow/compiler/mlir/xla/ir/hlo_ops.cc.inc"" ",0,test a8001b9e8db92620603c3c0588d251192d327bae,tensorflow/tensorflow,"Take proto by value. PiperOrigin-RevId: 312626373 Change-Id: I2effeab7b0c97052f14b8f52b653f24a379dc7ee",xla_computation.h,"@@ -29,8 +29,8 @@ namespace xla { class XlaComputation { public: XlaComputation() : unique_id_(-1) {} - XlaComputation(const HloModuleProto& proto) - : unique_id_(proto.id()), proto_(proto) {} + XlaComputation(HloModuleProto proto) + : unique_id_(proto.id()), proto_(std::move(proto)) {} ~XlaComputation() {} ",0,train 56760749a29fbbca270c90811d1bdfc8414c6c7f,tensorflow/tensorflow,Extract the logic into a separate function to write unit tests,hadoop_file_system.cc,"@@ -135,6 +135,24 @@ const LibHDFS* libhdfs() { return libhdfs; } +Status SplitArchiveNameAndPath(StringPiece& path, string& nn) { + size_t index_end_archive_name = path.find("".har""); + if (index_end_archive_name == path.npos) { + return errors::InvalidArgument( + ""Hadoop archive path does not contain a .har extension""); + } + // Case of hadoop archive. Namenode is the path to the archive. + nn = string(""har://"") + string(nn) + + string(path.substr(0, index_end_archive_name + 4)); + // Remove the hadoop archive path to the path + path.remove_prefix(index_end_archive_name + 4); + if (path.empty()) { + // Root of the archive + path = ""/""; + } + return Status::OK(); +} + // We rely on HDFS connection caching here. The HDFS client calls // org.apache.hadoop.fs.FileSystem.get(), which caches the connection // internally. 
@@ -164,16 +182,7 @@ Status HadoopFileSystem::Connect(StringPiece fname, hdfsFS* fs) { // https://github.com/tensorflow/tensorflow/blob/v1.0.0/third_party/hadoop/hdfs.h#L259 libhdfs()->hdfsBuilderSetNameNode(builder, ""default""); } else if (scheme == ""har"") { - size_t index_end_archive_name = path.find("".har""); - if (index_end_archive_name == path.npos) { - return errors::InvalidArgument( - ""Hadoop archive path does not contain a .har extension""); - } - // Case of hadoop archive. Namenode is the path to the archive. - nn = string(""har://"") + string(nn) + - string(path.substr(0, index_end_archive_name + 4)); - // Remove the hadoop archive path to the path - path.remove_prefix(index_end_archive_name + 4); + SplitArchiveNameAndPath(path, nn); libhdfs()->hdfsBuilderSetNameNode(builder, nn.c_str()); } else { libhdfs()->hdfsBuilderSetNameNode(builder, ",0,train 56760749a29fbbca270c90811d1bdfc8414c6c7f,tensorflow/tensorflow,Extract the logic into a separate function to write unit tests,hadoop_file_system.h,"@@ -70,6 +70,8 @@ class HadoopFileSystem : public FileSystem { Status Connect(StringPiece fname, hdfsFS* fs); }; +Status SplitArchiveNameAndPath(StringPiece& path, string& nn); + } // namespace tensorflow #endif // TENSORFLOW_CORE_PLATFORM_HADOOP_HADOOP_FILE_SYSTEM_H_ ",0,train 56760749a29fbbca270c90811d1bdfc8414c6c7f,tensorflow/tensorflow,Extract the logic into a separate function to write unit tests,hadoop_file_system_test.cc,"@@ -235,6 +235,44 @@ TEST_F(HadoopFileSystemTest, WriteWhileReading) { TF_EXPECT_OK(writer->Close()); } +TEST_F(HadoopFileSystemTest, HarSplit) { + string har_path = + ""har://hdfs-root/user/j.doe/my_archive.har/dir0/dir1/file.txt""; + StringPiece scheme, namenode, path; + io::ParseURI(har_path, &scheme, &namenode, &path); + EXPECT_EQ(""har"", scheme); + EXPECT_EQ(""hdfs-root"", namenode); + EXPECT_EQ(""/user/j.doe/my_archive.har/dir0/dir1/file.txt"", path); + string nn(namenode); + TF_EXPECT_OK(SplitArchiveNameAndPath(path, nn)); + EXPECT_EQ(""har://hdfs-root/user/j.doe/my_archive.har"", nn); + EXPECT_EQ(""/dir0/dir1/file.txt"", path); +} + +TEST_F(HadoopFileSystemTest, NoHarExtension) { + string har_path = ""har://hdfs-root/user/j.doe/my_archive/dir0/dir1/file.txt""; + StringPiece scheme, namenode, path; + io::ParseURI(har_path, &scheme, &namenode, &path); + EXPECT_EQ(""har"", scheme); + EXPECT_EQ(""hdfs-root"", namenode); + EXPECT_EQ(""/user/j.doe/my_archive/dir0/dir1/file.txt"", path); + string nn(namenode); + EXPECT_EQ(errors::InvalidArgument("""").code(), + SplitArchiveNameAndPath(path, nn).code()); +} + +TEST_F(HadoopFileSystemTest, HarRootPath) { + string har_path = ""har://hdfs-root/user/j.doe/my_archive.har""; + StringPiece scheme, namenode, path; + io::ParseURI(har_path, &scheme, &namenode, &path); + EXPECT_EQ(""har"", scheme); + EXPECT_EQ(""hdfs-root"", namenode); + EXPECT_EQ(""/user/j.doe/my_archive.har"", path); + string nn(namenode); + TF_EXPECT_OK(SplitArchiveNameAndPath(path, nn)); + EXPECT_EQ(""har://hdfs-root/user/j.doe/my_archive.har"", nn); + EXPECT_EQ(""/"", path); +} // NewAppendableFile() is not testable. Local filesystem maps to // ChecksumFileSystem in Hadoop, where appending is an unsupported operation. ",0,train a4fb4cb6d4440212fbd2e694bbe4d16f02708384,tensorflow/tensorflow,"Legalize xla_hlo.reshape to tf.Reshape PiperOrigin-RevId: 313630645 Change-Id: Ie8d2c1f0963c6b4a61e593bb95449ef7bf8915ff",legalize_hlo.cc,"@@ -24,6 +24,7 @@ limitations under the License. 
#include ""mlir/IR/MLIRContext.h"" // from @llvm-project #include ""mlir/IR/Operation.h"" // from @llvm-project #include ""mlir/IR/PatternMatch.h"" // from @llvm-project +#include ""mlir/IR/StandardTypes.h"" // from @llvm-project #include ""mlir/Pass/Pass.h"" // from @llvm-project #include ""mlir/Support/LLVM.h"" // from @llvm-project #include ""mlir/Support/LogicalResult.h"" // from @llvm-project @@ -94,6 +95,15 @@ static bool AreBroadcastCompatible(Value x, Value y) { y_ranked.getShape(), resultShape); } +// Returns the shape of the given value in a Constant Op. +ConstantOp ShapeToConst(PatternRewriter &rewriter, Value value) { + ArrayRef shape = value.getType().cast().getShape(); + auto attr_type = RankedTensorType::get({static_cast(shape.size())}, + rewriter.getIntegerType(64)); + auto attr = DenseElementsAttr::get(attr_type, shape); + return rewriter.create(value.getLoc(), attr_type, attr); +} + #include ""tensorflow/compiler/mlir/tensorflow/transforms/generated_legalize_hlo.inc"" /// Performs the lowering to XLA dialect. @@ -107,7 +117,7 @@ void LegalizeHloToTf::runOnFunction() { ConversionTarget target(context); target.addLegalDialect(); - target.addLegalOp(); + target.addLegalOp(); if (failed(applyPartialConversion(getFunction(), target, patterns))) signalPassFailure(); } ",0,train f38b4a412fdd7002368cf3d6dd5471239526c310,tensorflow/tensorflow,"Minor update of rnn comments, as pointed out in: https://github.com/tensorflow/tensorflow/issues/4197 Change: 132656666",rnn.py,"@@ -489,7 +489,7 @@ def bidirectional_rnn(cell_fw, cell_bw, inputs, [batch_size, input_size], or a nested tuple of such elements. initial_state_fw: (optional) An initial state for the forward RNN. This must be a tensor of appropriate type and shape - `[batch_size x cell_fw.state_size]`. + `[batch_size, cell_fw.state_size]`. If `cell_fw.state_size` is a tuple, this should be a tuple of tensors having shapes `[batch_size, s] for s in cell_fw.state_size`. initial_state_bw: (optional) Same as for `initial_state_fw`, but using @@ -574,7 +574,7 @@ def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None, containing the actual lengths for each of the sequences. initial_state_fw: (optional) An initial state for the forward RNN. This must be a tensor of appropriate type and shape - `[batch_size x cell_fw.state_size]`. + `[batch_size, cell_fw.state_size]`. If `cell_fw.state_size` is a tuple, this should be a tuple of tensors having shapes `[batch_size, s] for s in cell_fw.state_size`. initial_state_bw: (optional) Same as for `initial_state_fw`, but using @@ -717,7 +717,7 @@ def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None, sequence_length: (optional) An int32/int64 vector sized `[batch_size]`. initial_state: (optional) An initial state for the RNN. If `cell.state_size` is an integer, this must be - a `Tensor` of appropriate type and shape `[batch_size x cell.state_size]`. + a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. If `cell.state_size` is a tuple, this should be a tuple of tensors having shapes `[batch_size, s] for s in cell.state_size`. dtype: (optional) The data type for the initial state and expected output. ",0,test 46afa1f0e8a8b269054025aefe9a7d42290f8e8d,tensorflow/tensorflow,"Amend cluster resolver error to suggest oauth2client as a possible issue. 
PiperOrigin-RevId: 198894470",tpu_cluster_resolver.py,"@@ -170,10 +170,11 @@ class TPUClusterResolver(ClusterResolver): if service is None and should_resolve: if not _GOOGLE_API_CLIENT_INSTALLED: - raise ImportError('googleapiclient must be installed before using the ' - 'TPU cluster resolver. Execute: `pip install ' - '--upgrade google-api-python-client` to install with ' - 'pip.') + raise ImportError('googleapiclient and oauth2client must be installed ' + 'before using the TPU cluster resolver. Execute: ' + '`pip install --upgrade google-api-python-client` ' + 'and `pip install --upgrade oauth2lclient` to ' + 'install with pip.') final_discovery_url = self._discoveryUrl() or discovery_url if final_discovery_url: ",0,train 1219f682f7faa3619b58f41cc3f479445588cf24,tensorflow/tensorflow,"Enable OpenCL 2.0 or 3.0 compilation when the device supports it. By default OpenCL programs are compiled as 1.x only. PiperOrigin-RevId: 327300390 Change-Id: I7e31c3c0253bc9175f156614a47f5ef8dddf2147",cl_program.cc,"@@ -95,6 +95,8 @@ std::string CompilerOptionToString(const CLDevice& device, return ""-cl-opt-disable""; case CompilerOptions::CL_2_0: return ""-cl-std=CL2.0""; + case CompilerOptions::CL_3_0: + return ""-cl-std=CL3.0""; } } ",0,train 1219f682f7faa3619b58f41cc3f479445588cf24,tensorflow/tensorflow,"Enable OpenCL 2.0 or 3.0 compilation when the device supports it. By default OpenCL programs are compiled as 1.x only. PiperOrigin-RevId: 327300390 Change-Id: I7e31c3c0253bc9175f156614a47f5ef8dddf2147",cl_program.h,"@@ -41,7 +41,8 @@ enum class CompilerOptions { ADRENO_MORE_WAVES, POWERVR_FP16, CL_OPT_DISABLE, - CL_2_0 + CL_2_0, + CL_3_0, }; std::string CompilerOptionsToString( ",0,train 1219f682f7faa3619b58f41cc3f479445588cf24,tensorflow/tensorflow,"Enable OpenCL 2.0 or 3.0 compilation when the device supports it. By default OpenCL programs are compiled as 1.x only. PiperOrigin-RevId: 327300390 Change-Id: I7e31c3c0253bc9175f156614a47f5ef8dddf2147",mean_stddev_normalization.cc,"@@ -17,6 +17,8 @@ limitations under the License. 
#include +#include ""tensorflow/lite/delegates/gpu/cl/cl_program.h"" +#include ""tensorflow/lite/delegates/gpu/cl/device_info.h"" #include ""tensorflow/lite/delegates/gpu/cl/kernels/util.h"" #include ""tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h"" #include ""tensorflow/lite/delegates/gpu/cl/precision.h"" @@ -64,7 +66,8 @@ static inline float local_reduce(float input, __local float* tmp) { } } // namespace -MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition) +MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition, + const DeviceInfo& device_info) : GPUOperation(definition) { // The kernel code does not inherently need a fixed size, but in order to not // hardcode the __local array's size for the reductions, we would need to pass @@ -74,6 +77,11 @@ MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition) work_group_size_.y = 1; // Required work_group_size_.z = 1; // Required code_ = GetNormalizationCode(); + if (device_info.cl_version >= OpenCLVersion::CL_3_0) { + compiler_options_.push_back(CompilerOptions::CL_3_0); + } else if (device_info.cl_version >= OpenCLVersion::CL_2_0) { + compiler_options_.push_back(CompilerOptions::CL_2_0); + } } std::string MeanStdDevNormalization::GetNormalizationCode() { @@ -145,8 +153,8 @@ int3 MeanStdDevNormalization::GetGridSize() const { } MeanStdDevNormalization CreateMeanStdDevNormalization( - const OperationDef& definition) { - return MeanStdDevNormalization(definition); + const OperationDef& definition, const DeviceInfo& device_info) { + return MeanStdDevNormalization(definition, device_info); } } // namespace cl ",0,train 1219f682f7faa3619b58f41cc3f479445588cf24,tensorflow/tensorflow,"Enable OpenCL 2.0 or 3.0 compilation when the device supports it. By default OpenCL programs are compiled as 1.x only. PiperOrigin-RevId: 327300390 Change-Id: I7e31c3c0253bc9175f156614a47f5ef8dddf2147",mean_stddev_normalization.h,"@@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_ +#include ""tensorflow/lite/delegates/gpu/cl/device_info.h"" #include ""tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"" #include ""tensorflow/lite/delegates/gpu/common/operations.h"" #include ""tensorflow/lite/delegates/gpu/common/status.h"" @@ -28,7 +29,8 @@ namespace cl { // Implements tensor_utils::MeanStddevNormalization class MeanStdDevNormalization : public GPUOperation { public: - explicit MeanStdDevNormalization(const OperationDef& definition); + explicit MeanStdDevNormalization(const OperationDef& definition, + const DeviceInfo& device_info); void GetPossibleKernelWorkGroups( TuningType tuning_type, const DeviceInfo& device_info, @@ -50,7 +52,7 @@ class MeanStdDevNormalization : public GPUOperation { }; MeanStdDevNormalization CreateMeanStdDevNormalization( - const OperationDef& definition); + const OperationDef& definition, const DeviceInfo& device_info); } // namespace cl } // namespace gpu ",0,train 1219f682f7faa3619b58f41cc3f479445588cf24,tensorflow/tensorflow,"Enable OpenCL 2.0 or 3.0 compilation when the device supports it. By default OpenCL programs are compiled as 1.x only. 
PiperOrigin-RevId: 327300390 Change-Id: I7e31c3c0253bc9175f156614a47f5ef8dddf2147",operation_selector.cc,"@@ -262,7 +262,8 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, return SelectMean(attr, op_def, creation_context.device->info_, gpu_op); } case OperationType::MEAN_STDDEV_NORMALIZATION: { - MeanStdDevNormalization operation = CreateMeanStdDevNormalization(op_def); + MeanStdDevNormalization operation = + CreateMeanStdDevNormalization(op_def, creation_context.device->info_); *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); ",0,train bd24a27f305badb89d68014ee1cfedd460b04536,tensorflow/tensorflow,Final Changes 2,compat.py,"@@ -112,8 +112,10 @@ def as_str_any(value): @tf_export('compat.path_to_str') def path_to_str(path): """"""Returns the file system path representation of a `PathLike` object, else as it is. + Args: path: An object that can be converted to path representation. + Returns: A `str` object. ",0,train 830cde8776d9adb6bdbb2e0b3173d16780d52df7,tensorflow/tensorflow,"Eliminate crashy Concat()/Split() overloads. Change: 150143909",tensor_util.cc,"@@ -42,12 +42,6 @@ Tensor DeepCopy(const Tensor& other) { return tmp; } -Tensor Concat(const gtl::ArraySlice& tensors) { - Tensor result; - TF_CHECK_OK(Concat(tensors, &result)); - return result; -} - Status Concat(const gtl::ArraySlice& tensors, Tensor* result) { if (tensors.empty()) { return errors::InvalidArgument(""Cannot concatenate zero tensors""); @@ -109,13 +103,6 @@ Status Concat(const gtl::ArraySlice& tensors, Tensor* result) { return Status::OK(); } -std::vector Split(const Tensor& tensor, - const gtl::ArraySlice& sizes) { - std::vector result; - TF_CHECK_OK(Split(tensor, sizes, &result)); - return result; -} - Status Split(const Tensor& tensor, const gtl::ArraySlice& sizes, std::vector* result) { if (tensor.dims() == 0) { ",0,train 830cde8776d9adb6bdbb2e0b3173d16780d52df7,tensorflow/tensorflow,"Eliminate crashy Concat()/Split() overloads. Change: 150143909",tensor_util.h,"@@ -41,10 +41,6 @@ Tensor DeepCopy(const Tensor& other); Status Concat(const gtl::ArraySlice& tensors, Tensor* result) TF_MUST_USE_RESULT; -// Version of Concat() that crashes upon hitting an error. -// DEPRECATED. DO NOT USE. -Tensor Concat(const gtl::ArraySlice& tensors); - // Splits 'tensor' into 'sizes.size()' individual tensors, along the 0th // dimension. The ith output tensor has 0th-dimension size 'sizes[i]'. // @@ -58,11 +54,6 @@ Tensor Concat(const gtl::ArraySlice& tensors); Status Split(const Tensor& tensor, const gtl::ArraySlice& sizes, std::vector* result) TF_MUST_USE_RESULT; -// Version of Split() that crashes upon hitting an error. -// DEPRECATED. DO NOT USE. -std::vector Split(const Tensor& tensor, - const gtl::ArraySlice& sizes); - } // namespace tensor } // namespace tensorflow ",0,train 830cde8776d9adb6bdbb2e0b3173d16780d52df7,tensorflow/tensorflow,"Eliminate crashy Concat()/Split() overloads. 
Change: 150143909",tensor_util_test.cc,"@@ -208,7 +208,10 @@ TEST(TensorUtil, ConcatSplitStrings) { x.flat()(i) = strings::StrCat(""foo_"", i); } - Tensor x_round_tripped = tensor::Concat(tensor::Split(x, {2, 1, 1})); + std::vector split; + TF_ASSERT_OK(tensor::Split(x, {2, 1, 1}, &split)); + Tensor x_round_tripped; + TF_ASSERT_OK(tensor::Concat(split, &x_round_tripped)); ASSERT_EQ(x.shape(), x_round_tripped.shape()); for (int i = 0; i < 4 * 3; ++i) { EXPECT_EQ(x.flat()(i), x_round_tripped.flat()(i)); ",0,train e30d648f8782d595056ef7cf251d30fcf34aef7c,tensorflow/tensorflow,"Adding regexp target to tensorflow/core/platform/BUILD. PiperOrigin-RevId: 268105122",regexp.h,"@@ -21,7 +21,7 @@ limitations under the License. #include ""tensorflow/core/platform/types.h"" #if defined(PLATFORM_GOOGLE) || defined(PLATFORM_GOOGLE_ANDROID) -#include ""tensorflow/core/platform/google/build_config/re2.h"" +#include ""third_party/re2/re2.h"" #else #include ""re2/re2.h"" #endif ",0,train cc967a1f37270d17b624214f3a23629e889054e3,tensorflow/tensorflow,"Expose 'CreateNNAPIDelegate(StatefulNnApiDelegate::Options)' unconditionally, rather than only on Android. On unsupported platforms, it will return nullptr. PiperOrigin-RevId: 333654569 Change-Id: I2dc6f63f7a25c951f6f310acd8c1b52657be7267",utils.cc,"@@ -105,14 +105,16 @@ TfLiteDelegatePtr CreateNNAPIDelegate() { #endif // defined(__ANDROID__) } -#if defined(__ANDROID__) TfLiteDelegatePtr CreateNNAPIDelegate(StatefulNnApiDelegate::Options options) { +#if defined(__ANDROID__) return TfLiteDelegatePtr( new StatefulNnApiDelegate(options), [](TfLiteDelegate* delegate) { delete reinterpret_cast(delegate); }); -} +#else + return CreateNullDelegate(); #endif // defined(__ANDROID__) +} #if TFLITE_SUPPORTS_GPU_DELEGATE TfLiteDelegatePtr CreateGPUDelegate(TfLiteGpuDelegateOptionsV2* options) { ",0,train cc967a1f37270d17b624214f3a23629e889054e3,tensorflow/tensorflow,"Expose 'CreateNNAPIDelegate(StatefulNnApiDelegate::Options)' unconditionally, rather than only on Android. On unsupported platforms, it will return nullptr. PiperOrigin-RevId: 333654569 Change-Id: I2dc6f63f7a25c951f6f310acd8c1b52657be7267",utils.h,"@@ -29,12 +29,10 @@ limitations under the License. #include ""tensorflow/lite/delegates/gpu/delegate.h"" #endif -#if defined(__ANDROID__) #include ""tensorflow/lite/delegates/nnapi/nnapi_delegate.h"" -#if (defined(__arm__) || defined(__aarch64__)) +#if defined(__ANDROID__) && (defined(__arm__) || defined(__aarch64__)) #include ""tensorflow/lite/delegates/hexagon/hexagon_delegate.h"" #endif -#endif // TODO(b/149248802): include XNNPACK delegate when the issue is resolved. #if !defined(__Fuchsia__) || defined(TFLITE_WITHOUT_XNNPACK) @@ -46,8 +44,8 @@ limitations under the License. namespace tflite { namespace evaluation { -// Same w/ Interpreter::TfLiteDelegatePtr to avoid pulling -// tensorflow/lite/interpreter.h dependency +// Same as Interpreter::TfLiteDelegatePtr, defined here to avoid pulling +// in tensorflow/lite/interpreter.h dependency. using TfLiteDelegatePtr = std::unique_ptr; @@ -68,10 +66,9 @@ inline TfLiteStatus GetSortedFileNames(const std::string& directory, std::unordered_set()); } +// Returns nullptr on error, e.g. if NNAPI isn't supported on this platform. 
TfLiteDelegatePtr CreateNNAPIDelegate(); -#if defined(__ANDROID__) TfLiteDelegatePtr CreateNNAPIDelegate(StatefulNnApiDelegate::Options options); -#endif TfLiteDelegatePtr CreateGPUDelegate(); #if TFLITE_SUPPORTS_GPU_DELEGATE ",0,train bf1d138211f2ca1a23923ba44b2a234417e46adb,tensorflow/tensorflow,"Remove TF_CPP_VMODULE from NCCL tests since it has no effect when set after program start. PiperOrigin-RevId: 247040886",collective_nccl_reducer_test.cc,"@@ -95,7 +95,6 @@ class NcclReducerTest : public ::testing::Test { void Init(int num_ranks) { setenv(""NCCL_DEBUG"", ""INFO"", 1 /* replace */); setenv(""NCCL_LAUNCH_MODE"", ""PARALLEL"", 1 /* replace */); - setenv(""TF_CPP_VMODULE"", ""nccl_manager=2"", 1 /* replace */); InitGPUDevices(); std::vector> local_devices; std::vector device_names; ",0,train bf1d138211f2ca1a23923ba44b2a234417e46adb,tensorflow/tensorflow,"Remove TF_CPP_VMODULE from NCCL tests since it has no effect when set after program start. PiperOrigin-RevId: 247040886",nccl_manager_test.cc,"@@ -66,7 +66,6 @@ class NcclManagerTest : public ::testing::Test { static void SetUpTestCase() { setenv(""NCCL_DEBUG"", ""INFO"", 1 /* replace */); setenv(""NCCL_LAUNCH_MODE"", ""PARALLEL"", 1 /* replace */); - setenv(""TF_CPP_VMODULE"", ""nccl_manager=2"", 1 /* replace */); devices_ = new std::vector>(GetGPUDevices()); LOG(INFO) << ""Running test with "" << devices_->size() << "" gpus""; } ",0,train 1b9f56058daeb5d95f853969c9d9e0b0b4d349c7,tensorflow/tensorflow,"Refactor the FillRandom code and seperate integer and float This is majorly for code health and avoid misusing the integer code path for float PiperOrigin-RevId: 418045468 Change-Id: Ib781c503c447d3e26390d974da0442e849a41d9c",test_util.cc,"@@ -103,7 +103,7 @@ float ExponentialRandomPositiveFloat(float percentile, float percentile_val, return val; } -void FillRandom(std::vector* vec, float min, float max) { +void FillRandomFloat(std::vector* vec, float min, float max) { std::uniform_real_distribution dist(min, max); // TODO(b/154540105): use std::ref to avoid copying the random engine. auto gen = std::bind(dist, RandomEngine()); ",0,test 1b9f56058daeb5d95f853969c9d9e0b0b4d349c7,tensorflow/tensorflow,"Refactor the FillRandom code and seperate integer and float This is majorly for code health and avoid misusing the integer code path for float PiperOrigin-RevId: 418045468 Change-Id: Ib781c503c447d3e26390d974da0442e849a41d9c",test_util.h,"@@ -57,7 +57,7 @@ float ExponentialRandomPositiveFloat(float percentile, float percentile_val, float max_val); // Fills a vector with random floats between |min| and |max|. -void FillRandom(std::vector* vec, float min, float max); +void FillRandomFloat(std::vector* vec, float min, float max); template void FillRandom(typename std::vector::iterator begin_it, @@ -74,7 +74,13 @@ void FillRandom(typename std::vector::iterator begin_it, // Fills a vector with random numbers between |min| and |max|. template void FillRandom(std::vector* vec, T min, T max) { - return FillRandom(std::begin(*vec), std::end(*vec), min, max); + FillRandom(std::begin(*vec), std::end(*vec), min, max); +} + +// Template specialization for float. +template <> +inline void FillRandom(std::vector* vec, float min, float max) { + FillRandomFloat(vec, min, max); } // Fills a vector with random numbers. 
",0,test 5947bb78e7728a2b2f80edc4a1ed9a774bbb2274,tensorflow/tensorflow,"Demonstrate variables updates in tf.function PiperOrigin-RevId: 275301015 Change-Id: I2b6b96f706f7d8fdcc35129581c5df5b1f35c2da",def_function_test.py,"@@ -539,7 +539,6 @@ class DefFunctionTest(test.TestCase, parameterized.TestCase): self.assertAllClose([13., 14.], add_var(constant_op.constant(2.))) def testSameVariableTwice(self): - v = variables.Variable(1.0) @def_function.function @@ -548,6 +547,29 @@ class DefFunctionTest(test.TestCase, parameterized.TestCase): self.assertAllEqual(add(v, v), 2.0) + def testVariableUpdate(self): + v1 = variables.Variable(1.0) + v2 = variables.Variable(2.0) + v3 = variables.Variable(4, dtype=dtypes.int32) + + trace_count = [0] + + @def_function.function + def double_variable(x): + trace_count[0] += 1 + x.assign_add(x.read_value()) + + self.assertEqual(trace_count[0], 0) + double_variable(v1) + self.assertEqual(trace_count[0], 1) + self.assertEqual(self.evaluate(v1), 2.0) + double_variable(v2) + self.assertEqual(trace_count[0], 1 if ops.Tensor._USE_EQUALITY else 2) + self.assertEqual(self.evaluate(v2), 4.0) + double_variable(v3) + self.assertEqual(trace_count[0], 2 if ops.Tensor._USE_EQUALITY else 3) + self.assertEqual(self.evaluate(v3), 8) + def testShapeCache(self): @def_function.function def func(x): ",0,train 334aa8f8f38cc31cd8c934471fd9d45a390b5f3d,tensorflow/tensorflow,"Automated g4 rollback of changelist 181260801 PiperOrigin-RevId: 181548597",xla_launch_op.cc,"@@ -257,8 +257,10 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { const XlaCompiler::CompilationResult* kernel; xla::LocalExecutable* executable; + OP_REQUIRES_OK(ctx, cache->Compile(options, function_, num_constant_args_, - variables, ctx, &kernel, &executable)); + variables, ctx, &kernel, &executable, + /*compile_options=*/nullptr)); VLOG(1) << ""Executing XLA Computation...""; ",0,test 334aa8f8f38cc31cd8c934471fd9d45a390b5f3d,tensorflow/tensorflow,"Automated g4 rollback of changelist 181260801 PiperOrigin-RevId: 181548597",xla_compilation_cache.cc,"@@ -238,7 +238,8 @@ Status XlaCompilationCache::Compile( int num_constant_args, const std::vector& variable_args, OpKernelContext* ctx, const XlaCompiler::CompilationResult** compilation_result, - xla::LocalExecutable** executable) { + xla::LocalExecutable** executable, + const XlaCompiler::CompileOptions* compile_options) { VLOG(1) << ""XlaCompilationCache::Compile "" << DebugString(); if (VLOG_IS_ON(2)) { @@ -297,9 +298,9 @@ Status XlaCompilationCache::Compile( XlaCompiler compiler(options); entry->compiled = true; - entry->compilation_status = - compiler.CompileFunction(XlaCompiler::CompileOptions(), function, args, - &entry->compilation_result); + entry->compilation_status = compiler.CompileFunction( + compile_options ? 
*compile_options : XlaCompiler::CompileOptions(), + function, args, &entry->compilation_result); } *compilation_result = &entry->compilation_result; if (entry->compilation_status.ok() && executable) { ",0,test 334aa8f8f38cc31cd8c934471fd9d45a390b5f3d,tensorflow/tensorflow,"Automated g4 rollback of changelist 181260801 PiperOrigin-RevId: 181548597",xla_compilation_cache.h,"@@ -66,7 +66,8 @@ class XlaCompilationCache : public ResourceBase { const std::vector& variable_args, OpKernelContext* ctx, const XlaCompiler::CompilationResult** compilation_result, - xla::LocalExecutable** executable); + xla::LocalExecutable** executable, + const XlaCompiler::CompileOptions* compile_options); xla::LocalClient* client() const { return client_; } const DeviceType& device_type() const { return device_type_; } ",0,test 334aa8f8f38cc31cd8c934471fd9d45a390b5f3d,tensorflow/tensorflow,"Automated g4 rollback of changelist 181260801 PiperOrigin-RevId: 181548597",while_op.cc,"@@ -201,10 +201,16 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) { OP_REQUIRES_OK(ctx, compiler->CompileFunction(cond_options, cond_name_attr_, arguments, &cond)); - xla::Shape body_input_shape = - xla::ShapeUtil::MakeTupleShape(body.xla_input_shapes); - xla::Shape cond_input_shape = - xla::ShapeUtil::MakeTupleShape(cond.xla_input_shapes); + OP_REQUIRES(ctx, body.xla_input_shapes.size() == 1, + errors::FailedPrecondition(""Expected one input shape"")); + xla::Shape body_input_shape = body.xla_input_shapes[0]; + OP_REQUIRES(ctx, xla::ShapeUtil::IsTuple(body_input_shape), + errors::FailedPrecondition(""Expected tuple shape"")); + OP_REQUIRES(ctx, cond.xla_input_shapes.size() == 1, + errors::FailedPrecondition(""Expected one input shape"")); + xla::Shape cond_input_shape = cond.xla_input_shapes[0]; + OP_REQUIRES(ctx, xla::ShapeUtil::IsTuple(cond_input_shape), + errors::FailedPrecondition(""Expected tuple shape"")); VLOG(2) << ""Body shape: "" << xla::ShapeUtil::HumanString(body_input_shape) << "" -> "" << xla::ShapeUtil::HumanString(body.xla_output_shape); ",0,test 334aa8f8f38cc31cd8c934471fd9d45a390b5f3d,tensorflow/tensorflow,"Automated g4 rollback of changelist 181260801 PiperOrigin-RevId: 181548597",xla_compiler.cc,"@@ -316,15 +316,22 @@ Status BuildArguments(const Graph& graph, return Status::OK(); } - input_shapes->resize(parameters.size()); + std::vector arg_shapes; + arg_shapes.reserve(parameters.size()); input_mapping->resize(parameters.size()); for (std::vector::size_type i = 0; i < parameters.size(); ++i) { const XlaCompiler::Argument& arg = args[parameters[i]]; // Computes the shapes of non-constant arguments. - (*input_shapes)[i] = arg.shape; + arg_shapes.push_back(arg.shape); (*input_mapping)[i] = parameters[i]; } + if (use_tuple_arg) { + input_shapes->push_back(xla::ShapeUtil::MakeTupleShape(arg_shapes)); + } else { + *input_shapes = arg_shapes; + } + // Use the _Arg nodes in the graph to resolve core assignments. for (const Node* n : graph.nodes()) { if (StringPiece(n->type_string()) != ""_Arg"") continue; @@ -348,9 +355,19 @@ Status BuildArguments(const Graph& graph, // Build parameter handles for non-constant arguments. 
std::vector arg_handles(parameters.size()); if (use_tuple_arg) { - xla::Shape tuple_shape = xla::ShapeUtil::MakeTupleShape(*input_shapes); + xla::OpSharding tuple_sharding; + tuple_sharding.set_type(xla::OpSharding::Type::OpSharding_Type_TUPLE); + for (int64 parameter : parameters) { + const int core = (*arg_cores)[parameter]; + const int root_device = 0; + *tuple_sharding.add_tuple_shardings() = + core == -1 ? xla::sharding_builder::AssignDevice(root_device) + : xla::sharding_builder::AssignDevice(core); + } + xla::ScopedShardingAssignment assign_tuple_sharding(builder, + tuple_sharding); xla::ComputationDataHandle tuple = - builder->Parameter(0, tuple_shape, ""arg_tuple""); + builder->Parameter(0, (*input_shapes)[0], ""arg_tuple""); for (std::vector::size_type i = 0; i < parameters.size(); ++i) { const int core = (*arg_cores)[parameters[i]]; xla::ScopedShardingAssignment assign_sharding( @@ -374,7 +391,7 @@ Status BuildArguments(const Graph& graph, for (std::vector::size_type i = 0; i < parameters.size(); ++i) { const XlaCompiler::Argument& arg = args[parameters[i]]; VLOG(2) << "" XLA arg "" << i - << "" shape: "" << xla::ShapeUtil::HumanString((*input_shapes)[i]) + << "" shape: "" << xla::ShapeUtil::HumanString(arg_shapes[i]) << "" name: "" << arg.name << "" TF arg "" << parameters[i]; XlaExpression& arg_expression = (*arg_expressions)[parameters[i]]; switch (arg.kind) { ",0,test 90a754c965005fe33dfde1352267be92fea4c095,tensorflow/tensorflow,"TFLM: nit catch memory allocation failures PiperOrigin-RevId: 283699825 Change-Id: I0434f161c1ac6d5578e7a35f26f7a8344cc3cf6f",micro_allocator.cc,"@@ -89,6 +89,11 @@ MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model, reinterpret_cast(memory_allocator_.AllocateFromTail( sizeof(TfLiteTensor) * context_->tensors_size, alignof(TfLiteTensor))); + if (context_->tensors == nullptr) { + error_reporter_->Report( + ""Failed to allocate memory for context->tensors, %d bytes required"", + sizeof(TfLiteTensor) * context_->tensors_size); + } // Null all inputs so we can later perform a null check to avoid re-allocating // registered pre-allocated inputs. @@ -230,6 +235,12 @@ TfLiteStatus MicroAllocator::FinishTensorAllocation() { TensorInfo* tensor_info = reinterpret_cast(tmp_allocator.AllocateFromTail( sizeof(TensorInfo) * tensors_size, alignof(TensorInfo))); + if (tensor_info == nullptr) { + error_reporter_->Report( + ""Failed to allocate memory for tensor_info, %d bytes required"", + sizeof(TfLiteTensor) * context_->tensors_size); + return kTfLiteError; + } // Set up the runtime data structures for all tensors. 
for (size_t i = 0; i < tensors_size; ++i) { ",0,train 39e5d1099a343212ee2a5ac2cbc0bff31d70e739,tensorflow/tensorflow,"Fix dtype handling in tf.contrib.metrics.confusion_matrix() Change: 133403718",confusion_matrix_ops_test.py,"@@ -27,10 +27,12 @@ class ConfusionMatrixTest(tf.test.TestCase): def _testConfMatrix(self, predictions, labels, truth, weights=None): with self.test_session(): + dtype = predictions.dtype ans = tf.contrib.metrics.confusion_matrix( - predictions, labels, weights=weights) + predictions, labels, dtype=dtype, weights=weights) tf_ans = ans.eval() self.assertAllClose(tf_ans, truth, atol=1e-10) + self.assertEqual(tf_ans.dtype, dtype) def _testBasic(self, dtype): predictions = np.arange(5, dtype=dtype) @@ -44,10 +46,7 @@ class ConfusionMatrixTest(tf.test.TestCase): [0, 0, 0, 0, 1]], dtype=dtype) - self._testConfMatrix( - predictions=predictions, - labels=labels, - truth=truth) + self._testConfMatrix(predictions=predictions, labels=labels, truth=truth) def testInt32Basic(self): self._testBasic(dtype=np.int32) @@ -55,6 +54,41 @@ class ConfusionMatrixTest(tf.test.TestCase): def testInt64Basic(self): self._testBasic(dtype=np.int64) +def _testConfMatrixOnTensors(self, tf_dtype, np_dtype): + with self.test_session() as sess: + m_neg = tf.placeholder(dtype=tf.float32) + m_pos = tf.placeholder(dtype=tf.float32) + s = tf.placeholder(dtype=tf.float32) + + neg = tf.random_normal([20], mean=m_neg, stddev=s, dtype=tf.float32) + pos = tf.random_normal([20], mean=m_pos, stddev=s, dtype=tf.float32) + + data = tf.concat(0, [neg, pos]) + data = tf.cast(tf.round(data), tf_dtype) + data = tf.minimum(tf.maximum(data, 0), 1) + lab = tf.concat(0, [tf.zeros([20], dtype=tf_dtype), + tf.ones([20], dtype=tf_dtype)]) + + cm = tf.contrib.metrics.confusion_matrix( + data, lab, dtype=tf_dtype, num_classes=2) + + d, l, cm_out = sess.run([data, lab, cm], {m_neg: 0.0, + m_pos: 1.0, + s: 1.0}) + + truth = np.zeros([2, 2], dtype=np_dtype) + for i in xrange(len(d)): + truth[d[i], l[i]] = truth[d[i], l[i]] + 1 + + self.assertEqual(cm_out.dtype, np_dtype) + self.assertAllClose(cm_out, truth, atol=1e-10) + + def _testOnTensors_int32(self): + self._testConfMatrixOnTensors(tf.int32, np.int32) + + def testOnTensors_int64(self): + self._testConfMatrixOnTensors(tf.int64, np.int64) + def _testDiffentLabelsInPredictionAndTarget(self, dtype): predictions = np.asarray([1, 2, 3], dtype=dtype) labels = np.asarray([4, 5, 6], dtype=dtype) @@ -69,10 +103,7 @@ class ConfusionMatrixTest(tf.test.TestCase): [0, 0, 0, 0, 0, 0, 0]], dtype=dtype) - self._testConfMatrix( - predictions=predictions, - labels=labels, - truth=truth) + self._testConfMatrix(predictions=predictions, labels=labels, truth=truth) def testInt32DifferentLabels(self, dtype=np.int32): self._testDiffentLabelsInPredictionAndTarget(dtype) @@ -94,10 +125,7 @@ class ConfusionMatrixTest(tf.test.TestCase): [0, 1, 0, 0, 0, 0, 0]], dtype=dtype) - self._testConfMatrix( - predictions=predictions, - labels=labels, - truth=truth) + self._testConfMatrix(predictions=predictions, labels=labels, truth=truth) def testInt32MultipleLabels(self, dtype=np.int32): self._testMultipleLabels(dtype) @@ -119,30 +147,27 @@ class ConfusionMatrixTest(tf.test.TestCase): dtype=np.int32) self._testConfMatrix( - predictions=predictions, - labels=labels, - weights=weights, - truth=truth) + predictions=predictions, labels=labels, weights=weights, truth=truth) def testInvalidRank(self): predictions = np.asarray([[1, 2, 3]]) labels = np.asarray([1, 2, 3]) - self.assertRaisesRegexp( - ValueError, ""an not 
squeeze dim"", - tf.contrib.metrics.confusion_matrix, predictions, labels) + self.assertRaisesRegexp(ValueError, ""an not squeeze dim"", + tf.contrib.metrics.confusion_matrix, predictions, + labels) predictions = np.asarray([1, 2, 3]) labels = np.asarray([[1, 2, 3]]) - self.assertRaisesRegexp( - ValueError, ""an not squeeze dim"", - tf.contrib.metrics.confusion_matrix, predictions, labels) + self.assertRaisesRegexp(ValueError, ""an not squeeze dim"", + tf.contrib.metrics.confusion_matrix, predictions, + labels) def testInputDifferentSize(self): predictions = np.asarray([1, 2, 3]) labels = np.asarray([1, 2]) - self.assertRaisesRegexp( - ValueError, ""must be equal"", - tf.contrib.metrics.confusion_matrix, predictions, labels) + self.assertRaisesRegexp(ValueError, ""must be equal"", + tf.contrib.metrics.confusion_matrix, predictions, + labels) def testOutputIsInt32(self): predictions = np.arange(2) ",0,train 39e5d1099a343212ee2a5ac2cbc0bff31d70e739,tensorflow/tensorflow,"Fix dtype handling in tf.contrib.metrics.confusion_matrix() Change: 133403718",confusion_matrix_ops.py,"@@ -75,8 +75,10 @@ def confusion_matrix(predictions, labels, num_classes=None, dtype=dtypes.int32, [predictions, labels, num_classes]) as name: predictions, labels = metric_ops_util.remove_squeezable_dimensions( ops.convert_to_tensor( - predictions, name='predictions', dtype=dtypes.int64), - ops.convert_to_tensor(labels, name='labels', dtype=dtypes.int64)) + predictions, name='predictions'), + ops.convert_to_tensor(labels, name='labels')) + predictions = math_ops.cast(predictions, dtypes.int64) + labels = math_ops.cast(labels, dtypes.int64) if num_classes is None: num_classes = math_ops.maximum(math_ops.reduce_max(predictions), @@ -91,7 +93,7 @@ def confusion_matrix(predictions, labels, num_classes=None, dtype=dtypes.int32, values = (array_ops.ones_like(predictions, dtype) if weights is None else weights) cm_sparse = ops.SparseTensor( - indices=indices, values=values, shape=shape) + indices=indices, values=values, shape=math_ops.to_int64(shape)) zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype) return sparse_ops.sparse_add(zero_matrix, cm_sparse) ",0,train 5c4055b80bb5d86f5a1c2d0d843d9501817ef369,tensorflow/tensorflow,"[XLA] Bound compilation time spent in the region-based live-range interference analysis in the copy insertion pass to be at most linear to the size of the input HLO module. PiperOrigin-RevId: 396740128 Change-Id: I2e9ff7172c1fb6039af6eb95068c01f53ec7be6c",copy_insertion.cc,"@@ -1312,8 +1312,10 @@ class CopyRemover { // live range interference is introduced by the copy's elimination. If // elision is possible, then the internal state (value lists) are updated, // and true is returned. Returns false otherwise. - bool TryElideCopy(const HloInstruction* copy, int64_t region_analysis_limit) { + bool TryElideCopy(const HloInstruction* copy, + int64_t* region_analysis_limit) { VLOG(2) << ""Trying to remove "" << copy->name(); + CHECK_NE(region_analysis_limit, nullptr); if (!ContainsKey(copy_map_, copy)) { VLOG(2) << copy->name() << "" is not removable""; @@ -1340,8 +1342,9 @@ class CopyRemover { // are cheap and are later removed by replicating the broadcasts. 
bool use_region_analysis = copy->operand(0)->opcode() != HloOpcode::kBroadcast && - (region_analysis_limit < 0 || - live_range_size1 * live_range_size2 <= region_analysis_limit); + (*region_analysis_limit < 0 || + live_range_size1 * live_range_size2 <= *region_analysis_limit); + *region_analysis_limit = 0; VLOG(3) << copy->name() << "" copies value "" << copy_node.src->value->ToShortString(); VLOG(3) << ""Source buffer values: "" << ValueListToString(copy_node.src); @@ -1369,6 +1372,7 @@ class CopyRemover { VLOG(2) << ""Configured to not use region-based analysis.\n""; return true; } + *region_analysis_limit += live_range_size1 * live_range_size2; if (ValuesInterfere(src, dest, option)) { VLOG(2) << ""Region-based interference is true. \n""; return true; @@ -1964,7 +1968,6 @@ Status CopyInsertion::RemoveUnnecessaryCopies(HloOrdering* ordering, XLA_VLOG_LINES(4, module->ToString()); TF_ASSIGN_OR_RETURN(std::unique_ptr alias_analysis, HloAliasAnalysis::Run(module, can_share_buffer_)); - CopyRemover copy_remover(*module, *alias_analysis, ordering, check_live_range_ordering); if (VLOG_IS_ON(3)) { @@ -1980,6 +1983,11 @@ Status CopyInsertion::RemoveUnnecessaryCopies(HloOrdering* ordering, int64_t num_existing_copies = GetNumExistingCopies(module); bool changed = true; int64_t num_iterations = -1; + constexpr int64_t region_analysis_allowance_cap = 30000; + VLOG(6) << ""Copy Insertion analyzing module with instructino count = "" + << module->instruction_count() << ""\n""; + int64_t region_analysis_allowance = + std::max(region_analysis_allowance_cap, module->instruction_count() / 10); while (changed) { CHECK_LE(++num_iterations, num_existing_copies); changed = false; @@ -1989,13 +1997,29 @@ Status CopyInsertion::RemoveUnnecessaryCopies(HloOrdering* ordering, VLOG(2) << ""computation:"" << computation->name() << ""\n""; for (HloInstruction* instruction : computation->instructions()) { VLOG(2) << instruction->ToString() << ""\n""; - if (instruction->opcode() == HloOpcode::kCopy && - copy_remover.TryElideCopy(instruction, - use_region_based_live_range_analysis_)) { - changed = true; - TF_RETURN_IF_ERROR(StripControlDependenciesFrom(instruction)); - TF_RETURN_IF_ERROR( - instruction->ReplaceAllUsesWith(instruction->mutable_operand(0))); + // The region_analysis_cost_now is always set to + // use_region_based_live_range_analysis_ if it is < 0, in which case the + // analysis is always performed. + int64_t region_analysis_cost_now = std::min( + region_analysis_allowance, use_region_based_live_range_analysis_); + if (instruction->opcode() == HloOpcode::kCopy) { + if (copy_remover.TryElideCopy(instruction, + ®ion_analysis_cost_now)) { + changed = true; + TF_RETURN_IF_ERROR(StripControlDependenciesFrom(instruction)); + TF_RETURN_IF_ERROR(instruction->ReplaceAllUsesWith( + instruction->mutable_operand(0))); + VLOG(6) << ""succeeded in eliminating copy.\n""; + } + if (region_analysis_allowance > 0 && region_analysis_cost_now > 0) { + VLOG(6) << ""Copy Insertion analyzing module cost: "" + << region_analysis_cost_now << ""\n""; + VLOG(6) << ""instruction:"" << instruction->ToString() << ""\n""; + region_analysis_allowance -= region_analysis_cost_now; + if (region_analysis_allowance < 0) { + region_analysis_allowance = 0; + } + } } } } ",0,train 5c4055b80bb5d86f5a1c2d0d843d9501817ef369,tensorflow/tensorflow,"[XLA] Bound compilation time spent in the region-based live-range interference analysis in the copy insertion pass to be at most linear to the size of the input HLO module. 
PiperOrigin-RevId: 396740128 Change-Id: I2e9ff7172c1fb6039af6eb95068c01f53ec7be6c",copy_insertion.h,"@@ -99,9 +99,6 @@ class CopyInsertion : public HloModulePass { private: Status AddCopiesToResolveInterference(HloModule* module); - // TODO(b/189898980): the region based live range analysis currently - // does not enforce a strict ordering of the merged live ranges. This may - // cause problems for parallel workloads (e.g., in SPMD). int64_t use_region_based_live_range_analysis_; }; ",0,train 0204fbd5fec268e2b4d4d4e9185e21725a6c248d,tensorflow/tensorflow,"Update tests and pydoc for dequeue_batch. De-flake graph_io_test. Fix typo. Change: 129677002",graph_io.py,"@@ -133,7 +133,7 @@ def read_keyed_batch_examples( Raises: ValueError: for invalid inputs. """""" - # Retrive files to read. + # Retrieve files to read. if isinstance(file_pattern, list): file_names = file_pattern if not file_names: ",0,test 0204fbd5fec268e2b4d4d4e9185e21725a6c248d,tensorflow/tensorflow,"Update tests and pydoc for dequeue_batch. De-flake graph_io_test. Fix typo. Change: 129677002",graph_io_test.py,"@@ -200,11 +200,20 @@ class GraphIOTest(tf.test.TestCase): def _create_temp_file(self, lines): tempdir = tempfile.mkdtemp() - filename = os.path.join(tempdir, ""file.csv"") + filename = os.path.join(tempdir, ""temp_file"") gfile.Open(filename, ""w"").write(lines) return filename - def test_read_csv(self): + def _create_sorted_temp_files(self, lines_list): + tempdir = tempfile.mkdtemp() + filenames = [] + for i, lines in enumerate(lines_list): + filename = os.path.join(tempdir, ""temp_file%05d"" % i) + gfile.Open(filename, ""w"").write(lines) + filenames.append(filename) + return filenames + + def test_read_text_lines(self): gfile.Glob = self._orig_glob filename = self._create_temp_file(""ABC\nDEF\nGHK\n"") @@ -214,9 +223,35 @@ class GraphIOTest(tf.test.TestCase): with tf.Graph().as_default() as g, self.test_session(graph=g) as session: inputs = tf.contrib.learn.io.read_batch_examples( - filename, batch_size, - reader=tf.TextLineReader, randomize_input=False, - num_epochs=1, queue_capacity=queue_capacity, name=name) + filename, batch_size, reader=tf.TextLineReader, + randomize_input=False, num_epochs=1, queue_capacity=queue_capacity, + name=name) + session.run(tf.initialize_local_variables()) + + coord = tf.train.Coordinator() + tf.train.start_queue_runners(session, coord=coord) + + self.assertAllEqual(session.run(inputs), [b""ABC""]) + self.assertAllEqual(session.run(inputs), [b""DEF""]) + self.assertAllEqual(session.run(inputs), [b""GHK""]) + with self.assertRaises(errors.OutOfRangeError): + session.run(inputs) + + coord.request_stop() + + def test_read_text_lines_multifile(self): + gfile.Glob = self._orig_glob + filenames = self._create_sorted_temp_files([""ABC\n"", ""DEF\nGHK\n""]) + + batch_size = 1 + queue_capacity = 5 + name = ""my_batch"" + + with tf.Graph().as_default() as g, self.test_session(graph=g) as session: + inputs = tf.contrib.learn.io.read_batch_examples( + filenames, batch_size, reader=tf.TextLineReader, + randomize_input=False, num_epochs=1, queue_capacity=queue_capacity, + name=name) session.run(tf.initialize_local_variables()) coord = tf.train.Coordinator() @@ -230,7 +265,7 @@ class GraphIOTest(tf.test.TestCase): coord.request_stop() - def test_batch_reader(self): + def test_batch_text_lines(self): gfile.Glob = self._orig_glob filename = self._create_temp_file(""A\nB\nC\nD\nE\n"") @@ -255,7 +290,7 @@ class GraphIOTest(tf.test.TestCase): coord.request_stop() - def test_keyed_read_csv(self): + def 
test_keyed_read_text_lines(self): gfile.Glob = self._orig_glob filename = self._create_temp_file(""ABC\nDEF\nGHK\n"") ",0,test 4f333b63f7b46a3122f91b5358f2763e6c2e8206,tensorflow/tensorflow,"[XLA] Add a whole graph execution interface. PiperOrigin-RevId: 188554206",service.cc,"@@ -937,6 +937,11 @@ tensorflow::Status Service::Execute(const ExecuteRequest* arg, return tensorflow::Status::OK(); } +tensorflow::Status Service::ExecuteGraph(const ExecuteGraphRequest* /*arg*/, + ExecuteResponse* /*result*/) { + return Unimplemented(""execute-graph is not yet implemented""); +} + tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg, ExecuteAsyncResponse* result) { VLOG(1) << ""running execute-async request: "" << arg->ShortDebugString(); ",0,train 4f333b63f7b46a3122f91b5358f2763e6c2e8206,tensorflow/tensorflow,"[XLA] Add a whole graph execution interface. PiperOrigin-RevId: 188554206",service.h,"@@ -112,6 +112,12 @@ class Service : public ServiceInterface { tensorflow::Status Execute(const ExecuteRequest* arg, ExecuteResponse* result) override; + // Executes a computation with the provided global data passed as + // immutable arguments. The request contains the whole computation graph. + // Returns global data output and execution timing. + tensorflow::Status ExecuteGraph(const ExecuteGraphRequest* arg, + ExecuteResponse* result) override; + // Executes one or more computations in parallel with the provided global data // passed as immutable arguments. Returns global data output for each // computation. ",0,train 4f333b63f7b46a3122f91b5358f2763e6c2e8206,tensorflow/tensorflow,"[XLA] Add a whole graph execution interface. PiperOrigin-RevId: 188554206",service_interface.h,"@@ -54,6 +54,9 @@ class ServiceInterface { virtual tensorflow::Status Execute(const ExecuteRequest* arg, ExecuteResponse* result) = 0; + virtual tensorflow::Status ExecuteGraph(const ExecuteGraphRequest* arg, + ExecuteResponse* result) = 0; + virtual tensorflow::Status ExecuteParallel( const ExecuteParallelRequest* arg, ExecuteParallelResponse* result) = 0; ",0,train 4da61c0caadbab46cd43961565ac21314dee8254,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-10-01 PiperOrigin-RevId: 334769371 Change-Id: Id864a5af5aa673ca2eb2bf2ba32ae236744d5c38",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 9, 30) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 10, 1) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,test da81d8fa1bf4d82633b283473f7d04bca148f974,tensorflow/tensorflow,"TFlite benchmark_model tool: Don't destruct the NNAPI SL before destructing the delegate that uses it. PiperOrigin-RevId: 431445099",nnapi_delegate_provider.cc,"@@ -28,18 +28,19 @@ namespace { using nnapi::NnApiSupportLibrary; -// StatefulNnApiDelegate that takes ownership of NnApiSupportLibrary instance -// passed to the constructor. +// StatefulNnApiDelegate that holds onto an NnApiSupportLibrary instance +// passed to the constructor for later destruction. +// Note that the support library must outlive the delegate. 
class NnApiSupportLibraryDelegate : public StatefulNnApiDelegate { public: - // The constructed object takes ownership of the nnapi_sl. NnApiSupportLibraryDelegate(const NnApiSupportLibrary* nnapi_sl, Options options) : StatefulNnApiDelegate(nnapi_sl->getFL5(), options), nnapi_sl_(nnapi_sl) {} + const NnApiSupportLibrary* get_nnapi_sl() const { return nnapi_sl_; } private: - std::unique_ptr nnapi_sl_; + const NnApiSupportLibrary* const nnapi_sl_; }; } // namespace @@ -260,7 +261,11 @@ TfLiteDelegatePtr NnapiDelegateProvider::CreateTfLiteDelegate( return TfLiteDelegatePtr( new NnApiSupportLibraryDelegate(nnapi_impl.release(), options), [](TfLiteDelegate* delegate) { - delete reinterpret_cast(delegate); + NnApiSupportLibraryDelegate* sl_delegate = + reinterpret_cast(delegate); + const NnApiSupportLibrary* sl = sl_delegate->get_nnapi_sl(); + delete sl_delegate; + delete sl; }); } } else if (!params.Get(""nnapi_accelerator_name"").empty()) { ",0,train 744a6bb1db15bf0d7ed9d83fb117ef2a02fb4591,tensorflow/tensorflow,"[XLA:GPU] Revert the workaround for the LLVM PTX backend bug on llvm.round. Previously, we translated HLO RoundNearestAfz instruction to a call of the NVIDIA libdevice routine __nv_round_ as a workaround of the bug (see cl/235610143). This change reverts the workaround to translate the HLO RoundNearestAfz instruction to llvm.round again as the LLVM PTX bug has been fixed. PiperOrigin-RevId: 246848247",elemental_ir_emitter.cc,"@@ -442,7 +442,9 @@ StatusOr ElementalIrEmitter::EmitFloatUnaryOp( {operand_value}, {operand_value->getType()}, b_); case HloOpcode::kRoundNearestAfz: - return EmitRoundNearestAfz(op->shape().element_type(), operand_value); + return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::round, + {operand_value}, + {operand_value->getType()}, b_); case HloOpcode::kSign: { auto type = operand_value->getType(); auto zero = llvm::ConstantFP::get(type, 0.0); @@ -1139,12 +1141,6 @@ StatusOr ElementalIrEmitter::EmitExpm1(PrimitiveType prim_type, return Select(x_is_small, for_small_x, for_large_x); } -StatusOr ElementalIrEmitter::EmitRoundNearestAfz( - PrimitiveType /*prim_type*/, llvm::Value* value) { - return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::round, {value}, - {value->getType()}, b_); -} - StatusOr ElementalIrEmitter::EmitPow(PrimitiveType prim_type, llvm::Value* lhs, llvm::Value* rhs) { ",0,train 744a6bb1db15bf0d7ed9d83fb117ef2a02fb4591,tensorflow/tensorflow,"[XLA:GPU] Revert the workaround for the LLVM PTX backend bug on llvm.round. Previously, we translated HLO RoundNearestAfz instruction to a call of the NVIDIA libdevice routine __nv_round_ as a workaround of the bug (see cl/235610143). This change reverts the workaround to translate the HLO RoundNearestAfz instruction to llvm.round again as the LLVM PTX bug has been fixed. PiperOrigin-RevId: 246848247",elemental_ir_emitter.h,"@@ -146,9 +146,6 @@ class ElementalIrEmitter : public IrBuilderMixin { virtual StatusOr EmitTanh(PrimitiveType prim_type, llvm::Value* value); - virtual StatusOr EmitRoundNearestAfz(PrimitiveType prim_type, - llvm::Value* value); - virtual StatusOr EmitReducePrecision(const HloInstruction* hlo, llvm::Value* x); ",0,train 744a6bb1db15bf0d7ed9d83fb117ef2a02fb4591,tensorflow/tensorflow,"[XLA:GPU] Revert the workaround for the LLVM PTX backend bug on llvm.round. Previously, we translated HLO RoundNearestAfz instruction to a call of the NVIDIA libdevice routine __nv_round_ as a workaround of the bug (see cl/235610143). 
This change reverts the workaround to translate the HLO RoundNearestAfz instruction to llvm.round again as the LLVM PTX bug has been fixed. PiperOrigin-RevId: 246848247",elemental_ir_emitter.cc,"@@ -271,16 +271,6 @@ StatusOr GpuElementalIrEmitter::EmitTanh(PrimitiveType prim_type, return FPCast(fast_tanh, value->getType()); } -StatusOr GpuElementalIrEmitter::EmitRoundNearestAfz( - PrimitiveType prim_type, llvm::Value* value) { - // Use libdevice __nv_round instead of llvm.round. This is to workaround a - // bug in the PTX backend, which implements llvm.round with PTX cvt.rni. - // When the llvm.round is fixed, we may still want to use __nv_round here as - // expanding the non-trivial implementation early while inlining allows better - // optimizations. - return EmitLibdeviceMathCall(""__nv_round"", {value}, {prim_type}, prim_type); -} - llvm::Value* GpuElementalIrEmitter::EmitDeviceFunctionCall( const string& callee_name, absl::Span operands, absl::Span input_types, PrimitiveType output_type, ",0,train 744a6bb1db15bf0d7ed9d83fb117ef2a02fb4591,tensorflow/tensorflow,"[XLA:GPU] Revert the workaround for the LLVM PTX backend bug on llvm.round. Previously, we translated HLO RoundNearestAfz instruction to a call of the NVIDIA libdevice routine __nv_round_ as a workaround of the bug (see cl/235610143). This change reverts the workaround to translate the HLO RoundNearestAfz instruction to llvm.round again as the LLVM PTX bug has been fixed. PiperOrigin-RevId: 246848247",elemental_ir_emitter.h,"@@ -91,9 +91,6 @@ class GpuElementalIrEmitter : public ElementalIrEmitter { StatusOr EmitTanh(PrimitiveType prim_type, llvm::Value* value) override; - StatusOr EmitRoundNearestAfz(PrimitiveType prim_type, - llvm::Value* value) override; - llvm::Value* EmitThreadId() override; private: ",0,train 5fc14e22172722a115a282cbde8c4770e305aef5,tensorflow/tensorflow,"Fix layers_test exception regex matching. Change: 152422855",layers_test.py,"@@ -1486,7 +1486,7 @@ class PartialFlattenTest(test.TestCase): inputs = sparse_tensor.SparseTensor(indices, values, shape) with self.assertRaisesRegexp(ValueError, - 'inputs has rank less than new_rank'): + 'Inputs has rank less than new_rank'): _layers._inner_flatten(inputs, new_rank) ",0,train 503da90d9deb8964bd435e0893eb50b6f42a18ee,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-05-02 PiperOrigin-RevId: 246278996",compat.py,"@@ -27,7 +27,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 5, 1) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 5, 2) @tf_export(""compat.forward_compatible"") ",0,train 6e369da8b4a8668169cdd6a93c8a069a5a4888c2,tensorflow/tensorflow,"Add new str_util::HumanReadableElapsedTime that formats a 'double seconds' value using appropriate units based on the magnitude of the time interval, and add tests for this. Change: 132365945",str_util.cc,"@@ -18,6 +18,7 @@ limitations under the License. #include #include #include ""tensorflow/core/lib/strings/numbers.h"" +#include ""tensorflow/core/lib/strings/stringprintf.h"" namespace tensorflow { namespace str_util { @@ -334,5 +335,58 @@ bool SplitAndParseAsInts(StringPiece text, char delim, return true; } +string HumanReadableElapsedTime(double seconds) { + string human_readable; + + if (seconds < 0) { + human_readable = ""-""; + seconds = -seconds; + } + + // Start with us and keep going up to years. 
+ // The comparisons must account for rounding to prevent the format breaking + // the tested condition and returning, e.g., ""1e+03 us"" instead of ""1 ms"". + const double microseconds = seconds * 1.0e6; + if (microseconds < 999.5) { + strings::Appendf(&human_readable, ""%0.3g us"", microseconds); + return human_readable; + } + double milliseconds = seconds * 1e3; + if (milliseconds >= .995 && milliseconds < 1) { + // Round half to even in Appendf would convert this to 0.999 ms. + milliseconds = 1.0; + } + if (milliseconds < 999.5) { + strings::Appendf(&human_readable, ""%0.3g ms"", milliseconds); + return human_readable; + } + if (seconds < 60.0) { + strings::Appendf(&human_readable, ""%0.3g s"", seconds); + return human_readable; + } + seconds /= 60.0; + if (seconds < 60.0) { + strings::Appendf(&human_readable, ""%0.3g min"", seconds); + return human_readable; + } + seconds /= 60.0; + if (seconds < 24.0) { + strings::Appendf(&human_readable, ""%0.3g h"", seconds); + return human_readable; + } + seconds /= 24.0; + if (seconds < 30.0) { + strings::Appendf(&human_readable, ""%0.3g days"", seconds); + return human_readable; + } + if (seconds < 365.2425) { + strings::Appendf(&human_readable, ""%0.3g months"", seconds / 30.436875); + return human_readable; + } + seconds /= 365.2425; + strings::Appendf(&human_readable, ""%0.3g years"", seconds); + return human_readable; +} + } // namespace str_util } // namespace tensorflow ",0,test 6e369da8b4a8668169cdd6a93c8a069a5a4888c2,tensorflow/tensorflow,"Add new str_util::HumanReadableElapsedTime that formats a 'double seconds' value using appropriate units based on the magnitude of the time interval, and add tests for this. Change: 132365945",str_util.h,"@@ -80,6 +80,15 @@ string Uppercase(StringPiece s); // set of characters that can be used as word boundaries. void TitlecaseString(string* s, StringPiece delimiters); +// Converts a time interval as double to a human readable +// string. For example: +// 0.001 -> ""1 ms"" +// 10.0 -> ""10 s"" +// 933120.0 -> ""10.8 days"" +// 39420000.0 -> ""1.25 years"" +// -10 -> ""-10 s"" +string HumanReadableElapsedTime(double seconds); + // Join functionality template string Join(const T& s, const char* sep); ",0,test 6e369da8b4a8668169cdd6a93c8a069a5a4888c2,tensorflow/tensorflow,"Add new str_util::HumanReadableElapsedTime that formats a 'double seconds' value using appropriate units based on the magnitude of the time interval, and add tests for this. 
Change: 132365945",str_util_test.cc,"@@ -287,4 +287,23 @@ TEST(TitlecaseString, Basic) { ASSERT_EQ(s, ""Dense""); } +TEST(HumanReadableElapsedTime, Basic) { + EXPECT_EQ(str_util::HumanReadableElapsedTime(-10), ""-10 s""); + EXPECT_EQ(str_util::HumanReadableElapsedTime(-0.001), ""-1 ms""); + EXPECT_EQ(str_util::HumanReadableElapsedTime(-60.0), ""-1 min""); + EXPECT_EQ(str_util::HumanReadableElapsedTime(0.00000001), ""0.01 us""); + EXPECT_EQ(str_util::HumanReadableElapsedTime(0.0000012), ""1.2 us""); + EXPECT_EQ(str_util::HumanReadableElapsedTime(0.0012), ""1.2 ms""); + EXPECT_EQ(str_util::HumanReadableElapsedTime(0.12), ""120 ms""); + EXPECT_EQ(str_util::HumanReadableElapsedTime(1.12), ""1.12 s""); + EXPECT_EQ(str_util::HumanReadableElapsedTime(90.0), ""1.5 min""); + EXPECT_EQ(str_util::HumanReadableElapsedTime(600.0), ""10 min""); + EXPECT_EQ(str_util::HumanReadableElapsedTime(9000.0), ""2.5 h""); + EXPECT_EQ(str_util::HumanReadableElapsedTime(87480.0), ""1.01 days""); + EXPECT_EQ(str_util::HumanReadableElapsedTime(7776000.0), ""2.96 months""); + EXPECT_EQ(str_util::HumanReadableElapsedTime(78840000.0), ""2.5 years""); + EXPECT_EQ(str_util::HumanReadableElapsedTime(382386614.40), ""12.1 years""); + EXPECT_EQ(str_util::HumanReadableElapsedTime(DBL_MAX), ""5.7e+300 years""); +} + } // namespace tensorflow ",0,test 70bc79a21abd445cca6930e51369941c3951d5ee,tensorflow/tensorflow,"NFC: Remove use of GetRankedTensorTypeForOperand for results in BroadcastGradientArgs op GetRankedTensorTypeForOperand is for operands. Result types don't involve constants can be fetched directly. Also, use getDimSize over getShape. PiperOrigin-RevId: 347593054 Change-Id: Ie8fc9f27b9105f3ab06fb4a7eac8699823dfa5fc",tf_ops_a_m.cc,"@@ -655,12 +655,14 @@ static LogicalResult Verify(BroadcastGradientArgsOp op) { GetOutputShapeForBroadcastGradientArgs(bcasted_shape, s0_shape, s1_shape, r0, r1); - RankedTensorType r0_ty = GetRankedTensorTypeForOperand(op.r0()); - RankedTensorType r1_ty = GetRankedTensorTypeForOperand(op.r1()); - if (r0_ty && r0_ty.hasStaticShape() && r0_ty.getShape()[0] != r0.size()) + // Verify that output types are of rank one and matches the computed result + // shape. + auto r0_ty = op.r0().getType().cast(); + auto r1_ty = op.r1().getType().cast(); + if (r0_ty.hasStaticShape() && r0_ty.getDimSize(0) != r0.size()) return op.emitOpError() << ""requires dimension 0 size of 'r0' to be "" << r0.size() << "" but got "" << r0_ty.getShape()[0]; - if (r1_ty && r1_ty.hasStaticShape() && r1_ty.getShape()[0] != r1.size()) + if (r1_ty.hasStaticShape() && r1_ty.getDimSize(0) != r1.size()) return op.emitOpError() << ""requires dimension 0 size of 'r1' to be "" << r1.size() << "" but got "" << r1_ty.getShape()[0]; ",0,train 6ebd3bb334bd9e99eb34e4440dab5853fc84e869,tensorflow/tensorflow,"Adds support for SpaceToDepth & DepthToSpace in hexagon delegate. 
PiperOrigin-RevId: 288393196 Change-Id: I8bf2ad0edd1b40230e88a10ce70dbe2449f172cc",op_builder.cc,"@@ -80,6 +80,10 @@ OpBuilder* GraphBuilder::CreateOpBuilderFromTfLiteOp(int op_type) { return CreateNegOpBuilder(this, OP_QuantizedNeg_8); case kTfLiteBuiltinTranspose: return CreateTransposeBuilder(this, OP_Transpose_8); + case kTfLiteBuiltinSpaceToDepth: + return CreateSpaceToDepthBuilder(this, OP_SpaceToDepth_8); + case kTfLiteBuiltinDepthToSpace: + return CreateSpaceToDepthBuilder(this, OP_DepthToSpace_8); default: context_->ReportError(context_, ""Op not supported: %d"", op_type); return nullptr; ",0,train 6ebd3bb334bd9e99eb34e4440dab5853fc84e869,tensorflow/tensorflow,"Adds support for SpaceToDepth & DepthToSpace in hexagon delegate. PiperOrigin-RevId: 288393196 Change-Id: I8bf2ad0edd1b40230e88a10ce70dbe2449f172cc",op_factory.h,"@@ -43,6 +43,7 @@ OpBuilder* CreateResizeBilinearOpBuilder(GraphBuilder* graph_builder, int op_type); OpBuilder* CreateNegOpBuilder(GraphBuilder* graph_builder, int op_type); OpBuilder* CreateTransposeBuilder(GraphBuilder* graph_builder, int op_type); +OpBuilder* CreateSpaceToDepthBuilder(GraphBuilder* graph_builder, int op_type); } // namespace hexagon } // namespace delegates ",0,train 6ebd3bb334bd9e99eb34e4440dab5853fc84e869,tensorflow/tensorflow,"Adds support for SpaceToDepth & DepthToSpace in hexagon delegate. PiperOrigin-RevId: 288393196 Change-Id: I8bf2ad0edd1b40230e88a10ce70dbe2449f172cc",space_to_depth_builder.cc,"@@ -0,0 +1,93 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include ""tensorflow/lite/experimental/delegates/hexagon/builders/space_to_depth_builder.h"" + +#include + +#include + +#include ""tensorflow/lite/c/builtin_op_data.h"" +#include ""tensorflow/lite/experimental/delegates/hexagon/hexagon_nn/hexagon_nn.h"" +#include ""tensorflow/lite/kernels/internal/reference/reference_ops.h"" +#include ""tensorflow/lite/kernels/kernel_util.h"" + +namespace tflite { +namespace delegates { +namespace hexagon { +TfLiteStatus SpaceToDepthOpBuilder::PopulateSubGraph( + const TfLiteIntArray* inputs, const TfLiteIntArray* outputs, + TfLiteContext* context) { + static int quant_bound_shape[] = {1, 1, 1, 1}; + int tensor_id; + + // Input tensor. + tensor_id = inputs->data[0]; + const auto& input_tensor = context->tensors[tensor_id]; + TF_LITE_ENSURE_STATUS( + ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_, + std::numeric_limits::min(), + std::numeric_limits::max())); + auto* input_min_const = graph_builder_->AddConstNodeWithData( + quant_bound_shape, reinterpret_cast(&input_min_), + sizeof(input_min_)); + auto* input_max_const = graph_builder_->AddConstNodeWithData( + quant_bound_shape, reinterpret_cast(&input_max_), + sizeof(input_max_)); + + // Block size. 
+ const TfLiteSpaceToDepthParams* space_to_depth_params = + reinterpret_cast(builtin_data_); + block_size_ = space_to_depth_params->block_size; + auto* block_size_node = graph_builder_->AddConstNodeWithData( + quant_bound_shape, reinterpret_cast(&block_size_), + sizeof(int)); + + // All inputs. + AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); + AddInput(TensorID(block_size_node->GetID(), 0)); + AddInput(TensorID(input_min_const->GetID(), 0)); + AddInput(TensorID(input_max_const->GetID(), 0)); + + // Hexagon outputs for this node. + int output_batch_size, output_height_size, output_width_size, + output_depth_size; + GetDims(&output_batch_size, &output_height_size, &output_width_size, + &output_depth_size, context->tensors[outputs->data[0]].dims); + node_output_ = AddOutput(sizeof(uint8_t), 4, + {output_batch_size, output_height_size, + output_width_size, output_depth_size}); + AddOutput(sizeof(float), 4, {1, 1, 1, 1}); + AddOutput(sizeof(float), 4, {1, 1, 1, 1}); + + return kTfLiteOk; +} + +TfLiteStatus SpaceToDepthOpBuilder::RegisterOutputs( + const TfLiteIntArray* outputs, TfLiteContext* context) { + // Should be only 1 output. + graph_builder_->AddTensorWithID(outputs->data[0], node_output_.first, + node_output_.second); + return kTfLiteOk; +} + +SpaceToDepthOpBuilder::~SpaceToDepthOpBuilder() {} + +OpBuilder* CreateSpaceToDepthBuilder(GraphBuilder* graph_builder, int op_type) { + return new SpaceToDepthOpBuilder(graph_builder, op_type); +} + +} // namespace hexagon +} // namespace delegates +} // namespace tflite ",0,train 6ebd3bb334bd9e99eb34e4440dab5853fc84e869,tensorflow/tensorflow,"Adds support for SpaceToDepth & DepthToSpace in hexagon delegate. PiperOrigin-RevId: 288393196 Change-Id: I8bf2ad0edd1b40230e88a10ce70dbe2449f172cc",space_to_depth_builder.h,"@@ -0,0 +1,51 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_EXPERIMENTAL_DELEGATES_HEXAGON_BUILDERS_SPACE_TO_DEPTH_BUILDER_H_ +#define TENSORFLOW_LITE_EXPERIMENTAL_DELEGATES_HEXAGON_BUILDERS_SPACE_TO_DEPTH_BUILDER_H_ + +#include + +#include ""tensorflow/lite/experimental/delegates/hexagon/builders/op_builder.h"" + +namespace tflite { +namespace delegates { +namespace hexagon { + +// Supports both ways: +// Space -> Depth & Depth -> Space. 
+class SpaceToDepthOpBuilder : public OpBuilder { + public: + explicit SpaceToDepthOpBuilder(GraphBuilder* graph_builder, int op_type) + : OpBuilder(graph_builder, op_type) {} + TfLiteStatus PopulateSubGraph(const TfLiteIntArray* inputs, + const TfLiteIntArray* outputs, + TfLiteContext* context) override; + + TfLiteStatus RegisterOutputs(const TfLiteIntArray* outputs, + TfLiteContext* context) override; + + ~SpaceToDepthOpBuilder() override; + + private: + TensorID node_output_; + float input_min_, input_max_, output_min_, output_max_; + int block_size_; +}; + +} // namespace hexagon +} // namespace delegates +} // namespace tflite + +#endif // TENSORFLOW_LITE_EXPERIMENTAL_DELEGATES_HEXAGON_BUILDERS_SPACE_TO_DEPTH_BUILDER_H_ ",0,train 6ebd3bb334bd9e99eb34e4440dab5853fc84e869,tensorflow/tensorflow,"Adds support for SpaceToDepth & DepthToSpace in hexagon delegate. PiperOrigin-RevId: 288393196 Change-Id: I8bf2ad0edd1b40230e88a10ce70dbe2449f172cc",utils.cc,"@@ -261,6 +261,12 @@ bool IsNodeSupportedByHexagon(const TfLiteRegistration* registration, return InputsWithCorrectTypes(node, context, {kTfLiteUInt8, kTfLiteInt32}); } + case kTfLiteBuiltinSpaceToDepth: { + return InputsWithCorrectTypes(node, context, {kTfLiteUInt8}); + } + case kTfLiteBuiltinDepthToSpace: { + return InputsWithCorrectTypes(node, context, {kTfLiteUInt8}); + } default: return false; } ",0,train dfbbc2d4de667e0f9fe07035d0c413d3e4bd8364,tensorflow/tensorflow,"Introduce a new XRTExecute flag which allows an exploded (in terms of its handles) tuple to be returned. This prevents clients which are interested in the tuple element handles to do extra RPCs to get them. PiperOrigin-RevId: 223994592",xrt_execute_op.cc,"@@ -228,14 +228,35 @@ Status XRTExecuteOp::DoWork(OpKernelContext* context) { TF_RETURN_IF_ERROR(XRTTupleAllocation::CreateFromBuffer( shaped_buffer, device_ref.backend(), device_ref.device_ordinal(), &output_tuple)); - - Tensor* output_tensor; - TF_RETURN_IF_ERROR( - context->allocate_output(0, TensorShape({}), &output_tensor)); - int64 key; - TF_RETURN_IF_ERROR(output_tuple->Intern(rm, &key)); - output_tensor->scalar()() = key; - + if (config_proto.return_exploded_tuple() && + xla::ShapeUtil::IsTuple(output_tuple->on_device_shape())) { + int64 tuple_element_count = + xla::ShapeUtil::TupleElementCount(output_tuple->on_device_shape()); + Tensor* output_tensor; + TF_RETURN_IF_ERROR(context->allocate_output( + 0, TensorShape({tuple_element_count}), &output_tensor)); + + for (int64 i = 0; i < tuple_element_count; ++i) { + xla::ShapeIndex shape_index; + shape_index.push_back(i); + + XRTTupleAllocation* suballocation; + TF_RETURN_IF_ERROR(XRTTupleAllocation::MakeSubBuffer( + output_tuple, shape_index, &suballocation, + /*alias_parent_allocation=*/false)); + int64 key; + TF_RETURN_IF_ERROR(suballocation->Intern(rm, &key)); + output_tensor->vec()(i) = key; + } + output_tuple->Unref(); + } else { + Tensor* output_tensor; + TF_RETURN_IF_ERROR( + context->allocate_output(0, TensorShape({}), &output_tensor)); + int64 key; + TF_RETURN_IF_ERROR(output_tuple->Intern(rm, &key)); + output_tensor->scalar()() = key; + } return Status::OK(); } ",0,train dfbbc2d4de667e0f9fe07035d0c413d3e4bd8364,tensorflow/tensorflow,"Introduce a new XRTExecute flag which allows an exploded (in terms of its handles) tuple to be returned. This prevents clients which are interested in the tuple element handles to do extra RPCs to get them. 
PiperOrigin-RevId: 223994592",raw_api_test.cc,"@@ -175,6 +175,18 @@ xla::XlaComputation AddAndTuple() { return builder.Build().ValueOrDie(); } +xla::XlaComputation AddAndSubTuple() { + xla::XlaBuilder builder(""AddAndSubTuple""); + auto p0 = xla::Parameter(&builder, 0, xla::ShapeUtil::MakeShape(xla::F32, {}), + ""P0""); + auto p1 = xla::Parameter(&builder, 1, xla::ShapeUtil::MakeShape(xla::F32, {}), + ""P1""); + auto sum = xla::Add(p0, p1); + auto sub = xla::Sub(p0, p1); + xla::Tuple(&builder, {sum, sub}); + return builder.Build().ValueOrDie(); +} + void StoreComputationSnapshot(const xla::XlaComputation& computation, xla::HloSnapshot* dst) { auto snapshot = computation.Snapshot().ValueOrDie(); @@ -681,6 +693,70 @@ TEST(RawApiTest, CompileAndExecuteReturnTuple) { EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response)); } +TEST(RawApiTest, CompileAndExecuteReturnExplodedTuple) { + xrt::XLAAllocation p0; + p0.set_device_ordinal(0); + *p0.mutable_value() = xla::LiteralUtil::CreateR0(12.0f).ToProto(); + + xrt::XLAAllocation p1; + p1.set_device_ordinal(0); + *p1.mutable_value() = xla::LiteralUtil::CreateR0(3.0f).ToProto(); + + xrt::XLAComputation c; + auto config = c.mutable_config(); + auto shapes = config->mutable_program_shape(); + *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::F32, {}).ToProto(); + *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::F32, {}).ToProto(); + *shapes->mutable_result() = + xla::ShapeUtil::MakeTupleShape({xla::ShapeUtil::MakeShape(xla::F32, {}), + xla::ShapeUtil::MakeShape(xla::F32, {})}) + .ToProto(); + StoreComputationSnapshot(AddAndSubTuple(), c.mutable_hlo_snapshot()); + + xrt::XRTExecutionConfig e; + e.set_release_input_handles(true); + e.set_release_compilation_handle(true); + e.set_return_exploded_tuple(true); + + Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); + auto e_config = + ops::Const(root.WithDevice(""/device:CPU:0""), e.SerializeAsString()); + auto computation = + ops::Const(root.WithDevice(""/device:CPU:0""), c.SerializeAsString()); + auto c_handle = ops::XRTCompile(root, computation); + auto p0_value = + ops::Const(root.WithDevice(""/device:CPU:0""), p0.SerializeAsString()); + auto p0_handle = ops::XRTAllocate(root, p0_value); + auto p1_value = + ops::Const(root.WithDevice(""/device:CPU:0""), p1.SerializeAsString()); + auto p1_handle = ops::XRTAllocate(root, p1_value); + auto result = ops::XRTExecute(root, c_handle.handle, e_config, + {Output(p0_handle), Output(p1_handle)}); + TF_ASSERT_OK(root.status()); + + ClientSession session(root); + std::vector outputs; + TF_EXPECT_OK(session.Run({result}, &outputs)); + EXPECT_EQ(outputs.size(), 1); + + auto handles_vec = outputs.front().vec(); + EXPECT_EQ(handles_vec.size(), 2); + + const float kResults[2] = {15.0f, 9.0f}; + for (int64 i = 0; i < handles_vec.size(); ++i) { + auto read_back = ops::XRTReadLiteralAndRelease(root, Input(handles_vec(i))); + std::vector voutputs; + TF_EXPECT_OK(session.Run({read_back}, &voutputs)); + EXPECT_EQ(voutputs.size(), 1); + + xla::LiteralProto response; + EXPECT_TRUE(response.ParseFromString(voutputs[0].scalar()())); + + auto expected = xla::LiteralUtil::CreateR0(kResults[i]); + EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response)); + } +} + TEST(RawApiTest, LeakCompilationReference) { xrt::XLAComputation c; auto config = c.mutable_config(); ",0,train 014d4b5417b7a361c6b9102bf80455ea4b44e4b3,tensorflow/tensorflow,Removed Depricated API from the file.,gamma.py,"@@ -267,7 +267,7 @@ class Gamma(distribution.Distribution): 
self.batch_shape_tensor(), np.array(np.nan, dtype=self.dtype.as_numpy_dtype()), name=""nan"") - return array_ops.where(self.concentration > 1., mode, nan) + return array_ops.where_v2(self.concentration > 1., mode, nan) else: return control_flow_ops.with_dependencies([ check_ops.assert_less( ",0,train ff563b9436509a35bbb5087952c7fbfda44df46f,tensorflow/tensorflow,"Fixed the bug in keras where the callback attributes are not correctly checked. PiperOrigin-RevId: 224598769",keras_test.py,"@@ -1085,8 +1085,8 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine( distribution=[ - combinations.mirrored_strategy_with_two_gpus, - combinations.core_mirrored_strategy_with_two_gpus], + combinations.mirrored_strategy_with_gpu_and_cpu, + combinations.core_mirrored_strategy_with_gpu_and_cpu], mode=['graph', 'eager'])) def test_unsupported_features(self, distribution): with self.cached_session(): @@ -1134,8 +1134,8 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine( distribution=[ - combinations.mirrored_strategy_with_two_gpus, - combinations.core_mirrored_strategy_with_two_gpus], + combinations.mirrored_strategy_with_gpu_and_cpu, + combinations.core_mirrored_strategy_with_gpu_and_cpu], mode=['graph', 'eager'])) def test_calling_with_unsupported_predefined_callbacks(self, distribution): with self.cached_session(): @@ -1161,12 +1161,6 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase): 'using'): model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, callbacks=[keras.callbacks.ReduceLROnPlateau()]) - with self.assertRaisesRegexp(ValueError, - 'histogram_freq in the TensorBoard callback ' - 'is not supported when using ' - 'DistributionStrategy.'): - model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, - callbacks=[keras.callbacks.TensorBoard(histogram_freq=10)]) class TestDistributionStrategyWithLossMasking(test.TestCase, ",0,test ff563b9436509a35bbb5087952c7fbfda44df46f,tensorflow/tensorflow,"Fixed the bug in keras where the callback attributes are not correctly checked. PiperOrigin-RevId: 224598769",distributed_training_utils.py,"@@ -199,11 +199,19 @@ def validate_callbacks(input_callbacks, optimizer, current_strategy): # running ops. if isinstance(callback, callbacks.TensorBoard): if callback.__getattribute__('histogram_freq'): - raise ValueError('histogram_freq in the TensorBoard callback is not ' - 'supported when using DistributionStrategy.') + logging.warning( + UserWarning( + '`histogram_freq` in the TensorBoard callback is not ' + 'supported when using DistributionStrategy. Setting ' + '`histogram_freq` to `0`.')) + callback.histogram_freq = 0 if callback.__getattribute__('write_grads'): - raise ValueError('write_grads in the TensorBoard callback is not ' - 'supported when using DistributionStrategy.') + logging.warning( + UserWarning( + '`write_grads` in the TensorBoard callback is not supported ' + 'when using DistributionStrategy. Setting `write_grads` ' + 'to `False`.')) + callback.histogram_freq = False def validate_distributed_dataset_inputs(distribution_strategy, x, y, ",0,test ab48dbd4ac2095548a5bc8505e08e751d409727f,tensorflow/tensorflow,"Fixing operator order in LRN docs to match code. The implementation adds the bias to a temporary, that temporary is what is then the base with exponent beta. The implementation also agrees with the equation in Section 3.3 of the referenced Krizhevsky et. al. paper. 
Change: 115721267",nn_ops.cc,"@@ -349,7 +349,7 @@ each component is divided by the weighted, squared sum of inputs within sqr_sum[a, b, c, d] = sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2) - output = input / (bias + alpha * sqr_sum ** beta) + output = input / (bias + alpha * sqr_sum) ** beta For details, see [Krizhevsky et al., ImageNet classification with deep convolutional neural networks (NIPS 2012)] ",0,test 5d4a29eaf590b4a3068ef4d0b7bea9d4f7bd9369,tensorflow/tensorflow,"Special case wrapping of ndarrays in the gradient tape code. PiperOrigin-RevId: 317762474 Change-Id: Ie848ad90a88aff5b2faef4069c3f05887038c367",backprop.py,"@@ -62,6 +62,9 @@ from tensorflow.python.util.tf_export import tf_export pfor_ops = LazyLoader( ""pfor_ops"", globals(), ""tensorflow.python.ops.parallel_for.control_flow_ops"") +np_arrays = LazyLoader( + ""np_arrays"", globals(), + ""tensorflow.python.ops.numpy_ops.np_arrays"") function = LazyLoader(""function"", globals(), ""tensorflow.python.eager.function"") @@ -721,9 +724,11 @@ pywrap_tfe.TFE_Py_RegisterVSpace(_default_vspace) def _handle_or_self(x): - """"""If x is ResourceVariable, return its handle, else x."""""" + """"""Unwrap resource variable/ndarray to return tensors."""""" if resource_variable_ops.is_resource_variable(x): - x = x.handle + return x.handle + if isinstance(x, np_arrays.ndarray): + return x.data return x @@ -1023,6 +1028,7 @@ class GradientTape(object): ""gradient in order to compute higher order "" ""derivatives."", 1) + num_ndarrays = 0 flat_targets = [] for t in nest.flatten(target): if not backprop_util.IsTrainable(t): @@ -1033,7 +1039,12 @@ class GradientTape(object): if resource_variable_ops.is_resource_variable(t): with self: t = ops.convert_to_tensor(t) + elif isinstance(t, np_arrays.ndarray): + t = t.data + num_ndarrays += 1 flat_targets.append(t) + # Only rewrap if all targets are ndarray. If not, prefer tensors. + rewrap_as_ndarray = num_ndarrays == len(flat_targets) flat_sources = nest.flatten(sources) flat_sources_raw = flat_sources @@ -1066,6 +1077,9 @@ class GradientTape(object): self._watched_variables = self._tape.watched_variables() self._tape = None + if rewrap_as_ndarray: + flat_grad = nest.map_structure(np_arrays.tensor_to_ndarray, flat_grad) + grad = nest.pack_sequence_as(sources, flat_grad) return grad @@ -1120,6 +1134,10 @@ class GradientTape(object): ValueError: If vectorization of jacobian computation fails. """""" flat_sources = nest.flatten(sources) + rewrap_as_ndarray = False + if isinstance(target, np_arrays.ndarray): + target = target.data + rewrap_as_ndarray = True target_static_shape = target.shape target_shape = array_ops.shape(target) # Note that we push and pop the tape here and below. This is needed since we @@ -1169,6 +1187,8 @@ class GradientTape(object): out = array_ops.reshape(out, new_shape) if context.executing_eagerly(): out.set_shape(target_static_shape.concatenate(flat_sources[i].shape)) + if rewrap_as_ndarray: + out = np_arrays.tensor_to_ndarray(out) output[i] = out return nest.pack_sequence_as(sources, output) ",0,train 5d4a29eaf590b4a3068ef4d0b7bea9d4f7bd9369,tensorflow/tensorflow,"Special case wrapping of ndarrays in the gradient tape code. 
PiperOrigin-RevId: 317762474 Change-Id: Ie848ad90a88aff5b2faef4069c3f05887038c367",np_arrays.py,"@@ -82,10 +82,10 @@ class NdarraySpec(type_spec.BatchableTypeSpec): return (self._data_spec,) def _batch(self, batch_size): - return NdarraySpec(self._data_spec.batch(batch_size)) + return NdarraySpec(self._data_spec._batch(batch_size)) # pylint: disable=protected-access def _unbatch(self): - return NdarraySpec(self._data_spec.unbatch()) + return NdarraySpec(self._data_spec._unbatch()) # pylint: disable=protected-access class ndarray(composite_tensor.CompositeTensor): # pylint: disable=invalid-name @@ -306,10 +306,6 @@ class ndarray(composite_tensor.CompositeTensor): # pylint: disable=invalid-name def __repr__(self): return 'ndarray<{}>'.format(self.data.__repr__()) - @property - def _id(self): - return self.data._id # pylint: disable=protected-access - def tensor_to_ndarray(tensor): return ndarray.from_tensor(tensor) ",0,train 846a73f9f336e54a02c12388ac76a0aa8700543a,tensorflow/tensorflow,"Adds a int32 to int32 HashTable mapping. PiperOrigin-RevId: 178190131",lookup_table_op.cc,"@@ -823,6 +823,7 @@ REGISTER_KERNEL(int64, int64); REGISTER_KERNEL(int64, float); REGISTER_KERNEL(string, string); REGISTER_KERNEL(string, bool); +REGISTER_KERNEL(int32, int32); #undef REGISTER_KERNEL ",0,train 8095a9a78a105cf1a3e196dadc9e07591c6e0439,tensorflow/tensorflow,"simplify naming convention in threading_options refactor options.threading serialization prefer getattr over __getattribute__ regenerate golden apis",threading_options.py,"@@ -54,13 +54,9 @@ class ThreadingOptions(options.OptionsBase): ""The value 0 can be used to indicate that the threadpool size should be "" ""determined at runtime based on the number of available CPU cores."") - def _get_option_names(self): - return [""max_intra_op_parallelism"", ""private_threadpool_size""] - def _has_non_default_values(self): - attrs = self._get_option_names() - for attr in attrs: - if object.__getattribute__(self, attr) is not None: + for attr in filter(lambda opt: not opt.startswith(""_""), dir(self)): + if getattr(self, attr) is not None: return True return False ",0,test 8095a9a78a105cf1a3e196dadc9e07591c6e0439,tensorflow/tensorflow,"simplify naming convention in threading_options refactor options.threading serialization prefer getattr over __getattribute__ regenerate golden apis",dataset_ops.py,"@@ -3617,20 +3617,16 @@ class Options(options_lib.OptionsBase): # (kvignesh1420): We try to keep the values of `threading` and # `experimental_threading` the same, to prevent unexpected behaviours # and ensure backward-compatibility. 
- if self.threading._has_non_default_values(): - if self.experimental_threading._has_non_default_values(): - override_options = [] - for name in self.threading._get_option_names(): - if (object.__getattribute__(self.threading, name) != - object.__getattribute__(self.experimental_threading, name)): - override_options.append(name) - if override_options: - logging.warning(""overriding options '{}' of experimental_threading "" - ""with respective values in threading."".format( - "","".join(override_options))) - self.experimental_threading = self.threading - else: - self.threading = self.experimental_threading + override_attrs = [] + for attr in filter(lambda opt: not opt.startswith(""_""), dir(self.threading)): + if (getattr(self.threading, attr) != + getattr(self.experimental_threading, attr)): + override_attrs.append(attr) + if override_attrs: + logging.warning(""overriding attr(s) '{}' of experimental_threading "" + ""with respective values in threading."".format( + "","".join(override_attrs))) + self.experimental_threading = self.threading pb.threading_options.CopyFrom(self.threading._to_proto()) # pylint: disable=protected-access return pb ",0,test 32140ae87fd86398ac4fa45cb67bd2f29a93090d,tensorflow/tensorflow,"Boosted trees: Adding categorical split support to prediction ops. PiperOrigin-RevId: 214448656",resources.cc,"@@ -60,14 +60,26 @@ int32 BoostedTreesEnsembleResource::next_node( DCHECK_LT(tree_id, tree_ensemble_->trees_size()); DCHECK_LT(node_id, tree_ensemble_->trees(tree_id).nodes_size()); const auto& node = tree_ensemble_->trees(tree_id).nodes(node_id); - DCHECK_EQ(node.node_case(), boosted_trees::Node::kBucketizedSplit); - const auto& split = node.bucketized_split(); - if (bucketized_features[split.feature_id()](index_in_batch) <= - split.threshold()) { - return split.left_id(); - } else { - return split.right_id(); + + switch (node.node_case()) { + case boosted_trees::Node::kBucketizedSplit: { + const auto& split = node.bucketized_split(); + return (bucketized_features[split.feature_id()](index_in_batch) <= + split.threshold()) + ? split.left_id() + : split.right_id(); + } + case boosted_trees::Node::kCategoricalSplit: { + const auto& split = node.categorical_split(); + return (bucketized_features[split.feature_id()](index_in_batch) == + split.value()) + ? split.left_id() + : split.right_id(); + } + default: + DCHECK(false) << ""Node type "" << node.node_case() << "" not supported.""; } + return -1; } float BoostedTreesEnsembleResource::node_value(const int32 tree_id, ",0,train 32140ae87fd86398ac4fa45cb67bd2f29a93090d,tensorflow/tensorflow,"Boosted trees: Adding categorical split support to prediction ops. PiperOrigin-RevId: 214448656",boosted_trees_ops.cc,"@@ -180,6 +180,8 @@ REGISTER_OP(""BoostedTreesMakeStatsSummary"") return Status::OK(); }); +// TODO(nponomareva): when/if creating the new op for unbucketized data, rename +// bucketized_features to features. REGISTER_OP(""BoostedTreesPredict"") .Input(""tree_ensemble_handle: resource"") .Input(""bucketized_features: num_bucketized_features * int32"") ",0,train 32140ae87fd86398ac4fa45cb67bd2f29a93090d,tensorflow/tensorflow,"Boosted trees: Adding categorical split support to prediction ops. 
PiperOrigin-RevId: 214448656",prediction_ops_test.py,"@@ -445,6 +445,78 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase): # change= 0.1(1.14+7.0-7.0) self.assertAllClose([[1], [0.114]], logits_updates) + def testCategoricalSplits(self): + """"""Tests the training prediction work for categorical splits."""""" + with self.cached_session() as session: + tree_ensemble_config = boosted_trees_pb2.TreeEnsemble() + text_format.Merge( + """""" + trees { + nodes { + categorical_split { + feature_id: 1 + value: 2 + left_id: 1 + right_id: 2 + } + } + nodes { + categorical_split { + feature_id: 0 + value: 13 + left_id: 3 + right_id: 4 + } + } + nodes { + leaf { + scalar: 7.0 + } + } + nodes { + leaf { + scalar: 5.0 + } + } + nodes { + leaf { + scalar: 6.0 + } + } + } + tree_weights: 1.0 + tree_metadata { + is_finalized: true + } + """""", tree_ensemble_config) + + # Create existing ensemble with one root split + tree_ensemble = boosted_trees_ops.TreeEnsemble( + 'ensemble', serialized_proto=tree_ensemble_config.SerializeToString()) + tree_ensemble_handle = tree_ensemble.resource_handle + resources.initialize_resources(resources.shared_resources()).run() + + feature_0_values = [13, 1, 3] + feature_1_values = [2, 2, 1] + + # No previous cached values. + cached_tree_ids = [0, 0, 0] + cached_node_ids = [0, 0, 0] + + # Grow tree ensemble. + predict_op = boosted_trees_ops.training_predict( + tree_ensemble_handle, + cached_tree_ids=cached_tree_ids, + cached_node_ids=cached_node_ids, + bucketized_features=[feature_0_values, feature_1_values], + logits_dimension=1) + + logits_updates, new_tree_ids, new_node_ids = session.run(predict_op) + + self.assertAllClose([0, 0, 0], new_tree_ids) + self.assertAllClose([3, 4, 2], new_node_ids) + self.assertAllClose([[5.], [6.], [7.]], logits_updates) + def testCachedPredictionFromTheSameTreeWithPostPrunedNodes(self): """"""Tests that prediction based on previous node in the tree works."""""" with self.cached_session() as session: @@ -924,6 +996,68 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): logits = session.run(predict_op) self.assertAllClose(expected_logits, logits) + def testCategoricalSplits(self): + """"""Tests the predictions work for categorical splits."""""" + with self.cached_session() as session: + tree_ensemble_config = boosted_trees_pb2.TreeEnsemble() + text_format.Merge( + """""" + trees { + nodes { + categorical_split { + feature_id: 1 + value: 2 + left_id: 1 + right_id: 2 + } + } + nodes { + categorical_split { + feature_id: 0 + value: 13 + left_id: 3 + right_id: 4 + } + } + nodes { + leaf { + scalar: 7.0 + } + } + nodes { + leaf { + scalar: 5.0 + } + } + nodes { + leaf { + scalar: 6.0 + } + } + } + tree_weights: 1.0 + """""", tree_ensemble_config) + + # Create existing ensemble with one root split + tree_ensemble = boosted_trees_ops.TreeEnsemble( + 'ensemble', serialized_proto=tree_ensemble_config.SerializeToString()) + tree_ensemble_handle = tree_ensemble.resource_handle + resources.initialize_resources(resources.shared_resources()).run() + + feature_0_values = [13, 1, 3] + feature_1_values = [2, 2, 1] + + expected_logits = [[5.], [6.], [7.]] + + # Prediction should work fine. 
+ predict_op = boosted_trees_ops.predict( + tree_ensemble_handle, + bucketized_features=[feature_0_values, feature_1_values], + logits_dimension=1) + + logits = session.run(predict_op) + self.assertAllClose(expected_logits, logits) + class FeatureContribsOpsTest(test_util.TensorFlowTestCase): """"""Tests feature contribs ops for model understanding."""""" ",0,train 729e39b1a4f0f7a6b3e35a04bf8bbba5e921862b,tensorflow/tensorflow,"Improve the GPU memory use discipline of CollectiveReduce. GPU memory allocation can be done in one of two modes: efficient (but complex and therefore somewhat risky) or conservative (simpler, but less efficient). The main difference is that 'efficient' allocation allows the same memory area to be allocated to mutiple independent uses simultaenously, when it should be the case that those uses will in fact be serial and thus temporally disjoint, while 'conservative' allocation will always obey the invarient that one piece of memory is allocated to at most one use at any point in time. If GPUDevice::RequiresRecordingAccessedTensors() returns false, then the TF runtime uses efficient memory allocation for GPU ops. That is, GPU ops are nominally synchronous and their tensor Ref's are deleted immediately after the ops returns although really the corresponding GPU kernel is only guaranteed to have been enqueued on the compute stream and may not have yet begin execution. If RequiresRecordingAccessedTensors() returns true, then conservative memory allocation is used, i.e. Refs on the tensors accessed by a GPU op are held until the corresponding kernel is guaranteed to have completed execution and no part of the op will touch them again. Efficient GPU memory allocation should be safe when the following criteria are all met: 1. All GPU kernels are executed serially on a single compute stream. 2. All GPU kernel outputs and temp buffers are allocated by the GPU Op in the executor thread in which it is originally called. 3. Any read of a GPU tensor computed by a GPU kernel that is not by another kernel on that same GPU first synchronizes on the compute stream that produced it. 4. Any read by a GPU kernel of a value that was not produced by another GPU kernel first synchronizes on the entity that produced it, e.g. a copy stream. 5. All direct allocations of GPU memory that are not for kernel outputs or temp buffers are conservative in duration. 6. Any use of directly allocated GPU memory that is not part of a kernel execution first synchronizes on the compute stream to ensure that any prior granted uses of the same region have expired before this new use. These conditions together should be sufficient for safety, and correspond to established practice, though it may be possible to contrive other sets of rules that are also sufficient. Collective Ops for GPUs are unusual in that they are async (as TF Ops) and they can directly allocate GPU memory in CPU threads that are asynchronous to the launching executor thread. This CL corrects a couple of subtle misuse errors related to conditions 2 and 6. PiperOrigin-RevId: 210841522",ring_reducer.cc,"@@ -25,6 +25,7 @@ limitations under the License. 
#include ""tensorflow/core/common_runtime/device.h"" #include ""tensorflow/core/common_runtime/device_mgr.h"" #include ""tensorflow/core/common_runtime/dma_helper.h"" +#include ""tensorflow/core/common_runtime/process_util.h"" #include ""tensorflow/core/framework/allocator.h"" #include ""tensorflow/core/framework/device_base.h"" #include ""tensorflow/core/framework/op_kernel.h"" @@ -497,13 +498,6 @@ bool RingReducer::RunAsyncParts() { rfv_.clear(); rfv_.resize(group_size_ * num_subdivs_); PCQueue ready_queue; - int field_done_count = 0; - int send_pending_count = 0; - int recv_pending_count = 0; - std::atomic aborted(false); - field_done_count = 0; - send_pending_count = 0; - recv_pending_count = 0; for (int chunk_idx = 0; chunk_idx < group_size_; ++chunk_idx) { for (int subdiv_idx = 0; subdiv_idx < num_subdivs_; ++subdiv_idx) { int rf_index = (chunk_idx * num_subdivs_) + subdiv_idx; @@ -511,6 +505,30 @@ bool RingReducer::RunAsyncParts() { ready_queue.Enqueue(&rfv_[rf_index]); } } + const DeviceBase::GpuDeviceInfo* gpu_info = + col_ctx_->device->tensorflow_gpu_device_info(); + if (gpu_info) { + // Wait for all currently queued events on the CPU compute stream to + // complete before proceeding. The previous InitRingField calls allocated + // temp memory buffers that are not guaranteed to be valid (e.g. for RDMA + // write) unless we do. + Notification note; + Status s = gpu_info->default_context->ThenExecute( + col_ctx_->device, gpu_info->stream, [¬e]() { note.Notify(); }); + if (s.ok()) { + note.WaitForNotification(); + } else { + mutex_lock l(status_mu_); + status_ = + errors::Internal(""Failed to dispatch ThenExecute in RingReducer""); + return false; + } + } + + int field_done_count = 0; + int send_pending_count = 0; + int recv_pending_count = 0; + std::atomic aborted(false); // Loop until all RingFields have advanced to completion. while (field_done_count < rfv_.size()) { ",0,train 729e39b1a4f0f7a6b3e35a04bf8bbba5e921862b,tensorflow/tensorflow,"Improve the GPU memory use discipline of CollectiveReduce. GPU memory allocation can be done in one of two modes: efficient (but complex and therefore somewhat risky) or conservative (simpler, but less efficient). The main difference is that 'efficient' allocation allows the same memory area to be allocated to mutiple independent uses simultaenously, when it should be the case that those uses will in fact be serial and thus temporally disjoint, while 'conservative' allocation will always obey the invarient that one piece of memory is allocated to at most one use at any point in time. If GPUDevice::RequiresRecordingAccessedTensors() returns false, then the TF runtime uses efficient memory allocation for GPU ops. That is, GPU ops are nominally synchronous and their tensor Ref's are deleted immediately after the ops returns although really the corresponding GPU kernel is only guaranteed to have been enqueued on the compute stream and may not have yet begin execution. If RequiresRecordingAccessedTensors() returns true, then conservative memory allocation is used, i.e. Refs on the tensors accessed by a GPU op are held until the corresponding kernel is guaranteed to have completed execution and no part of the op will touch them again. Efficient GPU memory allocation should be safe when the following criteria are all met: 1. All GPU kernels are executed serially on a single compute stream. 2. All GPU kernel outputs and temp buffers are allocated by the GPU Op in the executor thread in which it is originally called. 3. 
Any read of a GPU tensor computed by a GPU kernel that is not by another kernel on that same GPU first synchronizes on the compute stream that produced it. 4. Any read by a GPU kernel of a value that was not produced by another GPU kernel first synchronizes on the entity that produced it, e.g. a copy stream. 5. All direct allocations of GPU memory that are not for kernel outputs or temp buffers are conservative in duration. 6. Any use of directly allocated GPU memory that is not part of a kernel execution first synchronizes on the compute stream to ensure that any prior granted uses of the same region have expired before this new use. These conditions together should be sufficient for safety, and correspond to established practice, though it may be possible to contrive other sets of rules that are also sufficient. Collective Ops for GPUs are unusual in that they are async (as TF Ops) and they can directly allocate GPU memory in CPU threads that are asynchronous to the launching executor thread. This CL corrects a couple of subtle misuse errors related to conditions 2 and 6. PiperOrigin-RevId: 210841522",tensor_coding.h,"@@ -87,6 +87,9 @@ class TensorResponse { // modified. const RecvTensorResponse& metadata() const { return meta_; } + // Return pointer to the device hosting the tensor. + DeviceBase* device() const { return device_; } + private: bool ParseTensorSubmessage(protobuf::io::CodedInputStream* input, TensorProto* tensor_meta); ",0,train 729e39b1a4f0f7a6b3e35a04bf8bbba5e921862b,tensorflow/tensorflow,"Improve the GPU memory use discipline of CollectiveReduce. GPU memory allocation can be done in one of two modes: efficient (but complex and therefore somewhat risky) or conservative (simpler, but less efficient). The main difference is that 'efficient' allocation allows the same memory area to be allocated to mutiple independent uses simultaenously, when it should be the case that those uses will in fact be serial and thus temporally disjoint, while 'conservative' allocation will always obey the invarient that one piece of memory is allocated to at most one use at any point in time. If GPUDevice::RequiresRecordingAccessedTensors() returns false, then the TF runtime uses efficient memory allocation for GPU ops. That is, GPU ops are nominally synchronous and their tensor Ref's are deleted immediately after the ops returns although really the corresponding GPU kernel is only guaranteed to have been enqueued on the compute stream and may not have yet begin execution. If RequiresRecordingAccessedTensors() returns true, then conservative memory allocation is used, i.e. Refs on the tensors accessed by a GPU op are held until the corresponding kernel is guaranteed to have completed execution and no part of the op will touch them again. Efficient GPU memory allocation should be safe when the following criteria are all met: 1. All GPU kernels are executed serially on a single compute stream. 2. All GPU kernel outputs and temp buffers are allocated by the GPU Op in the executor thread in which it is originally called. 3. Any read of a GPU tensor computed by a GPU kernel that is not by another kernel on that same GPU first synchronizes on the compute stream that produced it. 4. Any read by a GPU kernel of a value that was not produced by another GPU kernel first synchronizes on the entity that produced it, e.g. a copy stream. 5. All direct allocations of GPU memory that are not for kernel outputs or temp buffers are conservative in duration. 6. 
Any use of directly allocated GPU memory that is not part of a kernel execution first synchronizes on the compute stream to ensure that any prior granted uses of the same region have expired before this new use. These conditions together should be sufficient for safety, and correspond to established practice, though it may be possible to contrive other sets of rules that are also sufficient. Collective Ops for GPUs are unusual in that they are async (as TF Ops) and they can directly allocate GPU memory in CPU threads that are asynchronous to the launching executor thread. This CL corrects a couple of subtle misuse errors related to conditions 2 and 6. PiperOrigin-RevId: 210841522",collective_ops.cc,"@@ -132,14 +132,19 @@ class CollectiveReduceOpKernel : public CollectiveOpKernel { ""Failed to get CollectiveExecutor from OpKernelContext for Op "", col_params_.name), done); + // Allocate output on the first pass through this function. This must be + // done immediately, while we're still in the executor thread. Otherwise + // the memory is not guaranteed to be unused by any concurrently executing + // GPU kernel. + if (c->mutable_output(0) == nullptr) { + // Allocate the output tensor, trying to reuse the input. + Tensor* output = nullptr; + OP_REQUIRES_OK_ASYNC(c, + c->forward_input_or_allocate_output( + {0}, 0, c->input(0).shape(), &output), + done); + } if (!CanProceedWithCompute(c, col_exec, done)) return; - // Allocate the output tensor, trying to reuse the input. - Tensor* output = nullptr; - OP_REQUIRES_OK_ASYNC(c, - c->forward_input_or_allocate_output( - {0}, 0, c->input(0).shape(), &output), - done); - auto actual_done = [c, col_exec, done](const Status& s) { OP_REQUIRES_OK_ASYNC(c, s, done); done(); @@ -183,16 +188,23 @@ class CollectiveBcastSendOpKernel : public CollectiveOpKernel { ""Failed to get CollectiveExecutor from OpKernelContext for Op "", col_params_.name), done); + // Allocate output on the first pass through this function. This must be + // done immediately, while we're still in the executor thread. Otherwise + // the memory is not guaranteed to be unused by any concurrently executing + // GPU kernel. + if (c->mutable_output(0) == nullptr) { + // Allocate the output tensor, trying to reuse the input. + Tensor* output = nullptr; + OP_REQUIRES_OK_ASYNC( + c, c->forward_input_or_allocate_output({0}, 0, shape_, &output), + done); + } if (!CanProceedWithCompute(c, col_exec, done)) return; OP_REQUIRES_ASYNC( c, shape_.IsSameSize(c->input(0).shape()), errors::Internal(""Declared shape of op "", col_params_.name, "" does not match shape of input""), done); - // Allocate the output Tensor, trying to reuse the input. - Tensor* output = nullptr; - OP_REQUIRES_OK_ASYNC( - c, c->forward_input_or_allocate_output({0}, 0, shape_, &output), done); auto actual_done = [c, col_exec, done](const Status& s) { OP_REQUIRES_OK_ASYNC(c, s, done); @@ -239,10 +251,16 @@ class CollectiveBcastRecvOpKernel : public CollectiveOpKernel { ""Failed to get CollectiveExecutor from OpKernelContext for Op "", col_params_.name), done); + // Allocate output on the first pass through this function. This must be + // done immediately, while we're still in the executor thread. Otherwise + // the memory is not guaranteed to be unused by any concurrently executing + // GPU kernel. + if (c->mutable_output(0) == nullptr) { + // No input, so must allocate output. 
+ Tensor* output = nullptr; + OP_REQUIRES_OK_ASYNC(c, c->allocate_output(0, shape_, &output), done); + } if (!CanProceedWithCompute(c, col_exec, done)) return; - // No input, so must allocate output. - Tensor* output = nullptr; - OP_REQUIRES_OK_ASYNC(c, c->allocate_output(0, shape_, &output), done); auto actual_done = [c, col_exec, done](const Status& s) { OP_REQUIRES_OK_ASYNC(c, s, done); ",0,train 1505376085cc87ee03367c1aed9ca5eae970f7ff,tensorflow/tensorflow,"Break dependency on conv_ops in kernels from XLA PiperOrigin-RevId: 236350778",conv_ops.cc,"@@ -32,7 +32,6 @@ limitations under the License. #include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/tensor_shape.h"" #include ""tensorflow/core/framework/tensor_slice.h"" -#include ""tensorflow/core/kernels/conv_grad_ops.h"" #include ""tensorflow/core/kernels/ops_util.h"" #include ""tensorflow/core/util/padding.h"" #include ""tensorflow/core/util/tensor_format.h"" ",0,train 1505376085cc87ee03367c1aed9ca5eae970f7ff,tensorflow/tensorflow,"Break dependency on conv_ops in kernels from XLA PiperOrigin-RevId: 236350778",fft_ops.cc,"@@ -27,7 +27,6 @@ limitations under the License. #include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/tensor_shape.h"" #include ""tensorflow/core/framework/tensor_slice.h"" -#include ""tensorflow/core/kernels/conv_grad_ops.h"" #include ""tensorflow/core/kernels/ops_util.h"" #include ""tensorflow/core/util/padding.h"" #include ""tensorflow/core/util/tensor_format.h"" ",0,train 1505376085cc87ee03367c1aed9ca5eae970f7ff,tensorflow/tensorflow,"Break dependency on conv_ops in kernels from XLA PiperOrigin-RevId: 236350778",pooling_ops.cc,"@@ -30,7 +30,6 @@ limitations under the License. #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/register_types.h"" #include ""tensorflow/core/framework/tensor.h"" -#include ""tensorflow/core/kernels/conv_grad_ops.h"" #include ""tensorflow/core/kernels/pooling_ops_common.h"" namespace tensorflow { ",0,train 0297d9c1a64270e266a7aeb48f81c78f0a31f63b,tensorflow/tensorflow,"[tf.data] Patch to unref iterator_resource in DeserializeIteratorOp. PiperOrigin-RevId: 195698980",iterator_ops.cc,"@@ -1051,7 +1051,7 @@ class DeserializeIteratorOp : public OpKernel { IteratorResource* iterator_resource; OP_REQUIRES_OK( ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &iterator_resource)); - + core::ScopedUnref unref_iterator(iterator_resource); Variant variant = ctx->input(1).scalar()(); auto* wrapper = variant.get(); OP_REQUIRES(ctx, wrapper != nullptr, ",0,train 2bd1d7c555ad3029d5c3fcb1d0982330492305bc,tensorflow/tensorflow,"Tag `tf.keras.preprocessing` as deprecated when generating docs. All this does is add a ""status:deprecated"" to `tf.keras.preprocessing` in the tensorflow.org TOC, just like for contrib here: https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/contrib PiperOrigin-RevId: 433878866",generate2.py,"@@ -25,7 +25,7 @@ Requires a local installation of `tensorflow_docs`: pip install git+https://github.com/tensorflow/docs ``` """""" - +import distutils import pathlib import textwrap @@ -181,6 +181,9 @@ def build_docs(output_dir, code_url_prefix, search_hints): code_url_prefix: prefix for ""Defined in"" links. search_hints: Bool. Include meta-data search hints at the top of each file. 
"""""" + if distutils.version.LooseVersion(tf.__version__) >= ""2.9"": + doc_controls.set_deprecated(tf.keras.preprocessing) + # The custom page will be used for raw_ops.md not the one generated above. doc_controls.set_custom_page_builder_cls(tf.raw_ops, RawOpsPageInfo) ",0,train 2bd1d7c555ad3029d5c3fcb1d0982330492305bc,tensorflow/tensorflow,"Tag `tf.keras.preprocessing` as deprecated when generating docs. All this does is add a ""status:deprecated"" to `tf.keras.preprocessing` in the tensorflow.org TOC, just like for contrib here: https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/contrib PiperOrigin-RevId: 433878866",generate2_test.py,"@@ -32,6 +32,7 @@ fake_tf.nn = tf.nn fake_tf.summary = tf.summary fake_tf.raw_ops = types.ModuleType('raw_ops') fake_tf.Module = tf.Module +fake_tf.__version__ = tf.__version__ for name in sorted(dir(tf.raw_ops))[:5]: setattr(fake_tf.raw_ops, name, getattr(tf.raw_ops, name)) ",0,train 4a09d23ea3346c449f4f42a447347c1f0f9a0cd0,tensorflow/tensorflow,changed from tf.cast to tf.dtypes.cast in example for tf.dtypes.cast,math_ops.py,"@@ -640,7 +640,7 @@ def cast(x, dtype, name=None): ```python x = tf.constant([1.8, 2.2], dtype=tf.float32) - tf.cast(x, tf.int32) # [1, 2], dtype=tf.int32 + tf.dtypes.cast(x, tf.int32) # [1, 2], dtype=tf.int32 ``` The operation supports data types (for `x` and `dtype`) of ",0,train f18d09553b2f26a07b0b5cd2ee96f68834fd3c10,tensorflow/tensorflow,"Add element tracing for tf.data.experimental.parallel_interleave. PiperOrigin-RevId: 324696858 Change-Id: I099b9b8935a38e263bd24f008e123c0623432e40",parallel_interleave_dataset_op.cc,"@@ -31,6 +31,8 @@ limitations under the License. #include ""tensorflow/core/lib/random/random.h"" #include ""tensorflow/core/platform/blocking_counter.h"" #include ""tensorflow/core/platform/stringprintf.h"" +#include ""tensorflow/core/profiler/lib/traceme.h"" +#include ""tensorflow/core/profiler/lib/traceme_encode.h"" namespace tensorflow { namespace data { @@ -323,6 +325,11 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { } *end_of_sequence = false; Status s = current_worker->outputs.front().status; + profiler::TraceMe traceme([&] { + return profiler::TraceMeEncode( + ""ParallelInterleaveConsume"", + {{""element_id"", current_worker->outputs.front().id}}); + }); current_worker->outputs.front().output.swap(*out_tensors); current_worker->outputs.pop_front(); current_worker->cond_var.notify_one(); @@ -564,8 +571,10 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { Status status; // The buffered data element. std::vector output; + int64 id = -1; explicit OutputElem(const Status& s) : status(s) {} + OutputElem(const Status& s, int64 id) : status(s), id(id) {} }; // Worker threads operate on their relevant WorkerState structs. 
@@ -813,6 +822,14 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { worker_thread_states_[thread_index] .output_elem.output.empty() && !worker_thread_states_[thread_index].end_of_sequence) { + int64& id = worker_thread_states_[thread_index].output_elem.id; + profiler::TraceMe traceme( + [&] { + id = profiler::TraceMe::NewActivityId(); + return profiler::TraceMeEncode( + ""ParallelInterleaveProduce"", {{""element_id"", id}}); + }, + profiler::kInfo); worker_thread_states_[thread_index].output_elem.status = worker_thread_states_[thread_index].iterator->GetNext( ctx.get(), @@ -856,7 +873,8 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { worker_thread_states_[thread_index].end_of_sequence = false; } else { workers_[thread_index].outputs.emplace_back( - worker_thread_states_[thread_index].output_elem.status); + worker_thread_states_[thread_index].output_elem.status, + worker_thread_states_[thread_index].output_elem.id); workers_[thread_index].outputs.back().output.swap( worker_thread_states_[thread_index].output_elem.output); } ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,export_simple_text_embedding.py,"@@ -87,7 +87,7 @@ class TextEmbeddingModel(tf.train.Checkpoint): return tf.nn.safe_embedding_lookup_sparse( embedding_weights=self.embeddings, - sparse_ids=tf.SparseTensor(token_ids, token_values, token_dense_shape), + sparse_ids=tf.sparse.SparseTensor(token_ids, token_values, token_dense_shape), sparse_weights=None, combiner=""sqrtn"") ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,session.py,"@@ -862,7 +862,7 @@ class BaseSession(SessionInterface): * A `tf.Tensor`. The corresponding fetched value will be a numpy ndarray containing the value of that tensor. - * A `tf.SparseTensor`. + * A `tf.sparse.SparseTensor`. The corresponding fetched value will be a `tf.compat.v1.SparseTensorValue` containing the value of that sparse tensor. @@ -907,7 +907,7 @@ class BaseSession(SessionInterface): `tf.compat.v1.placeholder`, the shape of the value will be checked for compatibility with the placeholder. * If the key is a - `tf.SparseTensor`, + `tf.sparse.SparseTensor`, the value should be a `tf.compat.v1.SparseTensorValue`. * If the key is a nested tuple of `Tensor`s or `SparseTensor`s, the value ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,batching.py,"@@ -88,15 +88,15 @@ def dense_to_ragged_batch(batch_size, @tf_export(""data.experimental.dense_to_sparse_batch"") def dense_to_sparse_batch(batch_size, row_shape): - """"""A transformation that batches ragged elements into `tf.SparseTensor`s. + """"""A transformation that batches ragged elements into `tf.sparse.SparseTensor`s. Like `Dataset.padded_batch()`, this transformation combines multiple consecutive elements of the dataset, which might have different shapes, into a single element. The resulting element has three components (`indices`, `values`, and `dense_shape`), which - comprise a `tf.SparseTensor` that represents the same data. The + comprise a `tf.sparse.SparseTensor` that represents the same data. The `row_shape` represents the dense shape of each row in the - resulting `tf.SparseTensor`, to which the effective batch size is + resulting `tf.sparse.SparseTensor`, to which the effective batch size is prepended. 
For example: ```python @@ -121,7 +121,7 @@ def dense_to_sparse_batch(batch_size, row_shape): consecutive elements of this dataset to combine in a single batch. row_shape: A `tf.TensorShape` or `tf.int64` vector tensor-like object representing the equivalent dense shape of a row in the resulting - `tf.SparseTensor`. Each element of this dataset must have the same rank as + `tf.sparse.SparseTensor`. Each element of this dataset must have the same rank as `row_shape`, and must have size less than or equal to `row_shape` in each dimension. @@ -283,7 +283,7 @@ def unbatch(): class _DenseToSparseBatchDataset(dataset_ops.UnaryDataset): - """"""A `Dataset` that batches ragged dense elements into `tf.SparseTensor`s."""""" + """"""A `Dataset` that batches ragged dense elements into `tf.sparse.SparseTensor`s."""""" def __init__(self, input_dataset, batch_size, row_shape): """"""See `Dataset.dense_to_sparse_batch()` for more details."""""" ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,grouping.py,"@@ -161,7 +161,7 @@ def bucket_by_sequence_length(element_length_func, bucket), and caller must ensure that the source `Dataset` does not contain any elements with length longer than `max(bucket_boundaries)`. no_padding: `bool`, indicates whether to pad the batch features (features - need to be either of type `tf.SparseTensor` or of same shape). + need to be either of type `tf.sparse.SparseTensor` or of same shape). drop_remainder: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing whether the last batch should be dropped in the case it has fewer than `batch_size` elements; the default behavior is not to drop the smaller ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,from_sparse_tensor_slices_test.py,"@@ -37,7 +37,7 @@ class FromSparseTensorSlicesTest(test_base.DatasetTestBase, @combinations.generate( combinations.combine(tf_api_version=1, mode=[""graph""])) def testFromSparseTensorSlices(self): - """"""Test a dataset based on slices of a `tf.SparseTensor`."""""" + """"""Test a dataset based on slices of a `tf.sparse.SparseTensor`."""""" st = array_ops.sparse_placeholder(dtypes.float64) iterator = dataset_ops.make_initializable_iterator( dataset_ops.Dataset.from_sparse_tensor_slices(st)) ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,dataset_ops.py,"@@ -158,7 +158,7 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): Elements can be nested structures of tuples, named tuples, and dictionaries. Element components can be of any type representable by `tf.TypeSpec`, - including `tf.Tensor`, `tf.data.Dataset`, `tf.SparseTensor`, + including `tf.Tensor`, `tf.data.Dataset`, `tf.sparse.SparseTensor`, `tf.RaggedTensor`, and `tf.TensorArray`. >>> a = 1 # Integer element @@ -1486,7 +1486,7 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): array([[ 10, 100], [ 11, 12]], dtype=int32))] See also `tf.data.experimental.dense_to_sparse_batch`, which combines - elements that may have different shapes into a `tf.SparseTensor`. + elements that may have different shapes into a `tf.sparse.SparseTensor`. 
Args: batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of @@ -2296,10 +2296,10 @@ class DatasetV1(DatasetV2): @staticmethod @deprecation.deprecated(None, ""Use `tf.data.Dataset.from_tensor_slices()`."") def from_sparse_tensor_slices(sparse_tensor): - """"""Splits each rank-N `tf.SparseTensor` in this dataset row-wise. + """"""Splits each rank-N `tf.sparse.SparseTensor` in this dataset row-wise. Args: - sparse_tensor: A `tf.SparseTensor`. + sparse_tensor: A `tf.sparse.SparseTensor`. Returns: Dataset: A `Dataset` of rank-(N-1) sparse tensors. @@ -2909,13 +2909,13 @@ class TensorSliceDataset(DatasetSource): class SparseTensorSliceDataset(DatasetSource): - """"""A `Dataset` that splits a rank-N `tf.SparseTensor` into its rows."""""" + """"""A `Dataset` that splits a rank-N `tf.sparse.SparseTensor` into its rows."""""" def __init__(self, sparse_tensor): """"""See `Dataset.from_sparse_tensor_slices()` for details."""""" if not isinstance(sparse_tensor, sparse_tensor_lib.SparseTensor): raise TypeError( - ""`sparse_tensor` must be a `tf.SparseTensor` object. Was {}."".format( + ""`sparse_tensor` must be a `tf.sparse.SparseTensor` object. Was {}."".format( sparse_tensor)) self._sparse_tensor = sparse_tensor ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,iterator_ops.py,"@@ -448,7 +448,7 @@ class Iterator(trackable.Trackable): def output_classes(self): """"""Returns the class of each component of an element of this iterator. - The expected values are `tf.Tensor` and `tf.SparseTensor`. + The expected values are `tf.Tensor` and `tf.sparse.SparseTensor`. Returns: A nested structure of Python `type` objects corresponding to each @@ -677,7 +677,7 @@ class OwnedIterator(trackable.Trackable, composite_tensor.CompositeTensor): def output_classes(self): """"""Returns the class of each component of an element of this iterator. - The expected values are `tf.Tensor` and `tf.SparseTensor`. + The expected values are `tf.Tensor` and `tf.sparse.SparseTensor`. 
Returns: A nested structure of Python `type` objects corresponding to each ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,sparse.py,"@@ -47,7 +47,7 @@ def as_dense_shapes(shapes, classes): Returns: a structure matching the nested structure of `shapes`, containing `tensor_shape.unknown_shape()` at positions where `classes` contains - `tf.SparseTensor` and matching contents of `shapes` otherwise + `tf.sparse.SparseTensor` and matching contents of `shapes` otherwise """""" ret = nest.pack_sequence_as(shapes, [ tensor_shape.unknown_shape() if c is sparse_tensor.SparseTensor else shape @@ -65,7 +65,7 @@ def as_dense_types(types, classes): Returns: a structure matching the nested structure of `types`, containing - `dtypes.variant` at positions where `classes` contains `tf.SparseTensor` and + `dtypes.variant` at positions where `classes` contains `tf.sparse.SparseTensor` and matching contents of `types` otherwise """""" ret = nest.pack_sequence_as(types, [ @@ -106,7 +106,7 @@ def get_classes(tensors): Returns: a structure matching the nested structure of `tensors`, containing - `tf.SparseTensor` at positions where `tensors` contains a sparse tensor and + `tf.sparse.SparseTensor` at positions where `tensors` contains a sparse tensor and `tf.Tensor` otherwise """""" return nest.pack_sequence_as(tensors, [ ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,feature_column.py,"@@ -1969,7 +1969,7 @@ class _CategoricalColumn(_FeatureColumn): WARNING: Do not subclass this layer unless you know what you are doing: the API is subject to future changes. - A categorical feature typically handled with a `tf.SparseTensor` of IDs. + A categorical feature typically handled with a `tf.sparse.SparseTensor` of IDs. """""" IdWeightPair = collections.namedtuple( # pylint: disable=invalid-name ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,feature_column_v2.py,"@@ -2515,7 +2515,7 @@ def _create_dense_column_weighted_sum(column, transformation_cache, class CategoricalColumn(FeatureColumn): """"""Represents a categorical feature. - A categorical feature typically handled with a `tf.SparseTensor` of IDs. + A categorical feature typically handled with a `tf.sparse.SparseTensor` of IDs. """""" IdWeightPair = collections.namedtuple( # pylint: disable=invalid-name ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,indexed_slices.py,"@@ -84,7 +84,7 @@ class IndexedSlices(_TensorLike, composite_tensor.CompositeTensor): (e.g. `tf.gather`). Contrast this representation with - `tf.SparseTensor`, + `tf.sparse.SparseTensor`, which uses multi-dimensional indices and scalar values. """""" ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,ops.py,"@@ -338,7 +338,7 @@ class Tensor(_TensorLike): shape of a tensor at execution time. There are specialized tensors; for these, see `tf.Variable`, `tf.constant`, - `tf.placeholder`, `tf.SparseTensor`, and `tf.RaggedTensor`. + `tf.placeholder`, `tf.sparse.SparseTensor`, and `tf.RaggedTensor`. For more on Tensors, see the [guide](https://tensorflow.org/guide/tensor`). 
"""""" ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,sparse_tensor.py,"@@ -298,14 +298,14 @@ _pywrap_utils.RegisterType(""SparseTensorValue"", SparseTensorValue) @tf_export(""SparseTensorSpec"") class SparseTensorSpec(type_spec.BatchableTypeSpec): - """"""Type specification for a `tf.SparseTensor`."""""" + """"""Type specification for a `tf.sparse.SparseTensor`."""""" __slots__ = [""_shape"", ""_dtype""] value_type = property(lambda self: SparseTensor) def __init__(self, shape=None, dtype=dtypes.float32): - """"""Constructs a type specification for a `tf.SparseTensor`. + """"""Constructs a type specification for a `tf.sparse.SparseTensor`. Args: shape: The dense shape of the `SparseTensor`, or `None` to allow @@ -473,13 +473,13 @@ def convert_to_tensor_or_sparse_tensor(value, dtype=None, name=None): def is_sparse(x): """"""Check whether `x` is sparse. - Check whether an object is a `tf.SparseTensor` or + Check whether an object is a `tf.sparse.SparseTensor` or `tf.compat.v1.SparseTensorValue`. Args: x: A python object to check. Returns: - `True` iff `x` is a `tf.SparseTensor` or `tf.compat.v1.SparseTensorValue`. + `True` iff `x` is a `tf.sparse.SparseTensor` or `tf.compat.v1.SparseTensorValue`. """""" return isinstance(x, (SparseTensor, SparseTensorValue)) ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,local.py,"@@ -782,7 +782,7 @@ def local_conv_sparse_matmul(inputs, kernel, kernel_idxs, kernel_shape, output_shape): """"""Apply N-D convolution with un-shared weights using a single sparse matmul. - This method outputs `inputs . tf.SparseTensor(indices=kernel_idxs, + This method outputs `inputs . tf.sparse.SparseTensor(indices=kernel_idxs, values=kernel, dense_shape=kernel_shape)`, with `.` standing for matrix-multiply. It also reshapes `inputs` to 2-D and `output` to (N+2)-D. ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,array_ops.py,"@@ -3069,7 +3069,7 @@ def sparse_placeholder(dtype, shape=None, name=None): print(sess.run(y, feed_dict={ x: (indices, values, shape)})) # Will succeed. - sp = tf.SparseTensor(indices=indices, values=values, dense_shape=shape) + sp = tf.sparse.SparseTensor(indices=indices, values=values, dense_shape=shape) sp_value = sp.eval(session=sess) print(sess.run(y, feed_dict={x: sp_value})) # Will succeed. ``` @@ -3471,7 +3471,7 @@ def edit_distance(hypothesis, truth, normalize=True, name=""edit_distance""): # 'hypothesis' is a tensor of shape `[2, 1]` with variable-length values: # (0,0) = [""a""] # (1,0) = [""b""] - hypothesis = tf.SparseTensor( + hypothesis = tf.sparse.SparseTensor( [[0, 0, 0], [1, 0, 0]], [""a"", ""b""], @@ -3482,7 +3482,7 @@ def edit_distance(hypothesis, truth, normalize=True, name=""edit_distance""): # (0,1) = [""a""] # (1,0) = [""b"", ""c""] # (1,1) = [""a""] - truth = tf.SparseTensor( + truth = tf.sparse.SparseTensor( [[0, 1, 0], [1, 0, 0], [1, 0, 1], ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,ctc_ops.py,"@@ -1126,7 +1126,7 @@ def dense_labels_to_sparse(dense, length): length: int tensor of shape [batch] The length of each sequence in dense. Returns: - tf.SparseTensor with values only for the valid elements of sequences. + tf.sparse.SparseTensor with values only for the valid elements of sequences. 
"""""" flat_values = array_ops.reshape(dense, [-1]) ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,map_fn.py,"@@ -106,7 +106,7 @@ def map_fn(fn, * A `tf.DType` or `tf.TensorSpec` (to describe a `tf.Tensor`) * A `tf.RaggedTensorSpec` (to describe a `tf.RaggedTensor`) - * A `tf.SparseTensorSpec` (to describe a `tf.SparseTensor`) + * A `tf.SparseTensorSpec` (to describe a `tf.sparse.SparseTensor`) * A (possibly nested) tuple, list, or dict containing the above types. #### RaggedTensors @@ -159,11 +159,11 @@ def map_fn(fn, #### SparseTensors - `map_fn` supports `tf.SparseTensor` inputs and outputs. In particular: + `map_fn` supports `tf.sparse.SparseTensor` inputs and outputs. In particular: * If `elems` is a `SparseTensor`, then `fn` will be called with each row of that sparse tensor. In particular, the value passed to `fn` will be a - `tf.SparseTensor` with one fewer dimension than `elems`. + `tf.sparse.SparseTensor` with one fewer dimension than `elems`. * If the result of `map_fn` should be a `SparseTensor`, then use a `tf.SparseTensorSpec` to specify `fn_output_signature`. The individual @@ -171,7 +171,7 @@ def map_fn(fn, `SparseTensor` with one more dimension. >>> # Example: SparseTensor input - >>> st = tf.SparseTensor([[0, 0], [2, 0], [2, 1]], [2, 3, 4], [4, 4]) + >>> st = tf.sparse.SparseTensor([[0, 0], [2, 0], [2, 1]], [2, 3, 4], [4, 4]) >>> tf.map_fn(tf.sparse.reduce_sum, st, fn_output_signature=tf.int32) @@ -191,9 +191,9 @@ def map_fn(fn, *rows* of a `SparseTensor`. If you wish to map a function over the nonzero values, then you should use: - * `tf.SparseTensor(st.indices, fn(st.values), st.dense_shape)` + * `tf.sparse.SparseTensor(st.indices, fn(st.values), st.dense_shape)` (if the function is expressible as TensorFlow ops) - * `tf.SparseTensor(st.indices, tf.map_fn(fn, st.values), st.dense_shape)` + * `tf.sparse.SparseTensor(st.indices, tf.map_fn(fn, st.values), st.dense_shape)` (otherwise). #### `map_fn` vs. vectorized operations @@ -276,7 +276,7 @@ def map_fn(fn, * A `tf.DType` or `tf.TensorSpec` (to describe a `tf.Tensor`) * A `tf.RaggedTensorSpec` (to describe a `tf.RaggedTensor`) - * A `tf.SparseTensorSpec` (to describe a `tf.SparseTensor`) + * A `tf.SparseTensorSpec` (to describe a `tf.sparse.SparseTensor`) * A (possibly nested) tuple, list, or dict containing the above types. Returns: ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,math_ops.py,"@@ -1432,8 +1432,8 @@ def equal(x, y, name=None): Args: - x: A `tf.Tensor` or `tf.SparseTensor` or `tf.IndexedSlices`. - y: A `tf.Tensor` or `tf.SparseTensor` or `tf.IndexedSlices`. + x: A `tf.Tensor` or `tf.sparse.SparseTensor` or `tf.IndexedSlices`. + y: A `tf.Tensor` or `tf.sparse.SparseTensor` or `tf.IndexedSlices`. name: A name for the operation (optional). Returns: @@ -1468,8 +1468,8 @@ def not_equal(x, y, name=None): Args: - x: A `tf.Tensor` or `tf.SparseTensor` or `tf.IndexedSlices`. - y: A `tf.Tensor` or `tf.SparseTensor` or `tf.IndexedSlices`. + x: A `tf.Tensor` or `tf.sparse.SparseTensor` or `tf.IndexedSlices`. + y: A `tf.Tensor` or `tf.sparse.SparseTensor` or `tf.IndexedSlices`. name: A name for the operation (optional). Returns: @@ -2907,12 +2907,12 @@ def matmul(a, **does not support `tf.sparse.SparseTensor`**, it just makes optimizations that assume most values in `a` are zero. See `tf.sparse.sparse_dense_matmul` - for some support for `tf.SparseTensor` multiplication. 
+ for some support for `tf.sparse.SparseTensor` multiplication. b_is_sparse: If `True`, `b` is treated as a sparse matrix. Notice, this **does not support `tf.sparse.SparseTensor`**, it just makes optimizations that assume most values in `a` are zero. See `tf.sparse.sparse_dense_matmul` - for some support for `tf.SparseTensor` multiplication. + for some support for `tf.sparse.SparseTensor` multiplication. name: Name for the operation (optional). Returns: ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,pfor.py,"@@ -1217,10 +1217,10 @@ class PFor(object): the new dense shape will be (N, max_i(x_i), max_i(y_i), max_i(z_i)). Args: - y: A tf.SparseTensor. + y: A tf.sparse.SparseTensor. Returns: - A tf.SparseTensor that is the converted value corresponding to y. + A tf.sparse.SparseTensor that is the converted value corresponding to y. """""" outputs = [ self._convert_helper(t) for t in (y.indices, y.values, y.dense_shape) ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,ragged_tensor.py,"@@ -1629,7 +1629,7 @@ class RaggedTensor(composite_tensor.CompositeTensor): @classmethod def from_sparse(cls, st_input, name=None, row_splits_dtype=dtypes.int64): - """"""Converts a 2D `tf.SparseTensor` to a `RaggedTensor`. + """"""Converts a 2D `tf.sparse.SparseTensor` to a `RaggedTensor`. Each row of the `output` `RaggedTensor` will contain the explicit values from the same row in `st_input`. `st_input` must be ragged-right. If not @@ -1637,7 +1637,7 @@ class RaggedTensor(composite_tensor.CompositeTensor): Example: - >>> st = tf.SparseTensor(indices=[[0, 0], [0, 1], [0, 2], [1, 0], [3, 0]], + >>> st = tf.sparse.SparseTensor(indices=[[0, 0], [0, 1], [0, 2], [1, 0], [3, 0]], ... values=[1, 2, 3, 4, 5], ... dense_shape=[4, 3]) >>> tf.RaggedTensor.from_sparse(st).to_list() @@ -1690,7 +1690,7 @@ class RaggedTensor(composite_tensor.CompositeTensor): st_input.values, segment_ids, num_segments, validate=False) def to_sparse(self, name=None): - """"""Converts this `RaggedTensor` into a `tf.SparseTensor`. + """"""Converts this `RaggedTensor` into a `tf.sparse.SparseTensor`. Example: ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,sets_impl.py,"@@ -156,7 +156,7 @@ def set_intersection(a, b, validate_indices=True): ((1, 1, 0), 5), ((1, 1, 1), 6), ]) - a = tf.SparseTensor(list(a.keys()), list(a.values()), dense_shape=[2,2,2]) + a = tf.sparse.SparseTensor(list(a.keys()), list(a.values()), dense_shape=[2,2,2]) # b = np.array([[{1}, {}], [{4}, {5, 6, 7, 8}]]) b = collections.OrderedDict([ @@ -167,7 +167,7 @@ def set_intersection(a, b, validate_indices=True): ((1, 1, 2), 7), ((1, 1, 3), 8), ]) - b = tf.SparseTensor(list(b.keys()), list(b.values()), dense_shape=[2, 2, 4]) + b = tf.sparse.SparseTensor(list(b.keys()), list(b.values()), dense_shape=[2, 2, 4]) # `tf.sets.intersection` is applied to each aligned pair of sets. 
tf.sets.intersection(a, b) @@ -224,7 +224,7 @@ def set_difference(a, b, aminusb=True, validate_indices=True): ((1, 1, 0), 5), ((1, 1, 1), 6), ]) - a = tf.SparseTensor(list(a.keys()), list(a.values()), dense_shape=[2, 2, 2]) + a = tf.sparse.SparseTensor(list(a.keys()), list(a.values()), dense_shape=[2, 2, 2]) # np.array([[{1, 3}, {2}], [{4, 5}, {5, 6, 7, 8}]]) b = collections.OrderedDict([ @@ -238,7 +238,7 @@ def set_difference(a, b, aminusb=True, validate_indices=True): ((1, 1, 2), 7), ((1, 1, 3), 8), ]) - b = tf.SparseTensor(list(b.keys()), list(b.values()), dense_shape=[2, 2, 4]) + b = tf.sparse.SparseTensor(list(b.keys()), list(b.values()), dense_shape=[2, 2, 4]) # `set_difference` is applied to each aligned pair of sets. tf.sets.difference(a, b) @@ -302,7 +302,7 @@ def set_union(a, b, validate_indices=True): ((1, 1, 0), 5), ((1, 1, 1), 6), ]) - a = tf.SparseTensor(list(a.keys()), list(a.values()), dense_shape=[2, 2, 2]) + a = tf.sparse.SparseTensor(list(a.keys()), list(a.values()), dense_shape=[2, 2, 2]) # [[{1, 3}, {2}], [{4, 5}, {5, 6, 7, 8}]] b = collections.OrderedDict([ @@ -316,7 +316,7 @@ def set_union(a, b, validate_indices=True): ((1, 1, 2), 7), ((1, 1, 3), 8), ]) - b = tf.SparseTensor(list(b.keys()), list(b.values()), dense_shape=[2, 2, 4]) + b = tf.sparse.SparseTensor(list(b.keys()), list(b.values()), dense_shape=[2, 2, 4]) # `set_union` is applied to each aligned pair of sets. tf.sets.union(a, b) ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,sparse_ops.py,"@@ -16,7 +16,7 @@ # pylint: disable=g-short-docstring-punctuation """"""Sparse Tensor Representation. -See also `tf.SparseTensor`. +See also `tf.sparse.SparseTensor`. """""" from __future__ import absolute_import @@ -2460,7 +2460,7 @@ def sparse_softmax(sp_input, name=None): values = np.asarray([[[0., np.e], [1., 0.]], [[np.e, 0.], [np.e, np.e]]]) indices = np.vstack(np.where(values)).astype(np.int64).T - result = tf.sparse.softmax(tf.SparseTensor(indices, values, shape)) + result = tf.sparse.softmax(tf.sparse.SparseTensor(indices, values, shape)) # ...returning a 3-D SparseTensor, equivalent to: # [? 1.] [1 ?] # [1. ? ] and [.5 .5] ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,nest.py,"@@ -310,7 +310,7 @@ def flatten(structure, expand_composites=False): Args: structure: an arbitrarily nested structure. Note, numpy arrays are considered atoms and are not flattened. - expand_composites: If true, then composite tensors such as tf.SparseTensor + expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor and tf.RaggedTensor are expanded into their component tensors. Returns: @@ -364,7 +364,7 @@ def assert_same_structure(nest1, nest2, check_types=True, considered the same if they are both list subtypes (which allows ""list"" and ""_ListWrapper"" from trackable dependency tracking to compare equal). - expand_composites: If true, then composite tensors such as `tf.SparseTensor` + expand_composites: If true, then composite tensors such as `tf.sparse.SparseTensor` and `tf.RaggedTensor` are expanded into their component tensors. Raises: @@ -537,7 +537,7 @@ def pack_sequence_as(structure, flat_sequence, expand_composites=False): tuples, and dicts. Note: numpy arrays and strings are considered scalars. flat_sequence: flat sequence to pack. 
- expand_composites: If true, then composite tensors such as `tf.SparseTensor` + expand_composites: If true, then composite tensors such as `tf.sparse.SparseTensor` and `tf.RaggedTensor` are expanded into their component tensors. Returns: @@ -574,7 +574,7 @@ def map_structure(func, *structure, **kwargs): Note that namedtuples with identical name and fields are always considered to have the same shallow structure. * `expand_composites`: If set to `True`, then composite tensors such - as `tf.SparseTensor` and `tf.RaggedTensor` are expanded into their + as `tf.sparse.SparseTensor` and `tf.RaggedTensor` are expanded into their component tensors. If `False` (the default), then composite tensors are not expanded. @@ -762,7 +762,7 @@ def assert_shallow_structure(shallow_tree, `input_tree` have to be the same. Note that even with check_types==True, this function will consider two different namedtuple classes with the same name and _fields attribute to be the same class. - expand_composites: If true, then composite tensors such as tf.SparseTensor + expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor and tf.RaggedTensor are expanded into their component tensors. Raises: TypeError: If `shallow_tree` is a sequence but `input_tree` is not. @@ -911,7 +911,7 @@ def flatten_up_to(shallow_tree, input_tree, check_types=True, Note, numpy arrays are considered scalars. check_types: bool. If True, check that each node in shallow_tree has the same type as the corresponding node in input_tree. - expand_composites: If true, then composite tensors such as tf.SparseTensor + expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor and tf.RaggedTensor are expanded into their component tensors. Returns: @@ -1015,7 +1015,7 @@ def flatten_with_tuple_paths_up_to(shallow_tree, Note, numpy arrays are considered scalars. check_types: bool. If True, check that each node in shallow_tree has the same type as the corresponding node in input_tree. - expand_composites: If true, then composite tensors such as tf.SparseTensor + expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor and tf.RaggedTensor are expanded into their component tensors. Returns: @@ -1233,7 +1233,7 @@ def get_traverse_shallow_structure(traverse_fn, structure, shallow structure of the same type, describing which parts of the substructure to traverse. structure: The structure to traverse. - expand_composites: If true, then composite tensors such as tf.SparseTensor + expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor and tf.RaggedTensor are expanded into their component tensors. Returns: @@ -1313,7 +1313,7 @@ def yield_flat_paths(nest, expand_composites=False): Args: nest: the value to produce a flattened paths list for. - expand_composites: If true, then composite tensors such as tf.SparseTensor + expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor and tf.RaggedTensor are expanded into their component tensors. Yields: @@ -1338,7 +1338,7 @@ def flatten_with_joined_string_paths(structure, separator=""/"", structure: the nested structure to flatten. separator: string to separate levels of hierarchy in the results, defaults to '/'. - expand_composites: If true, then composite tensors such as tf.SparseTensor + expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor and tf.RaggedTensor are expanded into their component tensors. 
Returns: @@ -1362,7 +1362,7 @@ def flatten_with_tuple_paths(structure, expand_composites=False): Args: structure: the nested structure to flatten. - expand_composites: If true, then composite tensors such as tf.SparseTensor + expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor and tf.RaggedTensor are expanded into their component tensors. Returns: ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,util.h,"@@ -234,7 +234,7 @@ PyObject* AssertSameStructure(PyObject* o1, PyObject* o2, bool check_types, // nest: an arbitrarily nested structure or a scalar object. Note, numpy // arrays are considered scalars. // expand_composites: If true, then composite tensors (such as -// `tf.SparseTensor` and `tf.RaggedTensor` are flattened into their +// `tf.sparse.SparseTensor` and `tf.RaggedTensor` are flattened into their // component tensors. // // Returns: ",0,train 652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,util_wrapper.cc,"@@ -244,7 +244,7 @@ PYBIND11_MODULE(_pywrap_utils, m) { Args: nest: an arbitrarily nested structure or a scalar object. Note, numpy arrays are considered scalars. - expand_composites: If true, then composite tensors such as `tf.SparseTensor` + expand_composites: If true, then composite tensors such as `tf.sparse.SparseTensor` and `tf.RaggedTensor` are expanded into their component tensors. Returns: ",0,train e3ec33da39df95e24c8c22ad5dcf4c3b15707d6c,tensorflow/tensorflow,"Add a header to cover common math constants if not available on some platforms. PiperOrigin-RevId: 415640862 Change-Id: I0d927f9b1b51429254981e733d4c3c07d1633975",constants.h,"@@ -0,0 +1,61 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_CONSTANTS_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_CONSTANTS_H_ + +// Maths constants. +// The following macros are not always available on all platforms. +// E.g. MSVC requires additional compile flag to export those. 
+#ifndef M_E +#define M_E 2.7182818284590452354 /* e */ +#endif +#ifndef M_LOG2E +#define M_LOG2E 1.4426950408889634074 /* log_2 e */ +#endif +#ifndef M_LOG10E +#define M_LOG10E 0.43429448190325182765 /* log_10 e */ +#endif +#ifndef M_LN2 +#define M_LN2 0.69314718055994530942 /* log_e 2 */ +#endif +#ifndef M_LN10 +#define M_LN10 2.30258509299404568402 /* log_e 10 */ +#endif +#ifndef M_PI +#define M_PI 3.14159265358979323846 /* pi */ +#endif +#ifndef M_PI_2 +#define M_PI_2 1.57079632679489661923 /* pi/2 */ +#endif +#ifndef M_PI_4 +#define M_PI_4 0.78539816339744830962 /* pi/4 */ +#endif +#ifndef M_1_PI +#define M_1_PI 0.31830988618379067154 /* 1/pi */ +#endif +#ifndef M_2_PI +#define M_2_PI 0.63661977236758134308 /* 2/pi */ +#endif +#ifndef M_2_SQRTPI +#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */ +#endif +#ifndef M_SQRT2 +#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ +#endif +#ifndef M_SQRT1_2 +#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ +#endif + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_CONSTANTS_H_ ",0,train 86d29f8a72ad3e8042a0ff6abcb90e863b4c2d08,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-12-12 PiperOrigin-RevId: 285142785 Change-Id: Ie2e4c6f05f934fd6fea658e850d1967cc2268bba",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 12, 11) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 12, 12) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train 3f9c3d05329aec8af0330db66b0481d53658ee2a,tensorflow/tensorflow,"Replaced get_shape() with shape. This is the recommended method to use.",rnn.py,"@@ -217,10 +217,10 @@ def dynamic_rnn(cell, parallel_iterations = parallel_iterations or 32 if sequence_length is not None: sequence_length = math_ops.cast(sequence_length, dtypes.int32) - if sequence_length.get_shape().rank not in (None, 1): + if sequence_length.shape.rank not in (None, 1): raise ValueError( ""sequence_length must be a vector of length batch_size, "" - ""but saw shape: %s"" % sequence_length.get_shape()) + ""but saw shape: %s"" % sequence_length.shape) sequence_length = array_ops.identity( # Just to find it in the graph. sequence_length, name=""sequence_length"") ",0,train 7605b750d32a471e234a90a8d056bad8d084fada,tensorflow/tensorflow,"Remove the usage of TF private API ops.uid from Keras. 
PiperOrigin-RevId: 320420366 Change-Id: I2be3622a9cdeff207e2c112ff0c220e6ea15f729",hdf5_format_test.py,"@@ -21,6 +21,7 @@ from __future__ import print_function import os import shutil import tempfile +import uuid from absl.testing import parameterized import numpy as np @@ -1192,8 +1193,7 @@ class TestWeightSavingAndLoadingTFFormat(test.TestCase, parameterized.TestCase): m = DummySubclassModel() v = m.add_weight(name='v', shape=[]) self.evaluate(v.assign(42.)) - prefix = os.path.join(self.get_temp_dir(), - '{}'.format(ops.uid()), 'ckpt/') + prefix = os.path.join(self.get_temp_dir(), str(uuid.uuid4()), 'ckpt/') m.save_weights(prefix) self.evaluate(v.assign(2.)) m.load_weights(prefix) @@ -1236,8 +1236,7 @@ class TestWeightSavingAndLoadingTFFormat(test.TestCase, parameterized.TestCase): m = DummySubclassModel() v = m.add_weight(name='v', shape=[]) self.evaluate(v.assign(42.)) - prefix = os.path.join(self.get_temp_dir(), - '{}'.format(ops.uid()), 'bckpt') + prefix = os.path.join(self.get_temp_dir(), str(uuid.uuid4()), 'bckpt') m.save_weights(prefix) self.evaluate(v.assign(2.)) m.load_weights(prefix) ",0,test 1879b51356ecf0a0f4971f9b6ef61d518830c398,tensorflow/tensorflow,Example of text classification from characters using RNNs,text_classification.py,"@@ -68,9 +68,9 @@ def rnn_model(X, y): # EMBEDDING_SIZE]. word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words') - # Split sequence into list of embedding per word. + # Split into list of embedding per word, while removing doc length dim. # word_list results to be a list of tensors [batch_size, EMBEDDING_SIZE]. - word_list = [tf.squeeze(w, [1]) for w in tf.split(1, MAX_DOCUMENT_LENGTH, word_vectors)] + word_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, word_vectors) # Create a Gated Recurrent Unit cell with hidden size of EMBEDDING_SIZE. cell = rnn_cell.GRUCell(EMBEDDING_SIZE) # Create an unrolled Recurrent Neural Networks to length of ",0,test 1879b51356ecf0a0f4971f9b6ef61d518830c398,tensorflow/tensorflow,Example of text classification from characters using RNNs,text_classification_character_rnn.py,"@@ -0,0 +1,81 @@ +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""""" +This is an example of using recurrent neural networks over characters +for DBpedia dataset to predict class from description of an entity. 
+ +This model is similar to one described in this paper: + ""Character-level Convolutional Networks for Text Classification"" + http://arxiv.org/abs/1509.01626 + +and is somewhat alternative to the Lua code from here: + https://github.com/zhangxiangxiao/Crepe +"""""" + +import csv +import numpy as np +from sklearn import metrics + +import tensorflow as tf +from tensorflow.models.rnn import rnn, rnn_cell +import skflow + +### Training data + +# Download dbpedia_csv.tar.gz from +# https://drive.google.com/folderview?id=0Bz8a_Dbh9Qhbfll6bVpmNUtUcFdjYmF2SEpmZUZUcVNiMUw1TWN6RDV3a0JHT3kxLVhVR2M +# Unpack: tar -xvf dbpedia_csv.tar.gz + +def load_dataset(filename): + target = [] + data = [] + reader = csv.reader(open(filename), delimiter=',') + for line in reader: + target.append(int(line[0])) + data.append(line[2]) + return data, np.array(target, np.float32) + +X_train, y_train = load_dataset('dbpedia_csv/train.csv') +X_test, y_test = load_dataset('dbpedia_csv/test.csv') + +### Process vocabulary + +MAX_DOCUMENT_LENGTH = 10 + +char_processor = skflow.preprocessing.ByteProcessor(MAX_DOCUMENT_LENGTH) +X_train = np.array(list(char_processor.fit_transform(X_train))) +X_test = np.array(list(char_processor.transform(X_test))) + +### Models + +HIDDEN_SIZE = 20 + +def char_rnn_model(X, y): + byte_list = skflow.ops.one_hot_matrix(X, 256) + byte_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, byte_list) + cell = rnn_cell.GRUCell(HIDDEN_SIZE) + _, encoding = rnn.rnn(cell, byte_list, dtype=tf.float32) + return skflow.models.logistic_regression(encoding[-1], y) + +classifier = skflow.TensorFlowEstimator(model_fn=char_rnn_model, n_classes=15, + steps=100, optimizer='Adam', learning_rate=0.01, continue_training=True, + log_device_placement=False) + +# Continuesly train for 1000 steps & predict on test set. +while True: + classifier.fit(X_train, y_train) + score = metrics.accuracy_score(classifier.predict(X_test), y_test) + print(""Accuracy: %f"" % score) + ",0,test 6bf5a24e8805c57a2d7e5741519a090228c76a89,tensorflow/tensorflow,Change: 112640197,padding_fifo_queue.cc,"@@ -155,7 +155,7 @@ void PaddingFIFOQueue::TryDequeueMany(int num_elements, OpKernelContext* ctx, // Expand sizes to match. int64 max_val = 0; for (const Tuple& t : tuples) { - max_val = max(max_val, t[i].shape().dim_size(j)); + max_val = std::max(max_val, t[i].shape().dim_size(j)); } shape.AddDim(max_val); } ",0,train 348eaa0d0af4106479ef4754277630cc4c4141c0,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-02-26 PiperOrigin-RevId: 235675690",compat.py,"@@ -27,7 +27,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 2, 25) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 2, 26) @tf_export(""compat.forward_compatible"") ",0,train 520c69a59b171bb27c898f8fc72fe5cd99bd32e2,tensorflow/tensorflow,"PSv2: Apply strategy.run() change to parameter_server_training_test. 
PiperOrigin-RevId: 327482183 Change-Id: I1e91e50905cb7011fe987a40a65688f2ef1d091c",parameter_server_training_test.py,"@@ -146,18 +146,22 @@ class KPLTest(test.TestCase): @def_function.function def worker_fn(iterator): - batch_data, labels = next(iterator) - with backprop.GradientTape() as tape: - pred = model(batch_data, training=True) - loss = nn.compute_average_loss( - keras.losses.BinaryCrossentropy( - reduction=loss_reduction.ReductionV2.NONE)(labels, pred)) - gradients = tape.gradient(loss, model.trainable_variables) - - optimizer.apply_gradients(zip(gradients, model.trainable_variables)) - - actual_pred = math_ops.cast(math_ops.greater(pred, 0.5), dtypes.int64) - accuracy.update_state(labels, actual_pred) + + def train_step(iterator): + batch_data, labels = next(iterator) + with backprop.GradientTape() as tape: + pred = model(batch_data, training=True) + loss = nn.compute_average_loss( + keras.losses.BinaryCrossentropy( + reduction=loss_reduction.ReductionV2.NONE)(labels, pred)) + gradients = tape.gradient(loss, model.trainable_variables) + + optimizer.apply_gradients(zip(gradients, model.trainable_variables)) + + actual_pred = math_ops.cast(math_ops.greater(pred, 0.5), dtypes.int64) + accuracy.update_state(labels, actual_pred) + + self.client._strategy.run(train_step, args=(iterator,)) distributed_iterator = iter(distributed_dataset) for _ in range(10): ",0,train ad5c47fe1f62599b26687911ea9749006aa51ebb,tensorflow/tensorflow,"Replaced get_shape() with shape. This is the recommended method to use.",audio_microfrontend_op.py,"@@ -96,7 +96,7 @@ def audio_microfrontend(audio, Raises: ValueError: If the audio tensor is not explicitly a vector. """""" - audio_shape = audio.get_shape() + audio_shape = audio.shape if audio_shape.ndims is None: raise ValueError(""Input to `AudioMicrofrontend` should have known rank."") if len(audio_shape) > 1: ",0,test fcf57c7246472f6c9e47b9c6d804668b52848497,tensorflow/tensorflow,"Use collections.abc for Python 3.10+ In Python 3.10+, Abstract Base Classes are not longer in `collections`. `collections.abc` should be used instead. Update python/eager/core_test.py, python/util/nest_test.py, python/distribute/distribute_utils_test.py. PiperOrigin-RevId: 415381625 Change-Id: I89b1da7af3254d34b8d2facb57ce2a7ff938f176",distribute_utils_test.py,"@@ -15,6 +15,7 @@ """"""Tests for utility functions in distribute_utils."""""" import collections +import collections.abc from absl.testing import parameterized import wrapt @@ -82,8 +83,9 @@ class RegroupAndSelectDeviceTest(test.TestCase, parameterized.TestCase): self._is_per_replica(result[""b""], [""b1"", ""b2""]) def testRegroupCollectionsMapping(self): - class CollectionsMappingBasedClass(collections.Mapping): - """"""Class inherited from collections.Mapping."""""" + + class CollectionsMappingBasedClass(collections.abc.Mapping): + """"""Class inherited from collections.abc.Mapping."""""" def __init__(self, *args, **kwargs): self._d = dict(*args, **kwargs) ",0,train fcf57c7246472f6c9e47b9c6d804668b52848497,tensorflow/tensorflow,"Use collections.abc for Python 3.10+ In Python 3.10+, Abstract Base Classes are not longer in `collections`. `collections.abc` should be used instead. Update python/eager/core_test.py, python/util/nest_test.py, python/distribute/distribute_utils_test.py. 
PiperOrigin-RevId: 415381625 Change-Id: I89b1da7af3254d34b8d2facb57ce2a7ff938f176",core_test.py,"@@ -14,7 +14,7 @@ # ============================================================================== """"""Tests for core."""""" -import collections +import collections.abc import os import pickle import threading @@ -81,11 +81,11 @@ class TFETest(test_util.TensorFlowTestCase): def _test_hashable(self, a, b, hashable): if hashable: - self.assertIsInstance(b, collections.Hashable) + self.assertIsInstance(b, collections.abc.Hashable) self.assertLen(set([a, b]), 2) else: # TODO(gjn): Figure out how to make this work for tf.Tensor - # self.assertNotIsInstance(b, collections.Hashable) + # self.assertNotIsInstance(b, collections.abc.Hashable) with self.assertRaisesRegex(TypeError, 'unhashable'): set([a, b]) ",0,train fcf57c7246472f6c9e47b9c6d804668b52848497,tensorflow/tensorflow,"Use collections.abc for Python 3.10+ In Python 3.10+, Abstract Base Classes are not longer in `collections`. `collections.abc` should be used instead. Update python/eager/core_test.py, python/util/nest_test.py, python/distribute/distribute_utils_test.py. PiperOrigin-RevId: 415381625 Change-Id: I89b1da7af3254d34b8d2facb57ce2a7ff938f176",nest_test.py,"@@ -15,6 +15,7 @@ """"""Tests for utilities working with arbitrarily nested structures."""""" import collections +import collections.abc import time from typing import NamedTuple @@ -30,7 +31,6 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.platform import test from tensorflow.python.util import nest -from tensorflow.python.util.compat import collections_abc try: import attr # pylint:disable=g-import-not-at-top @@ -38,7 +38,7 @@ except ImportError: attr = None -class _CustomMapping(collections_abc.Mapping): +class _CustomMapping(collections.abc.Mapping): def __init__(self, *args, **kwargs): self._wrapped = dict(*args, **kwargs) @@ -57,7 +57,7 @@ class _CustomList(list): pass -class _CustomSequenceThatRaisesException(collections.Sequence): +class _CustomSequenceThatRaisesException(collections.abc.Sequence): def __len__(self): return 1 ",0,train 71dbfdbec550845def8db48ed31fb4f978407906,tensorflow/tensorflow,"Summary ops should run on only Chef not on all workers. Change: 123163672",estimator.py,"@@ -393,6 +393,11 @@ class BaseEstimator(sklearn.BaseEstimator): summary_op=logging_ops.get_summary_op(), save_summary_steps=100) + is_chief = self._config.task == 0 + if not is_chief: + # Run monitors only on chief. + monitors = [] + # Setup monitors. for monitor in monitors: monitor.set_estimator(self) @@ -407,7 +412,7 @@ class BaseEstimator(sklearn.BaseEstimator): init_feed_dict=init_feed_fn() if init_feed_fn is not None else None, init_fn=init_fn, log_every_steps=log_every_steps, - supervisor_is_chief=(self._config.task == 0), + supervisor_is_chief=is_chief, supervisor_master=self._config.master, feed_fn=feed_fn, max_steps=steps, ",0,train 0a451b1aa0baaa3f7abbf8d90dfe58193cf1533e,tensorflow/tensorflow,"Speed up statistical_testing_test by consolidating sess.run calls. 
PiperOrigin-RevId: 190721153",statistical_testing_test.py,"@@ -22,39 +22,75 @@ import numpy as np from tensorflow.contrib.distributions.python.ops import statistical_testing as st from tensorflow.python.framework import errors -from tensorflow.python.ops import check_ops from tensorflow.python.platform import test class StatisticalTestingTest(test.TestCase): def test_dkwm_design_mean_one_sample_soundness(self): - numbers = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] + thresholds = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.] - with self.test_session() as sess: - for ff in rates: - for fp in rates: - sufficient_n = st.min_num_samples_for_dkwm_mean_test( - numbers, 0., 1., false_fail_rate=ff, false_pass_rate=fp) - detectable_d = st.min_discrepancy_of_true_means_detectable_by_dkwm( - sufficient_n, 0., 1., false_fail_rate=ff, false_pass_rate=fp) - sess.run(check_ops.assert_less_equal(detectable_d, numbers)) + false_fail_rates, false_pass_rates = np.meshgrid(rates, rates) + false_fail_rates = false_fail_rates.flatten().astype(np.float32) + false_pass_rates = false_pass_rates.flatten().astype(np.float32) + + detectable_discrepancies = [] + for false_pass_rate, false_fail_rate in zip( + false_pass_rates, false_fail_rates): + sufficient_n = st.min_num_samples_for_dkwm_mean_test( + thresholds, low=0., high=1., false_fail_rate=false_fail_rate, + false_pass_rate=false_pass_rate) + detectable_discrepancies.append( + st.min_discrepancy_of_true_means_detectable_by_dkwm( + sufficient_n, low=0., high=1., false_fail_rate=false_fail_rate, + false_pass_rate=false_pass_rate)) + + detectable_discrepancies_ = self.evaluate(detectable_discrepancies) + for discrepancies, false_pass_rate, false_fail_rate in zip( + detectable_discrepancies_, false_pass_rates, false_fail_rates): + below_threshold = discrepancies <= thresholds + self.assertAllEqual( + np.ones_like(below_threshold, np.bool), below_threshold, + msg='false_pass_rate({}), false_fail_rate({})'.format( + false_pass_rate, false_fail_rate)) def test_dkwm_design_mean_two_sample_soundness(self): - numbers = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] + thresholds = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.] 
- with self.test_session() as sess: - for ff in rates: - for fp in rates: - (sufficient_n1, - sufficient_n2) = st.min_num_samples_for_dkwm_mean_two_sample_test( - numbers, 0., 1., 0., 1., - false_fail_rate=ff, false_pass_rate=fp) - d_fn = st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample - detectable_d = d_fn( - sufficient_n1, 0., 1., sufficient_n2, 0., 1., - false_fail_rate=ff, false_pass_rate=fp) - sess.run(check_ops.assert_less_equal(detectable_d, numbers)) + false_fail_rates, false_pass_rates = np.meshgrid(rates, rates) + false_fail_rates = false_fail_rates.flatten().astype(np.float32) + false_pass_rates = false_pass_rates.flatten().astype(np.float32) + + detectable_discrepancies = [] + for false_pass_rate, false_fail_rate in zip( + false_pass_rates, false_fail_rates): + [ + sufficient_n1, + sufficient_n2 + ] = st.min_num_samples_for_dkwm_mean_two_sample_test( + thresholds, low1=0., high1=1., low2=0., high2=1., + false_fail_rate=false_fail_rate, + false_pass_rate=false_pass_rate) + + detectable_discrepancies.append( + st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample( + n1=sufficient_n1, + low1=0., + high1=1., + n2=sufficient_n2, + low2=0., + high2=1., + false_fail_rate=false_fail_rate, + false_pass_rate=false_pass_rate)) + + detectable_discrepancies_ = self.evaluate(detectable_discrepancies) + for discrepancies, false_pass_rate, false_fail_rate in zip( + detectable_discrepancies_, false_pass_rates, false_fail_rates): + below_threshold = discrepancies <= thresholds + self.assertAllEqual( + np.ones_like(below_threshold, np.bool), below_threshold, + msg='false_pass_rate({}), false_fail_rate({})'.format( + false_pass_rate, false_fail_rate)) def test_true_mean_confidence_interval_by_dkwm_one_sample(self): rng = np.random.RandomState(seed=0) ",0,train 9ae3853ccabef46d77a1ef16b6a8d7a28121a7bd,tensorflow/tensorflow,"TFLM: nit: Fix an obvious issue in the test. PiperOrigin-RevId: 277166720 Change-Id: Iff23f196c61f837d65bbda30e7c125281bb12ce6",magic_wand_test.cc,"@@ -154,7 +154,7 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) { negative_score = output->data.f[kNegativeIndex]; TF_LITE_MICRO_EXPECT_GT(slope_score, wing_score); TF_LITE_MICRO_EXPECT_GT(slope_score, ring_score); - // TF_LITE_MICRO_EXPECT_GT(slope_score, negative_score); + TF_LITE_MICRO_EXPECT_GT(slope_score, negative_score); } TF_LITE_MICRO_TESTS_END ",0,train 74137f994faad09593ae2daad6251a4ccf72f558,tensorflow/tensorflow,"Fix signed int overflow issue in tensor_id.cc When a node name has a long numeric suffix, e.g., ""foo/y_0/gradient_debug_09684b60f2184c67b744721915034528"" (as has happened with tfdbg GradientsDebugger), the parsing algorithm in ParseTensorName() may experience signed int overflow. Replacing the types with ""unsigned int"" resolves the issue. PiperOrigin-RevId: 168039195",tensor_id.cc,"@@ -34,8 +34,8 @@ TensorId ParseTensorName(StringPiece name) { // whole name string forms the first part of the tensor name. const char* base = name.data(); const char* p = base + name.size() - 1; - int index = 0; - int mul = 1; + unsigned int index = 0; + unsigned int mul = 1; while (p > base && (*p >= '0' && *p <= '9')) { index += ((*p - '0') * mul); mul *= 10; ",0,test 3745f2582daeae7a49a129e250cf0cc2d573924a,tensorflow/tensorflow,"Pad support for quantized zero. 
PiperOrigin-RevId: 191938267",optimized_ops.h,"@@ -5067,7 +5067,7 @@ template inline void Pad(const T* input_data, const Dims<4>& input_dims, const std::vector& left_paddings, const std::vector& right_paddings, T* output_data, - const Dims<4>& output_dims) { + const Dims<4>& output_dims, const int32_t pad_value) { gemmlowp::ScopedProfilingLabel label(""Pad""); const int output_batch = ArraySize(output_dims, 3); const int output_height = ArraySize(output_dims, 2); @@ -5087,27 +5087,27 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims, const int input_depth = ArraySize(input_dims, 0); if (left_b_padding != 0) { - memset(output_data, 0, + memset(output_data, pad_value, left_b_padding * output_height * output_width * output_depth * sizeof(T)); } for (int out_b = left_b_padding; out_b < output_batch - right_b_padding; ++out_b) { if (left_h_padding != 0) { - memset(output_data + Offset(output_dims, 0, 0, 0, out_b), 0, + memset(output_data + Offset(output_dims, 0, 0, 0, out_b), pad_value, left_h_padding * output_width * output_depth * sizeof(T)); } for (int out_h = left_h_padding; out_h < output_height - right_h_padding; ++out_h) { if (left_w_padding != 0) { - memset(output_data + Offset(output_dims, 0, 0, out_h, out_b), 0, + memset(output_data + Offset(output_dims, 0, 0, out_h, out_b), pad_value, left_w_padding * output_depth * sizeof(T)); } for (int out_w = left_w_padding; out_w < output_width - right_w_padding; ++out_w) { if (left_d_padding != 0) { - memset(output_data + Offset(output_dims, 0, out_w, out_h, out_b), 0, - left_d_padding * sizeof(T)); + memset(output_data + Offset(output_dims, 0, out_w, out_h, out_b), + pad_value, left_d_padding * sizeof(T)); } T* out = output_data + @@ -5121,20 +5121,21 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims, memset( output_data + Offset(output_dims, output_depth - right_d_padding, out_w, out_h, out_b), - 0, right_d_padding * sizeof(T)); + pad_value, right_d_padding * sizeof(T)); } } if (right_w_padding != 0) { memset( output_data + Offset(output_dims, 0, output_width - right_w_padding, out_h, out_b), - 0, right_w_padding * output_depth * sizeof(T)); + pad_value, right_w_padding * output_depth * sizeof(T)); } } if (right_h_padding != 0) { memset(output_data + Offset(output_dims, 0, 0, output_height - right_h_padding, out_b), - 0, right_h_padding * output_width * output_depth * sizeof(T)); + pad_value, + right_h_padding * output_width * output_depth * sizeof(T)); } } if (right_b_padding != 0) { @@ -5146,6 +5147,15 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims, } } +template +inline void Pad(const T* input_data, const Dims<4>& input_dims, + const std::vector& left_paddings, + const std::vector& right_paddings, T* output_data, + const Dims<4>& output_dims) { + Pad(input_data, input_dims, left_paddings, right_paddings, output_data, + output_dims, 0); +} + template inline void StridedSlice(const T* input_data, const Dims<4>& input_dims, int begin_mask, int end_mask, ",0,train 3745f2582daeae7a49a129e250cf0cc2d573924a,tensorflow/tensorflow,"Pad support for quantized zero. 
PiperOrigin-RevId: 191938267",reference_ops.h,"@@ -3086,7 +3086,7 @@ template inline void Pad(const T* input_data, const Dims<4>& input_dims, const std::vector& left_paddings, const std::vector& right_paddings, T* output_data, - const Dims<4>& output_dims) { + const Dims<4>& output_dims, const int32_t pad_value) { const int output_batch = ArraySize(output_dims, 3); const int output_height = ArraySize(output_dims, 2); const int output_width = ArraySize(output_dims, 1); @@ -3116,7 +3116,7 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims, out_w >= output_width - right_w_padding || out_d < left_d_padding || out_d >= output_depth - right_d_padding) { - *out_ptr++ = 0; + *out_ptr++ = static_cast(pad_value); } else { *out_ptr++ = *in_ptr++; } @@ -3126,6 +3126,15 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims, } } +template +inline void Pad(const T* input_data, const Dims<4>& input_dims, + const std::vector& left_paddings, + const std::vector& right_paddings, T* output_data, + const Dims<4>& output_dims) { + Pad(input_data, input_dims, left_paddings, right_paddings, output_data, + output_dims, 0); +} + inline bool LoopCondition(int index, int stop, int stride) { return stride > 0 ? index < stop : index > stop; } ",0,train 3745f2582daeae7a49a129e250cf0cc2d573924a,tensorflow/tensorflow,"Pad support for quantized zero. PiperOrigin-RevId: 191938267",pad.cc,"@@ -119,39 +119,46 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { after_padding.push_back(paddings_data[idx * 2 + 1]); } -#define TF_LITE_PAD(type, scalar) \ +#define TF_LITE_PAD(type, scalar, pad_value) \ type::Pad(GetTensorData(op_context.input), \ GetTensorDims(op_context.input), before_padding, after_padding, \ GetTensorData(op_context.output), \ - GetTensorDims(op_context.output)) + GetTensorDims(op_context.output), pad_value) switch (op_context.input->type) { case kTfLiteFloat32: if (kernel_type == kReference) { - TF_LITE_PAD(reference_ops, float); + TF_LITE_PAD(reference_ops, float, 0); } else if (kernel_type == kGenericOptimized) { - TF_LITE_PAD(optimized_ops, float); + TF_LITE_PAD(optimized_ops, float, 0); } break; case kTfLiteUInt8: + // Quantized Pad requires that 0 is represented in the quantized range. + TF_LITE_ENSURE(context, op_context.output->params.zero_point >= + std::numeric_limits::min()); + TF_LITE_ENSURE(context, op_context.output->params.zero_point <= + std::numeric_limits::max()); if (kernel_type == kReference) { - TF_LITE_PAD(reference_ops, uint8_t); + TF_LITE_PAD(reference_ops, uint8_t, + op_context.output->params.zero_point); } else if (kernel_type == kGenericOptimized) { - TF_LITE_PAD(optimized_ops, uint8_t); + TF_LITE_PAD(optimized_ops, uint8_t, + op_context.output->params.zero_point); } break; case kTfLiteInt32: if (kernel_type == kReference) { - TF_LITE_PAD(reference_ops, int32_t); + TF_LITE_PAD(reference_ops, int32_t, 0); } else if (kernel_type == kGenericOptimized) { - TF_LITE_PAD(optimized_ops, int32_t); + TF_LITE_PAD(optimized_ops, int32_t, 0); } break; case kTfLiteInt64: if (kernel_type == kReference) { - TF_LITE_PAD(reference_ops, int64_t); + TF_LITE_PAD(reference_ops, int64_t, 0); } else if (kernel_type == kGenericOptimized) { - TF_LITE_PAD(optimized_ops, int64_t); + TF_LITE_PAD(optimized_ops, int64_t, 0); } break; default: ",0,train 3745f2582daeae7a49a129e250cf0cc2d573924a,tensorflow/tensorflow,"Pad support for quantized zero. 
PiperOrigin-RevId: 191938267",pad_test.cc,"@@ -22,6 +22,7 @@ namespace tflite { namespace { using ::testing::ElementsAreArray; +using ::testing::Matcher; class PadOpModel : public SingleOpModel { public: @@ -29,6 +30,10 @@ class PadOpModel : public SingleOpModel { PopulateTensor(input_, data); } + void SetQuantizedInput(std::initializer_list data) { + QuantizeAndPopulate(input_, data); + } + void SetPaddings(std::initializer_list paddings) { PopulateTensor(paddings_, paddings); } @@ -36,6 +41,11 @@ class PadOpModel : public SingleOpModel { std::vector GetOutput() { return ExtractVector(output_); } std::vector GetOutputShape() { return GetTensorShape(output_); } + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } + protected: int input_; int output_; @@ -50,16 +60,17 @@ class PadOpModel : public SingleOpModel { // m.Invoke(); class PadOpConstModel : public PadOpModel { public: - PadOpConstModel(std::initializer_list input_shape, + PadOpConstModel(const TensorData& input, std::initializer_list paddings_shape, - std::initializer_list paddings) { - input_ = AddInput(TensorType_FLOAT32); + std::initializer_list paddings, + const TensorData& output) { + input_ = AddInput(input); paddings_ = AddConstInput(TensorType_INT32, paddings, paddings_shape); - output_ = AddOutput(TensorType_FLOAT32); + output_ = AddOutput(output); SetBuiltinOp(BuiltinOperator_PAD, BuiltinOptions_PadOptions, CreatePadOptions(builder_).Union()); - BuildInterpreter({input_shape}); + BuildInterpreter({input.shape}); } }; @@ -72,40 +83,45 @@ class PadOpConstModel : public PadOpModel { // m.Invoke(); class PadOpDynamicModel : public PadOpModel { public: - PadOpDynamicModel(std::initializer_list input_shape, - std::initializer_list paddings_shape) { - input_ = AddInput(TensorType_FLOAT32); + PadOpDynamicModel(const TensorData& input, + std::initializer_list paddings_shape, + const TensorData& output) { + input_ = AddInput(input); paddings_ = AddInput(TensorType_INT32); - output_ = AddOutput(TensorType_FLOAT32); + output_ = AddOutput(output); SetBuiltinOp(BuiltinOperator_PAD, BuiltinOptions_PadOptions, CreatePadOptions(builder_).Union()); - BuildInterpreter({input_shape, paddings_shape}); + BuildInterpreter({input.shape, paddings_shape}); } }; TEST(PadOpTest, TooManyDimensions) { EXPECT_DEATH( - PadOpConstModel({1, 2, 3, 4, 5, 6, 7, 8, 9}, {9, 2}, - {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9}), + PadOpConstModel({TensorType_FLOAT32, {1, 2, 3, 4, 5, 6, 7, 8, 9}}, {9, 2}, + {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9}, + {TensorType_FLOAT32}), ""dims != 4""); } TEST(PadOpTest, UnequalDimensions) { - EXPECT_DEATH(PadOpConstModel({1, 1, 2, 1}, {3, 2}, {1, 1, 2, 2, 3, 3}), + EXPECT_DEATH(PadOpConstModel({TensorType_FLOAT32, {1, 1, 2, 1}}, {3, 2}, + {1, 1, 2, 2, 3, 3}, {TensorType_FLOAT32}), ""3 != 4""); } TEST(PadOpTest, InvalidPadValue) { EXPECT_DEATH( - PadOpConstModel({1, 1, 2, 1}, {4, 2}, {0, 0, 1, -1, 2, -1, 0, 0}), + PadOpConstModel({TensorType_FLOAT32, {1, 1, 2, 1}}, {4, 2}, + {0, 0, 1, -1, 2, -1, 0, 0}, {TensorType_FLOAT32}), ""Pad value has to be greater than equal to 0.""); } TEST(PadOpTest, SimpleConstTest) { // Padding is represented as four 2-D lists representing above padding and // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}). 
- PadOpConstModel m({1, 2, 2, 1}, {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0}); + PadOpConstModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2}, + {0, 0, 1, 1, 1, 1, 0, 0}, {TensorType_FLOAT32}); m.SetInput({1, 2, 3, 4}); m.Invoke(); EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, @@ -114,7 +130,8 @@ TEST(PadOpTest, SimpleConstTest) { } TEST(PadOpTest, SimpleDynamicTest) { - PadOpDynamicModel m({1, 2, 2, 1}, {4, 2}); + PadOpDynamicModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2}, + {TensorType_FLOAT32}); m.SetInput({1, 2, 3, 4}); m.SetPaddings({0, 0, 1, 1, 1, 1, 0, 0}); m.Invoke(); @@ -124,7 +141,8 @@ TEST(PadOpTest, SimpleDynamicTest) { } TEST(PadOpTest, AdvancedConstTest) { - PadOpConstModel m({1, 2, 3, 1}, {4, 2}, {0, 0, 0, 2, 1, 3, 0, 0}); + PadOpConstModel m({TensorType_FLOAT32, {1, 2, 3, 1}}, {4, 2}, + {0, 0, 0, 2, 1, 3, 0, 0}, {TensorType_FLOAT32}); m.SetInput({1, 2, 3, 4, 5, 6}); m.Invoke(); EXPECT_THAT(m.GetOutput(), @@ -134,7 +152,8 @@ TEST(PadOpTest, AdvancedConstTest) { } TEST(PadOpTest, AdvancedDynamicTest) { - PadOpDynamicModel m({1, 2, 3, 1}, {4, 2}); + PadOpDynamicModel m({TensorType_FLOAT32, {1, 2, 3, 1}}, {4, 2}, + {TensorType_FLOAT32}); m.SetInput({1, 2, 3, 4, 5, 6}); m.SetPaddings({0, 0, 0, 2, 1, 3, 0, 0}); m.Invoke(); @@ -144,6 +163,80 @@ TEST(PadOpTest, AdvancedDynamicTest) { EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 7, 1})); } +class QuantizedPadOpTest : public ::testing::Test { + protected: + std::vector> DequantizedArrayNear( + const std::vector& values, const float min, const float max) { + const float quantization_tolerance = (max - min) / 255.0; + return ArrayFloatNear(values, quantization_tolerance); + } +}; + +TEST_F(QuantizedPadOpTest, ZeroNotInQuantizationRange) { + // The test_util and actual quantization code currently ensure that the range + // must include zero, but if that ever changes, this test will catch it. + EXPECT_DEATH(PadOpConstModel m({TensorType_UINT8, {1, 2, 2, 1}, 1.0, 2.0}, + {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0}, + {TensorType_UINT8, {}, 1.0, 2.0}), + "".*Check failed: f_min <= 0.*""); +} + +TEST_F(QuantizedPadOpTest, SimpleConstTest) { + // Padding is represented as four 2-D lists representing above padding and + // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}). 
+ PadOpConstModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {4, 2}, + {0, 0, 1, 1, 1, 1, 0, 0}, + {TensorType_UINT8, {}, -1.0, 1.0}); + m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(DequantizedArrayNear( + {0, 0, 0, 0, 0, -0.8, 0.2, 0, 0, 0.9, 0.7, 0, 0, 0, 0, 0}, + -1.0, 1.0))); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1})); +} + +TEST_F(QuantizedPadOpTest, SimpleDynamicTest) { + PadOpDynamicModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {4, 2}, + {TensorType_UINT8, {}, -1.0, 1.0}); + m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7}); + m.SetPaddings({0, 0, 1, 1, 1, 1, 0, 0}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(DequantizedArrayNear( + {0, 0, 0, 0, 0, -0.8, 0.2, 0, 0, 0.9, 0.7, 0, 0, 0, 0, 0}, + -1.0, 1.0))); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1})); +} + +TEST_F(QuantizedPadOpTest, AdvancedConstTest) { + PadOpConstModel m({TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, {4, 2}, + {0, 0, 0, 2, 1, 3, 0, 0}, + {TensorType_UINT8, {}, -1.0, 1.0}); + m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(DequantizedArrayNear( + {0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + -1.0, 1.0))); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 7, 1})); +} + +TEST_F(QuantizedPadOpTest, AdvancedDynamicTest) { + PadOpDynamicModel m({TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, {4, 2}, + {TensorType_UINT8, {}, -1.0, 1.0}); + m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}); + m.SetPaddings({0, 0, 0, 2, 1, 3, 0, 0}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(DequantizedArrayNear( + {0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + -1.0, 1.0))); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 7, 1})); +} + } // namespace } // namespace tflite ",0,train 2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`. PiperOrigin-RevId: 387606044 Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",parallel_interleave_test.py,"@@ -760,7 +760,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase, parameterized.TestCase): interleave_fn, cycle_length=10, sloppy=sloppy)) opts = options_lib.Options() - opts.experimental_deterministic = global_determinism + opts.deterministic = global_determinism dataset = dataset.with_options(opts) return dataset ",0,train 2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`. PiperOrigin-RevId: 387606044 Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",parse_example_dataset_test.py,"@@ -1140,7 +1140,7 @@ class ParseExampleDatasetTest(test_base.DatasetTestBase, deterministic=local_determinism)) opts = options_lib.Options() - opts.experimental_deterministic = global_determinism + opts.deterministic = global_determinism dataset = dataset.with_options(opts) expected = list(range(num_elements)) ",0,train 2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`. 
PiperOrigin-RevId: 387606044 Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",data_service_ops_test.py,"@@ -437,7 +437,7 @@ class DataServiceOpsTest(data_service_test_base.TestBase, ds = dataset_ops.Dataset.from_tensor_slices(elements) ds = ds.interleave(interleave_fn, cycle_length=10, num_parallel_calls=10) opts = options_lib.Options() - opts.experimental_deterministic = False + opts.deterministic = False ds = ds.with_options(opts) ds = self.make_distributed_dataset(ds, cluster) return ds ",0,train 2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`. PiperOrigin-RevId: 387606044 Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",interleave_ops.py,"@@ -38,7 +38,7 @@ from tensorflow.python.util.tf_export import tf_export None, ""Use `tf.data.Dataset.interleave(map_func, cycle_length, block_length, "" ""num_parallel_calls=tf.data.AUTOTUNE)` instead. If sloppy "" - ""execution is desired, use `tf.data.Options.experimental_deterministic`."") + ""execution is desired, use `tf.data.Options.deterministic`."") @tf_export(""data.experimental.parallel_interleave"") def parallel_interleave(map_func, cycle_length, @@ -78,9 +78,8 @@ def parallel_interleave(map_func, `Dataset` before advancing to the next input `Dataset`. sloppy: A boolean controlling whether determinism should be traded for performance by allowing elements to be produced out of order. If `sloppy` - is `None`, the `tf.data.Options.experimental_deterministic` dataset option - (`True` by default) is used to decide whether to enforce a deterministic - order. + is `None`, the `tf.data.Options.deterministic` dataset option (`True` by + default) is used to decide whether to enforce a deterministic order. buffer_output_elements: The number of elements each iterator being interleaved should buffer (similar to the `.prefetch()` transformation for each interleaved iterator). ",0,train 2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`. PiperOrigin-RevId: 387606044 Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",parsing_ops.py,"@@ -131,9 +131,8 @@ def parse_example_dataset(features, num_parallel_calls=1, deterministic=None): should be traded for performance by allowing elements to be produced out of order if some parsing calls complete faster than others. If `deterministic` is `None`, the - `tf.data.Options.experimental_deterministic` dataset option (`True` by - default) is used to decide whether to produce elements - deterministically. + `tf.data.Options.deterministic` dataset option (`True` by default) is used + to decide whether to produce elements deterministically. Returns: A dataset transformation function, which can be passed to ",0,train 2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`. 
PiperOrigin-RevId: 387606044 Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",readers.py,"@@ -573,7 +573,7 @@ def make_csv_dataset_v2( dataset = dataset.interleave( filename_to_dataset, num_parallel_calls=num_parallel_reads) options = options_lib.Options() - options.experimental_deterministic = not sloppy + options.deterministic = not sloppy dataset = dataset.with_options(options) else: # Read files sequentially (if num_parallel_reads=1) or in parallel @@ -1025,7 +1025,7 @@ def make_batched_features_dataset_v2(file_pattern, lambda filename: reader(filename, *reader_args), num_parallel_calls=reader_num_threads) options = options_lib.Options() - options.experimental_deterministic = not sloppy_ordering + options.deterministic = not sloppy_ordering dataset = dataset.with_options(options) else: # Read files sequentially (if reader_num_threads=1) or in parallel ",0,train 2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`. PiperOrigin-RevId: 387606044 Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",batch_test.py,"@@ -269,7 +269,7 @@ class BatchTest(test_base.DatasetTestBase, parameterized.TestCase): batch_size=6, num_parallel_calls=2, deterministic=local_determinism).unbatch() opts = options_lib.Options() - opts.experimental_deterministic = global_determinism + opts.deterministic = global_determinism dataset = dataset.with_options(opts) return dataset ",0,train 2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`. PiperOrigin-RevId: 387606044 Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",interleave_test.py,"@@ -285,7 +285,7 @@ class InterleaveTest(test_base.DatasetTestBase, parameterized.TestCase): lambda x: dataset_ops.Dataset.from_tensors(x).repeat(x), cycle_length, block_length, num_parallel_calls) options = options_lib.Options() - options.experimental_deterministic = False + options.deterministic = False dataset = dataset.with_options(options) expected_output = [ element for element in _interleave( @@ -351,7 +351,7 @@ class InterleaveTest(test_base.DatasetTestBase, parameterized.TestCase): num_parallel_calls=10, deterministic=local_determinism) opts = options_lib.Options() - opts.experimental_deterministic = global_determinism + opts.deterministic = global_determinism dataset = dataset.with_options(opts) return dataset ",0,train 2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`. PiperOrigin-RevId: 387606044 Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",map_test.py,"@@ -1291,7 +1291,7 @@ class MapTest(test_base.DatasetTestBase, parameterized.TestCase): num_parallel_calls=2, deterministic=local_determinism) opts = options_lib.Options() - opts.experimental_deterministic = global_determinism + opts.deterministic = global_determinism dataset = dataset.with_options(opts) return dataset ",0,train 2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`. 
PiperOrigin-RevId: 387606044 Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",options_test.py,"@@ -66,14 +66,14 @@ class OptionsTest(test_base.DatasetTestBase, parameterized.TestCase): options1 = options_lib.Options() options1.experimental_optimization.autotune = True options2 = options_lib.Options() - options2.experimental_deterministic = False + options2.deterministic = False ds = dataset_ops.Dataset.range(0) ds = ds.with_options(options1) ds = ds.with_options(options2) options = self._get_options(ds) self.assertTrue(options.experimental_optimization.autotune) # Explicitly check that flag is False since assertFalse allows None - self.assertIs(options.experimental_deterministic, False) + self.assertIs(options.deterministic, False) @combinations.generate(test_base.default_test_combinations()) def testOptionsTwiceSameOption(self): @@ -94,13 +94,13 @@ class OptionsTest(test_base.DatasetTestBase, parameterized.TestCase): options1 = options_lib.Options() options1.experimental_optimization.autotune = True options2 = options_lib.Options() - options2.experimental_deterministic = True + options2.deterministic = True ds1 = dataset_ops.Dataset.range(0).with_options(options1) ds2 = dataset_ops.Dataset.range(0).with_options(options2) ds = dataset_ops.Dataset.zip((ds1, ds2)) options = self._get_options(ds) self.assertTrue(options.experimental_optimization.autotune) - self.assertTrue(options.experimental_deterministic) + self.assertTrue(options.deterministic) @combinations.generate(test_base.default_test_combinations()) def testOptionsHaveDefaults(self): @@ -125,7 +125,7 @@ class OptionsTest(test_base.DatasetTestBase, parameterized.TestCase): ds = ds.with_options(options2) dataset_options = ds.options() with self.assertRaises(ValueError): - dataset_options.experimental_deterministic = True + dataset_options.deterministic = True @combinations.generate(test_base.eager_only_combinations()) def testNestedDataset(self): @@ -139,7 +139,7 @@ class OptionsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.default_test_combinations()) def testOptionsProtoRoundTrip(self): options = options_lib.Options() - options.experimental_deterministic = True + options.deterministic = True options.experimental_external_state_policy = ( options_lib.ExternalStatePolicy.FAIL) options.experimental_distribute.auto_shard_policy = ( @@ -209,6 +209,20 @@ class OptionsTest(test_base.DatasetTestBase, parameterized.TestCase): self.assertEqual(result.experimental_threading.max_intra_op_parallelism, result.threading.max_intra_op_parallelism) + @combinations.generate(test_base.default_test_combinations()) + def testExperimentalDeterministicOverride(self): + options = options_lib.Options() + self.assertEqual(options.deterministic, options.experimental_deterministic) + options.experimental_deterministic = False + pb = options._to_proto() + result = options_lib.Options() + result._from_proto(pb) + self.assertFalse(result.deterministic) + self.assertEqual(result.deterministic, result.experimental_deterministic) + result.experimental_deterministic = True + self.assertTrue(result.deterministic) + self.assertEqual(result.deterministic, result.experimental_deterministic) + @combinations.generate(test_base.default_test_combinations()) def testPersistenceOptionsSetOutsideFunction(self): ",0,train 2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`. 
PiperOrigin-RevId: 387606044 Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",dataset_ops.py,"@@ -1561,8 +1561,8 @@ class DatasetV2(collections_abc.Iterable, tracking_base.Trackable, the transformation produces elements. If set to `False`, the transformation is allowed to yield elements out of order to trade determinism for performance. If not specified, the - `tf.data.Options.experimental_deterministic` option - (`True` by default) controls the behavior. + `tf.data.Options.deterministic` option (`True` by default) controls the + behavior. Returns: Dataset: A `Dataset`. @@ -1844,8 +1844,8 @@ name=None)) the transformation produces elements. If set to `False`, the transformation is allowed to yield elements out of order to trade determinism for performance. If not specified, the - `tf.data.Options.experimental_deterministic` option - (`True` by default) controls the behavior. + `tf.data.Options.deterministic` option (`True` by default) controls the + behavior. Returns: Dataset: A `Dataset`. @@ -1998,8 +1998,8 @@ name=None)) the transformation produces elements. If set to `False`, the transformation is allowed to yield elements out of order to trade determinism for performance. If not specified, the - `tf.data.Options.experimental_deterministic` option - (`True` by default) controls the behavior. + `tf.data.Options.deterministic` option (`True` by default) controls the + behavior. Returns: Dataset: A `Dataset`. @@ -2495,7 +2495,7 @@ name=None)) ... num_parallel_calls=3) >>> options = tf.data.Options() >>> # This will make the interleave order non-deterministic. - >>> options.experimental_deterministic = False + >>> options.deterministic = False >>> ds = ds.with_options(options) Args: @@ -3327,8 +3327,8 @@ class DatasetV1(DatasetV2): boolean controls the order in which the transformation produces elements. If set to `False`, the transformation is allowed to yield elements out of order to trade determinism for performance. If not - specified, the `tf.data.Options.experimental_deterministic` option - (`True` by default) controls the behavior. + specified, the `tf.data.Options.deterministic` option (`True` by + default) controls the behavior. Returns: Dataset: A `Dataset`. ",0,train 2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`. PiperOrigin-RevId: 387606044 Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",options.py,"@@ -412,9 +412,9 @@ class Options(options_lib.OptionsBase): >>> dataset = tf.data.Dataset.range(42) >>> options = tf.data.Options() - >>> options.experimental_deterministic = False + >>> options.deterministic = False >>> dataset = dataset.with_options(options) - >>> print(dataset.options().experimental_deterministic) + >>> print(dataset.options().deterministic) False Note: A known limitation of the `tf.data.Options` implementation is that the @@ -423,13 +423,18 @@ class Options(options_lib.OptionsBase): need to be set within the same tf.function. """""" - experimental_deterministic = options_lib.create_option( - name=""experimental_deterministic"", + deterministic = options_lib.create_option( + name=""deterministic"", ty=bool, docstring= ""Whether the outputs need to be produced in deterministic order. If None,"" "" defaults to True."") + experimental_deterministic = options_lib.create_option( + name=""experimental_deterministic"", + ty=bool, + docstring=""DEPRECATED. 
Use `deterministic` instead."") + experimental_distribute = options_lib.create_option( name=""experimental_distribute"", ty=DistributeOptions, @@ -438,6 +443,16 @@ class Options(options_lib.OptionsBase): ""`tf.data.experimental.DistributeOptions` for more details."", default_factory=DistributeOptions) + experimental_external_state_policy = options_lib.create_option( + name=""experimental_external_state_policy"", + ty=ExternalStatePolicy, + docstring=""This option can be used to override the default policy for "" + ""how to handle external state when serializing a dataset or "" + ""checkpointing its iterator. There are three settings available - "" + ""IGNORE: External state is ignored without a warning; WARN: External "" + ""state is ignored and a warning is logged; FAIL: External state results "" + ""in an error."") + experimental_optimization = options_lib.create_option( name=""experimental_optimization"", ty=OptimizationOptions, @@ -455,15 +470,10 @@ class Options(options_lib.OptionsBase): ""frequency is determined by the number of devices attached to this "" ""input pipeline. If None, defaults to False."") - experimental_external_state_policy = options_lib.create_option( - name=""experimental_external_state_policy"", - ty=ExternalStatePolicy, - docstring=""This option can be used to override the default policy for "" - ""how to handle external state when serializing a dataset or "" - ""checkpointing its iterator. There are three settings available - "" - ""IGNORE: External state is ignored without a warning; WARN: External "" - ""state is ignored and a warning is logged; FAIL: External state results "" - ""in an error."") + experimental_threading = options_lib.create_option( + name=""experimental_threading"", + ty=ThreadingOptions, + docstring=""DEPRECATED. Use `threading` instead."") threading = options_lib.create_option( name=""threading"", @@ -472,26 +482,35 @@ class Options(options_lib.OptionsBase): ""`tf.data.ThreadingOptions` for more details."", default_factory=ThreadingOptions) - def __getattr__(self, name): + def __getattribute__(self, name): if name == ""experimental_threading"": logging.warning(""options.experimental_threading is deprecated. "" ""Use options.threading instead."") return getattr(self, ""threading"") - else: - raise AttributeError(""Attribute %s not found."" % name) + if name == ""experimental_deterministic"": + # TODO(aaudibert): Uncomment after internal uses have been updated. + # logging.warning(""options.experimental_deterministic is deprecated. "" + # ""Use options.deterministic instead."") + return getattr(self, ""deterministic"") + return super(Options, self).__getattribute__(name) def __setattr__(self, name, value): if name == ""experimental_threading"": logging.warning(""options.experimental_threading is deprecated. "" ""Use options.threading instead."") super(Options, self).__setattr__(""threading"", value) + if name == ""experimental_deterministic"": + # TODO(aaudibert): Uncomment after internal uses have been updated. + # logging.warning(""options.experimental_deterministic is deprecated. 
"" + # ""Use options.deterministic instead."") + super(Options, self).__setattr__(""deterministic"", value) else: super(Options, self).__setattr__(name, value) def _to_proto(self): pb = dataset_options_pb2.Options() - if self.experimental_deterministic is not None: - pb.deterministic = self.experimental_deterministic + if self.deterministic is not None: + pb.deterministic = self.deterministic pb.distribute_options.CopyFrom(self.experimental_distribute._to_proto()) # pylint: disable=protected-access if self.experimental_external_state_policy is not None: pb.external_state_policy = ( @@ -505,7 +524,7 @@ class Options(options_lib.OptionsBase): def _from_proto(self, pb): if pb.WhichOneof(""optional_deterministic"") is not None: - self.experimental_deterministic = pb.deterministic + self.deterministic = pb.deterministic self.experimental_distribute._from_proto(pb.distribute_options) # pylint: disable=protected-access if pb.WhichOneof(""optional_external_state_policy"") is not None: self.experimental_external_state_policy = ( ",0,train 968725656bf5a01a2e3622eb4533166f2f592b88,tensorflow/tensorflow,Modified reduce_variance for extra optimization,math_ops.py,"@@ -2110,9 +2110,10 @@ def reduce_variance(input_tensor, axis=None, keepdims=False, name=None): name = name if name else ""reduce_variance"" with ops.name_scope(name): square_of_input = gen_math_ops.square(input_tensor) - mean_of_square = reduce_mean(squares_of_input, - axis=axis, - keepdims=keepdims) + mean_of_square = reduce_mean( + squares_of_input, + axis=axis, + keepdims=keepdims) mean = reduce_mean(input_tensor, axis=axis, keepdims=keepdims) square_of_mean = gen_math_ops.square(mean) return mean_of_square - square_of_mean ",0,test d3f6604c622c7a6cf38bd266ff6626bfc960299c,tensorflow/tensorflow,"Including cuda_kernel_helper.h, deleting some (now) duplicated code.",maxpooling_op_gpu.cu.cc,"@@ -24,6 +24,7 @@ limitations under the License. #include ""tensorflow/core/framework/tensor_types.h"" #include ""tensorflow/core/kernels/maxpooling_op.h"" #include ""tensorflow/core/kernels/maxpooling_op_gpu.h"" +#include ""tensorflow/core/util/cuda_kernel_helper.h"" namespace tensorflow { namespace { @@ -43,10 +44,7 @@ namespace { // int form, keeping track of the flattened index of the input item that // produces the max output. If a nullptr is passed in for mask, no mask // will be produced. -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ - i += blockDim.x * gridDim.x) - +// // To call the forward and backward functions, use e.g.: // const int kThreadsPerBlock = 1024 // const int output_size = batch * channels * pooled_height * pooled_width; @@ -201,11 +199,6 @@ __global__ void MaxPoolBackward(const int nthreads, const dtype* top_diff, } } -template -__global__ void SetZero(const int nthreads, dtype* bottom_diff) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { *(bottom_diff + index) = dtype(0); } -} - #undef CUDA_1D_KERNEL_LOOP } // namespace ",0,train d3f6604c622c7a6cf38bd266ff6626bfc960299c,tensorflow/tensorflow,"Including cuda_kernel_helper.h, deleting some (now) duplicated code.",cuda_kernel_helper.h,"@@ -108,6 +108,7 @@ template __global__ void SetZero(const int nthreads, T* bottom_diff) { CUDA_1D_KERNEL_LOOP(index, nthreads) { *(bottom_diff + index) = T(0); } } + // For atomicSub. // Custom implementation for sub by just negating the value. 
",0,train 9ed22473db5e3b5d555e951c2dfc92a75ab235ca,tensorflow/tensorflow,"Capture the distribute.Strategy scope from the outer graph when entering the FuncGraph.as_default scope instead of __init__. Fixes issues with the grobal Keras FuncGraph keeping state between tests. PiperOrigin-RevId: 225257506",mirrored_strategy.py,"@@ -50,12 +50,17 @@ from tensorflow.python.util.tf_export import tf_export @contextlib.contextmanager -def _enter_graph(g, eager): +def _enter_graph(g, eager, creator_stack=None): + """"""Context manager for selecting a graph and maybe eager mode."""""" if eager: with g.as_default(), context.eager_mode(): + if creator_stack is not None: + g._variable_creator_stack = creator_stack # pylint: disable=protected-access yield else: with g.as_default(): + if creator_stack is not None: + g._variable_creator_stack = creator_stack # pylint: disable=protected-access yield @@ -865,7 +870,6 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended): def run(self): # pylint: disable=protected-access - self.graph._variable_creator_stack = self._variable_creator_stack self.should_run.wait() self.should_run.clear() try: @@ -873,7 +877,8 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended): return with self.coord.stop_on_exception(), \ _enter_graph(self._init_graph, self._init_in_eager), \ - _enter_graph(self.graph, self.in_eager), \ + _enter_graph(self.graph, self.in_eager, + self._variable_creator_stack), \ context.context().device_policy(self.context_device_policy), \ MirroredReplicaContext(self.distribution, constant_op.constant( self.replica_id, dtypes.int32)), \ ",0,train 9ed22473db5e3b5d555e951c2dfc92a75ab235ca,tensorflow/tensorflow,"Capture the distribute.Strategy scope from the outer graph when entering the FuncGraph.as_default scope instead of __init__. Fixes issues with the grobal Keras FuncGraph keeping state between tests. PiperOrigin-RevId: 225257506",func_graph.py,"@@ -36,6 +36,7 @@ from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import variable_scope from tensorflow.python.util import compat from tensorflow.python.util import nest +from tensorflow.python.util import tf_contextlib from tensorflow.python.util import tf_decorator from tensorflow.python.util.lazy_loader import LazyLoader @@ -108,38 +109,20 @@ class FuncGraph(ops.Graph): graph = self.outer_graph - # pylint: disable=protected-access - # TODO(b/112906995, nareshmodi): distribution strategy depends on inheriting - # this stack from the default graph even in eager mode. Maybe it should be - # part of the eager context? This would also allow us to remove a - # get_default_graph() call from the function cache lookup. - self._distribution_strategy_stack = list(graph._distribution_strategy_stack) - # We ignore device placements from any outer scopes while tracing the - # function when possible, to avoid hard-coding them in the function - # graph. ""Default"" placements come from the PartitionedCallOp's placement, - # so that the same trace of the Python function may be placed on several - # different devices and saved functions may be placed on new devices when - # restored. 
if context.executing_eagerly(): self.seed = context.global_seed() device_type = context.context().device_spec.device_type self._xla_compile = (device_type == ""TPU"" or device_type == ""XLA_GPU"" or device_type == ""XLA_CPU"") - if self._distribution_strategy_stack or self._xla_compile: - self._add_device_to_stack(context.context().device_name) else: self.seed = graph.seed self._xla_compile = getattr(graph, ""_xla_compile"", False) # TODO(allenl): Figure out if we can remove colocation stack # specialization (currently used in cond_v2), here and in the cache key. - self._colocation_stack = graph._colocation_stack.copy() - if (self._distribution_strategy_stack - or self._xla_compile - or device_stack_has_callable(graph._device_function_stack)): - # Hard-code devices from device functions in the function body - self._device_function_stack = graph._device_function_stack.copy() + self._colocation_stack = graph._colocation_stack.copy() # pylint: disable=protected-access + if not self._read_only_collections: - self._collections = graph._collections + self._collections = graph._collections # pylint: disable=protected-access else: for collection_name in graph.get_all_collection_keys(): if collection_name not in WHITELIST_COLLECTIONS: @@ -149,11 +132,55 @@ class FuncGraph(ops.Graph): self._collections[collection_name] = graph.get_collection_ref( collection_name) - self._variable_creator_stack = graph._variable_creator_stack - # Inherit the graph key, since this is used for matching variables in - # optimizers. - self._graph_key = graph._graph_key - # pylint: enable=protected-access + def as_default(self): + outer_cm = super(FuncGraph, self).as_default() + + @tf_contextlib.contextmanager + def inner_cm(): + """"""Context manager for copying distribute.Strategy scope information."""""" + graph = ops.get_default_graph() + # pylint: disable=protected-access + # TODO(b/112906995, nareshmodi): distribution strategy depends on + # inheriting this stack from the default graph even in eager mode. Maybe + # it should be part of the eager context? This would also allow us to + # remove a get_default_graph() call from the function cache lookup. + old_strategy_stack = self._distribution_strategy_stack + self._distribution_strategy_stack = list( + graph._distribution_strategy_stack) + # We ignore device placements from any outer scopes while tracing the + # function when possible, to avoid hard-coding them in the function + # graph. ""Default"" placements come from the PartitionedCallOp's placement, + # so that the same trace of the Python function may be placed on several + # different devices and saved functions may be placed on new devices when + # restored. + old_device_stack = self._device_function_stack + if context.executing_eagerly(): + if self._distribution_strategy_stack or self._xla_compile: + self._add_device_to_stack(context.context().device_name) + else: + if (self._distribution_strategy_stack + or self._xla_compile + or device_stack_has_callable(graph._device_function_stack)): + # Hard-code devices from device functions in the function body + self._device_function_stack = graph._device_function_stack.copy() + + old_creator_stack = self._variable_creator_stack + self._variable_creator_stack = graph._variable_creator_stack + # Inherit the graph key, since this is used for matching variables in + # optimizers. 
+ old_graph_key = self._graph_key + self._graph_key = graph._graph_key + # pylint: enable=protected-access + + with outer_cm as g: + try: + yield g + finally: + self._distribution_strategy_stack = old_strategy_stack + self._device_function_stack = old_device_stack + self._variable_creator_stack = old_creator_stack + self._graph_key = old_graph_key + return inner_cm() @property def output_types(self): ",0,train 53c25e3db9876ea6bbf23cf10c15854511cf6ec8,tensorflow/tensorflow,"Add a TF1.X path to generate2.py It's version2 of the docs generator, not the docs generator for tensorflow2 PiperOrigin-RevId: 254407102",generate2.py,"@@ -51,22 +51,6 @@ parser.tf_inspect = tf_inspect # So patch `tf.__all__` to list everything. tf.__all__ = [item_name for item_name, value in tf_inspect.getmembers(tf)] -tf.__doc__ = """""" -## TensorFlow 2.0 Beta - -Caution: This is a developer preview. You will likely find some bugs, -performance issues, and more, and we encourage you to tell us about them. -We value your feedback! - -These docs were generated from the beta build of TensorFlow 2.0. - -You can install the exact version that was used to generate these docs -with: - -``` -pip install tensorflow==2.0.0-beta1 -``` -"""""" FLAGS = flags.FLAGS @@ -87,6 +71,75 @@ flags.DEFINE_string(""site_path"", """", ""`_toc.yaml` and `_redirects.yaml` files"") +if tf.__version__.startswith('1'): + PRIVATE_MAP = { + 'tf.contrib.autograph': ['utils', 'operators'], + 'tf.test': ['mock'], + 'tf.contrib.estimator': ['python'], + } + + DO_NOT_DESCEND_MAP = { + 'tf': ['cli', 'lib', 'wrappers'], + 'tf.contrib': [ + 'compiler', + 'grid_rnn', + # Block contrib.keras to de-clutter the docs + 'keras', + 'labeled_tensor', + 'quantization', + 'session_bundle', + 'slim', + 'solvers', + 'specs', + 'tensor_forest', + 'tensorboard', + 'testing', + 'tfprof', + ], + 'tf.contrib.bayesflow': [ + 'special_math', 'stochastic_gradient_estimators', + 'stochastic_variables' + ], + 'tf.contrib.ffmpeg': ['ffmpeg_ops'], + 'tf.contrib.graph_editor': [ + 'edit', 'match', 'reroute', 'subgraph', 'transform', 'select', 'util' + ], + 'tf.contrib.keras': ['api', 'python'], + 'tf.contrib.layers': ['feature_column', 'summaries'], + 'tf.contrib.learn': [ + 'datasets', + 'head', + 'graph_actions', + 'io', + 'models', + 'monitors', + 'ops', + 'preprocessing', + 'utils', + ], + 'tf.contrib.util': ['loader'], + } +else: + PRIVATE_MAP = {} + DO_NOT_DESCEND_MAP = {} + tf.__doc__ = """""" + ## TensorFlow 2.0 Beta + + Caution: This is a developer preview. You will likely find some bugs, + performance issues, and more, and we encourage you to tell us about them. + We value your feedback! + + These docs were generated from the beta build of TensorFlow 2.0. + + You can install the exact version that was used to generate these docs + with: + + ``` + pip install tensorflow==2.0.0-beta1 + ``` + """""" + + # The doc generator isn't aware of tf_export. # So prefix the score tuples with -1 when this is the canonical name, +1 # otherwise. The generator chooses the name with the lowest score. @@ -126,7 +179,6 @@ def _hide_layer_and_module_methods(): except AttributeError: pass - def build_docs(output_dir, code_url_prefix, search_hints=True): """"""Build api docs for tensorflow v2. 
@@ -142,6 +194,11 @@ def build_docs(output_dir, code_url_prefix, search_hints=True): except AttributeError: pass + try: + doc_controls.do_not_generate_docs(tf.compat.v1.pywrap_tensorflow) + except AttributeError: + pass + base_dir = path.dirname(tf.__file__) base_dirs = ( @@ -165,7 +222,9 @@ def build_docs(output_dir, code_url_prefix, search_hints=True): search_hints=search_hints, code_url_prefix=code_url_prefixes, site_path=FLAGS.site_path, - visitor_cls=TfExportAwareDocGeneratorVisitor) + visitor_cls=TfExportAwareDocGeneratorVisitor, + private_map=PRIVATE_MAP, + do_not_descend_map=DO_NOT_DESCEND_MAP) doc_generator.build(output_dir) ",0,train de2127833462b2d2b2915c72565601d1ceb798ff,tensorflow/tensorflow,"Implement modular POSIX filesystem support for testing if paths exist. We also provide tests to make sure all API requirements are satisfied. Just a small sized part of work for modular filesystem plugins. For more details, consult the RFC at https://github.com/tensorflow/community/blob/master/rfcs/20190506-filesystem-plugin-modular-tensorflow.md PiperOrigin-RevId: 281109338 Change-Id: I060ff60ce6502770d43798910a10b2d0d0a2a601",filesystem_interface.h,"@@ -527,11 +527,13 @@ typedef struct TF_FilesystemOps { /// of type `TF_Status*`. /// /// If `statuses` is not null, plugins must fill each element with detailed - /// status for each file, as if calling `path_exists` on each one. + /// status for each file, as if calling `path_exists` on each one. Core + /// TensorFlow initializes the `statuses` array and plugins must use + /// `TF_SetStatus` to set each element instead of dirrectly assigning. /// /// DEFAULT IMPLEMENTATION: Checks existence of every file. Needs /// `path_exists`. - bool (*paths_exist)(const TF_Filesystem* filesystem, const char** paths, + bool (*paths_exist)(const TF_Filesystem* filesystem, char** paths, int num_files, TF_Status** statuses); /// Obtains statistics for the given `path`. ",0,train de2127833462b2d2b2915c72565601d1ceb798ff,tensorflow/tensorflow,"Implement modular POSIX filesystem support for testing if paths exist. We also provide tests to make sure all API requirements are satisfied. Just a small sized part of work for modular filesystem plugins. For more details, consult the RFC at https://github.com/tensorflow/community/blob/master/rfcs/20190506-filesystem-plugin-modular-tensorflow.md PiperOrigin-RevId: 281109338 Change-Id: I060ff60ce6502770d43798910a10b2d0d0a2a601",modular_filesystem.cc,"@@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/ #include ""tensorflow/c/experimental/filesystem/modular_filesystem.h"" +#include #include #include @@ -111,15 +112,47 @@ Status ModularFileSystem::NewReadOnlyMemoryRegionFromFile( } Status ModularFileSystem::FileExists(const std::string& fname) { - // TODO(mihaimaruseac): Implementation to come in a new change - return Status(error::UNIMPLEMENTED, - ""Modular filesystem stub not implemented yet""); + if (ops_->path_exists == nullptr) + return errors::Unimplemented(tensorflow::strings::StrCat( + ""Filesystem for "", fname, "" does not support FileExists()"")); + + UniquePtrTo_TF_Status plugin_status(TF_NewStatus(), TF_DeleteStatus); + const std::string translated_name = TranslateName(fname); + ops_->path_exists(filesystem_.get(), translated_name.c_str(), + plugin_status.get()); + return StatusFromTF_Status(plugin_status.get()); } bool ModularFileSystem::FilesExist(const std::vector& files, std::vector* status) { - // TODO(mihaimaruseac): Implementation to come in a new change - return true; + if (ops_->paths_exist == nullptr) + return FileSystem::FilesExist(files, status); + + std::vector translated_names; + translated_names.reserve(files.size()); + for (int i = 0; i < files.size(); i++) + translated_names.push_back(strdup(TranslateName(files[i]).c_str())); + + bool result; + if (status == nullptr) { + result = ops_->paths_exist(filesystem_.get(), translated_names.data(), + files.size(), nullptr); + } else { + std::vector plugin_status; + plugin_status.reserve(files.size()); + for (int i = 0; i < files.size(); i++) + plugin_status.push_back(TF_NewStatus()); + result = ops_->paths_exist(filesystem_.get(), translated_names.data(), + files.size(), plugin_status.data()); + for (int i = 0; i < files.size(); i++) { + status->push_back(StatusFromTF_Status(plugin_status[i])); + TF_DeleteStatus(plugin_status[i]); + } + } + + for (int i = 0; i < files.size(); i++) free(translated_names[i]); + + return result; } Status ModularFileSystem::GetChildren(const std::string& dir, ",0,train de2127833462b2d2b2915c72565601d1ceb798ff,tensorflow/tensorflow,"Implement modular POSIX filesystem support for testing if paths exist. We also provide tests to make sure all API requirements are satisfied. Just a small sized part of work for modular filesystem plugins. 
For more details, consult the RFC at https://github.com/tensorflow/community/blob/master/rfcs/20190506-filesystem-plugin-modular-tensorflow.md PiperOrigin-RevId: 281109338 Change-Id: I060ff60ce6502770d43798910a10b2d0d0a2a601",modular_filesystem_test.cc,"@@ -539,6 +539,95 @@ TEST_P(ModularFileSystemTest, TestDeleteDirectoryPathIsInvalid) { EXPECT_PRED2(UninmplementedOrReturnsCode, status, Code::FAILED_PRECONDITION); } +TEST_P(ModularFileSystemTest, TestFileExists) { + const std::string filepath = GetURIForPath(""a_file""); + std::unique_ptr file; + Status status = env_->NewWritableFile(filepath, &file); + if (!status.ok()) GTEST_SKIP() << ""NewWritableFile() not supported""; + + status = env_->FileExists(filepath); + EXPECT_PRED2(UninmplementedOrReturnsCode, status, Code::OK); +} + +TEST_P(ModularFileSystemTest, TestFileExistsButIsDirectory) { + const std::string filepath = GetURIForPath(""a_file""); + Status status = env_->CreateDir(filepath); + if (!status.ok()) GTEST_SKIP() << ""CreateDir() not supported""; + + status = env_->FileExists(filepath); + EXPECT_PRED2(UninmplementedOrReturnsCode, status, Code::OK); +} + +TEST_P(ModularFileSystemTest, TestFileExistsNotFound) { + const std::string filepath = GetURIForPath(""a_file""); + Status status = env_->FileExists(filepath); + EXPECT_PRED2(UninmplementedOrReturnsCode, status, Code::NOT_FOUND); +} + +TEST_P(ModularFileSystemTest, TestFileExistsPathIsInvalid) { + const std::string filepath = GetURIForPath(""a_file""); + std::unique_ptr file; + Status status = env_->NewWritableFile(filepath, &file); + if (!status.ok()) GTEST_SKIP() << ""NewWritableFile() not supported""; + + const std::string target_path = GetURIForPath(""a_file/a_new_file""); + status = env_->FileExists(target_path); + EXPECT_PRED2(UninmplementedOrReturnsCode, status, Code::FAILED_PRECONDITION); +} + +TEST_P(ModularFileSystemTest, TestFilesExist) { + const std::vector filenames = {GetURIForPath(""a""), + GetURIForPath(""b"")}; + for (const auto& filename : filenames) { + std::unique_ptr file; + Status status = env_->NewWritableFile(filename, &file); + if (!status.ok()) GTEST_SKIP() << ""NewWritableFile() not supported""; + } + + EXPECT_TRUE(env_->FilesExist(filenames, /*status=*/nullptr)); + + std::vector statuses; + EXPECT_TRUE(env_->FilesExist(filenames, &statuses)); + EXPECT_EQ(statuses.size(), filenames.size()); + for (const auto& status : statuses) + EXPECT_PRED2(UninmplementedOrReturnsCode, status, Code::OK); +} + +TEST_P(ModularFileSystemTest, TestFilesExistAllFailureModes) { + // if reordering these, make sure to reorder checks at the end + const std::vector filenames = { + GetURIForPath(""a_dir""), + GetURIForPath(""a_file""), + GetURIForPath(""a_file/a_new_file""), + GetURIForPath(""file_not_found""), + }; + + Status status = env_->CreateDir(filenames[0]); + if (!status.ok()) GTEST_SKIP() << ""CreateDir() not supported""; + + std::unique_ptr file; + status = env_->NewWritableFile(filenames[1], &file); + if (!status.ok()) GTEST_SKIP() << ""NewWritableFile() not supported""; + + std::vector statuses; + EXPECT_FALSE(env_->FilesExist(filenames, &statuses)); + EXPECT_EQ(statuses.size(), filenames.size()); + EXPECT_PRED2(UninmplementedOrReturnsCode, statuses[0], Code::OK); + EXPECT_PRED2(UninmplementedOrReturnsCode, statuses[1], Code::OK); + EXPECT_PRED2(UninmplementedOrReturnsCode, statuses[2], + Code::FAILED_PRECONDITION); + EXPECT_PRED2(UninmplementedOrReturnsCode, statuses[3], Code::NOT_FOUND); +} + +TEST_P(ModularFileSystemTest, TestFilesExistsNoFiles) { + const 
std::vector filenames = {}; + EXPECT_TRUE(env_->FilesExist(filenames, /*status=*/nullptr)); + + std::vector statuses; + EXPECT_TRUE(env_->FilesExist(filenames, &statuses)); + EXPECT_TRUE(statuses.empty()); +} + TEST_P(ModularFileSystemTest, TestAppendAndTell) { const std::string filename = GetURIForPath(""a_file""); std::unique_ptr file; ",0,train de2127833462b2d2b2915c72565601d1ceb798ff,tensorflow/tensorflow,"Implement modular POSIX filesystem support for testing if paths exist. We also provide tests to make sure all API requirements are satisfied. Just a small sized part of work for modular filesystem plugins. For more details, consult the RFC at https://github.com/tensorflow/community/blob/master/rfcs/20190506-filesystem-plugin-modular-tensorflow.md PiperOrigin-RevId: 281109338 Change-Id: I060ff60ce6502770d43798910a10b2d0d0a2a601",posix_filesystem.cc,"@@ -287,7 +287,13 @@ static void DeleteDir(const TF_Filesystem* filesystem, const char* path, TF_SetStatus(status, TF_OK, """"); } -// TODO(mihaimaruseac): More implementations to follow in subsequent changes. +static void PathExists(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { + if (access(path, F_OK) != 0) + TF_SetStatusFromIOError(status, errno, path); + else + TF_SetStatus(status, TF_OK, """"); +} } // namespace tf_posix_filesystem @@ -317,6 +323,11 @@ void TF_InitPlugin(TF_Status* status) { /*recursively_create_dir=*/nullptr, tf_posix_filesystem::DeleteFile, tf_posix_filesystem::DeleteDir, + /*delete_recursively=*/nullptr, + /*rename_file=*/nullptr, + /*copy_file=*/nullptr, + tf_posix_filesystem::PathExists, + /*paths_exist=*/nullptr, nullptr, }; ",0,train 977ea14361f7bceeeda2046f3073ea888806be02,tensorflow/tensorflow,"Added more streamlined interfaces for converting rendezvous ids to/from strings and used these in the rendezvous code. Improves performance for ptb_word_lm slightly (saves several allocations and an sscanf per CPU <-> GPU transfer). Change: 115852277",rendezvous.cc,"@@ -41,9 +41,10 @@ string Rendezvous::CreateKey(const string& src_device, uint64 src_incarnation, // // ""src_incarnation"" is used to distinguish a worker when it // restarts. - return strings::StrCat(src_device, "";"", strings::FpToString(src_incarnation), - "";"", dst_device, "";"", name, "";"", frame_iter.frame_id, - "":"", frame_iter.iter_id); + char buf[strings::kFastToBufferSize]; + return strings::StrCat( + src_device, "";"", strings::Uint64ToHexString(src_incarnation, buf), "";"", + dst_device, "";"", name, "";"", frame_iter.frame_id, "":"", frame_iter.iter_id); } // Return the prefix of ""*s"" up to the next occurrence of ""delim"", or @@ -73,7 +74,7 @@ Status Rendezvous::ParseKey(const string& key, ParsedKey* out) { if (s.empty() && // Consumed the whole string !parts[4].empty() && // Exactly five parts DeviceNameUtils::ParseFullName(parts[0], &out->src) && - strings::StringToFp(parts[1].ToString(), &out->src_incarnation) && + strings::HexStringToUint64(parts[1], &out->src_incarnation) && DeviceNameUtils::ParseFullName(parts[2], &out->dst) && !parts[3].empty()) { out->src_device.assign(parts[0].data(), parts[0].size()); ",0,test 977ea14361f7bceeeda2046f3073ea888806be02,tensorflow/tensorflow,"Added more streamlined interfaces for converting rendezvous ids to/from strings and used these in the rendezvous code. Improves performance for ptb_word_lm slightly (saves several allocations and an sscanf per CPU <-> GPU transfer). Change: 115852277",numbers.cc,"@@ -15,6 +15,7 @@ limitations under the License. 
#include ""tensorflow/core/lib/strings/numbers.h"" +#include #include #include #include @@ -237,6 +238,38 @@ bool StringToFp(const string& s, Fprint* fp) { } } +StringPiece Uint64ToHexString(uint64 v, char* buf) { + static const char* hexdigits = ""0123456789abcdef""; + const int num_byte = 16; + buf[num_byte] = '\0'; + for (int i = num_byte - 1; i >= 0; i--) { + buf[i] = hexdigits[v & 0xf]; + v >>= 4; + } + return StringPiece(buf, num_byte); +} + +bool HexStringToUint64(const StringPiece& s, uint64* result) { + uint64 v = 0; + if (s.empty()) { + return false; + } + for (int i = 0; i < s.size(); i++) { + char c = s[i]; + if (c >= '0' && c <= '9') { + v = (v << 4) + (c - '0'); + } else if (c >= 'a' && c <= 'f') { + v = (v << 4) + 10 + (c - 'a'); + } else if (c >= 'A' && c <= 'F') { + v = (v << 4) + 10 + (c - 'A'); + } else { + return false; + } + } + *result = v; + return true; +} + string HumanReadableNumBytes(int64 num_bytes) { if (num_bytes == kint64min) { // Special case for number with not representable negation. ",0,test 977ea14361f7bceeeda2046f3073ea888806be02,tensorflow/tensorflow,"Added more streamlined interfaces for converting rendezvous ids to/from strings and used these in the rendezvous code. Improves performance for ptb_word_lm slightly (saves several allocations and an sscanf per CPU <-> GPU transfer). Change: 115852277",numbers.h,"@@ -18,6 +18,7 @@ limitations under the License. #include +#include ""tensorflow/core/lib/core/stringpiece.h"" #include ""tensorflow/core/platform/types.h"" namespace tensorflow { @@ -81,6 +82,16 @@ string FpToString(Fprint fp); // returns false. bool StringToFp(const string& s, Fprint* fp); +// Convert a 64-bit fingerprint value to an ASCII representation that +// is terminated by a '\0'. +// Buf must point to an array of at least kFastToBufferSize characters +StringPiece Uint64ToHexString(uint64 v, char* buf); + +// Attempt to parse a uint64 in the form encoded by FastUint64ToHexString. If +// successful, stores the value in *v and returns true. Otherwise, +// returns false. +bool HexStringToUint64(const StringPiece& s, uint64* v); + // Convert strings to 32bit integer values. // Leading and trailing spaces are allowed. // Return false with overflow or invalid input. ",0,test 977ea14361f7bceeeda2046f3073ea888806be02,tensorflow/tensorflow,"Added more streamlined interfaces for converting rendezvous ids to/from strings and used these in the rendezvous code. Improves performance for ptb_word_lm slightly (saves several allocations and an sscanf per CPU <-> GPU transfer). 
Change: 115852277",numbers_test.cc,"@@ -41,6 +41,23 @@ TEST(FpToString, Ints) { EXPECT_FALSE(StringToFp(""0000000000000000xyz"", &dummy)); } +TEST(Uint64ToHexString, Ints) { + for (int s = 0; s < 64; s++) { + for (int delta = -1; delta <= 1; delta++) { + uint64 fp = (1ull << s) + delta; + char buf[kFastToBufferSize]; + StringPiece s = Uint64ToHexString(fp, buf); + uint64 fp2; + EXPECT_TRUE(HexStringToUint64(s, &fp2)); + EXPECT_EQ(fp, fp2) << s; + } + } + uint64 dummy; + EXPECT_FALSE(HexStringToUint64("""", &dummy)); + EXPECT_FALSE(HexStringToUint64(""xyz"", &dummy)); + EXPECT_FALSE(HexStringToUint64(""0000000000000000xyz"", &dummy)); +} + TEST(HumanReadableNumBytes, Bytes) { EXPECT_EQ(""0B"", HumanReadableNumBytes(0)); EXPECT_EQ(""4B"", HumanReadableNumBytes(4)); ",0,test 220c8954d3856648c3deb2013df9fd383ac6ecbe,tensorflow/tensorflow,"add back-ticks for parameter formatting add back-ticks for parameter formatting",mel_ops.py,"@@ -141,9 +141,9 @@ def linear_to_mel_weight_matrix(num_mel_bins=20, A `Tensor` of shape `[num_spectrogram_bins, num_mel_bins]`. Raises: - ValueError: If num_mel_bins/num_spectrogram_bins/sample_rate are not - positive, lower_edge_hertz is negative, frequency edges are incorrectly - ordered, or upper_edge_hertz is larger than the Nyquist frequency. + ValueError: If `num_mel_bins`/`num_spectrogram_bins`/`sample_rate` are not + positive, `lower_edge_hertz` is negative, frequency edges are incorrectly + ordered, or `upper_edge_hertz` is larger than the Nyquist frequency. [mel]: https://en.wikipedia.org/wiki/Mel_scale """""" ",0,train 657c503c4eca8919267863548cd1f516fb774944,tensorflow/tensorflow,"Update microbenchmarks after cl/271457940 PiperOrigin-RevId: 271586404",benchmarks_test.py,"@@ -679,8 +679,7 @@ class MicroBenchmarks(test.Benchmark): tangent = random_ops.random_uniform(shape).cpu() def func(): - with forwardprop.ForwardGradientAccumulator() as acc: - acc.watch(m, tangent) + with forwardprop.ForwardGradientAccumulator(m, tangent) as acc: result = math_ops.matmul(m, m, transpose_b=True) return result, acc.jvp(result) @@ -693,8 +692,7 @@ class MicroBenchmarks(test.Benchmark): with ops.device(CPU): @def_function.function def compiled_function(x, tangent): - with forwardprop.ForwardGradientAccumulator() as acc: - acc.watch(x, tangent) + with forwardprop.ForwardGradientAccumulator(x, tangent) as acc: result = math_ops.matmul(x, x, transpose_b=True) return result, acc.jvp(result) @@ -713,8 +711,7 @@ class MicroBenchmarks(test.Benchmark): @def_function.function() def compiled_function(x, tangent): - with forwardprop.ForwardGradientAccumulator() as acc: - acc.watch(x, tangent) + with forwardprop.ForwardGradientAccumulator(x, tangent) as acc: result = matmul(x, x, transpose_b=True) return result, acc.jvp(result) @@ -734,8 +731,7 @@ class MicroBenchmarks(test.Benchmark): matmul = def_function.function(math_ops.matmul) def func(): - with forwardprop.ForwardGradientAccumulator() as acc: - acc.watch(m, tangent) + with forwardprop.ForwardGradientAccumulator(m, tangent) as acc: result = matmul(m, m, transpose_b=True) return result, acc.jvp(result) ",0,train 75d68bbef7c7a3bbed8b6d0844f66549f1913b34,tensorflow/tensorflow,"Catch some more cases in reduction_ops_test.py. In particular, I ran into a case where `tf.reduce_mean(.., None)` was not properly covered, and that surprised me during some other related change. Change: 123775966",reduction_ops_test.py,"@@ -158,11 +158,13 @@ class SumReductionTest(tf.test.TestCase): # Simple tests for various types. 
def testDoubleReduce1D(self): np_arr = np.arange(1, 6).reshape([5]).astype(np.float64) + self._compareAll(np_arr, None) self._compareAll(np_arr, []) self._compareAll(np_arr, [0]) def testInt32Reduce1D(self): np_arr = np.arange(1, 6).reshape([5]).astype(np.int32) + self._compareAll(np_arr, None) self._compareAll(np_arr, []) self._compareAll(np_arr, [0]) @@ -247,14 +249,17 @@ class SumReductionTest(tf.test.TestCase): class MeanReductionTest(tf.test.TestCase): def _compare(self, x, reduction_axes, keep_dims, use_gpu=False): - np_sum = x - count = 1 - for ra in reduction_axes[::-1]: - np_sum = np.sum(np_sum, axis=ra, keepdims=keep_dims) - count *= x.shape[ra] - np_ans = np_sum / count - with self.test_session(use_gpu=use_gpu): + np_ans = x + if reduction_axes is None: + np_ans = np.mean(np_ans, keepdims=keep_dims) + else: reduction_axes = np.array(reduction_axes).astype(np.int32) + count = 1 + for ra in reduction_axes.ravel()[::-1]: + np_ans = np.sum(np_ans, axis=ra, keepdims=keep_dims) + count *= x.shape[ra] + np_ans /= count + with self.test_session(use_gpu=use_gpu): tf_ans = tf.reduce_mean(x, reduction_axes, keep_dims) out = tf_ans.eval() self.assertAllClose(np_ans, out) @@ -270,6 +275,7 @@ class MeanReductionTest(tf.test.TestCase): # Create a 3D array of floats and reduce across all possible # dimensions np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float32) + self._compareAll(np_arr, None) self._compareAll(np_arr, []) self._compareAll(np_arr, [0]) self._compareAll(np_arr, [1]) @@ -283,6 +289,7 @@ class MeanReductionTest(tf.test.TestCase): # Create a 3D array of doubles and reduce across all possible # dimensions np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float64) + self._compareAll(np_arr, None) self._compareAll(np_arr, []) self._compareAll(np_arr, [0]) self._compareAll(np_arr, [1]) @@ -450,6 +457,7 @@ class MinReductionTest(tf.test.TestCase): # Create a 3D array of floats and reduce across all possible # dimensions np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float32) + self._compareAll(np_arr, None) self._compareAll(np_arr, []) self._compareAll(np_arr, [0]) self._compareAll(np_arr, [1]) @@ -463,6 +471,7 @@ class MinReductionTest(tf.test.TestCase): # Create a 3D array of doubles and reduce across all possible # dimensions np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float64) + self._compareAll(np_arr, None) self._compareAll(np_arr, []) self._compareAll(np_arr, [0]) self._compareAll(np_arr, [1]) ",0,train c250a34f8a30566c55f46eb58487b211f13e7df2,tensorflow/tensorflow,[XLA] diag-part is implemented as xla gather operation,diag_op.cc,"@@ -115,7 +115,9 @@ REGISTER_XLA_OP(Name(""Diag""), DiagOp); class DiagPartOp : public XlaOpKernel { public: - explicit DiagPartOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} + explicit DiagPartOp(OpKernelConstruction* ctx) + : XlaOpKernel(ctx), + is_gpu_(ctx->device_type().type_string() == DEVICE_GPU_XLA_JIT) {} void Compile(XlaOpKernelContext* ctx) override { const TensorShape input_shape = ctx->InputShape(0); @@ -145,12 +147,17 @@ class DiagPartOp : public XlaOpKernel { xla::XlaOp input = ctx->Input(0); + xla::XlaOp reshape_input = xla::Reshape(input, {new_size, new_size}); xla::XlaOp output = xla::Reshape( - xla::GetMatrixDiagonal(xla::Reshape(input, {new_size, new_size})), + is_gpu_ ? 
xla::GetMatrixDiagonalViaGather(reshape_input) + : xla::GetMatrixDiagonal(reshape_input), new_dims); ctx->SetOutput(0, output); } + + private: + const bool is_gpu_; }; REGISTER_XLA_OP(Name(""DiagPart""), DiagPartOp); ",0,train c250a34f8a30566c55f46eb58487b211f13e7df2,tensorflow/tensorflow,[XLA] diag-part is implemented as xla gather operation,matrix_diag_ops.cc,"@@ -267,7 +267,8 @@ REGISTER_XLA_OP(Name(""MatrixDiagV2"") class MatrixDiagPartOp : public XlaOpKernel { public: explicit MatrixDiagPartOp(OpKernelConstruction* context) - : XlaOpKernel(context) {} + : XlaOpKernel(context), + is_gpu_(context->device_type().type_string() == DEVICE_GPU_XLA_JIT) {} void Compile(XlaOpKernelContext* context) override { const TensorShape input_shape = context->InputShape(0); @@ -315,13 +316,17 @@ class MatrixDiagPartOp : public XlaOpKernel { std::vector diag_list; xla::PaddingConfig padding_config; if (num_diags == 1) { - context->SetOutput(0, xla::GetMatrixDiagonal(input, upper_diag_index)); + context->SetOutput(0, + is_gpu_ ? xla::GetMatrixDiagonalViaGather(input, upper_diag_index) + : xla::GetMatrixDiagonal(input, upper_diag_index)); return; } padding_config = xla::MakeNoPaddingConfig(input_rank - 1); for (int diag_index = upper_diag_index; diag_index >= lower_diag_index; --diag_index) { - auto single_diag = xla::GetMatrixDiagonal(input, diag_index); + xla::XlaOp single_diag = + is_gpu_ ? xla::GetMatrixDiagonalViaGather(input, diag_index) + : xla::GetMatrixDiagonal(input, diag_index); const int64 diag_length = (diag_index >= 0) ? (num_cols - diag_index) : (num_rows + diag_index); const int64 padding_length = max_diag_len - diag_length; @@ -336,6 +341,9 @@ class MatrixDiagPartOp : public XlaOpKernel { xla::ConcatInDim(context->builder(), diag_list, input_rank - 2); context->SetOutput(0, xla::Reshape(concat, output_shape.dim_sizes())); } + + private: + const bool is_gpu_; }; REGISTER_XLA_OP(Name(""MatrixDiagPart""), MatrixDiagPartOp); ",0,train c250a34f8a30566c55f46eb58487b211f13e7df2,tensorflow/tensorflow,[XLA] diag-part is implemented as xla gather operation,matrix.cc,"@@ -19,6 +19,9 @@ limitations under the License. #include #include #include +#include +#include +#include #include ""absl/algorithm/container.h"" #include ""absl/container/flat_hash_set.h"" @@ -102,6 +105,70 @@ XlaOp GetMatrixDiagonal(XlaOp x, int k) { }); } +XlaOp GetMatrixDiagonalViaGather(XlaOp x, int k) { + XlaBuilder* builder = x.builder(); + return builder->ReportErrorOrReturn([&]() -> StatusOr { + TF_ASSIGN_OR_RETURN(Shape shape, builder->GetShape(x)); + auto n_dims = static_cast(shape.rank()); + TF_RET_CHECK(n_dims >= 2); + const int64 m = shape.dimensions(n_dims - 2); + const int64 n = shape.dimensions(n_dims - 1); + + // The start_indices has a shape of {diag_len, 2}, and each pair of value in + // its dimension 1 represents the (row, col) of the diagonal. We set + // index_vector_dim to 1 and make start_index_map and collapsed_slice_dims + // contain the same two dimension indices. This makes sure that the (row, + // col) pairs in start_indices are propagated to the indices for the two + // collapsed dimensions in the operand indices through start_index_map. + const int64 num_index_dims = 2; + const int64 axis = n_dims - num_index_dims; + + // Calculate the indices of diagonal part with offset k. 
+ const int64 diag_len = std::max(std::min(m + std::min(k, 0), + n - std::max(k, 0)), + 0LL); + XlaOp diag_base_indices = BroadcastInDim( + Iota(builder, S32, diag_len), { diag_len, num_index_dims }, { 0 }); + XlaOp diag_offset = Broadcast( + ConstantR1(builder, { std::max(-k, 0), std::max(k, 0) }), + { diag_len }); + XlaOp start_indices = Add(diag_base_indices, diag_offset); + + // Example of a 3D diag-part extracting diagonal part with offset=1 out of a + // tensor of shape [2,5,4]. + // + // operand = s32[2,5,4] parameter(0) + // indices = s32[3,2] parameter(1) + // gather = s32[2,3] gather(operand, indices), + // offset_dims={0}, + // collapsed_slice_dims={1,2}, + // start_index_map={1,2}, + // index_vector_dim=1, + // slice_sizes={2, 1, 1} + + xla::GatherDimensionNumbers dim_numbers; + std::vector slice_sizes; + slice_sizes.reserve(n_dims); + for (int64 i = 0; i < n_dims; i++) { + int64 window_bound; + if (axis <= i) { + dim_numbers.add_collapsed_slice_dims(i); + dim_numbers.add_start_index_map(i); + window_bound = (shape.dimensions(i) != 0) ? 1 : 0; + } else { + dim_numbers.add_offset_dims(i); + window_bound = shape.dimensions(i); + } + slice_sizes.push_back(window_bound); + } + + dim_numbers.set_index_vector_dim(1); + + return Gather(x, start_indices, dim_numbers, slice_sizes, + /*indices_are_sorted=*/true); + }); +} + XlaOp SetMatrixDiagonal(XlaOp matrix, XlaOp diag, int k) { XlaBuilder* builder = matrix.builder(); return builder->ReportErrorOrReturn([&]() -> StatusOr { ",0,train c250a34f8a30566c55f46eb58487b211f13e7df2,tensorflow/tensorflow,[XLA] diag-part is implemented as xla gather operation,matrix.h,"@@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_CLIENT_LIB_MATRIX_H_ #include +#include #include ""absl/strings/string_view.h"" #include ""absl/types/span.h"" @@ -44,6 +45,7 @@ XlaOp GetDiagonalMask(XlaOp x, int diagonal = 0); // If k < 0: then the output has shape [..., min(M + k, N)], containing the // diagonal elements (i.e., with indices [..., i - k, i]). XlaOp GetMatrixDiagonal(XlaOp x, int k = 0); +XlaOp GetMatrixDiagonalViaGather(XlaOp x, int k = 0); // Places diag along the kth diagonal of target. XlaOp SetMatrixDiagonal(XlaOp matrix, XlaOp diag, int k = 0); ",0,train c250a34f8a30566c55f46eb58487b211f13e7df2,tensorflow/tensorflow,[XLA] diag-part is implemented as xla gather operation,matrix_test.cc,"@@ -16,6 +16,9 @@ limitations under the License. 
#include ""tensorflow/compiler/xla/client/lib/matrix.h"" #include +#include +#include +#include #include ""absl/strings/string_view.h"" #include ""tensorflow/compiler/xla/client/lib/constants.h"" @@ -36,6 +39,8 @@ class MatrixTest : public ClientLibraryTestBase { template void TestMatrixDiagonal(); template + void TestMatrixDiagonal4D(); + template void TestSetMatrixDiagonal(); template @@ -118,6 +123,43 @@ XLA_TEST_F(MatrixTest, GetMatrixDiagonal_S64) { TestMatrixDiagonal(); } XLA_TEST_F(MatrixTest, GetMatrixDiagonal_F32) { TestMatrixDiagonal(); } +template +void MatrixTest::TestMatrixDiagonal4D() { + XlaBuilder builder(""GetMatrixDiagonal""); + Array4D input(2, 2, 4, 3); + input.FillIota(0); + std::map> k_and_expected = { + {0, {{{0, 4, 8}, {12, 16, 20}}, {{24, 28, 32}, {36, 40, 44}}}}, + {1, {{{1, 5}, {13, 17}}, {{25, 29}, {37, 41}}}}, + {2, {{{2}, {14}}, {{26}, {38}}}}, + {3, {{{}, {}}, {{}, {}}}}, + {4, {{{}, {}}, {{}, {}}}}, + {-1, {{{3, 7, 11}, {15, 19, 23}}, {{27, 31, 35}, {39, 43, 47}}}}, + {-2, {{{6, 10}, {18, 22}}, {{30, 34}, {42, 46}}}}, + {-3, {{{9}, {21}}, {{33}, {45}}}}, + {-4, {{{}, {}}, {{}, {}}}}, + }; + for (const auto& kv : k_and_expected) { + XlaOp a; + auto a_data = CreateR4Parameter(input, 0, ""a"", &builder, &a); + GetMatrixDiagonal(a, kv.first); + + ComputeAndCompareR3(&builder, kv.second, {a_data.get()}); + } +} + +XLA_TEST_F(MatrixTest, GetMatrixDiagonal4D_S32) { + TestMatrixDiagonal4D(); +} + +XLA_TEST_F(MatrixTest, GetMatrixDiagonal4D_S64) { + TestMatrixDiagonal4D(); +} + +XLA_TEST_F(MatrixTest, GetMatrixDiagonal4D_F32) { + TestMatrixDiagonal4D(); +} + Array3D BatchedAValsFull() { return {{ {2, 0, 1, 2}, ",0,train c250a34f8a30566c55f46eb58487b211f13e7df2,tensorflow/tensorflow,[XLA] diag-part is implemented as xla gather operation,client_library_test_base.h,"@@ -344,8 +344,8 @@ class ClientLibraryTestBase : public ::testing::Test { const string& name, XlaBuilder* builder, XlaOp* data_handle); // Creates a parameter instruction that wraps the given constant array - // ""array_2d"" and then stores to ""data_handle"" the global handle for that - // parameter. + // ""array_2d"" and then stores it to the global handle for that parameter + // ""data_handle"". // // ""parameter_number"" is the parameter number. // ""name"" is the name of the parameter instruction. @@ -358,8 +358,8 @@ class ClientLibraryTestBase : public ::testing::Test { const string& name, XlaBuilder* builder, XlaOp* data_handle); // Creates a parameter instruction that wraps the given constant array - // ""array_3d"" and then stores to ""data_handle"" the global handle for that - // parameter. + // ""array_3d"" and then stores it to the global handle for that parameter + // ""data_handle"". // // ""parameter_number"" is the parameter number. // ""name"" is the name of the parameter instruction. @@ -371,6 +371,20 @@ class ClientLibraryTestBase : public ::testing::Test { const Array3D& array_3d, int64 parameter_number, const string& name, XlaBuilder* builder, XlaOp* data_handle); + // Creates a parameter instruction that wraps the given constant array + // ""array_4d"" and then stores it to the global handle for that parameter + // ""data_handle"". + // + // ""parameter_number"" is the parameter number. + // ""name"" is the name of the parameter instruction. + // + // When the use_bfloat16 flag is set but NativeT is float, the data will be + // converted to bfloat16. 
+ template + std::unique_ptr CreateR4Parameter( + const Array4D& array_4d, int64 parameter_number, + const string& name, XlaBuilder* builder, XlaOp* data_handle); + // Getter and setter for the use_bfloat16 flag, which indicates whether to run // tests with all float-type input/output converted to bfloat16. bool use_bfloat16() const { return use_bfloat16_; } @@ -603,6 +617,20 @@ std::unique_ptr ClientLibraryTestBase::CreateR3Parameter( return data; } +template +std::unique_ptr ClientLibraryTestBase::CreateR4Parameter( + const Array4D& array_4d, int64 parameter_number, + const string& name, XlaBuilder* builder, XlaOp* data_handle) { + Literal literal = LiteralUtil::CreateR4FromArray4D(array_4d); + if (use_bfloat16_ && literal.shape().element_type() == F32) { + literal = LiteralUtil::ConvertF32ToBF16(literal); + } + std::unique_ptr data = + client_->TransferToServer(literal).ConsumeValueOrDie(); + *data_handle = Parameter(builder, parameter_number, literal.shape(), name); + return data; +} + template std::vector ClientLibraryTestBase::CreatePseudorandomR1( const int width, NativeT min_value, NativeT max_value, uint32 seed) { ",0,train 39ae74c84bfde629d1bd2bed2f88f3e32f5417c3,tensorflow/tensorflow,"allocate_persistent comment update in op_kernel.h PiperOrigin-RevId: 373557953 Change-Id: I99efb23a1ee27e2941120132ddb8e4d582386c0a",op_kernel.h,"@@ -272,13 +272,8 @@ class OpKernelConstruction { // Op kernel construction. Scratch tensors should be allocated using // allocate_temp below. Some kernels need to keep tensors in between // invocations. If such a Tensor is allocated during kernel - // construction this must be done using allocate_persistent, and the - // Op may only store the returned PersistentTensor object. When the - // Tensor is needed in a subsequent invocation, it can be retrieved - // from the PersistentTensor using the AccessTensor method. This - // ensures that the system is made aware of any use of the tensor's - // allocated memory, which is needed for correctness on asynchronous - // devices such as GPUs. + // construction this also must be done using allocate_temp, and the + // Op may only store the returned Tensor object. // Allocates a temporary Tensor of the specified type and shape. The // Tensor must not be used after kernel construction is @@ -288,6 +283,9 @@ class OpKernelConstruction { Status allocate_temp(DataType type, const TensorShape& shape, Tensor* out_temp, AllocatorAttributes allocator_attr); + // The following call is obsolete per b/185257650 and kept in place until + // it is fully removed form the code base. + // Please use Tensor class and allocate_temp instead. // Allocates a Tensor of the specified type and shape which the Op // plans to maintain as persistent state. out_persistent holds the // PersistentTensor which is the object the caller should store. For ",0,train 3c80efb7c9db7ff859cb748ea743c6693296f211,tensorflow/tensorflow,"Remove tags visible in browser tabs. 
Change: 147622442",parser.py,"@@ -583,7 +583,7 @@ def _generate_markdown_for_function(full_name, duplicate_names, else: aliases = '' - return '# `%s%s`\n\n%s%s%s' % ( + return '# %s%s\n\n%s%s%s' % ( full_name, signature, aliases, guides, docstring) @@ -628,7 +628,7 @@ def _generate_markdown_for_class(full_name, duplicate_names, py_class, else: aliases = '' - docs = '# `%s`\n\n%s%s%s\n\n' % (full_name, aliases, guides, docstring) + docs = '# %s\n\n%s%s%s\n\n' % (full_name, aliases, guides, docstring) field_names = [] properties = [] @@ -750,7 +750,7 @@ def _generate_markdown_for_module(full_name, duplicate_names, module, # TODO(deannarubin): Make this list into a table. - return '# Module `%s`\n\n%s%s\n\n## Members\n\n%s' % ( + return '# Module: %s\n\n%s%s\n\n## Members\n\n%s' % ( full_name, aliases, docstring, '\n\n'.join(member_links)) ",0,train 598f13dc7cd495a4d5be1ecddbc34a3780ef6e8d,tensorflow/tensorflow,Refactor RepeatDatasetOpTest,repeat_dataset_op_test.cc,"@@ -41,10 +41,10 @@ class RepeatDatasetOpTest : public DatasetOpsTestBase { const DataTypeVector &output_types, const std::vector &output_shapes, std::unique_ptr *op_kernel) { - node_def_ = test::function::NDef( + NodeDef node_def = test::function::NDef( kNodeName, kOpName, {""input_dataset"", ""count""}, {{""output_types"", output_types}, {""output_shapes"", output_shapes}}); - TF_RETURN_IF_ERROR(CreateOpKernel(node_def_, op_kernel)); + TF_RETURN_IF_ERROR(CreateOpKernel(node_def, op_kernel)); return Status::OK(); } @@ -56,9 +56,6 @@ class RepeatDatasetOpTest : public DatasetOpsTestBase { TF_RETURN_IF_ERROR(CreateOpKernelContext(op_kernel, inputs, context)); return Status::OK(); } - - private: - NodeDef node_def_; }; struct TestCase { @@ -123,11 +120,11 @@ TestCase ForeverRepeatTestCase() { /*breakpoints*/ {0, 1, 3}}; } -class ParameterizedDatasetTest +class ParameterizedDatasetOpTest : public RepeatDatasetOpTest, public ::testing::WithParamInterface {}; -TEST_P(ParameterizedDatasetTest, GetNext) { +TEST_P(ParameterizedDatasetOpTest, GetNext) { int thread_num = 2, cpu_num = 2; TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); @@ -198,7 +195,38 @@ TEST_P(ParameterizedDatasetTest, GetNext) { } } -TEST_F(RepeatDatasetOpTest, DatasetName) { +TEST_F(RepeatDatasetOpTest, DatasetNodeName) { + int thread_num = 2, cpu_num = 2; + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + const TestCase &test_case = FiniteRepeatTestCase(); + Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({})); + std::vector inputs_for_tensor_slice_dataset = test_case.input_tensors; + TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset, + &tensor_slice_dataset_tensor)); + Tensor count = CreateTensor(TensorShape{}, {test_case.count}); + gtl::InlinedVector inputs_for_repeat_dataset; + inputs_for_repeat_dataset.emplace_back(&tensor_slice_dataset_tensor); + inputs_for_repeat_dataset.emplace_back(&count); + + std::unique_ptr repeat_dataset_kernel; + TF_ASSERT_OK(CreateRepeatDatasetKernel(test_case.expected_output_dtypes, + test_case.expected_output_shapes, + &repeat_dataset_kernel)); + std::unique_ptr repeat_dataset_context; + TF_ASSERT_OK(CreateRepeatDatasetContext(repeat_dataset_kernel.get(), + &inputs_for_repeat_dataset, + &repeat_dataset_context)); + DatasetBase *repeat_dataset; + TF_ASSERT_OK(CreateDataset(repeat_dataset_kernel.get(), + repeat_dataset_context.get(), &repeat_dataset)); + core::ScopedUnref scoped_unref(repeat_dataset); 
+ + EXPECT_EQ(repeat_dataset->node_name(), kNodeName); +} + +TEST_F(RepeatDatasetOpTest, DatasetTypeString) { int thread_num = 2, cpu_num = 2; TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); @@ -229,7 +257,7 @@ TEST_F(RepeatDatasetOpTest, DatasetName) { EXPECT_EQ(repeat_dataset->type_string(), kOpName); } -TEST_P(ParameterizedDatasetTest, DatasetOutputDtypes) { +TEST_P(ParameterizedDatasetOpTest, DatasetOutputDtypes) { int thread_num = 2, cpu_num = 2; TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); @@ -259,7 +287,7 @@ TEST_P(ParameterizedDatasetTest, DatasetOutputDtypes) { test_case.expected_output_dtypes)); } -TEST_P(ParameterizedDatasetTest, DatasetOutputShapes) { +TEST_P(ParameterizedDatasetOpTest, DatasetOutputShapes) { int thread_num = 2, cpu_num = 2; TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); @@ -289,7 +317,7 @@ TEST_P(ParameterizedDatasetTest, DatasetOutputShapes) { test_case.expected_output_shapes)); } -TEST_P(ParameterizedDatasetTest, Cardinality) { +TEST_P(ParameterizedDatasetOpTest, Cardinality) { int thread_num = 2, cpu_num = 2; TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); @@ -354,7 +382,7 @@ TEST_F(RepeatDatasetOpTest, DatasetSave) { TF_ASSERT_OK(writer.Flush()); } -TEST_P(ParameterizedDatasetTest, IteratorOutputDtypes) { +TEST_P(ParameterizedDatasetOpTest, IteratorOutputDtypes) { int thread_num = 2, cpu_num = 2; TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); @@ -391,7 +419,7 @@ TEST_P(ParameterizedDatasetTest, IteratorOutputDtypes) { test_case.expected_output_dtypes)); } -TEST_P(ParameterizedDatasetTest, IteratorOutputShapes) { +TEST_P(ParameterizedDatasetOpTest, IteratorOutputShapes) { int thread_num = 2, cpu_num = 2; TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); @@ -428,7 +456,7 @@ TEST_P(ParameterizedDatasetTest, IteratorOutputShapes) { test_case.expected_output_shapes)); } -TEST_P(ParameterizedDatasetTest, IteratorOutputPrefix) { +TEST_P(ParameterizedDatasetOpTest, IteratorOutputPrefix) { int thread_num = 2, cpu_num = 2; TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); @@ -470,7 +498,7 @@ TEST_P(ParameterizedDatasetTest, IteratorOutputPrefix) { } } -TEST_P(ParameterizedDatasetTest, Roundtrip) { +TEST_P(ParameterizedDatasetOpTest, Roundtrip) { int thread_num = 2, cpu_num = 2; TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); @@ -550,7 +578,7 @@ TEST_P(ParameterizedDatasetTest, Roundtrip) { } } -INSTANTIATE_TEST_SUITE_P(RepeatDatasetOpTest, ParameterizedDatasetTest, +INSTANTIATE_TEST_SUITE_P(RepeatDatasetOpTest, ParameterizedDatasetOpTest, ::testing::ValuesIn(std::vector( {FiniteRepeatTestCase(), EmptyRepeatTestCase(), ForeverRepeatTestCase()}))); ",0,train 9f320410a2cc0de6e3cdc8d372bfa79676f85058,tensorflow/tensorflow,"Support align_corners and half_pixel_centers for resize ops in NNAPI delegate. 
PiperOrigin-RevId: 310835481 Change-Id: I6538e64b453bc3b633a5656a8130ed2139781a94",acceleration_test_list.cc,"@@ -300,13 +300,15 @@ VariedShapeSpec/ReshapeOpTest/RegularShapes/1 VariedShapeSpec/ReshapeOpTest/WithStretchDimension/1 # resize_bilinear_test +// align_corners & half_pixel_centers are not implemented in NNAPI before API 30 +ResizeBilinearOpTest/ResizeBilinearOpTest.+HalfPixelCenters.*,30 // Only models with constant size tensor are accelerated ResizeBilinearOpTest/ResizeBilinearOpTest/.+/0,29 # resize_nearest_neighbor_test -// align_corners & half_pixel_centers are not implemented in NNAPI. --ResizeNearestNeighborOpTest/ResizeNearestNeighborOpTest.+AlignCorners.*,29 --ResizeNearestNeighborOpTest/ResizeNearestNeighborOpTest.+HalfPixelCenters.*,29 +// align_corners & half_pixel_centers are not implemented in NNAPI before API 30 +ResizeNearestNeighborOpTest/ResizeNearestNeighborOpTest.+AlignCorners.*,30 +ResizeNearestNeighborOpTest/ResizeNearestNeighborOpTest.+HalfPixelCenters.*,30 // Only models with constant size tensor are accelerated ResizeNearestNeighborOpTest/ResizeNearestNeighborOpTest/.+/0,29 ",0,train 9f320410a2cc0de6e3cdc8d372bfa79676f85058,tensorflow/tensorflow,"Support align_corners and half_pixel_centers for resize ops in NNAPI delegate. PiperOrigin-RevId: 310835481 Change-Id: I6538e64b453bc3b633a5656a8130ed2139781a94",nnapi_delegate.cc,"@@ -1648,13 +1648,14 @@ bool NNAPIDelegateKernel::Validate( } auto builtin = reinterpret_cast(node->builtin_data); - Expect(!builtin->align_corners, - NNAPIValidationFailureType::kUnsupportedOperandValue, - ""NNAPI does not support align_corners == true."", &val_ctx); - // TODO(b/147696142): Update when NNAPI delegate can support TF2 behavior. - Expect(!builtin->half_pixel_centers, - NNAPIValidationFailureType::kUnsupportedOperandValue, - ""NNAPI does not support half_pixel_centers == true."", &val_ctx); + if (android_sdk_version <= kMinSdkVersionForNNAPI12) { + Expect(!builtin->align_corners, + NNAPIValidationFailureType::kUnsupportedOperandValue, + ""NNAPI does not support align_corners == true."", &val_ctx); + Expect(!builtin->half_pixel_centers, + NNAPIValidationFailureType::kUnsupportedOperandValue, + ""NNAPI does not support half_pixel_centers == true."", &val_ctx); + } if (android_sdk_version < kMinSdkVersionForNNAPI12) { Expect(input.type == kTfLiteFloat32, NNAPIValidationFailureType::kUnsupportedInputType, @@ -1668,14 +1669,14 @@ bool NNAPIDelegateKernel::Validate( ExpectIsFloatOrQuant8Operator(context, node, &val_ctx); auto builtin = reinterpret_cast( node->builtin_data); - // TODO(b/149823713): Update when NNAPI delegate can support align_corners - // & half_pixel_centers. 
- Expect(!builtin->align_corners, - NNAPIValidationFailureType::kUnsupportedOperandValue, - ""NNAPI does not support align_corners == true."", &val_ctx); - Expect(!builtin->half_pixel_centers, - NNAPIValidationFailureType::kUnsupportedOperandValue, - ""NNAPI does not support half_pixel_centers == true."", &val_ctx); + if (android_sdk_version <= kMinSdkVersionForNNAPI12) { + Expect(!builtin->align_corners, + NNAPIValidationFailureType::kUnsupportedOperandValue, + ""NNAPI does not support align_corners == true."", &val_ctx); + Expect(!builtin->half_pixel_centers, + NNAPIValidationFailureType::kUnsupportedOperandValue, + ""NNAPI does not support half_pixel_centers == true."", &val_ctx); + } } break; case kTfLiteBuiltinSqueeze: { ExpectOpVersion(version, 1, &val_ctx); @@ -2436,6 +2437,14 @@ TfLiteStatus NNAPIDelegateKernel::Map( const int output_width = output.dims->data[2]; mapping_args.builder->AddScalarInt32Operand(output_width); mapping_args.builder->AddScalarInt32Operand(output_height); + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + if (builtin->align_corners == true || + builtin->half_pixel_centers == true) { + mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format + mapping_args.builder->AddScalarBoolOperand(builtin->align_corners); + mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers); + } *nn_op_type = ANEURALNETWORKS_RESIZE_BILINEAR; } break; case kTfLiteBuiltinResizeNearestNeighbor: { @@ -2445,7 +2454,13 @@ TfLiteStatus NNAPIDelegateKernel::Map( mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[1]); mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[0]); mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format - + auto builtin = reinterpret_cast( + mapping_args.node->builtin_data); + if (builtin->align_corners == true || + builtin->half_pixel_centers == true) { + mapping_args.builder->AddScalarBoolOperand(builtin->align_corners); + mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers); + } *nn_op_type = ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR; } break; case kTfLiteBuiltinSqueeze: { ",0,train 9f320410a2cc0de6e3cdc8d372bfa79676f85058,tensorflow/tensorflow,"Support align_corners and half_pixel_centers for resize ops in NNAPI delegate. PiperOrigin-RevId: 310835481 Change-Id: I6538e64b453bc3b633a5656a8130ed2139781a94",resize_bilinear_test.cc,"@@ -190,10 +190,6 @@ TEST_P(ResizeBilinearOpTest, TwoDimensionalResizeWithTwoBatches) { TEST_P(ResizeBilinearOpTest, TwoDimensionalResizeWithTwoBatches_HalfPixelCenters) { - // TODO(b/147696142): Update when NNAPI delegate can support TF2 behavior. - if (SingleOpModel::GetForceUseNnapi()) { - return; - } ResizeBilinearOpModel m({TensorType_FLOAT32, {2, 2, 2, 1}}, {3, 3}, GetParam(), /**half_pixel_centers**/ true); m.SetInput({ @@ -253,10 +249,6 @@ TEST_P(ResizeBilinearOpTest, TwoDimensionalResizeWithTwoBatchesUInt8) { TEST_P(ResizeBilinearOpTest, TwoDimensionalResizeWithTwoBatchesUInt8_HalfPixelCenters) { - // TODO(b/147696142): Update when NNAPI delegate can support TF2 behavior. - if (SingleOpModel::GetForceUseNnapi()) { - return; - } ResizeBilinearOpModel m({TensorType_UINT8, {2, 2, 2, 1}}, {3, 3}, GetParam(), /**half_pixel_centers**/ true); m.SetInput({ ",0,train 6161367d8942561f2eeb640b3798cfd8397ea35c,tensorflow/tensorflow,"Blacklist XRT ops in TF Eager's small Tensor pin to host optimization. PiperOrigin-RevId: 231513080",execute.cc,"@@ -17,6 +17,7 @@ limitations under the License. 
#include +#include ""absl/strings/match.h"" #include ""tensorflow/core/common_runtime/device.h"" #include ""tensorflow/core/common_runtime/device_set.h"" #include ""tensorflow/core/common_runtime/eager/context.h"" @@ -750,7 +751,10 @@ bool IsPinnableOp(const string& op_type) { ""StatelessRandomNormal"", }); - return unpinnable_ops->find(op_type) == unpinnable_ops->end(); + // XRT ops refer to per-device handles that are not safe to move between + // devices. + return unpinnable_ops->find(op_type) == unpinnable_ops->end() && + !absl::StartsWith(op_type, ""XRT""); } // The Op device may be updated if: ",0,train c8530b907a686b92c94d13f854dc504fa10901db,tensorflow/tensorflow,"tfe.Network naming under variable scopes. Networks take on the full prefix of their parent variable scopes. Fixes #14164. PiperOrigin-RevId: 174934769",network.py,"@@ -244,6 +244,12 @@ class Network(base.Layer): self._owned_layers = {} # The scope to use if we end up without a parent. self._default_parent_variable_scope = variable_scope.get_variable_scope() + # Hold on to the variable scope counts from init to check whether a scope + # with the name we want was ever created in our parent scope. Without this + # check we might have name collisions if the parent scope on init gets + # closed before build is called. + self._variable_scope_counts_on_init = ( + variable_scope._get_default_variable_store().variable_scopes_count) self._custom_getter, self._deferred_restorations = ( _make_custom_getter_for_deferred_restorations()) @@ -261,18 +267,29 @@ class Network(base.Layer): def _finalize_name(self, parent_network): if not self._name: - if not parent_network: - name_uid_map = base._get_default_graph_uid_map() - else: - name_uid_map = parent_network._sub_layer_name_uids # Were were not passed a name explicitly (or it was blank), so this is an # anonymous Network. We make up a unique name. if parent_network: avoid_names = parent_network._owned_layers + name_uid_map = parent_network._sub_layer_name_uids else: - avoid_names = None + name_uid_map = base._get_default_graph_uid_map() + # Figure out which names we have to avoid based on which variable scope + # we're nested in. + strip_name = self._default_parent_variable_scope.name + if strip_name: + strip_name += ""/"" + def _strip_on_init_scope(name): + if name.startswith(strip_name): + return name[len(strip_name):] + else: + return None + avoid_names = set( + _strip_on_init_scope(name) + for name in self._variable_scope_counts_on_init.keys() if name) self._name, self._base_name = self._make_unique_name( - name_uid_map=name_uid_map, avoid_names=avoid_names) + name_uid_map=name_uid_map, avoid_names=avoid_names, + namespace=self._default_parent_variable_scope.name) if self._first_parent is None or (self._first_parent # False = no parent and self._first_parent() is None): # Save a pointer to the parent Network so that we can later check that the @@ -302,7 +319,13 @@ class Network(base.Layer): parent_scope = first_parent._scope else: parent_scope = self._default_parent_variable_scope - with variable_scope.variable_scope(parent_scope): + with variable_scope.variable_scope(parent_scope) as parent_vs: + expected_scope_name = parent_vs.name + ""/"" + self._name + if expected_scope_name in self._variable_scope_counts_on_init: + raise ValueError( + (""A Network named '%s' already exists (or a variable_scope was "" + ""created with this name). Names must be unique."") % ( + self._name,)) # Make sure variables with this prefix will be unique. 
with variable_scope.variable_scope( None, use_resource=True, default_name=self._name) as scope: @@ -319,25 +342,22 @@ class Network(base.Layer): ""created with this name). Names must be unique."") % ( self._name,)) if (first_parent - and scope_prefix[:-1] != first_parent._scope.name): + and scope_prefix[:-1] != first_parent.scope_name): raise ValueError( (""Network variable names must match a nesting of sub-Network "" ""names. Expected prefix '%s' from parent network, but got "" ""'%s' when attempting to create a variable_scope for Network "" ""'%s'. Likely an explicit variable_scope was inserted into "" ""the nesting."") % ( - first_parent._scope.name, + first_parent.scope_name, scope_prefix[:-1], self._name)) elif not first_parent and scope_prefix: # For the case when this Network is not nested inside any other - # Network, but is in a variable_scope. This is an error for now. - raise ValueError( - ""Creating Networks inside named variable_scopes is currently "" - ""not supported (to ensure that variable names match the names "" - ""of Networks in which they were first created). To set "" - ""options, try `with tf.variable_scope(''):`. If this "" - ""limitation bothers you, please file a feature request."") + # Network, but is in a variable_scope. This Network's name takes on + # the full variable scope prefix. + self._name = scope_name + for non_network_sublayer in self._non_network_sublayers: self._set_scope_for_nonnetwork_sublayer(non_network_sublayer) @@ -355,8 +375,7 @@ class Network(base.Layer): raise ValueError( (""The parent of a Layer added to Network %s was garbage collected "" ""before the Layer was built. If this limitation bothers you "" - ""please, comment on "" - ""https://github.com/tensorflow/tensorflow/issues/14164."") % + ""please file a feature request."") % (self.name,)) with variable_scope.variable_scope(parent_scope): # Horrid hack to make Layer variable names which are direct @@ -420,7 +439,9 @@ class Network(base.Layer): # name, and we should respect it (subject to error checking). layer._name, layer._base_name = layer._make_unique_name( name_uid_map=self._sub_layer_name_uids, - avoid_names=self._owned_layers) + avoid_names=self._owned_layers + # No namespace required, since we've specified our own UID map. + ) layer._first_parent = weakref.ref(self) self._non_network_sublayers.append(layer) if (not layer.built @@ -556,7 +577,7 @@ class Network(base.Layer): if os.path.isdir(save_path): # If we were passed a directory, default to naming based on the Network # name. - save_path = os.path.join(save_path, self.name) + save_path = os.path.join(save_path, self.name.replace(""/"", ""_"")) user_map_func = map_func if map_func is None: map_func = _make_prefix_stripping_map_fn(self.scope_name) @@ -750,7 +771,7 @@ class Network(base.Layer): self._set_scope() # scope_name should be available to map_funcs if os.path.isdir(save_path): # If we don't have a name yet, set no parent. - save_path = os.path.join(save_path, self.name) + save_path = os.path.join(save_path, self.name.replace(""/"", ""_"")) user_map_func = map_func if map_func is None: map_func = _make_prefix_stripping_map_fn(self.scope_name) ",0,train c8530b907a686b92c94d13f854dc504fa10901db,tensorflow/tensorflow,"tfe.Network naming under variable scopes. Networks take on the full prefix of their parent variable scopes. Fixes #14164. 
PiperOrigin-RevId: 174934769",network_test.py,"@@ -410,19 +410,103 @@ class NetworkTest(test.TestCase): @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) def testWrappingInVariableScope(self): + one = constant_op.constant([[1.]]) + # Naming happens in the order of first build rather than the order of + # construction, but for clarity they're the same here and construction is + # annotated. + outside_net_before = MyNetwork() # name=my_network_1 + outside_net_before(one) + captured_scope = variable_scope.get_variable_scope() with variable_scope.variable_scope(""outside_scope""): - net = MyNetwork() - one = constant_op.constant([[1.]]) - with self.assertRaisesRegexp( - ValueError, - (""Creating Networks inside named variable_scopes is currently not "" - ""supported"")): - net(one) - # Alternatively, we could re-name the Network to match the variable_scope: - # self.assertEqual(""outside_scope/my_network_1"", net.name) - # self.assertStartsWith( - # expected_start=""outside_scope/my_network_1/dense/"", - # actual=net.trainable_weights[0].name) + net1 = MyNetwork() # name=outside_scope/my_network_1 + net1(one) + name_conflict1 = MyNetwork(name=""name_conflict"") # fine, unique so far + name_conflict2 = MyNetwork(name=""name_conflict"") # error on build + with variable_scope.variable_scope(""inside_scope""): + # No issue here since the name is unique within its scope. + name_conflict3 = MyNetwork(name=""name_conflict"") + net2 = MyNetwork() # name=outside_scope/my_network_3 to avoid the + # variable_scope my_network_2 below. + vs_name_conflict = MyNetwork(name=""vs_name_conflict"") # conflict below + with variable_scope.variable_scope(""intervening_scope""): + with variable_scope.variable_scope(captured_scope): + with variable_scope.variable_scope(""outside_scope""): + name_conflict4 = MyNetwork(name=""name_conflict"") # error on build + with variable_scope.variable_scope(""my_network_2""): + pass + with variable_scope.variable_scope(""vs_name_conflict""): + pass + net3 = MyNetwork() # name=outside_scope/my_network_4 + name_conflict1(one) + with self.assertRaisesRegexp( + ValueError, ""named 'name_conflict' already exists""): + name_conflict2(one) + name_conflict3(one) + net2(one) + with self.assertRaisesRegexp( + ValueError, ""or a variable_scope was created with this name""): + vs_name_conflict(one) + with self.assertRaisesRegexp( + ValueError, ""named 'name_conflict' already exists""): + name_conflict4(one) + self.assertEqual(""outside_scope/name_conflict"", + name_conflict1.name) + self.assertStartsWith( + expected_start=""outside_scope/name_conflict/dense_1/"", + actual=name_conflict1.variables[0].name) + self.assertEqual(""outside_scope/inside_scope/name_conflict"", + name_conflict3.name) + self.assertStartsWith( + expected_start=""outside_scope/inside_scope/name_conflict/dense_1/"", + actual=name_conflict3.variables[0].name) + self.assertEqual(""outside_scope/my_network_1"", net1.name) + self.assertStartsWith( + expected_start=""outside_scope/my_network_1/dense_1/"", + actual=net1.trainable_weights[0].name) + self.assertEqual(""outside_scope/my_network_3"", net2.name) + self.assertStartsWith( + expected_start=""outside_scope/my_network_3/dense_1/"", + actual=net2.trainable_weights[0].name) + net3(one) + self.assertEqual(""outside_scope/my_network_4"", net3.name) + self.assertStartsWith( + expected_start=""outside_scope/my_network_4/dense_1/"", + actual=net3.trainable_weights[0].name) + outside_net_after = MyNetwork() + outside_net_after(one) + 
self.assertEqual(""my_network_1"", outside_net_before.name) + self.assertStartsWith( + expected_start=""my_network_1/dense_1/"", + actual=outside_net_before.trainable_weights[0].name) + self.assertEqual(""my_network_2"", outside_net_after.name) + self.assertStartsWith( + expected_start=""my_network_2/dense_1/"", + actual=outside_net_after.trainable_weights[0].name) + + @test_util.run_in_graph_and_eager_modes() + def testVariableScopeStripping(self): + with variable_scope.variable_scope(""scope1""): + with variable_scope.variable_scope(""scope2""): + net = MyNetwork() + net(constant_op.constant([[2.0]])) + self.evaluate(net.variables[0].assign([[42.]])) + self.assertEqual(net.name, ""scope1/scope2/my_network_1"") + self.assertStartsWith( + expected_start=""scope1/scope2/my_network_1/dense_1/"", + actual=net.trainable_weights[0].name) + save_path = net.save(self.get_temp_dir()) + self.assertIn(""scope1_scope2_my_network_1"", save_path) + restore_net = MyNetwork() + # Delayed restoration + restore_net.restore(save_path) + restore_net(constant_op.constant([[1.0]])) + self.assertAllEqual([[42.]], + self.evaluate(restore_net.variables[0])) + self.evaluate(restore_net.variables[0].assign([[-1.]])) + # Immediate restoration + restore_net.restore(save_path) + self.assertAllEqual([[42.]], + self.evaluate(restore_net.variables[0])) @test_util.run_in_graph_and_eager_modes() def testLayerNamesRespected(self): ",0,train c8530b907a686b92c94d13f854dc504fa10901db,tensorflow/tensorflow,"tfe.Network naming under variable scopes. Networks take on the full prefix of their parent variable scopes. Fixes #14164. PiperOrigin-RevId: 174934769",base.py,"@@ -401,10 +401,11 @@ class Layer(object): """""" return input_shape - def _make_unique_name(self, name_uid_map=None, avoid_names=None): + def _make_unique_name(self, name_uid_map=None, avoid_names=None, + namespace=''): base_name = _to_snake_case(self.__class__.__name__) name = _unique_layer_name(base_name, name_uid_map=name_uid_map, - avoid_names=avoid_names) + avoid_names=avoid_names, namespace=namespace) return (name, base_name) def _set_scope(self, scope=None): @@ -2370,7 +2371,7 @@ def _get_default_graph_uid_map(): return name_uid_map -def _unique_layer_name(name, name_uid_map=None, avoid_names=None): +def _unique_layer_name(name, name_uid_map=None, avoid_names=None, namespace=''): """"""Makes a layer name (or arbitrary string) unique within a TensorFlow graph. Arguments: @@ -2379,6 +2380,9 @@ def _unique_layer_name(name, name_uid_map=None, avoid_names=None): names. If None (default), uses a per-Graph dictionary. avoid_names: An optional set or dict with names which should not be used. If None (default) does not avoid any names. + namespace: Gets a name which is unique within the (graph, namespace). Layers + which are not Networks use a blank namespace and so get graph-global + names. Returns: Unique string name. 
@@ -2396,6 +2400,7 @@ def _unique_layer_name(name, name_uid_map=None, avoid_names=None): avoid_names = set() proposed_name = None while proposed_name is None or proposed_name in avoid_names: - name_uid_map[name] += 1 - proposed_name = name + '_' + str(name_uid_map[name]) + name_key = (namespace, name) + name_uid_map[name_key] += 1 + proposed_name = name + '_' + str(name_uid_map[name_key]) return proposed_name ",0,train 9bea7a8aa991b63f7349514a5a2dc0d04d261f8f,tensorflow/tensorflow,"Add support for Softmax of 3D tensors PiperOrigin-RevId: 211524810",activations.cc,"@@ -200,7 +200,7 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, input->type, output->type); const int num_dims = NumDimensions(input); - TF_LITE_ENSURE(context, num_dims == 1 || num_dims == 2 || num_dims == 4); + TF_LITE_ENSURE(context, num_dims >= 1 && num_dims <= 4); if (input->type == kTfLiteUInt8) { TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); @@ -453,6 +453,19 @@ void Softmax2DFloat(const TfLiteTensor* input, TfLiteTensor* output, Softmax(input->data.f, input_size, batch_size, params->beta, output->data.f); } +// Takes a 3D tensor and perform softmax along the last dimension. +void Softmax3DFloat(const TfLiteTensor* input, TfLiteTensor* output, + TfLiteSoftmaxParams* params) { + const int batch_size = input->dims->data[0]; + const int intermediate_size = input->dims->data[1]; + const int input_size = input->dims->data[2]; + optimized_ops::Softmax( + GetTensorData(input), + GetTensorShape({batch_size, intermediate_size, 1, input_size}), + params->beta, GetTensorData(output), + GetTensorShape({batch_size, intermediate_size, 1, input_size})); +} + void Softmax1DQuantized(const TfLiteTensor* input, TfLiteTensor* output, TfLiteSoftmaxParams* params, OpData* data) { // TODO(ahentz): this is arguably a dirty trick. Since the implementation @@ -480,6 +493,19 @@ void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output, GetTensorShape({batch_size, 1, 1, input_size})); } +void Softmax3DQuantized(const TfLiteTensor* input, TfLiteTensor* output, + TfLiteSoftmaxParams* params, OpData* data) { + const int batch_size = input->dims->data[0]; + const int intermediate_size = input->dims->data[1]; + const int input_size = input->dims->data[2]; + optimized_ops::Softmax( + GetTensorData(input), + GetTensorShape({batch_size, intermediate_size, 1, input_size}), + data->input_multiplier, data->input_left_shift, data->diff_min, + GetTensorData(output), + GetTensorShape({batch_size, intermediate_size, 1, input_size})); +} + // Takes a 4D tensor and perform softmax along the forth dimension. 
void Softmax4DFloat(const TfLiteTensor* input, TfLiteTensor* output, TfLiteSoftmaxParams* params) { @@ -515,6 +541,10 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) { Softmax2DFloat(input, output, params); return kTfLiteOk; } + if (NumDimensions(input) == 3) { + Softmax3DFloat(input, output, params); + return kTfLiteOk; + } if (NumDimensions(input) == 4) { Softmax4DFloat(input, output, params); return kTfLiteOk; @@ -533,6 +563,10 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) { Softmax2DQuantized(input, output, params, data); return kTfLiteOk; } + if (NumDimensions(input) == 3) { + Softmax3DQuantized(input, output, params, data); + return kTfLiteOk; + } if (NumDimensions(input) == 4) { Softmax4DQuantized(input, output, params, data); return kTfLiteOk; ",0,test 9bea7a8aa991b63f7349514a5a2dc0d04d261f8f,tensorflow/tensorflow,"Add support for Softmax of 3D tensors PiperOrigin-RevId: 211524810",activations_test.cc,"@@ -339,6 +339,76 @@ TEST(QuantizedActivationsOpTest, Softmax4D) { kQuantizedTolerance))); } +TEST(FloatActivationsOpTest, Softmax3D) { + FloatActivationsOpModel m(0.1, + /*input=*/{TensorType_FLOAT32, {1, 2, 4}}); + m.SetInput({ + 0, -6, 2, 4, // depth = 0 + 3, -2, 10, 1, // depth = 1 + }); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({ + .23463, .12877, .28658, .35003, // + .22528, .13664, .45365, .18443, // + }))); + + // Same input, but a different shape. + FloatActivationsOpModel m2(0.1, + /*input=*/{TensorType_FLOAT32, {4, 1, 2}}); + m2.SetInput({ + 0, -6, // + 2, 4, // + 3, -2, // + 10, 1, // + }); + m2.Invoke(); + EXPECT_THAT(m2.GetOutput(), ElementsAreArray(ArrayFloatNear({ + 0.645656, 0.354344, // + 0.450166, 0.549834, // + 0.622459, 0.377541, // + 0.710949, 0.28905, // + }))); +} + +TEST(QuantizedActivationsOpTest, Softmax3D) { + QuantizedActivationsOpModel m( + 0.1, + /*input=*/{TensorType_UINT8, {1, 2, 4}, -10, 10}); + m.SetInput({ + 0, -6, 2, 4, // depth = 0 + 3, -2, 10, 1, // depth = 1 + }); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + { + .23463, .12877, .28658, .35003, // + .22528, .13664, .45365, .18443, // + }, + kQuantizedTolerance))); + + // Same input, but a different shape. + QuantizedActivationsOpModel m2( + 0.1, + /*input=*/{TensorType_UINT8, {4, 1, 2}, -10, 10}); + m2.SetInput({ + 0, -6, // + 2, 4, // + 3, -2, // + 10, 1, // + }); + m2.Invoke(); + EXPECT_THAT(m2.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + { + 0.645656, 0.354344, // + 0.450166, 0.549834, // + 0.622459, 0.377541, // + 0.710949, 0.28905, // + }, + kQuantizedTolerance))); +} + TEST(FloatActivationsOpTest, Softmax1D) { FloatActivationsOpModel m(0.1, /*input=*/{TensorType_FLOAT32, {8}}); ",0,test 7e54bf3113361e36f1a49e71a6ecbcd10ddf7015,tensorflow/tensorflow,"[XLA] Add pattern matcher for select and scatter. PiperOrigin-RevId: 337122945 Change-Id: I9aaa3cdcb3cb3379492e4697ada514124995a8ce",pattern_matcher.h,"@@ -2125,6 +2125,7 @@ XLA_BINOP_PATTERN(ShiftRightLogical) XLA_TERNOP_PATTERN(Clamp); XLA_TERNOP_PATTERN(Scatter); XLA_TERNOP_PATTERN(Select); +XLA_TERNOP_PATTERN(SelectAndScatter); #undef XLA_TERNOP_PATTERN namespace detail { ",0,train c6156d4c7bf79250626b8f13f752777b24967455,tensorflow/tensorflow,"Minor refactor: move unused_min / unused_max variables to the smallest scope possible. 
PiperOrigin-RevId: 289475255 Change-Id: I16d718482e91d51def3d2eb3a52f444763382ee0",lstm_eval.cc,"@@ -500,9 +500,9 @@ inline void LstmStepHybrid( // For each batch and cell: compute input_weight * input. // Skip if input is all zeros. if (!tensor_utils::IsZeroVector(input_ptr, n_batch * n_input)) { - float unused_min, unused_max; for (int b = 0; b < n_batch; ++b) { const int offset = b * n_input; + float unused_min, unused_max; tensor_utils::SymmetricQuantizeFloats( input_ptr + offset, n_input, quantized_input_ptr + offset, &unused_min, &unused_max, &scaling_factors[b]); @@ -549,9 +549,9 @@ inline void LstmStepHybrid( // Skip if auxiliary input is not available or all zeros. if (aux_input_ptr != nullptr && !tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input)) { - float unused_min, unused_max; for (int b = 0; b < n_batch; ++b) { const int offset = b * n_aux_input; + float unused_min, unused_max; tensor_utils::SymmetricQuantizeFloats( aux_input_ptr + offset, n_aux_input, quantized_aux_input_ptr + offset, &unused_min, &unused_max, &scaling_factors[b]); @@ -597,9 +597,9 @@ inline void LstmStepHybrid( if (!tensor_utils::IsZeroVector(output_state_ptr, n_batch * n_output)) { // Save quantization and matmul computation for all zero input. - float unused_min, unused_max; for (int b = 0; b < n_batch; ++b) { const int offset = b * n_output; + float unused_min, unused_max; tensor_utils::SymmetricQuantizeFloats(output_state_ptr + offset, n_output, quantized_output_state_ptr + offset, &unused_min, &unused_max, @@ -761,9 +761,9 @@ inline void LstmStepHybrid( } if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) { // Save quantization and matmul computation for all zero input. - float unused_min, unused_max; for (int b = 0; b < n_batch; ++b) { const int offset = b * n_cell; + float unused_min, unused_max; tensor_utils::SymmetricQuantizeFloats( output_gate_scratch + offset, n_cell, quantized_cell_state_ptr + offset, &unused_min, &unused_max, ",0,train 1597edbeede9d7376626f0026cd94bd8eb7e50b3,tensorflow/tensorflow,"Update VirtualScheduler constructor. PiperOrigin-RevId: 241662817",analytical_cost_estimator.cc,"@@ -123,7 +123,22 @@ AnalyticalCostEstimator::AnalyticalCostEstimator( use_aggressive_shape_inference_(use_aggressive_shape_inference) { scheduler_ = absl::make_unique( use_static_shapes_, use_aggressive_shape_inference_, cluster, - node_manager_.get()); + node_manager_.get(), + absl::make_unique(cluster->GetDevices())); +} + +AnalyticalCostEstimator::AnalyticalCostEstimator( + Cluster* cluster, std::unique_ptr node_estimator, + std::unique_ptr node_manager, + std::unique_ptr placer, bool use_static_shapes, + bool use_aggressive_shape_inference) + : node_estimator_(std::move(node_estimator)), + node_manager_(std::move(node_manager)), + use_static_shapes_(use_static_shapes), + use_aggressive_shape_inference_(use_aggressive_shape_inference) { + scheduler_ = absl::make_unique( + use_static_shapes_, use_aggressive_shape_inference_, cluster, + node_manager_.get(), std::move(placer)); } Status AnalyticalCostEstimator::Initialize(const GrapplerItem& item) { ",0,train 1597edbeede9d7376626f0026cd94bd8eb7e50b3,tensorflow/tensorflow,"Update VirtualScheduler constructor. 
PiperOrigin-RevId: 241662817",analytical_cost_estimator.h,"@@ -47,6 +47,12 @@ class AnalyticalCostEstimator : public CostEstimator { std::unique_ptr node_manager, bool use_static_shapes, bool use_aggressive_shape_inference); + AnalyticalCostEstimator(Cluster* cluster, + std::unique_ptr node_estimator, + std::unique_ptr node_manager, + std::unique_ptr placer, + bool use_static_shapes, + bool use_aggressive_shape_inference); ~AnalyticalCostEstimator() override {} // Initializes the estimator for the specified grappler item. ",0,train 1597edbeede9d7376626f0026cd94bd8eb7e50b3,tensorflow/tensorflow,"Update VirtualScheduler constructor. PiperOrigin-RevId: 241662817",virtual_scheduler.cc,"@@ -259,13 +259,15 @@ std::unique_ptr ReadyNodeManagerFactory( VirtualScheduler::VirtualScheduler(const bool use_static_shapes, const bool use_aggressive_shape_inference, Cluster* cluster, - ReadyNodeManager* ready_nodes) + ReadyNodeManager* ready_nodes, + std::unique_ptr placer) : ready_nodes_(ready_nodes), graph_costs_(Costs::ZeroCosts()), cluster_(cluster), use_static_shapes_(use_static_shapes), use_aggressive_shape_inference_(use_aggressive_shape_inference), - placer_(cluster->GetDevices()) { + placer_(std::move(placer)) { + DCHECK(placer_); // check if the pointer is valid. graph_costs_.num_ops_total = 0; initialized_ = false; track_mem_usage_snapshot_ = VLOG_IS_ON(1); @@ -524,13 +526,13 @@ bool VirtualScheduler::IsPersistentNode(const NodeDef* node) const { } string VirtualScheduler::DeviceName(const NodeDef* node) const { - return placer_.get_canonical_device_name(*node); + return placer_->get_canonical_device_name(*node); } string VirtualScheduler::SanitizedDeviceName(const NodeDef* node) const { // Replace the "":"" characters that may be present in the device name with ""_"". // This makes it possible to then use the resulting string in a node name. - return str_util::StringReplace(placer_.get_canonical_device_name(*node), "":"", + return str_util::StringReplace(placer_->get_canonical_device_name(*node), "":"", ""_"", true); } @@ -620,7 +622,7 @@ OpContext VirtualScheduler::GetCurrNode() const { // Get the device from the placer. DeviceProperties device; - device = placer_.get_device(*node); + device = placer_->get_device(*node); // Special case for _Send op. if (IsSend(*node)) { ",0,train 1597edbeede9d7376626f0026cd94bd8eb7e50b3,tensorflow/tensorflow,"Update VirtualScheduler constructor. PiperOrigin-RevId: 241662817",virtual_scheduler.h,"@@ -263,7 +263,9 @@ class VirtualScheduler { // Does not take ownership of cluster or ready_nodes. VirtualScheduler(const bool use_static_shapes, const bool use_aggressive_shape_inference, Cluster* cluster, - ReadyNodeManager* ready_nodes); + ReadyNodeManager* ready_nodes, + std::unique_ptr placer); + // Initializes the scheduler for the specific grappler item. // Should be called immediately after the c'tor or when the scheduler will be // reused for a new grappler item. All internal states of the scheduler @@ -356,7 +358,7 @@ class VirtualScheduler { bool track_mem_usage_snapshot_; const bool use_aggressive_shape_inference_; - VirtualPlacer placer_; // owned. + std::unique_ptr placer_; }; } // namespace grappler ",0,train 1597edbeede9d7376626f0026cd94bd8eb7e50b3,tensorflow/tensorflow,"Update VirtualScheduler constructor. 
PiperOrigin-RevId: 241662817",virtual_scheduler_test.cc,"@@ -33,8 +33,10 @@ class TestVirtualScheduler : public VirtualScheduler { TestVirtualScheduler(const bool use_static_shapes, const bool use_aggressive_shape_inference, Cluster* cluster) - : VirtualScheduler(use_static_shapes, use_aggressive_shape_inference, - cluster, &ready_node_manager_) { + : VirtualScheduler( + use_static_shapes, use_aggressive_shape_inference, cluster, + &ready_node_manager_, + absl::make_unique(cluster->GetDevices())) { enable_mem_usage_tracking(); } ",0,train 74a99ac8ee77e724163912bbc3e6b45b0a455ec7,tensorflow/tensorflow,"Fix bug causing errors when loops create dynamically-shaped variables. For such variables, their initial dummy value will have a dynamic shape that is always zero at runtime. PiperOrigin-RevId: 390182690 Change-Id: I9640fd7a52c3a14edb9b679e1804d58a5552789a",control_flow.py,"@@ -76,11 +76,13 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import func_graph from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import control_flow_util from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.types import distribute @@ -971,14 +973,39 @@ LEGAL_LOOP_TYPES = 'Tensor, int, float, bool or a list, tuple or dict thereof' def _placeholder_value(like, original=None): + """"""Constructs a (dummy) placeholder value for a loop-initialized variable."""""" if isinstance(like, (variables.Undefined, variables.UndefinedReturnValue)): return original - if isinstance(like, (int, float, bool)): + + elif isinstance(like, (int, float, bool)): return type(like)(0) - if tensor_util.is_tf_type(like): - return array_ops.zeros(like.shape, like.dtype) + + elif tensor_util.is_tf_type(like): + + # To avoid while_loop complaining about shape invariants, the placeholder's + # shape must be identical to the corresponding loop var's shape. This means + # dynamic dimensions where the like value had dynamic dimensions. We + # simulate that by passing a tensor that is deterministically 0, but is + # obtained by means which most constant folders can't see through. + # TODO(mdan): Just use 0 once while_loop is smarter about shape invariants. + dynamic_zero = random_ops.random_uniform(minval=0, maxval=1, shape=()) + placeholder_shape = [] + for s in like.shape: + if s is None: + placeholder_shape.append(dynamic_zero) + elif isinstance(s, tensor_shape.Dimension): + if s.value is None: + placeholder_shape.append(dynamic_zero) + else: + placeholder_shape.append(s.value) + else: + placeholder_shape.append(s) + + return array_ops.zeros(placeholder_shape, like.dtype) + elif isinstance(like, (list, tuple, dict)): return nest.map_structure(_placeholder_value, like) + return original ",0,test 74a99ac8ee77e724163912bbc3e6b45b0a455ec7,tensorflow/tensorflow,"Fix bug causing errors when loops create dynamically-shaped variables. For such variables, their initial dummy value will have a dynamic shape that is always zero at runtime. 
PiperOrigin-RevId: 390182690 Change-Id: I9640fd7a52c3a14edb9b679e1804d58a5552789a",control_flow_test.py,"@@ -669,6 +669,32 @@ class WhileLoopTest(testing.AutoGraphTestCase): # Node naming is inconsistent between V1 and V2. self.assertGraphContains(r'(while/)?pow$', 1) + def test_tensor_creating_variable_of_dynamic_shape(self): + + def body(): + nonlocal i, s + i = array_ops.ones( + [random_ops.random_uniform(minval=1, maxval=4, shape=()), 7]) + s = math_ops.reduce_sum(i) + + def set_state(loop_vars): + nonlocal i, s + i, s = loop_vars + + i = variable_operators.Undefined('i') + s = constant_op.constant(0.0) + control_flow.while_stmt( + test=lambda: math_ops.equal(s, 0), + body=body, + get_state=lambda: (i, s), + set_state=set_state, + symbol_names=('i', 's'), + opts={}) + + self.assertEqual(i[0][0], 1) + self.assertGreaterEqual(s, 7) + self.assertOpCreated('While') # Not stateless because of the random op. + def test_tensor_with_side_effecting_condition(self): v = self.variable('v', 0, dtypes.int32) ",0,test 74a99ac8ee77e724163912bbc3e6b45b0a455ec7,tensorflow/tensorflow,"Fix bug causing errors when loops create dynamically-shaped variables. For such variables, their initial dummy value will have a dynamic shape that is always zero at runtime. PiperOrigin-RevId: 390182690 Change-Id: I9640fd7a52c3a14edb9b679e1804d58a5552789a",testing.py,"@@ -156,6 +156,9 @@ class AutoGraphTestCase(test.TestCase): def assertEqual(self, *args): self.assertions.append((super().assertEqual, list(args))) + def assertGreaterEqual(self, *args): + self.assertions.append((super().assertGreaterEqual, list(args))) + def assertDictEqual(self, *args): self.assertions.append((super().assertDictEqual, list(args))) ",0,test f88bcfc6bd02b7065c4bfc3b401dd5b0a682922f,tensorflow/tensorflow,"Invoke export strategies when train_and_evaluate runs locally. Previous changes export the model in accordance with the known export strategies when train_and_evaluate runs in the distributed mode. This change adds a similar support for the local mode. PiperOrigin-RevId: 170546015",training.py,"@@ -105,21 +105,6 @@ def _is_google_env(): return tf_config.get(_ENVIRONMENT_KEY) == _ENVIRONMENT_GOOGLE_VALUE -def _export_eval_result(eval_result, checkpoint_path, estimator, eval_spec): - """"""Export `eval_result` according to strategies in `EvalSpec`."""""" - export_dir_base = os.path.join( - compat.as_str_any(estimator.model_dir), compat.as_str_any('export')) - - for strategy in eval_spec.export_strategies: - strategy.export( - estimator, - os.path.join( - compat.as_str_any(export_dir_base), compat.as_str_any( - strategy.name)), - checkpoint_path=checkpoint_path, - eval_result=eval_result) - - class TrainSpec( collections.namedtuple('TrainSpec', ['input_fn', 'max_steps', 'hooks'])): """"""Objects passed to `train_and_evaluate`. @@ -384,18 +369,16 @@ class _TrainingExecutor(object): logging.info('Start train and evaluate loop. The evaluate will happen ' 'after {} secs (eval_spec.throttle_secs) or training is ' 'finished.'.format(self._eval_spec.throttle_secs)) + + evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec) + while True: self._estimator.train( input_fn=self._train_spec.input_fn, max_steps=self._train_spec.max_steps, hooks=train_hooks) - metrics = self._estimator.evaluate( - input_fn=self._eval_spec.input_fn, - steps=self._eval_spec.steps, - hooks=self._eval_spec.hooks, - name=self._eval_spec.name) - # TODO(b/65169058): Adds export once export strategies are moved. 
+ metrics = evaluator.evaluate_and_export() if _should_stop_local_train(metrics[ops.GraphKeys.GLOBAL_STEP]): break @@ -503,7 +486,6 @@ class _TrainingExecutor(object): 'evaluation pass as evaluation results are expected to be same ' 'for the same checkpoint.') return None - eval_result = self._estimator.evaluate( input_fn=self._eval_spec.input_fn, steps=self._eval_spec.steps, @@ -515,8 +497,7 @@ class _TrainingExecutor(object): self._log_err_msg('Estimator evaluate returns empty result.') return None - _export_eval_result(eval_result, latest_ckpt_path, self._estimator, - self._eval_spec) + self._export_eval_result(eval_result, latest_ckpt_path) self._last_warning_time = 0 self._previous_ckpt_path = latest_ckpt_path @@ -528,3 +509,18 @@ class _TrainingExecutor(object): if current_time - self._last_warning_time > 600: logging.warning(message) self._last_warning_time = current_time + + def _export_eval_result(self, eval_result, checkpoint_path): + """"""Export `eval_result` according to strategies in `EvalSpec`."""""" + export_dir_base = os.path.join( + compat.as_str_any(self._estimator.model_dir), + compat.as_str_any('export')) + + for strategy in self._eval_spec.export_strategies: + strategy.export( + self._estimator, + os.path.join( + compat.as_str_any(export_dir_base), + compat.as_str_any(strategy.name)), + checkpoint_path=checkpoint_path, + eval_result=eval_result) ",0,train f88bcfc6bd02b7065c4bfc3b401dd5b0a682922f,tensorflow/tensorflow,"Invoke export strategies when train_and_evaluate runs locally. Previous changes export the model in accordance with the known export strategies when train_and_evaluate runs in the distributed mode. This change adds a similar support for the local mode. PiperOrigin-RevId: 170546015",training_test.py,"@@ -21,6 +21,7 @@ from __future__ import print_function import json +import random import time from tensorflow.python.estimator import estimator as estimator_lib @@ -32,7 +33,6 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import monitored_session -from tensorflow.python.training import saver from tensorflow.python.training import server_lib from tensorflow.python.training import session_run_hook from tensorflow.python.util import compat @@ -747,8 +747,7 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase): mock_sleep.assert_called_with(throttle_secs - operation_secs) self.assertTrue(mock_est.evaluate.called) - @test.mock.patch.object(saver, 'latest_checkpoint') - def test_that_export_fn_is_called(self, mock_latest_ckpt): + def test_that_export_fn_is_called(self): mock_est = test.mock.Mock(spec=estimator_lib.Estimator) mock_train_spec = test.mock.Mock(spec=training.TrainSpec) self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec) @@ -895,8 +894,12 @@ class StopAtSecsHookTest(test.TestCase): class TrainingExecutorRunLocalTest(test.TestCase): """"""Tests run_local of _TrainingExecutor."""""" + def unique_checkpoint_every_time_fn(self): + return 'checkpoint_path_%s/' % random.random() + def test_send_stop_at_secs_to_train(self): - mock_est = test.mock.Mock(spec=estimator_lib.Estimator) + mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/') + mock_est.latest_checkpoint = self.unique_checkpoint_every_time_fn train_spec = training.TrainSpec( input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()]) eval_spec = training.EvalSpec( @@ -911,11 +914,24 @@ class 
TrainingExecutorRunLocalTest(test.TestCase): self.assertEqual(eval_spec.throttle_secs, stop_hook._stop_after_secs) def test_runs_in_a_loop_until_max_steps(self): - mock_est = test.mock.Mock(spec=estimator_lib.Estimator) + mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/') + mock_est.latest_checkpoint = self.unique_checkpoint_every_time_fn + + mock_est.times_export_fn_was_called = 0 + def export_fn(estimator, *args, **kwargs): + del args, kwargs + estimator.times_export_fn_was_called += 1 + + export_strategy = export_strategy_lib.ExportStrategy( + name='see_whether_export_fn_is_called', export_fn=export_fn) + train_spec = training.TrainSpec( input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()]) eval_spec = training.EvalSpec( - input_fn=lambda: 1, hooks=[_FakeHook()], throttle_secs=100) + input_fn=lambda: 1, + hooks=[_FakeHook()], + throttle_secs=100, + export_strategies=export_strategy) # should be called 3 times. mock_est.evaluate.side_effect = [{ _GLOBAL_STEP_KEY: train_spec.max_steps - 100 @@ -930,9 +946,11 @@ class TrainingExecutorRunLocalTest(test.TestCase): self.assertEqual(3, mock_est.train.call_count) self.assertEqual(3, mock_est.evaluate.call_count) + self.assertEqual(3, mock_est.times_export_fn_was_called) def test_train_and_evaluate_args(self): - mock_est = test.mock.Mock(spec=estimator_lib.Estimator) + mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/') + mock_est.latest_checkpoint.return_value = 'checkpoint_path/' train_spec = training.TrainSpec( input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()]) eval_spec = training.EvalSpec( @@ -946,6 +964,7 @@ class TrainingExecutorRunLocalTest(test.TestCase): name=eval_spec.name, input_fn=eval_spec.input_fn, steps=eval_spec.steps, + checkpoint_path='checkpoint_path/', hooks=eval_spec.hooks) train_args = mock_est.train.call_args[1] @@ -962,6 +981,36 @@ class TrainingExecutorRunLocalTest(test.TestCase): with self.assertRaisesRegexp(ValueError, 'throttle_secs'): executor.run_local() + def test_that_export_fn_is_called_with_run_local(self): + mock_est = test.mock.Mock(spec=estimator_lib.Estimator) + mock_train_spec = test.mock.Mock(spec=training.TrainSpec) + mock_train_spec.max_steps = 200 + mock_est.evaluate.return_value = { + _GLOBAL_STEP_KEY: mock_train_spec.max_steps + } + # _validate_hooks would have made sure that train_spec.hooks is [], when + # None were passed. + mock_train_spec.hooks = [] + + def export_fn(estimator, *args, **kwargs): + del args, kwargs + estimator.export_fn_was_called = True + + export_strategy = export_strategy_lib.ExportStrategy( + name='see_whether_export_fn_is_called', export_fn=export_fn) + + eval_spec = training.EvalSpec( + input_fn=lambda: 1, + steps=2, + delay_secs=0, + throttle_secs=213, + export_strategies=export_strategy) + + executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec) + executor.run_local() + + self.assertTrue(mock_est.export_fn_was_called) + if __name__ == '__main__': test.main() ",0,train 60e499b4b608a52f5cf2a117c006ce8eac0941e0,tensorflow/tensorflow,"[XLA:GPU] Enable async all-reduce by default. 
PiperOrigin-RevId: 434787265",debug_options_flags.cc,"@@ -76,6 +76,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() { opts.set_xla_allow_excess_precision(true); opts.set_xla_force_host_platform_device_count(1); opts.set_xla_gpu_all_reduce_combine_threshold_bytes(30 * 1024 * 1024); + opts.set_xla_gpu_enable_async_all_reduce(true); opts.set_xla_cpu_enable_xprof_traceme(false); opts.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(false); opts.set_xla_multiheap_size_constraint_per_heap(-1); ",0,test f3bb411f120066c244131c0e5d80856948a45674,tensorflow/tensorflow,"Add a standard way to add a warning in tf_upgrade_v2.py script. PiperOrigin-RevId: 219352016",ast_edits.py,"@@ -184,6 +184,17 @@ class _ASTCallVisitor(ast.NodeVisitor): except KeyError: pass + def _print_warning_for_function(self, node, full_name): + function_warnings = self._api_change_spec.function_warnings + try: + warning_message = function_warnings[full_name] + warning_message = warning_message.replace("""", full_name) + self._file_edit.add(warning_message, + node.lineno, node.col_offset, full_name, full_name, + error=""%s requires manual check."" % full_name) + except KeyError: + pass + def _get_attribute_full_path(self, node): """"""Traverse an attribute to generate a full name e.g. tf.foo.bar. @@ -350,6 +361,7 @@ class _ASTCallVisitor(ast.NodeVisitor): full_name = self._get_attribute_full_path(node) if full_name: self._rename_functions(node, full_name) + self._print_warning_for_function(node, full_name) if full_name in self._api_change_spec.change_to_function: if not hasattr(node, ""is_function_for_call""): new_text = full_name + ""()"" ",0,train f3bb411f120066c244131c0e5d80856948a45674,tensorflow/tensorflow,"Add a standard way to add a warning in tf_upgrade_v2.py script. PiperOrigin-RevId: 219352016",tf_upgrade.py,"@@ -178,6 +178,9 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec): # Specially handled functions. self.function_handle = {""tf.reverse"": self._reverse_handler} + # Warnings that should be printed if corresponding functions are used. + self.function_warnings = {} + @staticmethod def _reverse_handler(file_edit_recorder, node): # TODO(aselle): Could check for a literal list of bools and try to convert ",0,train f3bb411f120066c244131c0e5d80856948a45674,tensorflow/tensorflow,"Add a standard way to add a warning in tf_upgrade_v2.py script. PiperOrigin-RevId: 219352016",tf_upgrade_v2.py,"@@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import argparse -import functools from tensorflow.tools.compatibility import ast_edits from tensorflow.tools.compatibility import renames_v2 @@ -46,29 +45,28 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec): # Specially handled functions. self.function_handle = {} - for decay in [""tf.train.exponential_decay"", ""tf.train.piecewise_constant"", - ""tf.train.polynomial_decay"", ""tf.train.natural_exp_decay"", - ""tf.train.inverse_time_decay"", ""tf.train.cosine_decay"", - ""tf.train.cosine_decay_restarts"", - ""tf.train.linear_cosine_decay"", - ""tf.train.noisy_linear_cosine_decay""]: - self.function_handle[decay] = functools.partial( - self._learning_rate_decay_handler, decay_name=decay) - - @staticmethod - def _learning_rate_decay_handler(file_edit_recorder, node, decay_name): - comment = (""ERROR: %s has been changed to return a callable instead of a "" - ""tensor when graph building, but its functionality remains "" - ""unchanged during eager execution (returns a callable like "" - ""before). 
The converter cannot detect and fix this reliably, so "" - ""you need to inspect this usage manually.\n"") % decay_name - file_edit_recorder.add( - comment, - node.lineno, - node.col_offset, - decay_name, - decay_name, - error=""%s requires manual check."" % decay_name) + + decay_function_comment = ( + ""ERROR: has been changed to return a callable instead "" + ""of a tensor when graph building, but its functionality remains "" + ""unchanged during eager execution (returns a callable like "" + ""before). The converter cannot detect and fix this reliably, so "" + ""you need to inspect this usage manually.\n"" + ) + + # Function warnings. placeholder inside warnings will be + # replaced by function name. + self.function_warnings = { + ""tf.train.exponential_decay"": decay_function_comment, + ""tf.train.piecewise_constant"": decay_function_comment, + ""tf.train.polynomial_decay"": decay_function_comment, + ""tf.train.natural_exp_decay"": decay_function_comment, + ""tf.train.inverse_time_decay"": decay_function_comment, + ""tf.train.cosine_decay"": decay_function_comment, + ""tf.train.cosine_decay_restarts"": decay_function_comment, + ""tf.train.linear_cosine_decay"": decay_function_comment, + ""tf.train.noisy_linear_cosine_decay"": decay_function_comment, + } if __name__ == ""__main__"": ",0,train f3bb411f120066c244131c0e5d80856948a45674,tensorflow/tensorflow,"Add a standard way to add a warning in tf_upgrade_v2.py script. PiperOrigin-RevId: 219352016",tf_upgrade_v2_test.py,"@@ -73,9 +73,10 @@ class TestUpgrade(test_util.TensorFlowTestCase): ""tf.train.noisy_linear_cosine_decay""]: text = ""%s(a, b)\n"" % decay - _, unused_report, errors, new_text = self._upgrade(text) + _, report, errors, new_text = self._upgrade(text) self.assertEqual(text, new_text) self.assertEqual(errors, [""test.py:1: %s requires manual check."" % decay]) + self.assertIn(""%s has been changed"" % decay, report) class TestUpgradeFiles(test_util.TensorFlowTestCase): ",0,train 091f6253725ae9a2370135237e42c5b3666b6138,tensorflow/tensorflow,"Made the logdir argument used to start up Tensorboard appear under the TOGGLE ALL RUNS button. As part of this effort, made the handler respond with a JSON object with a single key 'logdir' containing the value. It seems like the request manager parses JSON. Previously, the endpoint returned a raw string that was the logdir argument. Updated screen diff integration tests. Change: 135722994",handler.py,"@@ -177,7 +177,7 @@ class TensorboardHandler(BaseHTTPServer.BaseHTTPRequestHandler): def _serve_logdir(self, unused_query_params): """"""Writes out the logdir argument with which this tensorboard was started. """""" - self.respond(self._logdir, 'text/plain') + self.respond({'logdir': self._logdir}, 'application/json') def _serve_scalars(self, query_params): """"""Given a tag and single run, return array of ScalarEvents. ",0,train 091f6253725ae9a2370135237e42c5b3666b6138,tensorflow/tensorflow,"Made the logdir argument used to start up Tensorboard appear under the TOGGLE ALL RUNS button. As part of this effort, made the handler respond with a JSON object with a single key 'logdir' containing the value. It seems like the request manager parses JSON. Previously, the endpoint returned a raw string that was the logdir argument. Updated screen diff integration tests. 
Change: 135722994",server_test.py,"@@ -102,10 +102,9 @@ class TensorboardServerTest(tf.test.TestCase): self.assertEqual(response.status, 400) def testLogdir(self): - """"""Test the status code and content of the data/logdir endpoint."""""" - response = self._get('/data/logdir') - self.assertEqual(response.status, 200) - self.assertEqual(response.read().decode('utf-8'), '/foo/logdir/argument') + """"""Test the format of the data/logdir endpoint."""""" + parsed_object = self._getJson('/data/logdir') + self.assertEqual(parsed_object, {'logdir': '/foo/logdir/argument'}) def testRuns(self): """"""Test the format of the /data/runs endpoint."""""" ",0,train 801b09624f0488132638166fe782be4163269657,tensorflow/tensorflow,"More accurate input-pipeline analysis for TPU. PiperOrigin-RevId: 286672368 Change-Id: I14e6e47e79304154404629295b6728857583d781",event_span.cc,"@@ -116,17 +116,17 @@ EventType ClassifyGpuEvent(absl::string_view event_name) { } EventType ClassifyCpuEvent(absl::string_view event_name, int64 correlation_id) { - if (absl::StartsWithIgnoreCase(event_name, ""MEMCPYHtoD"")) + if (absl::StartsWithIgnoreCase(event_name, ""MEMCPYHtoD"") || + absl::StrContains(event_name, ""Infeed"")) return HOST_TO_DEVICE; if (absl::StartsWithIgnoreCase(event_name, ""MEMCPYHtoH"")) return HOST_TO_HOST; if (correlation_id >= 0 || absl::StartsWithIgnoreCase(event_name, ""ExecutorState::Process"")) { return HOST_PREPARE; - } else { - if (absl::StartsWithIgnoreCase(event_name, ""IteratorGetNext"")) - return HOST_WAIT_INPUT; - return HOST_COMPUTE; } + if (absl::StartsWithIgnoreCase(event_name, ""IteratorGetNext"")) + return HOST_WAIT_INPUT; + return HOST_COMPUTE; } std::string PrintEventType(EventType event_type) { ",0,test 22443c0f157658e04b96cbc06904b32486584055,tensorflow/tensorflow,"When using fake infeed data, fill the infeed when it is empty. This makes sure we avoid OOM when there is too much infeed data to send it at once, and we also don't need the magic ""num_infeeds"" parameter anymore. PiperOrigin-RevId: 197886121",infeed_manager.cc,"@@ -49,13 +49,25 @@ void InfeedManager::EnqueueBuffers(const std::vector& buffers) { } InfeedBuffer* InfeedManager::BlockingDequeueBuffer() { - tensorflow::mutex_lock l(mu_); - while (enqueued_buffer_.empty()) { - cv_.wait(l); + bool became_empty = false; + InfeedBuffer* current_buffer; + { + tensorflow::mutex_lock l(mu_); + while (enqueued_buffer_.empty()) { + cv_.wait(l); + } + current_buffer = enqueued_buffer_.front(); + enqueued_buffer_.pop_front(); + dequeued_buffer_.insert(current_buffer); + if (enqueued_buffer_.empty()) { + became_empty = true; + } + } + if (became_empty) { + for (const auto& callback : on_empty_callbacks_) { + callback(); + } } - InfeedBuffer* current_buffer = enqueued_buffer_.front(); - enqueued_buffer_.pop_front(); - dequeued_buffer_.insert(current_buffer); return current_buffer; } @@ -88,6 +100,10 @@ se::Stream* InfeedManager::GetStream(se::StreamExecutor* executor) { return host_to_device_stream_.get(); } +void InfeedManager::RegisterOnEmptyCallback(std::function callback) { + on_empty_callbacks_.push_back(std::move(callback)); +} + InfeedManager* GetOrCreateInfeedManager() { static InfeedManager* manager = new InfeedManager; return manager; ",0,train 22443c0f157658e04b96cbc06904b32486584055,tensorflow/tensorflow,"When using fake infeed data, fill the infeed when it is empty. This makes sure we avoid OOM when there is too much infeed data to send it at once, and we also don't need the magic ""num_infeeds"" parameter anymore. 
PiperOrigin-RevId: 197886121",infeed_manager.h,"@@ -21,6 +21,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_INFEED_MANAGER_H_ #include +#include #include ""tensorflow/compiler/xla/types.h"" #include ""tensorflow/core/lib/gtl/flatset.h"" @@ -100,6 +101,10 @@ class InfeedManager { // returns null. se::Stream* GetStream(se::StreamExecutor* executor); + // Registers a callback that will be called when 'enqueued_buffer_' becomes + // empty. + void RegisterOnEmptyCallback(std::function callback); + private: // TODO(b/30467474): Revisit if this mutex becomes a point of // contention. @@ -122,6 +127,10 @@ class InfeedManager { // Executor that the host_to_device_stream belongs to. Not owned. se::StreamExecutor* host_to_device_executor_; + + // List of callbacks which will be called when 'enqueued_buffer_' becomes + // empty. + std::vector> on_empty_callbacks_; }; // Singleton creator-or-accessor: Returns the GPU infeed manager. ",0,train 22443c0f157658e04b96cbc06904b32486584055,tensorflow/tensorflow,"When using fake infeed data, fill the infeed when it is empty. This makes sure we avoid OOM when there is too much infeed data to send it at once, and we also don't need the magic ""num_infeeds"" parameter anymore. PiperOrigin-RevId: 197886121",replay_computation.cc,"@@ -41,6 +41,7 @@ limitations under the License. #include ""tensorflow/compiler/xla/client/local_client.h"" #include ""tensorflow/compiler/xla/execution_options_util.h"" #include ""tensorflow/compiler/xla/literal_util.h"" +#include ""tensorflow/compiler/xla/service/gpu/infeed_manager.h"" #include ""tensorflow/compiler/xla/service/hlo.pb.h"" #include ""tensorflow/compiler/xla/shape_util.h"" #include ""tensorflow/compiler/xla/status_macros.h"" @@ -64,7 +65,6 @@ namespace { struct Options { string fake_infeed_shape; bool generate_fake_infeed = false; - int num_infeeds = 10; bool use_fake_data = false; bool print_result = true; int num_runs = 1; @@ -126,22 +126,26 @@ StatusOr> ReplayComputation(const HloSnapshot& module, // --generate_fake_infeed is passed and there exists an infeed operation in // the HloSnapshot. tensorflow::gtl::optional pool; + std::unique_ptr data; + if (provide_infeed) { + data = std::move(MakeFakeLiteral(infeed_shape)).ValueOrDie(); + } + auto transfer_infeed = [&data, client]() { + TF_CHECK_OK(client->TransferToInfeed(*data)); + }; if (provide_infeed) { pool.emplace(tensorflow::Env::Default(), ""infeed"", /*num_threads=*/1); - pool->Schedule([opts, infeed_shape, client]() { - StatusOr> data_status = - MakeFakeLiteral(infeed_shape); - TF_CHECK_OK(data_status.status()); - std::unique_ptr data = std::move(data_status).ValueOrDie(); + pool->Schedule([transfer_infeed]() { // There may be several infeed buffers needed, however we don't know how // many. If we proactively transfer too many infeed buffers, we may run // out of memory. If we transfer too few infeed buffers, the program will - // hang. - // TODO(akuegel): Figure out a better way to handle this. - for (int i = 0; i < opts.num_infeeds; ++i) { - TF_CHECK_OK(client->TransferToInfeed(*data)); - } + // hang. Therefore, we register a callback that is called when the infeed + // becomes empty, and in this callback we will transfer another fake + // infeed. 
+ auto infeed_manager = xla::gpu::GetOrCreateInfeedManager(); + infeed_manager->RegisterOnEmptyCallback(transfer_infeed); + transfer_infeed(); }); } @@ -234,8 +238,6 @@ int main(int argc, char** argv) { ""Print the result of the computation to stdout""), tensorflow::Flag(""num_runs"", &opts.num_runs, ""Number of times to run each computation""), - tensorflow::Flag(""num_infeeds"", &opts.num_infeeds, - ""Number of times we transfer the fake infeed data""), tensorflow::Flag(""fake_infeed_shape"", &opts.fake_infeed_shape, ""Shape of fake data to construct for (infinite) infeed""), tensorflow::Flag(""generate_fake_infeed"", &opts.generate_fake_infeed, ",0,train 31b0dc568f23966c8e5fc576a08825e5b039aca4,tensorflow/tensorflow,"tridiagonal_solve: Remove stale forward compatibility checks `forward_compatible(2019, 10, 18)` always evaluates to `True` so a bit of stale code can be removed.",tridiagonal_solve_op_test.py,"@@ -24,7 +24,6 @@ import numpy as np from tensorflow.python.eager import backprop from tensorflow.python.client import session -from tensorflow.python.compat import compat from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -43,8 +42,6 @@ _sample_diags = np.array([[2, 1, 4, 0], [1, 3, 2, 2], [0, 1, -1, 1]]) _sample_rhs = np.array([1, 2, 3, 4]) _sample_result = np.array([-9, 5, -4, 4]) -FORWARD_COMPATIBLE_DATE = (2019, 10, 18) - # Flag, indicating that test should be run only with partial_pivoting=True FLAG_REQUIRES_PIVOTING = ""FLAG_REQUIRES_PIVOT"" @@ -303,13 +300,10 @@ class TridiagonalSolveOpTest(test.TestCase): # Tests with transpose and adjoint def testTransposeRhs(self): - expected = np.array([_sample_result, 2 * _sample_result]) - if compat.forward_compatible(*FORWARD_COMPATIBLE_DATE): - expected = expected.T self._testWithLists( diags=_sample_diags, rhs=np.array([_sample_rhs, 2 * _sample_rhs]), - expected=expected, + expected=np.array([_sample_result, 2 * _sample_result]).T, transpose_rhs=True) def testConjugateRhs(self): @@ -321,28 +315,22 @@ class TridiagonalSolveOpTest(test.TestCase): conjugate_rhs=True) def testAdjointRhs(self): - expected = np.array( - [_sample_result * (1 - 1j), _sample_result * (1 + 2j)]) - if compat.forward_compatible(*FORWARD_COMPATIBLE_DATE): - expected = expected.T self._testWithLists( diags=_sample_diags, rhs=np.array([_sample_rhs * (1 + 1j), _sample_rhs * (1 - 2j)]), - expected=expected, + expected=np.array( + [_sample_result * (1 - 1j), _sample_result * (1 + 2j)]).T, transpose_rhs=True, conjugate_rhs=True) def testTransposeRhsWithBatching(self): - expected = np.array( - [[_sample_result, 2 * _sample_result], - [-3 * _sample_result, -4 * _sample_result]]) - if compat.forward_compatible(*FORWARD_COMPATIBLE_DATE): - expected = expected.transpose(0, 2, 1) self._testWithLists( diags=np.array([_sample_diags, -_sample_diags]), rhs=np.array([[_sample_rhs, 2 * _sample_rhs], [3 * _sample_rhs, 4 * _sample_rhs]]), - expected=expected, + expected=np.array( + [[_sample_result, 2 * _sample_result], + [-3 * _sample_result, -4 * _sample_result]]).transpose(0, 2, 1), transpose_rhs=True) def testTransposeRhsWithRhsAsVector(self): ",0,test 31b0dc568f23966c8e5fc576a08825e5b039aca4,tensorflow/tensorflow,"tridiagonal_solve: Remove stale forward compatibility checks `forward_compatible(2019, 10, 18)` always evaluates to `True` so a bit of stale code can be removed.",linalg_impl.py,"@@ -20,7 +20,6 @@ from __future__ import print_function import numpy as np -from 
tensorflow.python.compat import compat from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -537,10 +536,7 @@ def _tridiagonal_solve_compact_format(diagonals, rhs, transpose_rhs, rhs = math_ops.conj(rhs) check_num_lhs_matches_num_rhs() - result = linalg_ops.tridiagonal_solve(diagonals, rhs, partial_pivoting, name) - if transpose_rhs and not compat.forward_compatible(2019, 10, 18): - return array_ops.matrix_transpose(result) - return result + return linalg_ops.tridiagonal_solve(diagonals, rhs, partial_pivoting, name) @tf_export('linalg.tridiagonal_matmul') ",0,test 23e9fc61dbd05621fa703630de66b373179a4de6,tensorflow/tensorflow,"Create a placer rule that co-locates edges of uncopyable types. ""Uncopyable"" is identified based on FullType, and typically includes some DT_VARIANT tensors like datasets, but might extend to some resources as well. PiperOrigin-RevId: 397989212 Change-Id: I9fc428a6a91b95b1c14855af4b9dd19707422270",colocation_graph.cc,"@@ -146,6 +146,25 @@ bool IsVariantWithUnsupportedDeviceCopy(const Node* node) { return is_mutex_lock_op || is_dataset_op; } +bool HasNoCopyReturns(const Node& node) { + if (!node.def().has_experimental_type()) { + return false; + } + const FullTypeDef& ft = node.def().experimental_type(); + DCHECK(ft.type_id() == TFT_PRODUCT) << ft.DebugString(); + + for (const auto& arg : ft.args()) { + switch (arg.type_id()) { + case TFT_DATASET: + return true; + default: + continue; + } + } + + return false; +} + } // namespace Status Member::SetParentAndSupportedDevices( @@ -717,6 +736,36 @@ Status ColocationGraph::ColocateResourceOrRefEdge(const Node* src, return Status::OK(); } +Status ColocationGraph::ColocateUncopiableTypeEdges( + std::unordered_set* inspection_required) { + for (const Edge* edge : graph_.edges()) { + if (edge->IsControlEdge()) { + continue; + } + Node* src = edge->src(); + Node* dst = edge->dst(); + bool needs_inspection; + TF_RETURN_IF_ERROR(inspection_required_checker_.IsPlacerInspectionRequired( + *src, &needs_inspection)); + if (needs_inspection) { + inspection_required->insert(src); + continue; + } + TF_RETURN_IF_ERROR(inspection_required_checker_.IsPlacerInspectionRequired( + *dst, &needs_inspection)); + if (needs_inspection) { + inspection_required->insert(dst); + continue; + } + + if (HasNoCopyReturns(*src)) { + TF_RETURN_IF_ERROR(ColocateResourceOrRefEdge(src, dst)); + } + } + + return Status::OK(); +} + Status ColocationGraph::ColocateResourceAndRefEdges( std::unordered_set* inspection_required) { // If `node` has an input edge with reference type, add an edge from the @@ -770,6 +819,7 @@ Status ColocationGraph::ColocateResourceAndRefEdges( namespace { // Returns tensor list element data type, if the node is one of the ops that // operate with TensorLists. Otherwise returns DT_INVALID. +// TODO(b/199443424): Don't use op names, use FullType here. 
DataType GetElementDataType(const Node& node) { static absl::flat_hash_set* tensor_list_ops = new absl::flat_hash_set( @@ -884,6 +934,7 @@ Status ColocationGraph::Initialize() { std::unordered_set inspection_required; TF_RETURN_IF_ERROR(ColocateResourceAndRefEdges(&inspection_required)); + TF_RETURN_IF_ERROR(ColocateUncopiableTypeEdges(&inspection_required)); TF_RETURN_IF_ERROR(AddHostOnlyDataTypesConstraints()); TF_RETURN_IF_ERROR(AddInspectionConstraints(inspection_required)); TF_RETURN_IF_ERROR(ColocateAllNodes()); ",0,train 23e9fc61dbd05621fa703630de66b373179a4de6,tensorflow/tensorflow,"Create a placer rule that co-locates edges of uncopyable types. ""Uncopyable"" is identified based on FullType, and typically includes some DT_VARIANT tensors like datasets, but might extend to some resources as well. PiperOrigin-RevId: 397989212 Change-Id: I9fc428a6a91b95b1c14855af4b9dd19707422270",colocation_graph.h,"@@ -278,12 +278,17 @@ class ColocationGraph { Status ColocateResourceOrRefEdge(const Node* src, const Node* dst); + // Adds colocation constraints to data types known not to support copying. + Status ColocateUncopiableTypeEdges( + std::unordered_set* inspection_required); + // Updates this ColocationGraph by making sure that all nodes // touching resource and/or ref tensors are colocated. // As it iterates over the edges, fills the `inspection_required` set with // the nodes that // PlacerInspectionRequiredOpChecker::IsPlacerInspectionRequired // deems as requiring deep inspection by placer. This is an optimization. + // TODO(mdan): Deprecate in favor of ColocateUncopiableTypeEdges. Status ColocateResourceAndRefEdges( std::unordered_set* inspection_required); ",0,train 23e9fc61dbd05621fa703630de66b373179a4de6,tensorflow/tensorflow,"Create a placer rule that co-locates edges of uncopyable types. ""Uncopyable"" is identified based on FullType, and typically includes some DT_VARIANT tensors like datasets, but might extend to some resources as well. PiperOrigin-RevId: 397989212 Change-Id: I9fc428a6a91b95b1c14855af4b9dd19707422270",placer.cc,"@@ -115,6 +115,10 @@ void LogDeviceAssignment(const Node* node, bool log_device_placement) { << ""("" << node->type_string() << ""): "" << node->assigned_device_name(); } + if (VLOG_IS_ON(1)) { + VLOG(1) << node->name() << ""("" << node->type_string() + << "") placed on: "" << node->assigned_device_name(); + } } Status AssignAndLog(int assigned_device, Node* node, @@ -211,6 +215,8 @@ Status Placer::Run() { *node); } + // TODO(mdan): This is a constrained optimization solver. Write it like one. + // Returns the first device in sorted devices list so we will always // choose the same device. // ",0,train 23e9fc61dbd05621fa703630de66b373179a4de6,tensorflow/tensorflow,"Create a placer rule that co-locates edges of uncopyable types. ""Uncopyable"" is identified based on FullType, and typically includes some DT_VARIANT tensors like datasets, but might extend to some resources as well. PiperOrigin-RevId: 397989212 Change-Id: I9fc428a6a91b95b1c14855af4b9dd19707422270",placer_test.cc,"@@ -201,6 +201,18 @@ REGISTER_KERNEL_BUILDER(Name(""TestXlaOp"").Device(""XLA_CPU"").Priority(2), REGISTER_KERNEL_BUILDER(Name(""TestXlaOp"").Device(""FakeCPU"").Priority(1), DummyOp); +// Op with no-copy type definition. 
+REGISTER_OP(""TestUncopiableTypeGeneratorCPU"") + .Output(""d: variant"") + .SetTypeConstructor(full_type::UnaryGeneric(TFT_DATASET)); +REGISTER_KERNEL_BUILDER( + Name(""TestUncopiableTypeGeneratorCPU"").Device(""FakeCPU""), DummyOp); + +// Op consuming a typed input. +REGISTER_OP(""TestTypedConsumer"").Input(""i: variant""); +REGISTER_KERNEL_BUILDER(Name(""TestTypedConsumer"").Device(""FakeCPU""), DummyOp); +REGISTER_KERNEL_BUILDER(Name(""TestTypedConsumer"").Device(""FakeGPU""), DummyOp); + //////////////////////////////////////////////////////////////////////////////// // // A PlacerTest method has three phases: @@ -762,6 +774,24 @@ TEST_F(PlacerTest, TestHeuristicGeneratorFollowsSingleConsumer) { EXPECT_COLOCATED(g, ""assign"", ""in""); } +TEST_F(PlacerTest, TestUncopiableTypeEdges) { + Graph g(OpRegistry::Global()); + + GraphDefBuilder b(GraphDefBuilder::kFailImmediately); + + // The producer can only be on the CPU. Without colocation constraints, + // the consumer would be placed on GPU, causing a copy. + Node* input = + ops::SourceOp(""TestUncopiableTypeGeneratorCPU"", b.opts().WithName(""ds"")); + ops::UnaryOp(""TestTypedConsumer"", ops::NodeOut(input, 0), + b.opts().WithName(""c"")); + + TF_EXPECT_OK(BuildGraph(b, &g)); + + TF_EXPECT_OK(Place(&g)); + EXPECT_COLOCATED(g, ""ds"", ""c""); +} + TEST_F(PlacerTest, TestIgnoreGeneratorHeuristicIfWrongDevice) { Graph g(OpRegistry::Global()); { // Scope for temporary variables used to construct g. ",0,train 1c0c9f2f6b7d6683c1aa16229ff9242c60ec760d,tensorflow/tensorflow,"Initialize context using memset PiperOrigin-RevId: 251821825",micro_interpreter.cc,"@@ -67,7 +67,8 @@ MicroInterpreter::MicroInterpreter(const Model* model, op_resolver_(op_resolver), tensor_allocator_(tensor_allocator), error_reporter_(error_reporter), - initialization_status_(kTfLiteOk) { + initialization_status_(kTfLiteOk), + context_() { auto* subgraphs = model->subgraphs(); if (subgraphs->size() != 1) { error_reporter->Report(""Only 1 subgraph is currently supported.\n""); @@ -82,23 +83,15 @@ MicroInterpreter::MicroInterpreter(const Model* model, context_.tensors = reinterpret_cast(tensor_allocator_->AllocateMemory( sizeof(TfLiteTensor) * context_.tensors_size, 4)); + context_.impl_ = static_cast(this); + context_.ReportError = ReportOpError; + context_.recommended_num_threads = 1; initialization_status_ = AllocateInputAndActTensors(); if (initialization_status_ != kTfLiteOk) { return; } - context_.impl_ = static_cast(this); - context_.GetExecutionPlan = nullptr; - context_.ResizeTensor = nullptr; - context_.ReportError = ReportOpError; - context_.AddTensors = nullptr; - context_.GetNodeAndRegistration = nullptr; - context_.ReplaceNodeSubsetsWithDelegateKernels = nullptr; - context_.recommended_num_threads = 1; - context_.GetExternalContext = nullptr; - context_.SetExternalContext = nullptr; - initialization_status_ = AllocateTemporaryTensors(); if (initialization_status_ != kTfLiteOk) { return; ",0,test 1c0c9f2f6b7d6683c1aa16229ff9242c60ec760d,tensorflow/tensorflow,"Initialize context using memset PiperOrigin-RevId: 251821825",micro_interpreter_test.cc,"@@ -20,6 +20,10 @@ limitations under the License. namespace tflite { namespace { void* MockInit(TfLiteContext* context, const char* buffer, size_t length) { + // We don't support delegate in TFL micro. This is a weak check to test if + // context struct being zero-initialized. + TF_LITE_MICRO_EXPECT_EQ(nullptr, + context->ReplaceNodeSubsetsWithDelegateKernels); // Do nothing. 
return nullptr; } ",0,test 5991a12e8c717f49caba2cc5673e6c72b0e301f5,tensorflow/tensorflow,"Switch away from deprecated tf.distribute.Strategy APIs to their supported replacements. PiperOrigin-RevId: 242605915",optimizer_v2.py,"@@ -916,7 +916,8 @@ class OptimizerV2(optimizer_v1.Optimizer): var_list = [v for _, v in grads_and_vars] grads_and_vars = zip(reduced_grads, var_list) - unwrapped_var_list = [x for v in var_list for x in distribution.unwrap(v)] + unwrapped_var_list = [ + x for v in var_list for x in distribution.experimental_local_results(v)] eager_execution = context.executing_eagerly() if eager_execution: # Give a clear error in this case instead of ""name not supported ",0,train 5991a12e8c717f49caba2cc5673e6c72b0e301f5,tensorflow/tensorflow,"Switch away from deprecated tf.distribute.Strategy APIs to their supported replacements. PiperOrigin-RevId: 242605915",tape.py,"@@ -66,7 +66,7 @@ def watch_variable(tape, variable): if context: variables = [strategy.extended.value_container(variable)] else: - variables = strategy.unwrap(variable) + variables = strategy.experimental_local_results(variable) for var in variables: pywrap_tensorflow.TFE_Py_TapeWatchVariable(tape._tape, var) # pylint: disable=protected-access @@ -82,7 +82,7 @@ def variable_accessed(variable): if context: variables = [strategy.extended.value_container(variable)] else: - variables = strategy.unwrap(variable) + variables = strategy.experimental_local_results(variable) for var in variables: pywrap_tensorflow.TFE_Py_TapeVariableAccessed(var) @@ -104,7 +104,7 @@ def variables_accessed(variables): else: for variable in variables: if variable.trainable: - accessed.extend(strategy.unwrap(variable)) + accessed.extend(strategy.experimental_local_results(variable)) for var in accessed: pywrap_tensorflow.TFE_Py_TapeVariableAccessed(var) ",0,train 5991a12e8c717f49caba2cc5673e6c72b0e301f5,tensorflow/tensorflow,"Switch away from deprecated tf.distribute.Strategy APIs to their supported replacements. PiperOrigin-RevId: 242605915",optimizer_v2.py,"@@ -560,7 +560,7 @@ class OptimizerV2(trackable.Trackable): else: initial_value = initializer strategy = distribute_ctx.get_strategy() - with strategy.colocate_vars_with(var): + with strategy.extended.colocate_vars_with(var): weight = tf_variables.Variable( name=""%s/%s"" % (var._shared_name, slot_name), # pylint: disable=protected-access dtype=var.dtype, ",0,train 5991a12e8c717f49caba2cc5673e6c72b0e301f5,tensorflow/tensorflow,"Switch away from deprecated tf.distribute.Strategy APIs to their supported replacements. PiperOrigin-RevId: 242605915",metrics_utils.py,"@@ -110,7 +110,8 @@ def result_wrapper(result_fn): def merge_fn_wrapper(distribution, merge_fn, *args): # We will get `PerDevice` merge function. Taking the first one as all # are identical copies of the function that we had passed below. - merged_result_fn = distribution.unwrap(merge_fn)[0](*args) + merged_result_fn = ( + distribution.experimental_local_results(merge_fn)[0](*args)) # Wrapping result in identity so that control dependency between # update_op from `update_state` and result works in case result returns ",0,train 5991a12e8c717f49caba2cc5673e6c72b0e301f5,tensorflow/tensorflow,"Switch away from deprecated tf.distribute.Strategy APIs to their supported replacements. 
PiperOrigin-RevId: 242605915",template_mirrored_strategy_test.py,"@@ -45,7 +45,8 @@ class TemplateMirroredStrategyTest(test.TestCase): temp = template.make_template(""my_template"", fn) strategy = mirrored_strategy.MirroredStrategy([""/cpu:0"", ""/gpu:0""]) - out = strategy.unwrap(strategy.experimental_run_v2(temp)) + out = strategy.experimental_local_results( + strategy.experimental_run_v2(temp)) self.evaluate(variables.global_variables_initializer()) self.assertAllEqual([42., 42.], self.evaluate(out)) ",0,train 915b02917db21de5a0ae304a067aedb0b5dd759d,tensorflow/tensorflow,"Adding a RecursivelyCreateDir function to the base environment. Removing functionality from the python library. Change: 132704774",env.cc,"@@ -137,6 +137,31 @@ Status Env::DeleteFile(const string& fname) { return fs->DeleteFile(fname); } +Status Env::RecursivelyCreateDir(const string& dirname) { + FileSystem* fs; + TF_RETURN_IF_ERROR(GetFileSystemForFile(dirname, &fs)); + std::vector sub_dirs; + StringPiece remaining_dir(dirname); + while (!fs->FileExists(remaining_dir.ToString())) { + // Basename returns """" for / ending dirs. + if (!remaining_dir.ends_with(""/"")) { + sub_dirs.push_back(io::Basename(remaining_dir)); + } + remaining_dir = io::Dirname(remaining_dir); + } + + // sub_dirs contains all the dirs to be created but in reverse order. + std::reverse(sub_dirs.begin(), sub_dirs.end()); + + // Now create the directories. + string built_path = remaining_dir.ToString(); + for (const StringPiece sub_dir : sub_dirs) { + built_path = io::JoinPath(built_path, sub_dir); + TF_RETURN_IF_ERROR(fs->CreateDir(built_path)); + } + return Status::OK(); +} + Status Env::CreateDir(const string& dirname) { FileSystem* fs; TF_RETURN_IF_ERROR(GetFileSystemForFile(dirname, &fs)); ",0,train 915b02917db21de5a0ae304a067aedb0b5dd759d,tensorflow/tensorflow,"Adding a RecursivelyCreateDir function to the base environment. Removing functionality from the python library. Change: 132704774",env.h,"@@ -158,7 +158,17 @@ class Env { Status DeleteRecursively(const string& dirname, int64* undeleted_files, int64* undeleted_dirs); - /// Creates the specified directory. + /// \brief Creates the specified directory and all the necessary + /// subdirectories. Typical return codes. + /// * OK - successfully created the directory and sub directories, even if + /// they were already created. + /// * PERMISSION_DENIED - dirname or some subdirectory is not writable. + Status RecursivelyCreateDir(const string& dirname); + + /// \brief Creates the specified directory. Typical return codes + /// * OK - successfully created the directory. + /// * ALREADY_EXISTS - directory already exists. + /// * PERMISSION_DENIED - dirname is not writable. Status CreateDir(const string& dirname); /// Deletes the specified directory. ",0,train 915b02917db21de5a0ae304a067aedb0b5dd759d,tensorflow/tensorflow,"Adding a RecursivelyCreateDir function to the base environment. Removing functionality from the python library. Change: 132704774",env_test.cc,"@@ -128,6 +128,41 @@ TEST(EnvTest, DeleteRecursivelyFail) { EXPECT_EQ(1, undeleted_dirs); } +TEST(EnvTest, RecursivelyCreateDir) { + Env* env = Env::Default(); + const string create_path = io::JoinPath(testing::TmpDir(), ""a/b/c/d""); + TF_CHECK_OK(env->RecursivelyCreateDir(create_path)); + TF_CHECK_OK(env->RecursivelyCreateDir(create_path)); // repeat creation. + EXPECT_TRUE(env->FileExists(create_path)); + + // Clean up. + // TODO(rohanj): Do this more elegantly using SetUp() and TearDown() methods. 
+ int64 undeleted_files, undeleted_dirs; + TF_CHECK_OK(env->DeleteRecursively(io::JoinPath(testing::TmpDir(), ""a""), + &undeleted_files, &undeleted_dirs)); +} + +TEST(EnvTest, RecursivelyCreateDirSubdirsExist) { + Env* env = Env::Default(); + // First create a/b. + const string subdir_path = io::JoinPath(testing::TmpDir(), ""a/b""); + TF_CHECK_OK(env->CreateDir(io::JoinPath(testing::TmpDir(), ""a""))); + TF_CHECK_OK(env->CreateDir(subdir_path)); + EXPECT_TRUE(env->FileExists(subdir_path)); + + // Now try to recursively create a/b/c/d/ + const string create_path = io::JoinPath(testing::TmpDir(), ""a/b/c/d/""); + TF_CHECK_OK(env->RecursivelyCreateDir(create_path)); + TF_CHECK_OK(env->RecursivelyCreateDir(create_path)); // repeat creation. + EXPECT_TRUE(env->FileExists(create_path)); + EXPECT_TRUE(env->FileExists(io::JoinPath(testing::TmpDir(), ""a/b/c""))); + + // Clean up. + int64 undeleted_files, undeleted_dirs; + TF_CHECK_OK(env->DeleteRecursively(io::JoinPath(testing::TmpDir(), ""a""), + &undeleted_files, &undeleted_dirs)); +} + TEST(EnvTest, LocalFileSystem) { // Test filename with file:// syntax. Env* env = Env::Default(); ",0,train 915b02917db21de5a0ae304a067aedb0b5dd759d,tensorflow/tensorflow,"Adding a RecursivelyCreateDir function to the base environment. Removing functionality from the python library. Change: 132704774",file_io.py,"@@ -257,11 +257,7 @@ def recursive_create_dir(dirname): errors.OpError: If the operation fails. """""" with errors.raise_exception_on_not_ok_status() as status: - dirs = compat.as_str_any(dirname).split(""/"") - for i in range(len(dirs)): - partial_dir = ""/"".join(dirs[0:i + 1]) - if partial_dir and not file_exists(partial_dir): - pywrap_tensorflow.CreateDir(compat.as_bytes(partial_dir), status) + pywrap_tensorflow.RecursivelyCreateDir(compat.as_bytes(dirname), status) def copy(oldpath, newpath, overwrite=False): ",0,train 915b02917db21de5a0ae304a067aedb0b5dd759d,tensorflow/tensorflow,"Adding a RecursivelyCreateDir function to the base environment. Removing functionality from the python library. Change: 132704774",file_io_test.py,"@@ -95,6 +95,7 @@ class FileIoTest(tf.test.TestCase): def testCreateRecursiveDir(self): dir_path = os.path.join(self._base_dir, ""temp_dir/temp_dir1/temp_dir2"") file_io.recursive_create_dir(dir_path) + file_io.recursive_create_dir(dir_path) # repeat creation file_path = os.path.join(dir_path, ""temp_file"") file_io.FileIO(file_path, mode=""w"").write(""testing"") self.assertTrue(file_io.file_exists(file_path)) ",0,train 6dfb912e1f9735f0f8a151272a741780e34e7a74,tensorflow/tensorflow,"[XLA:SPMD] Make offset calculation faster. It was quadratic time before. 
PiperOrigin-RevId: 327827558 Change-Id: Ib50d2b567e0458b5d2146ba3d3b1006050f3d06f",spmd_partitioner_test.cc,"@@ -138,8 +138,7 @@ ENTRY entry { op::AllReduce(op::Select( op::Broadcast(op::Compare(op::PartitionId(), op::Constant())), op::Constant(), op::Broadcast())), - op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId(), - op::Constant())), + op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId())), op::Constant())), op::Shape(""s32[1,3]""))); } @@ -161,8 +160,7 @@ ENTRY entry { op::Copy(op::AllReduce(AllOf( op::DynamicUpdateSlice( op::Broadcast(), AllOf(op::Constant(), op::Shape(""s32[1,3]"")), - op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId(), - op::Constant())), + op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId())), op::Constant()), op::Shape(""s32[2,3]""))))); } @@ -184,8 +182,7 @@ ENTRY entry { op::Copy(op::Copy(op::AllReduce(AllOf( op::DynamicUpdateSlice( op::Broadcast(), AllOf(op::Constant(), op::Shape(""s32[1,3]"")), - op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId(), - op::Constant())), + op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId())), op::Constant()), op::Shape(""s32[2,3]"")))))); } @@ -279,8 +276,8 @@ ENTRY entry { HloInstruction* root = module->entry_computation()->root_instruction(); ASSERT_THAT(root, op::Tuple()); - auto offset = op::Reshape( - op::DynamicSlice(op::Constant(), op::PartitionId(), op::Constant())); + auto offset = + op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId())); EXPECT_THAT(root->operand(0), op::DynamicSlice(op::GetTupleElement(op::Parameter()), offset, @@ -305,13 +302,13 @@ ENTRY entry { PartitionComputation(hlo_string, /*num_devices=*/2)); HloInstruction* root = module->entry_computation()->root_instruction(); EXPECT_THAT( - root, op::Copy(op::AllReduce(op::DynamicUpdateSlice( - op::Broadcast(), - op::GetTupleElement( - AllOf(op::Infeed(), op::Shape(""(f32[4,2]{1,0}, token[])""))), - op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId(), - op::Constant())), - op::Constant())))); + root, + op::Copy(op::AllReduce(op::DynamicUpdateSlice( + op::Broadcast(), + op::GetTupleElement( + AllOf(op::Infeed(), op::Shape(""(f32[4,2]{1,0}, token[])""))), + op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId())), + op::Constant())))); } TEST_F(SpmdPartitioningTest, UnevenTiledInfeed) { @@ -3956,8 +3953,8 @@ ENTRY entry { TF_ASSERT_OK_AND_ASSIGN(auto module, PartitionComputation(hlo_string, /*num_devices=*/2)); VLOG(1) << module->ToString(); - auto offset = op::Reshape( - op::DynamicSlice(op::Constant(), op::PartitionId(), op::Constant())); + auto offset = + op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId())); auto min = AllOf(op::Broadcast(offset), op::Shape(""s32[2,3]"")); auto max = AllOf(op::Broadcast(op::Add(offset, op::Constant())), op::Shape(""s32[2,3]"")); @@ -4093,8 +4090,8 @@ ENTRY entry { TF_ASSERT_OK_AND_ASSIGN(auto module, PartitionComputation(hlo_string, /*num_devices=*/2)); VLOG(1) << module->ToString(); - auto offset = op::Reshape( - op::DynamicSlice(op::Constant(), op::PartitionId(), op::Constant())); + auto offset = + op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId())); auto indices = op::Subtract( op::Parameter(1), AllOf(op::Broadcast(offset), op::Shape(""s32[2,3]""))); HloInstruction* root = module->entry_computation()->root_instruction(); ",0,train 6dfb912e1f9735f0f8a151272a741780e34e7a74,tensorflow/tensorflow,"[XLA:SPMD] Make offset calculation faster. It was quadratic time before. 
PiperOrigin-RevId: 327827558 Change-Id: Ib50d2b567e0458b5d2146ba3d3b1006050f3d06f",spmd_partitioner_util.cc,"@@ -29,6 +29,7 @@ limitations under the License. #include ""tensorflow/compiler/xla/service/hlo_instruction.h"" #include ""tensorflow/compiler/xla/service/hlo_instructions.h"" #include ""tensorflow/compiler/xla/service/hlo_module.h"" +#include ""tensorflow/compiler/xla/service/hlo_opcode.h"" #include ""tensorflow/compiler/xla/service/hlo_sharding.h"" #include ""tensorflow/compiler/xla/service/hlo_sharding_util.h"" #include ""tensorflow/compiler/xla/service/pattern_matcher.h"" @@ -202,13 +203,17 @@ std::vector MakePartitionOffsets( absl::Span dims) { CHECK(!shape.IsTuple()); - Array2D offset_array( - {sharding.tile_assignment().num_elements(), shape.rank()}); - offset_array.Each([&](int64 i, int64 j, int32* value) { - *value = sharding.TileOffsetForDevice(shape, i)[j]; - }); - auto offset_table = b->AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR2FromArray2D(offset_array))); + std::vector> offset_arrays(shape.rank()); + for (int64 i = 0; i < shape.rank(); ++i) { + offset_arrays[i].resize(sharding.tile_assignment().num_elements()); + } + auto shard_shape = MakePartitionedShape(shape, sharding); + sharding.tile_assignment().Each( + [&](absl::Span indices, int64 device) { + for (int64 i = 0; i < shape.rank(); ++i) { + offset_arrays[i][device] = indices[i] * shard_shape.dimensions(i); + } + }); std::vector offsets; for (int64 i = 0; i < shape.rank(); ++i) { if (sharding.tile_assignment().dim(i) == 1 || @@ -216,11 +221,10 @@ std::vector MakePartitionOffsets( offsets.push_back(b->AddInstruction( HloInstruction::CreateConstant(LiteralUtil::Zero(S32)))); } else { + auto offset_table = b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR1(offset_arrays[i]))); auto index = b->AddInstruction(HloInstruction::CreateDynamicSlice( - ShapeUtil::MakeShape(S32, {1, 1}), offset_table, - {partition_id, b->AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR0(i)))}, - {1, 1})); + ShapeUtil::MakeShape(S32, {1}), offset_table, {partition_id}, {1})); offsets.push_back(b->AddInstruction( HloInstruction::CreateReshape(ShapeUtil::MakeShape(S32, {}), index))); } ",0,train a6976d4034fae34128a93ea23129760eae63980c,tensorflow/tensorflow,"Improve TF-TRT logging: - set the engine name (without scope) before converting the segment to a GraaphDef, so we can print it out and know which graph is being operated on. - format the output of segment nodes in segment.cc PiperOrigin-RevId: 238043350",convert_graph.cc,"@@ -331,9 +331,13 @@ Status GetEngineInfo(const Graph* g, // Construct the const nodes first. subgraph_nodes.insert(subgraph_nodes.begin(), added_const_nodes.begin(), added_const_nodes.end()); + string scope_name; TF_RETURN_IF_ERROR(ConvertSegmentToGraphDef( g, graph_properties, subgraph_nodes, &info->connections, - &info->segment_graph_def, &info->engine_name)); + &info->segment_graph_def, &scope_name)); + info->engine_name = StrCat(scope_name, info->engine_name); + VLOG(1) << ""Converted TensorRT candidate segment '"" << info->engine_name + << ""' to a GraphDef""; // TODO(sami): This should not happen once segmenter is updated. if (segment_devices.size() == 1) { info->device = *segment_devices.begin(); @@ -492,8 +496,7 @@ Status CreateTRTNode(const ConversionParams& params, // these segments. 
if (inputs.empty()) { return errors::Internal( - ""Segment has no inputs (possible "" - ""constfold failure)""); + ""Segment has no inputs (possible constfold failure)""); } const bool calibrate_int8 = @@ -839,6 +842,7 @@ Status ConvertAfterShapes(const ConversionParams& params) { for (size_t t = 0; t < initial_segments.size(); t++) { auto& curr_segment = initial_segments.at(t); EngineInfo curr_engine; + curr_engine.engine_name = StrCat(""TRTEngineOp_"", t); Status status = GetEngineInfo(&graph, *params.graph_properties, curr_segment.first, node_map, reverse_topo_order, &curr_engine); @@ -854,7 +858,6 @@ Status ConvertAfterShapes(const ConversionParams& params) { curr_engine.use_calibration = params.use_calibration; curr_engine.cached_engine_batches = params.cached_engine_batches; curr_engine.maximum_cached_engines = params.max_cached_engines; - StrAppend(&curr_engine.engine_name, ""TRTEngineOp_"", t); if (params.use_function_backup) { status = RegisterSegmentFunctionToFunctionLibrary( &graph, curr_engine.segment_graph_def, curr_engine.engine_name); ",0,train a6976d4034fae34128a93ea23129760eae63980c,tensorflow/tensorflow,"Improve TF-TRT logging: - set the engine name (without scope) before converting the segment to a GraaphDef, so we can print it out and know which graph is being operated on. - format the output of segment nodes in segment.cc PiperOrigin-RevId: 238043350",convert_nodes.cc,"@@ -4148,7 +4148,7 @@ Status ConvertSegmentToGraphDef( const Graph* graph, const grappler::GraphProperties& graph_properties, const std::vector& subgraph_nodes, // In topological order std::vector* connections, GraphDef* segment_def, - string* common_scope) { + string* scope_name) { std::set marker_nodes; // Update connection shapes/data types and add corresponding input/output // nodes in the segment graphdef. @@ -4281,9 +4281,7 @@ Status ConvertSegmentToGraphDef( snode->mutable_input()->RemoveLast(); } } - *common_scope = local_scope; - VLOG(1) << ""Converted TensorRT candidate segment @scope '"" << local_scope - << ""' to a GraphDef""; + *scope_name = local_scope; return Status::OK(); } ",0,train a6976d4034fae34128a93ea23129760eae63980c,tensorflow/tensorflow,"Improve TF-TRT logging: - set the engine name (without scope) before converting the segment to a GraaphDef, so we can print it out and know which graph is being operated on. - format the output of segment nodes in segment.cc PiperOrigin-RevId: 238043350",convert_nodes.h,"@@ -123,13 +123,14 @@ struct EngineInfo { // topological order. // - segment_def: the output GraphDef, whose non-input/output nodedefs will be // sorted in topological order. +// - scope_name: the name of the scope where the TRTEngineOp will be placed. // // TODO(aaroey): add tests to validate these properties. Status ConvertSegmentToGraphDef( const Graph* graph, const grappler::GraphProperties& graph_properties, const std::vector& subgraph_nodes, std::vector* connections, GraphDef* segment_def, - string* common_scope); + string* scope_name); // Converts given subgraph to a TRT engine saved in 'engine'. Returns ok iff // 'builder' successfully build the engine. If the result is not ok, 'engine' ",0,train a6976d4034fae34128a93ea23129760eae63980c,tensorflow/tensorflow,"Improve TF-TRT logging: - set the engine name (without scope) before converting the segment to a GraaphDef, so we can print it out and know which graph is being operated on. 
- format the output of segment nodes in segment.cc PiperOrigin-RevId: 238043350",segment.cc,"@@ -668,10 +668,13 @@ Status SegmentGraph(const Graph* tf_graph, const string& segment_root = itr.first; // Return format does not require set comparator. std::set segment_nodes(itr.second.begin(), itr.second.end()); - if (VLOG_IS_ON(1)) { - string s = ""parent="" + segment_root + "":""; - for (auto node : segment_nodes) s += "" "" + node->name(); - VLOG(1) << ""Segment "" << segments->size() << "": "" << s; + if (VLOG_IS_ON(1) && !segment_nodes.empty()) { + string s; + for (auto node : segment_nodes) { + StrAppend(&s, ""\n[Op type: "", node->type_string(), ""] "", node->name()); + } + VLOG(1) << ""Nodes in segment "" << segments->size() + << "" with parent="" << segment_root << "":"" << s; } // Don't use small segments. ",0,train d9a9a0a2ba89bbbdab5bb232f2d80534ccd3706c,tensorflow/tensorflow,"Add HLO canonicalize patterns. Add a pattern to convert some dynamic slice operations into slice operations. Add test cases. PiperOrigin-RevId: 274858314 Change-Id: I3135db7601944186bdd8c6064479070862f62336",hlo_ops.cc,"@@ -80,6 +80,38 @@ static LogicalResult Verify(T op) { return success(); } +namespace { + +//===----------------------------------------------------------------------===// +// Utilities for the canonicalize patterns +//===----------------------------------------------------------------------===// + +// Returns 1D 64-bit dense elements attribute with the given values. +DenseIntElementsAttr GetI64ElementsAttr(ArrayRef values, + Builder* builder) { + RankedTensorType ty = builder->getTensorType( + {static_cast(values.size())}, builder->getIntegerType(64)); + return DenseElementsAttr::get(ty, values) + .cast(); +} + +// Given the start indices and slice sizes for a dynamic-slice that can be +// converted to a static slice, returns the limits for the static slice. +DenseIntElementsAttr BuildSliceLimits(DenseIntElementsAttr start_indices, + DenseIntElementsAttr slice_sizes, + Builder* builder) { + SmallVector slice_limits; + for (int64_t i = 0; i < slice_sizes.getNumElements(); ++i) { + int64_t start_index = start_indices.getValue(i).getInt(); + int64_t slice_size = slice_sizes.getValue(i).getInt(); + slice_limits.push_back(start_index + slice_size); + } + return GetI64ElementsAttr(slice_limits, builder); +} + +#include ""tensorflow/compiler/mlir/xla/transforms/generated_canonicalize.inc"" +} // namespace + //===----------------------------------------------------------------------===// // ConstOp //===----------------------------------------------------------------------===// @@ -468,6 +500,15 @@ static LogicalResult Verify(ConcatenateOp op) { return success(); } +//===----------------------------------------------------------------------===// +// DynamicSliceOp +//===----------------------------------------------------------------------===// + +void DynamicSliceOp::getCanonicalizationPatterns( + OwningRewritePatternList& results, MLIRContext* context) { + results.insert(context); +} + //===----------------------------------------------------------------------===// // ReshapeOp //===----------------------------------------------------------------------===// ",0,train 238424ffcf04c38561ed48ebadb16b3b3a770e2e,tensorflow/tensorflow,"[XLA:TF] Re-disable testRandomUniformIsInRange The bug is still there and makes this test flakily fail with fp16. 
PiperOrigin-RevId: 213669453",random_ops_test.py,"@@ -76,7 +76,8 @@ class RandomOpsTest(xla_test.XLATestCase): for dtype in self._random_types(): # TODO (b/112272078): enable bfloat16 for CPU and GPU when the bug is # fixed. - if (self.device in [""XLA_GPU"", ""XLA_CPU""]) and (dtype == dtypes.bfloat16): + if (self.device in [""XLA_GPU"", ""XLA_CPU"" + ]) and (dtype in [dtypes.bfloat16, dtypes.half]): continue with self.cached_session() as sess: with self.test_scope(): ",0,test d5a18ab07beda13db1b7cc5bea5f8d6c2e33303d,tensorflow/tensorflow,"Remove PersistentTensor from softmax_op.cc PiperOrigin-RevId: 371825789 Change-Id: I007d72c934208e7e581f5e7ee671b3dbd8274c06",softmax_op.cc,"@@ -165,20 +165,19 @@ class CSRSoftmaxGradOp : public OpKernel { // tensor is the elementwise product of some function with the // softmax value, it will keep the sparsity structure of the softmax. const int total_nnz = softmax_matrix->total_nnz(); - PersistentTensor gradient_values_pt; - Tensor* gradient_values_t; - OP_REQUIRES_OK(ctx, ctx->allocate_persistent( - DataTypeToEnum::value, TensorShape({total_nnz}), - &gradient_values_pt, &gradient_values_t)); + Tensor gradient_values; + OP_REQUIRES_OK( + ctx, ctx->allocate_temp(DataTypeToEnum::value, + TensorShape({total_nnz}), &gradient_values)); CSRSparseMatrix gradient_matrix; OP_REQUIRES_OK( - ctx, CSRSparseMatrix::CreateCSRSparseMatrix( - DataTypeToEnum::value, dense_shape_t, - softmax_matrix->batch_pointers(), - softmax_matrix->row_pointers(), softmax_matrix->col_indices(), - *gradient_values_t, &gradient_matrix)); + ctx, + CSRSparseMatrix::CreateCSRSparseMatrix( + DataTypeToEnum::value, dense_shape_t, + softmax_matrix->batch_pointers(), softmax_matrix->row_pointers(), + softmax_matrix->col_indices(), gradient_values, &gradient_matrix)); if (total_nnz > 0) { functor::CSRSparseMatrixSoftmaxGrad softmax_grad; ",0,train 5c135e7a7d48ea650fe0786670cc0560f1793b2b,tensorflow/tensorflow,"Fix the existing docstrings of array_ops and init_ops_v2. PiperOrigin-RevId: 267168146",array_ops.py,"@@ -1343,38 +1343,34 @@ def concat(values, axis, name=""concat""): For example: - ```python - t1 = [[1, 2, 3], [4, 5, 6]] - t2 = [[7, 8, 9], [10, 11, 12]] - tf.concat([t1, t2], 0) # [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] - tf.concat([t1, t2], 1) # [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]] - - # tensor t3 with shape [2, 3] - # tensor t4 with shape [2, 3] - tf.shape(tf.concat([t3, t4], 0)) # [4, 3] - tf.shape(tf.concat([t3, t4], 1)) # [2, 6] - ``` + >>> t1 = [[1, 2, 3], [4, 5, 6]] + >>> t2 = [[7, 8, 9], [10, 11, 12]] + >>> concat([t1, t2], 0) + + + >>> concat([t1, t2], 1) + + As in Python, the `axis` could also be negative numbers. Negative `axis` are interpreted as counting from the end of the rank, i.e., `axis + rank(values)`-th dimension. For example: - ```python - t1 = [[[1, 2], [2, 3]], [[4, 4], [5, 3]]] - t2 = [[[7, 4], [8, 4]], [[2, 10], [15, 11]]] - tf.concat([t1, t2], -1) - ``` - - would produce: - - ```python - [[[ 1, 2, 7, 4], - [ 2, 3, 8, 4]], - - [[ 4, 4, 2, 10], - [ 5, 3, 15, 11]]] - ``` + >>> t1 = [[[1, 2], [2, 3]], [[4, 4], [5, 3]]] + >>> t2 = [[[7, 4], [8, 4]], [[2, 10], [15, 11]]] + >>> tf.concat([t1, t2], -1) + Note: If you are concatenating along a new axis consider using stack. E.g. @@ -4810,14 +4806,20 @@ def repeat_with_axis(data, repeats, axis, name=None): A tensor with `max(N, 1)` dimensions. Has the same shape as `data`, except that dimension `axis` has size `sum(repeats)`. 
#### Examples: - ```python >>> repeat(['a', 'b', 'c'], repeats=[3, 0, 2], axis=0) - ['a', 'a', 'a', 'c', 'c'] + >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=0) - [[1, 2], [1, 2], [3, 4], [3, 4], [3, 4]] + >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=1) - [[1, 1, 2, 2, 2], [3, 3, 4, 4, 4]] - ``` + """""" if not isinstance(axis, int): raise TypeError(""axis must be an int; got %s"" % type(axis).__name__) @@ -4916,7 +4918,7 @@ def _with_nonzero_rank(data): @tf_export(""repeat"") def repeat(input, repeats, axis=None, name=None): # pylint: disable=redefined-builtin - """"""Repeat elements of `input` + """"""Repeat elements of `input`. Args: input: An `N`-dimensional Tensor. @@ -4932,18 +4934,31 @@ def repeat(input, repeats, axis=None, name=None): # pylint: disable=redefined-b If axis is None then the output array is flattened to match the flattened input array. #### Examples: - ```python + >>> repeat(['a', 'b', 'c'], repeats=[3, 0, 2], axis=0) - ['a', 'a', 'a', 'c', 'c'] + + >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=0) - [[1, 2], [1, 2], [3, 4], [3, 4], [3, 4]] + + >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=1) - [[1, 1, 2, 2, 2], [3, 3, 4, 4, 4]] + + >>> repeat(3, repeats=4) - [3, 3, 3, 3] + + >>> repeat([[1,2], [3,4]], repeats=2) - [1, 1, 2, 2, 3, 3, 4, 4] - ``` + """""" if axis is None: input = reshape(input, [-1]) ",0,test 5c135e7a7d48ea650fe0786670cc0560f1793b2b,tensorflow/tensorflow,"Fix the existing docstrings of array_ops and init_ops_v2. PiperOrigin-RevId: 267168146",init_ops_v2.py,"@@ -149,12 +149,8 @@ class Constant(Initializer): of the `value` list, even reshaped, as shown in the two commented lines below the `value` list initialization. - ```python >>> value = [0, 1, 2, 3, 4, 5, 6, 7] - >>> # value = np.array(value) - >>> # value = value.reshape([2, 4]) >>> init = tf.compat.v1.constant_initializer(value) - >>> >>> # Fitting shape >>> with tf.compat.v1.Session(): ... x = tf.compat.v1.get_variable('x', shape=[2, 4], initializer=init) @@ -164,18 +160,19 @@ class Constant(Initializer): [4. 5. 6. 7.]] >>> # Larger shape >>> with tf.compat.v1.Session(): - ... x = tf.compat.v1.get_variable('x', shape=[3, 4], initializer=init) - ... x.initializer.run() - ... print(x.eval()) - [[ 0. 1. 2. 3.] - [ 4. 5. 6. 7.] - [ 7. 7. 7. 7.]] + ... y = tf.compat.v1.get_variable('y', shape=[3, 4], initializer=init) + ... y.initializer.run() + ... print(y.eval()) + [[0. 1. 2. 3.] + [4. 5. 6. 7.] + [7. 7. 7. 7.]] >>> # Smaller shape >>> with tf.compat.v1.Session(): - ... x = tf.compat.v1.get_variable('x', shape=[2, 3], initializer=init) + ... z = tf.compat.v1.get_variable('z', shape=[2, 3], initializer=init) + Traceback (most recent call last): + ... ValueError: Too many elements provided. Needed at most 6, but received 8 - ``` """""" def __init__(self, value=0): ",0,test d7b9d9a60fcbd6a0d294f4886793f54b381a7145,tensorflow/tensorflow,"Expose configuration options for the LhloFuseLinalg pass. These were available as pass options but not when constructing the pass using `createLhloFuseLinalg`. PiperOrigin-RevId: 297550513 Change-Id: Ie0ce3bf3ea1f2cf40dd77eeec53eb121d2785b65",lhlo_fuse_linalg.cc,"@@ -22,6 +22,7 @@ limitations under the License. 
#include ""mlir/Dialect/Linalg/Utils/Utils.h"" // TF:llvm-project #include ""mlir/Pass/Pass.h"" // TF:llvm-project #include ""mlir/Transforms/FoldUtils.h"" // TF:llvm-project +#include ""tensorflow/compiler/mlir/xla/transforms/passes.h"" namespace mlir { namespace xla_lhlo { @@ -123,8 +124,9 @@ class LhloFuseLinalg : public FunctionPass { } // namespace -std::unique_ptr> createLhloFuseLinalg() { - return absl::make_unique(); +std::unique_ptr> createLhloFuseLinalg( + bool use_parallel_loops, ArrayRef tile_sizes) { + return absl::make_unique(use_parallel_loops, tile_sizes); } static PassRegistration legalize_pass( ",0,train d7b9d9a60fcbd6a0d294f4886793f54b381a7145,tensorflow/tensorflow,"Expose configuration options for the LhloFuseLinalg pass. These were available as pass options but not when constructing the pass using `createLhloFuseLinalg`. PiperOrigin-RevId: 297550513 Change-Id: Ie0ce3bf3ea1f2cf40dd77eeec53eb121d2785b65",passes.h,"@@ -18,6 +18,7 @@ limitations under the License. #include +#include ""llvm/ADT/ArrayRef.h"" #include ""mlir/IR/MLIRContext.h"" // TF:llvm-project #include ""mlir/Support/LogicalResult.h"" // TF:llvm-project @@ -76,8 +77,17 @@ std::unique_ptr> createLegalizeLhloToLinalgPass(); // Lowers from LHLO dialect to GPU dialect. std::unique_ptr> createLegalizeToGpuPass(); -// Fuses linalg ops obtained after LHLO lowering. -std::unique_ptr> createLhloFuseLinalg(); +// Fuses linalg ops obtained after LHLO lowering. To enable fusion, +// operations are first tiled. +// +// When 'use_parallel_loops' is set, the tiling will use loop.parallel +// operations. Otherwise, loop.for operations are used. +// +// 'tile_sizes' provides the tile sizes to use for tiling. If the linalg +// operation has more dimensions than tile sizes provided, 1 is used as +// default. +std::unique_ptr> createLhloFuseLinalg( + bool use_parallel_loops = false, ArrayRef tile_sizes = {}); } // namespace xla_lhlo } // namespace mlir ",0,train 1127ae0a91fcee00d2931ef142f0ac2c63bdc7be,tensorflow/tensorflow,Resolved description,lite.py,"@@ -308,8 +308,9 @@ class TFLiteConverterV2(TFLiteConverterBase): to apply when converting the model. E.g. `[Optimize.DEFAULT]` representative_dataset: A representative dataset that can be used to generate input and output samples for the model. The converter can use the - dataset to evaluate different optimizations. Note that this is a necessary - attribute since the conversion optimization depends upon it. + dataset to evaluate different optimizations. Note that this is an optional + attribute but it is necessary if INT8 is the only support builtin ops in + target ops. experimental_new_converter: Experimental flag, subject to change. Enables MLIR-based conversion instead of TOCO conversion. experimental_new_quantizer: Experimental flag, subject to change. ",0,train 8ae2eecf4f70b5efb55a108f2b0000ea6cad3e05,tensorflow/tensorflow,"Add a broadcasting test case with rank 6. With broadcast shape rank minimization, this test can be computed with broadcast rank specialization of rank 3. PiperOrigin-RevId: 360155936 Change-Id: I6f359af5ae7204ff6446216ce5172d69f43a1ac2",base_binary_ops_test.h,"@@ -309,6 +309,41 @@ class BinaryOpsTestBase : public OpsTestBase { expected_output, config); } + template + void TestBroadcastingRank6(const std::string& op_name, + const absl::InlinedVector& lhs_input, + const absl::InlinedVector& rhs_input, + BaselineOutT (*baseline_callback)(BaselineT, + BaselineT), + const test::OpsTestConfig& config) { + // Prepare inputs. 
+ TensorShape lhs_shape{1, 2, 3, 1, 2, 1}; + TensorShape rhs_shape{1, 1, 1, 2, 3}; + auto repeated_lhs_input = + test::RepeatInputToMatchShape(lhs_input, lhs_shape.num_elements()); + auto repeated_rhs_input = + test::RepeatInputToMatchShape(rhs_input, rhs_shape.num_elements()); + + // Compute expected results. + TensorShape expected_shape{1, 2, 3, 1, 2, 3}; + std::vector lhs_indices = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, + 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, + 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11}; + std::vector rhs_indices = { + 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, + 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, + }; + auto expected_output = + ComputeExpectedOutput( + lhs_indices, repeated_lhs_input, rhs_indices, repeated_rhs_input, + baseline_callback); + + RunAndExpectResult(op_name, lhs_shape, repeated_lhs_input, + rhs_shape, repeated_rhs_input, expected_shape, + expected_output, config); + } + template void TestEmptyShapeBroadcasting(const std::string& op_name, @@ -392,6 +427,11 @@ class BinaryOpsTestBase : public OpsTestBase { #op_name, lhs_input, rhs_input, baseline_callback, config); \ } \ \ + TEST_F(BinaryOpsTest, op_name##BroadcastingRank6##test_name) { \ + TestBroadcastingRank6( \ + #op_name, lhs_input, rhs_input, baseline_callback, config); \ + } \ + \ TEST_F(BinaryOpsTest, op_name##EmptyShapeBroadcasting##test_name) { \ TestEmptyShapeBroadcasting( \ #op_name, lhs_input, rhs_input, config); \ ",0,train 7a24845e237f42d3f0bc6ab031ee96e7ef896800,tensorflow/tensorflow,fixes file loading mechanism in datasets,boston_housing.py,"@@ -45,10 +45,9 @@ def load_data(path='boston_housing.npz', test_split=0.2, seed=113): origin=origin_folder + 'boston_housing.npz', file_hash= 'f553886a1f8d56431e820c5b82552d9d95cfcb96d1e678153f8839538947dff5') - f = np.load(path) - x = f['x'] - y = f['y'] - f.close() + with np.load(path) as f: + x = f['x'] + y = f['y'] np.random.seed(seed) indices = np.arange(len(x)) ",0,test 7a24845e237f42d3f0bc6ab031ee96e7ef896800,tensorflow/tensorflow,fixes file loading mechanism in datasets,mnist.py,"@@ -51,4 +51,4 @@ def load_data(path='mnist.npz'): x_train, y_train = f['x_train'], f['y_train'] x_test, y_test = f['x_test'], f['y_test'] - return (x_train, y_train), (x_test, y_test) + return (x_train, y_train), (x_test, y_test) ",0,test fdd01eb06b2f01ca9db3fdde528aba6fffd8079b,tensorflow/tensorflow,"Correctly set execution mode on the context, not the module PiperOrigin-RevId: 235634263",core_test.py,"@@ -311,7 +311,7 @@ class TFETest(test_util.TensorFlowTestCase): three.dtype.as_datatype_enum)) context.async_wait() context.async_clear_error() - context.execution_mode = context.SYNC + context.context().execution_mode = context.SYNC def testExecuteTooManyNumOutputs(self): # num_outputs provided is 50, but only one output is produced. ",0,train a0d429cafe8226834365210caf703c28996d5795,tensorflow/tensorflow,"Avoid use of sufficient_statistics in moments() (#8906) * Avoid use of sufficient_statistics in moments() sufficient_statistics() uses reduce_prod if some of the dimensions are unknown. But reduce_prod is not differentiable on GPU for now. This commit avoids the usage of sufficient_statistics in moments(). 
* Add names to operations * Use squeeze() to reduce axes",nn_impl.py,"@@ -639,19 +639,21 @@ def moments(x, axes, shift=None, name=None, keep_dims=False): math_ops.reduce_mean(y, axes, keep_dims=True)) else: shift = math_ops.cast(shift, y.dtype) - counts, m_ss, v_ss, shift = sufficient_statistics( - y, axes, shift=shift, keep_dims=keep_dims, name=name) - # Reshape shift as needed. - shift = array_ops.reshape(shift, array_ops.shape(m_ss)) - shift.set_shape(m_ss.get_shape()) - with ops.control_dependencies([counts, m_ss, v_ss]): - mean, variance = normalize_moments(counts, m_ss, v_ss, shift, name=name) - if x.dtype == dtypes.float16: - return (math_ops.cast(mean, dtypes.float16), - math_ops.cast(variance, dtypes.float16)) - else: - return (mean, variance) - + shifted_mean = math_ops.reduce_mean( + math_ops.subtract(y, shift), axes, keep_dims=True, name=""shifted_mean"") + variance = math_ops.subtract( + math_ops.reduce_mean(math_ops.squared_difference(y, shift), axes, keep_dims=True), + math_ops.square(shifted_mean), + name=""variance"") + mean = math_ops.add(shifted_mean, shift, name=""mean"") + if not keep_dims: + mean = array_ops.squeeze(mean, axes) + variance = array_ops.squeeze(variance, axes) + if x.dtype == dtypes.float16: + return (math_ops.cast(mean, dtypes.float16), + math_ops.cast(variance, dtypes.float16)) + else: + return (mean, variance) def weighted_moments(x, axes, frequency_weights, name=None, keep_dims=False): """"""Returns the frequency-weighted mean and variance of `x`. ",0,train b97023504c53efab507c20ed8af5f6430c475834,tensorflow/tensorflow,Moved to experimental api,c_api.cc,"@@ -79,11 +79,6 @@ void TfLiteInterpreterOptionsSetNumThreads(TfLiteInterpreterOptions* options, options->num_threads = num_threads; } -void TfLiteInterpreterOptionsSetUseNNAPI(TfLiteInterpreterOptions* options, - bool enable) { - options->useNNAPI = enable; -} - void TfLiteInterpreterOptionsAddDelegate(TfLiteInterpreterOptions* options, TfLiteDelegate* delegate) { options->delegates.push_back(delegate); ",0,train b97023504c53efab507c20ed8af5f6430c475834,tensorflow/tensorflow,Moved to experimental api,c_api.h,"@@ -120,10 +120,6 @@ TFL_CAPI_EXPORT extern void TfLiteInterpreterOptionsDelete( TFL_CAPI_EXPORT extern void TfLiteInterpreterOptionsSetNumThreads( TfLiteInterpreterOptions* options, int32_t num_threads); -// Enable or disable the NN API for the interpreter (true to enable). -TFL_CAPI_EXPORT extern void TfLiteInterpreterOptionsSetUseNNAPI( - TfLiteInterpreterOptions* options, bool enable); - // Adds a delegate to be applied during `TfLiteInterpreter` creation. // // If delegate application fails, interpreter creation will also fail with an ",0,train b97023504c53efab507c20ed8af5f6430c475834,tensorflow/tensorflow,Moved to experimental api,c_api_experimental.cc,"@@ -50,6 +50,11 @@ void TfLiteInterpreterOptionsAddCustomOp(TfLiteInterpreterOptions* options, options->op_resolver.AddCustom(name, registration, min_version, max_version); } +void TfLiteInterpreterOptionsSetUseNNAPI(TfLiteInterpreterOptions* options, + bool enable) { + options->useNNAPI = enable; +} + #ifdef __cplusplus } // extern ""C"" #endif // __cplusplus ",0,train b97023504c53efab507c20ed8af5f6430c475834,tensorflow/tensorflow,Moved to experimental api,c_api_experimental.h,"@@ -49,6 +49,10 @@ TFL_CAPI_EXPORT void TfLiteInterpreterOptionsAddCustomOp( const TfLiteRegistration* registration, int32_t min_version, int32_t max_version); +// Enable or disable the NN API for the interpreter (true to enable). 
+TFL_CAPI_EXPORT extern void TfLiteInterpreterOptionsSetUseNNAPI( + TfLiteInterpreterOptions* options, bool enable); + #ifdef __cplusplus } // extern ""C"" #endif // __cplusplus ",0,train b97023504c53efab507c20ed8af5f6430c475834,tensorflow/tensorflow,Moved to experimental api,c_api_experimental_test.cc,"@@ -41,6 +41,7 @@ TEST(CApiExperimentalTest, Smoke) { TfLiteInterpreterOptions* options = TfLiteInterpreterOptionsCreate(); TfLiteInterpreterOptionsAddBuiltinOp(options, kTfLiteBuiltinAdd, GetDummyRegistration(), 1, 1); + TfLiteInterpreterOptionsSetUseNNAPI(options, true); TfLiteInterpreter* interpreter = TfLiteInterpreterCreate(model, options); ASSERT_NE(interpreter, nullptr); ",0,train b97023504c53efab507c20ed8af5f6430c475834,tensorflow/tensorflow,Moved to experimental api,c_api_test.cc,"@@ -38,7 +38,6 @@ TEST(CApiSimple, Smoke) { TfLiteInterpreterOptions* options = TfLiteInterpreterOptionsCreate(); ASSERT_NE(options, nullptr); TfLiteInterpreterOptionsSetNumThreads(options, 2); - TfLiteInterpreterOptionsSetUseNNAPI(options, true); TfLiteInterpreter* interpreter = TfLiteInterpreterCreate(model, options); ASSERT_NE(interpreter, nullptr); ",0,train bd9224f5a066be8ec591bb2ac79c8bd87a9a395b,tensorflow/tensorflow,"Modify reference quantized LSTM implementation so that it only needs one instantiation of fixed-point Tanh, for 3 integer bits, regardless of the value of StateIntegerBits PiperOrigin-RevId: 186075161",reference_ops.h,"@@ -1577,9 +1577,19 @@ void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims, FS new_state = gemmlowp::SaturatingAdd( gemmlowp::Rescale(input_times_input_modulation), prev_state_times_forget_state); - // Implementation of last internal tanh node, still in fixed-point. - F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state); + // Implementation of last internal Tanh node, still in fixed-point. + // Since a Tanh fixed-point implementation is specialized for a given + // number or integer bits, and each specialization can have a substantial + // code size, and we already used above a Tanh on an input with 3 integer + // bits, and per the table in the above function comment there is no + // significant accuracy to be lost by clamping to [-8, +8] for a + // 3-integer-bits representation, let us just do that. This helps people + // porting this to targets where code footprint must be minimized. + F3 new_state_f3 = gemmlowp::Rescale<3>(new_state); + F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3); // Store the new internal state back to memory, as 16-bit integers. + // Note: here we store the original value with StateIntegerBits, not + // the rescaled 3-integer-bits value fed to tanh. output_state_data_int16[b * output_depth + c] = new_state.raw(); // Down-scale the output activations to 8-bit integers, saturating, // and store back to memory. ",0,test e873954c8507c5b983c3dddd6f3ae1eeb2c04e01,tensorflow/tensorflow,"Expand the documentation of tf.learn feature columns. Change: 139102565",feature_column.py,"@@ -14,63 +14,102 @@ # ============================================================================== """"""This API defines FeatureColumn abstraction. -To distinguish the concept of a feature family and a specific binary feature -within a family, we refer to a feature family like ""country"" as a feature -column. For example ""country:US"" is a feature which is in ""country"" feature -column and has a feature value (""US""). 
+FeatureColumns provide a high level abstraction for ingesting and representing +features in tf.learn Estimator models. -Supported feature types are: - * _SparseColumn: also known as categorical features. - * _RealValuedColumn: also known as continuous features. +FeatureColumns are the primary way of encoding features for pre-canned +tf.learn Estimators. -Supported transformations on above features are: - * Bucketization: also known as binning. - * Crossing: also known as conjunction or combination. - * Embedding. +When using FeatureColumns with tf.learn models, the type of feature column you +should choose depends on (1) the feature type and (2) the model type. -Typical usage example: +(1) Feature type: + * Continuous features can be represented by `real_valued_column`. + * Categorical features can be represented by any `sparse_column_with_*` + column (`sparse_column_with_keys`, `sparse_column_with_hash_bucket`, + `sparse_column_with_integerized_feature`). - ```python - # Define features and transformations - sparse_feature_a = sparse_column_with_keys( - column_name=""sparse_feature_a"", keys=[""AB"", ""CD"", ...]) +(2) Model type: + * Deep neural network models (`DNNClassifier`, `DNNRegressor`). + + Continuous features can be directly fed into deep neural network models. + + age_column = real_valued_column(""age"") - embedding_feature_a = embedding_column( - sparse_id_column=sparse_feature_a, dimension=3, combiner=""sum"") + To feed sparse features into DNN models, wrap the column with + `embedding_column` or `one_hot_column`. `one_hot_column` is recommended for + features with only a few possible values. For features with many possible + values, `embedding_column` is recommended. - sparse_feature_b = sparse_column_with_hash_bucket( - column_name=""sparse_feature_b"", hash_bucket_size=1000) + embedded_dept_column = embedding_column( + sparse_column_with_keys(""department"", [""math"", ""philosphy"", ...]), + dimension=10) - embedding_feature_b = embedding_column( - sparse_id_column=sparse_feature_b, dimension=16, combiner=""sum"") +* Wide (aka linear) models (`LinearClassifier`, `LinearRegressor`). - crossed_feature_a_x_b = crossed_column( - columns=[sparse_feature_a, sparse_feature_b], hash_bucket_size=10000) + Sparse features can be fed directly into linear models. - real_feature = real_valued_column(""real_feature"") - real_feature_buckets = bucketized_column( - source_column=real_feature, + dept_column = sparse_column_with_keys(""department"", + [""math"", ""philosphy"", ""english""]) + + It is recommended that continuous features be bucketized before being + fed into linear models. + + bucketized_age_column = bucketized_column( + source_column=age_column, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]) - my_features = [embedding_feature_b, real_feature_buckets, embedding_feature_a] + Sparse features can be crossed (also known as conjuncted or combined) in + order to form non-linearities, and then fed into linear models. + + cross_dept_age_column = crossed_column( + columns=[department_column, bucketized_age_column], + hash_bucket_size=1000) + +Example of building tf.learn model using FeatureColumns: + + # Define features and transformations + deep_feature_columns = [age_column, embedded_dept_column] + wide_feature_columns = [dept_column, bucketized_age_column, + cross_dept_age_column] + + # Build deep model + estimator = DNNClassifier( + feature_columns=deep_feature_columns, + hidden_units=[500, 250, 50]) + estimator.train(...) 
+ + # Or build a wide model + estimator = LinearClassifier( + feature_columns=wide_feature_columns) + estimator.train(...) + + # Or build a wide and deep model! + estimator = DNNLinearCombinedClassifier( + linear_feature_columns=wide_feature_columns, + dnn_feature_columns=deep_feature_columns, + dnn_hidden_units=[500, 250, 50]) + estimator.train(...) + + +FeatureColumns can also be transformed into a generic input layer for +custom models using `input_from_feature_columns` within +`feature_column_ops.py`. + +Example of building non-tf.learn model using FeatureColumns: + # Building model via layers + + deep_feature_columns = [age_column, embedded_dept_column] columns_to_tensor = parse_feature_columns_from_examples( serialized=my_data, - feature_columns=my_features) + feature_columns=deep_feature_columns) first_layer = input_from_feature_columns( columns_to_tensors=columns_to_tensor, - feature_columns=my_features) + feature_columns=deep_feature_columns) second_layer = fully_connected(first_layer, ...) - # Building model via tf.learn.estimators - estimator = DNNLinearCombinedClassifier( - linear_feature_columns=my_wide_features, - dnn_feature_columns=my_deep_features, - dnn_hidden_units=[500, 250, 50]) - estimator.train(...) - ``` - - See feature_column_ops_test for more examples. +See feature_column_ops_test for more examples. """""" from __future__ import absolute_import @@ -871,7 +910,7 @@ class _EmbeddingColumn(_FeatureColumn, collections.namedtuple( def one_hot_column(sparse_id_column): - """"""Creates a _OneHotColumn. + """"""Creates an `_OneHotColumn` for a one-hot or multi-hot repr in a DNN. Args: sparse_id_column: A _SparseColumn which is created by @@ -891,7 +930,7 @@ def embedding_column(sparse_id_column, initializer=None, ckpt_to_load_from=None, tensor_name_in_ckpt=None): - """"""Creates an `_EmbeddingColumn`. + """"""Creates an `_EmbeddingColumn` for feeding sparse data into a DNN. Args: sparse_id_column: A `_SparseColumn` which is created by for example @@ -1244,7 +1283,7 @@ def real_valued_column(column_name, default_value=None, dtype=dtypes.float32, normalizer=None): - """"""Creates a _RealValuedColumn. + """"""Creates a `_RealValuedColumn` for dense numeric data. Args: column_name: A string defining real valued column name. @@ -1477,7 +1516,7 @@ class _BucketizedColumn(_FeatureColumn, collections.namedtuple( def bucketized_column(source_column, boundaries): - """"""Creates a _BucketizedColumn. + """"""Creates a _BucketizedColumn for discretizing dense input. Args: source_column: A _RealValuedColumn defining dense column. @@ -1676,7 +1715,7 @@ def crossed_column(columns, hash_bucket_size, combiner=None, ckpt_to_load_from=None, tensor_name_in_ckpt=None, hash_key=None): - """"""Creates a _CrossedColumn. + """"""Creates a _CrossedColumn for performing feature crosses. Args: columns: An iterable of _FeatureColumn. 
Items can be an instance of ",0,train e1f8843adce2333be3357650c24b3ecf97c42704,tensorflow/tensorflow,"Evaluation tool internal refactors PiperOrigin-RevId: 280984929 Change-Id: I4b8f085d6258d7e041ccf0cfbeaac9a155813cef",preprocess_coco_minival.py,"@@ -83,6 +83,7 @@ def _get_ground_truth_detections(instances_file, if image_id not in image_id_whitelist: continue image_data_dict = {} + image_data_dict['id'] = image_dict['id'] image_data_dict['file_name'] = image_dict['file_name'] all_file_names.append(image_data_dict['file_name']) image_data_dict['height'] = image_dict['height'] @@ -154,6 +155,7 @@ def _dump_data(ground_truth_detections, images_folder_path, output_folder_path): for image_dict in ground_truth_detections.values(): # Create an ObjectsSet proto for this file's ground truth. detection_result = ground_truth_data.detection_results.add() + detection_result.image_id = image_dict['id'] detection_result.image_name = image_dict['file_name'] for detection_dict in image_dict['detections']: object_instance = detection_result.objects.add() ",0,train ebb278520add4b046e283f81398df03395b5d342,tensorflow/tensorflow,"Give clear errors for bad input names. PiperOrigin-RevId: 155857515",strip_unused_lib.py,"@@ -41,14 +41,26 @@ def strip_unused(input_graph_def, input_node_names, output_node_names, a list that specifies one value per input node name. Returns: - A GraphDef with all unnecessary ops removed. + A `GraphDef` with all unnecessary ops removed. + + Raises: + ValueError: If any element in `input_node_names` refers to a tensor instead + of an operation. + KeyError: If any element in `input_node_names` is not found in the graph. """""" + for name in input_node_names: + if "":"" in name: + raise ValueError(""Name '%s' appears to refer to a Tensor, "" + ""not a Operation."" % name) + # Here we replace the nodes we're going to override as inputs with # placeholders so that any unused nodes that are inputs to them are # automatically stripped out by extract_sub_graph(). + not_found = {name for name in input_node_names} inputs_replaced_graph_def = graph_pb2.GraphDef() for node in input_graph_def.node: if node.name in input_node_names: + not_found.remove(node.name) placeholder_node = node_def_pb2.NodeDef() placeholder_node.op = ""Placeholder"" placeholder_node.name = node.name @@ -67,6 +79,9 @@ def strip_unused(input_graph_def, input_node_names, output_node_names, else: inputs_replaced_graph_def.node.extend([copy.deepcopy(node)]) + if not_found: + raise KeyError(""The following input nodes were not found: %s\n"" % not_found) + output_graph_def = graph_util.extract_sub_graph(inputs_replaced_graph_def, output_node_names) return output_graph_def ",0,train ebb278520add4b046e283f81398df03395b5d342,tensorflow/tensorflow,"Give clear errors for bad input names. PiperOrigin-RevId: 155857515",strip_unused_test.py,"@@ -58,16 +58,25 @@ class StripUnusedTest(test_util.TensorFlowTestCase): # routine. 
input_graph_path = os.path.join(self.get_temp_dir(), input_graph_name) input_binary = False - input_node_names = ""wanted_input_node"" output_binary = True output_node_names = ""output_node"" output_graph_path = os.path.join(self.get_temp_dir(), output_graph_name) - strip_unused_lib.strip_unused_from_files(input_graph_path, input_binary, - output_graph_path, output_binary, - input_node_names, - output_node_names, - dtypes.float32.as_datatype_enum) + def strip(input_node_names): + strip_unused_lib.strip_unused_from_files(input_graph_path, input_binary, + output_graph_path, output_binary, + input_node_names, + output_node_names, + dtypes.float32.as_datatype_enum) + + with self.assertRaises(KeyError): + strip(""does_not_exist"") + + with self.assertRaises(ValueError): + strip(""wanted_input_node:0"") + + input_node_names = ""wanted_input_node"" + strip(input_node_names) # Now we make sure the variable is now a constant, and that the graph still # produces the expected result. ",0,train 9ba6299242237b16dd812a2beb4a7d4bcad3a1c8,tensorflow/tensorflow,"Use correct spelling for Fake device in placer_test The test use FakeGPU/FakeGPU spelling in device types but all lower case spelling in device names. It has worked in Placer because we never directly compare names with types in Placer, but such misspelling are not generally supported (e.g. TF Python code converts lower case ""cpu/gpu"" to ""CPU/GPU"" before passing the string to C++ core). Having this unsupported usage in tests is misleading and wastes development time when working on Placer. PiperOrigin-RevId: 239868502",placer_test.cc,"@@ -49,11 +49,11 @@ using ::tensorflow::test::function::GDef; using ::tensorflow::test::function::NDef; using FDH = ::tensorflow::FunctionDefHelper; -constexpr char kCPU[] = ""/device:fakecpu:0""; -constexpr char kGPU[] = ""/device:fakegpu:0""; +constexpr char kCPU[] = ""/device:FakeCPU:0""; +constexpr char kGPU[] = ""/device:FakeGPU:0""; -constexpr char kFullCPU[] = ""/job:a/replica:0/task:0/device:fakecpu:0""; -constexpr char kFullGPU[] = ""/job:a/replica:0/task:0/device:fakegpu:0""; +constexpr char kFullCPU[] = ""/job:a/replica:0/task:0/device:FakeCPU:0""; +constexpr char kFullGPU[] = ""/job:a/replica:0/task:0/device:FakeGPU:0""; namespace { @@ -205,11 +205,11 @@ class PlacerTest : public ::testing::Test { // objects. for (int i = 0; i < 10; ++i) { local_devices_.emplace_back(FakeDevice::MakeCPU( - strings::StrCat(""/job:a/replica:0/task:0/device:fakecpu:"", i))); + strings::StrCat(""/job:a/replica:0/task:0/device:FakeCPU:"", i))); devices_.AddDevice(local_devices_.back().get()); // Insert the GPUs in reverse order. local_devices_.emplace_back(FakeDevice::MakeGPU( - strings::StrCat(""/job:a/replica:0/task:0/device:fakegpu:"", 9 - i))); + strings::StrCat(""/job:a/replica:0/task:0/device:FakeGPU:"", 9 - i))); devices_.AddDevice(local_devices_.back().get()); } } @@ -690,7 +690,7 @@ TEST_F(PlacerTest, TestIgnoreGeneratorHeuristicIfWrongPartialDevice) { // of valid devices for the generator. Node* input = ops::SourceOp(""TestCPUGPUOutput"", - b.opts().WithName(""in"").WithDevice(""/device:fakecpu:1"")); + b.opts().WithName(""in"").WithDevice(""/device:FakeCPU:1"")); // The assign is bound to CPU by the reference edge. 
ops::BinaryOp(""TestAssign"", var_cpu, input, b.opts().WithName(""assign"")); @@ -700,10 +700,10 @@ TEST_F(PlacerTest, TestIgnoreGeneratorHeuristicIfWrongPartialDevice) { TF_EXPECT_OK(Place(&g)); EXPECT_DEVICE_TYPE(g, ""in"", ""FakeCPU""); - EXPECT_DEVICE_CONTAINS(g, ""in"", ""/device:fakecpu:1""); + EXPECT_DEVICE_CONTAINS(g, ""in"", ""/device:FakeCPU:1""); EXPECT_DEVICE_TYPE(g, ""var_cpu"", ""FakeCPU""); EXPECT_COLOCATED(g, ""var_cpu"", ""assign""); - EXPECT_DEVICE_CONTAINS(g, ""var_cpu"", ""/device:fakecpu:0""); + EXPECT_DEVICE_CONTAINS(g, ""var_cpu"", ""/device:FakeCPU:0""); } // Test that a graph with partial device specifications on the ops @@ -735,10 +735,10 @@ TEST_F(PlacerTest, TestAssignedDevicePreserved) { } GetNodeByName(g, ""in"")->set_assigned_device_name( - ""/job:a/replica:0/task:0/device:fakecpu:7""); + ""/job:a/replica:0/task:0/device:FakeCPU:7""); TF_EXPECT_OK(Place(&g)); - EXPECT_EQ(""/job:a/replica:0/task:0/device:fakecpu:7"", + EXPECT_EQ(""/job:a/replica:0/task:0/device:FakeCPU:7"", GetNodeByName(g, ""in"")->assigned_device_name()); } @@ -749,17 +749,17 @@ TEST_F(PlacerTest, TestPartialSpecGpuToCpu) { { // Scope for temporary variables used to construct g. GraphDefBuilder b(GraphDefBuilder::kFailImmediately); ops::SourceOp(""TestInput"", - b.opts().WithName(""in"").WithDevice(""/device:fakegpu:0"")); + b.opts().WithName(""in"").WithDevice(""/device:FakeGPU:0"")); ops::SourceOp(""TestVariable"", - b.opts().WithName(""var"").WithDevice(""/device:fakegpu:0"")); + b.opts().WithName(""var"").WithDevice(""/device:FakeGPU:0"")); TF_EXPECT_OK(BuildGraph(b, &g)); } TF_EXPECT_OK(Place(&g, true, false)); EXPECT_DEVICE_TYPE(g, ""in"", ""FakeCPU""); - EXPECT_DEVICE_CONTAINS(g, ""in"", ""/device:fakecpu""); + EXPECT_DEVICE_CONTAINS(g, ""in"", ""/device:FakeCPU""); EXPECT_DEVICE_TYPE(g, ""var"", ""FakeGPU""); - EXPECT_DEVICE_CONTAINS(g, ""var"", ""/device:fakegpu:0""); + EXPECT_DEVICE_CONTAINS(g, ""var"", ""/device:FakeGPU:0""); } // Test that a node with an assigned GPU device but has not registered @@ -773,14 +773,15 @@ TEST_F(PlacerTest, TestAssignedGpuDeviceToCpuDevice) { } GetNodeByName(g, ""in"")->set_assigned_device_name( - ""/job:a/replica:0/task:0/device:fakegpu:0""); + ""/job:a/replica:0/task:0/device:FakeGPU:0""); Status s = Place(&g); - EXPECT_EQ(error::INTERNAL, s.code()); + EXPECT_EQ(error::INTERNAL, s.code()) << s.ToString(); EXPECT_TRUE(str_util::StrContains( s.error_message(), - ""Assigned device '/job:a/replica:0/task:0/device:fakegpu:0' "" - ""does not have registered OpKernel support for TestInput"")); + ""Assigned device '/job:a/replica:0/task:0/device:FakeGPU:0' "" + ""does not have registered OpKernel support for TestInput"")) + << s.ToString(); } // Test that graphs with reference connections are correctly placed. 
@@ -917,15 +918,15 @@ TEST_F(PlacerTest, TestResourceHandlesOnDifferentDevicesFails) { if (set_assigned) { GetNodeByName(g, ""var_cpu"") ->set_assigned_device_name( - ""/job:a/replica:0/task:0/device:fakecpu:0""); + ""/job:a/replica:0/task:0/device:FakeCPU:0""); GetNodeByName(g, ""var_gpu"") ->set_assigned_device_name( - ""/job:a/replica:0/task:0/device:fakegpu:0""); + ""/job:a/replica:0/task:0/device:FakeGPU:0""); } else { GetNodeByName(g, ""var_cpu"") - ->set_requested_device(""/job:a/replica:0/task:0/device:fakecpu:0""); + ->set_requested_device(""/job:a/replica:0/task:0/device:FakeCPU:0""); GetNodeByName(g, ""var_gpu"") - ->set_requested_device(""/job:a/replica:0/task:0/device:fakegpu:0""); + ->set_requested_device(""/job:a/replica:0/task:0/device:FakeGPU:0""); } } @@ -936,8 +937,8 @@ TEST_F(PlacerTest, TestResourceHandlesOnDifferentDevicesFails) { ""Cannot place the graph because a reference or resource edge "" ""connects "" ""colocation groups with incompatible assigned devices: "" - ""/job:a/replica:0/task:0/device:fakegpu:0 vs "" - ""/job:a/replica:0/task:0/device:fakecpu:0"")); + ""/job:a/replica:0/task:0/device:FakeGPU:0 vs "" + ""/job:a/replica:0/task:0/device:FakeCPU:0"")); return Status::OK(); }; @@ -958,16 +959,16 @@ TEST_F(PlacerTest, TestReferenceConnectionIgnoreInfeasible) { GraphDefBuilder b(GraphDefBuilder::kFailImmediately); Node* input = ops::SourceOp( ""TestDevice"", - b.opts().WithName(""in"").WithDevice(""/job:a/task:0/device:fakegpu:0"")); + b.opts().WithName(""in"").WithDevice(""/job:a/task:0/device:FakeGPU:0"")); Node* var = ops::SourceOp(""TestVariable"", b.opts().WithName(""var_0"").WithDevice( - ""/job:a/task:0/device:fakegpu:0"")); + ""/job:a/task:0/device:FakeGPU:0"")); // This op is specified on CPU, but in practice will be ignored, // because the reference edges forces it on GPU. ops::BinaryOp(""TestAssign"", var, input, b.opts().WithName(""assign"").WithDevice( - ""/job:a/task:0/device:fakecpu:0"")); + ""/job:a/task:0/device:FakeCPU:0"")); TF_EXPECT_OK(BuildGraph(b, &g)); } @@ -998,7 +999,7 @@ TEST_F(PlacerTest, TestReferenceConnectionMoreSpecificDestinationSourceWins) { // assigned to CPU. 
ops::BinaryOp(""TestAssign"", var, input, b.opts().WithName(""assign"").WithDevice( - ""/job:a/task:0/device:fakecpu:0"")); + ""/job:a/task:0/device:FakeCPU:0"")); TF_EXPECT_OK(BuildGraph(b, &g)); } @@ -1019,11 +1020,11 @@ TEST_F(PlacerTest, TestReferenceConnectionNoSourceDevice) { GraphDefBuilder b(GraphDefBuilder::kFailImmediately); Node* input = ops::SourceOp( ""TestDevice"", - b.opts().WithName(""in"").WithDevice(""/job:a/task:0/device:fakegpu:0"")); + b.opts().WithName(""in"").WithDevice(""/job:a/task:0/device:FakeGPU:0"")); Node* var = ops::SourceOp(""TestVariable"", b.opts().WithName(""var_0"")); ops::BinaryOp(""TestAssign"", var, input, b.opts().WithName(""assign"").WithDevice( - ""/job:a/task:0/device:fakecpu:0"")); + ""/job:a/task:0/device:FakeCPU:0"")); TF_EXPECT_OK(BuildGraph(b, &g)); } @@ -1260,10 +1261,10 @@ TEST_F(PlacerTest, TestHeterogeneousDeviceSetFailure) { DeviceSet heterogeneous; std::unique_ptr gpu( - FakeDevice::MakeGPU(""/job:b/replica:0/task:0/device:fakegpu:0"")); + FakeDevice::MakeGPU(""/job:b/replica:0/task:0/device:FakeGPU:0"")); heterogeneous.AddDevice(gpu.get()); std::unique_ptr cpu( - FakeDevice::MakeCPU(""/job:b/replica:0/task:1/device:fakecpu:0"")); + FakeDevice::MakeCPU(""/job:b/replica:0/task:1/device:FakeCPU:0"")); heterogeneous.AddDevice(cpu.get()); Status s = Place(&g, &heterogeneous); EXPECT_EQ(error::INVALID_ARGUMENT, s.code()); @@ -1361,7 +1362,7 @@ TEST_F(PlacerTest, TestNoDevicesRegistered) { DeviceSet cpu_only; std::unique_ptr cpu( - FakeDevice::MakeCPU(""/job:a/replica:0/task:0/device:fakecpu:0"")); + FakeDevice::MakeCPU(""/job:a/replica:0/task:0/device:FakeCPU:0"")); cpu_only.AddDevice(cpu.get()); Status s = Place(&g, &cpu_only); @@ -1429,12 +1430,12 @@ TEST_F(PlacerTest, TestNonexistentGpuAllowSoftPlacement) { { // Scope for temporary variables used to construct g. GraphDefBuilder b(GraphDefBuilder::kFailImmediately); ops::SourceOp(""TestDevice"", - b.opts().WithName(""in"").WithDevice(""/device:fakegpu:11"")); + b.opts().WithName(""in"").WithDevice(""/device:FakeGPU:11"")); TF_EXPECT_OK(BuildGraph(b, &g)); } TF_EXPECT_OK(Place(&g, true, false)); - EXPECT_DEVICE_CONTAINS(g, ""in"", ""/device:fakegpu:0""); + EXPECT_DEVICE_CONTAINS(g, ""in"", ""/device:FakeGPU:0""); } // Test that ops request to be placed on non-existent devices will fail if @@ -1444,13 +1445,13 @@ TEST_F(PlacerTest, TestNonexistentGpuNoAllowSoftPlacement) { { // Scope for temporary variables used to construct g. GraphDefBuilder b(GraphDefBuilder::kFailImmediately); ops::SourceOp(""TestDevice"", - b.opts().WithName(""in"").WithDevice(""/device:fakegpu:11"")); + b.opts().WithName(""in"").WithDevice(""/device:FakeGPU:11"")); TF_EXPECT_OK(BuildGraph(b, &g)); } Status s = Place(&g, false, false); EXPECT_EQ(error::INVALID_ARGUMENT, s.code()); - EXPECT_TRUE(str_util::StrContains(s.error_message(), ""/device:fakegpu:11"")); + EXPECT_TRUE(str_util::StrContains(s.error_message(), ""/device:FakeGPU:11"")); } // Test that the ""Cannot assign a device"" error message contains a format tag @@ -1460,7 +1461,7 @@ TEST_F(PlacerTest, TestNonexistentGpuNoAllowSoftPlacementFormatTag) { { // Scope for temporary variables used to construct g. 
GraphDefBuilder b(GraphDefBuilder::kFailImmediately); ops::SourceOp(""TestDevice"", - b.opts().WithName(""in"").WithDevice(""/device:fakegpu:11"")); + b.opts().WithName(""in"").WithDevice(""/device:FakeGPU:11"")); TF_EXPECT_OK(BuildGraph(b, &g)); } @@ -1479,16 +1480,18 @@ TEST_F(PlacerTest, TestUnsupportedDeviceNoAllowSoftPlacement) { { // Scope for temporary variables used to construct g. GraphDefBuilder b(GraphDefBuilder::kFailImmediately); ops::SourceOp(""VariableGPU"", - b.opts().WithName(""var"").WithDevice(""/device:fakecpu:0"")); + b.opts().WithName(""var"").WithDevice(""/device:FakeCPU:0"")); TF_EXPECT_OK(BuildGraph(b, &g)); } Status s = Place(&g, false, false); - EXPECT_EQ(error::INVALID_ARGUMENT, s.code()); - EXPECT_TRUE(str_util::StrContains(s.error_message(), ""/device:fakecpu:0"")); + EXPECT_EQ(error::INVALID_ARGUMENT, s.code()) << s.ToString(); + EXPECT_TRUE(str_util::StrContains(s.error_message(), ""/device:FakeCPU:0"")) + << s.ToString(); EXPECT_TRUE(str_util::StrContains( s.error_message(), - ""no supported kernel for fakecpu devices is available"")); + ""no supported kernel for FakeCPU devices is available"")) + << s.ToString(); } // Test that placement fails when a node requests an explicit device that is not @@ -1537,7 +1540,7 @@ TEST_F(PlacerTest, TestUnsupportedDeviceAllowSoftPlacement) { { // Scope for temporary variables used to construct g. GraphDefBuilder b(GraphDefBuilder::kFailImmediately); ops::SourceOp(""VariableGPU"", - b.opts().WithName(""var"").WithDevice(""/device:fakecpu:0"")); + b.opts().WithName(""var"").WithDevice(""/device:FakeCPU:0"")); TF_EXPECT_OK(BuildGraph(b, &g)); } @@ -1557,14 +1560,14 @@ TEST_F(PlacerTest, TestDeviceTypeConstraintsAllowSoftPlacement) { Node* var_gpu = ops::SourceOp(""VariableGPU"", b.opts().WithName(""var_gpu"")); ops::UnaryOp( ""TestDeviceEnforce"", var_gpu, - b.opts().WithName(""force_gpu"").WithDevice(""/device:fakecpu:0"")); + b.opts().WithName(""force_gpu"").WithDevice(""/device:FakeCPU:0"")); // var_cpu has ref output and runs on CPU. // force_cpu takes var_cpu and requested GPU. // Verify that both are placed on CPU. 
Node* var_cpu = ops::SourceOp(""VariableCPU"", b.opts().WithName(""var_cpu"")); ops::UnaryOp( ""TestDeviceEnforce"", var_cpu, - b.opts().WithName(""force_cpu"").WithDevice(""/device:fakegpu:0"")); + b.opts().WithName(""force_cpu"").WithDevice(""/device:FakeGPU:0"")); TF_EXPECT_OK(BuildGraph(b, &g)); } @@ -1655,10 +1658,10 @@ TEST_F(PlacerTest, TestGeneratorNodeDoesntFollowNonColocatedConsumers) { TF_EXPECT_OK(BuildGraph(b, &g)); GetNodeByName(g, ""var1_cpu"") - ->set_assigned_device_name(""/job:a/replica:0/task:0/device:fakecpu:1""); + ->set_assigned_device_name(""/job:a/replica:0/task:0/device:FakeCPU:1""); GetNodeByName(g, ""var2_cpu"") - ->set_assigned_device_name(""/job:a/replica:0/task:0/device:fakecpu:2""); + ->set_assigned_device_name(""/job:a/replica:0/task:0/device:FakeCPU:2""); } TF_EXPECT_OK(Place(&g)); @@ -1720,7 +1723,7 @@ TEST_P(SoftPlacementPlacerTest, s.error_message(), ""Cannot colocate nodes {{colocation_node id2}} and {{colocation_node "" ""id1}}: Cannot merge devices with incompatible types: "" - ""'/device:fakecpu:0' and '/device:fakegpu:0'"")) + ""'/device:FakeCPU:0' and '/device:FakeGPU:0'"")) << s.ToString(); } } @@ -1811,8 +1814,8 @@ TEST_P(SoftPlacementPlacerTest, s.error_message(), ""Cannot colocate nodes {{colocation_node id2}} and {{colocation_node "" ""id1}}: Cannot merge devices with incompatible types: "" - ""'/job:a/replica:0/task:0/device:fakecpu:0' and "" - ""'/job:a/replica:0/task:0/device:fakegpu:0'"")) + ""'/job:a/replica:0/task:0/device:FakeCPU:0' and "" + ""'/job:a/replica:0/task:0/device:FakeGPU:0'"")) << s.ToString(); } } ",0,train 19c39157c0ac76545ae82bf48d2e11784ff232fb,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-07-25 PiperOrigin-RevId: 259906647",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 7, 24) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 7, 25) _FORWARD_COMPATIBILITY_HORIZON_OVERRIDDEN = False _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" ",0,train 2e1822ff68a48091e934a1d7b572410a9fb42281,tensorflow/tensorflow,"Stabilize sorting order of nodes by hash. PiperOrigin-RevId: 389999148 Change-Id: I0564a2383fb00491a8ce4afde774bcace1457648",sig_node.cc,"@@ -275,8 +275,8 @@ void Signature::PrepareNodes() { void Signature::FindUniqueHashes(size_t* next_node_id_p) { // Start by sorting by the hash value. - std::sort(nodes.begin() + *next_node_id_p, nodes.end(), - SigNode::NodeOrderLess()); + std::stable_sort(nodes.begin() + *next_node_id_p, nodes.end(), + SigNode::NodeOrderLess()); // At each call, if no nodes have unique hashes, one node that has a // non-unique (shared) hash can be made unique by assigning a unique id. ",0,train 0e9a670e66bdc163ac3b8fb807ca5629caf4f784,tensorflow/tensorflow,"Make caching to be by default True under eager mode. PiperOrigin-RevId: 289769741 Change-Id: Iacd2d60749ec80d99c68deadfc2de7b8beb85b00",recurrent.py,"@@ -1270,7 +1270,11 @@ class SimpleRNNCell(DropoutRNNCellMixin, Layer): dropout=0., recurrent_dropout=0., **kwargs): - self._enable_caching_device = kwargs.pop('enable_caching_device', False) + # By default use cached variable under v2 mode, see b/143699808. 
+ if ops.executing_eagerly_outside_functions(): + self._enable_caching_device = kwargs.pop('enable_caching_device', True) + else: + self._enable_caching_device = kwargs.pop('enable_caching_device', False) super(SimpleRNNCell, self).__init__(**kwargs) self.units = units self.activation = activations.get(activation) @@ -1701,7 +1705,11 @@ class GRUCell(DropoutRNNCellMixin, Layer): implementation=1, reset_after=False, **kwargs): - self._enable_caching_device = kwargs.pop('enable_caching_device', False) + # By default use cached variable under v2 mode, see b/143699808. + if ops.executing_eagerly_outside_functions(): + self._enable_caching_device = kwargs.pop('enable_caching_device', True) + else: + self._enable_caching_device = kwargs.pop('enable_caching_device', False) super(GRUCell, self).__init__(**kwargs) self.units = units self.activation = activations.get(activation) @@ -2255,7 +2263,11 @@ class LSTMCell(DropoutRNNCellMixin, Layer): recurrent_dropout=0., implementation=1, **kwargs): - self._enable_caching_device = kwargs.pop('enable_caching_device', False) + # By default use cached variable under v2 mode, see b/143699808. + if ops.executing_eagerly_outside_functions(): + self._enable_caching_device = kwargs.pop('enable_caching_device', True) + else: + self._enable_caching_device = kwargs.pop('enable_caching_device', False) super(LSTMCell, self).__init__(**kwargs) self.units = units self.activation = activations.get(activation) ",0,train b146fdcdf11217244f7984f8b88c71dda6f3dc02,tensorflow/tensorflow,"Make sure while body DT_RESOURCE _Retval comes from _Arg with same index. PiperOrigin-RevId: 245816414",rearrange_function_argument_pass_test.cc,"@@ -30,6 +30,7 @@ limitations under the License. #include ""tensorflow/core/framework/graph_to_functiondef.h"" #include ""tensorflow/core/framework/node_def_util.h"" #include ""tensorflow/core/framework/tensor_shape.h"" +#include ""tensorflow/core/lib/core/error_codes.pb.h"" #include ""tensorflow/core/platform/test.h"" #include ""tensorflow/core/public/session_options.h"" #include ""tensorflow/core/public/version.h"" @@ -211,4 +212,67 @@ TEST_F(RearrangeFunctionArgumentForFunctionTest, Basic) { EXPECT_EQ(input_node->name(), ""while""); } +TEST_F(RearrangeFunctionArgumentForFunctionTest, + WhileResourceRetvalFromDifferentArgUnimplemented) { + FunctionDefLibrary fdl; + { + // Function for While's ""body"". + // ""arg0"" (T=DT_RESOURCE), ""arg1"" (T=DT_RESOURCE), ""arg2"" (T=DT_INT32) + // ""ret0"" = ""arg1"" + // ""ret1"" = ""arg0"" + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output arg0 = ops::_Arg(s.WithOpName(""arg0""), DT_RESOURCE, 0); + Output arg1 = ops::_Arg(s.WithOpName(""arg1""), DT_RESOURCE, 1); + Output arg2 = ops::_Arg(s.WithOpName(""arg2""), DT_INT32, 2); + auto ret0 = ops::_Retval(s.WithOpName(""ret0""), arg1, 0); + auto ret1 = ops::_Retval(s.WithOpName(""ret1""), arg0, 1); + auto ret2 = ops::_Retval(s.WithOpName(""ret2""), arg2, 2); + std::unique_ptr g(new Graph(OpRegistry::Global())); + TF_CHECK_OK(s.ToGraph(g.get())); + FunctionDef *xla_fdef = fdl.add_function(); + TF_CHECK_OK(GraphToFunctionDef(*g, ""f2"", xla_fdef)); + } + { + // Function for While's ""cond"". 
+ // ""arg0"" (T=DT_RESOURCE), ""arg1"" (T=DT_RESOURCE), ""arg2"" (T=DT_INT32) + // ""ret0"" = true + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output arg0 = ops::_Arg(s.WithOpName(""arg0""), DT_RESOURCE, 0); + Output arg1 = ops::_Arg(s.WithOpName(""arg1""), DT_RESOURCE, 1); + Output arg2 = ops::_Arg(s.WithOpName(""arg2""), DT_INT32, 2); + Output cond = ops::Const(s.WithOpName(""const""), true, TensorShape({})); + auto ret0 = ops::_Retval(s.WithOpName(""ret0""), cond, 0); + std::unique_ptr g(new Graph(OpRegistry::Global())); + TF_CHECK_OK(s.ToGraph(g.get())); + FunctionDef *xla_fdef = fdl.add_function(); + TF_CHECK_OK(GraphToFunctionDef(*g, ""f1"", xla_fdef)); + } + { + // Build the XLA computation func. + // ""arg0"" (T=DT_RESOURCE), ""arg1"" (T=DT_RESOURCE), ""arg2"" (T=DT_INT32) + // ""arg0"", ""arg1"" -> ""while"" (While) + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output arg0 = ops::_Arg(s.WithOpName(""arg0""), DT_RESOURCE, 0); + Output arg1 = ops::_Arg(s.WithOpName(""arg1""), DT_RESOURCE, 1); + Output arg2 = ops::_Arg(s.WithOpName(""arg2""), DT_INT32, 2); + NameAttrList cond_fn, body_fn; + cond_fn.set_name(""f1""); + body_fn.set_name(""f2""); + auto while_op = ops::While(s.WithOpName(""while""), + std::initializer_list{arg0, arg1, arg2}, + cond_fn, body_fn); + std::unique_ptr g(new Graph(OpRegistry::Global())); + TF_CHECK_OK(s.ToGraph(g.get())); + FunctionDef *xla_fdef = fdl.add_function(); + TF_CHECK_OK(GraphToFunctionDef(*g, ""cluster"", xla_fdef)); + } + FunctionLibraryDefinition fld(OpRegistry::Global(), fdl); + + bool modified; + protobuf::Map attrs; + Status s = RearrangeFunctionArgumentTest(""cluster"", ""cluster_rewritten"", + attrs, &fld, &modified); + EXPECT_EQ(s.code(), error::UNIMPLEMENTED); +} + } // namespace tensorflow ",0,test b146fdcdf11217244f7984f8b88c71dda6f3dc02,tensorflow/tensorflow,"Make sure while body DT_RESOURCE _Retval comes from _Arg with same index. 
PiperOrigin-RevId: 245816414",rearrange_function_argument_pass.cc,"@@ -309,6 +309,43 @@ Status MaybeRewriteWhileNode(Graph* g, Node* n, FunctionLibraryDefinition* fld, TF_RETURN_IF_ERROR( FunctionDefToBodyHelper(*fdef, AttrSlice(), fld, &fbody)); + // Check that resource _Arg nodes for While node are always returned with + // the same index, and we don't have cases like this: + // tf.while_loop( + // cond, + // lambda resource_var1, resource_var2: [resource_var2, resource_var1], + // [resource_var1, resource_var2]) + if (attr_name == ""body"") { + for (int i = 0; i < fbody->ret_nodes.size(); i++) { + Node* n = fbody->ret_nodes[i]; + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), ""T"", &dtype)); + if (dtype != DT_RESOURCE) { + continue; + } + + Node* input_node; + TF_RETURN_IF_ERROR(n->input_node(0, &input_node)); + while (input_node->IsIdentity()) { + TF_RETURN_IF_ERROR(input_node->input_node(0, &input_node)); + } + if (input_node->IsArg()) { + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(input_node->def(), ""index"", &index)); + if (index != i) { + return errors::Unimplemented(""While node "", n->DebugString(), + "" has resource _Retval["", i, + ""] coming from _Arg["", index, ""]""); + } + } else { + return errors::Unimplemented(""Encountered node "", + input_node->DebugString(), + "" while tracing _Arg node for _Retval["", + i, ""] of while node "", n->DebugString()); + } + } + } + RearrangeArgNodes(&fbody->arg_nodes, index_mapping); if (attr_name == ""body"") { for (int i = 0; i < fbody->ret_nodes.size(); i++) { ",0,test 9f7901630972ca5e53441c81f628c18d1137ab08,tensorflow/tensorflow,"[XLA:CPU] Fix fast-math flags in llvm_ir_runtime.cc Previously we were calling setFastMath(), but we should have been calling setFastMath(fast_math_enabled). Also disable fastmath in the exhaustive f32 elementwise op test. PiperOrigin-RevId: 231636911",llvm_ir_runtime.cc,"@@ -83,7 +83,7 @@ llvm::Function* EmitVectorF32ExpIfNeeded(llvm::Module* module, llvm::IRBuilder<> b(vector_exp_body); llvm::FastMathFlags fast_math_flags; - fast_math_flags.setFast(); + fast_math_flags.setFast(enable_fast_math); b.setFastMathFlags(fast_math_flags); VectorSupportLibrary vsl(F32, vector_width, &b, ""exp_f32""); @@ -166,7 +166,7 @@ llvm::Function* EmitVectorF32LogIfNeeded(llvm::Module* module, llvm::IRBuilder<> b(vector_log_body); llvm::FastMathFlags fast_math_flags; - fast_math_flags.setFast(); + fast_math_flags.setFast(enable_fast_math); b.setFastMathFlags(fast_math_flags); llvm::Value* input = &*vector_log_function->arg_begin(); ",0,train 9f7901630972ca5e53441c81f628c18d1137ab08,tensorflow/tensorflow,"[XLA:CPU] Fix fast-math flags in llvm_ir_runtime.cc Previously we were calling setFastMath(), but we should have been calling setFastMath(fast_math_enabled). Also disable fastmath in the exhaustive f32 elementwise op test. PiperOrigin-RevId: 231636911",exhaustive_f32_elementwise_op_test.cc,"@@ -25,12 +25,14 @@ class ExhaustiveF32ElementwiseOpTest : public ClientLibraryTestBase, public ::testing::WithParamInterface> { protected: - ErrorSpec error_spec_{0.0001, 0.0001, /*relaxed_nans=*/true}; + ErrorSpec error_spec_{0.0001, 0.0001}; template void ExhaustivelyTestF32Op(EnqueueOpTy enqueue_op, float (*evaluate_op)(float), std::pair known_incorrect_range) { + SetFastMathDisabled(true); + int64 begin, end; std::tie(begin, end) = GetParam(); int64 input_size = end - begin; ",0,train 150f44ce844725096241036a48cd78bcc8075ef3,tensorflow/tensorflow,"Reference math_ops symbols in the documentation and remove from whitelist. 
Change: 133902853",__init__.py,"@@ -152,12 +152,9 @@ _allowed_symbols = [ 'lin_space', 'list_diff', # Use tf.listdiff instead. 'parse_single_sequence_example', - 'scalar_mul', 'serialize_many_sparse', 'serialize_sparse', - 'sparse_matmul', - 'sparse_segment_mean_grad', - 'sparse_segment_sqrt_n_grad', + 'sparse_matmul', ## use tf.matmul instead. 'user_ops', ] ",0,train 150f44ce844725096241036a48cd78bcc8075ef3,tensorflow/tensorflow,"Reference math_ops symbols in the documentation and remove from whitelist. Change: 133902853",segment_reduction_ops_test.py,"@@ -21,6 +21,8 @@ from __future__ import print_function import numpy as np import tensorflow as tf +from tensorflow.python.util.all_util import reveal_undocumented + class SegmentReductionHelper(tf.test.TestCase): @@ -349,6 +351,12 @@ class SparseSegmentReductionHelper(SegmentReductionHelper): class SparseSegmentReductionOpTest(SparseSegmentReductionHelper): + def setUp(self): + reveal_undocumented(""tensorflow.python."" + ""sparse_segment_mean_grad"", tf) + reveal_undocumented(""tensorflow.python."" + ""sparse_segment_sqrt_n_grad"", tf) + def testValues(self): dtypes = [tf.float32, tf.float64, ",0,train 150f44ce844725096241036a48cd78bcc8075ef3,tensorflow/tensorflow,"Reference math_ops symbols in the documentation and remove from whitelist. Change: 133902853",math_ops.py,"@@ -24,6 +24,7 @@ operators to your graph. @@add @@sub @@mul +@@scalar_mul @@div @@truediv @@floordiv ",0,train cc00a3bc7077c75995c6a781decb1a3e7e279e30,tensorflow/tensorflow,"Avoid redundant bitcast. When creating the GlobalVariable for constants, we don't need to create a bitcast to the correct type. This is already done in HloToIrBindings::BindHloToIrValue(). PiperOrigin-RevId: 201924685",ir_emitter.cc,"@@ -94,10 +94,7 @@ Status IrEmitter::HandleConstant(HloInstruction* constant) { << std::endl << "" its type: "" << llvm_ir::DumpToString(*global_for_const->getType()); - llvm::Constant* shape_constant = llvm::ConstantExpr::getBitCast( - global_for_const, - llvm_ir::ShapeToIrType(literal.shape(), module_)->getPointerTo()); - bindings_.BindHloToIrValue(*constant, shape_constant); + bindings_.BindHloToIrValue(*constant, global_for_const); return Status::OK(); } ",0,train 6cc3d1c4ec15c4bc59870bf6f1db710d218b92a1,tensorflow/tensorflow,"Drop MemRefUtils from the ExecutionEngine The ExecutionEngine was updated recently to only take the LLVM dialect as input. Memrefs are no longer expected in the signature of the entry point function by the executor so there is no need to allocate and free them. The code in MemRefUtils is therefore dead and furthermore out of sync with the recent evolution of memref type to support strides. Drop it. PiperOrigin-RevId: 276272302 Change-Id: I097a5fa112bcfdbec8e5fd822c0968ae2c7ecc14",MemRefUtils.h,"@@ -1,54 +0,0 @@ -//===- MemRefUtils.h - MLIR runtime utilities for memrefs -------*- C++ -*-===// -// -// Copyright 2019 The MLIR Authors. -// -// Licensed under the Apache License, Version 2.0 (the ""License""); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an ""AS IS"" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// ============================================================================= -// -// This is a set of utilities to working with objects of memref type in an JIT -// context using the MLIR execution engine. -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_EXECUTIONENGINE_MEMREFUTILS_H_ -#define MLIR_EXECUTIONENGINE_MEMREFUTILS_H_ - -#include ""mlir/Support/LLVM.h"" - -namespace llvm { -template class Expected; -} - -namespace mlir { -class FuncOp; - -/// Simple memref descriptor class compatible with the ABI of functions emitted -/// by MLIR to LLVM IR conversion for statically-shaped memrefs of float type. -struct StaticFloatMemRef { - float *data; -}; - -/// Given an MLIR function that takes only statically-shaped memrefs with -/// element type f32, allocate the memref descriptor and the data storage for -/// each of the arguments, initialize the storage with `initialValue`, and -/// return a list of type-erased descriptor pointers. -llvm::Expected> -allocateMemRefArguments(FuncOp func, float initialValue = 0.0); - -/// Free a list of type-erased descriptors to statically-shaped memrefs with -/// element type f32. -void freeMemRefArguments(ArrayRef args); - -} // namespace mlir - -#endif // MLIR_EXECUTIONENGINE_MEMREFUTILS_H_ ",0,train 683e21314a80ac6cb89eb959465ded41e381d23c,tensorflow/tensorflow,"Automated rollback of commit 5aaebe06b476d7b7484d6eb2b68440654557018a PiperOrigin-RevId: 210594076",generate_validation_labels.py,"@@ -1,101 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the ""License""); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an ""AS IS"" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -""""""Tool to convert ILSVRC devkit validation ground truth to synset labels."""""" - -import argparse -from os import path -import sys -import scipy.io - -_SYNSET_ARRAYS_RELATIVE_PATH = 'data/meta.mat' -_VALIDATION_FILE_RELATIVE_PATH = 'data/ILSVRC2012_validation_ground_truth.txt' - - -def _synset_to_word(filepath): - """"""Returns synset to word dictionary by reading sysnset arrays."""""" - mat = scipy.io.loadmat(filepath) - entries = mat['synsets'] - # These fields are listed in devkit readme.txt - fields = [ - 'synset_id', 'WNID', 'words', 'gloss', 'num_children', 'children', - 'wordnet_height', 'num_train_images' - ] - synset_index = fields.index('synset_id') - words_index = fields.index('words') - synset_to_word = {} - for entry in entries: - entry = entry[0] - synset_id = int(entry[synset_index][0]) - first_word = entry[words_index][0].split(',')[0] - synset_to_word[synset_id] = first_word - return synset_to_word - - -def _validation_file_path(ilsvrc_dir): - return path.join(ilsvrc_dir, _VALIDATION_FILE_RELATIVE_PATH) - - -def _synset_array_path(ilsvrc_dir): - return path.join(ilsvrc_dir, _SYNSET_ARRAYS_RELATIVE_PATH) - - -def _generate_validation_labels(ilsvrc_dir, output_file): - synset_to_word = _synset_to_word(_synset_array_path(ilsvrc_dir)) - with open(_validation_file_path(ilsvrc_dir), 'r') as synset_id_file, open( - output_file, 'w') as output: - for synset_id in synset_id_file: - synset_id = int(synset_id) - output.write('%s\n' % synset_to_word[synset_id]) - - -def _check_arguments(args): - if not args.validation_labels_output: - raise ValueError('Invalid path to output file.') - ilsvrc_dir = args.ilsvrc_devkit_dir - if not ilsvrc_dir or not path.isdir(ilsvrc_dir): - raise ValueError('Invalid path to ilsvrc_dir') - if not path.exists(_validation_file_path(ilsvrc_dir)): - raise ValueError('Invalid path to ilsvrc_dir, cannot find validation file.') - if not path.exists(_synset_array_path(ilsvrc_dir)): - raise ValueError( - 'Invalid path to ilsvrc_dir, cannot find synset arrays file.') - - -def main(): - parser = argparse.ArgumentParser( - description='Converts ILSVRC devkit validation_ground_truth.txt to synset' - ' labels file that can be used by the accuracy script.') - parser.add_argument( - '--validation_labels_output', - type=str, - help='Full path for outputting validation labels.') - parser.add_argument( - '--ilsvrc_devkit_dir', - type=str, - help='Full path to ILSVRC 2012 devikit directory.') - args = parser.parse_args() - try: - _check_arguments(args) - except ValueError as e: - parser.print_usage() - file_name = path.basename(sys.argv[0]) - sys.stderr.write('{0}: error: {1}\n'.format(file_name, str(e))) - sys.exit(1) - _generate_validation_labels(args.ilsvrc_devkit_dir, - args.validation_labels_output) - - -if __name__ == '__main__': - main() ",0,train 62feb4525be38ee620fccabb6757f723adea5ba2,tensorflow/tensorflow,"NFC: Simplify ModuleOp by using the SingleBlockImplicitTerminator trait. PiperOrigin-RevId: 261944712",Module.h,"@@ -25,6 +25,8 @@ #include ""mlir/IR/SymbolTable.h"" namespace mlir { +class ModuleTerminatorOp; + //===----------------------------------------------------------------------===// // Module Operation. //===----------------------------------------------------------------------===// @@ -33,8 +35,11 @@ namespace mlir { /// single block containing opaque operations. 
The region of a module is not /// allowed to implicitly capture global values, and all external references /// must use symbolic references via attributes(e.g. via a string name). -class ModuleOp : public Op { +class ModuleOp + : public Op< + ModuleOp, OpTrait::ZeroOperands, OpTrait::ZeroResult, + OpTrait::IsIsolatedFromAbove, OpTrait::SymbolTable, + OpTrait::SingleBlockImplicitTerminator::Impl> { public: using Op::Op; using Op::print; ",0,train 4c149223e3bcf18b0c30b876dfed443f75593387,tensorflow/tensorflow,"TensorFlow: enable cuda host memory allocation for GPU compatible buffers when copying to the CPU device. Re-arranges some of the internal gpu libraries to be library vs. runtime specific. Change: 116472314",threadpool_device.cc,"@@ -15,6 +15,7 @@ limitations under the License. #include ""tensorflow/core/common_runtime/threadpool_device.h"" +#include ""tensorflow/core/common_runtime/gpu/process_state.h"" #include ""tensorflow/core/common_runtime/local_device.h"" #include ""tensorflow/core/framework/allocator.h"" #include ""tensorflow/core/framework/device_base.h"" @@ -52,7 +53,12 @@ void ThreadPoolDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) { } Allocator* ThreadPoolDevice::GetAllocator(AllocatorAttributes attr) { - return allocator_; + ProcessState* ps = ProcessState::singleton(); + if (attr.gpu_compatible()) { + return ps->GetCUDAHostAllocator(0); + } else { + return allocator_; + } } Status ThreadPoolDevice::MakeTensorFromProto( ",0,train 1e66408e0190dc16f3405fb69c3237a09be3c5ca,tensorflow/tensorflow,"Use resource id to distinguish different combinations of Resource Variables PiperOrigin-RevId: 394716703 Change-Id: I6b52913148296737f365b6fc5567f18193fbda52",function.py,"@@ -92,6 +92,12 @@ FORWARD_FUNCTION_ATTRIBUTE_NAME = ""forward_function_name"" BACKWARD_FUNCTION_ATTRIBUTE_NAME = ""backward_function_name"" IMPLEMENTS_ATTRIBUTE_NAME = ""_implements"" SHARED_RENDEZVOUS_ATTRIBUTE_NAME = ""shared_rendezvous"" +# A temporary flag. Turning this on will allow tf.function to aggressively avoid +# retracing ResourceVariable inputs. This feature will change tf.function's +# Variable tracing behavior, hence we want to limit the potential blockers that +# are not detected by Global TAP. +# TODO(jiaweix): remove this flag and related args (b/198782192) +ENCODE_VARIABLES_BY_RESOURCE_ID = False _graph_building_time_counter = monitoring.Counter( ""/tensorflow/core/tf_function/graph_building_time_usecs"", @@ -3175,8 +3181,8 @@ class Function(object): # This reduces ambiguity, for example, when args contains a dict and # kwargs is empty. inputs = (args, kwargs) - input_signature = pywrap_tfe.TFE_Py_EncodeArg(inputs, - include_tensor_ranks_only) + input_signature = pywrap_tfe.TFE_Py_EncodeArg( + inputs, include_tensor_ranks_only, ENCODE_VARIABLES_BY_RESOURCE_ID) hashable_input_signature = _make_input_signature_hashable(input_signature) else: del args, kwargs ",0,train 1e66408e0190dc16f3405fb69c3237a09be3c5ca,tensorflow/tensorflow,"Use resource id to distinguish different combinations of Resource Variables PiperOrigin-RevId: 394716703 Change-Id: I6b52913148296737f365b6fc5567f18193fbda52",pywrap_tfe.h,"@@ -370,7 +370,8 @@ PyObject* TFE_Py_TensorShapeOnDevice(PyObject* tensor); // then the encoding only stores tensor ranks, and the key is // agnostic to dimension sizes. Otherwise, full tensor shape encodings are // returned. 
-PyObject* TFE_Py_EncodeArg(PyObject*, bool include_tensor_ranks_only); +PyObject* TFE_Py_EncodeArg(PyObject*, bool include_tensor_ranks_only, + bool encode_var_by_res_id); void TFE_Py_EnableInteractivePythonLogging(); ",0,train 1e66408e0190dc16f3405fb69c3237a09be3c5ca,tensorflow/tensorflow,"Use resource id to distinguish different combinations of Resource Variables PiperOrigin-RevId: 394716703 Change-Id: I6b52913148296737f365b6fc5567f18193fbda52",pywrap_tfe_src.cc,"@@ -3972,6 +3972,7 @@ const char kTupleEnd[] = ""u""; const char kDIter[] = ""I""; const char kDict[] = ""D""; const char kRaw[] = ""R""; +const char kResourceVariable[] = ""r""; const char kShape[] = ""s""; const char kShapeDelim[] = ""-""; const char kDType[] = ""d""; @@ -4092,12 +4093,14 @@ tensorflow::Status TFE_Py_EncodeTensorOrTensorSpec( tensorflow::Status TFE_Py_EncodeArgHelperInternal( PyObject* arg, bool include_tensor_ranks_only, std::vector& res_vec, - absl::flat_hash_map& res_map, int& cur_res, EncodeResult* result); + absl::flat_hash_map& res_map, int& cur_res, + bool encode_var_by_res_id, EncodeResult* result); // This function doesn't set the type of sequence before tensorflow::Status TFE_Py_EncodeSequence(PyObject* arg, const char* type, const char* end_type, bool include_tensor_ranks_only, + bool encode_var_by_res_id, std::vector& res_vec, absl::flat_hash_map& res_map, int& cur_res, EncodeResult* result) { @@ -4113,7 +4116,8 @@ tensorflow::Status TFE_Py_EncodeSequence(PyObject* arg, const char* type, absl::StrAppend(&result->str, kNone); } else { TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelperInternal( - item, include_tensor_ranks_only, res_vec, res_map, cur_res, result)); + item, include_tensor_ranks_only, res_vec, res_map, cur_res, + encode_var_by_res_id, result)); } } absl::StrAppend(&result->str, end_type); @@ -4136,7 +4140,7 @@ void UpdateResourceCount(int res_id, std::vector& res_vec, tensorflow::Status TFE_Py_EncodeArgHelperInternal( PyObject* arg, bool include_tensor_ranks_only, std::vector& res_vec, absl::flat_hash_map& res_map, int& cur_res, - EncodeResult* result) { + bool encode_var_by_res_id, EncodeResult* result) { if (tensorflow::swig::IsTensorSpec(arg)) { TF_RETURN_IF_ERROR(TFE_Py_EncodeTensorOrTensorSpec( arg, true, include_tensor_ranks_only, result)); @@ -4182,13 +4186,13 @@ tensorflow::Status TFE_Py_EncodeArgHelperInternal( absl::StrAppend(&result->str, kCompositeTensor); } } else if (PyList_Check(arg)) { - TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(arg, kList, kListEnd, - include_tensor_ranks_only, res_vec, - res_map, cur_res, result)); + TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence( + arg, kList, kListEnd, include_tensor_ranks_only, encode_var_by_res_id, + res_vec, res_map, cur_res, result)); } else if (tensorflow::swig::IsTuple(arg)) { - TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(arg, kTuple, kTupleEnd, - include_tensor_ranks_only, res_vec, - res_map, cur_res, result)); + TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence( + arg, kTuple, kTupleEnd, include_tensor_ranks_only, encode_var_by_res_id, + res_vec, res_map, cur_res, result)); } else if (tensorflow::swig::IsMapping(arg)) { tensorflow::Safe_PyObjectPtr keys(tensorflow::swig::MappingKeys(arg)); if (PyList_Sort(keys.get()) == -1) { @@ -4201,11 +4205,12 @@ tensorflow::Status TFE_Py_EncodeArgHelperInternal( for (int i = 0; i < len; i++) { PyObject* key = PyList_GetItem(keys.get(), i); TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelperInternal( - key, include_tensor_ranks_only, res_vec, res_map, cur_res, result)); + key, include_tensor_ranks_only, res_vec, res_map, 
cur_res, + encode_var_by_res_id, result)); tensorflow::Safe_PyObjectPtr value(PyObject_GetItem(arg, key)); - TF_RETURN_IF_ERROR( - TFE_Py_EncodeArgHelperInternal(value.get(), include_tensor_ranks_only, - res_vec, res_map, cur_res, result)); + TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelperInternal( + value.get(), include_tensor_ranks_only, res_vec, res_map, cur_res, + encode_var_by_res_id, result)); } } else if (tensorflow::swig::IsCompositeTensor(arg)) { absl::StrAppend(&result->str, kCompositeTensor); @@ -4235,9 +4240,29 @@ tensorflow::Status TFE_Py_EncodeArgHelperInternal( tensorflow::Safe_PyObjectPtr attr_arg(PyObject_GetAttr(arg, name.get())); TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelperInternal( attr_arg.get(), include_tensor_ranks_only, res_vec, res_map, cur_res, - result)); + encode_var_by_res_id, result)); } absl::StrAppend(&result->str, kAttrsEnd); + } else if (tensorflow::swig::IsResourceVariable(arg) && + encode_var_by_res_id) { + absl::StrAppend(&result->str, kResourceVariable); + // Get resource id, similar to OwnedIterator + tensorflow::Safe_PyObjectPtr p_res_id( + PyObject_CallMethod(arg, ""__tf_resource_id__"", nullptr)); + if (p_res_id == nullptr) { + return tensorflow::errors::InvalidArgument( + ""Error while calling __tf_resource_id__().""); + } + int res_id = PyLong_AsSize_t(p_res_id.get()); + if (res_id < 0) { + return tensorflow::errors::InvalidArgument(""PyLong_AsSize_t failure""); + } + UpdateResourceCount(res_id, res_vec, res_map, cur_res); + + // Get dtype and shape, similar to Tensor. + tensorflow::Safe_PyObjectPtr type_spec( + PyObject_CallMethod(arg, ""__tf_function_cache_spec__"", nullptr)); + absl::StrAppend(&result->str, PyUnicode_AsUTF8(type_spec.get())); } else { PyObject* object = PyWeakref_NewRef(arg, nullptr); @@ -4257,12 +4282,14 @@ tensorflow::Status TFE_Py_EncodeArgHelperInternal( tensorflow::Status TFE_Py_EncodeArgHelper(PyObject* arg, bool include_tensor_ranks_only, + bool encode_var_by_res_id, EncodeResult* result) { std::vector res_vec; absl::flat_hash_map res_map; int cur_res = 0; - auto status = TFE_Py_EncodeArgHelperInternal( - arg, include_tensor_ranks_only, res_vec, res_map, cur_res, result); + auto status = TFE_Py_EncodeArgHelperInternal(arg, include_tensor_ranks_only, + res_vec, res_map, cur_res, + encode_var_by_res_id, result); // Add 'encoding' of resources std::string str_resource_encoding = """"; @@ -4289,10 +4316,11 @@ tensorflow::Status TFE_Py_EncodeArgHelper(PyObject* arg, // `include_tensor_ranks_only` allows caching on arguments excluding shape info, // so that a slow path using relaxed shape can rely on a cache key that excludes // shapes. 
-PyObject* TFE_Py_EncodeArg(PyObject* arg, bool include_tensor_ranks_only) { +PyObject* TFE_Py_EncodeArg(PyObject* arg, bool include_tensor_ranks_only, + bool encode_var_by_res_id) { EncodeResult result; - const auto status = - TFE_Py_EncodeArgHelper(arg, include_tensor_ranks_only, &result); + const auto status = TFE_Py_EncodeArgHelper(arg, include_tensor_ranks_only, + encode_var_by_res_id, &result); if (MaybeRaiseExceptionFromStatus(status, nullptr)) { return nullptr; } ",0,train 1e66408e0190dc16f3405fb69c3237a09be3c5ca,tensorflow/tensorflow,"Use resource id to distinguish different combinations of Resource Variables PiperOrigin-RevId: 394716703 Change-Id: I6b52913148296737f365b6fc5567f18193fbda52",resource_variable_ops.py,"@@ -463,6 +463,16 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor): return """" % ( self.name, self.get_shape(), self.dtype.name) + def __tf_function_cache_spec__(self): + res = f""d{self.dtype.as_datatype_enum}s"" + for dim_size in self.shape: + res += f""{dim_size}-"" + + return res + + def __tf_resource_id__(self): + return self._handle._id # pylint:disable=protected-access + @contextlib.contextmanager def _assign_dependencies(self): """"""Makes assignments depend on the cached value, if any. ",0,train 1e66408e0190dc16f3405fb69c3237a09be3c5ca,tensorflow/tensorflow,"Use resource id to distinguish different combinations of Resource Variables PiperOrigin-RevId: 394716703 Change-Id: I6b52913148296737f365b6fc5567f18193fbda52",tfe_wrapper.cc,"@@ -1159,11 +1159,12 @@ PYBIND11_MODULE(_pywrap_tfe, m) { m.def(""TFE_Py_RegisterVSpace"", [](const py::handle& o) { return tensorflow::PyoOrThrow(TFE_Py_RegisterVSpace(o.ptr())); }); - m.def(""TFE_Py_EncodeArg"", - [](const py::handle& o, bool include_tensor_ranks_only) { - return tensorflow::PyoOrThrow( - TFE_Py_EncodeArg(o.ptr(), include_tensor_ranks_only)); - }); + m.def(""TFE_Py_EncodeArg"", [](const py::handle& o, + bool include_tensor_ranks_only, + bool encode_variables_by_resource_id) { + return tensorflow::PyoOrThrow(TFE_Py_EncodeArg( + o.ptr(), include_tensor_ranks_only, encode_variables_by_resource_id)); + }); m.def(""TFE_EnableCollectiveOps"", [](const py::handle& ctx, py::bytes proto) { tensorflow::Safe_TF_StatusPtr status = tensorflow::make_safe(TF_NewStatus()); ",0,train 40cb9181f9c93a58471ed668fb1760717ff9baad,tensorflow/tensorflow,Use strcat,s3_file_system.cc,"@@ -292,6 +292,7 @@ class S3WritableFile : public WritableFile { ""application/octet-stream"", Aws::Map()); handle->WaitUntilFinished(); int retries = 0; + while (handle->GetStatus() == Aws::Transfer::TransferStatus::FAILED && retries++ < kUploadRetries) { // if multipart upload was used, only the failed parts will be re-sent @@ -300,6 +301,7 @@ class S3WritableFile : public WritableFile { transfer_manager_.get()->RetryUpload(outfile_, handle); handle->WaitUntilFinished(); } + if (handle->GetStatus() != Aws::Transfer::TransferStatus::COMPLETED) { auto error = handle->GetLastError(); if (error.GetResponseCode() == Aws::Http::HttpResponseCode::FORBIDDEN) { @@ -711,7 +713,7 @@ Status S3FileSystem::CopyFile(const Aws::String& source_bucket, const Aws::Strin Aws::String source = Aws::String((source_bucket + ""/"" + source_key).c_str()); Aws::String source_full_path = Aws::String(""s3://"") + source; uint64 file_length; - TF_RETURN_IF_ERROR(this->GetFileSize(std::string(source_full_path.c_str()), &file_length)); + TF_RETURN_IF_ERROR(this->GetFileSize(string(source_full_path.c_str()), &file_length)); int num_parts; if (file_length <= 
multi_part_copy_part_size_) { num_parts = 1; @@ -722,12 +724,12 @@ Status S3FileSystem::CopyFile(const Aws::String& source_bucket, const Aws::Strin if (num_parts == 1) { return SimpleCopy(source, target_bucket, target_key); } else if (num_parts > 10000) { - std::ostringstream s; - s << ""MultiPartCopy with number of parts more than 10000 is not supported. Your object "" - << source << "" required "" << num_parts << "" as multi_part_copy_part_size is set to "" - << multi_part_copy_part_size_ << "". You can control this part size using the environment variable "" - ""S3_MULTI_PART_COPY_PART_SIZE to increase it.""; - return tensorflow::errors::Unimplemented(s.str()); + string message = strings::StrCat( + ""MultiPartCopy with number of parts more than 10000 is not supported. Your object "", + source, "" required "", num_parts, "" as multi_part_copy_part_size is set to "", + multi_part_copy_part_size_, "". You can control this part size using the environment variable "", + ""S3_MULTI_PART_COPY_PART_SIZE to increase it.""); + return tensorflow::errors::Unimplemented(message); } else { return MultiPartCopy(source, target_bucket, target_key, num_parts, file_length); } @@ -798,9 +800,7 @@ Status S3FileSystem::MultiPartCopy(const Aws::String& source, endPos = file_length - 1; } - std::ostringstream rangeStream; - rangeStream << ""bytes="" << startPos << ""-"" << std::to_string(endPos); - string range = rangeStream.str(); + string range = strings::StrCat(""bytes="", startPos, ""-"", endPos); Aws::S3::Model::UploadPartCopyRequest uploadPartCopyRequest; uploadPartCopyRequest.SetBucket(target_bucket); ",0,train 10ad29455ada3003bb02abb737c6ba166d1be751,tensorflow/tensorflow,"[Pluggable Device] Use default settings when device ""architecture"" field is not set.",op_level_cost_estimator.cc,"@@ -736,29 +736,38 @@ DeviceInfo OpLevelCostEstimator::GetDeviceInfo( } } } else if (device.type() == ""GPU"") { - const std::string architecture = device.environment().at(""architecture""); - int cores_per_multiprocessor; - if (architecture < ""3"") { - // Fermi - cores_per_multiprocessor = 32; - } else if (architecture < ""4"") { - // Kepler - cores_per_multiprocessor = 192; - } else if (architecture < ""6"") { - // Maxwell - cores_per_multiprocessor = 128; - } else { - // Pascal (compute capability version 6) and Volta (compute capability - // version 7) - cores_per_multiprocessor = 64; - } - gflops = device.num_cores() * device.frequency() * 1e-3 * - cores_per_multiprocessor * kOpsPerMac; - if (device.bandwidth() > 0) { - gb_per_sec = device.bandwidth() / 1e6; + const auto& device_env = device.environment(); + auto it = device_env.find(""architecture""); + if (it != device_env.end()) { + const std::string architecture = device_env.at(""architecture""); + int cores_per_multiprocessor; + if (architecture < ""3"") { + // Fermi + cores_per_multiprocessor = 32; + } else if (architecture < ""4"") { + // Kepler + cores_per_multiprocessor = 192; + } else if (architecture < ""6"") { + // Maxwell + cores_per_multiprocessor = 128; + } else { + // Pascal (compute capability version 6) and Volta (compute capability + // version 7) + cores_per_multiprocessor = 64; + } + gflops = device.num_cores() * device.frequency() * 1e-3 * + cores_per_multiprocessor * kOpsPerMac; + if (device.bandwidth() > 0) { + gb_per_sec = device.bandwidth() / 1e6; + } else { + gb_per_sec = 100; + } } else { + // Architecture is not available (ex: pluggable device), return default value. 
gb_per_sec = 100; - } + gflops = 100; // Dummy value; + gb_per_sec = 12; // default PCIe x16 gen3. + } } else { LOG_EVERY_N(WARNING, 1000) << ""Unknown device type: "" << device.type() << "", assuming PCIe between CPU and GPU.""; ",0,train 27c78a334bfb7db71221818f6ba52982926993a3,tensorflow/tensorflow,Delete extra spaces from the changes & fix indent on sublists,densenet.py,"@@ -162,7 +162,7 @@ def DenseNet( and width and height should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid value. pooling: optional pooling mode for feature extraction - when `include_top` is `False`. + when `include_top` is `False`. It could be: - `None` means that the output of the model will be the 4D tensor output of the last convolutional block. @@ -469,7 +469,6 @@ DOC = """""" or invalid input shape. ValueError: if `classifier_activation` is not `softmax` or `None` when using a pretrained top layer. - """""" setattr(DenseNet121, '__doc__', DenseNet121.__doc__ + DOC) ",0,train 27c78a334bfb7db71221818f6ba52982926993a3,tensorflow/tensorflow,Delete extra spaces from the changes & fix indent on sublists,nasnet.py,"@@ -347,7 +347,7 @@ def NASNetMobile(input_shape=None, `layers.Input()`) to use as image input for the model. pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. + when `include_top` is `False`. It could be: - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. @@ -438,7 +438,7 @@ def NASNetLarge(input_shape=None, `layers.Input()`) to use as image input for the model. pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. + when `include_top` is `False`. It could be: - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. ",0,train 27c78a334bfb7db71221818f6ba52982926993a3,tensorflow/tensorflow,Delete extra spaces from the changes & fix indent on sublists,resnet.py,"@@ -527,7 +527,7 @@ DOC = """""" Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in your Keras config at `~/.keras/keras.json`. - + Arguments: include_top: whether to include the fully-connected layer at the top of the network. @@ -544,7 +544,7 @@ DOC = """""" and width and height should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. + when `include_top` is `False`. It could be: - `None` means that the output of the model will be the 4D tensor output of the last convolutional block. ",0,train 27c78a334bfb7db71221818f6ba52982926993a3,tensorflow/tensorflow,Delete extra spaces from the changes & fix indent on sublists,resnet_v2.py,"@@ -220,7 +220,6 @@ DOC = """""" or invalid input shape. ValueError: if `classifier_activation` is not `softmax` or `None` when using a pretrained top layer. - """""" setattr(ResNet50V2, '__doc__', ResNet50V2.__doc__ + DOC) ",0,train 79c27b9b3acee58481cc55e6b249795713b00ca8,tensorflow/tensorflow,"Adding support for multiple input types. Change: 142507111",optimize_for_inference_lib.py,"@@ -72,7 +72,8 @@ def optimize_for_inference(input_graph_def, input_node_names, inference. output_node_names: A list of names of the nodes that produce the final results. - placeholder_type_enum: Data type of the placeholders used for inputs. + placeholder_type_enum: The AttrValue enum for the placeholder data type, or + a list that specifies one value per input node name. 
Returns: An optimized version of the input graph. ",0,test 79c27b9b3acee58481cc55e6b249795713b00ca8,tensorflow/tensorflow,"Adding support for multiple input types. Change: 142507111",strip_unused_lib.py,"@@ -35,7 +35,8 @@ def strip_unused(input_graph_def, input_node_names, output_node_names, input_graph_def: A graph with nodes we want to prune. input_node_names: A list of the nodes we use as inputs. output_node_names: A list of the output nodes. - placeholder_type_enum: The AttrValue enum for the placeholder data type. + placeholder_type_enum: The AttrValue enum for the placeholder data type, or + a list that specifies one value per input node name. Returns: A GraphDef with all unnecessary ops removed. @@ -49,8 +50,13 @@ def strip_unused(input_graph_def, input_node_names, output_node_names, placeholder_node = tf.NodeDef() placeholder_node.op = ""Placeholder"" placeholder_node.name = node.name - placeholder_node.attr[""dtype""].CopyFrom(tf.AttrValue( - type=placeholder_type_enum)) + if isinstance(placeholder_type_enum, list): + input_node_index = input_node_names.index(node.name) + placeholder_node.attr[""dtype""].CopyFrom(tf.AttrValue( + type=placeholder_type_enum[input_node_index])) + else: + placeholder_node.attr[""dtype""].CopyFrom(tf.AttrValue( + type=placeholder_type_enum)) if ""_output_shapes"" in node.attr: placeholder_node.attr[""_output_shapes""].CopyFrom( node.attr[""_output_shapes""]) ",0,test 79c27b9b3acee58481cc55e6b249795713b00ca8,tensorflow/tensorflow,"Adding support for multiple input types. Change: 142507111",strip_unused_test.py,"@@ -43,8 +43,9 @@ class StripUnusedTest(test_util.TensorFlowTestCase): # and that then multiplies it by 2. with ops.Graph().as_default(): constant_node = constant_op.constant(1.0, name=""constant_node"") - wanted_input_node = math_ops.sub( - constant_node, 3.0, name=""wanted_input_node"") + wanted_input_node = math_ops.sub(constant_node, + 3.0, + name=""wanted_input_node"") output_node = math_ops.multiply( wanted_input_node, 2.0, name=""output_node"") math_ops.add(output_node, 2.0, name=""later_node"") @@ -89,6 +90,66 @@ class StripUnusedTest(test_util.TensorFlowTestCase): output = sess.run(output_node, feed_dict={input_node: [10.0]}) self.assertNear(20.0, output, 0.00001) + def testStripUnusedMultipleInputs(self): + input_graph_name = ""input_graph.pb"" + output_graph_name = ""output_graph.pb"" + + # We'll create an input graph that multiplies two input nodes. + with ops.Graph().as_default(): + constant_node1 = constant_op.constant(1.0, name=""constant_node1"") + constant_node2 = constant_op.constant(2.0, name=""constant_node2"") + input_node1 = math_ops.sub(constant_node1, 3.0, name=""input_node1"") + input_node2 = math_ops.sub(constant_node2, 5.0, name=""input_node2"") + output_node = math_ops.multiply( + input_node1, input_node2, name=""output_node"") + math_ops.add(output_node, 2.0, name=""later_node"") + sess = session.Session() + output = sess.run(output_node) + self.assertNear(6.0, output, 0.00001) + graph_io.write_graph(sess.graph, self.get_temp_dir(), input_graph_name) + + # We save out the graph to disk, and then call the const conversion + # routine. 
+ input_graph_path = os.path.join(self.get_temp_dir(), input_graph_name) + input_binary = False + input_node_names = ""input_node1,input_node2"" + input_node_types = [ + dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum + ] + output_binary = True + output_node_names = ""output_node"" + output_graph_path = os.path.join(self.get_temp_dir(), output_graph_name) + + strip_unused_lib.strip_unused_from_files(input_graph_path, input_binary, + output_graph_path, output_binary, + input_node_names, + output_node_names, + input_node_types) + + # Now we make sure the variable is now a constant, and that the graph still + # produces the expected result. + with ops.Graph().as_default(): + output_graph_def = graph_pb2.GraphDef() + with open(output_graph_path, ""rb"") as f: + output_graph_def.ParseFromString(f.read()) + _ = importer.import_graph_def(output_graph_def, name="""") + + self.assertEqual(3, len(output_graph_def.node)) + for node in output_graph_def.node: + self.assertNotEqual(""Add"", node.op) + self.assertNotEqual(""Sub"", node.op) + if node.name == input_node_names: + self.assertTrue(""shape"" in node.attr) + + with session.Session() as sess: + input_node1 = sess.graph.get_tensor_by_name(""input_node1:0"") + input_node2 = sess.graph.get_tensor_by_name(""input_node2:0"") + output_node = sess.graph.get_tensor_by_name(""output_node:0"") + output = sess.run(output_node, + feed_dict={input_node1: [10.0], + input_node2: [-5.0]}) + self.assertNear(-50.0, output, 0.00001) + if __name__ == ""__main__"": test.main() ",0,test ba4804031357abecd1f412eeb5a04810a248391a,tensorflow/tensorflow,"Add a global resource manager for TPU specific operations. PiperOrigin-RevId: 312388244 Change-Id: I30dd6ce3a2f0eed3d257750626e11b3bb6eded97",tpu_configuration.cc,"@@ -0,0 +1,44 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""tensorflow/core/tpu/tpu_configuration.h"" + +namespace tensorflow { + +namespace { + +ResourceMgr* GetGlobalResourceMgr() { + static ResourceMgr* const rmgr = new ResourceMgr(); + return rmgr; +} + +} // namespace + +#if !defined(PLATFORM_GOOGLE) +// Used only by Google-internal tests, so deliberately left empty. +void MaybeInitializeTPUSystemForTests() {} +#endif + +ResourceMgr* GetTPUConfigResourceMgr() { + MaybeInitializeTPUSystemForTests(); + + // Put all TPU-related state in the global ResourceMgr. This includes the + // TpuPodState, compilation cache, etc. We don't use the TPU_SYSTEM + // ResourceMgr because there may be more than one TPU_SYSTEM ResourceMgr when + // DirectSession or isolate_session_state are used. + return GetGlobalResourceMgr(); +} + +} // namespace tensorflow ",0,train ba4804031357abecd1f412eeb5a04810a248391a,tensorflow/tensorflow,"Add a global resource manager for TPU specific operations. 
PiperOrigin-RevId: 312388244 Change-Id: I30dd6ce3a2f0eed3d257750626e11b3bb6eded97",tpu_configuration.h,"@@ -0,0 +1,30 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_TPU_TPU_CONFIGURATION_H_ +#define TENSORFLOW_CORE_TPU_TPU_CONFIGURATION_H_ + +#include ""tensorflow/core/framework/resource_mgr.h"" + +namespace tensorflow { + +void MaybeInitializeTPUSystemForTests(); + +// Returns a process-wide global ResourceMgr. +ResourceMgr* GetTPUConfigResourceMgr(); + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_TPU_CONFIGURATION_H_ ",0,train 7358025743951b42fe0f99fb85b4418769de5357,tensorflow/tensorflow,"Add test cases with axis and keepdims for tf.count_nonzero and string Signed-off-by: Yong Tang ",reduction_ops_test.py,"@@ -889,9 +889,9 @@ class AnyReductionTest(test.TestCase): class CountNonzeroReductionTest(test.TestCase): - def _compare(self, x, reduction_axes, keepdims, use_gpu=False, + def _compare(self, x, reduction_axes, keepdims, use_gpu=False, zero=0, feed_dict=None): - np_ans = (x != 0).astype(np.int32) + np_ans = (x != zero).astype(np.int32) if reduction_axes is None: np_ans = np.sum(np_ans, keepdims=keepdims) else: @@ -964,6 +964,15 @@ class CountNonzeroReductionTest(test.TestCase): v = math_ops.count_nonzero(constant_op.constant([""test""])) self.assertAllClose(sess.run(v), 1) + def testStringReduce1D(self): + # Create a 1D array of strings + x = np.asarray(["""", """", ""a"", """", """", ""b""]) + self._compare(x, None, keepdims=False, zero=np.str("""")) + self._compare(x, [], keepdims=False, zero=np.str("""")) + self._compare(x, [0], keepdims=False, zero=np.str("""")) + self._compare(x, None, keepdims=True, zero=np.str("""")) + self._compare(x, [], keepdims=True, zero=np.str("""")) + self._compare(x, [0], keepdims=True, zero=np.str("""")) if __name__ == ""__main__"": test.main() ",0,train aa50969378a2efe745c37f120452bc89effaf7ba,tensorflow/tensorflow,"Remove SavedModel dependency on manifest proto. Change: 133885459",builder.py,"@@ -26,7 +26,7 @@ import os from google.protobuf.any_pb2 import Any -from tensorflow.contrib.session_bundle import manifest_pb2 +from tensorflow.core.protobuf import meta_graph_pb2 from tensorflow.core.protobuf import saved_model_pb2 from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -123,12 +123,12 @@ class SavedModelBuilder(object): Args: asset_filename: The filename of the asset to be added. - asset_tensor: The asset tensor used to populate the tensor binding of the + asset_tensor: The asset tensor used to populate the tensor info of the asset proto. 
"""""" - asset_proto = manifest_pb2.AssetFile() + asset_proto = meta_graph_pb2.AssetFileDef() asset_proto.filename = asset_filename - asset_proto.tensor_binding.tensor_name = asset_tensor.name + asset_proto.tensor_info.name = asset_tensor.name asset_any_proto = Any() asset_any_proto.Pack(asset_proto) ",0,train aa50969378a2efe745c37f120452bc89effaf7ba,tensorflow/tensorflow,"Remove SavedModel dependency on manifest proto. Change: 133885459",saved_model_test.py,"@@ -20,8 +20,8 @@ from __future__ import print_function import os import tensorflow as tf -from tensorflow.contrib.session_bundle import manifest_pb2 from tensorflow.core.protobuf import config_pb2 +from tensorflow.core.protobuf import meta_graph_pb2 from tensorflow.python.framework import errors from tensorflow.python.lib.io import file_io from tensorflow.python.saved_model import builder as saved_model_builder @@ -363,7 +363,7 @@ class SavedModelTest(tf.test.TestCase): collection_def = foo_graph.collection_def assets_any = collection_def[constants.ASSETS_KEY].any_list.value self.assertEqual(len(assets_any), 1) - asset = manifest_pb2.AssetFile() + asset = meta_graph_pb2.AssetFileDef() assets_any[0].Unpack(asset) assets_path = os.path.join( compat.as_bytes(export_dir), @@ -372,7 +372,7 @@ class SavedModelTest(tf.test.TestCase): asset_contents = file_io.read_file_to_string(assets_path) self.assertEqual(""foo bar baz"", compat.as_text(asset_contents)) self.assertEqual(""hello42.txt"", asset.filename) - self.assertEqual(""asset_file_tensor:0"", asset.tensor_binding.tensor_name) + self.assertEqual(""asset_file_tensor:0"", asset.tensor_info.name) ignored_asset_path = os.path.join( compat.as_bytes(export_dir), compat.as_bytes(constants.ASSETS_DIRECTORY), ",0,train 5afcbe91aa90a7795b49910f0e542f07be796448,tensorflow/tensorflow,"eager: Some more backprop tests PiperOrigin-RevId: 166246790",backprop_test.py,"@@ -30,7 +30,12 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import embedding_ops from tensorflow.python.ops import gradients from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_grad # pylint: disable=unused-import +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import random_ops from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.training import training class BackpropTest(test.TestCase): @@ -70,16 +75,86 @@ class BackpropTest(test.TestCase): self.assertAllClose(grad.numpy(), tf_dense_grad.eval()) def testImplicitGradWithResourceVariable(self): - x = resource_variable_ops.ResourceVariable(initial_value=tensor.Tensor(1.0), - name='x') + x = resource_variable_ops.ResourceVariable( + initial_value=tensor.Tensor(1.0), name='x') + def fn(): tape.watch(x.handle) b = tensor.Tensor(2.0) c = math_ops.add(x.value(), b) return math_ops.add(c, tensor.Tensor(3.0)) + grad = backprop.implicit_grad(fn)()[0][1] self.assertEqual(grad.numpy(), 1.0) + def testImplicitGradOverEmbeddingLookup(self): + batch_size = 8 + embedding_size = 512 + vocab_size = 1000 + lrn_rate = 0.1 + random_init = random_ops.random_uniform([vocab_size, embedding_size]) + + x = array_ops.ones((batch_size), dtypes.int64) + embedding = resource_variable_ops.ResourceVariable( + initial_value=random_init, dtype=dtypes.float32, name='embedding') + + def f(): + tape.watch(embedding.handle) + embedded_x = embedding_ops.embedding_lookup(embedding, x) + return tensor.Tensor(1.0, dtypes.float32) - embedded_x + + grad = 
backprop.implicit_grad(f)()[0][1] + opt = training.GradientDescentOptimizer(lrn_rate) + + with context.graph_mode(), self.test_session(): + tf_x = array_ops.ones((batch_size), dtypes.int64) + # TODO(ashankar,apassos): Change to ResourceVariable. + tf_embedding = variables.Variable( + random_init.numpy(), name='tf_embedding') + tf_embedded_x = embedding_ops.embedding_lookup(tf_embedding, tf_x) + tf_y = 1.0 - tf_embedded_x + tf_grad = gradients.gradients(tf_y, [tf_embedding])[0] + tf_opt = training.GradientDescentOptimizer(0.1) + tf_embedding.initializer.run() + + self.assertAllClose(tf_grad.indices.eval(), grad.indices.numpy()) + self.assertAllClose(tf_grad.values.eval(), grad.values.numpy()) + + tf_opt.apply_gradients([(tf_grad, tf_embedding)]).run() + expected = tf_embedding.eval() + opt.apply_gradients([(grad, embedding)]) + self.assertAllClose(expected, embedding.read_value().numpy()) + + def testGradientNone(self): + + def loss(x, l): + return math_ops.reduce_mean( + nn_ops.softmax_cross_entropy_with_logits(logits=x, labels=l), + tensor.Tensor([0])) + + logits = tensor.Tensor([[0.0, 0.0]]) + labels = tensor.Tensor([[1.0, 0.0]]) + # softmax_cross_entropy_with_logits returns two outputs and in this case the + # gradient wrt the second is None. + g, = backprop.gradients_function(loss, [0])(logits, labels) + self.assertAllEqual(g.numpy(), [[-0.5, 0.5]]) + + def testSecondGrad(self): + + def first(x): + l = tensor.Tensor([[0.0]]) + x = nn_ops.softmax_cross_entropy_with_logits(labels=l, logits=x) + x = math_ops.reduce_sum(x, tensor.Tensor([0])) + return x + + def second(x): + grad = backprop.gradients_function(first, [0])(x)[0] + return math_ops.reduce_sum(grad, tensor.Tensor([0])) + + f = tensor.Tensor([[0.1]]) + grad = backprop.gradients_function(second, [0])(f)[0] + self.assertAllEqual([[0.0]], grad.numpy()) + def testGPU(self): if not context.context().num_gpus(): self.skipTest('No GPUs found') @@ -95,6 +170,20 @@ class BackpropTest(test.TestCase): grad = backprop.gradients_function(fn, [0])(tensor.Tensor(1.0))[0] self.assertEqual(grad.numpy(), 1.0) + def testGPUImplicitGrad(self): + if not context.context().num_gpus(): + self.skipTest('No GPU found') + with context.device('gpu:0'): + v = resource_variable_ops.ResourceVariable(tensor.Tensor(1.0), name='v') + + def f(): + with context.device('gpu:0'): + tape.watch(v.handle) + return v.read_value() + + self.assertEqual( + backprop.implicit_grad(f)()[0][1].as_cpu_tensor().numpy(), 1.0) + def testCPU(self): def fn(x): ",0,train 11f1dab4fce23c73073e32cda910a2a1a87c394f,tensorflow/tensorflow,"StridedSlice gradient more efficient in tfe. PiperOrigin-RevId: 210927458",pywrap_tfe_src.cc,"@@ -1784,6 +1784,7 @@ bool OpDoesntRequireOutput(const string& op_name) { ""ReadVariableOp"", ""VarHandleOp"", ""Shape"", + ""StridedSlice"", }); return ops_that_dont_require_outputs->find(op_name) != ",0,train 9b488c5d150b50db57f2e8560b37f0c1e4f0c92d,tensorflow/tensorflow,Add monitor.BaseMonitor and arg pass through from val monitor,digits.py,"@@ -46,7 +46,7 @@ def conv_model(X, y): features = tf.reshape(features, [-1, 12]) return skflow.models.logistic_regression(features, y) -val_monitor = monitors.ValidationMonitor(X_val, y_val, n_classes=10) +val_monitor = monitors.ValidationMonitor(X_val, y_val, n_classes=10, print_steps=50) # Create a classifier, train and predict. 
classifier = skflow.TensorFlowEstimator(model_fn=conv_model, n_classes=10, steps=1000, learning_rate=0.05, ",0,train 9b488c5d150b50db57f2e8560b37f0c1e4f0c92d,tensorflow/tensorflow,Add monitor.BaseMonitor and arg pass through from val monitor,base.py,"@@ -200,7 +200,7 @@ class TensorFlowEstimator(BaseEstimator): self.batch_size) if monitor is None: - self._monitor = monitors.BaseMonitor() + self._monitor = monitors.default_monitor() else: self._monitor = monitor ",0,train 9b488c5d150b50db57f2e8560b37f0c1e4f0c92d,tensorflow/tensorflow,Add monitor.BaseMonitor and arg pass through from val monitor,monitors.py,"@@ -26,16 +26,21 @@ from skflow.io.data_feeder import setup_train_data_feeder # pylint: disable=unused-argument # pylint: disable=attribute-defined-outside-init +def default_monitor(): + return(BaseMonitor()) + + class BaseMonitor(object): """""" Base class for all learning monitors. Stores and reports training loss throughout learning + Parameters: print_steps: Number of steps in between printing cost. early_stopping_rounds: Activates early stopping if this is not None. Loss needs to decrease at least every every round(s) to continue training. (default: None) """""" - def __init__(self, print_steps=100, early_stopping_rounds=500, verbose=1): + def __init__(self, print_steps=100, early_stopping_rounds=250, verbose=1): self.print_steps = print_steps self.early_stopping_rounds = early_stopping_rounds @@ -127,10 +132,15 @@ class ValidationMonitor(BaseMonitor): val_X: Validation features val_y: Validation labels n_classes: Number of labels in output. 0 for regression - See BaseMonitor for arguments + print_steps: Number of steps in between printing cost. + early_stopping_rounds: Activates early stopping if this is not None. + Loss needs to decrease at least every every + round(s) to continue training. (default: None) + """""" - def __init__(self, val_X, val_y, n_classes=0, *args, **kwargs): - super(ValidationMonitor, self).__init__() + def __init__(self, val_X, val_y, n_classes=0, print_steps=100, early_stopping_rounds=250): + super(ValidationMonitor, self).__init__(print_steps=print_steps, + early_stopping_rounds=early_stopping_rounds) self.val_feeder = setup_train_data_feeder(val_X, val_y, n_classes, -1) self.print_val_loss_buffer = [] self.all_val_loss_buffer = [] ",0,train 58a58794c915d70b4429eb5b80e21ba59d0f84f3,tensorflow/tensorflow,"Break Python-independent logic out of pywrap_tfe_src, to avoid cpython deps into C++-only targets. PiperOrigin-RevId: 424613840 Change-Id: I604e4d2a0dc79f0b675a91ff7cccc0820104e401",eager_context.cc,"@@ -1,36 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the ""License""); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an ""AS IS"" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include ""tensorflow/python/eager/eager_context.h"" - -#include ""tensorflow/c/eager/c_api.h"" - -namespace tensorflow { -namespace eager { - -namespace { -// This object tracks the EagerContext owned by global_py_eager_context in -// pywrap_tfe_src.cc. 
Since the vast majority of the Python API is dependent on -// that global_py_eager_context (including memory management), the Py object -// owns the C object, so this pointer is non-owning. -TFE_Context* global_c_eager_context = nullptr; -} // namespace - -void TFE_Py_SetCEagerContext(TFE_Context* ctx) { global_c_eager_context = ctx; } - -TFE_Context* GetCEagerContext() { return global_c_eager_context; } - -} // namespace eager -} // namespace tensorflow ",0,train 58a58794c915d70b4429eb5b80e21ba59d0f84f3,tensorflow/tensorflow,"Break Python-independent logic out of pywrap_tfe_src, to avoid cpython deps into C++-only targets. PiperOrigin-RevId: 424613840 Change-Id: I604e4d2a0dc79f0b675a91ff7cccc0820104e401",eager_context.h,"@@ -1,44 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the ""License""); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an ""AS IS"" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_PYTHON_EAGER_EAGER_CONTEXT_H_ -#define TENSORFLOW_PYTHON_EAGER_EAGER_CONTEXT_H_ - -#include ""tensorflow/c/eager/c_api.h"" - -namespace tensorflow { -namespace eager { - -// Sets the EagerContext owned by the current Python eager Context (see -// TFE_Py_SetEagerContext in pywrap_tfe.h). This is always called in tandem with -// TFE_Py_SetEagerContext (but not called by it, because its py_context -// argument is opaque). -// -// Do not use this function in production. It is only intended for testing. -// (see _reset_context in context.py). -// -// Not thread-safe. -void TFE_Py_SetCEagerContext(TFE_Context* ctx); - -// Returns the EagerContext owned by the current Python eager Context (see -// TFE_Py_SetEagerContext in pywrap_tfe.h). -// -// Not thread-safe. -TFE_Context* GetCEagerContext(); - -} // namespace eager -} // namespace tensorflow - -#endif // TENSORFLOW_PYTHON_EAGER_EAGER_CONTEXT_H_ ",0,train 58a58794c915d70b4429eb5b80e21ba59d0f84f3,tensorflow/tensorflow,"Break Python-independent logic out of pywrap_tfe_src, to avoid cpython deps into C++-only targets. PiperOrigin-RevId: 424613840 Change-Id: I604e4d2a0dc79f0b675a91ff7cccc0820104e401",pywrap_tfe.h,"@@ -384,6 +384,23 @@ PyObject* TFE_Py_SetEagerContext(PyObject* py_context); // some point. PyObject* GetPyEagerContext(); +// Sets the EagerContext owned by the current Python eager Context (see +// TFE_Py_SetEagerContext). This is always called in tandem with +// TFE_Py_SetEagerContext (but not called by it, because its py_context +// argument is opaque). +// +// Do not use this function in production. It is only intended for testing. +// (see _reset_context in context.py). +// +// Not thread-safe. +void TFE_Py_SetCEagerContext(TFE_Context* ctx); + +// Returns the EagerContext owned by the current Python eager Context (see +// TFE_Py_SetEagerContext). +// +// Not thread-safe. +TFE_Context* GetCEagerContext(); + // These are exposed since there is SWIG code that calls these. // Returns a pre-allocated status if it exists. 
TF_Status* GetStatus(); ",0,train 58a58794c915d70b4429eb5b80e21ba59d0f84f3,tensorflow/tensorflow,"Break Python-independent logic out of pywrap_tfe_src, to avoid cpython deps into C++-only targets. PiperOrigin-RevId: 424613840 Change-Id: I604e4d2a0dc79f0b675a91ff7cccc0820104e401",pywrap_tfe_src.cc,"@@ -4004,10 +4004,19 @@ namespace { // object currently active. This object is opaque and wrapped inside a Python // Capsule. However, the EagerContext object it holds is tracked by the // global_c_eager_context object. -// Also see eager_context.cc. PyObject* global_py_eager_context = nullptr; + +// This object tracks the EagerContext owned by global_py_eager_context. Since +// the vast majority of the Python API is dependent on that +// global_py_eager_context (including memory management), the Py object owns the +// C object, so this pointer is non-owning. +TFE_Context* global_c_eager_context = nullptr; } // namespace +void TFE_Py_SetCEagerContext(TFE_Context* ctx) { global_c_eager_context = ctx; } + +TFE_Context* GetCEagerContext() { return global_c_eager_context; } + PyObject* TFE_Py_SetEagerContext(PyObject* py_context) { Py_XDECREF(global_py_eager_context); global_py_eager_context = PyWeakref_NewRef(py_context, nullptr); ",0,train 58a58794c915d70b4429eb5b80e21ba59d0f84f3,tensorflow/tensorflow,"Break Python-independent logic out of pywrap_tfe_src, to avoid cpython deps into C++-only targets. PiperOrigin-RevId: 424613840 Change-Id: I604e4d2a0dc79f0b675a91ff7cccc0820104e401",tfe_wrapper.cc,"@@ -38,7 +38,6 @@ limitations under the License. #include ""tensorflow/c/tf_status_helper.h"" #include ""tensorflow/compiler/jit/flags.h"" #include ""tensorflow/compiler/jit/get_compiler_ir.h"" -#include ""tensorflow/python/eager/eager_context.h"" #include ""tensorflow/python/eager/pywrap_tensor_conversion.h"" #include ""tensorflow/python/eager/pywrap_tfe.h"" #include ""tensorflow/python/lib/core/py_exception_registry.h"" @@ -1202,8 +1201,7 @@ PYBIND11_MODULE(_pywrap_tfe, m) { return tensorflow::PyoOrThrow(TFE_Py_SetEagerContext(o.ptr())); }); m.def(""TFE_Py_SetCEagerContext"", [](const py::handle& ctx) { - tensorflow::eager::TFE_Py_SetCEagerContext( - tensorflow::InputTFE_Context(ctx)); + TFE_Py_SetCEagerContext(tensorflow::InputTFE_Context(ctx)); }); m.def(""TFE_Py_RegisterVSpace"", [](const py::handle& o) { return tensorflow::PyoOrThrow(TFE_Py_RegisterVSpace(o.ptr())); ",0,train 9614961027fbf30b4489054bb898056f7c0fda8e,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-04-16 PiperOrigin-RevId: 368805037 Change-Id: Idc7633582a2d8e70367934b6e5ed40d0da216229",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 4, 15) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 4, 16) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,test 7c642ef7f713a53b8c04730b33a4d55da8915ac1,tensorflow/tensorflow,"[tf.data] Fix noisy warning. 
(#22778) PiperOrigin-RevId: 215607171",dataset_ops.py,"@@ -1831,9 +1831,10 @@ class StructuredFunctionWrapper(object): flat_classes.append(component) flat_shapes.append(component) flat_types.append(component) - if t.options() is not None: # pylint: disable=protected-access - warnings.warn(""Encountered a nested dataset with options. These "" - ""options will not be applied to the outer dataset."") + if t.options() != Options(): + warnings.warn(""Encountered a nested dataset with non-default "" + ""options. These options will not be propagated to "" + ""the outer dataset."") else: try: t = ops.convert_to_tensor(t) ",0,test 612a5fb91ed6a6c229c5f4932307747699cabe90,tensorflow/tensorflow,"Clamp f32->f16 quantization to max/min range of float16 PiperOrigin-RevId: 339569171 Change-Id: Ic9695ef175aca449ec905b9d9e5d3893ca07fbd4",quantization_utils.cc,"@@ -502,9 +502,14 @@ TfLiteStatus QuantizeTensorFloat16(ModelT* model, TensorT* tensor) { // Transform float data to float16. std::vector quantized_buffer; quantized_buffer.resize(num_elements); - std::transform( - float_vector.begin(), float_vector.end(), quantized_buffer.begin(), - [](float a) { return Eigen::half_impl::float_to_half_rtne(a); }); + constexpr float kMaxFloat16Value = 65504.f; + constexpr float kMinFloat16Value = -65504.f; + std::transform(float_vector.begin(), float_vector.end(), + quantized_buffer.begin(), [=](float a) { + float clamped = std::min(std::max(a, kMinFloat16Value), + kMaxFloat16Value); + return Eigen::half_impl::float_to_half_rtne(clamped); + }); char* half_buffer = reinterpret_cast(quantized_buffer.data()); model->buffers[tensor->buffer]->data.assign( ",0,train 612a5fb91ed6a6c229c5f4932307747699cabe90,tensorflow/tensorflow,"Clamp f32->f16 quantization to max/min range of float16 PiperOrigin-RevId: 339569171 Change-Id: Ic9695ef175aca449ec905b9d9e5d3893ca07fbd4",quantization_utils_test.cc,"@@ -575,6 +575,42 @@ TEST_F(QuantizationUtilsTest, SymmetricQuantizeTensor) { EXPECT_EQ(quant_buffer_size * 4, float_buffer_size); } +TEST_F(QuantizationUtilsTest, QuantizeFloat16Clamp) { + // Create data. + auto model = absl::make_unique(); + auto subgraph = absl::make_unique(); + auto tensor = absl::make_unique(); + auto buffer = absl::make_unique(); + constexpr int kNumElements = 6; + const std::vector weights = {2.0, 1.0, 65504., 65505, -65504., -99999}; + auto weights_reinterpreted_data = + reinterpret_cast(weights.data()); + buffer->data.assign(weights_reinterpreted_data, + weights_reinterpreted_data + weights.size() * 4); + tensor->buffer = 0; + tensor->shape = {1, kNumElements}; + + // Wire the model. + model->subgraphs.push_back(std::move(subgraph)); + model->subgraphs[0]->tensors.push_back(std::move(tensor)); + model->buffers.push_back(std::move(buffer)); + + // Call and verify. + EXPECT_EQ( + QuantizeTensorFloat16(model.get(), model->subgraphs[0]->tensors[0].get()), + kTfLiteOk); + auto weightsf16 = reinterpret_cast( + model->buffers[model->subgraphs[0]->tensors[0]->buffer]->data.data()); + std::vector wf32(kNumElements); + std::transform(weightsf16, weightsf16 + 6, wf32.begin(), [](Eigen::half a) { + return Eigen::half_impl::half_to_float(a); + }); + + EXPECT_THAT(wf32, + ElementsAreArray({2.0, 1.0, 65504., 65504., -65504., -65504.})); + EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, TensorType_FLOAT16); +} + TEST_F(QuantizationUtilsTest, QuantizeFloat16) { // Conv model has weights between 0 and 10. // Quantize the weights tensor. 
",0,train eabf2ec12818c4e8ae7008ce14ed12ad38bd1537,tensorflow/tensorflow,"Work around a buggy get_config by ignoring its errors when checkpointing This is extra ""nice to have"" metadata, and otherwise it looks like a checkpointing error. Not worth bothering people about. PiperOrigin-RevId: 231318216",base.py,"@@ -846,11 +846,16 @@ class Checkpointable(object): return {} weak_self = weakref.ref(self) def _state_callback(): + """"""Serializes `self.get_config()` for saving."""""" dereferenced_self = weak_self() if dereferenced_self: - return json.dumps(dereferenced_self, - default=serialization.get_json_type, - sort_keys=True).encode(""utf8"") + try: + return json.dumps(dereferenced_self, + default=serialization.get_json_type, + sort_keys=True).encode(""utf8"") + except TypeError: + # Even if get_config worked objects may have produced garbage. + return """" else: return """" return {OBJECT_CONFIG_JSON_KEY: functools.partial( ",0,train eabf2ec12818c4e8ae7008ce14ed12ad38bd1537,tensorflow/tensorflow,"Work around a buggy get_config by ignoring its errors when checkpointing This is extra ""nice to have"" metadata, and otherwise it looks like a checkpointing error. Not worth bothering people about. PiperOrigin-RevId: 231318216",base_test.py,"@@ -83,6 +83,19 @@ class InterfaceTests(test.TestCase): with self.assertRaisesRegexp(AssertionError, ""foo_attr""): status.assert_consumed() + def testBuggyGetConfig(self): + + class NotSerializable(object): + pass + + class GetConfigRaisesError(base.Checkpointable): + + def get_config(self): + return NotSerializable() + + util.Checkpoint(obj=GetConfigRaisesError()).save( + os.path.join(self.get_temp_dir(), ""ckpt"")) + if __name__ == ""__main__"": ops.enable_eager_execution() ",0,train eaa3e88ec3322fd0aa4224040215c3c29a752613,tensorflow/tensorflow,"[XLA] Show metric name in categories table header. Instead of ********** microseconds above estimated optimum report ********** [...] ********** categories table ********** The left hand side numbers are microseconds above estimated optimum. [...] we now print ********** microseconds above estimated optimum report ********** [...] ********** categories table for microseconds above estimated optimum ********** [...] which I think is more explicit and harder to misread. PiperOrigin-RevId: 207325046",metric_table_report.cc,"@@ -134,8 +134,7 @@ void MetricTableReport::AppendHeader() { void MetricTableReport::AppendCategoryTable() { const std::vector categories = MakeCategories(&entries_); - AppendLine(""********** categories table **********""); - AppendLine(""The left hand side numbers are "", metric_name_, "".""); + AppendLine(""********** categories table for "", metric_name_, "" **********""); AppendLine(); double metric_sum = UnaccountedMetric(); @@ -185,8 +184,8 @@ void MetricTableReport::AppendCategoryTable() { } void MetricTableReport::AppendEntryTable() { - AppendLine(""********** "", entry_name_, "" table **********""); - AppendLine(""The left hand side numbers are "", metric_name_, "".""); + AppendLine(""********** "", entry_name_, "" table for "", metric_name_, + "" **********""); AppendLine(); double metric_sum = UnaccountedMetric(); ",0,train 27e6c7b49f4558dfc4bd59a9c492bf4f390a77da,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-12-16 PiperOrigin-RevId: 285720630 Change-Id: Ib744d5f7de70a6c6d73dd1a386712e404d0c2b99",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. 
It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 12, 15) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 12, 16) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train 1d6f79ac5f55fe49b3410eb7c6e6a4269e53f9a3,tensorflow/tensorflow,"Cleanup: remove unused lambda captures, pass string view by value PiperOrigin-RevId: 233420932",bigtable_kernels.cc,"@@ -19,7 +19,6 @@ limitations under the License. #include ""tensorflow/core/lib/core/threadpool.h"" namespace tensorflow { - namespace { class BigtableClientOp : public OpKernel { @@ -341,8 +340,8 @@ class ToBigtableOp : public AsyncOpKernel { } template - Status ParseScalarArgument(OpKernelContext* ctx, - const StringPiece& argument_name, T* output) { + Status ParseScalarArgument(OpKernelContext* ctx, StringPiece argument_name, + T* output) { const Tensor* argument_t; TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t)); if (!TensorShapeUtils::IsScalar(argument_t->shape())) { @@ -360,5 +359,4 @@ REGISTER_KERNEL_BUILDER(Name(""DatasetToBigtable"").Device(DEVICE_CPU), } // namespace } // namespace data - } // namespace tensorflow ",0,train 1d6f79ac5f55fe49b3410eb7c6e6a4269e53f9a3,tensorflow/tensorflow,"Cleanup: remove unused lambda captures, pass string view by value PiperOrigin-RevId: 233420932",gdr_collective_executor_mgr.cc,"@@ -100,8 +100,7 @@ class CollectiveRemoteAccessDistributed : public CollectiveRemoteAccessLocal { // Logic to be executed on the RecvBufAsync callback. auto recv_buf_callback = [this, state, peer_task, to_device, to_alloc_attr, - to_device_ctx, to_tensor, dev_to_dev_stream_index, - done](const Status& s) { + to_device_ctx, to_tensor, done](const Status& s) { if (s.ok()) { remote_memory_manager_->TensorFromTransportOptions( to_tensor, state->call->resp_.transport_options(), to_device, ",0,train 1d6f79ac5f55fe49b3410eb7c6e6a4269e53f9a3,tensorflow/tensorflow,"Cleanup: remove unused lambda captures, pass string view by value PiperOrigin-RevId: 233420932",model_ops.cc,"@@ -304,7 +304,7 @@ class TraverseTreeV4Op : public OpKernel { auto worker_threads = context->device()->tensorflow_cpu_worker_threads(); int num_threads = worker_threads->num_threads; const int64 costPerTraverse = 500; - auto traverse = [this, &set_leaf_ids, &data_set, decision_tree_resource, + auto traverse = [&set_leaf_ids, &data_set, decision_tree_resource, num_data](int64 start, int64 end) { CHECK(start <= end); CHECK(end <= num_data); ",0,train 1d6f79ac5f55fe49b3410eb7c6e6a4269e53f9a3,tensorflow/tensorflow,"Cleanup: remove unused lambda captures, pass string view by value PiperOrigin-RevId: 233420932",stats_ops.cc,"@@ -307,7 +307,7 @@ class ProcessInputOp : public OpKernel { // from a digits run on local desktop. Heuristics might be necessary // if it really matters that much. 
const int64 costPerUpdate = 1000; - auto update = [this, &target, &leaf_ids_tensor, &num_targets, &data_set, + auto update = [&target, &leaf_ids_tensor, &num_targets, &data_set, fertile_stats_resource, &locks, &set_lock, &ready_to_split, num_data](int64 start, int64 end) { CHECK(start <= end); @@ -317,7 +317,7 @@ class ProcessInputOp : public OpKernel { static_cast(end), &ready_to_split); }; - auto update_collated = [this, &target, &num_targets, fertile_stats_resource, + auto update_collated = [&target, &num_targets, fertile_stats_resource, tree_resource, &leaf_examples, &set_lock, &ready_to_split, &data_set, num_leaves](int64 start, int64 end) { ",0,train ed64647b2b408c9b7c84af796793e6c32ab5f23e,tensorflow/tensorflow,"Add the support for IteratorGetNextAsOptionalOp. PiperOrigin-RevId: 309505494 Change-Id: I939fb769f8338d99402592858e4d7b8e7a1aa56c",group_events.cc,"@@ -434,6 +434,9 @@ std::vector CreateInterThreadConnectInfoList() { {HostEventType::kExecutorStateProcess, HostEventType::kIteratorGetNextOp, {StatType::kStepId, StatType::kIterNum}}, + {HostEventType::kExecutorStateProcess, + HostEventType::kIteratorGetNextAsOptionalOp, + {StatType::kStepId, StatType::kIterNum}}, {HostEventType::kKernelLaunch, HostEventType::kKernelExecute, {StatType::kCorrelationId}}, ",0,train ed64647b2b408c9b7c84af796793e6c32ab5f23e,tensorflow/tensorflow,"Add the support for IteratorGetNextAsOptionalOp. PiperOrigin-RevId: 309505494 Change-Id: I939fb769f8338d99402592858e4d7b8e7a1aa56c",xplane_schema.cc,"@@ -102,6 +102,7 @@ const HostEventTypeMap& GetHostEventTypeMap() { {""LocalExecutable::Execute"", kLocalExecutableExecute}, // tf.data related. {""IteratorGetNextOp::DoCompute"", kIteratorGetNextOp}, + {""IteratorGetNextAsOptionalOp::DoCompute"", kIteratorGetNextAsOptionalOp}, // Virtual events for grouping. {""HostTrainingLoopIteration"", kHostTrainingLoopIteration}, {""AsyncExecutorTraceContext"", kAsyncExecutorTraceContext}, ",0,train ed64647b2b408c9b7c84af796793e6c32ab5f23e,tensorflow/tensorflow,"Add the support for IteratorGetNextAsOptionalOp. PiperOrigin-RevId: 309505494 Change-Id: I939fb769f8338d99402592858e4d7b8e7a1aa56c",xplane_schema.h,"@@ -100,6 +100,7 @@ enum HostEventType { kLocalExecutableExecute, // tf.data related. kIteratorGetNextOp, + kIteratorGetNextAsOptionalOp, // Virtual events for grouping. kHostTrainingLoopIteration, kAsyncExecutorTraceContext, ",0,train bc87c28c60dddc6137b11f8a1fd31fa79bcf0c1f,tensorflow/tensorflow,"Register fp16 Reduce min on GPU. PiperOrigin-RevId: 177274800",reduction_ops_min.cc,"@@ -50,6 +50,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS); .TypeConstraint(""Tidx"") \ .HostMemory(""reduction_indices""), \ ReductionOp>); +REGISTER_GPU_KERNELS(Eigen::half); REGISTER_GPU_KERNELS(float); REGISTER_GPU_KERNELS(double); ",0,train bc87c28c60dddc6137b11f8a1fd31fa79bcf0c1f,tensorflow/tensorflow,"Register fp16 Reduce min on GPU. 
PiperOrigin-RevId: 177274800",reduction_ops_test.cc,"@@ -174,6 +174,11 @@ static void BM_Min2DToScalarGPU(int iters, int num_x, int num_y) { } BENCHMARK(BM_Min2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192); +static void BM_Min2DToScalarGPUHalf(int iters, int num_x, int num_y) { + ReduceToScalar(iters, ""gpu"", ""Min"", num_x, num_y); +} +BENCHMARK(BM_Min2DToScalarGPUHalf)->RangePair(2048, 8192, 2048, 8192); + static void BM_Bool2DToScalarGPU(int iters, int num_x, int num_y) { ReduceToScalar(iters, ""gpu"", ""All"", num_x, num_y); } ",0,train 163fd2ea39f550f45717dd70f26ebebdaf74411e,tensorflow/tensorflow,"Remove obsolete BernoulliWithSigmoidProbs (#16846) As was pointed out by 9485, BernoulliWithSigmoidProbs is covered by Bernoulli and is obsolete. This fix removes BernoulliWithSigmoidProbs. This fix closes 9485. Signed-off-by: Yong Tang ",__init__.py,"@@ -97,7 +97,6 @@ _allowed_symbols = [ 'Autoregressive', 'Binomial', 'Bernoulli', - 'BernoulliWithSigmoidProbs', 'Beta', 'BetaWithSoftplusConcentration', 'Categorical', ",0,train 163fd2ea39f550f45717dd70f26ebebdaf74411e,tensorflow/tensorflow,"Remove obsolete BernoulliWithSigmoidProbs (#16846) As was pointed out by 9485, BernoulliWithSigmoidProbs is covered by Bernoulli and is obsolete. This fix removes BernoulliWithSigmoidProbs. This fix closes 9485. Signed-off-by: Yong Tang ",bernoulli_test.py,"@@ -291,12 +291,6 @@ class BernoulliTest(test.TestCase): [np.sqrt(var(0.5)), np.sqrt(var(0.4))]], dtype=np.float32)) - def testBernoulliWithSigmoidProbs(self): - p = np.array([8.3, 4.2]) - dist = bernoulli.BernoulliWithSigmoidProbs(logits=p) - with self.test_session(): - self.assertAllClose(math_ops.sigmoid(p).eval(), dist.probs.eval()) - def testBernoulliBernoulliKL(self): with self.test_session() as sess: batch_size = 6 ",0,train 163fd2ea39f550f45717dd70f26ebebdaf74411e,tensorflow/tensorflow,"Remove obsolete BernoulliWithSigmoidProbs (#16846) As was pointed out by 9485, BernoulliWithSigmoidProbs is covered by Bernoulli and is obsolete. This fix removes BernoulliWithSigmoidProbs. This fix closes 9485. Signed-off-by: Yong Tang ",bernoulli.py,"@@ -167,26 +167,6 @@ class Bernoulli(distribution.Distribution): return math_ops.cast(self.probs > 0.5, self.dtype) -class BernoulliWithSigmoidProbs(Bernoulli): - """"""Bernoulli with `probs = nn.sigmoid(logits)`."""""" - - def __init__(self, - logits=None, - dtype=dtypes.int32, - validate_args=False, - allow_nan_stats=True, - name=""BernoulliWithSigmoidProbs""): - parameters = locals() - with ops.name_scope(name): - super(BernoulliWithSigmoidProbs, self).__init__( - probs=nn.sigmoid(logits, name=""sigmoid_probs""), - dtype=dtype, - validate_args=validate_args, - allow_nan_stats=allow_nan_stats, - name=name) - self._parameters = parameters - - @kullback_leibler.RegisterKL(Bernoulli, Bernoulli) def _kl_bernoulli_bernoulli(a, b, name=None): """"""Calculate the batched KL divergence KL(a || b) with a and b Bernoulli. ",0,train dffd32796e53749d4d3ce90d901b6c04c259d69f,tensorflow/tensorflow,"[XLA] Don't run a pass if it has not been changed in more than 3 fixed pass iterations. 
PiperOrigin-RevId: 386587289 Change-Id: I217b75c25fa53d1c5030c4668e395bbdb2ed88b9",hlo_pass_fix.h,"@@ -52,9 +52,6 @@ class HloPassFix : public Pass { StatusOr Run(HloModule* module) override { RunState run_state(module); TF_RETURN_IF_ERROR(RunToFixPoint(module, &run_state)); - if (Pass::IsPassPipeline()) { - Pass::ResetPassPipeline(); - } return !run_state.changed.empty(); } ",0,train dffd32796e53749d4d3ce90d901b6c04c259d69f,tensorflow/tensorflow,"[XLA] Don't run a pass if it has not been changed in more than 3 fixed pass iterations. PiperOrigin-RevId: 386587289 Change-Id: I217b75c25fa53d1c5030c4668e395bbdb2ed88b9",hlo_pass_interface.h,"@@ -92,8 +92,6 @@ class HloPassInterface { virtual StatusOr RunOnModuleGroup(HloModuleGroup* module_group) = 0; virtual bool IsPassPipeline() { return false; } - - virtual void ResetPassPipeline() {} }; // Base class for passes which are module-scoped. ",0,train dffd32796e53749d4d3ce90d901b6c04c259d69f,tensorflow/tensorflow,"[XLA] Don't run a pass if it has not been changed in more than 3 fixed pass iterations. PiperOrigin-RevId: 386587289 Change-Id: I217b75c25fa53d1c5030c4668e395bbdb2ed88b9",hlo_pass_pipeline.cc,"@@ -16,9 +16,7 @@ limitations under the License. #include ""tensorflow/compiler/xla/service/hlo_pass_pipeline.h"" #include -#include -#include ""absl/algorithm/container.h"" #include ""absl/container/flat_hash_map.h"" #include ""absl/container/flat_hash_set.h"" #include ""absl/strings/str_format.h"" @@ -122,10 +120,6 @@ void SetInstructionMetadata(HloModuleGroup& module_group) { } // namespace -void HloPassPipeline::ResetPassPipeline() { - absl::c_fill(pass_run_counts_since_change_, 0); -} - template Status HloPassPipeline::RunInvariantCheckers( HloT* hlo, absl::string_view after_pass_name) { @@ -172,14 +166,10 @@ StatusOr HloPassPipeline::RunPassesInternal( bool changed = false; for (int i = 0; i < passes.size(); i++) { HloPassInterface* pass = passes[i]; - if (pass_run_counts_since_change_[i] > 3) { - VLOG(1) << "" Skipping HLO pass "" << passes[i]->name(); - continue; - } XLA_SCOPED_LOGGING_TIMER(absl::StrCat(""HLO pass: "", pass->name())); std::string pass_name = std::string(pass->name()); VLOG(1) << "" HLO pass "" << pass_name; - VLOG(3) << "" Module hash "" << hlo->Hash(); + VLOG(2) << "" Module hash "" << hlo->Hash(); if (!pass->IsPassPipeline()) { compilation_stats_->StartPass(pass_name); } @@ -196,13 +186,7 @@ StatusOr HloPassPipeline::RunPassesInternal( RecordPassEndMetadata(*hlo, pass_name, pass_changed); changed |= pass_changed; if (pass_changed) { - VLOG(1) << name() << "":"" << pass->name() << "" -> "" - << pass_run_counts_since_change_[i]; - if (pass_run_counts_since_change_[i] <= 3) { - pass_run_counts_since_change_[i] = 0; - } - } else { - ++pass_run_counts_since_change_[i]; + VLOG(3) << "" Pass caused changes "" << pass->name(); } TF_RETURN_IF_ERROR(RunInvariantCheckers(hlo, pass_name)); if (!pass->IsPassPipeline()) { ",0,train dffd32796e53749d4d3ce90d901b6c04c259d69f,tensorflow/tensorflow,"[XLA] Don't run a pass if it has not been changed in more than 3 fixed pass iterations. 
PiperOrigin-RevId: 386587289 Change-Id: I217b75c25fa53d1c5030c4668e395bbdb2ed88b9",hlo_pass_pipeline.h,"@@ -58,7 +58,6 @@ class HloPassPipeline : public HloPassInterface { CHECK(!run_called_) << ""AddPass cannot be called after Run""; auto pass = new T(std::forward(args)...); passes_.push_back(std::unique_ptr(pass)); - pass_run_counts_since_change_.push_back(0); return *pass; } @@ -86,8 +85,6 @@ class HloPassPipeline : public HloPassInterface { bool IsPassPipeline() override { return true; } - void ResetPassPipeline() override; - // Return size of passes_. int PassesSize() { return passes_.size(); } // Return reference to pass specified by index. @@ -137,8 +134,6 @@ class HloPassPipeline : public HloPassInterface { const string name_; std::vector> passes_; - // How many times has the pass run without chaning. - std::vector pass_run_counts_since_change_; std::vector> invariant_checkers_; bool run_called_ = false; ",0,train 589a2d431cf7f1e3479f2f581da0f69b761df165,tensorflow/tensorflow,"TensorFlow for NVIDIA Tegra devices with CUDA support (#14167) This commit enables CUDA support on compatible devices running Android such as the Nvidia TX1 and TX2 when using Makefile builds. Note that JetPack for Android is required to build/run Android TF binaries with CUDA support. This should be released by Nvidia in the near future.",register_types.h,"@@ -52,7 +52,7 @@ limitations under the License. #undef REGISTER_PARTITION */ -#if !defined(IS_MOBILE_PLATFORM) || defined(SUPPORT_SELECTIVE_REGISTRATION) +#if !defined(IS_MOBILE_PLATFORM) || defined(SUPPORT_SELECTIVE_REGISTRATION) || defined(NVIDIA_TEGRA) // All types are supported, so all macros are invoked. // ",0,train 589a2d431cf7f1e3479f2f581da0f69b761df165,tensorflow/tensorflow,"TensorFlow for NVIDIA Tegra devices with CUDA support (#14167) This commit enables CUDA support on compatible devices running Android such as the Nvidia TX1 and TX2 when using Makefile builds. Note that JetPack for Android is required to build/run Android TF binaries with CUDA support. This should be released by Nvidia in the near future.",cuda_diagnostics.cc,"@@ -232,7 +232,7 @@ port::StatusOr Diagnostician::FindDsoVersion() { result = StringToDriverVersion(version); } #else -#if !defined(PLATFORM_WINDOWS) +#if !defined(PLATFORM_WINDOWS) && !defined(NVIDIA_TEGRA) // Callback used when iterating through DSOs. Looks for the driver-interfacing // DSO and yields its version number into the callback data, when found. auto iterate_phdr = ",0,train 4b87b12c96c56a4c6485195ca5123b1a21636221,tensorflow/tensorflow,"Update run_v1_only test with proper reason. PiperOrigin-RevId: 322164638 Change-Id: I17ffbc5c396c8fa97e2a4dff8ada380795c52ba2",server_lib_same_variables_no_clear_test.py,"@@ -34,7 +34,8 @@ class SameVariablesNoClearTest(test.TestCase): # TODO(b/34465411): Starting multiple servers with different configurations # in the same test is flaky. Move this test case back into # ""server_lib_test.py"" when this is no longer the case. 
- @test_util.run_v1_only(""b/120545219"") + @test_util.run_v1_only( + ""This exercises tensor lookup via names which is not supported in V2."") def testSameVariablesNoClear(self): server = server_lib.Server.create_local_server() ",0,train 05a122df524904dd8869fb564cae083ad53f3c73,tensorflow/tensorflow,"TFLITE_WITH_RUY_GEMV uses CustomGEMV for float PiperOrigin-RevId: 292930831 Change-Id: I3786ba562af1cf5f3a8e000af4abac65696bf3a3",cpu_backend_gemm.h,"@@ -94,15 +94,19 @@ void Gemm(const MatrixParams& lhs_params, const LhsScalar* lhs_data, CpuBackendContext* context) { ruy::profiler::ScopeLabel label(""cpu_backend_gemm::Gemm""); ValidateParams(lhs_params, rhs_params, dst_params, params); -#ifndef TFLITE_WITH_RUY_GEMV - if (dst_params.cols == 1) { + bool do_custom_gemv = dst_params.cols == 1; +#ifdef TFLITE_WITH_RUY_GEMV + // Prefer a Ruy GEMM to Custom GEMV unless we are doing float math. + // TODO(b/148692500): Add float GEMV kernels to Ruy. + do_custom_gemv = do_custom_gemv && std::is_floating_point::value; +#endif + if (do_custom_gemv) { // GEMV case: try a custom fast GEMV path. if (detail::CustomGemv(lhs_params, lhs_data, rhs_params, rhs_data, dst_params, dst_data, params, context)) { return; } } -#endif ruy::profiler::ScopeLabel label2(""cpu_backend_gemm::Gemm: general GEMM""); GemmImpl::Run(lhs_params, lhs_data, rhs_params, rhs_data, ",0,train 647ab367ed13cd166577a62e5088c083face328f,tensorflow/tensorflow,"Add float16 and bfloat16 support for tf.image.rgb_to_hsv/tf.image.hsv_to_rgb This PR addresses the issue raised in 54855 where there was no float16 and bfloat16 support for tf.image.rgb_to_hsv/tf.image.hsv_to_rgb This PR fixes 54855. Signed-off-by: Yong Tang ",colorspace_op.cc,"@@ -116,6 +116,8 @@ class HSVToRGBOp : public OpKernel { template class HSVToRGBOp; TF_CALL_float(REGISTER_CPU); TF_CALL_double(REGISTER_CPU); +TF_CALL_half(REGISTER_CPU); +TF_CALL_bfloat16(REGISTER_CPU); #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) ",0,train 3526f05b16ed8ab00f4287b62b8b49589fbf7971,tensorflow/tensorflow,"Add detection_responder which allows each platform to process the person detection output in its own way. For example, sparkfun_edge lights up the yellow LED for no person and the green LED for person, and toggles the blue LED on each run. PiperOrigin-RevId: 257638242",detection_responder.cc,"@@ -0,0 +1,25 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h"" + +// This dummy implementation writes person and no person scores to the error +// console. Real applications will want to take some custom action instead, and +// should implement their own versions of this function. 
+void RespondToDetection(tflite::ErrorReporter* error_reporter, + uint8_t person_score, uint8_t no_person_score) { + error_reporter->Report(""person score:%d no person score %d"", person_score, + no_person_score); +} ",0,train 3526f05b16ed8ab00f4287b62b8b49589fbf7971,tensorflow/tensorflow,"Add detection_responder which allows each platform to process the person detection output in its own way. For example, sparkfun_edge lights up the yellow LED for no person and the green LED for person, and toggles the blue LED on each run. PiperOrigin-RevId: 257638242",detection_responder.h,"@@ -0,0 +1,34 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Provides an interface to take an action based on the output from the person +// detection model. + +#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_VISION_DETECTION_RESPONDER_H_ +#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_VISION_DETECTION_RESPONDER_H_ + +#include ""tensorflow/lite/c/c_api_internal.h"" +#include ""tensorflow/lite/experimental/micro/micro_error_reporter.h"" + +// Called every time the results of a person detection run are available. The +// `person_score` has the numerical confidence that the captured image contains +// a person, and `no_person_score` has the numerical confidence that the image +// does not contain a person. Typically if person_score > no person score, the +// image is considered to contain a person. This threshold may be adjusted for +// particular applications. +void RespondToDetection(tflite::ErrorReporter* error_reporter, + uint8_t person_score, uint8_t no_person_score); + +#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_VISION_DETECTION_RESPONDER_H_ ",0,train 3526f05b16ed8ab00f4287b62b8b49589fbf7971,tensorflow/tensorflow,"Add detection_responder which allows each platform to process the person detection output in its own way. For example, sparkfun_edge lights up the yellow LED for no person and the green LED for person, and toggles the blue LED on each run. PiperOrigin-RevId: 257638242",detection_responder_test.cc,"@@ -0,0 +1,34 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include ""tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h"" + +#include ""tensorflow/lite/experimental/micro/testing/micro_test.h"" +#include ""tensorflow/lite/experimental/micro/testing/test_utils.h"" + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(TestCallability) { + tflite::MicroErrorReporter micro_error_reporter; + tflite::ErrorReporter* error_reporter = µ_error_reporter; + + // This will have external side-effects (like printing to the debug console + // or lighting an LED) that are hard to observe, so the most we can do is + // make sure the call doesn't crash. + RespondToDetection(error_reporter, 100, 200); + RespondToDetection(error_reporter, 200, 100); +} + +TF_LITE_MICRO_TESTS_END ",0,train 3526f05b16ed8ab00f4287b62b8b49589fbf7971,tensorflow/tensorflow,"Add detection_responder which allows each platform to process the person detection output in its own way. For example, sparkfun_edge lights up the yellow LED for no person and the green LED for person, and toggles the blue LED on each run. PiperOrigin-RevId: 257638242",main.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include ""tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h"" #include ""tensorflow/lite/experimental/micro/examples/micro_vision/image_provider.h"" #include ""tensorflow/lite/experimental/micro/examples/micro_vision/model_settings.h"" #include ""tensorflow/lite/experimental/micro/examples/micro_vision/person_detect_model_data.h"" @@ -69,12 +70,10 @@ int main(int argc, char* argv[]) { TfLiteTensor* output = interpreter.output(0); - // Log the person score and no person score. + // Process the inference results. uint8_t person_score = output->data.uint8[kPersonIndex]; uint8_t no_person_score = output->data.uint8[kNotAPersonIndex]; - error_reporter->Report( - ""person data. person score: %d, no person score: %d\n"", person_score, - no_person_score); + RespondToDetection(error_reporter, person_score, no_person_score); } return 0; ",0,train 3526f05b16ed8ab00f4287b62b8b49589fbf7971,tensorflow/tensorflow,"Add detection_responder which allows each platform to process the person detection output in its own way. For example, sparkfun_edge lights up the yellow LED for no person and the green LED for person, and toggles the blue LED on each run. PiperOrigin-RevId: 257638242",detection_responder.cc,"@@ -0,0 +1,54 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h"" + +#include ""am_bsp.h"" // NOLINT + +// This implementation will light up LEDs on the board in response to the +// inference results. 
+void RespondToDetection(tflite::ErrorReporter* error_reporter, + uint8_t person_score, uint8_t no_person_score) { + static bool is_initialized = false; + if (!is_initialized) { + // Setup LED's as outputs. Leave red LED alone since that's an error + // indicator for sparkfun_edge in image_provider. + am_hal_gpio_pinconfig(AM_BSP_GPIO_LED_BLUE, g_AM_HAL_GPIO_OUTPUT_12); + am_hal_gpio_pinconfig(AM_BSP_GPIO_LED_GREEN, g_AM_HAL_GPIO_OUTPUT_12); + am_hal_gpio_pinconfig(AM_BSP_GPIO_LED_YELLOW, g_AM_HAL_GPIO_OUTPUT_12); + is_initialized = true; + } + + // Toggle the blue LED every time an inference is performed. + static int count = 0; + if (++count & 1) { + am_hal_gpio_output_set(AM_BSP_GPIO_LED_BLUE); + } else { + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_BLUE); + } + + // Turn on the green LED if a person was detected. Turn on the yellow LED + // otherwise. + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_YELLOW); + am_hal_gpio_output_clear(AM_BSP_GPIO_LED_GREEN); + if (person_score > no_person_score) { + am_hal_gpio_output_set(AM_BSP_GPIO_LED_GREEN); + } else { + am_hal_gpio_output_set(AM_BSP_GPIO_LED_YELLOW); + } + + error_reporter->Report(""person score:%d no person score %d"", person_score, + no_person_score); +} ",0,train 0047edb7b1d35d588c3e3fb5bfe7de60c67c351c,tensorflow/tensorflow,"Return failure for ops with zero results in TF dialect folding hook Currently, the hook returns success for ops like tf.Yield that doesn't have any side effect and zero results. That will cause canonicalizer or any other pass using the greedy rewriter to not converge. Canonicalizer pass doesn't handle return value of the greedy rewriter so this change doesn't have any observable effect for canonicalizer. Added test is passing before this change as well because the canonicalizer pass ignores convergence issue in the rewriter. Ran into this issue while using greedy rewriter to replace _XlaHostComputeMlir op by XlaHostCompute op. PiperOrigin-RevId: 362152694 Change-Id: I30a3829de7c0a75fa8f0b137246a50aedc0db918",constant_fold.cc,"@@ -72,8 +72,11 @@ LogicalResult ConstantFoldFallbackHook( Operation* inst, ArrayRef operands, SmallVectorImpl& results) { // NOLINT // Instructions with side effects should not be constant folded to preserve - // the original semantics. - if (inst->hasTrait() || + // the original semantics. Ops that have no side effect and zero results but + // could be folded should have a custom folder instead of relying on the + // TensorFlow folding hook. + if (inst->getNumResults() == 0 || + inst->hasTrait() || inst->getNumRegions() != 0 || !MemoryEffectOpInterface::hasNoEffect(inst)) return failure(); ",0,train 401070d057969b522e0ef176a0adc5e14eb74979,tensorflow/tensorflow,"[TF:TRT] Change `AsyncHelper` to call callback function once. Macros such as OP_REQUIRES_OK_ASYNC requires AsyncHelper to have this operator, which can be used to invoke the callback function. However, in our case, we only need to invoke the callback function when the object is destructed. PiperOrigin-RevId: 364582970 Change-Id: I93d75d725eb0851b1122aaabe2b159fb60ad42a0",trt_engine_op.cc,"@@ -115,11 +115,11 @@ class AsyncHelper : public core::RefCounted { public: AsyncHelper(AsyncOpKernel::DoneCallback done) : done_(done) {} - ~AsyncHelper() override { this->operator()(); } + ~AsyncHelper() override { done_(); } - void operator()() { - done_(); - } + // The function call operator is used at error handling. However, the callback + // is deferred to destruction. 
+ void operator()() {} private: AsyncOpKernel::DoneCallback done_; @@ -502,8 +502,9 @@ void TRTEngineOp::ExecuteNativeSegment(OpKernelContext* ctx, allow_soft_placement_, ctx->num_inputs(), ctx->num_outputs()); OP_REQUIRES_OK_ASYNC(ctx, status_or_handle.status(), *helper); - native_execution_func_handle_ = status_or_handle.ValueOrDie(); + native_execution_func_handle_ = *status_or_handle; } + auto lib = ctx->function_library(); FunctionLibraryRuntime::Options opts; opts.rendezvous = ctx->rendezvous(); ",0,train f66f384729b2a2f70fd01902f49b0b7a95be9f26,tensorflow/tensorflow,"Make CHLO->HLO patterns extend OpRewritePattern vs OpConversionPattern. * In the absence of type conversion, this is more generally compatible (ie. with the greedy rewriter). * Consistent with the rest of the legalize_tf patterns. PiperOrigin-RevId: 311209137 Change-Id: I3a409dbc307c141753c73ae7731276c61a2728d0",chlo_legalize_to_hlo.cc,"@@ -33,24 +33,23 @@ namespace { // Converts binary ops that statically are determined to not broadcast directly // to the corresponding xla_hlo non-broadcasting op. template -struct ConvertTrivialNonBroadcastBinaryOp - : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - LogicalResult matchAndRewrite( - ChloOpTy op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { +struct ConvertTrivialNonBroadcastBinaryOp : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(ChloOpTy op, + PatternRewriter &rewriter) const override { // Only rewrite for statically determinable non-broadcasting cases. - auto lhs = operands[0].getType().dyn_cast(); - auto rhs = operands[1].getType().dyn_cast(); - if (!lhs || !rhs) return failure(); + auto lhs_type = op.lhs().getType().template dyn_cast(); + auto rhs_type = op.rhs().getType().template dyn_cast(); + if (!lhs_type || !rhs_type) return failure(); // Requires rank broadcast. - if (lhs.getRank() != rhs.getRank()) return failure(); + if (lhs_type.getRank() != rhs_type.getRank()) return failure(); // Any dynamic dimension may require broadcasting and requires more // analysis. - if (!lhs.hasStaticShape() || !rhs.hasStaticShape()) return failure(); + if (!lhs_type.hasStaticShape() || !rhs_type.hasStaticShape()) + return failure(); - for (auto extents : llvm::zip(lhs.getShape(), rhs.getShape())) { + for (auto extents : llvm::zip(lhs_type.getShape(), rhs_type.getShape())) { auto lhs_extent = std::get<0>(extents); auto rhs_extent = std::get<1>(extents); if (lhs_extent != rhs_extent) { @@ -58,9 +57,8 @@ struct ConvertTrivialNonBroadcastBinaryOp } } - rewriter.replaceOp( - op, {Adaptor::CreateOp(op, op.getResult().getType(), operands[0], - operands[1], rewriter)}); + rewriter.replaceOp(op, {Adaptor::CreateOp(op, op.getResult().getType(), + op.lhs(), op.rhs(), rewriter)}); return success(); } }; @@ -83,14 +81,13 @@ struct ConvertTrivialNonBroadcastBinaryOp // Whether that is of any practical benefit remains to be seen. template struct ConvertRankedDynamicBroadcastBinaryOp - : public OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - LogicalResult matchAndRewrite( - ChloOpTy op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(ChloOpTy op, + PatternRewriter &rewriter) const override { // Only support ranked operands. 
- Value lhs = operands[0]; - Value rhs = operands[1]; + Value lhs = op.lhs(); + Value rhs = op.rhs(); auto lhs_type = lhs.getType().dyn_cast(); auto rhs_type = rhs.getType().dyn_cast(); auto result_type = ",0,train fdbd02c8d7f07bd1207938662716fad8857dcd55,tensorflow/tensorflow,"Make moments numerically stable by default. Added tests for moments. Change: 144114955",nn_impl.py,"@@ -580,6 +580,9 @@ def moments(x, axes, shift=None, name=None, keep_dims=False): across `axes`. If `x` is 1-D and `axes = [0]` this is just the mean and variance of a vector. + Note: for numerical stability, when shift=None, the true mean + would be computed and used as shift. + When using these moments for batch normalization (see `tf.nn.batch_normalization`): @@ -592,8 +595,9 @@ def moments(x, axes, shift=None, name=None, keep_dims=False): axes: Array of ints. Axes along which to compute mean and variance. shift: A `Tensor` containing the value by which to shift the data for - numerical stability, or `None` if no shift is to be performed. A shift - close to the true mean provides the most numerically stable results. + numerical stability, or `None` in which case the true mean of the data is + used as shift. A shift close to the true mean provides the most + numerically stable results. name: Name used to scope the operations that compute the moments. keep_dims: produce moments with the same dimensionality as the input. @@ -605,10 +609,17 @@ def moments(x, axes, shift=None, name=None, keep_dims=False): # sufficient statistics. As a workaround we simply perform the operations # on 32-bit floats before converting the mean and variance back to fp16 y = math_ops.cast(x, dtypes.float32) if x.dtype == dtypes.float16 else x - shift = math_ops.cast(shift, dtypes.float32) if ( - shift is not None and x.dtype == dtypes.float16) else shift + if shift is None: + # Compute true mean while keeping the dims for proper broadcasting. + shift = array_ops.stop_gradient( + math_ops.reduce_mean(y, axes, keep_dims=True)) + else: + shift = math_ops.cast(shift, y.dtype) counts, m_ss, v_ss, shift = sufficient_statistics( y, axes, shift=shift, keep_dims=keep_dims, name=name) + # Reshape shift as needed. + shift = array_ops.reshape(shift, array_ops.shape(m_ss)) + shift.set_shape(m_ss.get_shape()) with ops.control_dependencies([counts, m_ss, v_ss]): mean, variance = normalize_moments(counts, m_ss, v_ss, shift, name=name) if x.dtype == dtypes.float16: ",0,train fdbd02c8d7f07bd1207938662716fad8857dcd55,tensorflow/tensorflow,"Make moments numerically stable by default. Added tests for moments. 
Change: 144114955",nn_test.py,"@@ -25,6 +25,7 @@ from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import nn_impl @@ -791,5 +792,78 @@ class CReluTest(test_lib.TestCase): self.assertAllClose(y, z, 1e-4) +class MomentsTest(test_lib.TestCase): + + def doOutputTest(self, input_shape, moments_axes, tol=1e-4): + for mu in [0.0, 1.0, 1e3]: + for sigma in [1.0, 0.1]: + for keep_dims in [True, False]: + input_values = np.random.rand(*input_shape) * sigma + mu + expected_mean = np.mean(input_values, axis=moments_axes, + keepdims=keep_dims) + expected_var = np.var(input_values, axis=moments_axes, + keepdims=keep_dims) + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + inputs = constant_op.constant(input_values, + shape=input_shape, + dtype=dtypes.float32) + mean, variance = nn_impl.moments(inputs, + moments_axes, + keep_dims=keep_dims) + + [mean, variance] = sess.run([mean, variance]) + # Make sure that there are no NaNs + self.assertFalse(np.isnan(mean).any()) + self.assertFalse(np.isnan(variance).any()) + self.assertAllClose(mean, expected_mean, rtol=tol, atol=tol) + self.assertAllClose(variance, expected_var, rtol=tol, atol=tol) + + def testOutput2DInput0(self): + self.doOutputTest((10, 300), (0,)) + + def testOutput2DInput1(self): + self.doOutputTest((10, 300), (1,)) + + def testOutput2DInput01(self): + self.doOutputTest((10, 300), (0, 1)) + + def testOutput4DInput0(self): + self.doOutputTest((10, 10, 10, 30), (0,)) + + def testOutput4DInput1(self): + self.doOutputTest((10, 10, 10, 30), (1,)) + + def testOutput4DInput3(self): + self.doOutputTest((10, 10, 10, 30), (3,)) + + def testOutput4DInput012(self): + self.doOutputTest((10, 10, 10, 30), (0, 1, 2)) + + def testOutput4DInput123(self): + self.doOutputTest((10, 10, 10, 30), (1, 2, 3)) + + def testUnstableOutputShiftNone(self): + input_shape = (10, 300) + moments_axes = (0, 1) + mu, sigma = 1e3, 0.1 + tol = 1e-3 + input_values = np.random.rand(*input_shape) * sigma + mu + expected_mean = np.mean(input_values, axis=moments_axes) + expected_var = np.var(input_values, axis=moments_axes) + + with self.test_session() as sess: + inputs = constant_op.constant(input_values, shape=input_shape, + dtype=dtypes.float32) + mean, variance = nn_impl.moments(inputs, moments_axes, shift=0.0) + + [mean, variance] = sess.run([mean, variance]) + # Make sure that there are no NaNs + self.assertFalse(np.isnan(mean).any()) + self.assertFalse(np.isnan(variance).any()) + self.assertAllClose(mean, expected_mean, rtol=tol, atol=tol) + # The variance is unstable + self.assertGreater(np.abs(variance - expected_var), 0.1) + if __name__ == ""__main__"": test_lib.main() ",0,train 425ef6cfe94d84a876694e38cef3e3814378410d,tensorflow/tensorflow,"Fix mlir error_util_test on windows. 
PiperOrigin-RevId: 300828339 Change-Id: If89febf00d5727bcecf1958ddb6b0ede5736b80c",error_util_test.cc,"@@ -58,7 +58,8 @@ TEST(ErrorUtilTest, StatusScopedDiagnosticHandler) { emitError(loc) << ""Second diagnostic message reported""; return tensorflow::errors::Internal(""Passed in error""); }; - Status s = StatusScopedDiagnosticHandler(&context).Combine(function()); + StatusScopedDiagnosticHandler ssdh(&context); + Status s = ssdh.Combine(function()); ASSERT_TRUE(tensorflow::errors::IsInternal(s)); EXPECT_THAT(s.error_message(), HasSubstr(""Passed in error"")); EXPECT_THAT(s.error_message(), HasSubstr(""Diagnostic message reported"")); ",0,train 7478c2798e212f8373d286e953ac90d6f524d1bb,tensorflow/tensorflow,"Support using worker/0 as the client in SetServerDef. PiperOrigin-RevId: 428611848 Change-Id: Iab846ebcfc46713cfdb09ec40432e14a6083dec6",context_distributed_manager.cc,"@@ -52,6 +52,7 @@ limitations under the License. #include ""tensorflow/core/distributed_runtime/remote_device.h"" #include ""tensorflow/core/distributed_runtime/server_lib.h"" #include ""tensorflow/core/distributed_runtime/session_mgr.h"" +#include ""tensorflow/core/distributed_runtime/worker_cache.h"" #include ""tensorflow/core/distributed_runtime/worker_env.h"" #include ""tensorflow/core/distributed_runtime/worker_interface.h"" #endif // !IS_MOBILE_PLATFORM @@ -548,6 +549,29 @@ Status UpdateContextWithServerDef(EagerContext* context, // Initialize remote eager workers. if (reset_context) { + const auto& config = server_def.default_session_config(); + const bool enable_coordination = + !config.experimental().coordination_config().service_type().empty(); + + if (enable_coordination) { + WorkerCacheInterface* worker_cache = server->master_env()->worker_cache; + LOG_AND_RETURN_IF_ERROR( + context->GetDistributedManager()->EnableCoordinationService( + config.experimental().coordination_config().service_type(), + server->worker_env(), server_def, worker_cache)); + std::unique_ptr client_cache; + LOG_AND_RETURN_IF_ERROR( + worker_cache->GetCoordinationClientCache(&client_cache)); + TF_RETURN_IF_ERROR( + context->GetDistributedManager() + ->GetCoordinationServiceAgent() + ->Initialize(server->worker_env()->env, server_def, + std::move(client_cache), + /*error_fn=*/[](Status s) { + LOG(ERROR) + << ""Coordination agent is set to error: "" << s; + })); + } const Status s = CreateRemoteContexts( context, remote_workers, context_id, context_view_id, keep_alive_secs, server_def, remote_eager_workers.get(), context->Executor().Async(), ",0,train 7478c2798e212f8373d286e953ac90d6f524d1bb,tensorflow/tensorflow,"Support using worker/0 as the client in SetServerDef. PiperOrigin-RevId: 428611848 Change-Id: Iab846ebcfc46713cfdb09ec40432e14a6083dec6",c_api_coordination_test.cc,"@@ -38,7 +38,7 @@ namespace { constexpr char kCoordinationServiceType[] = ""standalone""; -void EnableCoordinationService(tensorflow::ServerDef* server_def) { +void ConfigCoordinationService(tensorflow::ServerDef* server_def) { auto coord_config = server_def->mutable_default_session_config() ->mutable_experimental() ->mutable_coordination_config(); @@ -104,7 +104,7 @@ TEST(CAPI, MultiClientCoordinationService) { const int cluster_size = 3; tensorflow::ServerDef server_def = GetMultiClientServerDef(""worker"", cluster_size); - EnableCoordinationService(&server_def); + ConfigCoordinationService(&server_def); auto worker_thread_fn = [&](int worker_id) { tensorflow::ServerDef server_def_copy = server_def; // By default, server_def has task index set to 0. 
@@ -170,7 +170,7 @@ TEST(CAPI, MultiClientSetGetConfigInOp) { const int cluster_size = 3; tensorflow::ServerDef server_def = GetMultiClientServerDef(""worker"", cluster_size); - EnableCoordinationService(&server_def); + ConfigCoordinationService(&server_def); BlockingCounter finish_counter(cluster_size); auto worker_thread_fn = [&](int worker_id) { tensorflow::ServerDef server_def_copy = server_def; @@ -257,7 +257,7 @@ TEST(CAPI, MultiClientCoordinationSetGetConfigs) { const int cluster_size = 3; tensorflow::ServerDef server_def = GetMultiClientServerDef(""worker"", cluster_size); - EnableCoordinationService(&server_def); + ConfigCoordinationService(&server_def); tensorflow::BlockingCounter counter1(cluster_size); tensorflow::BlockingCounter counter2(cluster_size); tensorflow::BlockingCounter counter3(cluster_size); @@ -327,7 +327,7 @@ TEST(CAPI, MultiClientPropagateError) { const int cluster_size = 3; tensorflow::ServerDef server_def = GetMultiClientServerDef(""worker"", cluster_size); - EnableCoordinationService(&server_def); + ConfigCoordinationService(&server_def); // Barrier for initializing the cluster. tensorflow::BlockingCounter counter1(cluster_size); // Barrier for finishing executing operations on all workers. @@ -387,35 +387,29 @@ TEST(CAPI, MultiClientPropagateError) { thread_worker3.join(); } -TEST(CAPI, SingleClientSetGetConfigInOp) { +class SingleClientCoordinationServiceTest + : public ::testing::Test, + public ::testing::WithParamInterface {}; + +TEST_P(SingleClientCoordinationServiceTest, TestSetGetConfigInOp) { + const bool use_worker0_as_client = GetParam(); tensorflow::ServerDef server_def = GetServerDef(""worker"", 3); const char task0_name[] = ""/job:worker/replica:0/task:0/device:CPU:0""; const char task1_name[] = ""/job:worker/replica:0/task:1/device:CPU:0""; const char task2_name[] = ""/job:worker/replica:0/task:2/device:CPU:0""; - EnableCoordinationService(&server_def); - // Add localhost job for the remote client task - auto cluster = server_def.mutable_cluster(); - auto client_job = cluster->add_job(); - client_job->set_name(""localhost""); - const int client_port = tensorflow::testing::PickUnusedPortOrDie(); - client_job->mutable_tasks()->insert( - {0, strings::StrCat(""localhost:"", client_port)}); - server_def.set_job_name(""localhost""); - server_def.mutable_default_session_config() - ->mutable_experimental() - ->mutable_coordination_config() - ->set_service_leader(task0_name); - string serialized = server_def.SerializeAsString(); - + ConfigCoordinationService(&server_def); ServerFactory* factory; ASSERT_TRUE(ServerFactory::GetFactory(server_def, &factory).ok()); server_def.set_job_name(""worker""); server_def.set_task_index(0); std::unique_ptr w0; - ASSERT_TRUE( - factory->NewServer(server_def, ServerFactory::Options(), &w0).ok()); - ASSERT_TRUE(w0->Start().ok()); + if (!use_worker0_as_client) { + // Start a separate server for worker0 if it's not used as the client + ASSERT_TRUE( + factory->NewServer(server_def, ServerFactory::Options(), &w0).ok()); + ASSERT_TRUE(w0->Start().ok()); + } server_def.set_task_index(1); std::unique_ptr w1; ASSERT_TRUE( @@ -435,6 +429,23 @@ TEST(CAPI, SingleClientSetGetConfigInOp) { EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); TFE_DeleteContextOptions(opts); + server_def.set_task_index(0); + if (!use_worker0_as_client) { + // Add localhost job for the remote client task + auto cluster = server_def.mutable_cluster(); + auto client_job = cluster->add_job(); + client_job->set_name(""localhost""); + const int 
client_port = tensorflow::testing::PickUnusedPortOrDie(); + client_job->mutable_tasks()->insert( + {0, strings::StrCat(""localhost:"", client_port)}); + server_def.set_job_name(""localhost""); + } + server_def.mutable_default_session_config() + ->mutable_experimental() + ->mutable_coordination_config() + ->set_service_leader(task0_name); + const std::string serialized = server_def.SerializeAsString(); + TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status); EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); @@ -541,5 +552,12 @@ TEST(CAPI, SingleClientSetGetConfigInOp) { w2.release(); } +INSTANTIATE_TEST_SUITE_P(CAPI, SingleClientCoordinationServiceTest, + ::testing::Bool(), + [](const ::testing::TestParamInfo arg) { + return arg.param ? ""use_worker0_as_client"" + : ""use_remote_client""; + }); + } // namespace } // namespace tensorflow ",0,train 4dee31dc561f8101f4d1275c3640e5da38069215,tensorflow/tensorflow,Added doc in MobileNet for decode_predictions() and preprocess_input(),mobilenet.py,"@@ -436,9 +436,31 @@ def _depthwise_conv_block(inputs, @keras_export('keras.applications.mobilenet.preprocess_input') def preprocess_input(x, data_format=None): + """"""Preprocesses a numpy array encoding a batch of images. + + Arguments + x: A 4D numpy array consists of RGB values within [0, 255]. + + Returns + Preprocessed array. + """""" return imagenet_utils.preprocess_input(x, data_format=data_format, mode='tf') @keras_export('keras.applications.mobilenet.decode_predictions') def decode_predictions(preds, top=5): + """"""Decodes the prediction result from the model. + + Arguments + preds: Numpy tensor encoding a batch of predictions. + top: Integer, how many top-guesses to return. + + Returns + A list of lists of top class prediction tuples + `(class_name, class_description, score)`. + One list of tuples per sample in batch input. + + Raises + ValueError: In case of invalid shape of the `preds` array (must be 2D). 
+ """""" return imagenet_utils.decode_predictions(preds, top=top) ",0,test 4d0a420c4b4d1fbe3e666bd377de2a40401177d2,tensorflow/tensorflow,Fix clang-format and pylint,convert_nodes.cc,"@@ -3856,8 +3856,10 @@ tensorflow::Status ConvertSegmentToGraphDef( marker_nodes.insert(node_name); auto seg_node = segment_def->add_node(); tensorflow::NodeDefBuilder builder(node_name, ""Identity""); - auto status = builder.Input(connection.inside_node_name, connection.inside_port, dtype) - .Finalize(seg_node); + auto status = + builder + .Input(connection.inside_node_name, connection.inside_port, dtype) + .Finalize(seg_node); VLOG(1) << ""Constructing output "" << node_name << "" for the edge "" << connection.inside_node_name << "":"" << connection.inside_port << "" -> "" << connection.outside_node_name << "":"" ",0,train 4d0a420c4b4d1fbe3e666bd377de2a40401177d2,tensorflow/tensorflow,Fix clang-format and pylint,topk_test.py,"@@ -18,10 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np - from tensorflow.contrib.tensorrt.test import tf_trt_integration_test_base as trt_test -from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import nn_ops ",0,train 66f4f4cf31b86b7dd20f10ce6d968348b502f2ee,tensorflow/tensorflow,"Automated g4 rollback of changelist 185072479 PiperOrigin-RevId: 185900165",constant_folding.cc,"@@ -1375,6 +1375,29 @@ void ConstantFolding::ReplaceOperationWithIdentity(int input_to_forward, graph_modified_ = true; } +void ConstantFolding::ReplaceOperationWithSnapshot(int input_to_forward, + NodeDef* node, + GraphDef* graph) { + node->set_op(""Snapshot""); + DataType dtype = node->attr().at(""T"").type(); + node->clear_attr(); + (*node->mutable_attr())[""T""].set_type(dtype); + + // Propagate the designated input through the Snapshot. + node->mutable_input()->SwapElements(0, input_to_forward); + // Add all other inputs as control dependencies. + for (int i = 1; i < node->input_size(); ++i) { + if (IsControlInput(node->input(i))) { + break; + } + const string ctrl_dep = + AddControlDependency(node->input(i), graph, node_map_.get()); + node_map_->UpdateInput(node->name(), node->input(i), ctrl_dep); + node->set_input(i, ctrl_dep); + } + graph_modified_ = true; +} + void ConstantFolding::ReplaceDivisionOfOnesByReciprocal(NodeDef* node, GraphDef* graph) { node->set_op(""Reciprocal""); @@ -1443,15 +1466,14 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, graph_modified_ = true; continue; } - const bool safe_to_use_shapes = - use_shape_info && (feed_nodes_.empty() || is_aggressive); + const bool is_mul = IsMul(*node); const bool is_matmul = IsMatMul(*node); const bool is_add = IsAdd(*node) || IsBiasAdd(*node); const bool is_sub = IsSub(*node); const bool is_any_div = IsAnyDiv(*node); // Simplify arithmetic operations with ones or zeros. - if (safe_to_use_shapes && + if (use_shape_info && (is_mul || is_matmul || is_add || is_sub || is_any_div) && properties.HasInputProperties(node->name()) && properties.HasOutputProperties(node->name())) { @@ -1475,7 +1497,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, ((is_mul && x_is_one) || (is_add && x_is_zero))) { // TODO(rmlarsen): Handle subtraction 0 - y. // 1 * y = y or 0 + y = y. 
- ReplaceOperationWithIdentity(1, node, output); + ReplaceOperationWithSnapshot(1, node, output); continue; } @@ -1495,9 +1517,9 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape); if (x_matches_output_shape && (((is_mul || is_any_div) && y_is_one) || - ((is_add || is_sub) && y_is_zero && is_aggressive))) { + ((is_add || is_sub) && y_is_zero))) { // x * 1 = x or x / 1 = x or x +/- 0 = x - ReplaceOperationWithIdentity(0, node, output); + ReplaceOperationWithSnapshot(0, node, output); continue; } @@ -1690,6 +1712,7 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* output) { + LOG(INFO) << ""Graph before: "" << item.graph.DebugString(); nodes_to_preserve_ = item.NodesToPreserve(); for (const auto& feed : item.feed) { feed_nodes_.insert(NodeName(feed.first)); @@ -1716,6 +1739,7 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, *output->mutable_library() = item.graph.library(); *output->mutable_versions() = item.graph.versions(); + LOG(INFO) << ""Graph after: "" << output->DebugString(); return Status::OK(); } ",0,train 66f4f4cf31b86b7dd20f10ce6d968348b502f2ee,tensorflow/tensorflow,"Automated g4 rollback of changelist 185072479 PiperOrigin-RevId: 185900165",constant_folding.h,"@@ -79,6 +79,8 @@ class ConstantFolding : public GraphOptimizer { bool IsZeros(const NodeDef& node) const; void ReplaceOperationWithIdentity(int input_to_forward, NodeDef* node, GraphDef* graph); + void ReplaceOperationWithSnapshot(int input_to_forward, NodeDef* node, + GraphDef* graph); Status ReplaceOperationWithConstant(double value, const TensorShapeProto& shape, NodeDef* node, GraphDef* graph); ",0,train 66f4f4cf31b86b7dd20f10ce6d968348b502f2ee,tensorflow/tensorflow,"Automated g4 rollback of changelist 185072479 PiperOrigin-RevId: 185900165",constant_folding_test.cc,"@@ -195,8 +195,7 @@ TEST_F(ConstantFoldingTest, NeutralElement) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch = {""addn"", ""matmul3"", ""matmul4""}; - ConstantFolding optimizer(RewriterConfig::AGGRESSIVE, - nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -214,11 +213,11 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ(""^zeros"", node.input(0)); EXPECT_EQ(""^y"", node.input(1)); } else if (name == ""mul3"") { - EXPECT_EQ(""Identity"", node.op()); + EXPECT_EQ(""Snapshot"", node.op()); EXPECT_EQ(""x"", node.input(0)); EXPECT_EQ(""^ones"", node.input(1)); } else if (name == ""mul4"") { - EXPECT_EQ(""Identity"", node.op()); + EXPECT_EQ(""Snapshot"", node.op()); EXPECT_EQ(""y"", node.input(0)); EXPECT_EQ(""^ones"", node.input(1)); } else if (name == ""mul5"") { @@ -230,7 +229,7 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ(""^zeros_1d"", node.input(0)); EXPECT_EQ(""^y"", node.input(1)); } else if (name == ""div1"") { - EXPECT_EQ(""Identity"", node.op()); + EXPECT_EQ(""Snapshot"", node.op()); EXPECT_EQ(""x"", node.input(0)); EXPECT_EQ(""^ones"", node.input(1)); } else if (name == ""div2"") { @@ -266,15 +265,15 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ(2, t.tensor_shape().dim(0).size()); EXPECT_EQ(3, t.tensor_shape().dim(1).size()); } else if (name == ""add1"") { - EXPECT_EQ(""Identity"", node.op()); + EXPECT_EQ(""Snapshot"", node.op()); EXPECT_EQ(""x"", node.input(0)); 
EXPECT_EQ(""^zeros"", node.input(1)); } else if (name == ""add2"") { - EXPECT_EQ(""Identity"", node.op()); + EXPECT_EQ(""Snapshot"", node.op()); EXPECT_EQ(""y"", node.input(0)); EXPECT_EQ(""^zeros"", node.input(1)); } else if (name == ""bias_add1"") { - EXPECT_EQ(""Identity"", node.op()); + EXPECT_EQ(""Snapshot"", node.op()); EXPECT_EQ(""x"", node.input(0)); EXPECT_EQ(""^zeros_1d"", node.input(1)); } else if (name == ""bias_add2"") { @@ -283,7 +282,7 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ(""zeros"", node.input(0)); EXPECT_EQ(""bias"", node.input(1)); } else if (name == ""sub1"") { - EXPECT_EQ(""Identity"", node.op()); + EXPECT_EQ(""Snapshot"", node.op()); EXPECT_EQ(""x"", node.input(0)); EXPECT_EQ(""^zeros"", node.input(1)); } else if (name == ""sub2"") { @@ -322,8 +321,7 @@ TEST_F(ConstantFoldingTest, StrengthReduce_Reciprocal) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch = {""div_f"", ""div_i"", ""realdiv""}; - ConstantFolding optimizer(RewriterConfig::AGGRESSIVE, - nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -413,8 +411,7 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_UnknownOutputShape) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ConstantFolding optimizer(RewriterConfig::AGGRESSIVE, - nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -468,8 +465,7 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_KnownOutputShape) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ConstantFolding optimizer(RewriterConfig::AGGRESSIVE, - nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -1337,7 +1333,7 @@ TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ConstantFolding fold(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */); + ConstantFolding fold(nullptr /* cpu_device */); GraphDef output; Status status = fold.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -1398,7 +1394,7 @@ TEST_F(ConstantFoldingTest, MaterializeReductionIndices) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch.push_back(""reshape""); - ConstantFolding fold(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */); + ConstantFolding fold(nullptr /* cpu_device */); GraphDef output; Status status = fold.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); ",0,train 66f4f4cf31b86b7dd20f10ce6d968348b502f2ee,tensorflow/tensorflow,"Automated g4 rollback of changelist 185072479 PiperOrigin-RevId: 185900165",snapshot_op.h,"@@ -35,12 +35,17 @@ class SnapshotOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); Tensor* output = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(0, input.shape(), &output)); - const Device& device = context->eigen_device(); - device.memcpy(output->template flat().data(), - input.template flat().data(), - input.NumElements() * sizeof(Scalar)); + // Try to use buffer forwarding to avoid an explicit copy. 
+ OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {0}, 0, input.shape(), &output)); + if (!output->SharesBufferWith(input)) { + // We had to allocate a new buffer since the refcount on the input was + // greater than 1. Copy the input to the new buffer. + const Device& device = context->eigen_device(); + device.memcpy(output->template flat().data(), + input.template flat().data(), + input.NumElements() * sizeof(Scalar)); + } } }; ",0,train 66f4f4cf31b86b7dd20f10ce6d968348b502f2ee,tensorflow/tensorflow,"Automated g4 rollback of changelist 185072479 PiperOrigin-RevId: 185900165",cluster_test.py,"@@ -45,7 +45,7 @@ class ClusterTest(test.TestCase): op_perfs, run_time, step_stats = grappler_cluster.MeasureCosts( grappler_item) self.assertTrue(run_time > 0) - self.assertEqual(len(op_perfs), 7) + self.assertEqual(len(op_perfs), 8) self.assertTrue(step_stats.dev_stats) def testNoDetailedStats(self): @@ -125,7 +125,7 @@ class ClusterTest(test.TestCase): disable_detailed_stats=False, disable_timeline=False) as gcluster: op_perfs, run_time, step_stats = gcluster.MeasureCosts(grappler_item) self.assertTrue(run_time > 0) - self.assertEqual(len(op_perfs), 7) + self.assertEqual(len(op_perfs), 8) self.assertTrue(step_stats.dev_stats) def testAvailableOps(self): ",0,train b6877dfa7b9c45bfecc66a22e1922cc44c37b2fc,tensorflow/tensorflow,"Add migration block for tf.compat.v1.Dimension PiperOrigin-RevId: 388326479 Change-Id: I634efbe2c988ebd04806c1d466b413f395c6dd93",tensor_shape.py,"@@ -183,7 +183,17 @@ def dimension_at_index(shape, index): @tf_export(v1=[""Dimension""]) class Dimension(object): - """"""Represents the value of one dimension in a TensorShape."""""" + """"""Represents the value of one dimension in a TensorShape. + + @compatibility(TF2) + In TF2, members of a `TensorShape` object are integers. The `Dimension` class + is not part of TF2's data model. + + Please refer to the [TensorShape section of the migration guide] + (https://www.tensorflow.org/guide/migrate/index#tensorshape) on common code + patterns adapting Dimension objects to a TF2 syntax. + @end_compatibility + """""" __slots__ = [""_value""] ",0,train 1e5c128c551050d9e43fbac0d57a432596a799ad,tensorflow/tensorflow,"Adding comments to the FileIO class. Also adding a couple of more test cases. Change: 129653503",file_io.py,"@@ -29,36 +29,105 @@ from tensorflow.python.util import compat def file_exists(filename): + """"""Determines whether a path exists or not. + + Args: + filename: string, a path + + Returns: + True if the path exists, whether its a file or a directory. + """""" return pywrap_tensorflow.FileExists(compat.as_bytes(filename)) def delete_file(filename): + """"""Deletes the file located at 'filename'. + + Args: + filename: string, a filename + + Raises: + errors.OpError: Propagates any errors reported by the FileSystem API. E.g., + NotFoundError if the file does not exist. + """""" with errors.raise_exception_on_not_ok_status() as status: pywrap_tensorflow.DeleteFile(compat.as_bytes(filename), status) def read_file_to_string(filename): + """"""Reads the entire contents of a file to a string. + + Args: + filename: string, path to a file + + Returns: + contents of the file as a string + + Raises: + errors.OpError: Raises variety of errors that are subtypes e.g. + NotFoundError etc. 
+ """""" with errors.raise_exception_on_not_ok_status() as status: return pywrap_tensorflow.ReadFileToString(compat.as_bytes(filename), status) def write_string_to_file(filename, file_content): + """"""Writes a string to a given file. + + Args: + filename: string, path to a file + file_content: string, contents that need to be written to the file + + Raises: + errors.OpError: If there are errors during the operation. + """""" with errors.raise_exception_on_not_ok_status() as status: pywrap_tensorflow.WriteStringToFile( compat.as_bytes(filename), compat.as_bytes(file_content), status) def get_matching_files(filename): + """"""Returns a list of files that match the given pattern. + + Args: + filename: string, the pattern + + Returns: + Returns a list of strings containing filenames that match the given pattern. + + Raises: + errors.OpError: If there are filesystem / directory listing errors. + """""" with errors.raise_exception_on_not_ok_status() as status: return pywrap_tensorflow.GetMatchingFiles(compat.as_bytes(filename), status) def create_dir(dirname): + """"""Creates a directory with the name 'dirname'. + + Args: + dirname: string, name of the directory to be created + + Notes: + The parent directories need to exist. Use recursive_create_dir instead if + there is the possibility that the parent dirs don't exist. + + Raises: + errors.OpError: If the operation fails. + """""" with errors.raise_exception_on_not_ok_status() as status: pywrap_tensorflow.CreateDir(compat.as_bytes(dirname), status) def recursive_create_dir(dirname): + """"""Create a directory and all parent/intermediate directories. + + Args: + dirname: string, name of the directory to be created + + Raises: + errors.OpError: If the operation fails. + """""" with errors.raise_exception_on_not_ok_status() as status: dirs = dirname.split('/') for i in range(len(dirs)): @@ -68,23 +137,64 @@ def recursive_create_dir(dirname): def copy(oldpath, newpath, overwrite=False): + """"""Copies data from oldpath to newpath. + + Args: + oldpath: string, name of the file who's contents need to be copied + newpath: string, name of the file to which to copy to + overwrite: boolean, if false its an error for newpath to be occupied by an + existing file. + + Raises: + errors.OpError: If the operation fails. + """""" with errors.raise_exception_on_not_ok_status() as status: pywrap_tensorflow.CopyFile( compat.as_bytes(oldpath), compat.as_bytes(newpath), overwrite, status) def rename(oldname, newname, overwrite=False): + """"""Rename or move a file / directory. + + Args: + oldname: string, pathname for a file + newname: string, pathname to which the file needs to be moved + overwrite: boolean, if false its an error for newpath to be occupied by an + existing file. + + Raises: + errors.OpError: If the operation fails. + """""" with errors.raise_exception_on_not_ok_status() as status: - return pywrap_tensorflow.RenameFile( + pywrap_tensorflow.RenameFile( compat.as_bytes(oldname), compat.as_bytes(newname), overwrite, status) def delete_recursively(dirname): + """"""Deletes everything under dirname recursively. + + Args: + dirname: string, a path to a directory + + Raises: + errors.OpError: If the operation fails. + """""" with errors.raise_exception_on_not_ok_status() as status: - return pywrap_tensorflow.DeleteRecursively(compat.as_bytes(dirname), status) + pywrap_tensorflow.DeleteRecursively(compat.as_bytes(dirname), status) def is_directory(dirname): + """"""Returns whether the path is a directory or not. 
+ + Args: + dirname: string, path to a potential directory + + Returns: + True, if the path is a directory; False otherwise + + Raises: + errors.OpError: If the path doesn't exist or other errors + """""" with errors.raise_exception_on_not_ok_status() as status: return pywrap_tensorflow.IsDirectory(compat.as_bytes(dirname), status) @@ -98,11 +208,11 @@ def list_directory(dirname): Args: dirname: string, path to a directory - Raises: - NotFoundError if directory doesn't exist - Returns: [filename1, filename2, ... filenameN] + + Raises: + errors.NotFoundError if directory doesn't exist """""" if not is_directory(dirname): raise errors.NotFoundError(None, None, 'Could not find directory') @@ -154,6 +264,17 @@ def walk(top, in_order=True): def stat(filename): + """"""Returns file statistics for a given path. + + Args: + filename: string, path to a file + + Returns: + FileStatistics struct that contains information about the path + + Raises: + errors.OpError: If the operation fails. + """""" file_statistics = pywrap_tensorflow.FileStatistics() with errors.raise_exception_on_not_ok_status() as status: pywrap_tensorflow.Stat(compat.as_bytes(filename), file_statistics, status) ",0,train 1e5c128c551050d9e43fbac0d57a432596a799ad,tensorflow/tensorflow,"Adding comments to the FileIO class. Also adding a couple of more test cases. Change: 129653503",file_io_test.py,"@@ -38,6 +38,8 @@ class FileIoTest(tf.test.TestCase): def testFileDoesntExist(self): file_path = os.path.join(self._base_dir, ""temp_file"") self.assertFalse(file_io.file_exists(file_path)) + with self.assertRaises(errors.NotFoundError): + _ = file_io.read_file_to_string(file_path) def testFileWrite(self): file_path = os.path.join(self._base_dir, ""temp_file"") @@ -52,6 +54,11 @@ class FileIoTest(tf.test.TestCase): file_io.delete_file(file_path) self.assertFalse(file_io.file_exists(file_path)) + def testFileDeleteFail(self): + file_path = os.path.join(self._base_dir, ""temp_file"") + with self.assertRaises(errors.NotFoundError): + file_io.delete_file(file_path) + def testGetMatchingFiles(self): dir_path = os.path.join(self._base_dir, ""temp_dir"") file_io.create_dir(dir_path) ",0,train e6adc8a90b9eca1133a249121683ac9ec0570002,tensorflow/tensorflow,Add tests in TFLite micro for Logistic Int8,logistic.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include ""tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"" #include ""tensorflow/lite/kernels/internal/reference/logistic.h"" #include ""tensorflow/lite/c/builtin_op_data.h"" @@ -27,11 +28,59 @@ namespace tflite { namespace ops { namespace micro { namespace activations { - +namespace { constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { +struct OpData { + int32_t input_zero_point; + int32_t input_range_radius; + int32_t input_multiplier; + int32_t input_left_shift; +}; + +TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node, + OpData* data) { + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, input->type, output->type); + if (input->type == kTfLiteInt8) { + TF_LITE_ENSURE_EQ(context, output->params.zero_point, + std::numeric_limits::min()); + TF_LITE_ENSURE_EQ(context, static_cast(output->params.scale), + 1. 
/ 256); + + static constexpr int kInputIntegerBits = 4; + const double input_real_multiplier = + static_cast(input->params.scale) * + static_cast(1 << (31 - kInputIntegerBits)); + + const double q = std::frexp(input_real_multiplier, &data->input_left_shift); + data->input_multiplier = static_cast(TfLiteRound(q * (1ll << 31))); + + data->input_range_radius = + CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31); + } + return kTfLiteOk; +} +} // namespace + +void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) { + OpData* data = new OpData(); + return data; +} + +void LogisticFree(TfLiteContext* context, void* buffer) {} + +TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + CalculateArithmeticOpData(context, node, data); + + return kTfLiteOk; +} + +TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); @@ -52,11 +101,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } else if (input->type == kTfLiteInt8) { switch (output->type) { case kTfLiteInt8: { - reference_ops::Logistic( - GetTensorShape(input), GetTensorData(input), - input->params.scale, input->params.zero_point, - GetTensorShape(output), GetTensorData(output), - output->params.scale, output->params.zero_point); + OpData* data = reinterpret_cast(node->user_data); + reference_integer_ops::Logistic( + input->params.zero_point, data->input_range_radius, + data->input_multiplier, data->input_left_shift, + NumElements(input->dims), GetTensorData(input), + GetTensorData(output)); return kTfLiteOk; } default: @@ -79,14 +129,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace activations TfLiteRegistration* Register_LOGISTIC() { - static TfLiteRegistration r = {/*init=*/nullptr, - /*free=*/nullptr, - /*prepare=*/nullptr, - /*invoke=*/activations::Eval, - /*profiling_string=*/nullptr, - /*builtin_code=*/0, - /*custom_name=*/nullptr, - /*version=*/0}; + static TfLiteRegistration r = { + activations::LogisticInit, activations::LogisticFree, + activations::LogisticPrepare, activations::LogisticEval, + /*profiling_string=*/nullptr, /*builtin_code=*/0, + /*custom_name=*/nullptr, /*version=*/0}; return &r; } } // namespace micro ",0,test e6adc8a90b9eca1133a249121683ac9ec0570002,tensorflow/tensorflow,Add tests in TFLite micro for Logistic Int8,logistic_test.cc,"@@ -82,13 +82,75 @@ void TestLogisticFloat(std::initializer_list input_dims_data, } } +void TestLogisticInt8(std::initializer_list input_dims_data, + std::initializer_list input_data, float input_min, + float input_max, + std::initializer_list expected_output_data, + std::initializer_list output_dims_data, + float output_min, float output_max, int8_t* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int output_elements_count = ElementCount(*output_dims); + + constexpr int inputs_size = 1; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateQuantizedTensor(input_data, input_dims, ""input_tensor"", input_min, + input_max), + CreateQuantizedTensor(output_data, output_dims, ""output_tensor"", + output_min, output_max), + }; + + TfLiteContext context; + PopulateContext(tensors, 
tensors_size, micro_test::reporter, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = + resolver.FindOp(tflite::BuiltinOperator_LOGISTIC, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + const char* init_data = nullptr; + size_t init_data_size = 1; + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, init_data, init_data_size); + } + int inputs_array_data[] = {1, 0}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 1}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = nullptr; + node.user_data = user_data; + node.builtin_data = nullptr; + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + if (registration->free) { + registration->free(&context, user_data); + } + for (int i = 0; i < output_elements_count; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], + 1); + } +} + } // namespace } // namespace testing } // namespace tflite TF_LITE_MICRO_TESTS_BEGIN -TF_LITE_MICRO_TEST(SimpleTest) { +TF_LITE_MICRO_TEST(SimpleTestFloat) { const int output_elements_count = 10; float output_data[output_elements_count]; tflite::testing::TestLogisticFloat({2, 1, 5}, // Input shape. @@ -121,4 +183,38 @@ TF_LITE_MICRO_TEST(SimpleTest) { output_data); } +TF_LITE_MICRO_TEST(SimpleTestInt8) { + using tflite::testing::F2QS; + + const float input_min = -63.5f; + const float input_max = 64.0f; + const float output_min = 0.0f; + const float output_max = (255.0f / 256.0f); + + const int output_elements_count = 10; + int8_t output_data[output_elements_count]; + tflite::testing::TestLogisticInt8( + {2, 1, output_elements_count}, // Input shape. + {F2QS(1.0, input_min, input_max), F2QS(2.0, input_min, input_max), + F2QS(3.0, input_min, input_max), F2QS(4.0, input_min, input_max), + F2QS(5.0, input_min, input_max), F2QS(-1.0, input_min, input_max), + F2QS(-2.0, input_min, input_max), F2QS(-3.0, input_min, input_max), + F2QS(-4.0, input_min, input_max), F2QS(-5.0, input_min, input_max)}, + input_min, input_max, // Input quantized range. + { // Expected results. + F2QS(0.73105858, output_min, output_max), + F2QS(0.88079708, output_min, output_max), + F2QS(0.95257413, output_min, output_max), + F2QS(0.98201379, output_min, output_max), + F2QS(0.99330715, output_min, output_max), + F2QS(0.26894142, output_min, output_max), + F2QS(0.11920292, output_min, output_max), + F2QS(0.04742587, output_min, output_max), + F2QS(0.01798621, output_min, output_max), + F2QS(0.00669285, output_min, output_max)}, + {2, 1, output_elements_count}, // Output shape. + output_min, output_max, // Output quantized range. + output_data); +} + TF_LITE_MICRO_TESTS_END ",0,test 932e1bfb87ac3d0e7d6d4c2979f77817bcc41590,tensorflow/tensorflow,Specify boolean in the soft_placement docstrings,config.py,"@@ -254,7 +254,7 @@ def get_soft_device_placement(): An error is raised when an Op cannot be placed onto its intended device. Returns: - If soft placement is enabled. + A boolean indicating if soft placement is enabled. 
"""""" return context.context().soft_device_placement @@ -269,7 +269,7 @@ def set_soft_device_placement(enabled): 3. need to co-locate with reftype input(s) which are from CPU Args: - enabled: Whether to enable soft placement. + enabled: A boolean indicating whether to enable soft placement. """""" context.context().soft_device_placement = enabled ",0,train d6362c90e7ef942808bc31887175e2c0ef437896,tensorflow/tensorflow,"Add var name to errors on variable restore. Change: 152963830",save_restore_tensor.cc,"@@ -268,7 +268,8 @@ Status RestoreTensorsV2(OpKernelContext* context, const Tensor& prefix, &parsed_slice, &parsed_slice_shape)); if (!restored_full_shape.IsSameSize(parsed_full_shape)) { return errors::InvalidArgument( - ""Shape in shape_and_slice spec "", parsed_full_shape.DebugString(), + ""tensor_name = "", tensor_name, ""; shape in shape_and_slice spec "", + parsed_full_shape.DebugString(), "" does not match the shape stored in checkpoint: "", restored_full_shape.DebugString()); } @@ -279,10 +280,10 @@ Status RestoreTensorsV2(OpKernelContext* context, const Tensor& prefix, reader.LookupSlice(tensor_name, parsed_slice, restored_tensor)); } if (dtypes[i] != restored_tensor->dtype()) { - return errors::InvalidArgument(""Expected dtype "", - DataTypeString(dtypes[i]), - "" does not equal restored dtype "", - DataTypeString(restored_tensor->dtype())); + return errors::InvalidArgument( + ""tensor_name = "", tensor_name, ""; expected dtype "", + DataTypeString(dtypes[i]), "" does not equal restored dtype "", + DataTypeString(restored_tensor->dtype())); } } return Status::OK(); ",0,train 0421b4be60b3d641e5e0c2ac133fba4c9d80a44e,tensorflow/tensorflow,"Update GraphDef version to 714. PiperOrigin-RevId: 364510348 Change-Id: I18c89e5492256773a4e3b41b2e04400735ffca4f",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 713 // Updated: 2021/3/22 +#define TF_GRAPH_DEF_VERSION 714 // Updated: 2021/3/23 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // ",0,train 45f2aab17f66d235f4643dda142850457ad9e894,tensorflow/tensorflow,"Removed unnecessary check. More uniform code. PiperOrigin-RevId: 286472636 Change-Id: I6d2f9356993c2b08ea8985003abbe4e11194d833",convolution_transposed_thin.cc,"@@ -87,24 +87,17 @@ std::string GenerateConvolutionTransposedCode( for (int x = 0; x < kernel_size.x; ++x) { std::string r_s = "" r["" + std::to_string(y) + ""]["" + std::to_string(x) + ""]""; - const std::string to_accum = - op_def.precision == CalculationsPrecision::F32_F16 ? 
""convert_float"" - : """"; for (int d = 0; d < dst_channels; ++d) { - c += r_s + postfix[d] + "" = "" + to_accum + ""(dot(src, filters["" + - std::to_string(index) + ""]));\n""; + c += r_s + postfix[d] + "" = dot(src, filters["" + std::to_string(index) + + ""]);\n""; index++; } } } c += "" }\n""; for (int i = 1; i < src_depth; ++i) { - if (op_def.precision != CalculationsPrecision::F32_F16) { - c += "" if (X > "" + std::to_string(-i) + - "") { // always true, to reduce registers usage\n""; - } else { - c += "" {\n""; - } + c += "" if (X > "" + std::to_string(-i) + + "") { // always true, to reduce registers usage\n""; c += "" FLT4 src = "" + src_tensor.Read4D(""X"", ""Y"", std::to_string(i), batch_id) + "";\n""; for (int y = 0; y < kernel_size.y; ++y) { @@ -112,8 +105,8 @@ std::string GenerateConvolutionTransposedCode( std::string r_s = "" r["" + std::to_string(y) + ""]["" + std::to_string(x) + ""]""; for (int d = 0; d < dst_channels; ++d) { - c += r_s + postfix[d] + "" += TO_ACCUM_FLT(dot(src, filters["" + - std::to_string(index) + ""]));\n""; + c += r_s + postfix[d] + "" += dot(src, filters["" + + std::to_string(index) + ""]);\n""; index++; } } ",0,train 879b758fae5d0a5babf88846468afaab22299524,tensorflow/tensorflow,"Add correct import logic for string constants In TFLite flatbuffer, strings are serialized with tflite::DynamicBuffer. PiperOrigin-RevId: 387109297 Change-Id: I2fd2fb9c2905e54b34f417ecaa4efe5e1c6a479c",flatbuffer_import.cc,"@@ -77,6 +77,7 @@ limitations under the License. #include ""tensorflow/lite/model.h"" #include ""tensorflow/lite/schema/schema_generated.h"" #include ""tensorflow/lite/schema/schema_utils.h"" +#include ""tensorflow/lite/string_util.h"" using llvm::ArrayRef; using mlir::Builder; @@ -358,6 +359,14 @@ tensorflow::TensorProto ConvertTfliteConstTensor( for (auto dim : tensor.shape) { shape->add_dim()->set_size(int64_t{dim}); } + // TensorFlow Lite uses tflite::DynamicBufer to encode vector of strings. + if (tensor.type == tflite::TensorType_STRING) { + for (int i = 0; i < tflite::GetStringCount(buffer.data()); ++i) { + tflite::StringRef str = tflite::GetString(buffer.data(), i); + ret.add_string_val(str.str, str.len); + } + return ret; + } std::string content; content.assign(reinterpret_cast(buffer.data()), buffer.size()); ret.set_tensor_content(content); ",0,test 62f3c16a9aa823acd9ba919fd232ae44991c7bbc,tensorflow/tensorflow,"Fix bug in upgrade script where function_transformers aren't skipped when `import tensorflow.compat.v* as tf` is seen. PiperOrigin-RevId: 249326595",ast_edits.py,"@@ -233,6 +233,7 @@ class NoUpdateSpec(APIChangeSpec): self.function_warnings = {} self.change_to_function = {} self.module_deprecations = {} + self.function_transformers = {} self.import_renames = {} ",0,train 62f3c16a9aa823acd9ba919fd232ae44991c7bbc,tensorflow/tensorflow,"Fix bug in upgrade script where function_transformers aren't skipped when `import tensorflow.compat.v* as tf` is seen. PiperOrigin-RevId: 249326595",tf_upgrade_v2.py,"@@ -1519,6 +1519,7 @@ class TFAPIChangeSpec(ast_edits.NoUpdateSpec): self.function_warnings = {} self.change_to_function = {} self.module_deprecations = module_deprecations_v2.MODULE_DEPRECATIONS + self.function_transformers = {} self.import_renames = {} return visitor.log, visitor.warnings_and_errors ",0,train 62f3c16a9aa823acd9ba919fd232ae44991c7bbc,tensorflow/tensorflow,"Fix bug in upgrade script where function_transformers aren't skipped when `import tensorflow.compat.v* as tf` is seen. 
PiperOrigin-RevId: 249326595",tf_upgrade_v2_test.py,"@@ -2072,19 +2072,20 @@ def _log_prob(self, x): self.assertEmpty(errors) def test_api_spec_reset_between_files(self): - old_symbol = ""tf.conj(a)"" - new_symbol = ""tf.math.conj(a)"" - - ## Test that the api spec is reset in between files: - import_header = ""import tensorflow.compat.v2 as tf\n"" - text_a = import_header + old_symbol - expected_text_a = import_header + old_symbol - text_b = old_symbol - expected_text_b = new_symbol - results = self._upgrade_multiple([text_a, text_b]) - result_a, result_b = results[0], results[1] - self.assertEqual(result_a[3], expected_text_a) - self.assertEqual(result_b[3], expected_text_b) + for old_symbol, new_symbol in [ + (""tf.conj(a)"", ""tf.math.conj(a)""), + (""tf.to_int32(x)"", ""tf.cast(x, dtype=tf.int32)"")]: + + ## Test that the api spec is reset in between files: + import_header = ""import tensorflow.compat.v2 as tf\n"" + text_a = import_header + old_symbol + expected_text_a = import_header + old_symbol + text_b = old_symbol + expected_text_b = new_symbol + results = self._upgrade_multiple([text_a, text_b]) + result_a, result_b = results[0], results[1] + self.assertEqual(result_a[3], expected_text_a) + self.assertEqual(result_b[3], expected_text_b) class TestUpgradeFiles(test_util.TensorFlowTestCase): ",0,train db63348bf14d911f2eebeb418a0b570b65b64f92,tensorflow/tensorflow,"Add test with tf.cond. PiperOrigin-RevId: 195745718",make_test_graphs.py,"@@ -78,6 +78,22 @@ def tfadd_with_ckpt_saver(out_dir): f.write(saver.as_saver_def().SerializeToString()) +def tfassert_eq(_): + x = array_ops.placeholder(dtypes.int32, name='x_hold') + y = array_ops.placeholder(dtypes.int32, name='y_hold') + control_flow_ops.Assert( + math_ops.equal(x, y), ['Expected x == y.'], name='assert_eq') + math_ops.add(x, math_ops.negative(y), name='x_y_diff') + + +def tfcond(_): + p = array_ops.placeholder(dtypes.bool, name='p_hold') + x = array_ops.placeholder(dtypes.int32, name='x_hold') + y = array_ops.placeholder(dtypes.int32, name='y_hold') + z = control_flow_ops.cond(p, lambda: x, lambda: y) + array_ops.identity(z, name='result') + + def tfgather(_): params = array_ops.placeholder(dtypes.float32, name='params') indices = array_ops.placeholder(dtypes.int32, name='indices') @@ -126,14 +142,6 @@ def tfsplits(_): array_ops.identity(y, name='result') -def tfassert_eq(_): - x = array_ops.placeholder(dtypes.int32, name='x_hold') - y = array_ops.placeholder(dtypes.int32, name='y_hold') - control_flow_ops.Assert( - math_ops.equal(x, y), ['Expected x == y.'], name='assert_eq') - math_ops.add(x, math_ops.negative(y), name='x_y_diff') - - def write_graph(build_graph, out_dir): """"""Build a graph using build_graph and write it out."""""" g = ops.Graph() @@ -148,12 +156,13 @@ def main(_): write_graph(tfadd, FLAGS.out_dir) write_graph(tfadd_with_ckpt, FLAGS.out_dir) write_graph(tfadd_with_ckpt_saver, FLAGS.out_dir) + write_graph(tfassert_eq, FLAGS.out_dir) + write_graph(tfcond, FLAGS.out_dir) + write_graph(tffunction, FLAGS.out_dir) write_graph(tfgather, FLAGS.out_dir) write_graph(tfmatmul, FLAGS.out_dir) write_graph(tfmatmulandadd, FLAGS.out_dir) - write_graph(tffunction, FLAGS.out_dir) write_graph(tfsplits, FLAGS.out_dir) - write_graph(tfassert_eq, FLAGS.out_dir) if __name__ == '__main__': ",0,test db63348bf14d911f2eebeb418a0b570b65b64f92,tensorflow/tensorflow,"Add test with tf.cond. PiperOrigin-RevId: 195745718",tfcompile_test.cc,"@@ -21,6 +21,7 @@ limitations under the License. 
#include ""tensorflow/compiler/aot/tests/test_graph_tfadd_with_ckpt.h"" #include ""tensorflow/compiler/aot/tests/test_graph_tfadd_with_ckpt_saver.h"" #include ""tensorflow/compiler/aot/tests/test_graph_tfassert_eq.h"" +#include ""tensorflow/compiler/aot/tests/test_graph_tfcond.h"" #include ""tensorflow/compiler/aot/tests/test_graph_tffunction.h"" #include ""tensorflow/compiler/aot/tests/test_graph_tfgather.h"" #include ""tensorflow/compiler/aot/tests/test_graph_tfmatmul.h"" @@ -150,6 +151,31 @@ TEST(TFCompileTest, AddWithCkptSaver) { EXPECT_EQ(add_const.result0_data(), add_const.results()[0]); } +TEST(TFCompileTest, Cond) { + CondComp cond; + EXPECT_EQ(cond.arg0_data(), cond.args()[0]); + EXPECT_EQ(cond.arg1_data(), cond.args()[1]); + EXPECT_EQ(cond.arg2_data(), cond.args()[2]); + cond.arg1() = 10; + cond.arg2() = 20; + { + cond.arg0() = true; + const int32 expected_result = cond.arg1(); + EXPECT_TRUE(cond.Run()); + EXPECT_EQ(cond.result0(), expected_result); + EXPECT_EQ(cond.result0_data()[0], expected_result); + EXPECT_EQ(cond.result0_data(), cond.results()[0]); + } + { + cond.arg0() = false; + const int32 expected_result = cond.arg2(); + EXPECT_TRUE(cond.Run()); + EXPECT_EQ(cond.result0(), expected_result); + EXPECT_EQ(cond.result0_data()[0], expected_result); + EXPECT_EQ(cond.result0_data(), cond.results()[0]); + } +} + TEST(TFCompileTest, Gather) { GatherComp gather; EXPECT_EQ(gather.arg0_data(), gather.args()[0]); ",0,test bd2f1ed1c28505c3ab3b325e8c481091b111db3a,tensorflow/tensorflow,"Update GraphDef version to 982. PiperOrigin-RevId: 416746592 Change-Id: I748de8f3442c4a09305d8d08f1f1849a460f4ce5",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 981 // Updated: 2021/12/15 +#define TF_GRAPH_DEF_VERSION 982 // Updated: 2021/12/16 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // ",0,test 690d47e60bdb8adfd387f0c0db01af153a3ed6b9,tensorflow/tensorflow,"TpuDriver: Improve handling of restarted clients/servers. PiperOrigin-RevId: 280727303 Change-Id: I9c11368de26d8ce851c799a53a903a452d385975",event_id.h,"@@ -31,8 +31,8 @@ namespace tpu_driver { // This class provides a typed interface for these values as well as support for // hashing and ostreams (for logging). struct EventId { - int64_t client_id; - int64_t operation_id; + uint64_t client_id; + uint64_t operation_id; template friend H AbslHashValue(H h, const EventId& c) { @@ -51,9 +51,9 @@ struct EventId { return absl::StrCat(client_id, "":"", operation_id); } - int64_t AsInt() const { return client_id << 44 | operation_id; } + uint64_t AsInt() const { return client_id << 44 | operation_id; } - static EventId FromInt(int64_t value) { + static EventId FromInt(uint64_t value) { return EventId{value >> 44, value & 0xfffffffffff}; } }; ",0,test 690d47e60bdb8adfd387f0c0db01af153a3ed6b9,tensorflow/tensorflow,"TpuDriver: Improve handling of restarted clients/servers. 
PiperOrigin-RevId: 280727303 Change-Id: I9c11368de26d8ce851c799a53a903a452d385975",grpc_tpu_driver.cc,"@@ -64,6 +64,25 @@ class GrpcEvent : public Event { GrpcTpuStream* stream_; }; +class ErrorEvent : public GrpcEvent { + public: + explicit ErrorEvent(Status status) : GrpcEvent(EventId{0, 0}, nullptr) { + status_ = status; + } + + xla::Status Await() override { return status_; } + absl::optional AwaitWithTimeout( + absl::Duration duration) override { + return status_; + } + void AddCallback(std::function callback) override { + callback(status_); + } + + private: + Status status_; +}; + class GrpcBufferHandle : public BufferHandle { public: explicit GrpcBufferHandle( @@ -417,17 +436,19 @@ class GrpcTpuDriver : public TpuDriver { static std::unique_ptr CreateTpuDriverStub( const TpuDriverConfig& config); + uint32 client_id() const { return client_id_; } + private: std::unique_ptr AllocateStream(int32_t core_id); const TpuDriverConfig config_; - const int32_t client_id_; + const uint32_t client_id_; // Map from stream IDs to streams. absl::flat_hash_map> streams_; std::unique_ptr host_stream_; // Shared by all streams. - std::atomic operation_id_{0}; -}; + std::atomic operation_id_{0}; +}; // namespace GrpcEvent::~GrpcEvent() { stream_->DeleteEvent(id_); } @@ -464,8 +485,11 @@ GrpcTpuStream::~GrpcTpuStream() { // Mark all remaining events invalid. absl::MutexLock lock(&events_mutex_); for (auto e : events_) { - UpdateEventStatus(e.first, xla::Status(tensorflow::error::Code::ABORTED, - ""Tpustream was closed."")); + if (!e.second.done) { + LOG(ERROR) << ""Resetting: "" << e.first; + UpdateEventStatus(e.first, xla::Status(tensorflow::error::Code::ABORTED, + ""Driver was closed."")); + } } } VLOG(1) << ""Closing stream.""; @@ -511,8 +535,9 @@ void GrpcTpuStream::UpdateEventStatus(EventId id, Status status) { // This is the first time this event finishes. Remember the results and call // the callbacks. - VLOG(1) << ""Response received for GrpcEvent "" << id << "". Firing "" - << it->second.callbacks.size() << "" callbacks.""; + VLOG(1) << ""Response received for GrpcEvent "" << id << "". "" + << status.ToString() << "". Firing "" << it->second.callbacks.size() + << "" callbacks.""; it->second.done = true; it->second.status = status; for (const auto& callback : it->second.callbacks) { @@ -544,6 +569,7 @@ absl::optional GrpcTpuStream::WaitForEvent(EventId id, events_mutex_.AssertHeld(); return !events_.contains(id) || events_[id].done; }; + if (events_mutex_.AwaitWithTimeout(absl::Condition(&done), duration)) { return events_.contains(id) ? 
events_[id].status : Status(); } @@ -594,6 +620,8 @@ void GrpcTpuStream::StreamWriterFn() { reqs.push_back(StreamRequest()); request_bytes = 0; } + VLOG(1) << ""Sending request: "" << EventId::FromInt(e->operation_id()); + VLOG(2) << ""Sending request: "" << e->DebugString(); reqs.back().mutable_entry()->AddAllocated(e); } num_pending_requests_ = 0; @@ -611,9 +639,10 @@ void GrpcTpuStream::StreamWriterFn() { void GrpcTpuStream::StreamReaderFn() { StreamResponse resp; while (stream_->Read(&resp)) { - VLOG(1) << ""Received response: "" << resp.DebugString(); + VLOG(2) << ""Received response: "" << resp.DebugString(); for (const StreamResponse::Entry entry : resp.entry()) { EventId event_id = EventId::FromInt(entry.operation_id()); + VLOG(1) << ""Received response for: "" << event_id; TraceMe activity(""GrpcTpuStream::RequestComplete""); if (entry.has_transfer_from()) { @@ -805,8 +834,15 @@ std::unique_ptr GrpcTpuStream::LoadProgram( InitializeRequest(req.get(), wait_for); TraceMe activity(absl::StrCat(""GrpcTpuStream::LoadProgram"")); req->mutable_load()->set_core_id(core_id); - req->mutable_load()->set_compiled_program_handle( - static_cast(handle)->id().AsInt()); + auto grpc_handle = static_cast(handle); + if (grpc_handle->id().client_id != driver_->client_id()) { + auto event = absl::make_unique( + xla::InvalidArgument(""Invalid program handle (wrong client id). Did "" + ""you restart the server or use a stale handle?"")); + return absl::make_unique(event->id(), + std::move(event)); + } + req->mutable_load()->set_compiled_program_handle(grpc_handle->id().AsInt()); auto event = absl::make_unique(EventId::FromInt(req->operation_id()), this); AddWriteRequest(std::move(req)); @@ -835,13 +871,33 @@ std::unique_ptr GrpcTpuStream::ExecuteProgram( absl::Span wait_for) { auto req = absl::make_unique(); InitializeRequest(req.get(), wait_for); + auto program_handle = static_cast(program); + if (program_handle->id().client_id != driver_->client_id()) { + return absl::make_unique( + xla::InvalidArgument(""Invalid program handle (wrong client id). Did "" + ""you restart the server or use a stale handle?"")); + } + req->mutable_execute()->set_loaded_program_handle( - static_cast(program)->id().AsInt()); + program_handle->id().AsInt()); + for (BufferHandle* input : inputs) { - req->mutable_execute()->add_input_handle( - static_cast(input)->id().AsInt()); + auto* grpc_handle = static_cast(input); + if (grpc_handle->id().client_id != driver_->client_id()) { + return absl::make_unique(xla::InvalidArgument( + ""Invalid input buffer (wrong client id). Did you restart the server "" + ""or use a stale handle?"")); + } + req->mutable_execute()->add_input_handle(grpc_handle->id().AsInt()); } + for (BufferHandle* output : outputs) { + auto* grpc_handle = static_cast(output); + if (grpc_handle->id().client_id != driver_->client_id()) { + return absl::make_unique(xla::InvalidArgument( + ""Invalid output buffer (wrong client id). Did you restart the server "" + ""or use a stale handle?"")); + } req->mutable_execute()->add_output_handle( static_cast(output)->id().AsInt()); } ",0,test bbd2047cf3a715a1431889ad8f558576a5382876,tensorflow/tensorflow,"[XLA:HLO] Minor fix for Clamp shape inference, and add some tests. Previously Clamp(f32[5], f32[], f32[9]) returned success, but it now returns a failure. Noticed while debugging a different problem. 
Change: 151835981",shape_inference.cc,"@@ -633,26 +633,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( TF_DCHECK_OK(ShapeUtil::ValidateShape(ehs)); switch (operation) { case TRIOP_CLAMP: - TF_RETURN_IF_ERROR( - ExpectNotTupleOrOpaque(lhs, ""lhs of ternary operation"")); - TF_RETURN_IF_ERROR( - ExpectNotTupleOrOpaque(rhs, ""rhs of ternary operation"")); - TF_RETURN_IF_ERROR( - ExpectNotTupleOrOpaque(ehs, ""ehs of ternary operation"")); - if (((ShapeUtil::Compatible(lhs, rhs) || ShapeUtil::Rank(lhs) == 0) && - (ShapeUtil::Compatible(rhs, ehs) || ShapeUtil::Rank(ehs) == 0))) { - return rhs; - } - if (ShapeUtil::Rank(rhs) == 0) { - if (ShapeUtil::Compatible(lhs, ehs)) { - return lhs; - } - return ShapeUtil::Rank(ehs) == 0 ? lhs : ehs; - } - return Unimplemented(""not yet implemented: %s, %s %s"", - lhs.ShortDebugString().c_str(), - ehs.ShortDebugString().c_str(), - rhs.ShortDebugString().c_str()); + return InferClampShape(lhs, rhs, ehs); case TRIOP_SELECT: return InferSelectShape(lhs, rhs, ehs); case TRIOP_UPDATE: @@ -1332,6 +1313,41 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return ShapeUtil::PermuteDimensions(InversePermutation(dimensions), operand); } +// TODO(b/36794510): Make broadcast semantics more consistent, by supporting +// ""degenerate"" cases, as with binary elementwise ops. +/* static */ StatusOr ShapeInference::InferClampShape( + const Shape& min, const Shape& operand, const Shape& max) { + TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(min, ""clamp min"")); + TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(operand, ""clamp operand"")); + TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(max, ""clamp max"")); + if (!ShapeUtil::SameElementType(min, operand) || + !ShapeUtil::SameElementType(max, operand)) { + return InvalidArgument(""clamp op with different operand types: %s, %s, %s"", + ShapeUtil::HumanString(min).c_str(), + ShapeUtil::HumanString(operand).c_str(), + ShapeUtil::HumanString(max).c_str()); + } + if (((ShapeUtil::Compatible(min, operand) || ShapeUtil::IsScalar(min)) && + (ShapeUtil::Compatible(max, operand) || ShapeUtil::IsScalar(max)))) { + return operand; + } + if (ShapeUtil::IsScalar(operand)) { + if (ShapeUtil::Compatible(min, max)) { + return min; + } else if (ShapeUtil::IsScalar(min)) { + return max; + } else if (ShapeUtil::IsScalar(max)) { + return min; + } + } + return Unimplemented( + ""not yet implemented: %s, %s %s"", min.ShortDebugString().c_str(), + max.ShortDebugString().c_str(), operand.ShortDebugString().c_str()); +} + +// TODO(b/36794510): Make broadcast semantics more consistent, by supporting +// ""degenerate"" cases, as with binary elementwise ops, as well as scalar +// broadcast from all operands, not just the predicate. /* static */ StatusOr ShapeInference::InferSelectShape( const Shape& pred, const Shape& on_true, const Shape& on_false) { if (!ShapeUtil::Compatible(on_true, on_false)) { ",0,train bbd2047cf3a715a1431889ad8f558576a5382876,tensorflow/tensorflow,"[XLA:HLO] Minor fix for Clamp shape inference, and add some tests. Previously Clamp(f32[5], f32[], f32[9]) returned success, but it now returns a failure. Noticed while debugging a different problem. Change: 151835981",shape_inference.h,"@@ -190,6 +190,10 @@ class ShapeInference { BinaryOperation operation, const Shape& lhs, const Shape& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions); + // Helper for inferring the shape of Clamp ops. 
+ static StatusOr InferClampShape(const Shape& min, const Shape& operand, + const Shape& max); + // Helper for inferring the shape of Select ops. static StatusOr InferSelectShape(const Shape& pred, const Shape& on_true, ",0,train bbd2047cf3a715a1431889ad8f558576a5382876,tensorflow/tensorflow,"[XLA:HLO] Minor fix for Clamp shape inference, and add some tests. Previously Clamp(f32[5], f32[], f32[9]) returned success, but it now returns a failure. Noticed while debugging a different problem. Change: 151835981",shape_inference_test.cc,"@@ -157,6 +157,99 @@ TEST_F(ShapeInferenceTest, SelectBadShapes) { testing::ContainsRegex(""pred operand must have PRED element type"")); } +TEST_F(ShapeInferenceTest, ClampAllMatrix) { + auto inferred_status = ShapeInference::InferTernaryOpShape( + TernaryOperation::TRIOP_CLAMP, matrix_64_48_, matrix_64_48_, + matrix_64_48_); + ASSERT_IS_OK(inferred_status.status()); + ASSERT_TRUE(ShapeUtil::Equal(matrix_64_48_, inferred_status.ValueOrDie())); +} + +TEST_F(ShapeInferenceTest, ClampAllScalar) { + auto inferred_status = ShapeInference::InferTernaryOpShape( + TernaryOperation::TRIOP_CLAMP, f32_, f32_, f32_); + ASSERT_IS_OK(inferred_status.status()); + ASSERT_TRUE(ShapeUtil::Equal(f32_, inferred_status.ValueOrDie())); +} + +TEST_F(ShapeInferenceTest, ClampMinScalar) { + auto inferred_status = ShapeInference::InferTernaryOpShape( + TernaryOperation::TRIOP_CLAMP, f32_, matrix_64_48_, matrix_64_48_); + ASSERT_IS_OK(inferred_status.status()); + ASSERT_TRUE(ShapeUtil::Equal(matrix_64_48_, inferred_status.ValueOrDie())); +} + +TEST_F(ShapeInferenceTest, ClampMaxScalar) { + auto inferred_status = ShapeInference::InferTernaryOpShape( + TernaryOperation::TRIOP_CLAMP, matrix_64_48_, matrix_64_48_, f32_); + ASSERT_IS_OK(inferred_status.status()); + ASSERT_TRUE(ShapeUtil::Equal(matrix_64_48_, inferred_status.ValueOrDie())); +} + +TEST_F(ShapeInferenceTest, ClampOperandScalar) { + auto inferred_status = ShapeInference::InferTernaryOpShape( + TernaryOperation::TRIOP_CLAMP, matrix_64_48_, f32_, matrix_64_48_); + ASSERT_IS_OK(inferred_status.status()); + ASSERT_TRUE(ShapeUtil::Equal(matrix_64_48_, inferred_status.ValueOrDie())); +} + +TEST_F(ShapeInferenceTest, ClampMinMatrix) { + auto inferred_status = ShapeInference::InferTernaryOpShape( + TernaryOperation::TRIOP_CLAMP, matrix_64_48_, f32_, f32_); + ASSERT_IS_OK(inferred_status.status()); + ASSERT_TRUE(ShapeUtil::Equal(matrix_64_48_, inferred_status.ValueOrDie())); +} + +TEST_F(ShapeInferenceTest, ClampMaxMatrix) { + auto inferred_status = ShapeInference::InferTernaryOpShape( + TernaryOperation::TRIOP_CLAMP, f32_, f32_, matrix_64_48_); + ASSERT_IS_OK(inferred_status.status()); + ASSERT_TRUE(ShapeUtil::Equal(matrix_64_48_, inferred_status.ValueOrDie())); +} + +TEST_F(ShapeInferenceTest, ClampOperandMatrix) { + auto inferred_status = ShapeInference::InferTernaryOpShape( + TernaryOperation::TRIOP_CLAMP, f32_, matrix_64_48_, f32_); + ASSERT_IS_OK(inferred_status.status()); + ASSERT_TRUE(ShapeUtil::Equal(matrix_64_48_, inferred_status.ValueOrDie())); +} + +TEST_F(ShapeInferenceTest, ClampBadShapes) { + // Type mismatch + ASSERT_FALSE(ShapeInference::InferTernaryOpShape( + TernaryOperation::TRIOP_CLAMP, s32_, f32_, f32_) + .ok()); + ASSERT_FALSE(ShapeInference::InferTernaryOpShape( + TernaryOperation::TRIOP_CLAMP, f32_, s32_, f32_) + .ok()); + ASSERT_FALSE(ShapeInference::InferTernaryOpShape( + TernaryOperation::TRIOP_CLAMP, f32_, f32_, s32_) + .ok()); + // Dimension mismatch + ASSERT_FALSE( + 
ShapeInference::InferTernaryOpShape(TernaryOperation::TRIOP_CLAMP, + vector_64_, vector_32_, vector_32_) + .ok()); + ASSERT_FALSE( + ShapeInference::InferTernaryOpShape(TernaryOperation::TRIOP_CLAMP, + vector_32_, vector_64_, vector_32_) + .ok()); + ASSERT_FALSE( + ShapeInference::InferTernaryOpShape(TernaryOperation::TRIOP_CLAMP, + vector_32_, vector_32_, vector_64_) + .ok()); + // Dimension mismatch, where one operand is a scalar + ASSERT_FALSE(ShapeInference::InferTernaryOpShape( + TernaryOperation::TRIOP_CLAMP, vector_64_, vector_32_, f32_) + .ok()); + ASSERT_FALSE(ShapeInference::InferTernaryOpShape( + TernaryOperation::TRIOP_CLAMP, vector_64_, f32_, vector_32_) + .ok()); + ASSERT_FALSE(ShapeInference::InferTernaryOpShape( + TernaryOperation::TRIOP_CLAMP, f32_, vector_64_, vector_32_) + .ok()); +} + TEST_F(ShapeInferenceTest, VariadicOpTuplify) { StatusOr result = ShapeInference::InferVariadicOpShape( VariadicOperation::VAROP_TUPLE, {&s32_, &f32_}); ",0,train 26cb47b77f4029083e765259c329602e8a478ea1,tensorflow/tensorflow,"ROCm build fix. PiperOrigin-RevId: 292377641 Change-Id: Id6669836b45cd450c40552be2779d7269ecad344",cost_utils.cc,"@@ -88,8 +88,8 @@ TfOpRoofLineCostEstimator::OpRoofLineStats TfOpRoofLineCostEstimator::Predict( } grappler::OpContext op_context; - op_context.name = tf_op.type; - op_context.op_info.set_op(tf_op.type); + op_context.name = std::string(tf_op.type); + op_context.op_info.set_op(op_context.name); for (const auto& tensor : input_tensors) { *op_context.op_info.add_inputs() = GetTensorProperties(tensor); } ",0,train ead4fda06535ce547d014fba1656ae53f0b64996,tensorflow/tensorflow,"Fixes a bug in tf.train.Saver(), where classes using the `VARIABLE_VALUE_KEY` used different naming in the checkpoint file when `var_list` was a dict. PiperOrigin-RevId: 217182136",saver.py,"@@ -626,7 +626,12 @@ class BaseSaverBuilder(object): op, variables.Variable): # pylint: disable=protected-access for attr, factory in op._gather_saveables_for_checkpoint().items(): - op = (factory(name + ""_"" + attr) if callable(factory) else factory) + if attr == checkpointable.VARIABLE_VALUE_KEY: + # Keep original name for classes masquerading as variables. + full_name = name + else: + full_name = name + ""_"" + attr + op = (factory(full_name) if callable(factory) else factory) for op in BaseSaverBuilder.SaveableObjectsForOp(op, op.name): yield op # pylint: enable=protected-access ",0,train 7b5d04c60437a415fc4edb5a97d939a1a3babe14,tensorflow/tensorflow,"Makes most variable writes depend on the cached value. This disallows some undefined behavior with unordered reads and writes. PiperOrigin-RevId: 198633444",resource_variable_ops_test.py,"@@ -119,6 +119,13 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): dtype=dtypes.int32, shape=[1], name=""foo"") self.assertGreater(len(handle.eval()), 0) + def testCachedValueReadBeforeWrite(self): + with self.test_session() as sess: + v = resource_variable_ops.ResourceVariable(0.0, caching_device=""cpu:0"") + sess.run(v.initializer) + value, _ = sess.run([v, v.assign_add(1.0)]) + self.assertAllEqual(value, 0.0) + def testAssignVariableDtypeMismatchEager(self): with context.eager_mode(): handle = resource_variable_ops.var_handle_op( ",0,train 7b5d04c60437a415fc4edb5a97d939a1a3babe14,tensorflow/tensorflow,"Makes most variable writes depend on the cached value. This disallows some undefined behavior with unordered reads and writes. 
PiperOrigin-RevId: 198633444",resource_variable_ops.py,"@@ -576,6 +576,21 @@ class ResourceVariable(variables.Variable): self._constraint = None self._cached_shape_as_list = None + @contextlib.contextmanager + def _assign_dependencies(self): + """"""Makes assignments depend on the cached value, if any. + + This prevents undefined behavior with reads not ordered wrt writes. + + Yields: + None. + """""" + if self._cached_value is not None: + with ops.control_dependencies([self._cached_value]): + yield + else: + yield + def __nonzero__(self): return self.__bool__() @@ -865,7 +880,7 @@ class ResourceVariable(variables.Variable): # TODO(apassos): this here and below is not atomic. Consider making it # atomic if there's a way to do so without a performance cost for those who # don't need it. - with _handle_graph(self.handle): + with _handle_graph(self.handle), self._assign_dependencies(): assign_sub_op = gen_resource_variable_ops.assign_sub_variable_op( self.handle, ops.convert_to_tensor(delta, dtype=self.dtype), name=name) @@ -889,7 +904,7 @@ class ResourceVariable(variables.Variable): it will return the `Operation` that does the assignment, and when in eager mode it will return `None`. """""" - with _handle_graph(self.handle): + with _handle_graph(self.handle), self._assign_dependencies(): assign_add_op = gen_resource_variable_ops.assign_add_variable_op( self.handle, ops.convert_to_tensor(delta, dtype=self.dtype), name=name) @@ -921,6 +936,8 @@ class ResourceVariable(variables.Variable): it will return the `Operation` that does the assignment, and when in eager mode it will return `None`. """""" + # Note: not depending on the cached value here since this can used to + # initialize the variable. with _handle_graph(self.handle): value_tensor = ops.convert_to_tensor(value, dtype=self.dtype) self._shape.assert_is_compatible_with(value_tensor.shape) @@ -933,7 +950,7 @@ class ResourceVariable(variables.Variable): def _strided_slice_assign(self, begin, end, strides, value, name, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask): - with _handle_graph(self.handle): + with _handle_graph(self.handle), self._assign_dependencies(): return self._lazy_read( gen_array_ops.resource_strided_slice_assign( ref=self.handle, ",0,train bb2d8e9dc75dbb6f2d22a438c5e5fb3908700438,tensorflow/tensorflow,"Introduce TestCase.assertRaisesIncompatibleShapesError. The new helper allows to test for incompatible shapes errors independently of whether they were produced by XLA, mlir generated kernels or classic TensorFlow kernels. PiperOrigin-RevId: 361612621 Change-Id: Ic9ba732ccb3483e30af6535024a37ab4e53776a3",binary_ops_test.py,"@@ -24,7 +24,6 @@ import numpy as np from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import bitwise_ops @@ -1612,8 +1611,7 @@ class BinaryOpsTest(xla_test.XLATestCase): @test_util.disable_mlir_bridge(""Error handling"") def testBroadcastArgsError(self): - with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError, - ""Incompatible shapes""): + with self.assertRaisesIncompatibleShapesError(): self._testBinary(array_ops.broadcast_dynamic_shape, np.array([1, 2, 3], dtype=np.int32), np.array([4, 5, 6], dtype=np.int32), ",0,train bb2d8e9dc75dbb6f2d22a438c5e5fb3908700438,tensorflow/tensorflow,"Introduce TestCase.assertRaisesIncompatibleShapesError. 
The new helper allows to test for incompatible shapes errors independently of whether they were produced by XLA, mlir generated kernels or classic TensorFlow kernels. PiperOrigin-RevId: 361612621 Change-Id: Ic9ba732ccb3483e30af6535024a37ab4e53776a3",test_util.py,"@@ -3100,6 +3100,12 @@ class TensorFlowTestCase(googletest.TestCase): return self.assertRaisesWithPredicateMatch(errors.OpError, expected_err_re_or_predicate) + def assertRaisesIncompatibleShapesError( + self, exception_type=errors.InvalidArgumentError): + return self.assertRaisesWithPredicateMatch( + exception_type, r""Incompatible shapes|Dimensions must be equal|"" + r""required broadcastable shapes"") + def assertShapeEqual(self, np_array, tf_tensor, msg=None): """"""Asserts that a Numpy ndarray and a TensorFlow tensor have the same shape. ",0,train bb2d8e9dc75dbb6f2d22a438c5e5fb3908700438,tensorflow/tensorflow,"Introduce TestCase.assertRaisesIncompatibleShapesError. The new helper allows to test for incompatible shapes errors independently of whether they were produced by XLA, mlir generated kernels or classic TensorFlow kernels. PiperOrigin-RevId: 361612621 Change-Id: Ic9ba732ccb3483e30af6535024a37ab4e53776a3",check_ops_test.py,"@@ -285,9 +285,8 @@ First 2 elements of y: # The exception in eager and non-eager mode is different because # eager mode relies on shape check done as part of the C++ op, while # graph mode does shape checks when creating the `Operation` instance. - with self.assertRaisesRegex((errors.InvalidArgumentError, ValueError), - (r""Incompatible shapes: \[3\] vs. \[2\]|"" - r""Dimensions must be equal, but are 3 and 2"")): + with self.assertRaisesIncompatibleShapesError( + (errors.InvalidArgumentError, ValueError)): with ops.control_dependencies([check_ops.assert_equal(small, small_2)]): out = array_ops.identity(small) self.evaluate(out) @@ -353,9 +352,8 @@ class AssertNoneEqualTest(test.TestCase): # The exception in eager and non-eager mode is different because # eager mode relies on shape check done as part of the C++ op, while # graph mode does shape checks when creating the `Operation` instance. - with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), - (r""Incompatible shapes: \[3\] vs. \[2\]|"" - r""Dimensions must be equal, but are 3 and 2"")): + with self.assertRaisesIncompatibleShapesError( + (ValueError, errors.InvalidArgumentError)): with ops.control_dependencies( [check_ops.assert_none_equal(small, big)]): out = array_ops.identity(small) @@ -581,10 +579,8 @@ class AssertLessTest(test.TestCase): # The exception in eager and non-eager mode is different because # eager mode relies on shape check done as part of the C++ op, while # graph mode does shape checks when creating the `Operation` instance. - with self.assertRaisesRegex( # pylint:disable=g-error-prone-assert-raises - (ValueError, errors.InvalidArgumentError), - (r""Incompatible shapes: \[3\] vs. \[2\]|"" - ""Dimensions must be equal, but are 3 and 2"")): + with self.assertRaisesIncompatibleShapesError( + (ValueError, errors.InvalidArgumentError)): with ops.control_dependencies([check_ops.assert_less(small, big)]): out = array_ops.identity(small) self.evaluate(out) ",0,train bb2d8e9dc75dbb6f2d22a438c5e5fb3908700438,tensorflow/tensorflow,"Introduce TestCase.assertRaisesIncompatibleShapesError. The new helper allows to test for incompatible shapes errors independently of whether they were produced by XLA, mlir generated kernels or classic TensorFlow kernels. 
PiperOrigin-RevId: 361612621 Change-Id: Ic9ba732ccb3483e30af6535024a37ab4e53776a3",cwise_ops_binary_test.py,"@@ -958,10 +958,8 @@ class ComparisonOpTest(test.TestCase): y = np.arange(0, 10).reshape([5, 2]) for t in dtypes: for f in funcs: - with self.assertRaisesRegex( - (ValueError, errors.InvalidArgumentError), - ""Incompatible shapes|Dimensions must be equal|"" - ""required broadcastable shapes""): + with self.assertRaisesIncompatibleShapesError( + (ValueError, errors.InvalidArgumentError)): f(x.astype(t), y.astype(t)) def testEqualDType(self): ",0,train bb2d8e9dc75dbb6f2d22a438c5e5fb3908700438,tensorflow/tensorflow,"Introduce TestCase.assertRaisesIncompatibleShapesError. The new helper allows to test for incompatible shapes errors independently of whether they were produced by XLA, mlir generated kernels or classic TensorFlow kernels. PiperOrigin-RevId: 361612621 Change-Id: Ic9ba732ccb3483e30af6535024a37ab4e53776a3",cwise_ops_test.py,"@@ -217,10 +217,8 @@ class ComparisonOpTest(test.TestCase): for t in dtypes: for f in funcs: with self.subTest(t=t, f=f): - with self.assertRaisesRegex( - (ValueError, errors.InvalidArgumentError), - ""Incompatible shapes|Dimensions must be equal|"" - ""required broadcastable shapes""): + with self.assertRaisesIncompatibleShapesError( + (ValueError, errors.InvalidArgumentError)): f(x.astype(t), y.astype(t)) ",0,train bb2d8e9dc75dbb6f2d22a438c5e5fb3908700438,tensorflow/tensorflow,"Introduce TestCase.assertRaisesIncompatibleShapesError. The new helper allows to test for incompatible shapes errors independently of whether they were produced by XLA, mlir generated kernels or classic TensorFlow kernels. PiperOrigin-RevId: 361612621 Change-Id: Ic9ba732ccb3483e30af6535024a37ab4e53776a3",bincount_ops_test.py,"@@ -801,8 +801,7 @@ class TestSparseCountFailureModes(test.TestCase): np.array([[3, 0, 1, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32)) weights = sparse_ops.from_dense( np.array([[3, 1, 1, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32)) - with self.assertRaisesRegex(errors.InvalidArgumentError, - ""Incompatible shapes""): + with self.assertRaisesIncompatibleShapesError(): self.evaluate(bincount_ops.sparse_bincount(x, weights=weights, axis=-1)) def test_sparse_input_wrong_shape_fails(self): ",0,train bb2d8e9dc75dbb6f2d22a438c5e5fb3908700438,tensorflow/tensorflow,"Introduce TestCase.assertRaisesIncompatibleShapesError. The new helper allows to test for incompatible shapes errors independently of whether they were produced by XLA, mlir generated kernels or classic TensorFlow kernels. PiperOrigin-RevId: 361612621 Change-Id: Ic9ba732ccb3483e30af6535024a37ab4e53776a3",nn_loss_scaling_utilities_test.py,"@@ -98,9 +98,8 @@ class LossUtilitiesTest(test_lib.TestCase, parameterized.TestCase): self.evaluate(loss), (2. * 0.3 + 0.5 * 0.7 + 4. * 0.2 + 1. * 0.8) / 2) def testComputeAverageLossInvalidSampleWeights(self): - with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError), - (r""Incompatible shapes: \[3\] vs. 
\[2\]|"" - ""Dimensions must be equal"")): + with self.assertRaisesIncompatibleShapesError( + (ValueError, errors_impl.InvalidArgumentError)): nn_impl.compute_average_loss([2.5, 6.2, 5.], sample_weight=[0.2, 0.8], global_batch_size=10) ",0,train 5b725bc16f5f548439ef06353952205677076f5a,tensorflow/tensorflow,"cmsis-nn: Revert dynamic allocation for quant params File affected: cmsis-nn/depthwise_conv.cc Dynamic allocation of memory for output shift and multiplier fails(running whole networks) when done together with scratch buffer for optimization. The issue is tracked in b/158779832. This patch reverts back to static allocation for output shift and multiplier until the scratch buffer issue is fixed.",depthwise_conv.cc,"@@ -36,6 +36,7 @@ constexpr int kInputTensor = 0; constexpr int kFilterTensor = 1; constexpr int kBiasTensor = 2; constexpr int kOutputTensor = 0; +constexpr int kMaxChannels = 256; // Depthwise conv is quantized along dimension 3: // https://www.tensorflow.org/lite/performance/quantization_spec @@ -49,8 +50,9 @@ struct OpData { int output_shift; // Per channel output multiplier and shift. - int32_t* per_channel_output_multiplier; - int32_t* per_channel_output_shift; + // TODO: Allocate dynamic buffers when b/158779832 is resolved + int32_t per_channel_output_multiplier[kMaxChannels]; + int32_t per_channel_output_shift[kMaxChannels]; // The range of the fused activation layer. For example for kNone and // uint8_t these would be 0 and 255. int32_t output_activation_min; @@ -129,13 +131,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Allocate memory for per-channel quantization parameters const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; - // Dynamically allocate per-channel quantization parameters. - TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( - context, num_channels * sizeof(int32_t), - reinterpret_cast(&data->per_channel_output_multiplier))); - TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( - context, num_channels * sizeof(int32_t), - reinterpret_cast(&data->per_channel_output_shift))); + TFLITE_DCHECK_LE(num_channels, kMaxChannels); + TF_LITE_ENSURE_EQ(context, filter->quantization.type, kTfLiteAffineQuantization); @@ -236,7 +233,7 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, TfLiteDepthwiseConvParams* params, - const OpData* data, const TfLiteTensor* input, + OpData* data, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { cmsis_nn_dw_conv_params dw_conv_params; @@ -408,7 +405,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); - const OpData& data = *(static_cast(node->user_data)); + OpData& data = *(static_cast(node->user_data)); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); const TfLiteTensor* input = GetInput(context, node, kInputTensor); ",0,train 441979ff0399418b7883ca6c267c08fc716ce74b,tensorflow/tensorflow,"[XLA] Add an unoptimized HLO output flag to ExecutableBuildOptions and to the XLA local Python client. 
PiperOrigin-RevId: 198930874",executable_build_options.cc,"@@ -87,6 +87,18 @@ ExecutableBuildOptions::dump_optimized_hlo_proto_to() const { return dump_optimized_hlo_proto_to_; } +ExecutableBuildOptions& +ExecutableBuildOptions::set_dump_unoptimized_hlo_proto_to( + tensorflow::StringPiece dirpath) { + dump_unoptimized_hlo_proto_to_ = dirpath.ToString(); + return *this; +} + +const tensorflow::gtl::optional& +ExecutableBuildOptions::dump_unoptimized_hlo_proto_to() const { + return dump_unoptimized_hlo_proto_to_; +} + ExecutableBuildOptions& ExecutableBuildOptions::set_dump_per_pass_hlo_proto_to( tensorflow::StringPiece dirpath) { dump_per_pass_hlo_proto_to_ = dirpath.ToString(); ",0,train 441979ff0399418b7883ca6c267c08fc716ce74b,tensorflow/tensorflow,"[XLA] Add an unoptimized HLO output flag to ExecutableBuildOptions and to the XLA local Python client. PiperOrigin-RevId: 198930874",executable_build_options.h,"@@ -65,6 +65,13 @@ class ExecutableBuildOptions { tensorflow::StringPiece dirpath); const tensorflow::gtl::optional& dump_optimized_hlo_proto_to() const; + // If set, specifies a dirpath to dump the start-of-optimization-pipeline HLO + // protobuf to (as in DebugOptions). + ExecutableBuildOptions& set_dump_unoptimized_hlo_proto_to( + tensorflow::StringPiece dirpath); + const tensorflow::gtl::optional& dump_unoptimized_hlo_proto_to() + const; + // If set, specifies a dirpath to dump the per-pass-in-pipeline HLO protobufs // to (as in DebugOptions). ExecutableBuildOptions& set_dump_per_pass_hlo_proto_to( @@ -95,6 +102,7 @@ class ExecutableBuildOptions { bool result_layout_set_ = false; tensorflow::gtl::optional generate_hlo_graph_; tensorflow::gtl::optional dump_optimized_hlo_proto_to_; + tensorflow::gtl::optional dump_unoptimized_hlo_proto_to_; tensorflow::gtl::optional dump_per_pass_hlo_proto_to_; DeviceMemoryAllocator* device_allocator_ = nullptr; std::vector disabled_hlo_passes_; ",0,train 441979ff0399418b7883ca6c267c08fc716ce74b,tensorflow/tensorflow,"[XLA] Add an unoptimized HLO output flag to ExecutableBuildOptions and to the XLA local Python client. PiperOrigin-RevId: 198930874",xla_client.py,"@@ -353,6 +353,7 @@ class CompileOptions(object): def __init__(self): self.generate_hlo_graph = None self.dump_optimized_hlo_proto_to = None + self.dump_unoptimized_hlo_proto_to = None self.dump_per_pass_hlo_proto_to = None self.hlo_profile = False ",0,train 441979ff0399418b7883ca6c267c08fc716ce74b,tensorflow/tensorflow,"[XLA] Add an unoptimized HLO output flag to ExecutableBuildOptions and to the XLA local Python client. 
PiperOrigin-RevId: 198930874",local_service.cc,"@@ -108,6 +108,11 @@ ExecutionOptions CreateExecutionOptions( ->set_xla_dump_optimized_hlo_proto_to( build_options.dump_optimized_hlo_proto_to().value()); } + if (build_options.dump_unoptimized_hlo_proto_to().has_value()) { + execution_options.mutable_debug_options() + ->set_xla_dump_unoptimized_hlo_proto_to( + build_options.dump_unoptimized_hlo_proto_to().value()); + } if (build_options.dump_per_pass_hlo_proto_to().has_value()) { execution_options.mutable_debug_options() ->set_xla_dump_per_pass_hlo_proto_to( ",0,train 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",shape_refiner_test.cc,"@@ -980,10 +980,10 @@ TEST_F(ShapeRefinerTest, ConstantValueAsShape_PackInt64) { InputList inputs{ // clang-format off - Input(ops::Const(root, 10LL)), - Input(ops::Const(root, 20LL)), + Input(ops::Const(root, int64{10})), + Input(ops::Const(root, int64{20})), Input(Output(scalar_non_const)), - Input(ops::Const(root, 1LL << 40)), + Input(ops::Const(root, int64{1} << 40)), }; // clang-format on auto pack = ops::Stack(root, inputs); TF_ASSERT_OK(root.status()); @@ -1008,8 +1008,8 @@ TEST_F(ShapeRefinerTest, ConstantValueAsShape_PackUnknownDim) { Scope root = Scope::NewRootScope(); InputList inputs{ - Input(ops::Const(root, 10LL)), - Input(ops::Const(root, -1LL)), + Input(ops::Const(root, int64{10})), + Input(ops::Const(root, int64{-1})), }; auto pack = ops::Stack(root, inputs); TF_ASSERT_OK(root.status()); @@ -1035,8 +1035,8 @@ TEST_F(ShapeRefinerTest, ConstantValueAsShape_PackInvalidInput) { // Inputs are length 2 vectors instead of scalars. InputList inputs{ - Input(ops::Const(root, {10LL, 20LL})), - Input(ops::Const(root, {10LL, 21LL})), + Input(ops::Const(root, {int64{10}, int64{20}})), + Input(ops::Const(root, {int64{10}, int64{21}})), }; auto pack = ops::Stack(root, inputs); TF_ASSERT_OK(root.status()); ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",debug_io_utils.cc,"@@ -395,11 +395,12 @@ Status DebugIO::PublishDebugMetadata( } else if (absl::StartsWith(absl::AsciiStrToLower(url), kFileURLScheme)) { const string dump_root_dir = url.substr(strlen(kFileURLScheme)); const string core_metadata_path = AppendTimestampToFilePath( - io::JoinPath( - dump_root_dir, - strings::StrCat(DebugNodeKey::kMetadataFilePrefix, - DebugIO::kCoreMetadataTag, ""sessionrun"", - strings::Printf(""%.14lld"", session_run_index))), + io::JoinPath(dump_root_dir, + strings::StrCat( + DebugNodeKey::kMetadataFilePrefix, + DebugIO::kCoreMetadataTag, ""sessionrun"", + strings::Printf(""%.14lld"", static_cast( + session_run_index)))), Env::Default()->NowMicros()); status.Update(DebugFileIO::DumpEventProtoToFile( event, string(io::Dirname(core_metadata_path)), ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",eager_service_impl.cc,"@@ -557,7 +557,7 @@ tensorflow::Status EagerServiceImpl::GetServerContext( return errors::InvalidArgument(strings::Printf( ""Unable to find a context_id matching the specified one "" ""(%llu). 
Perhaps the worker was restarted, or the context was GC'd?"", - context_id)); + static_cast(context_id))); } *server_context = iter->second; ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",graph_mgr.cc,"@@ -303,7 +303,8 @@ Status GraphMgr::Register( // Inserts one item into table_. { mutex_lock l(mu_); - *graph_handle = strings::Printf(""%016llx"", ++next_id_); + *graph_handle = + strings::Printf(""%016llx"", static_cast(++next_id_)); item->handle = *graph_handle; CHECK(table_.insert({*graph_handle, item}).second); } ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",batch_dataset_op.cc,"@@ -54,7 +54,8 @@ class BatchDatasetOp::Dataset : public DatasetBase { input_(input), op_version_(op_version), traceme_metadata_( - {{""batch_size"", strings::Printf(""%lld"", batch_size)}, + {{""batch_size"", + strings::Printf(""%lld"", static_cast(batch_size))}, {""drop_remainder"", drop_remainder ? ""true"" : ""false""}, {""parallel_copy"", parallel_copy ? ""true"" : ""false""}}) { input_->Ref(); ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",map_and_batch_dataset_op.cc,"@@ -100,7 +100,8 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase { traceme_metadata_( {{""autotune"", num_parallel_calls == model::kAutotune ? ""true"" : ""false""}, - {""batch_size"", strings::Printf(""%lld"", batch_size)}, + {""batch_size"", + strings::Printf(""%lld"", static_cast(batch_size))}, {""drop_remainder"", drop_remainder ? ""true"" : ""false""}}) { input_->Ref(); } @@ -285,8 +286,8 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase { } TraceMeMetadata GetTraceMeMetadata() const override { - int64 parallelism = -1; - int64 max_batch_results = -1; + long long parallelism = -1; // NOLINT + long long max_batch_results = -1; // NOLINT // NOTE: We only set the parallelism value if the lock can be acquired // right away to avoid introducing tracing overhead. if (mu_->try_lock()) { ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",parallel_interleave_dataset_op.cc,"@@ -107,8 +107,10 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { output_types_(output_types), output_shapes_(output_shapes), traceme_metadata_( - {{""block_length"", strings::Printf(""%lld"", block_length)}, - {""cycle_length"", strings::Printf(""%lld"", cycle_length)}, + {{""block_length"", + strings::Printf(""%lld"", static_cast(block_length))}, + {""cycle_length"", + strings::Printf(""%lld"", static_cast(cycle_length))}, {""deterministic"", deterministic.IsDeterministic() || deterministic.IsDefault() ? 
""true"" ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",rebatch_dataset_op.cc,"@@ -62,7 +62,8 @@ class RebatchDatasetOp : public UnaryDatasetOpKernel { output_types_(output_types), output_shapes_(output_shapes), traceme_metadata_( - {{""num_replicas"", strings::Printf(""%lld"", num_replicas)}}) { + {{""num_replicas"", strings::Printf(""%lld"", static_cast( + num_replicas))}}) { input_->Ref(); } ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",snapshot_dataset_op.cc,"@@ -1206,7 +1206,9 @@ class SnapshotDatasetOp : public UnaryDatasetOpKernel { string GetSnapshotFilename() { mutex_lock l(mu_); string snapshot_data_filename = io::JoinPath( - run_dir_, strings::Printf(""%08llu.snapshot"", next_file_index_)); + run_dir_, strings::Printf( + ""%08llu.snapshot"", + static_cast(next_file_index_))); next_file_index_++; return snapshot_data_filename; } ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",interleave_dataset_op.cc,"@@ -62,8 +62,10 @@ class InterleaveDatasetOp::Dataset : public DatasetBase { output_types_(output_types), output_shapes_(output_shapes), traceme_metadata_( - {{""block_length"", strings::Printf(""%lld"", block_length)}, - {""cycle_length"", strings::Printf(""%lld"", cycle_length)}}) { + {{""block_length"", + strings::Printf(""%lld"", static_cast(block_length))}, + {""cycle_length"", + strings::Printf(""%lld"", static_cast(cycle_length))}}) { input_->Ref(); } ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",padded_batch_dataset_op.cc,"@@ -61,7 +61,8 @@ class PaddedBatchDatasetOp::Dataset : public DatasetBase { input_(input), op_version_(op_version), traceme_metadata_( - {{""batch_size"", strings::Printf(""%lld"", batch_size)}, + {{""batch_size"", + strings::Printf(""%lld"", static_cast(batch_size))}, {""drop_remainder"", drop_remainder ? ""true"" : ""false""}}) { input_->Ref(); ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",parallel_interleave_dataset_op.cc,"@@ -172,8 +172,10 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { traceme_metadata_( {{""autotune"", num_parallel_calls == model::kAutotune ? ""true"" : ""false""}, - {""block_length"", strings::Printf(""%lld"", block_length)}, - {""cycle_length"", strings::Printf(""%lld"", cycle_length)}, + {""block_length"", + strings::Printf(""%lld"", static_cast(block_length))}, + {""cycle_length"", + strings::Printf(""%lld"", static_cast(cycle_length))}, {""deterministic"", deterministic.IsNondeterministic() ? 
""false"" : ""true""}}) { input_->Ref(); @@ -467,8 +469,9 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { mu_->unlock(); } auto result = dataset()->traceme_metadata_; - result.push_back( - std::make_pair(""parallelism"", strings::Printf(""%lld"", parallelism))); + result.push_back(std::make_pair( + ""parallelism"", + strings::Printf(""%lld"", static_cast(parallelism)))); return result; } ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",parallel_map_dataset_op.cc,"@@ -471,8 +471,9 @@ class ParallelMapIterator : public DatasetBaseIterator { result.push_back(std::make_pair(""autotune"", autotune_ ? ""true"" : ""false"")); result.push_back( std::make_pair(""deterministic"", deterministic_ ? ""true"" : ""false"")); - result.push_back( - std::make_pair(""parallelism"", strings::Printf(""%lld"", parallelism))); + result.push_back(std::make_pair( + ""parallelism"", + strings::Printf(""%lld"", static_cast(parallelism)))); return result; } ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",prefetch_dataset_op.cc,"@@ -278,11 +278,13 @@ class PrefetchDatasetOp::Dataset : public DatasetBase { mu_->unlock(); } data::TraceMeMetadata result; - result.push_back( - std::make_pair(""buffer_limit"", strings::Printf(""%lld"", limit))); + result.push_back(std::make_pair( + ""buffer_limit"", + strings::Printf(""%lld"", static_cast(limit)))); if (dataset()->slack_period_ > 0) { - result.push_back( - std::make_pair(""slack"", strings::Printf(""%lld"", slack_us_.load()))); + result.push_back(std::make_pair( + ""slack"", + strings::Printf(""%lld"", static_cast(slack_us_.load())))); } return result; } ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",shard_dataset_op.cc,"@@ -48,8 +48,9 @@ class ShardDatasetOp::Dataset : public DatasetBase { input_(input), require_non_empty_(require_non_empty), traceme_metadata_( - {{""index"", strings::Printf(""%lld"", index)}, - {""num_shards"", strings::Printf(""%lld"", num_shards)}}) { + {{""index"", strings::Printf(""%lld"", static_cast(index))}, + {""num_shards"", + strings::Printf(""%lld"", static_cast(num_shards))}}) { input_->Ref(); } ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",shuffle_dataset_op.cc,"@@ -108,7 +108,8 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { buffer_size_(buffer_size), count_(count), traceme_metadata_( - {{""buffer_size"", strings::Printf(""%lld"", buffer_size)}}) { + {{""buffer_size"", + strings::Printf(""%lld"", static_cast(buffer_size))}}) { input_->Ref(); } ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",window_dataset_op.cc,"@@ -54,9 +54,12 @@ class WindowDatasetOp::Dataset : public DatasetBase { output_dtypes_(input_->output_dtypes().size(), {DT_VARIANT}), output_shapes_(input_->output_shapes().size(), TensorShape({})), 
traceme_metadata_( - {{""window_size"", strings::Printf(""%lld"", window_size)}, - {""window_shift"", strings::Printf(""%lld"", window_shift)}, - {""window_stride"", strings::Printf(""%lld"", window_stride)}}) { + {{""window_size"", + strings::Printf(""%lld"", static_cast(window_size))}, + {""window_shift"", + strings::Printf(""%lld"", static_cast(window_shift))}, + {""window_stride"", strings::Printf(""%lld"", static_cast( + window_stride))}}) { input_->Ref(); } ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",math_grad.cc,"@@ -78,7 +78,7 @@ REGISTER_OP_GRADIENT(""Reciprocal"", InvGrad); Status SquareGrad(const AttrSlice& attrs, FunctionDef* g) { // clang-format off return GradForUnaryCwise(g, { - FDH::Const(""c"", 2LL), + FDH::Const(""c"", int64{2}), {{""two""}, ""Cast"", {""c""}, {{""SrcT"", DT_INT64}, {""DstT"", ""$T""}}}, {{""x2""}, ""Mul"", {""x"", ""two""}, {}, {""dy""}}, // x * 2 {{""dx""}, ""Mul"", {""dy"", ""x2""}}, // dy * (x * 2) @@ -619,7 +619,7 @@ REGISTER_OP_GRADIENT(""Xdivy"", XdivyGrad); Status SquaredDifferenceGrad(const AttrSlice& attrs, FunctionDef* g) { // clang-format off return GradForBinaryCwise(g, { - FDH::Const(""c"", 2LL), + FDH::Const(""c"", int64{2}), {{""two""}, ""Cast"", {""c""}, {{""SrcT"", DT_INT64}, {""DstT"", ""$T""}}}, {{""x_sub_y""}, ""Sub"", {""x"", ""y""}}, {{""two_x_sub_y""}, ""Mul"", {""two"", ""x_sub_y""}}, // 2 * (x - y) ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",curl_http_request.cc,"@@ -141,7 +141,8 @@ CurlHttpRequest::CurlHttpRequest(LibCurl* libcurl, Env* env) // TODO(b/74351157): Enable HTTP/2. // Set up the progress meter. - CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 0ULL)); + CHECK_CURL_OK( + libcurl_->curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, uint64{0})); CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFODATA, this)); CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFOFUNCTION, &CurlHttpRequest::ProgressCallback)); ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",env.cc,"@@ -400,7 +400,7 @@ bool Env::CreateUniqueFileName(string* prefix, const string& suffix) { #else int32 pid = static_cast(getpid()); #endif - uint64 now_microsec = NowMicros(); + long long now_microsec = NowMicros(); // NOLINT *prefix += strings::Printf(""%s-%x-%d-%llx"", port::Hostname().c_str(), tid, pid, now_microsec); ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",numbers.cc,"@@ -439,7 +439,7 @@ string HumanReadableNum(int64 value) { value = -value; } if (value < 1000) { - Appendf(&s, ""%lld"", value); + Appendf(&s, ""%lld"", static_cast(value)); } else if (value >= static_cast(1e15)) { // Number bigger than 1E15; use that notation. Appendf(&s, ""%0.3G"", static_cast(value)); @@ -472,7 +472,7 @@ string HumanReadableNumBytes(int64 num_bytes) { // No fractions for bytes. 
char buf[8]; // Longest possible string is '-XXXXB' snprintf(buf, sizeof(buf), ""%s%lldB"", neg_str, - static_cast(num_bytes)); + static_cast(num_bytes)); return string(buf); } ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",debug_events_writer_test.cc,"@@ -68,8 +68,9 @@ class DebugEventsWriterTest : public ::testing::Test { } void SetUp() override { - dump_root_ = io::JoinPath(testing::TmpDir(), - strings::Printf(""%010lld"", env()->NowMicros())); + dump_root_ = io::JoinPath( + testing::TmpDir(), + strings::Printf(""%010lld"", static_cast(env()->NowMicros()))); } void TearDown() override { ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",events_writer.cc,"@@ -66,7 +66,7 @@ Status EventsWriter::InitIfNeeded() { filename_ = strings::Printf(""%s.out.tfevents.%010lld.%s%s"", file_prefix_.c_str(), - static_cast(time_in_seconds), + static_cast(time_in_seconds), port::Hostname().c_str(), file_suffix_.c_str()); // Reset recordio_writer (which has a reference to recordio_file_) so final ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",session_ref.cc,"@@ -53,7 +53,8 @@ struct RunCounter { }; std::string SessionToHandle(Session* session) { - return strings::Printf(""%llu"", reinterpret_cast(session)); + return strings::Printf(""%llu"", static_cast( + reinterpret_cast(session))); } // The Session interface has many methods of the form: ",0,test 8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf PiperOrigin-RevId: 299753695 Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",cuda_dnn.cc,"@@ -2620,8 +2620,8 @@ port::StatusOr GetCudnnConvolutionForwardAlgorithm( bool specify_workspace_limit = scratch_allocator != nullptr; auto memory_limit_bytes = specify_workspace_limit - ? std::max(scratch_allocator->GetMemoryLimitInBytes(), 0ll) - : 0ll; + ? std::max(scratch_allocator->GetMemoryLimitInBytes(), int64{0}) + : int64{0}; SE_ASSIGN_OR_RETURN(cudnnConvolutionFwdAlgo_t algo, GetCudnnConvolutionForwardAlgo( cudnn, input_nd, filter, conv, output_nd, @@ -2673,8 +2673,8 @@ port::StatusOr GetCudnnConvolutionBackwardDataAlgorithm( bool specify_workspace_limit = scratch_allocator != nullptr; auto memory_limit_bytes = specify_workspace_limit - ? std::max(scratch_allocator->GetMemoryLimitInBytes(), 0ll) - : 0ll; + ? std::max(scratch_allocator->GetMemoryLimitInBytes(), int64{0}) + : int64{0}; SE_ASSIGN_OR_RETURN(cudnnConvolutionBwdDataAlgo_t algo, GetCudnnConvolutionBackwardDataAlgo( cudnn, input_nd, filter, conv, output_nd, @@ -2725,8 +2725,8 @@ port::StatusOr GetCudnnConvolutionBackwardFilterAlgorithm( bool specify_workspace_limit = scratch_allocator != nullptr; auto memory_limit_bytes = specify_workspace_limit - ? std::max(scratch_allocator->GetMemoryLimitInBytes(), 0ll) - : 0ll; + ? 
std::max(scratch_allocator->GetMemoryLimitInBytes(), int64{0}) + : int64{0}; SE_ASSIGN_OR_RETURN(cudnnConvolutionBwdFilterAlgo_t algo, GetCudnnConvolutionBackwardFilterAlgo( cudnn, input_nd, filter, conv, output_nd, ",0,test f600f4d82de7feded78087b4edf7295eea64dae3,tensorflow/tensorflow,"TensorFlow: change cuda-diagnostics to search for so.1 Change: 115010103",cuda_diagnostics.cc,"@@ -165,7 +165,7 @@ port::StatusOr Diagnostician::FindDsoVersion() { // DSO and yields its version number into the callback data, when found. auto iterate_phdr = [](struct dl_phdr_info *info, size_t size, void *data) -> int { - if (strstr(info->dlpi_name, ""libcuda.so"")) { + if (strstr(info->dlpi_name, ""libcuda.so.1"")) { VLOG(1) << ""found DLL info with name: "" << info->dlpi_name; char resolved_path[PATH_MAX] = {0}; if (realpath(info->dlpi_name, resolved_path) == nullptr) { ",0,train cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup. PiperOrigin-RevId: 168422582",backprop.py,"@@ -88,14 +88,14 @@ def _magic_gradient_function(op_name, attr_tuple, num_inputs, num_outputs, Args: op_name: the name of the op to be differentiated. - attr_tuple: the attrs, as a tuple - num_inputs: the number of inputs to the op - num_outputs: the number of outputs of the op + attr_tuple: the attrs, as a tuple. + num_inputs: the number of inputs to the op. + num_outputs: the number of outputs of the op. *tensors: a list of tensors, composed of, in order, the inputs, the outputs, and the gradients with respect to the outputs. Returns: - the gradients with respect to the inputs of the function, as a list. + The gradients with respect to the inputs of the function, as a list. """""" inputs = tensors[:num_inputs] outputs = tensors[num_inputs:num_inputs + num_outputs] @@ -232,9 +232,9 @@ def implicit_val_and_grad(f): ag_core.active_progenitors.remove(start_node) if not ag_core.isnode(end_node): raise ValueError( - ""Target not part of a computation being traced. %s"" % end_node) + ""Target not part of a computation being traced. %s."" % end_node) if start_node not in end_node.progenitors: - raise ValueError(""Target not derived from source. %s %s"" % + raise ValueError(""Target not derived from source. %s %s."" % (end_node.progenitors, repr(start_node))) output_gradients = kwds.get(""output_gradients"", None) if output_gradients is None: @@ -282,7 +282,7 @@ def _get_arg_spec(f, params): return params else: raise ValueError( - ""params must be all strings or all integers; got %s"" % params) + ""params must be all strings or all integers; got %s."" % params) def gradients_function(f, params=None): ",0,train cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup. PiperOrigin-RevId: 168422582",context.py,"@@ -286,8 +286,8 @@ class Context(object): it is unset. `attrs` contains the attributes of the operation as a `tuple` of alternating attribute names and attribute values. - `inputs` is the `list` of input `tfe.Tensor`(s) to the op. - `outputs` is the `list` of output `tfe.Tensor`(s) from the op. + `inputs` is the `list` of input `Tensor`(s) to the op. + `outputs` is the `list` of output `Tensor`(s) from the op. Return value(s) from the callback are ignored. """""" # TODO(cais): (b/64674139) Allow access to function-internal operations. 
@@ -314,7 +314,7 @@ def _initialize_context(): def context(): - """"""Returns a singleton Context object."""""" + """"""Returns a singleton context object."""""" if _context is None: _initialize_context() return _context @@ -373,7 +373,7 @@ def device(name): ```python with tfe.device('gpu:0'): with tfe.device('cpu:0'): - shape = tfe.Tensor([], dtype=tf.int32) + shape = Tensor([], dtype=tf.int32) x = ops.truncated_normal(shape, tf.float32) ``` will ensure that the `shape` Tensor is on CPU but the `truncated_normal` @@ -390,13 +390,13 @@ def device(name): def run(main=None, argv=None): - """"""Runs the program with an optional 'main' function and 'argv' list. + """"""Runs the program with an optional main function and argv list. The program will run with eager execution enabled. Args: - main: the main function to run - argv: the arguments to pass to it + main: the main function to run. + argv: the arguments to pass to it. """""" enable_eager_execution() app.run(main, argv) ",0,train cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup. PiperOrigin-RevId: 168422582",custom_gradient.py,"@@ -38,7 +38,7 @@ def custom_gradient(f): """"""Decorator to define a function with a custom gradient. The input function is expected to return the tuple - (results, gradient_function) + (results, gradient_function). The output function will return results while possibly recording the gradient_function and inputs in the tape. ",0,train cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup. PiperOrigin-RevId: 168422582",execute.py,"@@ -153,9 +153,10 @@ def make_shape(v, arg_name): try: shape = tensor_shape.as_shape(v) except TypeError as e: - raise TypeError(""Error converting %s to a TensorShape: %s"" % (arg_name, e)) + raise TypeError(""Error converting %s to a TensorShape: %s."" % (arg_name, e)) except ValueError as e: - raise ValueError(""Error converting %s to a TensorShape: %s"" % (arg_name, e)) + raise ValueError(""Error converting %s to a TensorShape: %s."" % (arg_name, + e)) if shape.ndims is None: return None else: @@ -171,7 +172,7 @@ def make_tensor(v, arg_name): text_format.Merge(v, pb) return pb raise TypeError( - ""Don't know how to convert %s to a TensorProto for argument '%s'"" % + ""Don't know how to convert %s to a TensorProto for argument '%s'."" % (repr(v), arg_name)) @@ -217,7 +218,7 @@ def args_to_mixed_eager_tensors(lists): for l in lists[1:]: if len(l) != len(lists[0]): raise ValueError( - ""Expected list arguments to be the same length: %d != %d (%r vs. %r)"" + ""Expected list arguments to be the same length: %d != %d (%r vs. %r)."" % (len(lists[0]), len(l), lists[0], l)) lists_ret.append([]) ",0,train cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup. PiperOrigin-RevId: 168422582",execution_callbacks.py,"@@ -228,8 +228,8 @@ def add_execution_callback(callback): it is unset. `attrs` contains the attributes of the operation as a `tuple` of alternating attribute name and attribute value. - `inputs` is the `list` of input `tfe.Tensor`(s) to the op. - `outputs` is the `list` of output `tfe.Tensor`(s) from the op. + `inputs` is the `list` of input `Tensor`(s) to the op. + `outputs` is the `list` of output `Tensor`(s) from the op. Return value(s) from the callback are ignored. 
"""""" context.get_default_context().add_post_execution_callback(callback) @@ -246,8 +246,8 @@ def seterr(inf_or_nan=None): Example: ``` python tfe.seterr(inf_or_nan=""raise"") - a = tfe.Tensor(10.0) - b = tfe.Tensor(0.0) + a = Tensor(10.0) + b = Tensor(0.0) c = a / b # <-- Raises InfOrNanError. tfe.seterr(inf_or_nan=""ignore"") ",0,train cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup. PiperOrigin-RevId: 168422582",function.py,"@@ -41,7 +41,7 @@ from tensorflow.python.util import nest # Thread-local storage for tfe Tensors which are referenced while evaluating a # graph-mode function. _scoped_captures = threading.local() -# _scoped_captures.tensors is either None or a map from tfe.Tensor id to a pair +# _scoped_captures.tensors is either None or a map from Tensor id to a pair # of a tfe tensor and its corresponding placeholder to pass as a function # argument. The value should be None unless we're in function definition # context. @@ -62,7 +62,7 @@ def _convert_to_graph_tensor(value, dtype=None, name=None, as_ref=False): """"""Captures a Tensor while building a graph mode function. Arguments: - value: A tfe.Tensor object + value: A Tensor object. dtype: The datatype of the value produced by the node in the graph. name: Name of the node in the graph. as_ref: Ignored (required by register_tensor_conversion_function). @@ -482,12 +482,12 @@ def defun(func): func must be a Python function that constructs a TensorFlow graph, typically using functions in the tensorflow module. - Arguments to func can be either tfe.Tensor objects or Python + Arguments to func can be either Tensor objects or Python objects. Non-Tensor python objects are treated as constants, and new function definitions are created internally based on their values. - func must return a tf.Tensor (NOT a tfe.Tensor) or a list of tf.Tensor (NOT a - tfe.Tensor). TODO(apassos) make the wrapped tfe ops return tf.Tensors when in + func must return a tf.Tensor (NOT a Tensor) or a list of tf.Tensor (NOT a + Tensor). TODO(apassos) make the wrapped tfe ops return tf.Tensors when in graph mode. TODO(apassos): deal with captured global state. Deal with control flow. @@ -497,6 +497,6 @@ def defun(func): Returns: A callable that will execute the compiled function (and return zero - or more tfe.Tensor objects) + or more Tensor objects). """""" return named_defun(func, func.__name__) ",0,train cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup. PiperOrigin-RevId: 168422582",ops_test.py,"@@ -99,7 +99,7 @@ class TargetTest(test_util.TensorFlowTestCase): # with tfe.device('/gpu:0'): # ... # code here # with tfe.device('/cpu:0'): - # shape = tfe.Tensor(...) + # shape = Tensor(...) # y = tfe.ops.random_uniform(.., shape) # # Without the CPU device block tfe.ops.random_uniform would fail since the @@ -108,7 +108,7 @@ class TargetTest(test_util.TensorFlowTestCase): # After this change, we simplify the code: # # with tfe.device('/gpu:0'): - # y = tfe.ops.random_uniform(, tfe.Tensor(...)) + # y = tfe.ops.random_uniform(, Tensor(...)) # # The approximation is not exact since if there are GPU kernels which do not # require host memory for int32 tensors, there will be a discrepancy between ",0,train cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup. 
PiperOrigin-RevId: 168422582",pywrap_tfe.h,"@@ -64,7 +64,7 @@ PyObject* TFE_Py_RegisterExceptionClass(PyObject* e); // class registered via TFE_Py_RegisterExceptionClass) and returns -1. int TFE_Py_MayBeRaiseException(TF_Status* status); -// Returns the string associated with the passed-in python object/ +// Returns the string associated with the passed-in python object. char* TFE_GetPyThonString(PyObject* o); #endif // TENSORFLOW_PYTHON_EAGER_PYWRAP_TFE_H_ ",0,train 568b9e56038d2d2fe8927f9e0b538bf5e49116f6,tensorflow/tensorflow,"Fix Toco IdentifyL2Normalization bugs PiperOrigin-RevId: 229780959",generate_examples.py,"@@ -1424,6 +1424,36 @@ def make_conv_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) +# Note: This is a regression test for a bug (b/122651451) that Toco incorrectly +# erases the reduction indices array while it's shared with other ops. +def make_l2norm_shared_epsilon_tests(zip_path): + """"""Regression test for a bug (b/122651451)."""""" + + # Chose a set of parameters + test_parameters = [{ + ""input_shape"": [[5, 7]], + ""dim"": [1], + ""epsilon"": [1e-8], + }] + + def build_graph(parameters): + input_tensor = tf.placeholder( + dtype=tf.float32, name=""input"", shape=parameters[""input_shape""]) + epsilon = tf.constant(parameters[""epsilon""]) + out1 = tf.nn.l2_normalize(input_tensor, parameters[""dim""], epsilon=epsilon) + out2 = tf.nn.l2_normalize(input_tensor, parameters[""dim""], epsilon=epsilon) + out = out1 + out2 + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_values = create_tensor_data( + np.float32, parameters[""input_shape""], min_value=-4, max_value=10) + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + # Note: This is a regression test for a bug (b/112436267) that Toco incorrectly # fuses weights when multiple Conv2D/FULLY_CONNECTED ops share the same constant # weight tensor. 
",0,train 568b9e56038d2d2fe8927f9e0b538bf5e49116f6,tensorflow/tensorflow,"Fix Toco IdentifyL2Normalization bugs PiperOrigin-RevId: 229780959",identify_l2_normalization.cc,"@@ -151,20 +151,12 @@ std::vector>::iterator FindOperator( // Erase the subgraph that is now replaced by L2Normalization model->operators.erase(FindOperator(model, square_op)); - model->EraseArray(sum_op->inputs[0]); - if (sum_op->inputs.size() > 1) { - model->EraseArray(sum_op->inputs[1]); - } - model->operators.erase(FindOperator(model, sum_op)); + DeleteOpAndArraysIfUnused(model, sum_op); if (add_op) { - model->EraseArray(add_op->inputs[0]); - model->EraseArray(add_op->inputs[1]); - model->operators.erase(FindOperator(model, add_op)); + DeleteOpAndArraysIfUnused(model, add_op); } - model->EraseArray(sqrt_or_rsqrt_op->inputs[0]); - model->operators.erase(FindOperator(model, sqrt_or_rsqrt_op)); - model->EraseArray(div_or_mul_op->inputs[1]); - model->operators.erase(FindOperator(model, div_or_mul_op)); + DeleteOpAndArraysIfUnused(model, sqrt_or_rsqrt_op); + DeleteOpAndArraysIfUnused(model, div_or_mul_op); *modified = true; return ::tensorflow::Status::OK(); } ",0,train 568b9e56038d2d2fe8927f9e0b538bf5e49116f6,tensorflow/tensorflow,"Fix Toco IdentifyL2Normalization bugs PiperOrigin-RevId: 229780959",tooling_util.cc,"@@ -173,7 +173,7 @@ bool DeleteArrayIfUsedOnce(const string& array_name, Model* model) { return false; } -void DeleteOpAndArraysIfUnused(Model* model, Operator* op) { +void DeleteOpAndArraysIfUnused(Model* model, const Operator* op) { for (const string& array_name : op->inputs) { DeleteArrayIfUsedOnce(array_name, model); } ",0,train 568b9e56038d2d2fe8927f9e0b538bf5e49116f6,tensorflow/tensorflow,"Fix Toco IdentifyL2Normalization bugs PiperOrigin-RevId: 229780959",tooling_util.h,"@@ -72,7 +72,7 @@ bool DeleteArrayIfUsedOnce(const string& array_name, Model* model); // Deletes the op and any of its input and output arrays if they are unused // after the op has been deleted. -void DeleteOpAndArraysIfUnused(Model* model, Operator* op); +void DeleteOpAndArraysIfUnused(Model* model, const Operator* op); std::vector>::const_iterator FindOpWithOutput( const Model& model, const string& array_name); ",0,train 6fd13a50e5c48eb385cd0a5431cd6ca966fc4152,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-09-11 PiperOrigin-RevId: 396085135 Change-Id: I5219fbf4293c5aac0c04fa426b5c712ac7c77fbc",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 9, 10) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 9, 11) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train f59a82f2b08dca1641d5766fdd2234d3b665a862,tensorflow/tensorflow,"Replacing the current inner Cholesky decomposition loop with a While loop rolled version. This will allow for much larger Cholesky decompositions (and thus matrix inversions) than previously possible on TPU because of the use of rolled While loops so XLA compilation will no longer timeout. While there is a minor runtime performance decrease (now 25ms vs 15ms for a 500x500 matrix) the compilation time is significantly faster (12.8s vs 55.2s for a 500x500 matrix.) 
PiperOrigin-RevId: 193114816",cholesky.cc,"@@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the ""License""); you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ limitations under the License. #include ""tensorflow/compiler/tf2xla/lib/batch_dot.h"" #include ""tensorflow/compiler/tf2xla/lib/triangular_solve.h"" #include ""tensorflow/compiler/tf2xla/lib/util.h"" +#include ""tensorflow/compiler/tf2xla/lib/while_loop.h"" #include ""tensorflow/compiler/xla/literal_util.h"" #include ""tensorflow/compiler/xla/shape_util.h"" #include ""tensorflow/compiler/xla/status_macros.h"" @@ -31,68 +32,122 @@ namespace tensorflow { namespace { +// The Cholesky–Banachiewicz algorithm. See +// https://en.wikipedia.org/wiki/Cholesky_decomposition#The_Cholesky–Banachiewicz_and_Cholesky–Crout_algorithms +// for a description. +// // def cholesky_unblocked(a): // assert len(a.shape) == 2 and a.shape[-2] == a.shape[-1] // n = a.shape[-2] // l = np.zeros_like(a) // for j in xrange(n): -// r = l[..., j, :j] -// l[..., j, j] = np.sqrt(a[..., j, j] - np.dot(r, r)) -// l[..., j+1:, j] = (a[..., j+1:, j] - np.dot(l[..., j+1:, :j], -// np.transpose(r))) / l[..., j, j] +// row = l[..., j, :j] +// row_t = np.swapaxes(row, -1, -2) +// l[..., j, j] = np.sqrt(a[..., j, j] - np.dot(row, row_t)) +// l[..., j+1:, j] = (a[..., j+1:, j] - np.dot(l[..., j+1:, :j], row_t)) / +// l[..., j, j] // return l xla::StatusOr CholeskyUnblocked( xla::ComputationBuilder* builder, const xla::ComputationDataHandle& a) { - TF_ASSIGN_OR_RETURN(std::unique_ptr shape, builder->GetShape(a)); - xla::ComputationDataHandle l = Zeros(builder, *shape); - const int64 n = xla::ShapeUtil::GetDimension(*shape, -2); - for (int j = 0; j < n; ++j) { - // Picture of block structure: - // ... \ - // \ - // -- r -- d - // |\ - // B c \ - // | \ - // | ... - // - // ^ - // column j - TF_ASSIGN_OR_RETURN(auto d, - SliceInMinorDims(builder, a, {j, j}, {j + 1, j + 1})); - TF_ASSIGN_OR_RETURN(auto c, - SliceInMinorDims(builder, a, {j + 1, j}, {n, j + 1})); - xla::ComputationDataHandle new_d_squared = d; - xla::ComputationDataHandle br; - if (j > 0) { - TF_ASSIGN_OR_RETURN(auto r, - SliceInMinorDims(builder, l, {j, 0}, {j + 1, j})); - TF_ASSIGN_OR_RETURN(auto b, - SliceInMinorDims(builder, l, {j + 1, 0}, {n, j})); - TF_ASSIGN_OR_RETURN(auto r_squared, - BatchDot(builder, r, r, /*transpose_x=*/false, - /*transpose_y=*/true, /*conjugate_x=*/false, - /*conjugate_y=*/false)); - new_d_squared = builder->Sub(new_d_squared, r_squared); + TF_ASSIGN_OR_RETURN(std::unique_ptr a_shape, + builder->GetShape(a)); + const int n_dims = xla::ShapeUtil::Rank(*a_shape); + const int64 n = xla::ShapeUtil::GetDimension(*a_shape, -1); + gtl::ArraySlice major_dims(xla::AsInt64Slice(a_shape->dimensions()), + /*pos=*/0, + /*len=*/n_dims - 2); - TF_ASSIGN_OR_RETURN(br, BatchDot(builder, b, r, /*transpose_x=*/false, - /*transpose_y=*/true, - /*conjugate_x=*/false, - /*conjugate_y=*/false)); - } - auto new_d_inv = builder->Pow( - new_d_squared, FloatLiteral(builder, shape->element_type(), -0.5)); - auto new_d = builder->Mul(new_d_inv, new_d_squared); - TF_ASSIGN_OR_RETURN(l, UpdateSliceInMinorDims(builder, l, new_d, {j, j})); + xla::ComputationDataHandle l = Zeros(builder, *a_shape); - if (j > 0) { - c = builder->Sub(c, br); + // Construct the for loop body to iterate over rows. 
+ auto body_fn = [&](xla::ComputationDataHandle i, + gtl::ArraySlice loop_vars, + xla::ComputationBuilder* body_builder) + -> xla::StatusOr> { + xla::Shape col_shape; + xla::Shape row_shape; + for (int64 d : major_dims) { + row_shape.add_dimensions(d); + col_shape.add_dimensions(d); } - auto new_c = builder->Mul(c, new_d_inv); - TF_ASSIGN_OR_RETURN(l, - UpdateSliceInMinorDims(builder, l, new_c, {j + 1, j})); - } - return l; + row_shape.add_dimensions(1); + row_shape.add_dimensions(n); + row_shape.set_element_type(a_shape->element_type()); + auto mask_zeros_row = Zeros(body_builder, row_shape); + + col_shape.add_dimensions(n); + col_shape.add_dimensions(1); + col_shape.set_element_type(a_shape->element_type()); + auto mask_zeros_col = Zeros(body_builder, col_shape); + + std::vector mask_vector(n); + std::iota(mask_vector.begin(), mask_vector.end(), 0); + auto mask_range = body_builder->ConstantR1(mask_vector); + auto mask_range_row = body_builder->Broadcast( + body_builder->Reshape(mask_range, {0}, {1, n}), major_dims); + auto mask_range_col = body_builder->Broadcast( + body_builder->Reshape(mask_range, {0}, {n, 1}), major_dims); + auto body_a = loop_vars[0]; + auto body_l = loop_vars[1]; + + // row = l[..., i, :i] + // select the whole i-th row, then mask out all columns past i-1 + auto zero = body_builder->ConstantR0(0); + TF_ASSIGN_OR_RETURN(auto l_i, DynamicSliceInMinorDims(body_builder, body_l, + {i, zero}, {1, n})); + auto row = body_builder->Select(body_builder->Ge(mask_range_row, i), + mask_zeros_row, l_i); + // a[..., i, i] + TF_ASSIGN_OR_RETURN(auto a_ii, DynamicSliceInMinorDims(body_builder, body_a, + {i, i}, {1, 1})); + // np.dot(row, np.swapaxes(row, -1, -2)) + xla::ComputationDataHandle diag_dot; + TF_ASSIGN_OR_RETURN(diag_dot, BatchDot(body_builder, row, row, + /*transpose_x=*/false, + /*transpose_y=*/true)); + // l[..., i, i] = np.sqrt(a[..., i, i] - np.dot(row, + // np.swapaxes(row, -1, -2))) + auto l_ii = body_builder->Pow( + body_builder->Sub(a_ii, diag_dot), + FloatLiteral(body_builder, a_shape->element_type(), 0.5)); + + // a[..., i+1:, i] + auto ip1 = body_builder->Add(i, body_builder->ConstantR0(1)); + // select the whole i-th column, then mask out all rows above i+1 + TF_ASSIGN_OR_RETURN( + auto a_0i, DynamicSliceInMinorDims(body_builder, body_a, {i}, {1})); + auto a_ip1i = body_builder->Select(body_builder->Le(mask_range_col, i), + mask_zeros_col, a_0i); + + // l[..., i+1:, i] = (a[..., i+1:, i] - np.dot(l[..., i+1:, :i], r.T)) / + // l[..., i, i] + // The columns in [i, n] are zeroed out in `row`, so we just have to + // zero out rows above i+1 after the BatchDot. np.dot(l[..., :, :i], + // r.T) + TF_ASSIGN_OR_RETURN(auto dot, BatchDot(body_builder, body_l, row, + /*transpose_x=*/false, + /*transpose_y=*/true)); + // np.dot(l[..., i+1:, :i], r.T) + auto dot_ip1 = body_builder->Select(body_builder->Le(mask_range_col, i), + mask_zeros_col, dot); + + auto col_update = + body_builder->Div(body_builder->Sub(a_ip1i, dot_ip1), l_ii); + TF_ASSIGN_OR_RETURN(body_l, DynamicUpdateSliceInMinorDims( + body_builder, body_l, col_update, {i})); + // Assign the diagonal after the rest of the column because otherwise the + // column assign will wrap around and overwrite the diagonal assign. 
+ TF_ASSIGN_OR_RETURN(body_l, DynamicUpdateSliceInMinorDims( + body_builder, body_l, l_ii, {i, i})); + + return std::vector{body_a, body_l}; + }; + + TF_ASSIGN_OR_RETURN( + auto cholesky_while, + XlaForEachIndex(n, xla::S32, body_fn, {a, l}, ""unblocked"", builder)); + + return cholesky_while[1]; } } // namespace ",0,train f59a82f2b08dca1641d5766fdd2234d3b665a862,tensorflow/tensorflow,"Replacing the current inner Cholesky decomposition loop with a While loop rolled version. This will allow for much larger Cholesky decompositions (and thus matrix inversions) than previously possible on TPU because of the use of rolled While loops so XLA compilation will no longer timeout. While there is a minor runtime performance decrease (now 25ms vs 15ms for a 500x500 matrix) the compilation time is significantly faster (12.8s vs 55.2s for a 500x500 matrix.) PiperOrigin-RevId: 193114816",cholesky.h,"@@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the ""License""); you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ namespace tensorflow { // the block size to use. // TODO(phawkins): check for negative values on the diagonal and return an // error, instead of silently yielding NaNs. -// TODO(mattjj): handle the complex Hermitian case +// TODO(znado): handle the complex Hermitian case xla::StatusOr Cholesky( xla::ComputationBuilder* builder, xla::ComputationDataHandle a, int64 block_size = 256); ",0,train d8168396f11ad34939819b8e866668ad375998c1,tensorflow/tensorflow,"Excluding test failing on windows with cmake. PiperOrigin-RevId: 172130104",batch_dataset_op_test.py,"@@ -22,7 +22,7 @@ import math import numpy as np from tensorflow.contrib.data.python.ops import batching -from tensorflow.python.data.ops import dataset_ops +from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors ",0,train 747ca958fa6c51ffd81616867e7f043216cbe4ad,tensorflow/tensorflow,"Trace CUPTI synchronization events. 
- cuEventSynchronize - cuStreamWaitEvent - cuStreamSynchronize - cuCtxSynchronize PiperOrigin-RevId: 353984518 Change-Id: Ib3a5a1f50993248ae71f32baa54390fe8b29d298",cupti_tracer.cc,"@@ -776,6 +776,36 @@ void AddMemsetActivityEvent(CuptiTraceCollector *collector, collector->AddEvent(std::move(event)); } +void AddSynchronizationActivityEvent( + CuptiTraceCollector *collector, const CUpti_ActivitySynchronization *sync) { + CuptiTracerEvent event{}; + event.type = CuptiTracerEventType::Generic; + event.source = CuptiTracerEventSource::Activity; + switch (sync->type) { + case CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_EVENT_SYNCHRONIZE: + event.name = ""cuEventSynchronize""; + break; + case CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_STREAM_WAIT_EVENT: + event.name = ""cuStreamWaitEvent""; + break; + case CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_STREAM_SYNCHRONIZE: + event.name = ""cuStreamSynchronize""; + break; + case CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_CONTEXT_SYNCHRONIZE: + event.name = ""cuCtxSynchronize""; + break; + default: + event.name = ""unknown synchronization event""; + break; + } + event.start_time_ns = sync->start; + event.end_time_ns = std::max(sync->end, sync->start + 1); + event.correlation_id = sync->correlationId; + event.context_id = sync->contextId; + VLOG(5) << ""Cuda activity "" << event.name; + collector->AddEvent(std::move(event)); +} + // This hook uses cupti activity api to measure device side activities. class CuptiDriverApiHookWithActivityApi : public CuptiDriverApiHook { public: @@ -1901,6 +1931,11 @@ Status CuptiTracer::ProcessActivityBuffer(CUcontext context, uint32_t stream_id, AddMemsetActivityEvent( collector_, reinterpret_cast(record)); break; + case CUPTI_ACTIVITY_KIND_SYNCHRONIZATION: + AddSynchronizationActivityEvent( + collector_, + reinterpret_cast(record)); + break; default: LOG(ERROR) << ""Activity type "" << record->kind << "" not supported.""; break; ",0,train 80406a561aa3226cf06c8ddfc2ff528ca7fa0ff7,tensorflow/tensorflow,"Fix a typo PiperOrigin-RevId: 249281178",lstm_test.cc,"@@ -1957,14 +1957,14 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest, const int n_input = 5; const int n_cell = 4; const int n_output = 3; - const float ceil_clip = 0.0; + const float cell_clip = 0.0; const float proj_clip = 0.0; LayerNormLSTMOpModel layer_norm_lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, - /*use_projection_bias=*/false, ceil_clip, proj_clip, + /*use_projection_bias=*/false, cell_clip, proj_clip, { {n_batch, n_input}, // input tensor @@ -2052,14 +2052,14 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest, const int n_input = 5; const int n_cell = 4; const int n_output = 3; - const float ceil_clip = 0.0; + const float cell_clip = 0.0; const float proj_clip = 0.0; HybridLayerNormLSTMOpModel layer_norm_lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, - /*use_projection_bias=*/false, ceil_clip, proj_clip, + /*use_projection_bias=*/false, cell_clip, proj_clip, { {n_batch, n_input}, // input tensor @@ -2147,14 +2147,14 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest, const int n_input = 5; const int n_cell = 4; const int n_output = 3; - const float ceil_clip = 0.0; + const float cell_clip = 0.0; const float proj_clip = 0.0; HybridLayerNormLSTMOpModel layer_norm_lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, - 
/*use_projection_bias=*/false, ceil_clip, proj_clip, + /*use_projection_bias=*/false, cell_clip, proj_clip, { {n_batch, n_input}, // input tensor @@ -2289,14 +2289,14 @@ TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest, const int n_input = 5; const int n_cell = 4; const int n_output = 3; - const float ceil_clip = 0.0; + const float cell_clip = 0.0; const float proj_clip = 0.0; LayerNormLSTMOpModel layer_norm_lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/true, - /*use_projection_bias=*/false, ceil_clip, proj_clip, + /*use_projection_bias=*/false, cell_clip, proj_clip, { {n_batch, n_input}, // input tensor @@ -2379,14 +2379,14 @@ TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest, const int n_input = 5; const int n_cell = 4; const int n_output = 3; - const float ceil_clip = 0.0; + const float cell_clip = 0.0; const float proj_clip = 0.0; HybridLayerNormLSTMOpModel layer_norm_lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/true, - /*use_projection_bias=*/false, ceil_clip, proj_clip, + /*use_projection_bias=*/false, cell_clip, proj_clip, { {n_batch, n_input}, // input tensor @@ -2470,14 +2470,14 @@ TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest, const int n_input = 5; const int n_cell = 4; const int n_output = 3; - const float ceil_clip = 0.0; + const float cell_clip = 0.0; const float proj_clip = 0.0; HybridLayerNormLSTMOpModel layer_norm_lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/true, - /*use_projection_bias=*/false, ceil_clip, proj_clip, + /*use_projection_bias=*/false, cell_clip, proj_clip, { {n_batch, n_input}, // input tensor ",0,train bcf47bd5ed73e500f9d16d503023665d9921133b,tensorflow/tensorflow,"Fix to SeparableConv2D that did not work when a stride >1 was used with the channels_first data_format. PiperOrigin-RevId: 165028212",convolutional.py,"@@ -975,11 +975,15 @@ class SeparableConv2D(Conv2D): def call(self, inputs): # Apply the actual ops. + if self.data_format == 'channels_last': + strides = (1,) + self.strides + (1,) + else: + strides = (1, 1) + self.strides outputs = nn.separable_conv2d( inputs, self.depthwise_kernel, self.pointwise_kernel, - strides=(1,) + self.strides + (1,), + strides=strides, padding=self.padding.upper(), rate=self.dilation_rate, data_format=utils.convert_data_format(self.data_format, ndim=4)) ",0,test bcf47bd5ed73e500f9d16d503023665d9921133b,tensorflow/tensorflow,"Fix to SeparableConv2D that did not work when a stride >1 was used with the channels_first data_format. 
PiperOrigin-RevId: 165028212",convolutional_test.py,"@@ -439,6 +439,31 @@ class SeparableConv2DTest(test.TestCase): self.assertListEqual(output.get_shape().as_list(), [5, height / 2, width, 32]) + def testCreateSeparableConvWithStridesChannelsFirst(self): + data_format = 'channels_first' + height, width = 6, 8 + # Test strides tuple + images = random_ops.random_uniform((5, 3, height, width), seed=1) + layer = conv_layers.SeparableConv2D( + 32, [3, 3], strides=(2, 2), padding='same', data_format=data_format) + output = layer.apply(images) + self.assertListEqual(output.get_shape().as_list(), + [5, 32, height / 2, width / 2]) + + # Test strides integer + layer = conv_layers.SeparableConv2D(32, [3, 3], strides=2, padding='same', + data_format=data_format) + output = layer.apply(images) + self.assertListEqual(output.get_shape().as_list(), + [5, 32, height / 2, width / 2]) + + # Test unequal strides + layer = conv_layers.SeparableConv2D( + 32, [3, 3], strides=(2, 1), padding='same', data_format=data_format) + output = layer.apply(images) + self.assertListEqual(output.get_shape().as_list(), + [5, 32, height / 2, width]) + def testFunctionalConv2DReuse(self): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 3), seed=1) ",0,test 621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,direct_session.cc,"@@ -94,7 +94,7 @@ string GetRendezvousKey(const string& tensor_name, // // 2) Recv nodes always complete immediately: The inputs are sent into // the local rendezvous before we start the executor, so the -// corresonding recvs will not block. +// corresponding recvs will not block. // // Based on these assumptions, we can use the same thread pool for // both ""non-blocking"" and ""blocking"" OpKernels on Android. ",0,train 621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,direct_session_test.cc,"@@ -94,7 +94,7 @@ TEST_F(DirectSessionMinusAXTest, RunSimpleNetwork) { ASSERT_OK(s); ASSERT_EQ(1, outputs.size()); - // The first output should be initiailzed and have the correct + // The first output should be initialized and have the correct // output. auto mat = outputs[0].matrix(); ASSERT_TRUE(outputs[0].IsInitialized()); ",0,train 621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,executor.cc,"@@ -374,7 +374,7 @@ Status ExecutorImpl::InferAllocAttr( return s; } -// The state associated with one invokation of ExecutorImpl::Run. +// The state associated with one invocation of ExecutorImpl::Run. // ExecutorState dispatches nodes when they become ready and keeps // track of how many predecessors of a node have not done (pending_). class ExecutorState { ",0,train 621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,function.cc,"@@ -430,7 +430,7 @@ Status FunctionLibraryRuntimeImpl::InstantiateSymbolicGradient( const auto& func = f->func(); const FunctionDef* fdef = lib_def_->Find(func.name()); if (fdef == nullptr) { - // f is a primitve op. + // f is a primitive op. gradient::Creator creator; TF_RETURN_IF_ERROR(gradient::GetOpGradientCreator(func.name(), &creator)); if (creator == nullptr) { @@ -1100,7 +1100,7 @@ class SymbolicGradientHelper { // 'ready' keeps track of nodes that have been completely // backpropped. 
Initially, for every output y of the function f, we - // add dy as an input of the the gradient function. + // add dy as an input of the gradient function. std::deque ready_; // Makes a copy of fbody_ in gbody_. ",0,train 621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,function.h,"@@ -90,7 +90,7 @@ bool RemoveListArrayConverter(Graph* g); // multiple times by calling ExpandInlineFunctions a few times. bool ExpandInlineFunctions(FunctionLibraryRuntime* lib, Graph* graph); -// Applies graph rewrite optimzation such as inlining, dead code +// Applies graph rewrite optimization such as inlining, dead code // removal, etc. // // **g is a graph constructed based on the runtime library 'lib'. ",0,train 621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,gpu_allocator_retry.h,"@@ -32,7 +32,7 @@ class GPUAllocatorRetry { // then wait up to 'max_millis_to_wait' milliseconds, retrying each // time a call to DeallocateRaw() is detected, until either a good // pointer is returned or the deadline is exhausted. If the - // deadline is exahusted, try one more time with 'verbose_failure' + // deadline is exhausted, try one more time with 'verbose_failure' // set to true. The value returned is either the first good pointer // obtained from 'alloc_func' or nullptr. void* AllocateRaw(std::function in_use_by_size; for (auto& it : ptr_to_chunk_map_) { ",0,train 621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,gpu_event_mgr_test.cc,"@@ -199,7 +199,7 @@ TEST(EventMgr, StreamSwitchingFlushesImmediately) { EXPECT_GT(initial_live_bytes, live_tensor_bytes); } -TEST(EventMgr, ManySmallTensorsSeperateCallsFlushed) { +TEST(EventMgr, ManySmallTensorsSeparateCallsFlushed) { auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie(); EventMgr em(stream_exec, GPUOptions()); TEST_EventMgrHelper th(&em); ",0,train 621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,gpu_region_allocator.cc,"@@ -279,7 +279,7 @@ void GPURegionAllocator::CheckForMemoryLeaks() { } // Since there's no merging of chunks once allocated, we want to -// maximize their reusablity (which argues for fewer, larger sizes), +// maximize their reusability (which argues for fewer, larger sizes), // while minimizing waste (which argues for tight-fitting sizes). // // The smallest unit of allocation is 256 bytes. ",0,train 621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,gpu_stream_util.cc,"@@ -61,7 +61,7 @@ Status AssignStreams(const Graph* graph, const AssignStreamsOpts& opts, } } } - // We perform stream assigmnent assuming a large number of + // We perform stream assignment assuming a large number of // stream IDs and then map these down to the required number of streams // using simple round-robin. // Stream Assignment strategy: ",0,train 621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,gpu_util.h,"@@ -36,7 +36,7 @@ class GPUUtil { // ""tensor"" is GPU-local. ""dev"" is the hosting GPU. // ""device_context"" should be the context of the GPU ""_Send"" op // which provides the Tensor. 
- // Sets all necessasry fields of ""proto"" by transferring value + // Sets all necessary fields of ""proto"" by transferring value // bytes from GPU to CPU RAM. ""is_dead"" indicates that the // tensor is dead with an uninit value. static void SetProtoFromGPU(const Tensor& tensor, Device* dev, ",0,train 621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,pool_allocator.cc,"@@ -47,7 +47,7 @@ PoolAllocator::PoolAllocator(size_t pool_size_limit, bool auto_resize, PoolAllocator::~PoolAllocator() { Clear(); } namespace { -// Pools contain Chunks allocatated from the underlying Allocator. +// Pools contain Chunks allocated from the underlying Allocator. // Chunk alignment is always on kPoolAlignment boundaries. Each Chunk // begins with a descriptor (ChunkPrefix) that gives its size and a // pointer to itself. The pointer returned to the user is just past @@ -56,7 +56,7 @@ namespace { // pointer and also re-write the ChunkPrefix.chunk_ptr value // immediately before it. This way the Chunk address and size can be // recovered from the returned user pointer, regardless of alignment. -// Note that this deferencing of the pointers means that we cannot +// Note that this dereferencing of the pointers means that we cannot // handle GPU memory, only CPU memory. struct ChunkPrefix { size_t num_bytes; ",0,train 621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,process_state.cc,"@@ -46,7 +46,7 @@ const bool FLAGS_brain_gpu_region_allocator_reset_to_nan = false; const bool FLAGS_brain_gpu_use_bfc_allocator = true; // If true, record attributes of memory allocations and -// dyanmically check for appropriate use of registered memory. +// dynamically check for appropriate use of registered memory. // Should only be true for debugging or diagnosis of // performance issues. bool FLAGS_brain_gpu_record_mem_types = false; ",0,train 621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,process_state.h,"@@ -67,7 +67,7 @@ class ProcessState { MemDesc PtrType(const void* ptr); // Returns the one CPUAllocator used for the given numa_node. - // TEMPORY: ignores numa_node. + // TEMPORARY: ignores numa_node. Allocator* GetCPUAllocator(int numa_node); // Returns the one GPU allocator used for the indexed GPU. @@ -80,7 +80,7 @@ class ProcessState { // used on that first call is used. // // ""Allocator type"" describes the type of algorithm to use for the - // underlying allocator. REQURES: Must be a valid type (see + // underlying allocator. REQUIRES: Must be a valid type (see // config.proto for the list of supported strings.). // // REQUIRES: gpu_id must be a valid ordinal for a GPU available in the @@ -98,7 +98,7 @@ class ProcessState { // interface to be used for network device memory registration. // ""bus_id"" is platform-specific. On many platforms it // should be 0. On machines with multiple PCIe buses, it should be - // the index of one of the PCIe buses. If the the bus_id is invalid, + // the index of one of the PCIe buses. If the bus_id is invalid, // results are undefined. 
typedef std::function AllocVisitor; void AddGPUAllocVisitor(int bus_id, AllocVisitor visitor); ",0,train 621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,simple_placer.cc,"@@ -37,7 +37,7 @@ namespace { // types in 'supported_device_types' and returns the *first* subset of devices // that match. // -// For example, if suported_device_types contains {GPU, CPU} and +// For example, if supported_device_types contains {GPU, CPU} and // 'devices' contains CPU and GPU devices, the returned vector will // include *only* GPU devices, since that is higher in the priority // order in 'supported_device_types'. ",0,train b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,builtin_ops.h,"@@ -172,7 +172,6 @@ typedef enum { kTfLiteBuiltinVarHandle = 142, kTfLiteBuiltinReadVariable = 143, kTfLiteBuiltinAssignVariable = 144, - kTfLiteBuiltinTable = 145, } TfLiteBuiltinOperator; #ifdef __cplusplus ",0,train b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,flatbuffer_conversions.cc,"@@ -844,7 +844,6 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_HASHTABLE_SIZE: case BuiltinOperator_READ_VARIABLE: case BuiltinOperator_ASSIGN_VARIABLE: - case BuiltinOperator_TABLE: return kTfLiteOk; case BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES: return kTfLiteError; ",0,train b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,builtin_op_kernels.h,"@@ -159,7 +159,6 @@ TfLiteRegistration* Register_STRIDED_SLICE(); TfLiteRegistration* Register_SUB(); TfLiteRegistration* Register_SUM(); TfLiteRegistration* Register_SVDF(); -TfLiteRegistration* Register_TABLE(); TfLiteRegistration* Register_TANH(); TfLiteRegistration* Register_TILE(); TfLiteRegistration* Register_TOPK_V2(); ",0,train b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,register.cc,"@@ -28,6 +28,7 @@ TfLiteRegistration* Register_NUMERIC_VERIFY(); TfLiteRegistration* Register_AUDIO_SPECTROGRAM(); TfLiteRegistration* Register_MFCC(); TfLiteRegistration* Register_DETECTION_POSTPROCESS(); +TfLiteRegistration* Register_TABLE(); } // namespace custom @@ -325,7 +326,6 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_VAR_HANDLE, Register_VAR_HANDLE()); AddBuiltin(BuiltinOperator_READ_VARIABLE, Register_READ_VARIABLE()); AddBuiltin(BuiltinOperator_ASSIGN_VARIABLE, Register_ASSIGN_VARIABLE()); - AddBuiltin(BuiltinOperator_TABLE, Register_TABLE()); AddCustom(""NumericVerify"", tflite::ops::custom::Register_NUMERIC_VERIFY()); // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that // custom ops aren't always included by default. @@ -334,6 +334,7 @@ BuiltinOpResolver::BuiltinOpResolver() { tflite::ops::custom::Register_AUDIO_SPECTROGRAM()); AddCustom(""TFLite_Detection_PostProcess"", tflite::ops::custom::Register_DETECTION_POSTPROCESS()); + AddCustom(""Table"", tflite::ops::custom::Register_TABLE()); // By definition, all of the ops added above are not user-defined ops, // since they are supported by BuiltinOpResolver. 
may_directly_contain_user_defined_ops_ = false; ",0,train b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,register_ref.cc,"@@ -29,6 +29,7 @@ TfLiteRegistration* Register_NUMERIC_VERIFY_REF(); TfLiteRegistration* Register_AUDIO_SPECTROGRAM(); TfLiteRegistration* Register_MFCC(); TfLiteRegistration* Register_DETECTION_POSTPROCESS(); +TfLiteRegistration* Register_TABLE(); } // namespace custom @@ -163,7 +164,6 @@ TfLiteRegistration* Register_IMAG(); TfLiteRegistration* Register_REAL(); TfLiteRegistration* Register_COMPLEX_ABS(); TfLiteRegistration* Register_CONV_3D_TRANSPOSE_REF(); -TfLiteRegistration* Register_TABLE(); namespace { @@ -475,7 +475,6 @@ BuiltinRefOpResolver::BuiltinRefOpResolver() { AddBuiltin(BuiltinOperator_COMPLEX_ABS, Register_COMPLEX_ABS()); AddBuiltin(BuiltinOperator_CONV_3D_TRANSPOSE, Register_CONV_3D_TRANSPOSE_REF()); - AddBuiltin(BuiltinOperator_TABLE, Register_TABLE()); AddCustom(""NumericVerify"", tflite::ops::custom::Register_NUMERIC_VERIFY_REF()); // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that @@ -485,6 +484,7 @@ BuiltinRefOpResolver::BuiltinRefOpResolver() { tflite::ops::custom::Register_AUDIO_SPECTROGRAM()); AddCustom(""TFLite_Detection_PostProcess"", tflite::ops::custom::Register_DETECTION_POSTPROCESS()); + AddCustom(""Table"", tflite::ops::custom::Register_TABLE()); } } // namespace builtin ",0,train b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,table.cc,"@@ -20,7 +20,7 @@ limitations under the License. namespace tflite { namespace ops { -namespace builtin { +namespace custom { namespace table { constexpr int kInputTensor = 0; @@ -124,6 +124,6 @@ TfLiteRegistration* Register_TABLE() { return &r; } -} // namespace builtin +} // namespace custom } // namespace ops } // namespace tflite \ No newline at end of file ",0,train b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,table_test.cc,"@@ -20,6 +20,11 @@ limitations under the License. 
#include ""tensorflow/lite/schema/schema_generated.h"" namespace tflite { +namespace ops { +namespace custom { + +TfLiteRegistration* Register_TABLE(); + namespace { using ::testing::ElementsAreArray; @@ -31,8 +36,7 @@ class TableOpModel : public SingleOpModel { input_ = AddInput(input); table_ = AddInput(table); output_ = AddOutput(output); - SetBuiltinOp(BuiltinOperator_TABLE, BuiltinOptions_TableOptions, - CreateSubOptions(builder_).Union()); + SetCustomOp(""Table"", {}, Register_TABLE); BuildInterpreter({GetShape(input_), GetShape(table_)}); } @@ -139,4 +143,6 @@ TEST(TableOpTest, Int16ToInt8WithExpLUT) { } } // namespace -} // namespace tflite \ No newline at end of file +} // namespace custom +} // namespace ops +} // namespace tflite ",0,train b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,schema_generated.h,"@@ -385,9 +385,6 @@ struct ReadVariableOptionsT; struct AssignVariableOptions; struct AssignVariableOptionsT; -struct TableOptions; -struct TableOptionsT; - struct OperatorCode; struct OperatorCodeT; @@ -856,12 +853,11 @@ enum BuiltinOperator { BuiltinOperator_VAR_HANDLE = 142, BuiltinOperator_READ_VARIABLE = 143, BuiltinOperator_ASSIGN_VARIABLE = 144, - BuiltinOperator_TABLE = 145, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_TABLE + BuiltinOperator_MAX = BuiltinOperator_ASSIGN_VARIABLE }; -inline const BuiltinOperator (&EnumValuesBuiltinOperator())[146] { +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[145] { static const BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -1007,14 +1003,13 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[146] { BuiltinOperator_CONV_3D_TRANSPOSE, BuiltinOperator_VAR_HANDLE, BuiltinOperator_READ_VARIABLE, - BuiltinOperator_ASSIGN_VARIABLE, - BuiltinOperator_TABLE + BuiltinOperator_ASSIGN_VARIABLE }; return values; } inline const char * const *EnumNamesBuiltinOperator() { - static const char * const names[147] = { + static const char * const names[146] = { ""ADD"", ""AVERAGE_POOL_2D"", ""CONCATENATION"", @@ -1160,14 +1155,13 @@ inline const char * const *EnumNamesBuiltinOperator() { ""VAR_HANDLE"", ""READ_VARIABLE"", ""ASSIGN_VARIABLE"", - ""TABLE"", nullptr }; return names; } inline const char *EnumNameBuiltinOperator(BuiltinOperator e) { - if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_TABLE)) return """"; + if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_ASSIGN_VARIABLE)) return """"; const size_t index = static_cast(e); return EnumNamesBuiltinOperator()[index]; } @@ -1287,12 +1281,11 @@ enum BuiltinOptions { BuiltinOptions_VarHandleOptions = 111, BuiltinOptions_ReadVariableOptions = 112, BuiltinOptions_AssignVariableOptions = 113, - BuiltinOptions_TableOptions = 114, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_TableOptions + BuiltinOptions_MAX = BuiltinOptions_AssignVariableOptions }; -inline const BuiltinOptions (&EnumValuesBuiltinOptions())[115] { +inline const BuiltinOptions (&EnumValuesBuiltinOptions())[114] { static const BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -1407,14 +1400,13 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[115] { BuiltinOptions_HashtableSizeOptions, BuiltinOptions_VarHandleOptions, BuiltinOptions_ReadVariableOptions, - BuiltinOptions_AssignVariableOptions, - BuiltinOptions_TableOptions + 
BuiltinOptions_AssignVariableOptions }; return values; } inline const char * const *EnumNamesBuiltinOptions() { - static const char * const names[116] = { + static const char * const names[115] = { ""NONE"", ""Conv2DOptions"", ""DepthwiseConv2DOptions"", @@ -1529,14 +1521,13 @@ inline const char * const *EnumNamesBuiltinOptions() { ""VarHandleOptions"", ""ReadVariableOptions"", ""AssignVariableOptions"", - ""TableOptions"", nullptr }; return names; } inline const char *EnumNameBuiltinOptions(BuiltinOptions e) { - if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_TableOptions)) return """"; + if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_AssignVariableOptions)) return """"; const size_t index = static_cast(e); return EnumNamesBuiltinOptions()[index]; } @@ -1997,10 +1988,6 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions; }; -template<> struct BuiltinOptionsTraits { - static const BuiltinOptions enum_value = BuiltinOptions_TableOptions; -}; - struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -2937,14 +2924,6 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_AssignVariableOptions ? reinterpret_cast(value) : nullptr; } - tflite::TableOptionsT *AsTableOptions() { - return type == BuiltinOptions_TableOptions ? - reinterpret_cast(value) : nullptr; - } - const tflite::TableOptionsT *AsTableOptions() const { - return type == BuiltinOptions_TableOptions ? - reinterpret_cast(value) : nullptr; - } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -10361,46 +10340,6 @@ inline flatbuffers::Offset CreateAssignVariableOptions( flatbuffers::Offset CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb, const AssignVariableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); -struct TableOptionsT : public flatbuffers::NativeTable { - typedef TableOptions TableType; - TableOptionsT() { - } -}; - -struct TableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef TableOptionsT NativeTableType; - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - verifier.EndTable(); - } - TableOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo(TableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const TableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); -}; - -struct TableOptionsBuilder { - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - explicit TableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - TableOptionsBuilder &operator=(const TableOptionsBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateTableOptions( - flatbuffers::FlatBufferBuilder &_fbb) { - TableOptionsBuilder builder_(_fbb); - return builder_.Finish(); -} - -flatbuffers::Offset CreateTableOptions(flatbuffers::FlatBufferBuilder &_fbb, const TableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); - struct OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode TableType; int8_t deprecated_builtin_code; @@ -10890,9 +10829,6 @@ struct Operator 
FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const tflite::AssignVariableOptions *builtin_options_as_AssignVariableOptions() const { return builtin_options_type() == tflite::BuiltinOptions_AssignVariableOptions ? static_cast(builtin_options()) : nullptr; } - const tflite::TableOptions *builtin_options_as_TableOptions() const { - return builtin_options_type() == tflite::BuiltinOptions_TableOptions ? static_cast(builtin_options()) : nullptr; - } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -11381,10 +11317,6 @@ template<> inline const tflite::AssignVariableOptions *Operator::builtin_options return builtin_options_as_AssignVariableOptions(); } -template<> inline const tflite::TableOptions *Operator::builtin_options_as() const { - return builtin_options_as_TableOptions(); -} - struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -15405,29 +15337,6 @@ inline flatbuffers::Offset CreateAssignVariableOptions(fl _fbb); } -inline TableOptionsT *TableOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { - auto _o = new TableOptionsT(); - UnPackTo(_o, _resolver); - return _o; -} - -inline void TableOptions::UnPackTo(TableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { - (void)_o; - (void)_resolver; -} - -inline flatbuffers::Offset TableOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { - return CreateTableOptions(_fbb, _o, _rehasher); -} - -inline flatbuffers::Offset CreateTableOptions(flatbuffers::FlatBufferBuilder &_fbb, const TableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { - (void)_rehasher; - (void)_o; - struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TableOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; - return tflite::CreateTableOptions( - _fbb); -} - inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new OperatorCodeT(); UnPackTo(_o, _resolver); @@ -16358,10 +16267,6 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } - case BuiltinOptions_TableOptions: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } default: return true; } } @@ -16832,10 +16737,6 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } - case BuiltinOptions_TableOptions: { - auto ptr = reinterpret_cast(obj); - return ptr->UnPack(resolver); - } default: return nullptr; } } @@ -17294,10 +17195,6 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateAssignVariableOptions(_fbb, ptr, _rehasher).Union(); } - case BuiltinOptions_TableOptions: { - auto ptr = reinterpret_cast(value); - return CreateTableOptions(_fbb, ptr, _rehasher).Union(); - } default: return 0; } } @@ -17756,10 +17653,6 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new tflite::AssignVariableOptionsT(*reinterpret_cast(u.value)); break; } - case BuiltinOptions_TableOptions: { - value = new tflite::TableOptionsT(*reinterpret_cast(u.value)); - break; - } default: break; } @@ -18332,11 +18225,6 @@ inline void BuiltinOptionsUnion::Reset() { 
delete ptr; break; } - case BuiltinOptions_TableOptions: { - auto ptr = reinterpret_cast(value); - delete ptr; - break; - } default: break; } value = nullptr; ",0,train b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,runtime_version.cc,"@@ -357,7 +357,6 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code, {{BuiltinOperator_VAR_HANDLE, 1}, ""2.6.0""}, {{BuiltinOperator_READ_VARIABLE, 1}, ""2.6.0""}, {{BuiltinOperator_ASSIGN_VARIABLE, 1}, ""2.6.0""}, - {{BuiltinOperator_TABLE, 1}, ""2.6.0""}, }); std::pair version_key = {op_code, op_version}; ",0,train d784a48aea16c150e618d5aaf42b320d108dd20d,tensorflow/tensorflow,"Fix bug causing inconsistent AST when collections of lambdas are involved in template expansions. PiperOrigin-RevId: 233964704",templates.py,"@@ -92,6 +92,14 @@ class ContextAdjuster(gast.NodeTransformer): return self.generic_visit(node) def visit_comprehension(self, node): + # We may be able to override some of these, but for now it's simpler + # to just assert that they're set. + self._ctx_override = None + return self.generic_visit(node) + + def visit_Lambda(self, node): + # We may be able to override some of these, but for now it's simpler + # to just assert that they're set. self._ctx_override = None return self.generic_visit(node) ",0,train d784a48aea16c150e618d5aaf42b320d108dd20d,tensorflow/tensorflow,"Fix bug causing inconsistent AST when collections of lambdas are involved in template expansions. PiperOrigin-RevId: 233964704",templates_test.py,"@@ -248,6 +248,16 @@ class TemplatesTest(test.TestCase): self.assertIsInstance(arg_node.generators[0].target.ctx, gast.Store) self.assertIsInstance(arg_node.elt.ctx, gast.Load) + def test_lambda_in_function_call(self): + template = """""" + a = foo(arg) + """""" + source = parser.parse_expression('[lambda i: i]') + node = templates.replace(template, arg=source) + lambda_arg = node[0].value.args[0].elts[0] + self.assertIsInstance(lambda_arg.args.args[0].ctx, gast.Param) + self.assertIsInstance(lambda_arg.body.ctx, gast.Load) + if __name__ == '__main__': test.main() ",0,train 9065899e9252d5d9472b45d5a3dbecfb8b039117,tensorflow/tensorflow,"LocalResponseNormalization: We're spending about half of the time in this function converting to and from double. Do the computation in the float domain, I don't think there is any risk of numerical instability here. PiperOrigin-RevId: 320588654 Change-Id: Ia641f6359b5966aa669de037d355292a25c08bed",optimized_ops.h,"@@ -3815,6 +3815,7 @@ inline void LocalResponseNormalization( const int double_range = op_params.range * 2; Eigen::VectorXf padded_square(data_in.rows() + double_range); padded_square.setZero(); + const float bias = op_params.bias; for (int r = 0; r < data_in.cols(); ++r) { // Do local response normalization for data_in(:, r) // first, compute the square and store them in buffer for repeated use @@ -3827,7 +3828,7 @@ inline void LocalResponseNormalization( } for (int i = 0; i < data_in.rows(); ++i) { accumulated_scale += padded_square(i + double_range); - data_out(i, r) = op_params.bias + accumulated_scale; + data_out(i, r) = bias + accumulated_scale; accumulated_scale -= padded_square(i); } } ",0,train cc83067469bc30bba55932c587f31ef68f15792f,tensorflow/tensorflow,"Migrate a few conv kernels to use new kernel signatures. 
PiperOrigin-RevId: 214831837",conv.cc,"@@ -86,6 +86,18 @@ struct OpData { bool run_multithreaded_kernel; }; +inline PaddingType RuntimePaddingType(TfLitePadding padding) { + switch (padding) { + case TfLitePadding::kTfLitePaddingSame: + return PaddingType::kSame; + case TfLitePadding::kTfLitePaddingValid: + return PaddingType::kValid; + case TfLitePadding::kTfLitePaddingUnknown: + default: + return PaddingType::kNone; + } +} + void* Init(TfLiteContext* context, const char* buffer, size_t length) { // This is a builtin op, so we don't use the contents in 'buffer', if any. // Instead, we allocate a new object to use as scratch space for im2col, and @@ -487,18 +499,18 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, } else { effective_kernel_type = kernel_type; } + ConvParams op_params; + op_params.padding_type = RuntimePaddingType(params->padding); + op_params.padding_values.width = data->padding.width; + op_params.padding_values.height = data->padding.height; + op_params.stride_width = params->stride_width; + op_params.stride_height = params->stride_height; + op_params.dilation_width_factor = params->dilation_width_factor; + op_params.dilation_height_factor = params->dilation_height_factor; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; switch (effective_kernel_type) { case kReference: { - ConvParams op_params; - op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = data->padding.width; - op_params.padding_values.height = data->padding.height; - op_params.stride_width = params->stride_width; - op_params.stride_height = params->stride_height; - op_params.dilation_width_factor = params->dilation_width_factor; - op_params.dilation_height_factor = params->dilation_height_factor; - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; reference_ops::Conv(op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(filter), GetTensorData(filter), GetTensorShape(bias), @@ -508,16 +520,6 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, break; } case kGenericOptimized: { - ConvParams op_params; - op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = data->padding.width; - op_params.padding_values.height = data->padding.height; - op_params.stride_width = params->stride_width; - op_params.stride_height = params->stride_height; - op_params.dilation_width_factor = params->dilation_width_factor; - op_params.dilation_height_factor = params->dilation_height_factor; - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; optimized_ops::Conv(op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(filter), GetTensorData(filter), GetTensorShape(bias), @@ -534,25 +536,21 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, filter_data = GetTensorData(filter); } multithreaded_ops::Conv( - *eigen_support::GetThreadPoolDevice(context), - GetTensorData(input), GetTensorDims(input), filter_data, - GetTensorDims(filter), GetTensorData(bias), - GetTensorDims(bias), params->stride_width, params->stride_height, - data->padding.width, data->padding.height, params->padding, - output_activation_min, output_activation_max, - GetTensorData(output), GetTensorDims(output), - GetTensorData(im2col), GetTensorDims(im2col)); + *eigen_support::GetThreadPoolDevice(context), op_params, + GetTensorShape(input), GetTensorData(input), + 
GetTensorShape(filter), filter_data, GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output), GetTensorShape(im2col), + GetTensorData(im2col)); break; } case kCblasOptimized: { - cblas_ops::Conv(GetTensorData(input), GetTensorDims(input), - GetTensorData(filter), GetTensorDims(filter), - GetTensorData(bias), GetTensorDims(bias), - params->stride_width, params->stride_height, - data->padding.width, data->padding.height, - output_activation_min, output_activation_max, - GetTensorData(output), GetTensorDims(output), - GetTensorData(im2col), GetTensorDims(im2col)); + cblas_ops::Conv(op_params, GetTensorShape(input), + GetTensorData(input), GetTensorShape(filter), + GetTensorData(filter), GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output), GetTensorShape(im2col), + GetTensorData(im2col)); break; } } ",0,test cc83067469bc30bba55932c587f31ef68f15792f,tensorflow/tensorflow,"Migrate a few conv kernels to use new kernel signatures. PiperOrigin-RevId: 214831837",cblas_conv.h,"@@ -31,20 +31,29 @@ limitations under the License. namespace tflite { namespace cblas_ops { -inline void Conv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, float output_activation_min, - float output_activation_max, float* output_data, - const Dims<4>& output_dims, float* im2col_data, - const Dims<4>& im2col_dims) { +inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& filter_shape, + const float* filter_data, const RuntimeShape& bias_shape, + const float* bias_data, const RuntimeShape& output_shape, + float* output_data, const RuntimeShape& im2col_shape, + float* im2col_data) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); gemmlowp::ScopedProfilingLabel label(""Conv/cblas""); const float* gemm_input_data = nullptr; - const Dims<4>* gemm_input_dims = nullptr; - const int filter_width = ArraySize(filter_dims, 1); - const int filter_height = ArraySize(filter_dims, 2); + const RuntimeShape* gemm_input_shape = nullptr; + const int filter_width = filter_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); const bool need_im2col = stride_width != 1 || stride_height != 1 || filter_width != 1 || filter_height != 1; if (need_im2col) { @@ -55,18 +64,17 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims, op_params.padding_values.height = pad_height; op_params.stride_width = stride_width; op_params.stride_height = stride_height; - op_params.dilation_width_factor = 1; - op_params.dilation_height_factor = 1; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; optimized_ops::Im2col(op_params, filter_height, filter_width, 0, - 
DimsToShape(input_dims), input_data, - DimsToShape(im2col_dims), im2col_data); + input_shape, input_data, im2col_shape, im2col_data); gemm_input_data = im2col_data; - gemm_input_dims = &im2col_dims; + gemm_input_shape = &im2col_shape; } else { TFLITE_DCHECK(!im2col_data); gemm_input_data = input_data; - gemm_input_dims = &input_dims; + gemm_input_shape = &input_shape; } // The following code computes matrix multiplication c = a * transponse(b) @@ -78,10 +86,10 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims, const float* a = gemm_input_data; const float* b = filter_data; float* c = output_data; - int m = gemm_input_dims->sizes[1] * gemm_input_dims->sizes[2] * - gemm_input_dims->sizes[3]; - int n = output_dims.sizes[0]; - int k = gemm_input_dims->sizes[0]; + const int gemm_input_dims = gemm_input_shape->DimensionsCount(); + int m = FlatSizeSkipDim(*gemm_input_shape, gemm_input_dims - 1); + int n = output_shape.Dims(3); + int k = gemm_input_shape->Dims(gemm_input_dims - 1); // The stride of matrix a, b and c respectively. int stride_a = k; int stride_b = k; @@ -91,8 +99,8 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims, stride_a, b, stride_b, 0.0f, c, stride_c); optimized_ops::AddBiasAndEvalActivationFunction( - output_activation_min, output_activation_max, DimsToShape(bias_dims), - bias_data, DimsToShape(output_dims), output_data); + output_activation_min, output_activation_max, bias_shape, bias_data, + output_shape, output_data); } } // namespace cblas_ops ",0,test cc83067469bc30bba55932c587f31ef68f15792f,tensorflow/tensorflow,"Migrate a few conv kernels to use new kernel signatures. PiperOrigin-RevId: 214831837",multithreaded_conv.h,"@@ -69,13 +69,13 @@ struct MatMulConvFunctor { template class EigenTensorConvFunctor { private: - Eigen::PaddingType TfLitePadding2EigenPadding(TfLitePadding padding) { + Eigen::PaddingType RuntimePadding2EigenPadding(PaddingType padding) { switch (padding) { - case kTfLitePaddingValid: + case PaddingType::kValid: return Eigen::PADDING_VALID; - case kTfLitePaddingSame: + case PaddingType::kSame: return Eigen::PADDING_SAME; - case kTfLitePaddingUnknown: + case PaddingType::kNone: assert(false); // should never get here. 
return Eigen::PADDING_VALID; } @@ -89,7 +89,7 @@ class EigenTensorConvFunctor { int input_width, int input_depth, const T* filter_data, int filter_height, int filter_width, int filter_count, int stride_rows, int stride_cols, int pad_width, - int pad_height, TfLitePadding padding, T* output_data, + int pad_height, PaddingType padding, T* output_data, int output_height, int output_width) { const bool is_1x1_kernel = (filter_height == 1 && filter_width == 1 && stride_rows == 1 && stride_cols == 1); @@ -127,28 +127,38 @@ class EigenTensorConvFunctor { input_depth, filter_count); output.device(device) = Eigen::SpatialConvolution(input, filter, stride_cols, stride_rows, - TfLitePadding2EigenPadding(padding)); + RuntimePadding2EigenPadding(padding)); } } }; -inline void Conv(const Eigen::ThreadPoolDevice& device, const float* input_data, - const Dims<4>& input_dims, const float* filter_data, - const Dims<4>& filter_dims, const float* bias_data, - const Dims<4>& bias_dims, int stride_width, int stride_height, - int pad_width, int pad_height, TfLitePadding padding, - float output_activation_min, float output_activation_max, - float* output_data, const Dims<4>& output_dims, - float* im2col_data, const Dims<4>& im2col_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0); - const int output_depth = MatchingArraySize(filter_dims, 3, output_dims, 0); - const int input_height = ArraySize(input_dims, 2); - const int input_width = ArraySize(input_dims, 1); - const int filter_height = ArraySize(filter_dims, 2); - const int filter_width = ArraySize(filter_dims, 1); - const int output_height = ArraySize(output_dims, 2); - const int output_width = ArraySize(output_dims, 1); +inline void Conv(const Eigen::ThreadPoolDevice& device, + const ConvParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& filter_shape, + const float* filter_data, const RuntimeShape& bias_shape, + const float* bias_data, const RuntimeShape& output_shape, + float* output_data, const RuntimeShape& im2col_shape, + float* im2col_data) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const PaddingType padding = params.padding_type; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); EigenTensorConvFunctor conv_functor; conv_functor(device, input_data, im2col_data, batches, input_height, input_width, input_depth, filter_data, filter_height, @@ -157,8 +167,8 @@ inline void Conv(const Eigen::ThreadPoolDevice& device, const float* input_data, output_width); optimized_ops::AddBiasAndEvalActivationFunction( - 
output_activation_min, output_activation_max, DimsToShape(bias_dims), - bias_data, DimsToShape(output_dims), output_data); + output_activation_min, output_activation_max, bias_shape, bias_data, + output_shape, output_data); } } // namespace multithreaded_ops ",0,test 04ea2096ade2cf323312cb1a1ff008c667994e24,tensorflow/tensorflow,"Use tf.lite as the py_module name. Made the necessary changes to the api generator to accomodate for `dots` in the py_module name PiperOrigin-RevId: 329537229 Change-Id: If695ba06a0252b0094eafa629c43c5d65c344d13",build_py_api_docs.py,"@@ -55,7 +55,7 @@ FLAGS = flags.FLAGS def main(_): doc_generator = generate_lib.DocGenerator( root_title='TensorFlow Lite', - py_modules=[('lite', tf.lite)], + py_modules=[('tf.lite', tf.lite)], base_dir=str(pathlib.Path(tf.__file__).parent), code_url_prefix=FLAGS.code_url_prefix, search_hints=FLAGS.search_hints, ",0,train ddf9a100370bfbb27a2801202bb33a13ca4b9999,tensorflow/tensorflow,address comments,stream_executor_internal.h,"@@ -37,6 +37,9 @@ port::Status InitStreamExecutorPlugin(void* dso_handle); // testing). port::Status InitStreamExecutorPlugin(SEInitPluginFn init_fn); +// This file implements core stream executor base classes in terms of +// the C API defined in stream_executor.h. A class ""CSomething"" represents a +// ""Something"" that can be manipulated via calls in the C interface. class CPlatform : public Platform { public: explicit CPlatform(SP_Platform platform, ",0,train 89b81679155ce4e0b25af28440ae4d0906e69e8e,tensorflow/tensorflow,"Make `maybe_set_static_shape` a no-op when `shape` is a python constant. `maybe_set_static_shape` is only meant to handle cases that C++ shape inference cannot, which is when shape is a tensor that has a path to a captured placeholder inside a FuncGraph. So this change does not break any use-cases we care about. This fixes an issue with creating spurious constants in the Graph which are unused after shape inference. PiperOrigin-RevId: 263666943",tensor_util.py,"@@ -964,10 +964,40 @@ def shape_tensor(shape): # pylint: disable=invalid-name return ops.convert_to_tensor(shape, dtype=dtype, name=""shape"") +# DO NOT USE: For testing only. +_ENABLE_MAYBE_SET_STATIC_SHAPE = True + + def maybe_set_static_shape(tensor, shape): # pylint: disable=invalid-name - if (not context.executing_eagerly() and + """"""Sets the shape of `tensor` to the `shape`'s constant value, if inferrable. + + This is a temporary workaround to fix shape inference across functional op + boundaries. E.g. + + ```python + shape = tf.constant([3]) + @tf.function + def f(): + u = tf.random_uniform(shape) + return u + ``` + + If we were to rely solely on C++ shape inference, the shape of `u` inside + `f` would be unknown because C++ shape inference is not aware of the outer + graph and all it sees is a Placeholder node when backtracing the captured + tensor for `shape`. `maybe_set_static_shape` computes the static shape value + of `shape` by traversing the `FuncGraph` boundaries and sets the correct + shape. + + A longer term solution would be to fix C++ shape inference. + + Args: + tensor: A tensor. + shape: A shape tensor. 
+ """""" + if (_ENABLE_MAYBE_SET_STATIC_SHAPE and not context.executing_eagerly() and ops.get_default_graph().building_function and - not tensor.shape.is_fully_defined()): + not tensor.shape.is_fully_defined() and is_tensor(shape)): shape = shape_tensor(shape) const_shape = constant_value_as_shape(shape) tensor.set_shape(const_shape) ",0,train 89b81679155ce4e0b25af28440ae4d0906e69e8e,tensorflow/tensorflow,"Make `maybe_set_static_shape` a no-op when `shape` is a python constant. `maybe_set_static_shape` is only meant to handle cases that C++ shape inference cannot, which is when shape is a tensor that has a path to a captured placeholder inside a FuncGraph. So this change does not break any use-cases we care about. This fixes an issue with creating spurious constants in the Graph which are unused after shape inference. PiperOrigin-RevId: 263666943",tensor_util_test.py,"@@ -18,11 +18,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import contextlib import sys import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import func_graph from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.framework import test_util @@ -1080,6 +1082,52 @@ class ConstantValueAsShapeTest(test.TestCase): c_val = tensor_util.constant_value_as_shape(tf_val) +class MaybeSetStaticShapeTest(test.TestCase): + + @contextlib.contextmanager + def disableSetStaticShape(self): + flag_old = tensor_util._ENABLE_MAYBE_SET_STATIC_SHAPE + tensor_util._ENABLE_MAYBE_SET_STATIC_SHAPE = False + try: + yield + finally: + tensor_util._ENABLE_MAYBE_SET_STATIC_SHAPE = flag_old + + @test_util.run_deprecated_v1 + def testMaybeSetStaticShape(self): + shape = constant_op.constant([2, 5], dtype=dtypes.int32) + + def reshape(): + v = array_ops.zeros([10]) + return array_ops.reshape(v, shape) + + with self.disableSetStaticShape(): + graph_without_shape_propagation = func_graph.func_graph_from_py_func( + ""without_shape_propagation"", reshape, [], {}) + graph_with_shape_propagation = func_graph.func_graph_from_py_func( + ""with_shape_propagation"", reshape, [], {}) + self.assertCountEqual( + [op.type for op in graph_without_shape_propagation.get_operations()], + [op.type for op in graph_with_shape_propagation.get_operations()]) + + @test_util.run_deprecated_v1 + def testMaybeSetStaticShapeScalarShape(self): + + def reshape(): + v = array_ops.placeholder(dtypes.float32) + t = array_ops.reshape(v, [-1]) + return t + + with self.disableSetStaticShape(): + graph_without_shape_propagation = func_graph.func_graph_from_py_func( + ""without_shape_propagation"", reshape, [], {}) + graph_with_shape_propagation = func_graph.func_graph_from_py_func( + ""with_shape_propagation"", reshape, [], {}) + self.assertCountEqual( + [op.type for op in graph_without_shape_propagation.get_operations()], + [op.type for op in graph_with_shape_propagation.get_operations()]) + + class ShapeTensorTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes ",0,train 50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers. 
PiperOrigin-RevId: 348653113 Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",compiled_model.cc,"@@ -533,6 +533,9 @@ NodeDescriptor FuseChain(const FusionSequence& chain) { absl::Substitute(non_linkable.task->shader_source, function_code + ""$0"", buffer_declarations + ""$1"", call_code); fused_descriptor->AddDstTensor("""", {}); + fused_descriptor->src_tensors_names = non_linkable.task->src_tensors_names; + fused_descriptor->dst_tensors_names = non_linkable.task->dst_tensors_names; + fused_descriptor->tensors_as_args = non_linkable.task->tensors_as_args; fused_descriptor->resize_function = non_linkable.task->resize_function; node_desc.dst_tensors_ids = {fused_id}; node_desc.task = fused_descriptor; ",0,train 50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers. PiperOrigin-RevId: 348653113 Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",compute_task.cc,"@@ -114,6 +114,9 @@ absl::Status ComputeTask::CompileWithDevice(id device, } resize_function_ = desc.task->resize_function; program_ = program; + src_tensors_names_ = desc.task->src_tensors_names; + dst_tensors_names_ = desc.task->dst_tensors_names; + tensors_as_args_ = desc.task->tensors_as_args; return absl::OkStatus(); } @@ -228,10 +231,22 @@ std::vector ComputeTask::GetInputIds() const { void ComputeTask::SetSrcTensor(const MetalSpatialTensor& tensor, int index) { input_buffers_[index].metal_handle = tensor.GetBufferHandle(); + if (tensors_as_args_) { + auto name = src_tensors_names_[index]; + // extracting tensor_name from ""device FLT4* tensor_name_buffer""; + name = name.substr(13, name.size() - 20); + auto status = metal_args_.SetObjectRef(name, tensor); + } } void ComputeTask::SetDstTensor(const MetalSpatialTensor& tensor, int index) { output_buffers_[index].metal_handle = tensor.GetBufferHandle(); + if (tensors_as_args_) { + auto name = dst_tensors_names_[index]; + // extracting tensor_name from ""device FLT4* tensor_name_buffer""; + name = name.substr(13, name.size() - 20); + auto status = metal_args_.SetObjectRef(name, tensor); + } } void ComputeTask::SetDescription(const std::string& description) { ",0,train 50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers. PiperOrigin-RevId: 348653113 Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",compute_task.h,"@@ -94,6 +94,9 @@ class ComputeTask { DispatchParamsFunction resize_function_; std::string description_; MetalArguments metal_args_; + std::vector src_tensors_names_; + std::vector dst_tensors_names_; + bool tensors_as_args_; }; } // namespace metal ",0,train 50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers. 
PiperOrigin-RevId: 348653113 Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",compute_task_descriptor.cc,"@@ -62,12 +62,24 @@ ComputeTaskDescriptor::ComputeTaskDescriptor(const OperationDef& def) void ComputeTaskDescriptor::AddSrcTensor(const std::string& tensor_name, const TensorDescriptor& desc) { - src_tensors_names.push_back(""device FLT4* "" + tensor_name); + if (tensors_as_args) { + src_tensors_names.push_back(""device FLT4* "" + tensor_name + ""_buffer""); + auto desc_new = absl::make_unique(desc); + args.AddObjectRef(tensor_name, AccessType::READ, std::move(desc_new)); + } else { + src_tensors_names.push_back(""device FLT4* "" + tensor_name); + } } void ComputeTaskDescriptor::AddDstTensor(const std::string& tensor_name, const TensorDescriptor& desc) { - dst_tensors_names.push_back(""device FLT4* "" + tensor_name); + if (tensors_as_args) { + dst_tensors_names.push_back(""device FLT4* "" + tensor_name + ""_buffer""); + auto desc_new = absl::make_unique(desc); + args.AddObjectRef(tensor_name, AccessType::WRITE, std::move(desc_new)); + } else { + dst_tensors_names.push_back(""device FLT4* "" + tensor_name); + } } } // namespace metal ",0,train 50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers. PiperOrigin-RevId: 348653113 Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",compute_task_descriptor.h,"@@ -65,6 +65,10 @@ struct ComputeTaskDescriptor { ComputeTaskDescriptor(const ComputeTaskDescriptor&) = delete; ComputeTaskDescriptor& operator=(const ComputeTaskDescriptor&) = delete; + // temporary + bool tensors_as_args = + false; // must be true if input/output tensors used throught args.tensor + OperationDef definition; Arguments args; bool is_linkable = false; ",0,train 50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers. PiperOrigin-RevId: 348653113 Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",fully_connected.cc,"@@ -62,26 +62,28 @@ std::string GetFullyConnectedCode(const GpuInfo& gpu_info, int src_channels, code << R""( float summa = 0.0f; threadgroup FLT4 local_vector[32]; - for (int j = 0; j < $0; ++j) { - local_vector[tid_index] = j * 32 + tid_index >= args.src_slices ? - FLT4(0.0f) : src_tensor[j * 32 + tid_index]; - $1(mem_flags::mem_threadgroup); + for (int j = 0; j < args.src_depth_sub_groups; ++j) { + local_vector[tid_index] = j * 32 + tid_index >= args.src_tensor.Slices() ? 
+ FLT4(0.0f) : args.src_tensor.Read(0, 0, j * 32 + tid_index); + $0(mem_flags::mem_threadgroup); for (uint i = 0, counter = j * 32 + tid.y * 8; i < 8; ++i, ++counter) { summa += dot(local_vector[tid.y * 8 + i], args.weights.Read(counter * args.dst_channels_alignedx8 + ugid.x)); } - $1(mem_flags::mem_none); + $0(mem_flags::mem_none); } )""; } else { code << R""( float summa = 0.0f; - uint counter = ugid.y * $0; - for (uint i = 0; i < $0; ++i, ++counter) { + int counter = int(ugid.y) * args.src_depth_sub_groups; + for (int i = 0; i < args.src_depth_sub_groups; ++i, ++counter) { )""; if (src_depth % 4 != 0) { - code << "" if (counter >= args.src_slices) continue;"" << std::endl; + code << "" if (counter >= args.src_tensor.Slices()) continue;"" + << std::endl; } - code << "" summa += dot(src_tensor[counter], args.weights.Read(counter * "" + code << "" summa += dot(args.src_tensor.Read(0, 0, counter), "" + ""args.weights.Read(counter * "" ""args.dst_channels_alignedx8 + ugid.x));"" << std::endl; code << "" }"" << std::endl; @@ -90,27 +92,25 @@ std::string GetFullyConnectedCode(const GpuInfo& gpu_info, int src_channels, threadgroup float temp[8][4]; temp[tid.x][tid.y] = summa; - $1(mem_flags::mem_threadgroup); + $0(mem_flags::mem_threadgroup); if (tid.y == 0) { summa += temp[tid.x][1]; summa += temp[tid.x][2]; summa += temp[tid.x][3]; temp[tid.x][0] = summa; } - $1(mem_flags::mem_threadgroup); - if (tid.y == 0 && tid.x % 4 == 0 && ugid.x < args.dst_channels) { - const int linear_index = ugid.x / 4; + $0(mem_flags::mem_threadgroup); + const int linear_index = ugid.x / 4; + if (tid.y == 0 && tid.x % 4 == 0 && linear_index < args.dst_tensor.Slices()) { FLT4 value = FLT4(temp[tid.x][0], temp[tid.x + 1][0], temp[tid.x + 2][0], temp[tid.x + 3][0]) + args.bias.Read(linear_index); uint3 gid = uint3(0u, 0u, uint(linear_index)); $$2 - dst_tensor[linear_index] = value; + args.dst_tensor.Write(value, 0, 0, linear_index); } } )""; - const int src_depth_sub_groups = shared_memory ? DivideRoundUp(src_depth, 32) - : DivideRoundUp(src_depth, 4); - return absl::Substitute(code.str(), src_depth_sub_groups, barrier); + return absl::Substitute(code.str(), barrier); } } // namespace @@ -118,19 +118,21 @@ ComputeTaskDescriptor FullyConnected(const OperationDef& definition, const FullyConnectedAttributes& attr, const GpuInfo& gpu_info) { ComputeTaskDescriptor desc(definition); + desc.tensors_as_args = true; desc.shader_source = GetFullyConnectedCode(gpu_info, attr.weights.shape.i, attr.weights.shape.o); - desc.args.AddInt(""dst_channels"", attr.weights.shape.o); - desc.args.AddInt(""src_slices"", DivideRoundUp(attr.weights.shape.i, 4)); + bool shared_memory = gpu_info.IsApple() && + gpu_info.apple_info.IsLocalMemoryPreferredOverGlobal(); + const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); + const int src_depth_sub_groups = shared_memory ? DivideRoundUp(src_depth, 32) + : DivideRoundUp(src_depth, 4); desc.args.AddInt(""dst_channels_alignedx8"", AlignByN(attr.weights.shape.o, 8)); + desc.args.AddInt(""src_depth_sub_groups"", src_depth_sub_groups); desc.AddSrcTensor(""src_tensor"", definition.src_tensors[0]); desc.AddDstTensor(""dst_tensor"", definition.dst_tensors[0]); - bool shared_memory = gpu_info.IsApple() && - gpu_info.apple_info.IsLocalMemoryPreferredOverGlobal(); - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); const int src_depth_aligned = AlignByN(src_depth, shared_memory ? 
32 : 4); const int dst_channels_aligned = AlignByN(attr.weights.shape.o, 8); ",0,train 50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers. PiperOrigin-RevId: 348653113 Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",metal_arguments.cc,"@@ -141,6 +141,7 @@ absl::Status MetalArguments::Init(id device, int buffer_offset, RETURN_IF_ERROR(AddObjectArgs(args)); RETURN_IF_ERROR(ResolveSelectorsPass(*args, {}, code)); RETURN_IF_ERROR(SetObjectsResources(*args)); + object_refs_ = std::move(args->object_refs_); args->GetActiveArguments(kArgsPrefix, *code); std::string struct_desc = ""struct uniforms_buffer {\n""; int pos = 0; @@ -229,6 +230,25 @@ absl::Status MetalArguments::SetHalf(const std::string& name, half value) { ""No support of half uniforms in Metal backend""); } +absl::Status MetalArguments::SetObjectRef(const std::string& name, + const GPUObject& object) { + auto it = object_refs_.find(name); + if (it == object_refs_.end()) { + return absl::NotFoundError( + absl::StrCat(""No object ref with name - "", name)); + } + GPUResourcesWithValue resources; + RETURN_IF_ERROR(object.GetGPUResources(it->second.get(), &resources)); + for (const auto& r : resources.ints) { + RETURN_IF_ERROR(SetInt(absl::StrCat(name, ""_"", r.first), r.second)); + } + for (const auto& r : resources.floats) { + RETURN_IF_ERROR(SetFloat(absl::StrCat(name, ""_"", r.first), r.second)); + } + return absl::OkStatus(); + // return SetGPUResources(name, resources); +} + void MetalArguments::Encode(id encoder, int buffer_offset) const { for (auto& b : buffers_) { @@ -258,7 +278,14 @@ absl::Status MetalArguments::AddObjectArgs(Arguments* args) { AddGPUResources(t.first, t.second->GetGPUResources(), args); } for (auto& t : args->object_refs_) { - AddGPUResources(t.first, t.second->GetGPUResources(), args); + auto resources = t.second->GetGPUResources(); + for (const auto& r : resources.ints) { + args->AddInt(absl::StrCat(t.first, ""_"", r)); + } + for (const auto& r : resources.floats) { + args->AddFloat(absl::StrCat(t.first, ""_"", r)); + } + // AddGPUResources(t.first, t.second->GetGPUResources(), args); } return absl::OkStatus(); } ",0,train 50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers. PiperOrigin-RevId: 348653113 Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",metal_arguments.h,"@@ -46,6 +46,7 @@ class MetalArguments : public ArgumentsBinder { absl::Status SetInt(const std::string& name, int value) override; absl::Status SetFloat(const std::string& name, float value) override; absl::Status SetHalf(const std::string& name, half value) override; + absl::Status SetObjectRef(const std::string& name, const GPUObject& object); void Encode(id encoder, int buffer_offset) const; ",0,train d5bd4932569106da79a7ac7ba7a9e7cf0141ab06,tensorflow/tensorflow,"Internal change. PiperOrigin-RevId: 226560588",gather_op_test.py,"@@ -421,6 +421,12 @@ class GatherTest(test.TestCase, parameterized.TestCase): # expected result. expected = self._batchNumpyGather(params, indices, axis, batch_dims) + # On Windows, we get an exception if we pass in the transformed numpy + # arrays (""Failed to convert numpy ndarray to a Tensor (Unsupported + # feed type).""); so convert them back to lists before calling tf.gather. 
+ params = params.tolist() + indices = indices.tolist() + result = array_ops.gather(params, indices, axis=axis, batch_dims=batch_dims) self.assertAllEqual(output_shape, result.shape.as_list()) self.assertAllEqual(expected, result) ",0,train a7c8a73a9f6d3a13fad085d6cff79cf96cfd5b18,tensorflow/tensorflow,"[xla::gpu] fuse bias addition for bf16 gemms. PiperOrigin-RevId: 441133520",gemm_rewriter.cc,"@@ -132,29 +132,48 @@ class GemmRewriterVisitor : public DfsHloRewriteVisitor { m::AddAnyOrder( m::Op(&existing_gemm).WithCustomCallTarget(kGemmCallTarget), m::Op(&bias)))) { - // Do not fuse bias into S32 GEMM, as for this datatype cuBLAS only - // supports fixed values for alpha/beta. - if (existing_gemm->shape().element_type() == S32) { - return Status::OK(); - } - auto config = - existing_gemm->backend_config().ValueOrDie(); - if (config.beta() == 0 && bias->user_count() == 1 && - existing_gemm->user_count() == 1 && - bias->shape() == existing_gemm->shape()) { - config.set_beta(1.0); - CHECK_EQ(existing_gemm->operand_count(), 2); - std::unique_ptr gemm_call = - HloInstruction::CreateCustomCall( - instr->shape(), - {existing_gemm->mutable_operand(0), - existing_gemm->mutable_operand(1), bias}, - kGemmCallTarget); - TF_RETURN_IF_ERROR(gemm_call->set_backend_config(config)); - TF_RETURN_IF_ERROR(SetName(instr->GetModule(), gemm_call.get())); - TF_RETURN_IF_ERROR( - ReplaceWithNewInstruction(instr, std::move(gemm_call))); - } + return FuseBiasedGemm(instr, bias, existing_gemm); + } + return Status::OK(); + } + + Status HandleConvert(HloInstruction *instr) override { + HloInstruction *bias, *existing_gemm; + if (Match( + instr, + m::Convert(m::AddAnyOrder( + m::Convert(m::Op(&existing_gemm) + .WithCustomCallTarget(kGemmCallTarget) + .WithElementType(BF16)), + m::Convert(m::Op(&bias).WithElementType(BF16)))) + .WithElementType(BF16))) { + return FuseBiasedGemm(instr, bias, existing_gemm); + } + return Status::OK(); + } + + Status FuseBiasedGemm(HloInstruction *instr, HloInstruction *bias, + HloInstruction *existing_gemm) { + // Do not fuse bias into S32 GEMM, as for this datatype cuBLAS only + // supports fixed values for alpha/beta. + if (existing_gemm->shape().element_type() == S32) { + return Status::OK(); + } + auto config = + existing_gemm->backend_config().ValueOrDie(); + if (config.beta() == 0 && bias->user_count() == 1 && + existing_gemm->user_count() == 1 && + bias->shape() == existing_gemm->shape()) { + config.set_beta(1.0); + CHECK_EQ(existing_gemm->operand_count(), 2); + std::unique_ptr gemm_call = + existing_gemm->CloneWithNewOperands( + instr->shape(), {existing_gemm->mutable_operand(0), + existing_gemm->mutable_operand(1), bias}); + TF_RETURN_IF_ERROR(gemm_call->set_backend_config(config)); + TF_RETURN_IF_ERROR(SetName(instr->GetModule(), gemm_call.get())); + TF_RETURN_IF_ERROR( + ReplaceWithNewInstruction(instr, std::move(gemm_call))); } return Status::OK(); } ",0,train a7c8a73a9f6d3a13fad085d6cff79cf96cfd5b18,tensorflow/tensorflow,"[xla::gpu] fuse bias addition for bf16 gemms. 
PiperOrigin-RevId: 441133520",gemm_rewrite_test.cc,"@@ -576,6 +576,30 @@ ENTRY int8gemm { /*print_operand_shape=*/true); } } + +TEST_F(GemmRewriteTest, BF16GemmWithBias) { + const char* hlo_text = R""( +HloModule BF16GemmWithBias + +ENTRY BF16GemmWithBias { + x = bf16[8,8]{1,0} parameter(0) + y = bf16[8,8]{1,0} parameter(1) + dot.5 = bf16[8,8]{1,0} dot(x, y), lhs_contracting_dims={1}, rhs_contracting_dims={0} + bias = bf16[8,8]{1,0} parameter(2) + ROOT add.6 = bf16[8,8]{1,0} add(dot.5, bias) +} + )""; + + EXPECT_TRUE(RunAndCompare(hlo_text, ErrorSpec{1e-3, 1e-3})); + MatchOptimizedHlo(hlo_text, + R""( +; CHECK-LABEL: ENTRY %BF16GemmWithBias (x: bf16[8,8], y: bf16[8,8], bias: bf16[8,8]) -> bf16[8,8] { +; CHECK-NEXT: [[INSTR_0:%[^ ]+]] = bf16[8,8]{1,0} parameter(0) +; CHECK-NEXT: [[INSTR_1:%[^ ]+]] = bf16[8,8]{1,0} parameter(1) +; CHECK-NEXT: [[INSTR_2:%[^ ]+]] = bf16[8,8]{1,0} parameter(2) +; CHECK-NEXT: ROOT [[INSTR_3:%[^ ]+]] = bf16[8,8]{1,0} custom-call([[INSTR_0]], [[INSTR_1]], [[INSTR_2]]), custom_call_target=""__cublas$gemm"", backend_config=""{\""alpha_real\"":1,\""alpha_imag\"":0,\""beta\"":1,\""dot_dimension_numbers\"":{\""lhs_contracting_dimensions\"":[\""1\""],\""rhs_contracting_dimensions\"":[\""0\""],\""lhs_batch_dimensions\"":[],\""rhs_batch_dimensions\"":[]},\""batch_size\"":\""1\"",\""lhs_stride\"":\""64\"",\""rhs_stride\"":\""64\"",\""selected_algorithm\"":\""{{-?[0-9]+}}\""}"" + )""); +} } // namespace } // namespace gpu } // namespace xla ",0,train a455319208888e72af34fc3021122803a53a047d,tensorflow/tensorflow,"Automated g4 rollback of changelist 201217989 PiperOrigin-RevId: 201257755",arithmetic_optimizer.cc,"@@ -2519,14 +2519,14 @@ class ConvertLog1pStage : public ArithmeticOptimizerStage { bool* modified) { const auto& t = ctx().graph_properties->GetInputProperties(input->name())[i]; - const auto& c = - ctx().graph_properties->GetInputProperties(input->name())[j]; - for (int k = 0; k < c.shape().dim_size(); ++k) { - // Skip if c shape is not fully determined. - if (c.shape().dim(k).size() < 0) { + for (int k = 0; k < t.shape().dim_size(); ++k) { + // Skip if t shape is not fully determined. + if (t.shape().dim(k).size() < 0) { return Status::OK(); } } + const auto& c = + ctx().graph_properties->GetInputProperties(input->name())[j]; TensorShapeProto broadcast_shape; if (!ShapeAfterBroadcast(t.shape(), c.shape(), &broadcast_shape)) { return errors::InvalidArgument(""Cannot get broadcast shape for: "", @@ -2537,15 +2537,15 @@ class ConvertLog1pStage : public ArithmeticOptimizerStage { // broadcast. return Status::OK(); } - if (TensorShape::IsValid(c.shape()) && c.has_value()) { - Tensor constant(c.dtype(), c.shape()); - if (!constant.FromProto(c.value())) { + if (TensorShape::IsValid(t.shape()) && t.has_value()) { + Tensor tensor(t.dtype(), t.shape()); + if (!tensor.FromProto(t.value())) { return errors::InvalidArgument(""Cannot parse tensor from proto: "", t.value().DebugString()); } complex128 element; - for (int k = 0; k < constant.NumElements(); ++k) { - if (!GetElement(constant, k, &element)) { + for (int k = 0; k < tensor.NumElements(); ++k) { + if (!GetElement(tensor, k, &element)) { // input data type is not supported by log1p. Skip. 
return Status::OK(); } @@ -2558,8 +2558,8 @@ class ConvertLog1pStage : public ArithmeticOptimizerStage { TF_RETURN_IF_ERROR(GetInputNode(input->input(i), &x)); TF_RETURN_IF_ERROR(GetInputNode(input->input(j), &y)); node->set_op(""Log1p""); - node->set_input(0, x->name()); - node->add_input(AsControlDependency(y->name())); + node->set_input(0, y->name()); + node->add_input(AsControlDependency(x->name())); ForwardControlDependencies(node, {input}); AddToOptimizationQueue(node); ",0,test 6311992aa95f5e9ea88f400a404328071f8b6bea,tensorflow/tensorflow,"MultiProcessRunner: Register faulthandler so that subprocesses' stack trace can be dumped at timeout. Set default timeout second so timeout is handled for regular medium size test target. PiperOrigin-RevId: 304674677 Change-Id: I516315c6d2d4f951fc394c6bf2fb6f029096ed74",multi_process_runner.py,"@@ -36,13 +36,6 @@ from tensorflow.python.distribute import multi_process_lib from tensorflow.python.eager import context from tensorflow.python.platform import test -# pylint: disable=g-import-not-at-top -try: - # `faulthandler` is not available in py2. - import faulthandler -except ImportError: - faulthandler = None - # _ProcessStatusInfo contains process status information. When is_successful # attribute is True, the subprocess has ended successfully, or if False, the # exception stack trace info is stored in exc_info to pass on to parent process @@ -312,7 +305,7 @@ class MultiProcessRunner(object): break return list_to_return - def join(self, timeout=250): + def join(self, timeout=None): """"""Joins all the processes with timeout. Args: @@ -343,9 +336,6 @@ class MultiProcessRunner(object): if self._all_forced_terminated: break if time.time() - start_time > timeout: - # Send SIGTERM signal to subprocesses to dump their current - # stack trace. - self.terminate_all(sig=signal.SIGTERM) # If none of those did, report timeout to user. raise RuntimeError('One or more subprocesses timed out. ' 'Number of outstanding subprocesses ' @@ -384,7 +374,7 @@ class MultiProcessRunner(object): _resource(PARENT_TO_SUB_QUEUE).put('terminate {} {}'.format( task_type, task_id)) - def terminate_all(self, sig=signal.SIGKILL): + def terminate_all(self): """"""Terminates all subprocesses."""""" subprocess_infos = [] @@ -398,7 +388,7 @@ class MultiProcessRunner(object): for subprocess_info in subprocess_infos: logging.info('Parent process is now killing PID: %d', subprocess_info.pid) try: - os.kill(subprocess_info.pid, sig) + os.kill(subprocess_info.pid, signal.SIGKILL) except ProcessLookupError: # TODO(rchao): Remove subprocess info from the queue once a subprocess # is terminated. @@ -459,14 +449,11 @@ class _Subprocess(object): *arg, **kwargs): """"""The wrapper function that actually gets run in child process(es)."""""" - if faulthandler is not None: - faulthandler.enable() - faulthandler.register(signal.SIGTERM, chain=True) - pid = os.getpid() logging.info('Subprocess with PID %d (%s, %d) is now being started.', pid, task_type, task_id) _resource(SUBPROCESS_INFO_QUEUE).put(_SubprocessInfo(pid=pid)) + # Assign sys.stdout and sys.stderr as duplicates of `pipe_w` so print() and # logging.*() write directly to `pipe_w`. Unfortunately since we cannot # prepend task_type and task_id information to the streamed logs we will @@ -557,7 +544,7 @@ def run(proc_func, grpc_fail_fast=None, stream_stdout=True, list_stdout=False, - timeout=250, + timeout=None, args=None, kwargs=None): # pylint: disable=g-doc-args """"""Runs functions in local child processes. 
",0,train 92b66929070da69a40a0bfc78dfc1ac10bbf26d8,tensorflow/tensorflow,"Handle Python corner case: the name of a variable caught in an except block (i.e. `e` in `except Name as e` is not visible outside the except block itself. PiperOrigin-RevId: 293801105 Change-Id: I76faf7b0957170c18f739f531c44123b4dca3835",activity.py,"@@ -57,6 +57,8 @@ class Scope(object): the terminology of the Python 3 reference documentation, True roughly represents an actual scope, whereas False represents an ordinary code block. + isolated_names: Set[qual_names.QN], identifiers that are isolated to this + scope (even if the scope is not isolated). read: Set[qual_names.QN], identifiers read in this scope. modified: Set[qual_names.QN], identifiers modified in this scope. deleted: Set[qual_names.QN], identifiers deleted in this scope. @@ -99,6 +101,8 @@ class Scope(object): self.parent = parent self.isolated = isolated + self.isolated_names = set() + self.read = set() self.modified = set() self.deleted = set() @@ -136,6 +140,7 @@ class Scope(object): if self.parent is not None: assert other.parent is not None self.parent.copy_from(other.parent) + self.isolated_names = copy.copy(other.isolated_names) self.modified = copy.copy(other.modified) self.read = copy.copy(other.read) self.deleted = copy.copy(other.deleted) @@ -158,6 +163,7 @@ class Scope(object): if self.parent is not None: assert other.parent is not None self.parent.merge_from(other.parent) + self.isolated_names.update(other.isolated_names) self.read.update(other.read) self.modified.update(other.modified) self.bound.update(other.deleted) @@ -170,9 +176,9 @@ class Scope(object): if self.parent is not None: assert not self.parent.is_final if not self.isolated: - self.parent.read.update(self.read) - self.parent.modified.update(self.modified) - self.parent.bound.update(self.bound) + self.parent.read.update(self.read - self.isolated_names) + self.parent.modified.update(self.modified - self.isolated_names) + self.parent.bound.update(self.bound - self.isolated_names) self.parent.globals.update(self.globals) else: # TODO(mdan): This is not accurate. @@ -537,6 +543,16 @@ class ActivityAnalyzer(transformer.Base): (node.orelse, NodeAnno.ORELSE_SCOPE))) return node + def visit_ExceptHandler(self, node): + self._enter_scope(False) + # try/except oddity: as expected, it leaks any names you defined inside the + # except block, but not the name of the exception variable. + if node.name is not None: + self.scope.isolated_names.add(anno.getanno(node.name, anno.Basic.QN)) + node = self.generic_visit(node) + self._exit_scope() + return node + def resolve(node, context, parent_scope=None): return ActivityAnalyzer(context, parent_scope).visit(node) ",0,train 92b66929070da69a40a0bfc78dfc1ac10bbf26d8,tensorflow/tensorflow,"Handle Python corner case: the name of a variable caught in an except block (i.e. `e` in `except Name as e` is not visible outside the except block itself. 
PiperOrigin-RevId: 293801105 Change-Id: I76faf7b0957170c18f739f531c44123b4dca3835",activity_test.py,"@@ -403,6 +403,32 @@ class ActivityAnalyzerTest(ActivityAnalyzerTestBase): self.assertScopeIs( anno.getanno(node.body[0], anno.Static.SCOPE), ('b',), ()) + def test_except_exposes_names(self): + + def test_fn(a, b, c): # pylint: disable=unused-argument + try: + pass + except: # pylint: disable=bare-except + b = c + + node, _ = self._parse_and_analyze(test_fn) + fn_node = node + self.assertScopeIs( + anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('c',), ('b',)) + + def test_except_hides_exception_var_name(self): + + def test_fn(a, b, c): # pylint: disable=unused-argument + try: + pass + except a as e: + b = e + + node, _ = self._parse_and_analyze(test_fn) + fn_node = node + self.assertScopeIs( + anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('a',), ('b',)) + def test_aug_assign(self): def test_fn(a, b): ",0,train 92b66929070da69a40a0bfc78dfc1ac10bbf26d8,tensorflow/tensorflow,"Handle Python corner case: the name of a variable caught in an except block (i.e. `e` in `except Name as e` is not visible outside the except block itself. PiperOrigin-RevId: 293801105 Change-Id: I76faf7b0957170c18f739f531c44123b4dca3835",liveness_test.py,"@@ -245,6 +245,23 @@ class LivenessAnalyzerTest(LivenessAnalyzerTestBase): self.assertHasLiveIn(fn_body[0].body[0], ('b', 'c')) self.assertHasLiveIn(fn_body[1], ('x',)) + def test_live_out_except_variable(self): + + def test_fn(x, a): + try: + pass + except a as b: + raise b + return x + + node = self._parse_and_analyze(test_fn) + fn_body = node.body + + # Note: 'a' is not live because there is no raise statement inside the + # try, and we discount the possibility of other code in the try block + # raising an error. + self.assertHasLiveIn(fn_body[0], ('b', 'x')) + def test_live_in_return_statement(self): def test_fn(x, a, b, c): # pylint:disable=unused-argument ",0,train 3e0f9502b76778bb714de0317cfe2bdf256257ed,tensorflow/tensorflow,"Update docs about the Android version requirement for using nnapi execution priority. PiperOrigin-RevId: 319464023 Change-Id: I8ac9522caf6ab7db361b17c022659a2729ced286",nnapi_delegate_provider.cc,"@@ -62,7 +62,7 @@ std::vector NnapiDelegateProvider::CreateFlags(ToolParams* params) const { CreateFlag(""nnapi_execution_priority"", params, ""The model execution priority in nnapi, and it "" ""should be one of the following: default, low, "" - ""medium, high.""), + ""medium and high. This requires Android 11+.""), CreateFlag( ""nnapi_accelerator_name"", params, ""the name of the nnapi accelerator to use (requires Android Q+)""), ",0,train d99a5dd275b2e9fc54e9d40830c3d8b3622b79c7,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-02-08 PiperOrigin-RevId: 293967672 Change-Id: I2bf6a86c4360d8cc5c8429e96539ab1a44ed2972",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 2, 7) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 2, 8) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train 0642f8155f9d1391471067e7d97fee39521d3c44,tensorflow/tensorflow,"Narrow down argmin/argmax contract to always return the smallest index for ties Currently we get this behavior consistently across TF/XLA:CPU/XLA:GPU/XLA:TPU, and it also matches Numpy semantics. PiperOrigin-RevId: 312528188 Change-Id: I16901ff67052182fe374235f8c7521cbdf047779",math_ops.py,"@@ -256,7 +256,7 @@ def argmax(input, def argmax_v2(input, axis=None, output_type=dtypes.int64, name=None): """"""Returns the index with the largest value across axes of a tensor. - Note that in case of ties the identity of the return value is not guaranteed. + In case of identity returns the smallest index. For example: @@ -269,6 +269,9 @@ def argmax_v2(input, axis=None, output_type=dtypes.int64, name=None): >>> tf.math.argmax(B, 1) + >>> C = tf.constant([0, 0, 0, 0]) + >>> tf.math.argmax(C) # Returns smallest index in case of ties + Args: input: A `Tensor`. @@ -307,7 +310,7 @@ def argmin(input, def argmin_v2(input, axis=None, output_type=dtypes.int64, name=None): """"""Returns the index with the smallest value across axes of a tensor. - Note that in case of ties the identity of the return value is not guaranteed. + Returns the smallest index in case of ties. Args: input: A `Tensor`. Must be one of the following types: `float32`, `float64`, ",0,train 48aef32dcd356fa6bae490fa1c853b9b2cdd4846,tensorflow/tensorflow,removing redundant semicolon,toco_from_protos_test.py,"@@ -50,7 +50,7 @@ class TocoFromProtosTest(googletest.TestCase): toco_flags.output_format = toco_flags_pb2.TFLITE toco_flags.inference_input_type = types_pb2.FLOAT toco_flags.inference_type = types_pb2.FLOAT - toco_flags.allow_custom_ops = True; + toco_flags.allow_custom_ops = True model_flags = model_flags_pb2.ModelFlags() input_array = model_flags.input_arrays.add() input_array.name = TensorName(in_tensor) ",0,test 74fb47ccd26da99e57a14fccf7561e7ba7bcb000,tensorflow/tensorflow,"Add a new pass for promoting VarHandle ops to TF saved model arguments PiperOrigin-RevId: 315275908 Change-Id: Icbc5c032bd9474d279fecf48267665025a53c1bf",passes.h,"@@ -95,11 +95,9 @@ std::unique_ptr> CreateResourceDeviceInferencePass(); // of their aliasing output arguments. std::unique_ptr> CreatePromoteResourcesToArgsPass(); -// Creates a pass that promotes tf.VarHandleOp to to resource arguments of where -// resource names are `tf_saved_model.bound_input` symbol argument attributes -// for all functions. -std::unique_ptr> -CreatePromoteVarHandlesToSavedModelArgsPass(); +// Creates a pass that promotes tf.VarHandleOp to resource arguments for all +// functions. +std::unique_ptr> CreatePromoteVarHandlesToArgsPass(); // Creates a pass that converts readonly reference variables to the // corresponding resource variables. ",0,train 74fb47ccd26da99e57a14fccf7561e7ba7bcb000,tensorflow/tensorflow,"Add a new pass for promoting VarHandle ops to TF saved model arguments PiperOrigin-RevId: 315275908 Change-Id: Icbc5c032bd9474d279fecf48267665025a53c1bf",promote_resources_to_args.cc,"@@ -389,18 +389,15 @@ void PromoteResourcesToArgsPass::runOnOperation() { return signalPassFailure(); } -// This pass is for promoting Varhandle ops to tf_saved_model.bound_input -// attributes, which are required for TensorFlowSavedModelDialect. 
-class PromoteVarHandlesToSavedModelArgsPass - : public PassWrapper> { +class PromoteVarHandlesToArgsPass + : public PassWrapper> { public: void runOnOperation() override; }; -void PromoteVarHandlesToSavedModelArgsPass::runOnOperation() { +void PromoteVarHandlesToArgsPass::runOnOperation() { ModuleOp module = getOperation(); - + MLIRContext* context = module.getContext(); for (auto function : module.getOps()) { if (failed(CheckSingleBlockFunction(function))) return signalPassFailure(); @@ -409,15 +406,13 @@ void PromoteVarHandlesToSavedModelArgsPass::runOnOperation() { &var_handle_shared_names); // Add resource names for each `tf.VarHandleOp` that were promoted to - // saved model arguments. + // resource arguments. const int var_handle_args_offset = function.getNumArguments() - var_handle_shared_names.size(); - for (auto var_name_and_index : llvm::enumerate(var_handle_shared_names)) { - auto symbol_ref = - SymbolRefAttr::get(var_name_and_index.value(), &getContext()); + for (auto var_name_and_index : llvm::enumerate(var_handle_shared_names)) function.setArgAttr(var_name_and_index.index() + var_handle_args_offset, - ""tf_saved_model.bound_input"", symbol_ref); - } + kResourceNameArgAttr, + StringAttr::get(var_name_and_index.value(), context)); } } @@ -427,19 +422,17 @@ std::unique_ptr> CreatePromoteResourcesToArgsPass() { return std::make_unique(); } -std::unique_ptr> -CreatePromoteVarHandlesToSavedModelArgsPass() { - return std::make_unique(); +std::unique_ptr> CreatePromoteVarHandlesToArgsPass() { + return std::make_unique(); } static PassRegistration pass( ""tf-promote-resources-to-args"", ""Promote resources reads/writes to function inputs/outputs.""); -static PassRegistration saved_model_pass( - ""tf-saved-model-promote-var-handles-to-args"", - ""Promote tf.VarHandleOps to function arguments in a format of "" - ""TensorFlowSavedModelDialect.""); +static PassRegistration var_handle_pass( + ""tf-promote-var-handles-to-args"", + ""Promote tf.VarHandleOps to function arguments.""); } // namespace TF } // namespace mlir ",0,train 56a86ce36e09fdedeb84b5ebfa8f83f7778edf4a,tensorflow/tensorflow,Add CreateDir,hadoop_filesystem.cc,"@@ -525,6 +525,21 @@ void DeleteFile(const TF_Filesystem* filesystem, const char* path, TF_SetStatus(status, TF_OK, """"); } +void CreateDir(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { + auto libhdfs = static_cast(filesystem->plugin_filesystem); + auto fs = Connect(libhdfs, path, status); + if (TF_GetCode(status) != TF_OK) return; + + std::string scheme, namenode, hdfs_path; + ParseHadoopPath(path, &scheme, &namenode, &hdfs_path); + + if (libhdfs->hdfsCreateDirectory(fs, hdfs_path.c_str()) != 0) + TF_SetStatusFromIOError(status, errno, path); + else + TF_SetStatus(status, TF_OK, """"); +} + // TODO(vnvo2409): Implement later } // namespace tf_hadoop_filesystem ",0,train 2f25e2d448f55e8218974d4cf7acbe703a17ddae,tensorflow/tensorflow,"[tfrt:jit] Insert a copy when returning a dynamic broadcast We bufferize dynamic broadcasts into a memref reinterpret cast that yields a memref with affine map. This clashes with the return type of the function that doesn't support affine maps. Insert a copy for this special case. This is still a bit of a hack, but I prefer not to invest too much as a different representation for dynamic broaddcasts is on the horizon. PiperOrigin-RevId: 405473962 Change-Id: I81b67e9a0d17c83b0a0673f83fda09b015b10a72",rewriters.h,"@@ -16,6 +16,7 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_TRANSFORMS_REWRITERS_H_ #define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_TRANSFORMS_REWRITERS_H_ +#include #include #include ""mlir/IR/MLIRContext.h"" @@ -64,12 +65,15 @@ void populateHLOToLHLOConversionPattern(MLIRContext *context, // Collection of rewrite patterns for lowering of HLO to memref dialect. // These patterns generally assume that the HLO operation are aliasing their -// input memrefs. If enforce_identity_map is set to true, copies will be +// input memrefs. If enforce_identity_map returns true for an op, copies will be // inserted when the lowering would otherwise lead to a memref with a // non-identity map. void populateHLOToMemrefConversionPattern( BufferizeTypeConverter *converter, RemoveSignTypeConverter *sign_converter, - OwningRewritePatternList *patterns, bool enforce_identity_map = true); + OwningRewritePatternList *patterns, + std::function enforce_identity_map = [](Operation *) { + return true; + }); // Collection of rewrite patterns for lowering of HLO to Linalg dialect. void populateHLOToLinalgConversionPattern(MLIRContext *context, ",0,train 2f25e2d448f55e8218974d4cf7acbe703a17ddae,tensorflow/tensorflow,"[tfrt:jit] Insert a copy when returning a dynamic broadcast We bufferize dynamic broadcasts into a memref reinterpret cast that yields a memref with affine map. This clashes with the return type of the function that doesn't support affine maps. Insert a copy for this special case. This is still a bit of a hack, but I prefer not to invest too much as a different representation for dynamic broaddcasts is on the horizon. PiperOrigin-RevId: 405473962 Change-Id: I81b67e9a0d17c83b0a0673f83fda09b015b10a72",hlo_legalize_to_memref.cc,"@@ -15,6 +15,7 @@ limitations under the License. // This file implements logic for lowering HLO dialect to LHLO dialect. 
+#include #include #include ""mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"" @@ -159,10 +160,10 @@ class HloToMemrefDynamicBroadcastInDimOpConverter public: HloToMemrefDynamicBroadcastInDimOpConverter( TypeConverter& converter, RemoveSignTypeConverter* sign_converter, - MLIRContext* ctx, bool enforce_identity_maps) + MLIRContext* ctx, std::function enforce_identity_maps) : BaseOpConversion(converter, sign_converter, ctx), - enforce_identity_maps_(enforce_identity_maps) {} + enforce_identity_maps_(std::move(enforce_identity_maps)) {} Value signlessRewrite(mhlo::DynamicBroadcastInDimOp op, ArrayRef operands, Type op_result_type, @@ -171,7 +172,7 @@ class HloToMemrefDynamicBroadcastInDimOpConverter if (!result_type) return {}; Value result = InsertDynamicMemrefCastOp(op, operands.front(), &rewriter); - if (enforce_identity_maps_) { + if (enforce_identity_maps_(op)) { result = CreateCopy(op, result, &rewriter); } @@ -295,7 +296,7 @@ class HloToMemrefDynamicBroadcastInDimOpConverter return copy; } - bool enforce_identity_maps_; + std::function enforce_identity_maps_; }; struct HloLegalizeToMemrefPass @@ -331,10 +332,11 @@ struct HloLegalizeToMemrefPass void populateHLOToMemrefConversionPattern( BufferizeTypeConverter* converter, RemoveSignTypeConverter* sign_converter, - OwningRewritePatternList* patterns, bool enforce_identity_maps) { + OwningRewritePatternList* patterns, + std::function enforce_identity_maps) { MLIRContext* context = patterns->getContext(); patterns->insert( - *converter, sign_converter, context, enforce_identity_maps); + *converter, sign_converter, context, std::move(enforce_identity_maps)); patterns->insert( *converter, sign_converter, context); ",0,train 2f25e2d448f55e8218974d4cf7acbe703a17ddae,tensorflow/tensorflow,"[tfrt:jit] Insert a copy when returning a dynamic broadcast We bufferize dynamic broadcasts into a memref reinterpret cast that yields a memref with affine map. This clashes with the return type of the function that doesn't support affine maps. Insert a copy for this special case. This is still a bit of a hack, but I prefer not to invest too much as a different representation for dynamic broaddcasts is on the horizon. PiperOrigin-RevId: 405473962 Change-Id: I81b67e9a0d17c83b0a0673f83fda09b015b10a72",tf_broadcast_to_test.py,"@@ -0,0 +1,49 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +""""""Tests for Tensorflow -> CPURT compilation."""""" + +import numpy as np + +from tensorflow.compiler.mlir.tfrt.jit.python_binding import tf_cpurt +from tensorflow.python.platform import test + +cpurt = tf_cpurt.TfCpurtExecutor() + + +class TfBroadcastToTest(test.TestCase): + + def test_broadcast_return(self): + mlir_function = """""" + func @test(%arg0: tensor, %arg1: tensor<2xi32>) + -> (tensor, tensor) { + %1 = ""tf.BroadcastTo""(%arg0, %arg1) + : (tensor, tensor<2xi32>) -> tensor + %2 = ""tf.Add""(%1, %1) + : (tensor, tensor) -> tensor + return %1, %2 : tensor, tensor + }"""""" + + compiled = cpurt.compile(mlir_function, 'test') + + arg0 = np.random.uniform(0, 10.0, size=1).astype(np.float32) + arg1 = np.random.uniform(0, 10, size=2).astype(np.int32) + + [res1, res2] = cpurt.execute(compiled, [arg0, arg1]) + np.testing.assert_allclose(res1, np.broadcast_to(arg0, arg1), atol=0.0) + np.testing.assert_allclose(res2, np.broadcast_to(arg0, arg1) * 2, atol=0.0) + + +if __name__ == '__main__': + test.main() ",0,train 2f25e2d448f55e8218974d4cf7acbe703a17ddae,tensorflow/tensorflow,"[tfrt:jit] Insert a copy when returning a dynamic broadcast We bufferize dynamic broadcasts into a memref reinterpret cast that yields a memref with affine map. This clashes with the return type of the function that doesn't support affine maps. Insert a copy for this special case. This is still a bit of a hack, but I prefer not to invest too much as a different representation for dynamic broaddcasts is on the horizon. PiperOrigin-RevId: 405473962 Change-Id: I81b67e9a0d17c83b0a0673f83fda09b015b10a72",bufferize_pass.cc,"@@ -136,7 +136,12 @@ struct ComputeOpAndFuncBufferizePass // Configure bufferize pattern for functions and lhlo. mhlo::populateHLOToMemrefConversionPattern( &converter, &remove_sign_converter, &patterns, - /*enforce_identity_map=*/false); + /*enforce_identity_map=*/[](Operation* op) { + // Insert a copy if the broadcast escapes. + return llvm::any_of(op->getUsers(), [](Operation* user) { + return isa(user); + }); + }); populateFuncOpTypeConversionPattern(patterns, converter); populateCallOpTypeConversionPattern(patterns, converter); populateBranchOpInterfaceTypeConversionPattern(patterns, converter); ",0,train 7e148a6e17c45487f68fbe025346babf7bbd14f2,tensorflow/tensorflow,Fix build of TensorFlow Lite,c_api.cc,"@@ -77,7 +77,7 @@ void TFL_InterpreterOptionsSetNumThreads(TFL_InterpreterOptions* options, options->num_threads = num_threads; } -TFL_CAPI_EXPORT extern void TFL_InterpreterOptionsSetErrorReporter( +void TFL_InterpreterOptionsSetErrorReporter( TFL_InterpreterOptions* options, void (*reporter)(void* user_data, const char* format, va_list args), void* user_data) { ",0,train ea135aee180e47abe3ed45cf5a3be75f18955d20,tensorflow/tensorflow,"Fix issue with return value of evaluate() in models that add custom metrics via overriding train_step. 
PiperOrigin-RevId: 330022551 Change-Id: Ie928e792d678a1142c0f27dbdb2a7b4c39ee3974",training.py,"@@ -1366,13 +1366,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): if return_dict: return logs else: - results = [] - for name in self.metrics_names: - if name in logs: - results.append(logs[name]) - for key in sorted(logs.keys()): - if key not in self.metrics_names: - results.append(logs[key]) + results = [logs.get(name, None) for name in self.metrics_names] if len(results) == 1: return results[0] return results ",0,test ea135aee180e47abe3ed45cf5a3be75f18955d20,tensorflow/tensorflow,"Fix issue with return value of evaluate() in models that add custom metrics via overriding train_step. PiperOrigin-RevId: 330022551 Change-Id: Ie928e792d678a1142c0f27dbdb2a7b4c39ee3974",training_test.py,"@@ -1618,64 +1618,6 @@ class TrainingTest(keras_parameterized.TestCase): model.evaluate(x, batch_size=batch_size) model.predict(x, batch_size=batch_size) - @keras_parameterized.run_all_keras_modes( - always_skip_v1=True) - @parameterized.named_parameters( - ('custom_metrics', False, True), - ('compiled_metrics', True, False), - ('both_compiled_and_custom_metrics', True, True)) - def test_evaluate_with_custom_test_step( - self, use_compiled_metrics, use_custom_metrics): - - class MyModel(training_module.Model): - - def test_step(self, data): - x, y = data - pred = self(x) - metrics = {} - if use_compiled_metrics: - self.compiled_metrics.update_state(y, pred) - self.compiled_loss(y, pred) - for metric in self.metrics: - metrics[metric.name] = metric.result() - if use_custom_metrics: - custom_metrics = { - 'mean': math_ops.reduce_mean(pred), - 'sum': math_ops.reduce_sum(pred) - } - metrics.update(custom_metrics) - return metrics - - inputs = layers_module.Input((2,)) - outputs = layers_module.Dense(3)(inputs) - model = MyModel(inputs, outputs) - if use_compiled_metrics: - model.compile('adam', 'mse', metrics=['mae', 'mape'], - run_eagerly=testing_utils.should_run_eagerly()) - else: - model.compile('adam', 'mse', - run_eagerly=testing_utils.should_run_eagerly()) - x = np.random.random((4, 2)) - y = np.random.random((4, 3)) - results_list = model.evaluate(x, y) - results_dict = model.evaluate(x, y, return_dict=True) - self.assertLen(results_list, len(results_dict)) - if use_compiled_metrics and use_custom_metrics: - self.assertLen(results_list, 5) - self.assertEqual(results_list, - [results_dict['loss'], - results_dict['mae'], results_dict['mape'], - results_dict['mean'], results_dict['sum']]) - if use_compiled_metrics and not use_custom_metrics: - self.assertLen(results_list, 3) - self.assertEqual(results_list, - [results_dict['loss'], - results_dict['mae'], results_dict['mape']]) - if not use_compiled_metrics and use_custom_metrics: - self.assertLen(results_list, 2) - self.assertEqual(results_list, - [results_dict['mean'], results_dict['sum']]) - class TestExceptionsAndWarnings(keras_parameterized.TestCase): ",0,test 8c933654194da1588e37d0088f701ff8f157764a,tensorflow/tensorflow,"Do not restore previous context in ScopedActivateContext destructor. 
PiperOrigin-RevId: 239643428",cuda_driver.cc,"@@ -207,8 +207,11 @@ ScopedActivateContext::ScopedActivateContext(GpuContext* cuda_context) { if (FLAGS_gpuexec_cuda_sync_around_driver_calls) SynchronizeOrDie(); auto* tls = &tls_data.get(); - tls->depth++; - if (tls->id == cuda_context->id()) { + if (tls->depth++ > 0) { + CHECK(tls->id == cuda_context->id()) + << ""Trying to activate a CUDA context in the current thread which is "" + ""different than an existing instance of ScopedActivateContext.""; + if (kVerifyGpuContext) { CHECK_EQ(CurrentContext(), cuda_context->context()); } @@ -219,8 +222,6 @@ ScopedActivateContext::ScopedActivateContext(GpuContext* cuda_context) { VLOG(3) << ""ScopedActivateContext switching context from "" << tls->id << "" to "" << cuda_context->id(); - to_restore_ = (tls->depth == 1 ? nullptr : tls->context); - // Set the context and update thread local. CHECK_EQ(CUDA_SUCCESS, cuCtxSetCurrent(cuda_context->context())); tls->id = cuda_context->id(); @@ -241,15 +242,6 @@ ScopedActivateContext::~ScopedActivateContext() { tls->depth--; DCHECK_GE(tls->depth, 0); - if (to_restore_ == nullptr) { - // Leave context, tls->id, and tls->context set. - return; - } - - // Set context and update thread local. - CHECK_EQ(CUDA_SUCCESS, cuCtxSetCurrent(to_restore_->context())); - tls->id = to_restore_->id(); - tls->context = to_restore_; } namespace { @@ -964,10 +956,8 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) { } } -/* static */ port::Status GpuDriver::RecordEvent(GpuContext* context, - CUevent event, +/* static */ port::Status GpuDriver::RecordEvent(GpuContext*, CUevent event, CUstream stream) { - ScopedActivateContext activated{context}; CUresult res = cuEventRecord(event, stream); switch (res) { case CUDA_SUCCESS: @@ -986,9 +976,8 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) { } } -/* static */ port::StatusOr GpuDriver::QueryEvent(GpuContext* context, +/* static */ port::StatusOr GpuDriver::QueryEvent(GpuContext*, CUevent event) { - ScopedActivateContext activated{context}; CUresult res = cuEventQuery(event); if (res != CUDA_SUCCESS && res != CUDA_ERROR_NOT_READY) { return port::Status( @@ -1020,9 +1009,8 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) { return true; } -/* static */ bool GpuDriver::WaitStreamOnEvent(GpuContext* context, - CUstream stream, CUevent event) { - ScopedActivateContext activation(context); +/* static */ bool GpuDriver::WaitStreamOnEvent(GpuContext*, CUstream stream, + CUevent event) { CUresult res = cuStreamWaitEvent(stream, event, 0 /* = flags */); if (res != CUDA_SUCCESS) { LOG(ERROR) << ""could not wait stream on event: "" << ToString(res); ",0,train 8c933654194da1588e37d0088f701ff8f157764a,tensorflow/tensorflow,"Do not restore previous context in ScopedActivateContext destructor. PiperOrigin-RevId: 239643428",gpu_driver.h,"@@ -512,11 +512,8 @@ class ScopedActivateContext { explicit ScopedActivateContext(GpuContext* context); // Checks that the context has remained activated for the duration of the - // scope. + // scope. Does not restore the previously active context! ~ScopedActivateContext(); - - private: - GpuContext* to_restore_ = nullptr; }; } // namespace gpu ",0,train f5c2e5d968d371c0855c6d7b2cc4f050615d4bc4,tensorflow/tensorflow,"Fix issue with gradients of resource variables in cond. 
PiperOrigin-RevId: 192369091",control_flow_grad.py,"@@ -20,6 +20,7 @@ from __future__ import print_function from six.moves import xrange # pylint: disable=redefined-builtin +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import control_flow_ops @@ -74,6 +75,11 @@ def _SwitchGrad(op, *grad): # At this point, we have created zero_grad guarded by the right switch. # Unfortunately, we may still get None here for not trainable data types. if zero_grad is None: + # For resource variables we get None always on the other branch, so bypass + # this. + if op.inputs[0].dtype == dtypes.resource: + return merge( + [grad[op_ctxt.branch]] * 2, name=""cond_resource_grad"")[0], None return None, None return merge(grad, name=""cond_grad"")[0], None else: ",0,train f5c2e5d968d371c0855c6d7b2cc4f050615d4bc4,tensorflow/tensorflow,"Fix issue with gradients of resource variables in cond. PiperOrigin-RevId: 192369091",gradients_test.py,"@@ -44,6 +44,7 @@ from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_grad # pylint: disable=unused-import from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_grad # pylint: disable=unused-import +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_grad # pylint: disable=unused-import from tensorflow.python.ops import tensor_array_grad # pylint: disable=unused-import from tensorflow.python.ops import tensor_array_ops @@ -810,5 +811,29 @@ class OnlyRealGradientsTest(test_util.TensorFlowTestCase): gradients.gradients(y, x) +class ResourceCondTest(test_util.TensorFlowTestCase): + + def testBasic(self): + gamma = resource_variable_ops.ResourceVariable( + np.random.random((3,)), + dtype=""float32"", name=""gamma"") + + inputs = array_ops.ones(shape=(3,), dtype=""float32"") + + def TestFn(): + output = inputs + gamma + return output + + training = array_ops.placeholder_with_default(True, shape=()) + output = control_flow_ops.cond( + training, TestFn, lambda: inputs) + + loss = output + + grads = gradients.gradients( + loss, [gamma]) + self.assertTrue(None not in grads) + + if __name__ == ""__main__"": googletest.main() ",0,train 8898af469c6d00310ce9f2a7ed18e331442a60ba,tensorflow/tensorflow,"Enable test now that the underlying problem has been fixed for TPU. PiperOrigin-RevId: 217773726",raw_api_test.cc,"@@ -479,8 +479,7 @@ TEST(RawApiTest, CompileWithXlaReturnShapes) { xla_program_shape.result().layout())); } -// Disabled because of failure on TPU (b/117876141) -TEST(RawApiTest, DISABLED_DotGeneralWithLayoutTest) { +TEST(RawApiTest, DotGeneralWithLayoutTest) { auto layout = xla::LayoutUtil::MakeLayout({0, 1}); xrt::XLAAllocation p0; ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",flatbuffer_export.cc,"@@ -167,7 +167,8 @@ static StatusOr GetTFLiteType(Type type, case 32: return tflite::TensorType_INT32; case 64: - return tflite::TensorType_INT64; + return itype.isUnsigned() ? 
tflite::TensorType_UINT64 + : tflite::TensorType_INT64; } } else if (auto q_uniform_type = type.dyn_cast()) { ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",tf_tfl_flatbuffer_helpers.cc,"@@ -119,6 +119,8 @@ DataType ConvertIODataTypeToDataType(toco::IODataType dtype) { return DT_INT32; case toco::IODataType::INT64: return DT_INT64; + case toco::IODataType::UINT64: + return DT_UINT64; case toco::IODataType::STRING: return DT_STRING; case toco::IODataType::BOOL: ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",convert_type.cc,"@@ -57,6 +57,8 @@ mlir::Type ConvertElementType(tflite::TensorType type, mlir::Builder builder) { return mlir::ComplexType::get(builder.getF64Type()); case tflite::TensorType_INT8: return builder.getIntegerType(8); + case tflite::TensorType_UINT64: + return builder.getIntegerType(64, /*isSigned=*/false); } } @@ -86,6 +88,8 @@ tensorflow::DataType TflTypeToTfType(tflite::TensorType type) { return tensorflow::DT_STRING; case tflite::TensorType_UINT8: return tensorflow::DT_UINT8; + case tflite::TensorType_UINT64: + return tensorflow::DT_UINT64; } } ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",common.c,"@@ -203,6 +203,8 @@ const char* TfLiteTypeGetName(TfLiteType type) { return ""INT8""; case kTfLiteInt64: return ""INT64""; + case kTfLiteUInt64: + return ""UINT64""; case kTfLiteBool: return ""BOOL""; case kTfLiteComplex64: ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",common.h,"@@ -300,6 +300,7 @@ typedef enum { kTfLiteFloat16 = 10, kTfLiteFloat64 = 11, kTfLiteComplex128 = 12, + kTfLiteUInt64 = 13, } TfLiteType; // Return the name of a given type, for error reporting purposes. @@ -354,6 +355,7 @@ typedef union TfLitePtrUnion { * members are deprecated. */ int32_t* i32; int64_t* i64; + uint64_t* u64; float* f; TfLiteFloat16* f16; double* f64; ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. 
This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",common_test.cc,"@@ -84,6 +84,7 @@ TEST(Types, TestTypeNames) { EXPECT_EQ(type_name(kTfLiteInt16), ""INT16""); EXPECT_EQ(type_name(kTfLiteInt32), ""INT32""); EXPECT_EQ(type_name(kTfLiteUInt8), ""UINT8""); + EXPECT_EQ(type_name(kTfLiteUInt64), ""UINT64""); EXPECT_EQ(type_name(kTfLiteInt8), ""INT8""); EXPECT_EQ(type_name(kTfLiteInt64), ""INT64""); EXPECT_EQ(type_name(kTfLiteBool), ""BOOL""); ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",flatbuffer_conversions.cc,"@@ -859,6 +859,9 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type, case TensorType_INT64: *type = kTfLiteInt64; return kTfLiteOk; + case TensorType_UINT64: + *type = kTfLiteUInt64; + return kTfLiteOk; case TensorType_STRING: *type = kTfLiteString; return kTfLiteOk; ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",util.cc,"@@ -74,6 +74,8 @@ TF_DataType GetTensorFlowDataType(TfLiteType type) { return TF_INT8; case kTfLiteInt64: return TF_INT64; + case kTfLiteUInt64: + return TF_UINT64; case kTfLiteComplex64: return TF_COMPLEX64; case kTfLiteComplex128: @@ -103,6 +105,8 @@ TfLiteType GetTensorFlowLiteType(TF_DataType type) { return kTfLiteInt8; case TF_INT64: return kTfLiteInt64; + case TF_UINT64: + return kTfLiteUInt64; case TF_COMPLEX64: return kTfLiteComplex64; case TF_COMPLEX128: ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. 
PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",util_test.cc,"@@ -115,6 +115,7 @@ TEST(UtilTest, TypeConversionsFromTFLite) { EXPECT_EQ(TF_INT32, GetTensorFlowDataType(kTfLiteInt32)); EXPECT_EQ(TF_UINT8, GetTensorFlowDataType(kTfLiteUInt8)); EXPECT_EQ(TF_INT64, GetTensorFlowDataType(kTfLiteInt64)); + EXPECT_EQ(TF_UINT64, GetTensorFlowDataType(kTfLiteUInt64)); EXPECT_EQ(TF_COMPLEX64, GetTensorFlowDataType(kTfLiteComplex64)); EXPECT_EQ(TF_COMPLEX128, GetTensorFlowDataType(kTfLiteComplex128)); EXPECT_EQ(TF_STRING, GetTensorFlowDataType(kTfLiteString)); @@ -129,6 +130,7 @@ TEST(UtilTest, TypeConversionsFromTensorFlow) { EXPECT_EQ(kTfLiteInt32, GetTensorFlowLiteType(TF_INT32)); EXPECT_EQ(kTfLiteUInt8, GetTensorFlowLiteType(TF_UINT8)); EXPECT_EQ(kTfLiteInt64, GetTensorFlowLiteType(TF_INT64)); + EXPECT_EQ(kTfLiteUInt64, GetTensorFlowLiteType(TF_UINT64)); EXPECT_EQ(kTfLiteComplex64, GetTensorFlowLiteType(TF_COMPLEX64)); EXPECT_EQ(kTfLiteComplex128, GetTensorFlowLiteType(TF_COMPLEX128)); EXPECT_EQ(kTfLiteString, GetTensorFlowLiteType(TF_STRING)); ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",enum_mapping.h,"@@ -74,6 +74,8 @@ inline TensorType TfLiteTypeToSchemaType(TfLiteType type) { return TensorType_INT8; case kTfLiteInt64: return TensorType_INT64; + case kTfLiteUInt64: + return TensorType_UINT64; case kTfLiteString: return TensorType_STRING; case kTfLiteBool: ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",kernel_util.cc,"@@ -460,6 +460,9 @@ int TfLiteTypeGetSize(TfLiteType type) { case kTfLiteInt64: TF_LITE_ASSERT_EQ(sizeof(int64_t), 8); return 8; + case kTfLiteUInt64: + TF_LITE_ASSERT_EQ(sizeof(uint64_t), 8); + return 8; case kTfLiteFloat64: TF_LITE_ASSERT_EQ(sizeof(double), 8); return 8; ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",memory_helpers.cc,"@@ -66,6 +66,9 @@ TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size) { case kTfLiteInt64: *size = sizeof(int64_t); break; + case kTfLiteUInt64: + *size = sizeof(uint64_t); + break; case kTfLiteBool: *size = sizeof(bool); break; ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. 
PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",memory_helpers_test.cc,"@@ -139,6 +139,10 @@ TF_LITE_MICRO_TEST(TestTypeSizeOf) { tflite::TfLiteTypeSizeOf(kTfLiteInt64, &size)); TF_LITE_MICRO_EXPECT_EQ(sizeof(int64_t), size); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, + tflite::TfLiteTypeSizeOf(kTfLiteUInt64, &size)); + TF_LITE_MICRO_EXPECT_EQ(sizeof(uint64_t), size); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, tflite::TfLiteTypeSizeOf(kTfLiteBool, &size)); TF_LITE_MICRO_EXPECT_EQ(sizeof(bool), size); ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",micro_interpreter.cc,"@@ -192,6 +192,9 @@ void MicroInterpreter::CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr) { case TfLiteType::kTfLiteInt64: CorrectTensorDataEndianness(tensorCorr->data.i64, tensorSize); break; + case TfLiteType::kTfLiteUInt64: + CorrectTensorDataEndianness(tensorCorr->data.u64, tensorSize); + break; case TfLiteType::kTfLiteInt32: CorrectTensorDataEndianness(tensorCorr->data.i32, tensorSize); break; ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",optional_debug_tools.cc,"@@ -50,6 +50,8 @@ const char* TensorTypeName(TfLiteType type) { return ""kTfLiteInt8""; case kTfLiteInt64: return ""kTfLiteInt64""; + case kTfLiteUInt64: + return ""kTfLiteUInt64""; case kTfLiteString: return ""kTfLiteString""; case kTfLiteBool: ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",portable_type_to_tflitetype.h,"@@ -69,6 +69,7 @@ MATCH_TYPE_AND_TFLITE_TYPE(std::complex, kTfLiteComplex64); MATCH_TYPE_AND_TFLITE_TYPE(std::complex, kTfLiteComplex128); MATCH_TYPE_AND_TFLITE_TYPE(TfLiteFloat16, kTfLiteFloat16); MATCH_TYPE_AND_TFLITE_TYPE(double, kTfLiteFloat64); +MATCH_TYPE_AND_TFLITE_TYPE(uint64_t, kTfLiteUInt64); } // namespace tflite #endif // TENSORFLOW_LITE_PORTABLE_TYPE_TO_TFLITETYPE_H_ ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. 
PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",numpy.cc,"@@ -50,6 +50,8 @@ int TfLiteTypeToPyArrayType(TfLiteType tf_lite_type) { return NPY_INT8; case kTfLiteInt64: return NPY_INT64; + case kTfLiteUInt64: + return NPY_UINT64; case kTfLiteString: return NPY_STRING; case kTfLiteBool: ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",calibration_wrapper.cc,"@@ -79,6 +79,8 @@ inline TensorType TfLiteTypeToSchemaType(TfLiteType type) { return TensorType_INT8; case kTfLiteInt64: return TensorType_INT64; + case kTfLiteUInt64: + return TensorType_UINT64; case kTfLiteString: return TensorType_STRING; case kTfLiteBool: ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",util.py,"@@ -56,6 +56,7 @@ _MAP_TF_TO_TFLITE_TYPES = { dtypes.int32: _types_pb2.INT32, dtypes.uint8: _types_pb2.QUANTIZED_UINT8, dtypes.int64: _types_pb2.INT64, + dtypes.uint64: _types_pb2.UINT64, dtypes.string: _types_pb2.STRING, dtypes.bool: _types_pb2.BOOL, dtypes.int16: _types_pb2.QUANTIZED_INT16, ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. 
PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",schema_generated.h,"@@ -395,11 +395,12 @@ enum TensorType { TensorType_INT8 = 9, TensorType_FLOAT64 = 10, TensorType_COMPLEX128 = 11, + TensorType_UINT64 = 12, TensorType_MIN = TensorType_FLOAT32, - TensorType_MAX = TensorType_COMPLEX128 + TensorType_MAX = TensorType_UINT64 }; -inline const TensorType (&EnumValuesTensorType())[12] { +inline const TensorType (&EnumValuesTensorType())[13] { static const TensorType values[] = { TensorType_FLOAT32, TensorType_FLOAT16, @@ -412,13 +413,14 @@ inline const TensorType (&EnumValuesTensorType())[12] { TensorType_COMPLEX64, TensorType_INT8, TensorType_FLOAT64, - TensorType_COMPLEX128 + TensorType_COMPLEX128, + TensorType_UINT64 }; return values; } inline const char * const *EnumNamesTensorType() { - static const char * const names[13] = { + static const char * const names[14] = { ""FLOAT32"", ""FLOAT16"", ""INT32"", @@ -431,13 +433,14 @@ inline const char * const *EnumNamesTensorType() { ""INT8"", ""FLOAT64"", ""COMPLEX128"", + ""UINT64"", nullptr }; return names; } inline const char *EnumNameTensorType(TensorType e) { - if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_COMPLEX128)) return """"; + if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_UINT64)) return """"; const size_t index = static_cast(e); return EnumNamesTensorType()[index]; } ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",split.h,"@@ -52,6 +52,7 @@ template <> inline std::vector Split(const string& s, const string& delimiter) { std::vector fields; for (const auto& p : SplitToPos(s, delimiter)) { + // NOLINTNEXTLINE(runtime/deprecated_fn) fields.push_back(strtol(s.data() + p.first, nullptr, 10)); } return fields; @@ -61,11 +62,22 @@ template <> inline std::vector Split(const string& s, const string& delimiter) { std::vector fields; for (const auto& p : SplitToPos(s, delimiter)) { + // NOLINTNEXTLINE(runtime/deprecated_fn) fields.push_back(strtoll(s.data() + p.first, nullptr, 10)); } return fields; } +template <> +inline std::vector Split(const string& s, const string& delimiter) { + std::vector fields; + for (const auto& p : SplitToPos(s, delimiter)) { + // NOLINTNEXTLINE(runtime/deprecated_fn) + fields.push_back(strtoull(s.data() + p.first, nullptr, 10)); + } + return fields; +} + template <> inline std::vector Split(const string& s, const string& delimiter) { std::vector fields; @@ -79,6 +91,7 @@ template <> inline std::vector Split(const string& s, const string& delimiter) { std::vector fields; for (const auto& p : SplitToPos(s, delimiter)) { + // NOLINTNEXTLINE(runtime/deprecated_fn) fields.push_back(strtol(s.data() + p.first, nullptr, 10)); } return fields; @@ -88,6 +101,7 @@ template <> inline std::vector Split(const string& s, const string& delimiter) { std::vector fields; for (const auto& p : SplitToPos(s, delimiter)) { + // NOLINTNEXTLINE(runtime/deprecated_fn) fields.push_back(strtol(s.data() + p.first, nullptr, 10)); } return fields; @@ -97,6 +111,7 @@ template <> inline std::vector Split(const string& s, const string& delimiter) { std::vector fields; for (const auto& p : SplitToPos(s, delimiter)) { + // 
NOLINTNEXTLINE(runtime/deprecated_fn) fields.push_back(strtol(s.data() + p.first, nullptr, 10)); } return fields; @@ -106,8 +121,9 @@ template <> inline std::vector Split(const string& s, const string& delimiter) { std::vector fields; for (const auto& p : SplitToPos(s, delimiter)) { - fields.push_back( - static_cast(strtol(s.data() + p.first, nullptr, 10))); + // NOLINTNEXTLINE(runtime/deprecated_fn) + bool val = static_cast(strtol(s.data() + p.first, nullptr, 10)); + fields.push_back(val); } return fields; } ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",tflite_driver.cc,"@@ -325,6 +325,8 @@ bool TfLiteDriver::DataExpectation::Check(bool verbose, return TypedCheck(verbose, tensor); case kTfLiteInt64: return TypedCheck(verbose, tensor); + case kTfLiteUInt64: + return TypedCheck(verbose, tensor); case kTfLiteUInt8: return TypedCheck(verbose, tensor); case kTfLiteInt8: @@ -477,6 +479,12 @@ void TfLiteDriver::SetInput(int id, const string& csv_values) { SetTensorData(values, tensor->data.raw); break; } + case kTfLiteUInt64: { + const auto& values = testing::Split(csv_values, "",""); + if (!CheckSizes(tensor->bytes, values.size())) return; + SetTensorData(values, tensor->data.raw); + break; + } case kTfLiteUInt8: { const auto& values = testing::Split(csv_values, "",""); if (!CheckSizes(tensor->bytes, values.size())) return; @@ -554,6 +562,9 @@ void TfLiteDriver::SetExpectation(int id, const string& csv_values) { case kTfLiteInt64: expected_output_[id]->SetData(csv_values); break; + case kTfLiteUInt64: + expected_output_[id]->SetData(csv_values); + break; case kTfLiteUInt8: expected_output_[id]->SetData(csv_values); break; @@ -653,6 +664,8 @@ string TfLiteDriver::ReadOutput(int id) { return JoinDefault(tensor->data.i32, num_elements, "",""); case kTfLiteInt64: return JoinDefault(tensor->data.i64, num_elements, "",""); + case kTfLiteUInt64: + return JoinDefault(tensor->data.u64, num_elements, "",""); case kTfLiteUInt8: return Join(tensor->data.uint8, num_elements, "",""); case kTfLiteInt8: ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",operator.cc,"@@ -49,6 +49,7 @@ namespace tflite { {ArrayDataType::kInt16, ::tflite::TensorType_INT16}, {ArrayDataType::kInt32, ::tflite::TensorType_INT32}, {ArrayDataType::kInt64, ::tflite::TensorType_INT64}, + {ArrayDataType::kUint64, ::tflite::TensorType_UINT64}, {ArrayDataType::kString, ::tflite::TensorType_STRING}, {ArrayDataType::kComplex64, ::tflite::TensorType_COMPLEX64}, {ArrayDataType::kComplex128, ::tflite::TensorType_COMPLEX128}, ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. 
This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",tooling_util.cc,"@@ -2309,6 +2309,8 @@ ArrayDataType ConvertIODataTypeToArrayDataType(IODataType type) { return ArrayDataType::kInt32; case INT64: return ArrayDataType::kInt64; + case UINT64: + return ArrayDataType::kUint64; case BOOL: return ArrayDataType::kBool; case STRING: ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",c_api_types.h,"@@ -300,6 +300,7 @@ typedef enum { kTfLiteFloat16 = 10, kTfLiteFloat64 = 11, kTfLiteComplex128 = 12, + kTfLiteUInt64 = 13, } TfLiteType; // Return the name of a given type, for error reporting purposes. @@ -354,6 +355,7 @@ typedef union TfLitePtrUnion { * members are deprecated. */ int32_t* i32; int64_t* i64; + uint64_t* u64; float* f; TfLiteFloat16* f16; double* f64; ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",verifier.cc,"@@ -418,6 +418,9 @@ bool VerifyNumericTensorBuffer(const Tensor& tensor, const Buffer& buffer, case TensorType_INT64: bytes_required *= sizeof(int64_t); break; + case TensorType_UINT64: + bytes_required *= sizeof(uint64_t); + break; case TensorType_BOOL: bytes_required *= sizeof(bool); break; ",0,train f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite Even though we do not support uint64 op kernels on mobile, it is inevitable to support uint64 tensors in order to enable TF uint64 ops via flex delegate. This CL enables the uint64 tensor type in MLIR converter only. PiperOrigin-RevId: 342939673 Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",util.cc,"@@ -96,6 +96,9 @@ TfLiteStatus GetSizeOfType(TfLiteContext* context, const TfLiteType type, case kTfLiteInt64: *bytes = sizeof(int64_t); break; + case kTfLiteUInt64: + *bytes = sizeof(uint64_t); + break; case kTfLiteBool: *bytes = sizeof(bool); break; ",0,train d969cd8ff9a337503963c8f4a02f56b7d776171e,tensorflow/tensorflow,"Fix flakiness in the test case. PiperOrigin-RevId: 226370519",unified_gru_test.py,"@@ -176,8 +176,8 @@ class UnifiedGRUTest(keras_parameterized.TestCase): cudnn_model.fit(x_train, y_train) y_4 = cudnn_model.predict(x_train) - self.assertAllClose(y_1, y_3) - self.assertAllClose(y_2, y_4) + self.assertAllClose(y_1, y_3, rtol=1e-5, atol=1e-5) + self.assertAllClose(y_2, y_4, rtol=1e-5, atol=1e-5) @parameterized.named_parameters( # test_name, use_bias, bias_initializer, activation ",0,train d969cd8ff9a337503963c8f4a02f56b7d776171e,tensorflow/tensorflow,"Fix flakiness in the test case. 
PiperOrigin-RevId: 226370519",unified_lstm_test.py,"@@ -332,8 +332,8 @@ class UnifiedLSTMTest(keras_parameterized.TestCase): cudnn_model.fit(x_train, y_train) y_4 = cudnn_model.predict(x_train) - self.assertAllClose(y_1, y_3) - self.assertAllClose(y_2, y_4) + self.assertAllClose(y_1, y_3, rtol=1e-5, atol=1e-5) + self.assertAllClose(y_2, y_4, rtol=1e-5, atol=1e-5) @parameterized.named_parameters(('v0', 0), ('v1', 1), ('v2', 2)) def test_implementation_mode_LSTM(self, implementation_mode): ",0,train 08963dfe9c24c3fdb28b0a5eabaac93f615d3509,tensorflow/tensorflow,"[tf.data] Changing `tf.data.Dataset.reduce` user-defined function device placement logic to match TensorFlow. Prior to this change, `tf.data.Dataset.reduce` would -- like the rest of tf.data operations -- default the placement of ops in its user-defined function to CPU. After this change, ops without explicit device placement will be placed on GPU (if possible). The rationale behind this change is that, unlike other tf.data transformations, `tf.data.Dataset.reduce` computation is not expected to be executed within a training step where the accelerator (if present) would be expected to be used for model computation. PiperOrigin-RevId: 306760623 Change-Id: Ia2602b7cde7503e9d9519b44b7f6b7621bedb547",iterator_ops.cc,"@@ -606,6 +606,7 @@ class ReduceDatasetOp : public HybridAsyncOpKernel { FunctionMetadata::Params params; OP_REQUIRES_OK(ctx, ctx->GetAttr(""use_inter_op_parallelism"", ¶ms.use_inter_op_parallelism)); + params.use_default_device = false; OP_REQUIRES_OK(ctx, FunctionMetadata::Create(ctx, ""f"", params, &func_metadata_)); OP_REQUIRES_OK(ctx, ctx->GetAttr(kOutputTypes, &output_types_)); ",0,test 2b7fb42e3b7112fc712edf05f29bbfd865a5515a,tensorflow/tensorflow,"Provides an environment variable via TF_XLA_FLAGS to turn on the MLIR bridge. PiperOrigin-RevId: 316116765 Change-Id: I005c5b6712a4e7cdd72f4302caae93f58c5f840e",flags.cc,"@@ -33,6 +33,7 @@ MarkForCompilationPassFlags* mark_for_compilation_flags; XlaDeviceFlags* device_flags; XlaOpsCommonFlags* ops_flags; IntroduceFloatingPointJitterPassFlags* jitter_flags; +MlirCommonFlags* mlir_flags; std::vector* flag_list; absl::once_flag flags_init; @@ -166,6 +167,9 @@ void AllocateAndParseFlags() { jitter_flags = new IntroduceFloatingPointJitterPassFlags; jitter_flags->jitter_amount = 1e-5; + mlir_flags = new MlirCommonFlags; + mlir_flags->tf_mlir_enable_mlir_bridge = false; + auto setter_for_jitter_tensor_names = [](string sequence) { jitter_flags->tensor_names = absl::StrSplit(sequence, ','); return true; @@ -211,7 +215,11 @@ void AllocateAndParseFlags() { Flag(""tf_introduce_floating_point_jitter_amount"", &jitter_flags->jitter_amount, ""The amount of jitter to introduce. 
This amount is added to each "" - ""element in the tensors named in `tensor_names."")}); + ""element in the tensors named in `tensor_names.""), + + Flag(""tf_mlir_enable_mlir_bridge"", + &mlir_flags->tf_mlir_enable_mlir_bridge, + ""Enables experimental MLIR-Based TensorFlow Compiler Bridge."")}); AppendMarkForCompilationPassFlagsInternal(flag_list); xla::ParseFlagsFromEnvAndDieIfUnknown(""TF_XLA_FLAGS"", *flag_list); @@ -250,6 +258,11 @@ GetIntroduceFloatingPointJitterPassFlags() { return *jitter_flags; } +MlirCommonFlags* GetMlirCommonFlags() { + absl::call_once(flags_init, &AllocateAndParseFlags); + return mlir_flags; +} + void AppendMarkForCompilationPassFlags(std::vector* flag_list) { absl::call_once(flags_init, &AllocateAndParseFlags); AppendMarkForCompilationPassFlagsInternal(flag_list); ",0,test 2b7fb42e3b7112fc712edf05f29bbfd865a5515a,tensorflow/tensorflow,"Provides an environment variable via TF_XLA_FLAGS to turn on the MLIR bridge. PiperOrigin-RevId: 316116765 Change-Id: I005c5b6712a4e7cdd72f4302caae93f58c5f840e",flags.h,"@@ -133,6 +133,11 @@ struct IntroduceFloatingPointJitterPassFlags { std::vector tensor_names; }; +// Flags for common MLIR configurations. +struct MlirCommonFlags { + bool tf_mlir_enable_mlir_bridge; +}; + // Return a pointer to the DumpGraphFlags struct; // repeated calls return the same pointer. // This should be called only after Flags::Parse() has returned. @@ -148,6 +153,8 @@ const XlaOpsCommonFlags& GetXlaOpsCommonFlags(); const IntroduceFloatingPointJitterPassFlags& GetIntroduceFloatingPointJitterPassFlags(); +MlirCommonFlags* GetMlirCommonFlags(); + // Appends the flag definitions associated with // MarkForCompilationPassFlags/DumpGraphFlags to `flag_list`. // ",0,test 2b7fb42e3b7112fc712edf05f29bbfd865a5515a,tensorflow/tensorflow,"Provides an environment variable via TF_XLA_FLAGS to turn on the MLIR bridge. PiperOrigin-RevId: 316116765 Change-Id: I005c5b6712a4e7cdd72f4302caae93f58c5f840e",context.py,"@@ -451,7 +451,6 @@ class Context(object): self._inter_op_parallelism_threads = None self._soft_device_placement = None self._log_device_placement = None - self._enable_mlir_bridge = None self._enable_mlir_graph_optimization = None self._optimizer_experimental_options = {} @@ -927,8 +926,7 @@ class Context(object): if self._log_device_placement is not None: config.log_device_placement = self._log_device_placement - if self._enable_mlir_bridge is not None: - config.experimental.enable_mlir_bridge = self._enable_mlir_bridge + config.experimental.enable_mlir_bridge = pywrap_tfe.TF_IsMlirBridgeEnabled() if self._enable_mlir_graph_optimization is not None: config.experimental.enable_mlir_graph_optimization = ( self._enable_mlir_graph_optimization) @@ -1466,7 +1464,7 @@ class Context(object): @property def enable_mlir_bridge(self): - return self._enable_mlir_bridge + return pywrap_tfe.TF_IsMlirBridgeEnabled() @property def enable_mlir_graph_optimization(self): @@ -1474,7 +1472,7 @@ class Context(object): @enable_mlir_bridge.setter def enable_mlir_bridge(self, enabled): - self._enable_mlir_bridge = enabled + pywrap_tfe.TF_EnableMlirBridge(enabled) self._thread_local_data.function_call_options = None @enable_mlir_graph_optimization.setter ",0,test 2b7fb42e3b7112fc712edf05f29bbfd865a5515a,tensorflow/tensorflow,"Provides an environment variable via TF_XLA_FLAGS to turn on the MLIR bridge. 
PiperOrigin-RevId: 316116765 Change-Id: I005c5b6712a4e7cdd72f4302caae93f58c5f840e",tfe_wrapper.cc,"@@ -364,6 +364,14 @@ PYBIND11_MODULE(_pywrap_tfe, m) { m.def(""TF_SetXlaMinClusterSize"", &TF_SetXlaMinClusterSize); m.def(""TF_IsXlaEnabled"", [] { return tensorflow::IsXlaEnabled(); }); + // MLIR Logic + m.def(""TF_IsMlirBridgeEnabled"", [] { + return tensorflow::GetMlirCommonFlags()->tf_mlir_enable_mlir_bridge; + }); + m.def(""TF_EnableMlirBridge"", [](bool enabled) { + tensorflow::GetMlirCommonFlags()->tf_mlir_enable_mlir_bridge = enabled; + }); + // // TFE_Context Logic m.def( ""TFE_NewContext"", ",0,test 4c0c199222fdcffbfb548aefa0ea82c853aea609,tensorflow/tensorflow,"Add support for testing per channel quantization kernels. PiperOrigin-RevId: 230947961",test_util.cc,"@@ -47,7 +47,12 @@ std::vector>> ArrayComplex64Near( } int SingleOpModel::AddInput(const TensorData& t, bool is_variable) { - int id = AddTensor(t, {}, is_variable); + int id = 0; + if (t.per_channel_quantization) { + id = AddTensorPerChannelQuant(t); + } else { + id = AddTensor(t, {}, is_variable); + } inputs_.push_back(id); return id; } ",0,train 4c0c199222fdcffbfb548aefa0ea82c853aea609,tensorflow/tensorflow,"Add support for testing per channel quantization kernels. PiperOrigin-RevId: 230947961",test_util.h,"@@ -21,13 +21,14 @@ limitations under the License. #include #include +#include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/lite/interpreter.h"" #include ""tensorflow/lite/kernels/internal/tensor_utils.h"" #include ""tensorflow/lite/kernels/register.h"" #include ""tensorflow/lite/model.h"" #include ""tensorflow/lite/string_util.h"" #include ""tensorflow/lite/testing/util.h"" -#include ""tensorflow/core/platform/logging.h"" +#include ""tensorflow/lite/tools/optimize/quantization_utils.h"" namespace tflite { @@ -82,7 +83,7 @@ inline std::vector Dequantize(const std::vector& data, float scale, // A helper struct to construct test tensors. This is particularly useful for // quantized tensor which must have their scale and zero_point defined before // the actual data is known. This mimics what happens in practice: quantization -// parameters are calculated during training. +// parameters are calculated during training or post training.. struct TensorData { TensorType type; std::vector shape; @@ -90,6 +91,10 @@ struct TensorData { float max; float scale; int32_t zero_point; + bool per_channel_quantization; + std::vector per_channel_quantization_scales; + std::vector per_channel_quantization_offsets; + int32_t channel_index; }; class SingleOpResolver : public OpResolver { @@ -172,6 +177,46 @@ class SingleOpModel { PopulateTensor(index, /*offset=*/0, q.data(), q.data() + q.size()); } + // Quantize and populate data for filter with per channel quantization. 
+ void PerChannelSymmetricQuantizeAndPopulate( + int index, const std::vector& input_data) { + TfLiteTensor* t = interpreter_->tensor(index); + auto* params = + reinterpret_cast(t->quantization.params); + const int channel_index = params->quantized_dimension; + + std::vector shape(t->dims->size); + for (int i = 0; i < shape.size(); ++i) { + shape[i] = t->dims->data[i]; + } + const int32_t num_inputs = input_data.size(); + const int32_t num_channel = shape[channel_index]; + std::vector quantized_output(num_inputs); + std::vector scales_inv(num_channel); + for (int i = 0; i < num_channel; ++i) { + scales_inv[i] = 1.0f / params->scale->data[i]; + } + optimize::utils::SymmetricPerChannelQuantizeValues( + input_data.data(), scales_inv, shape, channel_index, &quantized_output); + + PopulateTensor(index, /*offset=*/0, quantized_output.data(), + quantized_output.data() + quantized_output.size()); + } + + // Quantize and populate data for bias with per channel quantization. + void PerChannelQuantizeBias(int index, const std::vector& input_data) { + const int32_t num_inputs = input_data.size(); + std::vector quantized_output(num_inputs); + TfLiteTensor* t = interpreter_->tensor(index); + auto* params = + reinterpret_cast(t->quantization.params); + for (int i = 0; i < num_inputs; ++i) { + quantized_output[i] = input_data[i] * params->scale->data[i]; + } + PopulateTensor(index, /*offset=*/0, quantized_output.data(), + quantized_output.data() + quantized_output.size()); + } + const std::vector& GetShape(int id) { return tensor_data_.at(id).shape; } float GetScale(int id) { return tensor_data_.at(id).scale; } @@ -292,6 +337,24 @@ class SingleOpModel { return {scale, zero_point}; } + int AddTensorPerChannelQuant(TensorData t) { + const int id = tensors_.size(); + flatbuffers::Offset q_params = 0; + q_params = CreateQuantizationParameters( + builder_, /*min=*/0, /*max=*/0, + /*scale=*/ + builder_.CreateVector(t.per_channel_quantization_scales), + /*zero point=*/ + builder_.CreateVector(t.per_channel_quantization_offsets), + QuantizationDetails_NONE, 0, t.channel_index); + tensors_.push_back( + CreateTensor(builder_, builder_.CreateVector(t.shape), t.type, + /*buffer=*/0, + /*name=*/0, q_params, /*is_variable=*/false)); + tensor_data_[id] = t; + return id; + } + template int AddTensor(TensorData t, std::initializer_list data, bool is_variable = false) { ",0,train 17fe6574eb7929f92d081a754144747527af2a24,tensorflow/tensorflow,"Add warning note to Variable.initialized_value documentation. Change: 140374197",variables.py,"@@ -518,6 +518,10 @@ class Variable(object): You should use this instead of the variable itself to initialize another variable with a value that depends on the value of this variable. + Beware of using initialized_value except during initialization: + initialized_value causes the Variable's initializer op to be run, so running + this op resets the variable to the initial value. + ```python # Initialize 'v' with a random tensor. v = tf.Variable(tf.truncated_normal([10, 40])) ",0,test 884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error. Some of these double promotion is not obvious as va_args implicitly promotes float to double. 
PiperOrigin-RevId: 290881894 Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",check_macros.h,"@@ -35,7 +35,7 @@ struct ToString { template <> struct ToString { static void Run(float value, char* buf) { - snprintf(buf, kValueBufSize, ""%.9g"", value); + snprintf(buf, kValueBufSize, ""%.9g"", static_cast(value)); } }; ",0,train 884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error. Some of these double promotion is not obvious as va_args implicitly promotes float to double. PiperOrigin-RevId: 290881894 Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",quantization_util.cc,"@@ -183,11 +183,11 @@ double DoubleFromFractionAndShift(int64_t fraction, int shift) { // Detect NaNs and infinities. if (shift == std::numeric_limits::max()) { if (fraction == 0) { - return NAN; + return std::numeric_limits::quiet_NaN(); } else if (fraction > 0) { - return INFINITY; + return std::numeric_limits::infinity(); } else { - return -INFINITY; + return -std::numeric_limits::infinity(); } } @@ -229,7 +229,7 @@ double IntegerDoubleMultiply(double a, double b) { // Detect NaNs and infinities. if (a_shift == std::numeric_limits::max() || (b_shift == std::numeric_limits::max())) { - return NAN; + return std::numeric_limits::quiet_NaN(); } const int result_shift = a_shift + b_shift + 1; const int64_t result_fraction = (a_fraction * b_fraction) >> 32; @@ -379,7 +379,7 @@ bool CheckedLog2(const float x, int* log2_result) { const float x_log2_fracpart = x_log2 - x_log2_rounded; *log2_result = static_cast(x_log2_rounded); - return std::abs(x_log2_fracpart) < 1e-3; + return std::abs(x_log2_fracpart) < 1e-3f; } void QuantizeMultiplierArray(const double* effective_scales, size_t size, ",0,train 884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error. Some of these double promotion is not obvious as va_args implicitly promotes float to double. PiperOrigin-RevId: 290881894 Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",quantize.h,"@@ -36,7 +36,9 @@ inline void AffineQuantize(const tflite::QuantizationParams& op_params, for (int i = 0; i < flat_size; i++) { const float val = input_data[i]; - int32 unclamped = static_cast(TfLiteRound(val / scale)) + zero_point; + int32 unclamped = + static_cast(TfLiteRound(val / static_cast(scale))) + + zero_point; int32 clamped = std::min(std::max(unclamped, min_val), max_val); output_data[i] = clamped; } ",0,train 884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error. Some of these double promotion is not obvious as va_args implicitly promotes float to double. PiperOrigin-RevId: 290881894 Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",softmax.h,"@@ -43,16 +43,20 @@ inline void Softmax(const SoftmaxParams& params, max = std::max(max, input_data[i * depth + c]); } + // TODO(b/148114827): Improve this code. // Compute sum. float sum = 0.f; for (int c = 0; c < depth; ++c) { - sum += std::exp((input_data[i * depth + c] - max) * params.beta); + sum += std::exp(static_cast(input_data[i * depth + c] - max) * + params.beta); } // Compute result. for (int c = 0; c < depth; ++c) { output_data[i * depth + c] = - std::exp((input_data[i * depth + c] - max) * params.beta) / sum; + std::exp(static_cast(input_data[i * depth + c] - max) * + params.beta) / + static_cast(sum); } } } ",0,train 884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error. 
Some of these double promotion is not obvious as va_args implicitly promotes float to double. PiperOrigin-RevId: 290881894 Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",kernel_util.cc,"@@ -118,11 +118,12 @@ TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context, const TfLiteTensor* bias, TfLiteTensor* output, double* multiplier) { - const double input_product_scale = input->params.scale * filter->params.scale; + const double input_product_scale = static_cast(input->params.scale) * + static_cast(filter->params.scale); // TODO(ahentz): The following conditions must be guaranteed by the training // pipeline. if (bias) { - const double bias_scale = bias->params.scale; + const double bias_scale = static_cast(bias->params.scale); TF_LITE_ENSURE(context, std::abs(input_product_scale - bias_scale) <= 1e-6 * std::min(input_product_scale, bias_scale)); @@ -136,9 +137,10 @@ TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context, const TfLiteTensor* filter, TfLiteTensor* output, double* multiplier) { - const double input_product_scale = input->params.scale * filter->params.scale; + const double input_product_scale = static_cast(input->params.scale) * + static_cast(filter->params.scale); TF_LITE_ENSURE(context, input_product_scale >= 0); - *multiplier = input_product_scale / output->params.scale; + *multiplier = input_product_scale / static_cast(output->params.scale); return kTfLiteOk; } ",0,train 884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error. Some of these double promotion is not obvious as va_args implicitly promotes float to double. PiperOrigin-RevId: 290881894 Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",output_handler.cc,"@@ -18,5 +18,7 @@ limitations under the License. void HandleOutput(tflite::ErrorReporter* error_reporter, float x_value, float y_value) { // Log the current X and Y values - error_reporter->Report(""x_value: %f, y_value: %f\n"", x_value, y_value); + error_reporter->Report(""x_value: %f, y_value: %f\n"", + static_cast(x_value), + static_cast(y_value)); } ",0,train 884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error. Some of these double promotion is not obvious as va_args implicitly promotes float to double. PiperOrigin-RevId: 290881894 Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",add.cc,"@@ -77,14 +77,15 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params, data->output_offset = output->params.zero_point; data->left_shift = 20; const double twice_max_input_scale = - 2 * std::max(input1->params.scale, input2->params.scale); + 2 * static_cast( + std::max(input1->params.scale, input2->params.scale)); const double real_input1_multiplier = - input1->params.scale / twice_max_input_scale; + static_cast(input1->params.scale) / twice_max_input_scale; const double real_input2_multiplier = - input2->params.scale / twice_max_input_scale; + static_cast(input2->params.scale) / twice_max_input_scale; const double real_output_multiplier = twice_max_input_scale / - ((1 << data->left_shift) * output->params.scale); + ((1 << data->left_shift) * static_cast(output->params.scale)); QuantizeMultiplierSmallerThanOneExp( real_input1_multiplier, &data->input1_multiplier, &data->input1_shift); ",0,train 884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error. Some of these double promotion is not obvious as va_args implicitly promotes float to double. 
PiperOrigin-RevId: 290881894 Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",comparisons.cc,"@@ -43,12 +43,14 @@ constexpr int kOutputTensor = 0; \ int32 input1_multiplier; \ int input1_shift; \ - QuantizeMultiplierSmallerThanOneExp(input1->params.scale, \ - &input1_multiplier, &input1_shift); \ + QuantizeMultiplierSmallerThanOneExp( \ + static_cast(input1->params.scale), &input1_multiplier, \ + &input1_shift); \ int32 input2_multiplier; \ int input2_shift; \ - QuantizeMultiplierSmallerThanOneExp(input2->params.scale, \ - &input2_multiplier, &input2_shift); \ + QuantizeMultiplierSmallerThanOneExp( \ + static_cast(input2->params.scale), &input2_multiplier, \ + &input2_shift); \ \ ComparisonParams op_params; \ op_params.left_shift = left_shift; \ ",0,train 884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error. Some of these double promotion is not obvious as va_args implicitly promotes float to double. PiperOrigin-RevId: 290881894 Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",dequantize.cc,"@@ -46,7 +46,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { tflite::DequantizationParams op_params; op_params.zero_point = input->params.zero_point; - op_params.scale = input->params.scale; + op_params.scale = static_cast(input->params.scale); switch (input->type) { case kTfLiteUInt8: reference_ops::Dequantize( ",0,train 884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error. Some of these double promotion is not obvious as va_args implicitly promotes float to double. PiperOrigin-RevId: 290881894 Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",mul.cc,"@@ -55,8 +55,9 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, &data->output_activation_max)); if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { - double real_multiplier = - input1->params.scale * input2->params.scale / output->params.scale; + double real_multiplier = static_cast(input1->params.scale) * + static_cast(input2->params.scale) / + static_cast(output->params.scale); QuantizeMultiplier(real_multiplier, &data->output_multiplier, &data->output_shift); } ",0,train 884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error. Some of these double promotion is not obvious as va_args implicitly promotes float to double. PiperOrigin-RevId: 290881894 Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",pad.cc,"@@ -152,8 +152,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // same quantized range as the input and output tensors. TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point, op_context.constant_values->params.zero_point); - TF_LITE_ENSURE_EQ(context, op_context.output->params.scale, - op_context.constant_values->params.scale); + TF_LITE_ENSURE_EQ( + context, static_cast(op_context.output->params.scale), + static_cast(op_context.constant_values->params.scale)); pad_value = *GetTensorData(op_context.constant_values); } if (op_context.resizing_category == ResizingCategory::kImageStyle) { ",0,train 884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error. Some of these double promotion is not obvious as va_args implicitly promotes float to double. 
PiperOrigin-RevId: 290881894 Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",prelu.cc,"@@ -53,7 +53,7 @@ inline void BroadcastPrelu4DSlowFloat( auto in2_idx = SubscriptToIndex(desc2, b, y, x, c); auto in1_val = input1_data[in1_idx]; auto in2_val = input2_data[in2_idx]; - output_data[out_idx] = in1_val >= 0.0 ? in1_val : in1_val * in2_val; + output_data[out_idx] = in1_val >= 0.0f ? in1_val : in1_val * in2_val; } } } @@ -67,8 +67,9 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) { int32_t output_multiplier = 0; int output_shift = 0; if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) { - double real_multiplier = - input->params.scale * alpha->params.scale / output->params.scale; + double real_multiplier = static_cast(input->params.scale) * + static_cast(alpha->params.scale) / + static_cast(output->params.scale); QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier, &output_shift); } ",0,train 884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error. Some of these double promotion is not obvious as va_args implicitly promotes float to double. PiperOrigin-RevId: 290881894 Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",quantize.cc,"@@ -60,7 +60,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { tflite::QuantizationParams op_params; op_params.zero_point = output->params.zero_point; - op_params.scale = output->params.scale; + op_params.scale = static_cast(output->params.scale); switch (output->type) { case kTfLiteInt8: reference_ops::AffineQuantize( ",0,train 884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error. Some of these double promotion is not obvious as va_args implicitly promotes float to double. PiperOrigin-RevId: 290881894 Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",softmax.cc,"@@ -53,7 +53,8 @@ TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context, static const int kScaledDiffIntegerBits = 5; tflite::PreprocessSoftmaxScaling( - params->beta, input->params.scale, kScaledDiffIntegerBits, + static_cast(params->beta), + static_cast(input->params.scale), kScaledDiffIntegerBits, &data->input_multiplier, &data->input_left_shift); data->diff_min = -1.0 * tflite::CalculateInputRadius( kScaledDiffIntegerBits, data->input_left_shift); @@ -143,7 +144,7 @@ void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output, void Softmax4DFloat(const TfLiteTensor* input, TfLiteTensor* output, TfLiteSoftmaxParams* params) { SoftmaxParams op_params; - op_params.beta = params->beta; + op_params.beta = static_cast(params->beta); tflite::reference_ops::Softmax( op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(output), GetTensorData(output)); ",0,train 884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error. Some of these double promotion is not obvious as va_args implicitly promotes float to double. 
PiperOrigin-RevId: 290881894 Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",svdf.cc,"@@ -526,12 +526,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* output_params = reinterpret_cast( output->quantization.params); const double effective_scale_1 = - input_params->scale->data[0] * - weights_feature_params->scale->data[0] / - state_params->scale->data[0]; - const double effective_scale_2 = state_params->scale->data[0] * - weight_time_params->scale->data[0] / - output_params->scale->data[0]; + static_cast(input_params->scale->data[0] * + weights_feature_params->scale->data[0] / + state_params->scale->data[0]); + const double effective_scale_2 = static_cast( + state_params->scale->data[0] * weight_time_params->scale->data[0] / + output_params->scale->data[0]); QuantizeMultiplier(effective_scale_1, &op_data.effective_scale_1_a, &op_data.effective_scale_1_b); QuantizeMultiplier(effective_scale_2, &op_data.effective_scale_2_a, ",0,train c51da68e1dbe80029b0ef93b86cf6fde4447aaa4,tensorflow/tensorflow,"Pluggable device/op_handler support in c_api_tfrt. And it starts to reuse device name (e.g. /device:CPU:0) borrowed from TensorFlow. It also allows creating different op handler for different GPU devices. PiperOrigin-RevId: 320713554 Change-Id: Id554249713fe7571e29e8f2f36fc0986ee44e9ec",c_api.cc,"@@ -725,13 +725,7 @@ void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; } TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) { if (opts->use_tfrt) { #ifdef PLATFORM_GOOGLE - tfrt::SmallVector op_handler_chains; - tfrt::SmallVector device_attributes; - status->status = tfrt::ListOpHandlerChains( - opts->session_options.options, &op_handler_chains, &device_attributes); - if (!status->status.ok()) return nullptr; - return tensorflow::wrap(new tfrt::ContextInterface( - op_handler_chains, device_attributes, opts->async)); + return tensorflow::wrap(new tfrt::ContextInterface(opts->async)); #else status->status = tensorflow::errors::Unimplemented(""TFRT is not supported""); return nullptr; ",0,train d4b3956c3759afac03f2a21c77399a01150f2928,tensorflow/tensorflow,"Make name_scopes work correctly in V2. PiperOrigin-RevId: 232397249",base_layer.py,"@@ -551,64 +551,59 @@ class Layer(checkpointable.Checkpointable): # pass to __call__, hence we set previous_mask as the default value. kwargs['mask'] = previous_mask - with ops.name_scope(self._name_scope()): - if not self.built: + # Check input assumptions set after layer building, e.g. input shape. + if build_graph: + # Symbolic execution on symbolic tensors. We will attempt to build + # the corresponding TF subgraph inside `backend.get_graph()` + input_spec.assert_input_compatibility(self.input_spec, inputs, self.name) + graph = backend.get_graph() + with graph.as_default(), ops.name_scope(self._name_scope()): # Build layer if applicable (if the `build` method has been overridden). self._maybe_build(inputs) - # We must set self.built since user defined build functions are not - # constrained to set self.built. - self.built = True - - # Check input assumptions set after layer building, e.g. input shape. - if build_graph: - # Symbolic execution on symbolic tensors. 
We will attempt to build - # the corresponding TF subgraph inside `backend.get_graph()` - input_spec.assert_input_compatibility( - self.input_spec, inputs, self.name) - graph = backend.get_graph() - with graph.as_default(): - if not self.dynamic: - try: - outputs = self.call(inputs, *args, **kwargs) - except TypeError as e: - messages = ('`tf.Tensor` as a Python `bool` is not allowed', - 'Tensor objects are only iterable when eager') - exception_str = str(e) - for msg in messages: - if msg in exception_str: - raise TypeError('You are attempting to use Python control ' - 'flow in a layer that was not declared to be ' - 'dynamic. Pass `dynamic=True` to the class ' - 'constructor.\nEncountered error:\n""""""\n' + - exception_str + '\n""""""') - raise - else: - # We will use static shape inference to return symbolic tensors - # matching the specifications of the layer outputs. - # Since `self.dynamic` is True, we will never attempt to - # run the underlying TF graph (which is disconnected). - # TODO(fchollet): consider py_func as an alternative, which - # would enable us to run the underlying graph if needed. - outputs = self._symbolic_call(inputs) - - if outputs is None: - raise ValueError('A layer\'s `call` method should return a ' - 'Tensor or a list of Tensors, not None ' - '(layer: ' + self.name + ').') - if base_layer_utils.have_all_keras_metadata(inputs): - inputs, outputs = self._set_connectivity_metadata_( - inputs, outputs, args, kwargs) - self._handle_activity_regularization(inputs, outputs) - self._set_mask_metadata(inputs, outputs, previous_mask) - if hasattr(self, '_set_inputs') and not self.inputs: - # Subclassed network: explicitly set metadata normally set by - # a call to self._set_inputs(). - # TODO(b/120997007): This should be done in Eager as well, but - # causes garbage collection issues because of the placeholders - # created on the default Keras graph. - self._set_inputs(inputs, outputs) - else: - # Eager execution on data tensors. + if not self.dynamic: + try: + outputs = self.call(inputs, *args, **kwargs) + except TypeError as e: + messages = ('`tf.Tensor` as a Python `bool` is not allowed', + 'Tensor objects are only iterable when eager') + exception_str = str(e) + for msg in messages: + if msg in exception_str: + raise TypeError('You are attempting to use Python control ' + 'flow in a layer that was not declared to be ' + 'dynamic. Pass `dynamic=True` to the class ' + 'constructor.\nEncountered error:\n""""""\n' + + exception_str + '\n""""""') + raise + else: + # We will use static shape inference to return symbolic tensors + # matching the specifications of the layer outputs. + # Since `self.dynamic` is True, we will never attempt to + # run the underlying TF graph (which is disconnected). + # TODO(fchollet): consider py_func as an alternative, which + # would enable us to run the underlying graph if needed. + outputs = self._symbolic_call(inputs) + + if outputs is None: + raise ValueError('A layer\'s `call` method should return a ' + 'Tensor or a list of Tensors, not None ' + '(layer: ' + self.name + ').') + if base_layer_utils.have_all_keras_metadata(inputs): + inputs, outputs = self._set_connectivity_metadata_( + inputs, outputs, args, kwargs) + self._handle_activity_regularization(inputs, outputs) + self._set_mask_metadata(inputs, outputs, previous_mask) + if hasattr(self, '_set_inputs') and not self.inputs: + # Subclassed network: explicitly set metadata normally set by + # a call to self._set_inputs(). 
+ # TODO(b/120997007): This should be done in Eager as well, but + # causes garbage collection issues because of the placeholders + # created on the default Keras graph. + self._set_inputs(inputs, outputs) + else: + # Eager execution on data tensors. + with ops.name_scope(self._name_scope()): + self._maybe_build(inputs) outputs = self.call(inputs, *args, **kwargs) self._handle_activity_regularization(inputs, outputs) self._set_mask_metadata(inputs, outputs, previous_mask) @@ -1578,6 +1573,9 @@ class Layer(checkpointable.Checkpointable): def _maybe_build(self, inputs): # Check input assumptions set before layer building, e.g. input rank. + if self.built: + return + input_spec.assert_input_compatibility( self.input_spec, inputs, self.name) input_list = nest.flatten(inputs) @@ -1592,6 +1590,9 @@ class Layer(checkpointable.Checkpointable): # Only call `build` if the user has manually overridden the build method. if not hasattr(self.build, '_is_default'): self.build(input_shapes) + # We must set self.built since user defined build functions are not + # constrained to set self.built. + self.built = True def _symbolic_call(self, inputs): input_shapes = nest.map_structure(lambda x: x.shape, inputs) ",0,train d4b3956c3759afac03f2a21c77399a01150f2928,tensorflow/tensorflow,"Make name_scopes work correctly in V2. PiperOrigin-RevId: 232397249",base_layer_test.py,"@@ -456,6 +456,34 @@ class NestedTrackingTest(test.TestCase): self.assertEqual(len(layer.updates), 3) +@test_util.run_all_in_graph_and_eager_modes +class NameScopingTest(keras_parameterized.TestCase): + + def test_name_scope_layer(self): + x = keras.backend.placeholder(shape=(10, 10)) + layer = keras.layers.Dense(10, name='MyName') + layer(x) + self.assertEqual(layer.bias.name, 'MyName/bias:0') + self.assertEqual(layer.kernel.name, 'MyName/kernel:0') + + def test_name_scope_sublayer(self): + x = keras.backend.placeholder(shape=(10, 10)) + layer = keras.layers.Dense( + 10, activation=keras.layers.ReLU(name='MyAct'), name='MyName2') + y = layer(x) + self.assertEqual(layer.bias.name, 'MyName2/bias:0') + self.assertEqual(layer.kernel.name, 'MyName2/kernel:0') + self.assertEqual(y.name, 'MyName2/MyAct/Relu:0') + + def test_name_scope_tf_tensor(self): + x = ops.convert_to_tensor(np.ones((10, 10))) + layer = keras.layers.Dense( + 10, activation=keras.layers.ReLU(name='MyAct'), name='MyName3') + layer(x) + self.assertEqual(layer.bias.name, 'MyName3/bias:0') + self.assertEqual(layer.kernel.name, 'MyName3/kernel:0') + + if __name__ == '__main__': ops.enable_eager_execution() test.main() ",0,train 036c2c3e720ba65a975eb2db8e2b2dbc71417b74,tensorflow/tensorflow,"Fix incorrect gradient w.r.t. A for matrix_solve_ls in the underdetermined case. Add missing name to gradient test that caused most tests for matrix_solve_ls_grad to be skipped. Set proper initial values in linalg_grad_test and tighten test tolerances for float64. Change: 138725515",linalg_grad_test.py,"@@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== - """"""Tests for tensorflow.ops.linalg_grad."""""" from __future__ import absolute_import from __future__ import division @@ -22,6 +21,13 @@ import numpy as np import tensorflow as tf +def _AddTest(test, op_name, testcase_name, fn): + test_name = '_'.join(['test', op_name, testcase_name]) + if hasattr(test, test_name): + raise RuntimeError('Test %s defined more than once' % test_name) + setattr(test, test_name, fn) + + class ShapeTest(tf.test.TestCase): def testBatchGradientUnknownSize(self): @@ -29,8 +35,8 @@ class ShapeTest(tf.test.TestCase): batch_size = tf.constant(3) matrix_size = tf.constant(4) batch_identity = tf.tile( - tf.expand_dims( - tf.diag(tf.ones([matrix_size])), 0), [batch_size, 1, 1]) + tf.expand_dims(tf.diag(tf.ones([matrix_size])), 0), + [batch_size, 1, 1]) determinants = tf.matrix_determinant(batch_identity) reduced = tf.reduce_sum(determinants) sum_grad = tf.gradients(reduced, batch_identity)[0] @@ -46,24 +52,26 @@ def _GetMatrixUnaryFunctorGradientTest(functor_, dtype_, shape_, **kwargs_): def Test(self): with self.test_session(): np.random.seed(1) - m = np.random.uniform(low=-1.0, - high=1.0, - size=np.prod(shape_)).reshape(shape_).astype(dtype_) - a = tf.constant(m) + a_np = np.random.uniform( + low=-1.0, high=1.0, + size=np.prod(shape_)).reshape(shape_).astype(dtype_) + a = tf.constant(a_np) b = functor_(a, **kwargs_) # Optimal stepsize for central difference is O(epsilon^{1/3}). epsilon = np.finfo(dtype_).eps - delta = 0.1 * epsilon**(1.0 / 3.0) + delta = epsilon**(1.0 / 3.0) # tolerance obtained by looking at actual differences using # np.linalg.norm(theoretical-numerical, np.inf) on -mavx build - tol = 1e-3 if dtype_ == np.float64 else 0.05 - - theoretical, numerical = tf.test.compute_gradient(a, - a.get_shape().as_list(), - b, - b.get_shape().as_list(), - delta=delta) + tol = 1e-6 if dtype_ == np.float64 else 0.05 + + theoretical, numerical = tf.test.compute_gradient( + a, + a.get_shape().as_list(), + b, + b.get_shape().as_list(), + x_init_value=a_np, + delta=delta) self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol) return Test @@ -73,42 +81,47 @@ class MatrixBinaryFunctorGradientTest(tf.test.TestCase): pass # Filled in below -def _GetMatrixBinaryFunctorGradientTest(functor_, dtype_, shape_, **kwargs_): +def _GetMatrixBinaryFunctorGradientTest(functor_, + dtype_, + shape_, + float32_tol_fudge=1.0, + **kwargs_): def Test(self): with self.test_session(): np.random.seed(1) - m = np.random.uniform(low=-1.0, - high=1.0, - size=np.prod(shape_)).reshape(shape_).astype(dtype_) - a = tf.constant(m) - - n = np.random.uniform(low=-1.0, - high=1.0, - size=np.prod(shape_)).reshape(shape_).astype(dtype_) - b = tf.constant(n) + a_np = np.random.uniform( + low=-1.0, high=1.0, + size=np.prod(shape_)).reshape(shape_).astype(dtype_) + a = tf.constant(a_np) + + b_np = np.random.uniform( + low=-1.0, high=1.0, + size=np.prod(shape_)).reshape(shape_).astype(dtype_) + b = tf.constant(b_np) c = functor_(a, b, **kwargs_) # Optimal stepsize for central difference is O(epsilon^{1/3}). 
epsilon = np.finfo(dtype_).eps - delta = 0.1 * epsilon**(1.0 / 3.0) + delta = epsilon**(1.0 / 3.0) # tolerance obtained by looking at actual differences using # np.linalg.norm(theoretical-numerical, np.inf) on -mavx build - tol = 1e-3 if dtype_ == np.float64 else 0.05 - + tol = 1e-6 if dtype_ == np.float64 else float32_tol_fudge * 0.04 # The gradients for a and b may be of very different magnitudes, # so to not get spurious failures we test them separately. - for factor in a, b: + for factor, factor_init in [a, a_np], [b, b_np]: theoretical, numerical = tf.test.compute_gradient( factor, factor.get_shape().as_list(), c, c.get_shape().as_list(), + x_init_value=factor_init, delta=delta) self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol) return Test + if __name__ == '__main__': # Tests for gradients of binary matrix operations. for dtype in np.float32, np.float64: @@ -120,29 +133,32 @@ if __name__ == '__main__': shape = extra + (size, size) name = '%s_%s_adj_%s' % (dtype.__name__, '_'.join(map(str, shape)), str(adjoint)) - setattr(MatrixBinaryFunctorGradientTest, - 'testMatrixSolveGradient_' + name, - _GetMatrixBinaryFunctorGradientTest(tf.matrix_solve, - dtype, shape, - adjoint=adjoint)) + _AddTest( + MatrixBinaryFunctorGradientTest, + 'MatrixSolveGradient', + name, + _GetMatrixBinaryFunctorGradientTest( + tf.matrix_solve, dtype, shape, adjoint=adjoint)) if dtype == np.float64: # TODO(rmlarsen): The gradients of triangular solves seems # particularly sensitive to round-off when computed in float32. # In some tests, a few gradient elements differ by 25% between the # numerical and theoretical values. Disable tests for float32 until # we understand this better. - setattr( + _AddTest( MatrixBinaryFunctorGradientTest, - 'testMatrixTriangularSolveGradient_' + name + '_low_True', + 'MatrixTriangularSolveGradient', + name + '_low_True', _GetMatrixBinaryFunctorGradientTest( tf.matrix_triangular_solve, dtype, shape, adjoint=adjoint, lower=True)) - setattr( + _AddTest( MatrixBinaryFunctorGradientTest, - 'testMatrixTriangularSolveGradient_' + name + '_low_False', + 'MatrixTriangularSolveGradient', + name + '_low_False', _GetMatrixBinaryFunctorGradientTest( tf.matrix_triangular_solve, dtype, @@ -158,14 +174,13 @@ if __name__ == '__main__': for extra in [(), (2,), (3,)] + [(3, 2)] * (size < 10): shape = extra + (size, size) name = '%s_%s' % (dtype.__name__, '_'.join(map(str, shape))) - setattr(MatrixUnaryFunctorGradientTest, - 'testMatrixInverseGradient_' + name, - _GetMatrixUnaryFunctorGradientTest(tf.matrix_inverse, - dtype, shape)) - setattr(MatrixUnaryFunctorGradientTest, - 'testMatrixDeterminantGradient_' + name, - _GetMatrixUnaryFunctorGradientTest(tf.matrix_determinant, dtype, - shape)) + _AddTest(MatrixUnaryFunctorGradientTest, 'MatrixInverseGradient', name, + _GetMatrixUnaryFunctorGradientTest(tf.matrix_inverse, dtype, + shape)) + _AddTest(MatrixUnaryFunctorGradientTest, 'MatrixDeterminantGradient', + name, + _GetMatrixUnaryFunctorGradientTest(tf.matrix_determinant, + dtype, shape)) # Tests for gradients of matrix_solve_ls for dtype in np.float32, np.float64: @@ -173,9 +188,16 @@ if __name__ == '__main__': for cols in 2, 5, 10: for l2_regularization in 0.0, 0.001, 1.0: shape = (rows, cols) - setattr(MatrixBinaryFunctorGradientTest, - 'testMatrixSolveLsGradient_' + name, - _GetMatrixBinaryFunctorGradientTest(tf.matrix_solve_ls, dtype, - shape)) + name = '%s_%s_%s' % (dtype.__name__, '_'.join(map(str, shape)), + l2_regularization) + _AddTest( + MatrixBinaryFunctorGradientTest, + 
'MatrixSolveLsGradient', + name, + _GetMatrixBinaryFunctorGradientTest( + lambda a, b, l=l2_regularization: tf.matrix_solve_ls(a, b, l), + dtype, + shape, + float32_tol_fudge=4.0)) tf.test.main() ",0,test 036c2c3e720ba65a975eb2db8e2b2dbc71417b74,tensorflow/tensorflow,"Fix incorrect gradient w.r.t. A for matrix_solve_ls in the underdetermined case. Add missing name to gradient test that caused most tests for matrix_solve_ls_grad to be skipped. Set proper initial values in linalg_grad_test and tighten test tolerances for float64. Change: 138725515",linalg_grad.py,"@@ -95,7 +95,7 @@ def _MatrixSolveLsGrad(op, grad): """""" a = op.inputs[0] b = op.inputs[1] - l2_regularizer = op.inputs[2] + l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype) x = op.outputs[0] a_shape = array_ops.shape(a) batch_shape = a_shape[:-2] @@ -125,7 +125,7 @@ def _MatrixSolveLsGrad(op, grad): """""" a = op.inputs[0] b = op.inputs[1] - l2_regularizer = op.inputs[2] + l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype) a_shape = array_ops.shape(a) batch_shape = a_shape[:-2] m = a_shape[-2] @@ -135,11 +135,13 @@ def _MatrixSolveLsGrad(op, grad): a, a, adj_y=True) + l2_regularizer * identity chol = linalg_ops.cholesky(gramian) grad_b = linalg_ops.cholesky_solve(chol, math_ops.batch_matmul(a, grad)) - # Temporary z = (A * A^T + lambda * I)^{-1} * B. - z = linalg_ops.cholesky_solve(chol, b) - bz = -math_ops.batch_matmul(grad_b, z, adj_y=True) - bz_sym = bz + array_ops.matrix_transpose(bz) - grad_a = math_ops.batch_matmul(bz_sym, a) + math_ops.batch_matmul(z, grad) + # Temporary tmp = (A * A^T + lambda * I)^{-1} * B. + tmp = linalg_ops.cholesky_solve(chol, b) + a1 = math_ops.batch_matmul(tmp, a, adj_x=True) + a1 = -math_ops.batch_matmul(grad_b, a1) + a2 = grad - math_ops.batch_matmul(a, grad_b, adj_x=True) + a2 = math_ops.batch_matmul(tmp, a2, adj_y=True) + grad_a = a1 + a2 return (grad_a, grad_b, None) fast = op.get_attr(""fast"") ",0,test c0fdbc8eec34a3bd744b58ea9786c4fbf381bf0c,tensorflow/tensorflow,"Cosmetic change: fix header ordering. There are two different ""jni_utils.h"" headers; the one that should be included first is the one that matches the path of this .cc file, which declares the entities defined in this file. PiperOrigin-RevId: 399651260 Change-Id: I004817fbec02d55b0cce3b710e1ee4df12438895",jni_utils.cc,"@@ -13,13 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include ""tensorflow/lite/core/shims/jni/jni_utils.h"" +#include ""tensorflow/lite/java/src/main/native/jni_utils.h"" #include #include #include -#include ""tensorflow/lite/java/src/main/native/jni_utils.h"" +#include ""tensorflow/lite/core/shims/jni/jni_utils.h"" namespace tflite { namespace jni { ",0,train 653bdbd4ffefb008a4074617cae518ab143420ed,tensorflow/tensorflow,"Fix potential use-after-free of `worker_cache` in NewRemoteDevices(). 
Change: 142623343",remote_device.cc,"@@ -77,8 +77,8 @@ void NewRemoteDevices(Env* env, WorkerCacheInterface* worker_cache, remote_devices.push_back(d); } } - done(s, &remote_devices); worker_cache->ReleaseWorker(worker_name, wi); + done(s, &remote_devices); delete call; }; wi->GetStatusAsync(&call->req, &call->resp, cb); ",0,train 346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK PiperOrigin-RevId: 157522142",literal_util_test.cc,"@@ -771,7 +771,7 @@ TEST_F(LiteralUtilTest, F16) { // TODO - modify if we make the data format machine endianess dependent auto m1 = LiteralUtil::CreateFromShape(ShapeUtil::MakeShape(F16, {2, 2})); Literal* l1 = m1.get(); - const char* d1 = (const char*)LiteralUtil::InternalData(*l1); + const char* d1 = static_cast(LiteralUtil::InternalData(*l1)); EXPECT_EQ(d1[0], 0); EXPECT_EQ(d1[1], 0); EXPECT_EQ(d1[2], 0); @@ -787,7 +787,7 @@ TEST_F(LiteralUtilTest, F16) { half h2(2.0f); auto m2 = LiteralUtil::CreateR2({{h1, h2}, {h2, h1}}); Literal* l2 = m2.get(); - const char* d2 = (const char*)LiteralUtil::InternalData(*l2); + const char* d2 = static_cast(LiteralUtil::InternalData(*l2)); EXPECT_EQ(d2[0], 0); EXPECT_EQ(d2[1], 0x3C); EXPECT_EQ(d2[2], 0); ",0,train 346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK PiperOrigin-RevId: 157522142",single_image_random_dot_stereograms_ops.cc,"@@ -57,8 +57,8 @@ class SingleImageRandomDotStereogramsOp : public OpKernel { ::tensorflow::TensorShapeProto output_image_shape; ::tensorflow::TensorShapeProto output_data_window; - uint8 Cblack = (uint8)0; - uint8 Cwhite = (uint8)255; + uint8 Cblack = 0; + uint8 Cwhite = 255; int indexMode = 0; // 0 - truncate XY, 1 - round XY, 2 - Interpolate XY (not // implemented yet, keep default of 0) ",0,train 346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK PiperOrigin-RevId: 157522142",gpu_tracer.cc,"@@ -398,8 +398,8 @@ Status GPUTracerImpl::Start() { // There can only be one CUPTI subscriber. If we can't create one then // there is another trace in progress (possibly by external code). CUptiResult ret; - ret = cupti_wrapper_->Subscribe(&subscriber_, (CUpti_CallbackFunc)ApiCallback, - this); + ret = cupti_wrapper_->Subscribe( + &subscriber_, static_cast(ApiCallback), this); if (ret == CUPTI_ERROR_MAX_LIMIT_REACHED) { return errors::Unavailable(""CUPTI subcriber limit reached.""); } else if (ret != CUPTI_SUCCESS) { ",0,train 346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK PiperOrigin-RevId: 157522142",fft_ops.cc,"@@ -112,14 +112,14 @@ class FFTCPU : public FFTBase { auto device = ctx->eigen_device(); if (!IsReal()) { - auto input = ((Tensor)in).flat_inner_dims(); + auto input = (Tensor(in)).flat_inner_dims(); // Compute the FFT using eigen. auto output = out->flat_inner_dims(); output.device(device) = input.template fft < Eigen::BothParts, Forward ? 
Eigen::FFT_FORWARD : Eigen::FFT_REVERSE > (axes); } else { if (IsForward()) { - auto input = ((Tensor)in).flat_inner_dims(); + auto input = (Tensor(in)).flat_inner_dims(); auto output = out->flat_inner_dims(); Eigen::DSizes startIndices; ",0,train 346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK PiperOrigin-RevId: 157522142",snappy_inputbuffer.cc,"@@ -106,7 +106,7 @@ Status SnappyInputBuffer::Inflate() { // Output buffer must be large enough to fit the uncompressed block. DCHECK_GE(output_buffer_capacity_, uncompressed_length); - next_out_ = (char*)output_buffer_.get(); + next_out_ = output_buffer_.get(); bool status = port::Snappy_Uncompress(next_in_, compressed_block_length, output_buffer_.get()); ",0,train 346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK PiperOrigin-RevId: 157522142",table_test.cc,"@@ -526,8 +526,9 @@ static bool Between(uint64 val, uint64 low, uint64 high) { bool result = (val >= low) && (val <= high); if (!result) { fprintf(stderr, ""Value %llu is not in range [%llu, %llu]\n"", - (unsigned long long)(val), (unsigned long long)(low), - (unsigned long long)(high)); + static_cast(val), + static_cast(low), + static_cast(high)); } return result; } ",0,train 346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK PiperOrigin-RevId: 157522142",ordered_code.cc,"@@ -134,7 +134,9 @@ inline static void AppendBytes(string* dest, const char* src, size_t len) { dest->append(src, len); } -inline bool IsSpecialByte(char c) { return ((unsigned char)(c + 1)) < 2; } +inline bool IsSpecialByte(char c) { + return (static_cast(c + 1)) < 2; +} // Return a pointer to the first byte in the range ""[start..limit)"" // whose value is 0 or 255 (kEscape1 or kEscape2). If no such byte @@ -201,7 +203,7 @@ void OrderedCode::WriteNumIncreasing(string* dest, uint64 val) { buf[9 - len] = (val & 0xff); val >>= 8; } - buf[9 - len - 1] = (unsigned char)len; + buf[9 - len - 1] = len; len++; AppendBytes(dest, reinterpret_cast(buf + 9 - len), len); } ",0,train 346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK PiperOrigin-RevId: 157522142",logging.cc,"@@ -156,7 +156,7 @@ void MakeCheckOpValueString(std::ostream* os, const char& v) { if (v >= 32 && v <= 126) { (*os) << ""'"" << v << ""'""; } else { - (*os) << ""char value "" << (short)v; + (*os) << ""char value "" << static_cast(v); } } @@ -165,7 +165,7 @@ void MakeCheckOpValueString(std::ostream* os, const signed char& v) { if (v >= 32 && v <= 126) { (*os) << ""'"" << v << ""'""; } else { - (*os) << ""signed char value "" << (short)v; + (*os) << ""signed char value "" << static_cast(v); } } @@ -174,7 +174,7 @@ void MakeCheckOpValueString(std::ostream* os, const unsigned char& v) { if (v >= 32 && v <= 126) { (*os) << ""'"" << v << ""'""; } else { - (*os) << ""unsigned char value "" << (unsigned short)v; + (*os) << ""unsigned char value "" << static_cast(v); } } ",0,train 346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK PiperOrigin-RevId: 157522142",net.cc,"@@ -57,15 +57,16 @@ bool IsPortAvailable(int* port, bool is_tcp) { // Try binding to port. 
addr.sin_family = AF_INET; addr.sin_addr.s_addr = INADDR_ANY; - addr.sin_port = htons((uint16_t)*port); - if (bind(fd, (struct sockaddr*)&addr, sizeof(addr)) < 0) { + addr.sin_port = htons(static_cast(*port)); + if (bind(fd, reinterpret_cast(&addr), sizeof(addr)) < 0) { LOG(WARNING) << ""bind(port="" << *port << "") failed: "" << strerror(errno); close(fd); return false; } // Get the bound port number. - if (getsockname(fd, (struct sockaddr*)&addr, &addr_len) < 0) { + if (getsockname(fd, reinterpret_cast(&addr), &addr_len) < + 0) { LOG(WARNING) << ""getsockname() failed: "" << strerror(errno); close(fd); return false; ",0,train 346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK PiperOrigin-RevId: 157522142",yuv2rgb.cc,"@@ -39,9 +39,9 @@ static inline uint32_t YUV2RGB(int nY, int nU, int nV) { // nG = (int)(1.164 * nY - 0.813 * nV - 0.391 * nU); // nB = (int)(1.164 * nY + 1.596 * nV); - int nR = (int)(1192 * nY + 1634 * nV); - int nG = (int)(1192 * nY - 833 * nV - 400 * nU); - int nB = (int)(1192 * nY + 2066 * nU); + int nR = 1192 * nY + 1634 * nV; + int nG = 1192 * nY - 833 * nV - 400 * nU; + int nB = 1192 * nY + 2066 * nU; nR = MIN(kMaxChannelValue, MAX(0, nR)); nG = MIN(kMaxChannelValue, MAX(0, nG)); @@ -171,9 +171,9 @@ void ConvertYUV420SPToRGB565(const uint8_t* const input, uint16_t* const output, // nG = (int)(1.164 * nY - 0.813 * nV - 0.391 * nU); // nB = (int)(1.164 * nY + 1.596 * nV); - int nR = (int)(1192 * nY + 1634 * nV); - int nG = (int)(1192 * nY - 833 * nV - 400 * nU); - int nB = (int)(1192 * nY + 2066 * nU); + int nR = 1192 * nY + 1634 * nV; + int nG = 1192 * nY - 833 * nV - 400 * nU; + int nB = 1192 * nY + 2066 * nU; nR = MIN(kMaxChannelValue, MAX(0, nR)); nG = MIN(kMaxChannelValue, MAX(0, nG)); ",0,train 1390dd68fe5f2f83138e19a86b6699254ad38734,tensorflow/tensorflow,"When Op Type is not registered, log the hostname of the machine that it is running on in the error message, since the message could be routed back during a failure on a remote binary, and it is hard to tell which machine it came from. Ideally, we'd somehow log the name of the binary running instead, but we don't have a function to get that right now. PiperOrigin-RevId: 156337679",node_def_builder_test.cc,"@@ -208,9 +208,8 @@ TEST_F(NodeDefBuilderTest, OpDoesNotExist) { .ControlInput(""y"") .Attr(""foo"", 12) .Device(""device""); - ExpectFailure( - builder, - ""Op type not registered 'Op Does Not Exist' while building NodeDef 'n'""); + ExpectFailures(builder, {""Op type not registered 'Op Does Not Exist'"", + ""while building NodeDef 'n'""}); } TEST_F(NodeDefBuilderTest, Polymorphic) { ",0,test 1390dd68fe5f2f83138e19a86b6699254ad38734,tensorflow/tensorflow,"When Op Type is not registered, log the hostname of the machine that it is running on in the error message, since the message could be routed back during a failure on a remote binary, and it is hard to tell which machine it came from. Ideally, we'd somehow log the name of the binary running instead, but we don't have a function to get that right now. PiperOrigin-RevId: 156337679",op.cc,"@@ -21,6 +21,7 @@ limitations under the License. 
#include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/lib/core/errors.h"" #include ""tensorflow/core/lib/gtl/map_util.h"" +#include ""tensorflow/core/platform/host_info.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/mutex.h"" #include ""tensorflow/core/platform/protobuf.h"" @@ -83,7 +84,10 @@ Status OpRegistry::LookUp(const string& op_type_name, first_unregistered = false; } Status status = - errors::NotFound(""Op type not registered '"", op_type_name, ""'""); + errors::NotFound(""Op type not registered '"", op_type_name, + ""' in binary running on "", port::Hostname(), "". "", + ""Make sure the Op and Kernel are registered in the "" + ""binary running in this process.""); VLOG(1) << status.ToString(); return status; } @@ -225,7 +229,10 @@ Status OpListOpRegistry::LookUp(const string& op_type_name, auto iter = index_.find(op_type_name); if (iter == index_.end()) { *op_reg_data = nullptr; - return errors::NotFound(""Op type not registered '"", op_type_name, ""'""); + return errors::NotFound(""Op type not registered '"", op_type_name, + ""' in binary running on "", port::Hostname(), "". "", + ""Make sure the Op and Kernel are registered in the "" + ""binary running in this process.""); } *op_reg_data = iter->second; return Status::OK(); ",0,test 1390dd68fe5f2f83138e19a86b6699254ad38734,tensorflow/tensorflow,"When Op Type is not registered, log the hostname of the machine that it is running on in the error message, since the message could be routed back during a failure on a remote binary, and it is hard to tell which machine it came from. Ideally, we'd somehow log the name of the binary running instead, but we don't have a function to get that right now. PiperOrigin-RevId: 156337679",shape_inference_testutil_test.cc,"@@ -93,10 +93,11 @@ TEST(ShapeInferenceTestutilTest, Failures) { RunInferShapes(op, ""[1];[2];[1]"", ""e"", fn_copy_input_0)); EXPECT_CONTAINS(RunInferShapes(op, ""[1];[2];[1]"", ""[1];[2]"", fn_copy_input_0), ""wrong number of outputs""); - EXPECT_EQ(""Op type not registered 'NoSuchOp'"", - ShapeInferenceTestutil::InferShapes( - ShapeInferenceTestOp(""NoSuchOp""), """", """") - .error_message()); + auto error_message = ShapeInferenceTestutil::InferShapes( + ShapeInferenceTestOp(""NoSuchOp""), """", """") + .error_message(); + EXPECT_TRUE(StringPiece(error_message) + .starts_with(""Op type not registered 'NoSuchOp'"")); // Wrong shape error messages. EXPECT_CONTAINS(RunInferShapes(op, ""[1];[2];[1]"", ""?"", fn_copy_input_0), ",0,test 2932851e5d58ea729f4f5c8346f79e61df5f1126,tensorflow/tensorflow,"Annotate arg in FastMem for XLA compiler. 
PiperOrigin-RevId: 272525033",xla_compiler.cc,"@@ -463,9 +463,10 @@ string XlaCompiler::Argument::HumanString() const { return absl::StrCat(""kind=constant"", common, "" value="", constant_value.DebugString()); case kResource: { - string output = absl::StrCat(""kind=resource"", common, "" resource_kind="", - XlaResource::KindToString(resource_kind), - "" initialized="", initialized); + string output = absl::StrCat( + ""kind=resource"", common, + "" resource_kind="", XlaResource::KindToString(resource_kind), + "" initialized="", initialized, "" is_fast_mem="", fast_mem); if (max_array_size >= 0) { absl::StrAppend(&output, "" max_array_size="", max_array_size); } @@ -800,8 +801,7 @@ Status XlaCompiler::XLAShapeForArgument(const XlaCompiler::Argument& arg, TF_ASSIGN_OR_RETURN(*xla_shape, options_.shape_representation_fn( absl::get(arg.shape), arg.type, - /*use_fast_memory=*/false)); - + /*use_fast_memory=*/arg.fast_mem)); return Status::OK(); } case XlaResource::kTensorArray: { ",0,train 2932851e5d58ea729f4f5c8346f79e61df5f1126,tensorflow/tensorflow,"Annotate arg in FastMem for XLA compiler. PiperOrigin-RevId: 272525033",xla_compiler.h,"@@ -153,6 +153,9 @@ class XlaCompiler { // For a kResource, has this resource been initialized? bool initialized = false; + // For a kResource, is this resource on Fast Memory. + bool fast_mem = false; + // For a TensorArray or Stack resource, what is the array's declared size? // (Used for lazy initialization.) int64 max_array_size = -1; ",0,train 2932851e5d58ea729f4f5c8346f79e61df5f1126,tensorflow/tensorflow,"Annotate arg in FastMem for XLA compiler. PiperOrigin-RevId: 272525033",xla_compiler_test.cc,"@@ -328,6 +328,49 @@ TEST_F(XlaCompilerTest, HonorShapeRepresentationFnForUnwrittenResource) { xla::ShapeUtil::MakeTupleShape({transposed})); } +// Tests that the compiler can correctly propagate fast mem attribute for input +// resource variable. +TEST_F(XlaCompilerTest, HonorShapeRepresentationFnForFastMemVar) { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto var = ops::_Arg(scope.WithOpName(""V""), DT_RESOURCE, 0); + auto d = ops::_Retval(scope.WithOpName(""D""), var, 0); + std::unique_ptr graph(new Graph(OpRegistry::Global())); + TF_ASSERT_OK(scope.ToGraph(graph.get())); + + // Builds a description of the arguments. + std::vector args(1); + args[0].kind = XlaCompiler::Argument::kResource; + args[0].resource_kind = XlaResource::kVariable; + args[0].initialized = true; + args[0].type = DT_INT32; + args[0].shape = TensorShape({2, 3}); + args[0].fast_mem = true; + + auto options = DefaultOptions(); + int fast_mem_arg_count = 0; + options.shape_representation_fn = + [&fast_mem_arg_count](const TensorShape& shape, DataType dt, + bool use_fast_memory) -> xla::StatusOr { + xla::Shape xla_shape; + TF_RETURN_IF_ERROR(TensorShapeToXLAShape(dt, shape, &xla_shape)); + *xla_shape.mutable_layout() = xla::LayoutUtil::MakeLayout({0, 1}); + if (use_fast_memory) { + fast_mem_arg_count++; + } + return xla_shape; + }; + // Compiles the graph. + XlaCompiler compiler(options); + + XlaCompiler::CompilationResult result; + XlaCompiler::CompileOptions compile_options; + compile_options.return_updated_values_for_all_resources = true; + TF_ASSERT_OK(compiler.CompileGraph(compile_options, ""add"", std::move(graph), + args, + /*user_aliases=*/{}, &result)); + EXPECT_EQ(fast_mem_arg_count, 1); +} + // Tests that the compiler can correctly propagate the layout assigned by // shape_representation_fn_ to return types. 
TEST_F(XlaCompilerTest, HonorShapeRepresentationFnForRetVal) { ",0,train 34bff30979896879815dd6fc4d77c1a37d9b98a0,tensorflow/tensorflow,"[XLA] Add tests for Clamp with scalars S32 and U32. PiperOrigin-RevId: 184376425",scalar_computations_test.cc,"@@ -737,7 +737,61 @@ XLA_TEST_F(ScalarComputationsTest, PowScalar) { ComputeAndCompareR0(&builder, 8.0, {}, error_spec_); } -XLA_TEST_F(ScalarComputationsTest, ClampScalarHigh) { +XLA_TEST_F(ScalarComputationsTest, ClampScalarHighS32) { + ComputationBuilder builder(client_, TestName()); + builder.Clamp(builder.ConstantR0(-1), // The lower bound. + builder.ConstantR0(5), // The operand to be clamped. + builder.ConstantR0(3)); // The upper bound. + + ComputeAndCompareR0(&builder, 3, {}); +} + +XLA_TEST_F(ScalarComputationsTest, ClampScalarMiddleS32) { + ComputationBuilder builder(client_, TestName()); + builder.Clamp(builder.ConstantR0(-1), // The lower bound. + builder.ConstantR0(2), // The operand to be clamped. + builder.ConstantR0(3)); // The upper bound. + + ComputeAndCompareR0(&builder, 2, {}); +} + +XLA_TEST_F(ScalarComputationsTest, ClampScalarLowS32) { + ComputationBuilder builder(client_, TestName()); + builder.Clamp(builder.ConstantR0(-1), // The lower bound. + builder.ConstantR0(-5), // The operand to be clamped. + builder.ConstantR0(3)); // The upper bound. + + ComputeAndCompareR0(&builder, -1, {}); +} + +XLA_TEST_F(ScalarComputationsTest, ClampScalarHighU32) { + ComputationBuilder builder(client_, TestName()); + builder.Clamp(builder.ConstantR0(1), // The lower bound. + builder.ConstantR0(5), // The operand to be clamped. + builder.ConstantR0(3)); // The upper bound. + + ComputeAndCompareR0(&builder, 3, {}); +} + +XLA_TEST_F(ScalarComputationsTest, ClampScalarMiddleU32) { + ComputationBuilder builder(client_, TestName()); + builder.Clamp(builder.ConstantR0(1), // The lower bound. + builder.ConstantR0(2), // The operand to be clamped. + builder.ConstantR0(3)); // The upper bound. + + ComputeAndCompareR0(&builder, 2, {}); +} + +XLA_TEST_F(ScalarComputationsTest, ClampScalarLowU32) { + ComputationBuilder builder(client_, TestName()); + builder.Clamp(builder.ConstantR0(1), // The lower bound. + builder.ConstantR0(0), // The operand to be clamped. + builder.ConstantR0(3)); // The upper bound. + + ComputeAndCompareR0(&builder, 1, {}); +} + +XLA_TEST_F(ScalarComputationsTest, ClampScalarHighF32) { ComputationBuilder builder(client_, TestName()); builder.Clamp(builder.ConstantR0(2.0f), // The lower bound. builder.ConstantR0(5.0f), // The operand to be clamped. @@ -746,7 +800,7 @@ XLA_TEST_F(ScalarComputationsTest, ClampScalarHigh) { ComputeAndCompareR0(&builder, 3.0, {}, error_spec_); } -XLA_TEST_F(ScalarComputationsTest, ClampScalarMiddle) { +XLA_TEST_F(ScalarComputationsTest, ClampScalarMiddleF32) { ComputationBuilder builder(client_, TestName()); builder.Clamp(builder.ConstantR0(2.0f), // The lower bound. builder.ConstantR0(2.5f), // The operand to be clamped. @@ -755,7 +809,7 @@ XLA_TEST_F(ScalarComputationsTest, ClampScalarMiddle) { ComputeAndCompareR0(&builder, 2.5, {}, error_spec_); } -XLA_TEST_F(ScalarComputationsTest, ClampScalarLow) { +XLA_TEST_F(ScalarComputationsTest, ClampScalarLowF32) { ComputationBuilder builder(client_, TestName()); builder.Clamp(builder.ConstantR0(2.0f), // The lower bound. builder.ConstantR0(-5.0f), // The operand to be clamped. ",0,train 06e80ff230f1551d528f082a1821a82d3229305f,tensorflow/tensorflow,"Add in optimizations for softmax for Fusion F1. 
Confirmed that the test passes with: ``` make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa OPTIMIZED_KERNEL_DIR=xtensa TARGET_ARCH=fusion_f1 XTENSA_CORE=F1_190305_swupgrade test_kernel_softmax_test -j8 ``` However, the latency improvement is only ~1000 ticks, as tested with: ``` make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa OPTIMIZED_KERNEL_DIR=xtensa TARGET_ARCH=fusion_f1 XTENSA_CORE=F1_190305_swupgrade test_keyword_benchmark -j8 ``` Since Softmax is currently a small fraction of the overall keyword_benchmark latency we will focus on the latency of only this particular OP. With the optimized implementation: ``` SOFTMAX took 749 ticks (0 ms). ``` Reference implementation: ``` SOFTMAX took 2052 ticks (2 ms). ``` And with the LUT hifimini implementation (for completeness): ``` SOFTMAX took 1142 ticks (1 ms). ``` The gain of ~1500 ticks ticks is still worth merging because after all the optimizations (e.g. https://github.com/tensorflow/tensorflow/pull/47098), this will still mean a ~5% improvement for the keyword benchmark. And the benefits might be more significant for other models too.",softmax.cc,"@@ -24,6 +24,7 @@ limitations under the License. #include ""tensorflow/lite/kernels/kernel_util.h"" #include ""tensorflow/lite/kernels/op_macros.h"" #include ""tensorflow/lite/micro/kernels/kernel_util.h"" +#include ""tensorflow/lite/micro/kernels/xtensa/xtensa.h"" namespace tflite { namespace { @@ -32,7 +33,14 @@ namespace { struct OpData { uint16_t* exp_lut; }; +#elif defined(FUSION_F1) +struct OpData { + SoftmaxParams params; + int scratch_tensor_index; +}; +#endif +#if defined(HIFIMINI) // Number of unique int8_t and int16_t values. Used in exponent lookup table // computation. constexpr int kInt8Range = @@ -173,8 +181,63 @@ TfLiteStatus PrepareHifimini(TfLiteContext* context, TfLiteNode* node) { } #endif // defined(HIFIMINI) +#if defined(FUSION_F1) +TfLiteStatus PrepareHifi4(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_OK(context, SoftmaxPrepare(context, node)); + + // Calculate scratch memory requirements and request scratch buffer + const TfLiteTensor* input = GetInput(context, node, 0); + const TfLiteTensor* output = GetOutput(context, node, 0); + + const RuntimeShape& input_shape = GetTensorShape(input); + const RuntimeShape& output_shape = GetTensorShape(output); + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int depth = + MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + + if (input->type == kTfLiteInt8) { + int required_scratch = + get_softmax_scratch_size(PREC_ASYM8S, PREC_ASYM8S, depth); + TF_LITE_ENSURE(context, required_scratch > 0); + + auto* data = static_cast(node->user_data); + TF_LITE_ENSURE_OK( + context, context->RequestScratchBufferInArena( + context, required_scratch, &(data->scratch_tensor_index))); + } + + return kTfLiteOk; +} + +TfLiteStatus EvalHifi4(const OpData* op_data, const TfLiteEvalTensor* input, + TfLiteEvalTensor* output, TfLiteContext* context) { + const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input); + const int8_t* input_data = tflite::micro::GetTensorData(input); + const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output); + int16_t* output_data = tflite::micro::GetTensorData(output); + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = + MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = + MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + + 
void* p_scratch = static_cast( + context->GetScratchBuffer(context, op_data->scratch_tensor_index)); + + for (int i = 0; i < outer_size; ++i) { + int err = xa_nn_vec_softmax_asym8s_16( + &output_data[i * depth], &input_data[i * depth], + op_data->params.diff_min, op_data->params.input_left_shift, + op_data->params.input_multiplier, depth, p_scratch); + TF_LITE_ENSURE(context, err == 0); + } + return kTfLiteOk; +} + +#endif // defined(FUSION_F1) + void* Init(TfLiteContext* context, const char* buffer, size_t length) { -#if defined(HIFIMINI) +#if defined(HIFIMINI) || defined(FUSION_F1) TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); return context->AllocatePersistentBuffer(context, sizeof(OpData)); #else @@ -185,6 +248,8 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) { TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { #if defined(HIFIMINI) return PrepareHifimini(context, node); +#elif defined(FUSION_F1) + return PrepareHifi4(context, node); #else return SoftmaxPrepare(context, node); #endif @@ -208,7 +273,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TfLiteTypeGetName(input->type), input->type); return kTfLiteError; } -#else // !defined(HIFIMINI) +#else // !defined(HIFIMINI) switch (input->type) { case kTfLiteFloat32: { SoftmaxParams op_data = *static_cast(node->user_data); @@ -221,12 +286,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } case kTfLiteInt8: { if (output->type == kTfLiteInt16) { +#if defined(FUSION_F1) + return EvalHifi4(static_cast(node->user_data), input, output, + context); +#else SoftmaxParams op_data = *static_cast(node->user_data); tflite::reference_ops::Softmax( op_data, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); +#endif } else { SoftmaxParams op_data = *static_cast(node->user_data); tflite::reference_ops::Softmax( ",0,test 06e80ff230f1551d528f082a1821a82d3229305f,tensorflow/tensorflow,"Add in optimizations for softmax for Fusion F1. Confirmed that the test passes with: ``` make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa OPTIMIZED_KERNEL_DIR=xtensa TARGET_ARCH=fusion_f1 XTENSA_CORE=F1_190305_swupgrade test_kernel_softmax_test -j8 ``` However, the latency improvement is only ~1000 ticks, as tested with: ``` make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa OPTIMIZED_KERNEL_DIR=xtensa TARGET_ARCH=fusion_f1 XTENSA_CORE=F1_190305_swupgrade test_keyword_benchmark -j8 ``` Since Softmax is currently a small fraction of the overall keyword_benchmark latency we will focus on the latency of only this particular OP. With the optimized implementation: ``` SOFTMAX took 749 ticks (0 ms). ``` Reference implementation: ``` SOFTMAX took 2052 ticks (2 ms). ``` And with the LUT hifimini implementation (for completeness): ``` SOFTMAX took 1142 ticks (1 ms). ``` The gain of ~1500 ticks ticks is still worth merging because after all the optimizations (e.g. https://github.com/tensorflow/tensorflow/pull/47098), this will still mean a ~5% improvement for the keyword benchmark. And the benefits might be more significant for other models too.",xtensa.h,"@@ -20,6 +20,7 @@ limitations under the License. 
#include #elif defined(FUSION_F1) #include ""include/nnlib/xa_nnlib_api.h"" +#include ""include/nnlib/xa_nnlib_standards.h"" #endif #endif // TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_XTENSA_H_ ",0,test 32de8d33177d6f46c7ecd06363bd492194341731,tensorflow/tensorflow,"[XLA] Propagate debug_options in ComputationStatsRequest And fix graph dumping tools to add graph dumping options automatically. PiperOrigin-RevId: 161082718",client.cc,"@@ -18,6 +18,7 @@ limitations under the License. #include #include +#include ""tensorflow/compiler/xla/legacy_flags/debug_options_flags.h"" #include ""tensorflow/compiler/xla/literal_util.h"" #include ""tensorflow/compiler/xla/ptr_util.h"" #include ""tensorflow/compiler/xla/status_macros.h"" @@ -376,9 +377,10 @@ StatusOr>> Client::DeconstructTuple( } StatusOr Client::GetComputationStats( - const Computation& computation) const { + const Computation& computation, const DebugOptions& debug_options) const { ComputationStatsRequest request; *request.mutable_computation() = computation.handle(); + *request.mutable_debug_options() = debug_options; ComputationStatsResponse response; VLOG(1) << ""making computation stats request""; @@ -427,7 +429,10 @@ StatusOr Client::GetShape(const GlobalData& data) { StatusOr Client::ExecutionStatsAsString( const Computation& computation, const ExecutionProfile& profile) { - TF_ASSIGN_OR_RETURN(auto computation_stats, GetComputationStats(computation)); + TF_ASSIGN_OR_RETURN( + auto computation_stats, + GetComputationStats(computation, + legacy_flags::GetDebugOptionsFromFlags())); int64 total_flops = computation_stats.flop_count() + computation_stats.transcendental_count(); if (profile.compute_time_ns() > 0) { ",0,train 32de8d33177d6f46c7ecd06363bd492194341731,tensorflow/tensorflow,"[XLA] Propagate debug_options in ComputationStatsRequest And fix graph dumping tools to add graph dumping options automatically. PiperOrigin-RevId: 161082718",client.h,"@@ -150,7 +150,7 @@ class Client { // Retrieves the statistics of the given computation. StatusOr GetComputationStats( - const Computation& computation) const; + const Computation& computation, const DebugOptions& debug_options) const; // Returns the Shape of the given array specified by 'data'. The shape // includes the Layout of the array as it is stored on the service. ",0,train 32de8d33177d6f46c7ecd06363bd492194341731,tensorflow/tensorflow,"[XLA] Propagate debug_options in ComputationStatsRequest And fix graph dumping tools to add graph dumping options automatically. PiperOrigin-RevId: 161082718",debug_options_flags.cc,"@@ -189,6 +189,7 @@ xla::DebugOptions GetDebugOptionsFromFlags() { options.set_xla_hlo_graph_addresses(flag_values->xla_hlo_graph_addresses); options.set_xla_hlo_graph_layout(flag_values->xla_hlo_graph_layout); options.set_xla_hlo_graph_path(flag_values->xla_hlo_graph_path); + options.set_xla_hlo_dump_as_graphdef(flag_values->xla_hlo_dump_as_graphdef); options.set_xla_log_hlo_text(flag_values->xla_log_hlo_text); options.set_xla_generate_hlo_text_to(flag_values->xla_generate_hlo_text_to); ",0,train 32de8d33177d6f46c7ecd06363bd492194341731,tensorflow/tensorflow,"[XLA] Propagate debug_options in ComputationStatsRequest And fix graph dumping tools to add graph dumping options automatically. 
PiperOrigin-RevId: 161082718",service.cc,"@@ -1173,9 +1173,11 @@ tensorflow::Status Service::GetComputationStats( VersionedComputationHandle versioned_handle = user_computation->GetVersionedHandle(); + HloModuleConfig config; + config.set_debug_options(arg->debug_options()); TF_ASSIGN_OR_RETURN( std::unique_ptr module, - computation_tracker_.BuildHloModule(versioned_handle, HloModuleConfig())); + computation_tracker_.BuildHloModule(versioned_handle, config)); hlo_graph_dumper::MaybeDumpHloModule(*module, ""computation statistics subject""); ",0,train 32de8d33177d6f46c7ecd06363bd492194341731,tensorflow/tensorflow,"[XLA] Propagate debug_options in ComputationStatsRequest And fix graph dumping tools to add graph dumping options automatically. PiperOrigin-RevId: 161082718",dumped_computation_to_graphviz.cc,"@@ -53,8 +53,12 @@ void RealMain(tensorflow::gtl::ArraySlice args) { TF_CHECK_OK( tensorflow::ReadBinaryProto(tensorflow::Env::Default(), arg, &module)); Computation computation = client->LoadSnapshot(module).ConsumeValueOrDie(); + DebugOptions debug_options = legacy_flags::GetDebugOptionsFromFlags(); + debug_options.set_xla_generate_hlo_graph("".*""); + debug_options.set_xla_hlo_graph_layout(true); ComputationStats stats = - client->GetComputationStats(computation).ConsumeValueOrDie(); + client->GetComputationStats(computation, debug_options) + .ConsumeValueOrDie(); fprintf(stdout, "">>> %s :: %s\n"", arg, stats.DebugString().c_str()); } } ",0,train 32de8d33177d6f46c7ecd06363bd492194341731,tensorflow/tensorflow,"[XLA] Propagate debug_options in ComputationStatsRequest And fix graph dumping tools to add graph dumping options automatically. PiperOrigin-RevId: 161082718",dumped_computation_to_tf_graphdef.cc,"@@ -52,8 +52,12 @@ void RealMain(tensorflow::gtl::ArraySlice args) { TF_CHECK_OK( tensorflow::ReadBinaryProto(tensorflow::Env::Default(), arg, &module)); Computation computation = client->LoadSnapshot(module).ConsumeValueOrDie(); + DebugOptions debug_options = legacy_flags::GetDebugOptionsFromFlags(); + debug_options.set_xla_generate_hlo_graph("".*""); + debug_options.set_xla_hlo_dump_as_graphdef(true); ComputationStats stats = - client->GetComputationStats(computation).ConsumeValueOrDie(); + client->GetComputationStats(computation, debug_options) + .ConsumeValueOrDie(); fprintf(stdout, "">>> %s :: %s\n"", arg, stats.DebugString().c_str()); } } ",0,train db9265af2548648dd3aa15af7073076eb393b8d9,tensorflow/tensorflow,Fix build errors.,nvptx_compiler.cc,"@@ -391,7 +391,7 @@ NVPTXCompiler::CompileTargetBinary(const HloModule* module, VLOG(2) << ""Libdevice dir = "" << libdevice_dir << ""\n""; string ptx; - if (!MaybeLoadPtxFromFile(module.get(), &ptx)) { + if (!MaybeLoadPtxFromFile(module, &ptx)) { XLA_SCOPED_LOGGING_TIMER( ""NVPTXCompiler::CompileTargetBinary - CompileToPtx""); TF_ASSIGN_OR_RETURN( ",0,train 43c95696c0ca68314c613ed0a55e4f58afc784df,tensorflow/tensorflow,[tensorflow/compiler/xla/service/space_to_batch_converter.cc] Use `const auto&` instead of `const auto`,space_to_batch_converter.cc,"@@ -1329,7 +1329,7 @@ void ConvolutionVisitor::PropagateOnBroadcast(HloInstruction* consumer, } std::vector broadcast_dims; - const auto dimensions = consumer->dimensions(); + const auto& dimensions = consumer->dimensions(); broadcast_dims.reserve(dimensions.size()); for (auto j : dimensions) { broadcast_dims.push_back(DimLookUp(permute_dims, j)); ",0,train 09713e439363d763ca7c12d0c279b8d55d5b6053,tensorflow/tensorflow,"We should be using on host shape as the device one can have tuples in place 
of complex or S64 types. PiperOrigin-RevId: 228262394",raw_api_test.cc,"@@ -956,6 +956,7 @@ TEST(RawApiTest, CompileAndExecuteWithS64Argument) { xrt::XRTExecutionConfig e; e.set_release_input_handles(true); e.set_release_compilation_handle(true); + e.set_return_exploded_tuple(true); Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); auto e_config = ",0,test 1d48d2f58417e78853b7ed9b77eb83c030056619,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@df47368d406a Updates LLVM usage to match [df47368d406a](https://github.com/llvm/llvm-project/commit/df47368d406a) PiperOrigin-RevId: 373136440 Change-Id: I479781bf2147874ecaec0d3d4d5ed726acd899a8",tensorflow_abi_knowledge_propagation.cc,"@@ -141,7 +141,7 @@ struct PropagateTfAbiKnowledgeToKernelsPass // Add the no_alias attribute to the corresponding pointer. kernel.setArgAttr(kernel_p + 1, LLVM::LLVMDialect::getNoAliasAttrName(), - b.getBoolAttr(true)); + b.getUnitAttr()); } // Advance base, aligned, offset, strides and sizes many arguments. kernel_p += memref.getRank() * 2 + 3; ",0,train 90da05cd1c07b0c84e102944a9a634127ecdc52b,tensorflow/tensorflow,"[TF-numpy] Exports `np.newaxis`. PiperOrigin-RevId: 321687455 Change-Id: I47a9f566b9a961368cfb4f076674fc0b94a6e140",np_array_ops.py,"@@ -37,10 +37,14 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import sort_ops from tensorflow.python.ops.numpy_ops import np_arrays from tensorflow.python.ops.numpy_ops import np_dtypes +from tensorflow.python.ops.numpy_ops import np_export from tensorflow.python.ops.numpy_ops import np_utils from tensorflow.python.util import nest +newaxis = np_export.np_export_constant(__name__, 'newaxis', np.newaxis) + + @np_utils.np_doc('empty') def empty(shape, dtype=float): # pylint: disable=redefined-outer-name return zeros(shape, dtype) ",0,train b2ab2da16f22007e0f4d61d8806ebac6d5d0edd5,tensorflow/tensorflow,"Support arbitrary many values in KeyValueSort on GPU backend. PiperOrigin-RevId: 216688700",ir_emitter_unnested.cc,"@@ -34,6 +34,7 @@ limitations under the License. #include ""llvm/IR/Instructions.h"" #include ""llvm/IR/LLVMContext.h"" #include ""llvm/IR/Module.h"" +#include ""tensorflow/compiler/xla/layout_util.h"" #include ""tensorflow/compiler/xla/literal.h"" #include ""tensorflow/compiler/xla/service/buffer_assignment.h"" #include ""tensorflow/compiler/xla/service/dfs_hlo_visitor.h"" @@ -2192,34 +2193,34 @@ Status IrEmitterUnnested::HandleSelect(HloInstruction* select) { Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { std::vector> thunks; - auto keys = sort->operand(0); - auto values = sort->operand_count() > 1 ? sort->operand(1) : nullptr; - ShapeIndex keys_shape_index({}); - ShapeIndex values_shape_index({}); - if (values != nullptr) { - keys_shape_index = ShapeIndex({0}); - values_shape_index = ShapeIndex({1}); - } - auto keys_destination = GetAllocationSlice(*sort, keys_shape_index); - auto values_destination = GetAllocationSlice(*sort, values_shape_index); - - if (keys_destination != GetAllocationSlice(*keys)) { - thunks.push_back(absl::make_unique( - /*source_address=*/GetAllocationSlice(*keys), - /*destination_buffer=*/keys_destination, - /*mem_size=*/ShapeUtil::ByteSizeOf(keys->shape()), nullptr)); - } - if (values != nullptr && values_destination != GetAllocationSlice(*values)) { - // TODO(b/26783907): Figure out why we never seem to share buffers for - // key/value sort. 
- thunks.push_back(absl::make_unique( - /*source_address=*/GetAllocationSlice(*values), - /*destination_buffer=*/values_destination, - /*mem_size=*/ShapeUtil::ByteSizeOf(values->shape()), nullptr)); + Shape keys_shape = sort->operand(0)->shape(); + for (int64 i = 0; i < sort->operand_count(); ++i) { + ShapeIndex shape_index = + sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({}); + // We assume that the layout of all involved operands and outputs is the + // same. + TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(keys_shape, + sort->operand(i)->shape())); + TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual( + keys_shape, ShapeUtil::GetSubshape(sort->shape(), shape_index))); + + // If possible, we share buffers. If that is not possible, we need to copy + // the values, because the emitter does the sorting in-place. + auto destination_buffer = GetAllocationSlice(*sort, shape_index); + auto source_address = GetAllocationSlice(*sort->operand(i)); + if (destination_buffer != source_address) { + // TODO(b/26783907): Figure out why we never seem to share buffers for + // key/value sort. + thunks.push_back(absl::make_unique( + /*source_address=*/source_address, + /*destination_buffer=*/destination_buffer, + /*mem_size=*/ShapeUtil::ByteSizeOf(sort->operand(i)->shape()), + nullptr)); + } } int64 dimension_to_sort = sort->dimensions(0); - int64 dimension_to_sort_bound = keys->shape().dimensions(dimension_to_sort); + int64 dimension_to_sort_bound = keys_shape.dimensions(dimension_to_sort); int64 num_stages = tensorflow::Log2Ceiling(dimension_to_sort_bound); auto index_type = b_.getInt64Ty(); @@ -2243,7 +2244,7 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { thunks.push_back( BuildKernelThunk(sort, /*implements_whole_instruction=*/false)); LaunchDimensions launch_dimensions = CalculateLaunchDimensions( - keys->shape(), ir_emitter_context_->device_description()); + keys_shape, ir_emitter_context_->device_description()); UpdateLaunchDimensions(launch_dimensions, thunks.back().get(), ir_emitter_context_->llvm_module()); @@ -2254,12 +2255,21 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { xor_mask = llvm::ConstantInt::get(index_type, 1LL << mask); } + IrArray keys_array; + std::vector values_arrays; + values_arrays.reserve(sort->operand_count() - 1); + for (int64 i = 0; i < sort->operand_count(); ++i) { + ShapeIndex shape_index = + sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({}); + if (i == 0) { + keys_array = GetIrArray(*sort, *sort, shape_index); + } else { + values_arrays.push_back(GetIrArray(*sort, *sort, shape_index)); + } + } TF_RETURN_IF_ERROR(llvm_ir::EmitSortInPlace( - dimension_to_sort, GetIrArray(*sort, *sort, keys_shape_index), - values != nullptr ? absl::make_optional( - GetIrArray(*sort, *sort, values_shape_index)) - : absl::nullopt, - IrName(sort), xor_mask, &b_, &launch_dimensions)); + dimension_to_sort, keys_array, values_arrays, IrName(sort), xor_mask, + &b_, &launch_dimensions)); } } ",0,train b2ab2da16f22007e0f4d61d8806ebac6d5d0edd5,tensorflow/tensorflow,"Support arbitrary many values in KeyValueSort on GPU backend. PiperOrigin-RevId: 216688700",sort_util.cc,"@@ -15,9 +15,10 @@ limitations under the License. 
#include ""tensorflow/compiler/xla/service/llvm_ir/sort_util.h"" +#include + // IWYU pragma: no_include ""llvm/IR/Intrinsics.gen.inc"" #include ""absl/strings/string_view.h"" -#include ""absl/types/optional.h"" #include ""llvm/ADT/APInt.h"" #include ""llvm/IR/BasicBlock.h"" #include ""llvm/IR/Constants.h"" @@ -43,7 +44,7 @@ namespace { void EmitCompareLoop(int64 dimension_to_sort, const IrArray::Index& keys_index, const IrArray::Index& compare_keys_index, const IrArray& keys_array, - const absl::optional& values_array, + const std::vector& values_arrays, llvm::IRBuilder<>* b) { // if (is_smaller_index && // compare_keys[dimension_to_sort] < dimension_to_sort_bound) @@ -100,19 +101,18 @@ void EmitCompareLoop(int64 dimension_to_sort, const IrArray::Index& keys_index, // Swap key1 with key2. keys_array.EmitWriteArrayElement(keys_index, key2, b); keys_array.EmitWriteArrayElement(compare_keys_index, key1, b); - if (values_array.has_value()) { + for (const auto& values_array : values_arrays) { // Also swap the values. - auto value1 = values_array.value().EmitReadArrayElement(keys_index, b); - auto value2 = - values_array.value().EmitReadArrayElement(compare_keys_index, b); - values_array.value().EmitWriteArrayElement(keys_index, value2, b); - values_array.value().EmitWriteArrayElement(compare_keys_index, value1, b); + auto value1 = values_array.EmitReadArrayElement(keys_index, b); + auto value2 = values_array.EmitReadArrayElement(compare_keys_index, b); + values_array.EmitWriteArrayElement(keys_index, value2, b); + values_array.EmitWriteArrayElement(compare_keys_index, value1, b); } } } // namespace Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array, - const absl::optional& values_array, + const std::vector& values_arrays, absl::string_view name, llvm::Value* xor_mask, llvm::IRBuilder<>* b, const gpu::LaunchDimensions* launch_dimensions) { @@ -162,7 +162,7 @@ Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array, compare_keys_index[dimension_to_sort] = b->CreateXor(compare_index[0], xor_mask); EmitCompareLoop(dimension_to_sort, keys_index, compare_keys_index, - keys_array, values_array, b); + keys_array, values_arrays, b); return Status::OK(); }; if (launch_dimensions != nullptr) { ",0,train b2ab2da16f22007e0f4d61d8806ebac6d5d0edd5,tensorflow/tensorflow,"Support arbitrary many values in KeyValueSort on GPU backend. PiperOrigin-RevId: 216688700",sort_util.h,"@@ -16,8 +16,9 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_SORT_UTIL_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_SORT_UTIL_H_ +#include + #include ""absl/strings/string_view.h"" -#include ""absl/types/optional.h"" #include ""llvm/IR/Value.h"" #include ""tensorflow/compiler/xla/service/gpu/partition_assignment.h"" #include ""tensorflow/compiler/xla/service/llvm_ir/ir_array.h"" @@ -31,7 +32,7 @@ namespace llvm_ir { // implements the inner loop of BitonicSort. If 'launch_dimensions' is nullptr, // the inner compare loop will not be parallelized. Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array, - const absl::optional& values_array, + const std::vector& values_arrays, absl::string_view name, llvm::Value* xor_mask, llvm::IRBuilder<>* b, const gpu::LaunchDimensions* launch_dimensions); ",0,train 2e5bfbbb902b66720cd8d41a0fa1bce292efd31b,tensorflow/tensorflow,"[XLA] Fix cost analysis interval picking when there is no prefetch start time that satisfies earliest < x < latest. 
We specify how long a prefetch can be in relation to how long the overlapped instructions take using flags. However, sometimes there is a long-executing HLO before the HLO and the earliest and latest durations both fall within this HLO. In this case, we were previously not attempting to prefetch at all because there is no valid prefetch start time that satisifies the earliest/latest constraints. However, this can be detrimental to some models. We now allow prefetches to start a little earlier than the specified earliest time in such cases. PiperOrigin-RevId: 370087066 Change-Id: Ief3b6cd58cdf6d26b38569a5a541c8e06b946840",memory_space_assignment.cc,"@@ -578,7 +578,7 @@ void CostAnalysisPrefetchIntervalPicker::Begin(const HloUse& use, // Find the earliest time we're allowed to start prefetching. float max_interval = GetMaxElapsedInAlternateMemory(async_copy_elapsed_); for (earliest_prefetch_time_ = start_time; - earliest_prefetch_time_ <= end_logical_time_ && + earliest_prefetch_time_ < latest_prefetch_time_ && (computation_nest_level_[earliest_prefetch_time_] != end_nest_level || max_interval < GetLogicalIntervalElapsed(earliest_prefetch_time_, end_logical_time_)); ",0,train 2e5bfbbb902b66720cd8d41a0fa1bce292efd31b,tensorflow/tensorflow,"[XLA] Fix cost analysis interval picking when there is no prefetch start time that satisfies earliest < x < latest. We specify how long a prefetch can be in relation to how long the overlapped instructions take using flags. However, sometimes there is a long-executing HLO before the HLO and the earliest and latest durations both fall within this HLO. In this case, we were previously not attempting to prefetch at all because there is no valid prefetch start time that satisifies the earliest/latest constraints. However, this can be detrimental to some models. We now allow prefetches to start a little earlier than the specified earliest time in such cases. PiperOrigin-RevId: 370087066 Change-Id: Ief3b6cd58cdf6d26b38569a5a541c8e06b946840",memory_space_assignment_test.cc,"@@ -5786,5 +5786,51 @@ TEST_F(CostAnalysisPrefetchIntervalPickerTest, ConsecutiveConditionals) { 5); } +TEST_F(CostAnalysisPrefetchIntervalPickerTest, EarliestLatestWindowTooSmall) { + // This tests the scenario where there is an op that takes a long time (tanh + // in this example) and as a result the earliest and latest times both fall + // inside this long-running op. In this case, we should still return a valid + // prefetch interval just before the long-running op. 
+ absl::string_view hlo_string = R""( + HloModule bug, is_scheduled=true + + ENTRY Entry { + param0 = f32[2,4] parameter(0) + negate = f32[2,4] negate(param0) + tanh = f32[2,4] tanh(param0) + ROOT add = f32[2,4] add(tanh, negate) + } + )""; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + HloCostAnalysis hlo_cost_analysis(ShapeSize); + TF_ASSERT_OK_AND_ASSIGN(auto cost_analysis, + FakeMemorySpaceAssignmentCostAnalysis::Create( + hlo_cost_analysis, *module)); + cost_analysis->SetOverrideForGetInstructionElapsed( + [](const HloInstruction& hlo) { + if (hlo.opcode() == HloOpcode::kTanh) { + return 20.0; + } + return 1.0; + }); + CostAnalysisPrefetchIntervalPicker interval_picker( + *cost_analysis, + /*min_async_copy_to_overlap_ratio=*/1.0, + /*max_async_copy_to_overlap_ratio=*/4.0, + /*preferred_async_copy_to_overlap_ratio=*/2.0, + /*buffer_size_for_max_async_copy=*/0); + + HloInstruction* root = module->entry_computation()->root_instruction(); + const HloUse use{root, /*operand_number=*/1, /*operand_index=*/{}}; + interval_picker.Begin(use, /*start_time=*/1, /*end_time=*/3); + + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_FALSE(interval_picker.Done()); + EXPECT_EQ(interval_picker.Next(), 1); + EXPECT_TRUE(interval_picker.Done()); +} + } // namespace } // namespace xla ",0,train 530dc71d0487cacccbe270490d460bc401040dc9,tensorflow/tensorflow,"Fix tsan detected error in core/util/exec_on_stall_test.cc Enforce mutex around access to test variable. PiperOrigin-RevId: 200078751",exec_on_stall_test.cc,"@@ -16,6 +16,7 @@ limitations under the License. #include ""tensorflow/core/util/exec_on_stall.h"" #include ""tensorflow/core/platform/macros.h"" +#include ""tensorflow/core/platform/mutex.h"" #include ""tensorflow/core/platform/test.h"" namespace tensorflow { @@ -32,14 +33,24 @@ Chunk* NewChunk(int stall_seconds, std::function f) { } TEST(ExecuteOnStallTest, BothWays) { - bool a_triggered = false; - bool b_triggered = false; - Chunk* a = NewChunk(1, [&a_triggered]() { a_triggered = true; }); - Chunk* b = NewChunk(1, [&b_triggered]() { b_triggered = true; }); + mutex mu; + bool a_triggered(false); + bool b_triggered(false); + Chunk* a = NewChunk(1, [&mu, &a_triggered]() { + mutex_lock l(mu); + a_triggered = true; + }); + Chunk* b = NewChunk(1, [&mu, &b_triggered]() { + mutex_lock l(mu); + b_triggered = true; + }); delete a; Env::Default()->SleepForMicroseconds(2000000); - EXPECT_FALSE(a_triggered); - EXPECT_TRUE(b_triggered); + { + mutex_lock l(mu); + EXPECT_FALSE(a_triggered); + EXPECT_TRUE(b_triggered); + } delete b; } ",0,train 37408c89124e8bf4a005ba89d17b18a0dc29f94a,tensorflow/tensorflow,"Fixing a python3 issue in bias_op_test.py Issue 1: range in python3 does not return a list as in python2",bias_op_test.py,"@@ -63,14 +63,14 @@ class BiasAddTest(tf.test.TestCase): (1,) * (3 - np_value.ndim) + np_value.shape) # move the last dimension to third-to-last np_dim = range(np_value.ndim) - np_dim_new = np_dim[0:-3] + np_dim[-1:] + np_dim[-3:-1] + np_dim_new = list(np_dim[0:-3]) + list(np_dim[-1:]) + list(np_dim[-3:-1]) return np.transpose(np_value, np_dim_new) def _NCHWToNHWC(self, np_value): - assert np_value.shape >= 3 + assert len(np_value.shape) >= 3 np_dim = range(np_value.ndim) # move the third-to-last dimension to the last - np_dim_new = np_dim[0:-3] + np_dim[-2:] + np_dim[-3:-2] + np_dim_new = list(np_dim[0:-3]) + list(np_dim[-2:]) + list(np_dim[-3:-2]) return np.transpose(np_value, np_dim_new) def _testBiasNCHW(self, np_inputs, np_bias, 
use_gpu): ",0,train 9b721a246bef8210d5ee3d9bb4a6e43004aa0f8a,tensorflow/tensorflow,"Support full [b]float16 in embedding_lookup_sparse - This removes a forced cast to float32 and instead outputs the same type as the input. The inner computations are still done in float32 to avoid numerical issues. - This improves performance and makes the op consistent with all other ops that output the same type as the input.",embedding_ops_test.py,"@@ -718,10 +718,7 @@ class EmbeddingLookupSparseTest(test.TestCase): self.assertEqual(embedding_sum.get_shape().as_list(), expected_lookup_result_shape) - if dtype in (dtypes.float16, dtypes.bfloat16): - self.assertEqual(embedding_sum.dtype, dtypes.float32) - else: - self.assertEqual(embedding_sum.dtype, dtype) + self.assertEqual(embedding_sum.dtype, dtype) tf_embedding_sum = embedding_sum.eval(feed_dict=feed_dict) ",0,train 9b721a246bef8210d5ee3d9bb4a6e43004aa0f8a,tensorflow/tensorflow,"Support full [b]float16 in embedding_lookup_sparse - This removes a forced cast to float32 and instead outputs the same type as the input. The inner computations are still done in float32 to avoid numerical issues. - This improves performance and makes the op consistent with all other ops that output the same type as the input.",embedding_ops.py,"@@ -511,18 +511,21 @@ def embedding_lookup_sparse(params, embeddings = embedding_lookup( params, ids, partition_strategy=partition_strategy, max_norm=max_norm) - if embeddings.dtype in (dtypes.float16, dtypes.bfloat16): - embeddings = math_ops.cast(embeddings, dtypes.float32) if not ignore_weights: if segment_ids.dtype != dtypes.int32: segment_ids = math_ops.cast(segment_ids, dtypes.int32) weights = sp_weights.values + embeddings = array_ops.gather(embeddings, idx) + + original_dtype = embeddings.dtype + if embeddings.dtype in (dtypes.float16, dtypes.bfloat16): + # Cast low-precision embeddings to float32 during the computation to + # avoid numerical issues. + embeddings = math_ops.cast(embeddings, dtypes.float32) if weights.dtype != embeddings.dtype: weights = math_ops.cast(weights, embeddings.dtype) - embeddings = array_ops.gather(embeddings, idx) - # Reshape weights to allow broadcast ones_shape = array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0) ones = array_ops.ones(ones_shape, dtype=dtypes.int32) @@ -555,6 +558,8 @@ def embedding_lookup_sparse(params, embeddings = math_ops.divide(embeddings, weight_sum_sqrt, name=name) else: assert False, ""Unrecognized combiner"" + if embeddings.dtype != original_dtype: + embeddings = math_ops.cast(embeddings, original_dtype) else: if segment_ids.dtype not in (dtypes.int32, dtypes.int64): segment_ids = math_ops.cast(segment_ids, dtypes.int32) ",0,train 71c9f15ea2f953fcdb4ff33316547c71930ed4d7,tensorflow/tensorflow,"Delete AutoGraphParseError now that it is no longer used because we are moving to AutoGraphError, InternalError, etc. 
PiperOrigin-RevId: 231601621",__init__.py,"@@ -49,9 +49,8 @@ from tensorflow.python.autograph.impl.api import to_graph from tensorflow.python.autograph.lang.directives import set_element_type from tensorflow.python.autograph.lang.directives import set_loop_options from tensorflow.python.autograph.lang.special_functions import stack -from tensorflow.python.autograph.lang.special_functions import tensor_list from tensorflow.python.autograph.pyct.errors import AutoGraphError -from tensorflow.python.autograph.pyct.transformer import AutoGraphParseError +from tensorflow.python.autograph.lang.special_functions import tensor_list from tensorflow.python.autograph.utils import ag_logging from tensorflow.python.util.all_util import remove_undocumented @@ -79,7 +78,6 @@ _allowed_symbols = [ 'stack', 'tensor_list', # Exceptions - 'AutoGraphParseError', 'AutoGraphError', # Utilities: to be removed 'utils', ",0,test 71c9f15ea2f953fcdb4ff33316547c71930ed4d7,tensorflow/tensorflow,"Delete AutoGraphParseError now that it is no longer used because we are moving to AutoGraphError, InternalError, etc. PiperOrigin-RevId: 231601621",transformer.py,"@@ -27,18 +27,6 @@ from tensorflow.python.autograph.pyct import pretty_printer from tensorflow.python.autograph.pyct import templates -class AutoGraphParseError(SyntaxError): - """"""Error for graph construction errors from AutoGraph generated code."""""" - - def __init__(self, error, origin_info): - file_path = origin_info.loc.filename - line_number = origin_info.loc.lineno - col_offset = origin_info.loc.col_offset - source_line = origin_info.source_code_line - super(AutoGraphParseError, self).__init__( - error, (file_path, line_number, col_offset, source_line)) - - # TODO(znado): Use namedtuple. class Context(object): """"""Contains information about a source code transformation. ",0,test 647f7ae610f0b1f009b3af70735263598d13e292,tensorflow/tensorflow,"Disable lower using switch and merge in grappler optimization PiperOrigin-RevId: 297023390 Change-Id: I562efaf9226391624789d2a45912731724318f2e",lite.py,"@@ -1031,10 +1031,16 @@ class TFLiteConverter(TFLiteConverterBase): (self.inference_type == constants.INT8 and (post_training_optimize or weight_only_quantize))): try: + # TODO(b/150163103): Merge `disabling lower using switch merge' calls. + # Grappler will also try to lower while loop into switch merge + # representation which is undesired for Ophints, so we simply remove + # those attributes to prevent Grappler from doing so. + graph_def = _convert_to_constants.disable_lower_using_switch_merge( + optimized_graph) # Run function inlining optimization to ensure any models generated # through the from_frozen_graph path have been inlined. 
optimized_graph = _run_graph_optimizations( - self._graph_def, + graph_def, self._input_tensors, self._output_tensors, config=self._grappler_config([""function""])) ",0,train d5b5e1148ab3ef1817fadb864694ec3139746400,tensorflow/tensorflow,"Add a Keras LSTM+batch_jacobian integration test PiperOrigin-RevId: 335425103 Change-Id: Ife380fe0ff62baaac80dde0a990eb561b7664164",gradients_test.py,"@@ -79,6 +79,34 @@ class GradientsTest(tf.test.TestCase): for g, g_re in zip(grads, grads_re): self.assertAllClose(g, g_re) + def testLSTMBatchJacobian(self): + class HasLSTM(tf.keras.Model): + + def __init__(self): + super(HasLSTM, self).__init__() + self.lstm = tf.keras.layers.LSTM(units=5) + self.dense = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid) + + def call(self, x): + return self.dense(self.lstm(x)) + + m = HasLSTM() + + def jacobian(x): + with tf.GradientTape() as tape: + tape.watch(x) + y = m(x) # pylint: disable=not-callable + return tape.batch_jacobian(y, x) + + inp = tf.nn.l2_normalize(tf.ones([1, 2, 3]), axis=[1, 2]) + eager_result = jacobian(inp) + function_result = tf.function(jacobian)(inp) + self.assertAllClose(eager_result, function_result) + backprop_result, numeric_result = tf.test.compute_gradient( + m, [inp], delta=1e-3) + self.assertAllClose(numeric_result, backprop_result, rtol=1e-2) + self.assertAllClose(tf.reshape(numeric_result, [-1]), + tf.reshape(eager_result, [-1]), rtol=1e-2) if __name__ == ""__main__"": tf.test.main() ",0,train 17c5a9bbd67f34b25c3b5ffaba2f72a54ec0a105,tensorflow/tensorflow,"Change std::max, std::min, std::round to tflite::TfLiteMax, tflite::TfLiteMin, tflite::TfLiteRound Signed-off-by: Kamil Rakoczy Signed-off-by: Karol Gugala ",cppmath.h,"@@ -20,7 +20,7 @@ limitations under the License. namespace tflite { #if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \ - (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO) + (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO) || defined(__ZEPHYR__) #define TF_LITE_GLOBAL_STD_PREFIX #else #define TF_LITE_GLOBAL_STD_PREFIX std ",0,train 17c5a9bbd67f34b25c3b5ffaba2f72a54ec0a105,tensorflow/tensorflow,"Change std::max, std::min, std::round to tflite::TfLiteMax, tflite::TfLiteMin, tflite::TfLiteRound Signed-off-by: Kamil Rakoczy Signed-off-by: Karol Gugala ",max.h,"@@ -0,0 +1,35 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_ + +#include + +namespace tflite { + +#if defined(TF_LITE_USE_GLOBAL_MAX) || defined(__ZEPHYR__) +inline float TfLiteMax(const float& x, const float& y) { + return std::max(x, y); +} +#else +template +inline T TfLiteMax(const T& x, const T& y) { + return std::fmax(x, y); +} +#endif + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_ ",0,train 17c5a9bbd67f34b25c3b5ffaba2f72a54ec0a105,tensorflow/tensorflow,"Change std::max, std::min, std::round to tflite::TfLiteMax, tflite::TfLiteMin, tflite::TfLiteRound Signed-off-by: Kamil Rakoczy Signed-off-by: Karol Gugala ",min.h,"@@ -0,0 +1,35 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_ + +#include + +namespace tflite { + +#if defined(TF_LITE_USE_GLOBAL_MIN) || defined(__ZEPHYR__) +inline float TfLiteMin(const float& x, const float& y) { + return std::min(x, y); +} +#else +template +inline T TfLiteMin(const T& x, const T& y) { + return std::fmin(x, y); +} +#endif + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_ ",0,train 17c5a9bbd67f34b25c3b5ffaba2f72a54ec0a105,tensorflow/tensorflow,"Change std::max, std::min, std::round to tflite::TfLiteMax, tflite::TfLiteMin, tflite::TfLiteRound Signed-off-by: Kamil Rakoczy Signed-off-by: Karol Gugala ",reduce.h,"@@ -20,6 +20,8 @@ limitations under the License. #include ""tensorflow/lite/kernels/internal/cppmath.h"" #include ""tensorflow/lite/kernels/internal/quantization_util.h"" #include ""tensorflow/lite/kernels/internal/types.h"" +#include ""tensorflow/lite/kernels/internal/min.h"" +#include ""tensorflow/lite/kernels/internal/max.h"" namespace tflite { @@ -382,10 +384,10 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point, float float_mean = static_cast(temp_sum[idx]) / static_cast(num_elements_in_axis); float result = - std::min(TfLiteRound(float_mean * scale + bias) + output_zero_point, + TfLiteMin(TfLiteRound(float_mean * scale + bias) + output_zero_point, static_cast(std::numeric_limits::max())); result = - std::max(result, static_cast(std::numeric_limits::min())); + TfLiteMax(result, static_cast(std::numeric_limits::min())); output_data[idx] = static_cast(result); } } ",0,train 17c5a9bbd67f34b25c3b5ffaba2f72a54ec0a105,tensorflow/tensorflow,"Change std::max, std::min, std::round to tflite::TfLiteMax, tflite::TfLiteMin, tflite::TfLiteRound Signed-off-by: Kamil Rakoczy Signed-off-by: Karol Gugala ",resize_nearest_neighbor.h,"@@ -18,6 +18,7 @@ limitations under the License. 
#include #include ""tensorflow/lite/kernels/internal/types.h"" +#include ""tensorflow/lite/kernels/internal/cppmath.h"" namespace tflite { @@ -34,7 +35,7 @@ inline int32 GetNearestNeighbor(const int input_value, const int32 input_size, const float offset = half_pixel_centers ? 0.5f : 0.0f; int32 output_value = std::min( align_corners - ? static_cast(std::round((input_value + offset) * scale)) + ? static_cast(TfLiteRound((input_value + offset) * scale)) : static_cast(std::floor((input_value + offset) * scale)), input_size - 1); if (half_pixel_centers) { ",0,train 17c5a9bbd67f34b25c3b5ffaba2f72a54ec0a105,tensorflow/tensorflow,"Change std::max, std::min, std::round to tflite::TfLiteMax, tflite::TfLiteMin, tflite::TfLiteRound Signed-off-by: Kamil Rakoczy Signed-off-by: Karol Gugala ",activation_utils.h,"@@ -21,6 +21,8 @@ limitations under the License. #include ""tensorflow/lite/c/builtin_op_data.h"" #include ""tensorflow/lite/kernels/internal/cppmath.h"" +#include ""tensorflow/lite/kernels/internal/max.h"" +#include ""tensorflow/lite/kernels/internal/min.h"" namespace tflite { namespace ops { @@ -32,11 +34,11 @@ inline float ActivationValFloat(TfLiteFusedActivation act, float a) { case kTfLiteActNone: return a; case kTfLiteActRelu: - return std::max(0.0f, a); + return TfLiteMax(0.0f, a); case kTfLiteActRelu1: - return std::max(-1.0f, std::min(a, 1.0f)); + return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f)); case kTfLiteActRelu6: - return std::max(0.0f, std::min(a, 6.0f)); + return TfLiteMax(0.0f, TfLiteMin(a, 6.0f)); case kTfLiteActTanh: return std::tanh(a); case kTfLiteActSignBit: ",0,train d7503555753420aba3a4f9010bb5f7ed13d6c9ca,tensorflow/tensorflow,"Update GraphDef version to 401. PiperOrigin-RevId: 311492238 Change-Id: I93cb2eda8127d2ca0504ba2e06911a994c190347",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 400 // Updated: 2020/5/13 +#define TF_GRAPH_DEF_VERSION 401 // Updated: 2020/5/14 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // ",0,test b4a5927472f72b4c01683d14d76e3957336dc8c4,tensorflow/tensorflow,Update pfor.py,pfor.py,"@@ -1593,7 +1593,7 @@ class PFor: else: converter = _pfor_converter_registry.get(y_op.type, None) if converter is None: - root_cause = (f""there is no register converter."") + root_cause = (f""there is no register converter for this op."") has_variant_outputs = any(x.dtype == dtypes.variant for x in y_op.outputs) has_vectorized_variant_inputs = any( ",0,train 010599cb5005ec14c1021adec3079d1504c986a0,tensorflow/tensorflow,"Update GraphDef version to 47. PiperOrigin-RevId: 250029076",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 46 // Updated: 2019/5/25 +#define TF_GRAPH_DEF_VERSION 47 // Updated: 2019/5/26 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // ",0,train 63ed3e6b5ae5ff2f8f1f4e93201a07995ebf7d7f,tensorflow/tensorflow,"Fix Model.fit for TPU async eager when catching OutOfRange errors. 
PiperOrigin-RevId: 297377388 Change-Id: Id7a32962bb5f9451bb5d039dbe9e60d8287c64f5",training.py,"@@ -769,7 +769,11 @@ class Model(network.Network, version_utils.ModelVersionSelector): step_num=step, batch_size=batch_size): callbacks.on_train_batch_begin(step) - logs = train_function(iterator) + tmp_logs = train_function(iterator) + # Catch possible OutOfRangeError here. + # TODO(b/150292341): Allow multiple async steps. + context.async_wait() + logs = tmp_logs callbacks.on_train_batch_end(step, logs) epoch_logs = {m.name: m.result() for m in self.metrics} @@ -996,7 +1000,9 @@ class Model(network.Network, version_utils.ModelVersionSelector): graph_type='test', step_num=step): callbacks.on_test_batch_begin(step) - logs = test_function(iterator) + tmp_logs = test_function(iterator) + context.async_wait() # Possible OutOfRangeError here. + logs = tmp_logs callbacks.on_test_batch_end(step, logs) callbacks.on_test_end() @@ -1176,7 +1182,9 @@ class Model(network.Network, version_utils.ModelVersionSelector): with data_handler.catch_stop_iteration(): for step in data_handler.steps(): callbacks.on_predict_batch_begin(step) - batch_outputs = predict_function(iterator) + tmp_batch_outputs = predict_function(iterator) + context.async_wait() # Possible OutOfRangeError here. + batch_outputs = tmp_batch_outputs if outputs is None: outputs = nest.map_structure(lambda batch_output: [batch_output], batch_outputs) ",0,train 8bf282a345ca80f9e6d154df3bc7ac7f12a6457d,tensorflow/tensorflow,"Fixes tf.bool.as_numpy_dtype to return np.bool_ instead of np.bool (which is the same as Python `bool`). PiperOrigin-RevId: 353340422 Change-Id: Ie2a243a5ab2d1372308d63b0ec2c34b9c3f0084c",dtypes.py,"@@ -518,7 +518,7 @@ _TF_TO_NP = { types_pb2.DT_INT64: np.int64, types_pb2.DT_BOOL: - np.bool, + np.bool_, types_pb2.DT_QINT8: _np_qint8, types_pb2.DT_QUINT8: ",0,train 34911101beb2e302b4afcaff79310845998e3530,tensorflow/tensorflow,"Fix error message formatting. 
PiperOrigin-RevId: 208911623",conv_grad_ops.cc,"@@ -63,7 +63,7 @@ Status ConvBackpropExtractAndVerifyDimensionV2( return errors::InvalidArgument( label, "": Size of out_backprop doesn't match computed: "", ""actual = "", dim->output_size, "", computed = "", out_size, - ""spatial_dim: "", spatial_dim, "" input: "", dim->input_size, + "" spatial_dim: "", spatial_dim, "" input: "", dim->input_size, "" filter: "", dim->filter_size, "" output: "", dim->output_size, "" stride: "", dim->stride, "" dilation: "", dim->dilation); } ",0,train 7985e520910962b96d5f71f77d3f4ead1cb24f75,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-07-04 PiperOrigin-RevId: 256515094",compat.py,"@@ -27,7 +27,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 7, 3) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 7, 4) @tf_export(""compat.forward_compatible"") ",0,test fc9bde9c0675116490d204c21f81c764691503f9,tensorflow/tensorflow,Only register the _Arg and _Retval kernel for POD types on sycl,function_ops.cc,"@@ -87,8 +87,28 @@ class RetvalOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name(""_Arg"").Device(DEVICE_CPU), ArgOp); REGISTER_KERNEL_BUILDER(Name(""_Retval"").Device(DEVICE_CPU), RetvalOp); -REGISTER_KERNEL_BUILDER(Name(""_Arg"").Device(DEVICE_SYCL), ArgOp); -REGISTER_KERNEL_BUILDER(Name(""_Retval"").Device(DEVICE_SYCL), RetvalOp); +#if TENSORFLOW_USE_SYCL +#define REGISTER(type) \ + REGISTER_KERNEL_BUILDER( \ + Name(""_Arg"").Device(DEVICE_SYCL).TypeConstraint(""T""), ArgOp); + TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER) + TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(""_Arg"") + .Device(DEVICE_GPU) + .HostMemory(""output"") + .TypeConstraint(""T""), + ArgOp); +#undef REGISTER +#define REGISTER(type) \ + REGISTER_KERNEL_BUILDER( \ + Name(""_Retval"").Device(DEVICE_SYCL).TypeConstraint(""T""), RetvalOp); + TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER) + TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(""_Retval"") + .Device(DEVICE_GPU) + .HostMemory(""input"") + .TypeConstraint(""T""), + RetvalOp); +#undef REGISTER +#endif #define REGISTER(type) \ REGISTER_KERNEL_BUILDER( \ ",0,train e22785521834aac1b0b0fb7ccc4458c3ee5f8e62,tensorflow/tensorflow,"Internal change PiperOrigin-RevId: 402966641 Change-Id: I4b8bebb3c16a81b1716b6feaaff442c2083a7948",mhlo_to_lhlo_with_xla.cc,"@@ -1662,11 +1662,6 @@ Status HloToLhloModule(const BufferAssignment& assignment, module->setLoc(mlir::NameLoc::get( mlir::Identifier::get(hlo_module.name(), module.getContext()))); - // Store the HloModule's unique_id in the MLIR module. - Builder builder(module.getContext()); - module->setAttr(""mhlo.unique_id"", - builder.getI64IntegerAttr(hlo_module.unique_id())); - const HloComputation* computation = hlo_module.entry_computation(); LhloDialectEmitter emitter(assignment, *computation, module); ",0,test e22785521834aac1b0b0fb7ccc4458c3ee5f8e62,tensorflow/tensorflow,"Internal change PiperOrigin-RevId: 402966641 Change-Id: I4b8bebb3c16a81b1716b6feaaff442c2083a7948",cpu_executable.cc,"@@ -52,15 +52,6 @@ limitations under the License. 
namespace xla { namespace cpu { -static std::string ModuleUniqueName(absl::string_view module_name, - const HloModule* module) { - std::string unique_id; - if (module != nullptr) { - unique_id = absl::StrCat(""module."", module->unique_id(), "".""); - } - return absl::StrCat(unique_id, module_name); -} - CpuExecutable::CpuExecutable( std::unique_ptr jit, std::unique_ptr assignment, @@ -75,9 +66,8 @@ CpuExecutable::CpuExecutable( if (assignment_) { buffer_assignment_.reset(new BufferAssignmentProto(assignment_->ToProto())); } - XlaDebugInfoManager::Get()->RegisterModule( - ModuleUniqueName(module_name_, shared_module().get()), shared_module(), - buffer_assignment_); + XlaDebugInfoManager::Get()->RegisterModule(module_name_, shared_module(), + buffer_assignment_); // Resolve symbols in the constructor rather than at execution time to avoid // races because FindSymbol is not thread safe. @@ -95,9 +85,8 @@ CpuExecutable::CpuExecutable( } CpuExecutable::~CpuExecutable() { - XlaDebugInfoManager::Get()->UnregisterModule( - ModuleUniqueName(module_name_, shared_module().get()), shared_module(), - buffer_assignment_); + XlaDebugInfoManager::Get()->UnregisterModule(module_name_, shared_module(), + buffer_assignment_); } static StatusOr MemoryForAllocation( ",0,test e22785521834aac1b0b0fb7ccc4458c3ee5f8e62,tensorflow/tensorflow,"Internal change PiperOrigin-RevId: 402966641 Change-Id: I4b8bebb3c16a81b1716b6feaaff442c2083a7948",gpu_executable.cc,"@@ -78,15 +78,6 @@ bool NeedsAsyncCommsStream(Thunk& thunk) { } } -static std::string ModuleUniqueName(absl::string_view module_name, - const HloModule* module) { - std::string unique_id; - if (module != nullptr) { - unique_id = absl::StrCat(""module."", module->unique_id(), "".""); - } - return absl::StrCat(unique_id, module_name); -} - } // namespace void GpuExecutable::BefBufferDeleter::operator()(uint8_t* ptr) const { @@ -116,15 +107,13 @@ GpuExecutable::GpuExecutable(GpuExecutable::Params params) entry_computation_profile_index_(params.entry_computation_profile_index), constants_(std::move(params.constants)), output_info_(std::move(params.output_info)) { - XlaDebugInfoManager::Get()->RegisterModule( - ModuleUniqueName(module_name_, shared_module().get()), shared_module(), - debug_buffer_assignment_); + XlaDebugInfoManager::Get()->RegisterModule(module_name_, shared_module(), + debug_buffer_assignment_); } GpuExecutable::~GpuExecutable() { - XlaDebugInfoManager::Get()->UnregisterModule( - ModuleUniqueName(module_name_, shared_module().get()), shared_module(), - debug_buffer_assignment_); + XlaDebugInfoManager::Get()->UnregisterModule(module_name_, shared_module(), + debug_buffer_assignment_); { // We could have issued host->device mem copies in ResolveConstantGlobals. ",0,test e22785521834aac1b0b0fb7ccc4458c3ee5f8e62,tensorflow/tensorflow,"Internal change PiperOrigin-RevId: 402966641 Change-Id: I4b8bebb3c16a81b1716b6feaaff442c2083a7948",ir_emitter_unnested.cc,"@@ -5671,19 +5671,10 @@ Status IrEmitterUnnested::EmitLmhloRegion(mlir::Region* region) { Thunk::ThunkInfo IrEmitterUnnested::GetThunkInfo(mlir::Operation* op) { auto module = op->getParentOfType(); - // Include the HloModule's unique_id in the thunk's module name so that xprof - // shows different modules differently, addressing b/202415436#comment24. - // xprof calls this the ""program_id"". 
- std::string unique_id_str; - if (auto unique_id_attr = - module->getAttrOfType(""mhlo.unique_id"")) { - unique_id_str = absl::StrFormat("",program_id=%d"", - unique_id_attr.getValue().getZExtValue()); - } Thunk::ThunkInfo thunk_info; thunk_info.profile_annotation = absl::StrFormat( - ""Thunk:#hlo_op=%s,hlo_module=%s%s#"", mlir::GetNameFromLoc(op->getLoc()), - mlir::GetNameFromLoc(module->getLoc()), unique_id_str); + ""Thunk:#hlo_op=%s,hlo_module=%s#"", mlir::GetNameFromLoc(op->getLoc()), + mlir::GetNameFromLoc(module->getLoc())); return thunk_info; } ",0,test 8a78a2973eee143fe8a255761b214ebe0687b585,tensorflow/tensorflow,merge and add more erase tests,map_ops_test.py,"@@ -30,7 +30,7 @@ from tensorflow.python.platform import test @test_util.run_all_in_graph_and_eager_modes class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): - + ''' def testEmptyTensorMapSize(self): m = map_ops.empty_tensor_map() s = map_ops.tensor_map_size(m) @@ -105,82 +105,78 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): self.assertAllEqual(b2, False) def testHasKeyLookup(self): - with self.test_session(): - m = map_ops.empty_tensor_map() - k = constant_op.constant(1.0) - k2 = constant_op.constant(2.0) - v = constant_op.constant(2.0) - m = map_ops.tensor_map_insert(m, k, v) + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + k2 = constant_op.constant(2.0) + v = constant_op.constant(2.0) + m = map_ops.tensor_map_insert(m, k, v) - default_value = array_ops.zeros_like(v) - l = control_flow_ops.cond(map_ops.tensor_map_has_key(m, k), + default_value = array_ops.zeros_like(v) + l = control_flow_ops.cond(map_ops.tensor_map_has_key(m, k), + lambda: map_ops.tensor_map_lookup(m, k, dtypes.float32), + lambda: default_value) + l2 = control_flow_ops.cond(map_ops.tensor_map_has_key(m, k2), lambda: map_ops.tensor_map_lookup(m, k, dtypes.float32), lambda: default_value) - l2 = control_flow_ops.cond(map_ops.tensor_map_has_key(m, k2), - lambda: map_ops.tensor_map_lookup(m, k, dtypes.float32), - lambda: default_value) - self.assertAllClose(l, v) - self.assertAllClose(l2, default_value) - + self.assertAllClose(l, v) + self.assertAllClose(l2, default_value) +''' def testInsertLookupGrad(self): with backprop.GradientTape() as tape: m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) - v = constant_op.constant(2.0) + v = constant_op.constant(11.0) tape.watch(v) m = map_ops.tensor_map_insert(m, k, v) l = map_ops.tensor_map_lookup(m, k, dtypes.float32) l *= 5 g = tape.gradient(l, v) - self.assertAllClose(g, 5) + self.assertAllEqual(g, 5) def testMultipleInsertLookupGrad(self): with backprop.GradientTape(persistent=True) as tape: m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) - v = constant_op.constant(2.0) - k2 = constant_op.constant(12.0) - v2 = constant_op.constant(22.0) - k3 = constant_op.constant(13.0) - v3 = constant_op.constant(23.0) + k2 = constant_op.constant(2.0) + k3 = constant_op.constant(3.0) + v = constant_op.constant(11.0) + v2 = constant_op.constant(12.0) + v3 = constant_op.constant(13.0) tape.watch(v) tape.watch(v2) tape.watch(v3) m = map_ops.tensor_map_insert(m, k, v) m = map_ops.tensor_map_insert(m, k2, v2) m = map_ops.tensor_map_insert(m, k3, v3) - l = map_ops.tensor_map_lookup(m, k, v.dtype) l2 = map_ops.tensor_map_lookup(m, k2, v2.dtype) l3 = map_ops.tensor_map_lookup(m, k3, v3.dtype) g = tape.gradient(l * 5, v) g2 = tape.gradient(l2 * 6, v2) g3 = tape.gradient(l3 * 7, v3) - self.assertAllClose(g, 5) - self.assertAllClose(g2, 6) - 
self.assertAllClose(g3, 7) - - def testSameKeyInsertLookupGrad(self): + self.assertAllEqual(g, 5) + self.assertAllEqual(g2, 6) + self.assertAllEqual(g3, 7) + + def testInsertLookupComposeGrad(self): with backprop.GradientTape(persistent=True) as tape: m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) - v = constant_op.constant(2.0) - v2 = constant_op.constant(22.0) + k2 = constant_op.constant(2.0) + v = constant_op.constant(11.0) tape.watch(v) - tape.watch(v2) m = map_ops.tensor_map_insert(m, k, v) - m = map_ops.tensor_map_insert(m, k, v2) - l = map_ops.tensor_map_lookup(m, k, v2.dtype) - g = tape.gradient(l * 5, v) - g2 = tape.gradient(l * 5, v2) - self.assertAllClose(g, array_ops.zeros_like(v)) - self.assertAllClose(g2, 5) + l = map_ops.tensor_map_lookup(m, k, v.dtype) + m = map_ops.tensor_map_insert(m, k2, l) + l2 = map_ops.tensor_map_lookup(m, k2, l.dtype) + g = tape.gradient(l2 * 5, v) + self.assertAllEqual(g, 5) - def testSameKeyAlternatingInsertLookupGrad(self): + def testReplaceLookupGrad(self): with backprop.GradientTape(persistent=True) as tape: m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) - v = constant_op.constant(2.0) + v = constant_op.constant(11.0) v2 = constant_op.constant(22.0) tape.watch(v) tape.watch(v2) @@ -200,8 +196,8 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testLookupAddGrad(self): with backprop.GradientTape(persistent=True) as tape: k = constant_op.constant(1.0) - v = constant_op.constant(2.0) - k2 = constant_op.constant(12.0) + k2 = constant_op.constant(2.0) + v = constant_op.constant(11.0) v2 = constant_op.constant(22.0) tape.watch(v) tape.watch(v2) @@ -217,14 +213,32 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): g3 = tape.gradient(l1 + l2 * 4, v2) self.assertAllEqual(g3, 4) - def testEraseGrad(self): + def testLookupMultiplyGrad(self): with backprop.GradientTape(persistent=True) as tape: - m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) - v = constant_op.constant(2.0) + k2 = constant_op.constant(2.0) + v = constant_op.constant(11.0) + v2 = constant_op.constant(22.0) tape.watch(v) - k2 = constant_op.constant(12.0) + tape.watch(v2) + m = map_ops.empty_tensor_map() + m = map_ops.tensor_map_insert(m, k, v) + m = map_ops.tensor_map_insert(m, k2, v2) + l1 = map_ops.tensor_map_lookup(m, k, v.dtype) + l2 = map_ops.tensor_map_lookup(m, k2, v2.dtype) + g = tape.gradient(l1 * l2, [v, v2]) + self.assertAllClose(g, [v2, v]) + g2 = tape.gradient(l1 * l1, v) + self.assertAllClose(g2, 2*v) + + def testEraseSecondGrad(self): + with backprop.GradientTape(persistent=True) as tape: + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + k2 = constant_op.constant(2.0) + v = constant_op.constant(11.0) v2 = constant_op.constant(22.0) + tape.watch(v) tape.watch(v2) m = map_ops.tensor_map_insert(m, k, v) m = map_ops.tensor_map_insert(m, k2, v2) @@ -236,7 +250,49 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): self.assertAllEqual(g, 5) g2 = tape.gradient(e * 6, v2) self.assertAllEqual(g2, 6) + + def testEraseFirstGrad(self): + with backprop.GradientTape(persistent=True) as tape: + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + k2 = constant_op.constant(2.0) + v = constant_op.constant(11.0) + v2 = constant_op.constant(22.0) + tape.watch(v) + tape.watch(v2) + m = map_ops.tensor_map_insert(m, k, v) + m = map_ops.tensor_map_insert(m, k2, v2) + m, e = map_ops.tensor_map_erase(m, k, v.dtype) + l = map_ops.tensor_map_lookup(m, k2, 
v2.dtype) + self.assertAllClose(l, v2) + self.assertAllClose(e, v) + g = tape.gradient(l * 5, v2) + self.assertAllEqual(g, 5) + g2 = tape.gradient(e * 6, v) + self.assertAllEqual(g2, 6) + m, e2 = map_ops.tensor_map_erase(m, k2, v2.dtype) + g3 = tape.gradient(e2 * 7, v2) + def testEraseComposedGrad(self): + with backprop.GradientTape(persistent=True) as tape: + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + k2 = constant_op.constant(2.0) + v = constant_op.constant(11.0) + v2 = constant_op.constant(22.0) + tape.watch(v) + tape.watch(v2) + m = map_ops.tensor_map_insert(m, k, v) + m, e = map_ops.tensor_map_erase(m, k, v.dtype) + m = map_ops.tensor_map_insert(m, k2, e) + l = map_ops.tensor_map_lookup(m, k2, e.dtype) + self.assertAllClose(e, v) + self.assertAllClose(l, e) + g = tape.gradient(l * 5, v) + self.assertAllEqual(g, 5) + g2 = tape.gradient(e * 6, v) + self.assertAllEqual(g2, 6) + def testStringKeyGrad(self): with backprop.GradientTape(persistent=True) as tape: m = map_ops.empty_tensor_map() @@ -312,20 +368,6 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): g2 = tape.gradient(l + l2, v2) self.assertAllEqual(g2, 1) - def testReplaceGrad(self): - with backprop.GradientTape(persistent=True) as tape: - m = map_ops.empty_tensor_map() - k = constant_op.constant(1.0) - v = constant_op.constant(2.0) - v2 = constant_op.constant(3.0) - tape.watch(v) - tape.watch(v2) - m = map_ops.tensor_map_insert(m, k, v) - l = map_ops.tensor_map_lookup(m, k, v.dtype) - m = map_ops.tensor_map_insert(m, k, v2) - l2 = map_ops.tensor_map_lookup(m, k, v2.dtype) - g = tape.gradient(l + l2, v) - self.assertAllEqual(g, 1) if __name__ == '__main__': test.main() ",0,train 477cfa2aaa7a65c603b4e04df928ec45a1e0d4ca,tensorflow/tensorflow,"Let CategoryEncoding error out for negative values and gives better error message. PiperOrigin-RevId: 336751008 Change-Id: If7fb43127c2587b7658e8aed63331413ac932779",category_encoding.py,"@@ -298,12 +298,18 @@ class CategoryEncoding(base_preprocessing_layer.CombinerPreprocessingLayer): binary_output = (self._output_mode == BINARY) if isinstance(inputs, sparse_tensor.SparseTensor): max_value = math_ops.reduce_max(inputs.values) + min_value = math_ops.reduce_min(inputs.values) else: max_value = math_ops.reduce_max(inputs) - condition = math_ops.greater_equal( - math_ops.cast(out_depth, max_value.dtype), max_value) + min_value = math_ops.reduce_min(inputs) + condition = math_ops.logical_and( + math_ops.greater_equal( + math_ops.cast(out_depth, max_value.dtype), max_value), + math_ops.greater_equal( + min_value, math_ops.cast(0, min_value.dtype))) control_flow_ops.Assert( - condition, [""Input must be less than max_token {}"".format(out_depth)]) + condition, [""Input values must be in the range 0 <= values < max_tokens"" + "" with max_tokens={}"".format(out_depth)]) if self._sparse: result = bincount_ops.sparse_bincount( inputs, ",0,test 477cfa2aaa7a65c603b4e04df928ec45a1e0d4ca,tensorflow/tensorflow,"Let CategoryEncoding error out for negative values and gives better error message. 
PiperOrigin-RevId: 336751008 Change-Id: If7fb43127c2587b7658e8aed63331413ac932779",category_encoding_test.py,"@@ -277,8 +277,23 @@ class CategoryEncodingInputTest(keras_parameterized.TestCase, int_data = encoder_layer(input_data) self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) model = keras.Model(inputs=input_data, outputs=int_data) - with self.assertRaisesRegex(errors.InvalidArgumentError, - "".*must be less than max_token 3""): + with self.assertRaisesRegex( + errors.InvalidArgumentError, + "".*must be in the range 0 <= values < max_tokens.*""): + _ = model.predict(input_array, steps=1) + + def test_dense_negative(self): + input_array = constant_op.constant([[1, 2, 0], [2, 2, -1]]) + max_tokens = 3 + expected_output_shape = [None, max_tokens] + encoder_layer = get_layer_class()(max_tokens) + input_data = keras.Input(shape=(3,), dtype=dtypes.int32) + int_data = encoder_layer(input_data) + self.assertAllEqual(expected_output_shape, int_data.shape.as_list()) + model = keras.Model(inputs=input_data, outputs=int_data) + with self.assertRaisesRegex( + errors.InvalidArgumentError, + "".*must be in the range 0 <= values < max_tokens.*""): _ = model.predict(input_array, steps=1) ",0,test e7c7116eabbcc6889da34d2ba0bca4ffe5639d84,tensorflow/tensorflow,"Core RNNCell implementations now use state_is_tuple=True by default This is part of the deprecation process for non-tuple LSTM and MultiRNNCell states. Change: 129507912",models.py,"@@ -19,6 +19,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import functools + from tensorflow.contrib import rnn as contrib_rnn from tensorflow.contrib.learn.python.learn.ops import autoencoder_ops from tensorflow.contrib.learn.python.learn.ops import dnn_ops @@ -378,7 +380,8 @@ def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn, bidirectional, elif cell_type == 'gru': cell_fn = nn.rnn_cell.GRUCell elif cell_type == 'lstm': - cell_fn = nn.rnn_cell.BasicLSTMCell + cell_fn = functools.partial( + nn.rnn_cell.BasicLSTMCell, state_is_tuple=False) else: raise ValueError('cell_type {} is not supported. 
'.format(cell_type)) # TODO: state_is_tuple=False is deprecated @@ -394,9 +397,11 @@ def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn, bidirectional, bw_cell = contrib_rnn.AttentionCellWrapper( fw_cell, attn_length=attn_length, attn_size=attn_size, attn_vec_size=attn_vec_size, state_is_tuple=False) - rnn_fw_cell = nn.rnn_cell.MultiRNNCell([fw_cell] * num_layers) + rnn_fw_cell = nn.rnn_cell.MultiRNNCell([fw_cell] * num_layers, + state_is_tuple=False) # backward direction cell - rnn_bw_cell = nn.rnn_cell.MultiRNNCell([bw_cell] * num_layers) + rnn_bw_cell = nn.rnn_cell.MultiRNNCell([bw_cell] * num_layers, + state_is_tuple=False) # pylint: disable=unexpected-keyword-arg, no-value-for-parameter _, encoding = bidirectional_rnn(rnn_fw_cell, rnn_bw_cell, @@ -411,7 +416,8 @@ def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn, bidirectional, rnn_cell = contrib_rnn.AttentionCellWrapper( rnn_cell, attn_length=attn_length, attn_size=attn_size, attn_vec_size=attn_vec_size, state_is_tuple=False) - cell = nn.rnn_cell.MultiRNNCell([rnn_cell] * num_layers) + cell = nn.rnn_cell.MultiRNNCell([rnn_cell] * num_layers, + state_is_tuple=False) _, encoding = nn.rnn(cell, x, dtype=dtypes.float32, ",0,train 55ad623ecaf12de0260008395afb061cdf75f55d,tensorflow/tensorflow,"Automated rollback of change 152465346 Change: 152465844",math_ops.cc,"@@ -1256,8 +1256,6 @@ REGISTER_OP(""ArgMax"") .Doc(R""doc( Returns the index with the largest value across dimensions of a tensor. -Note: in case of ties the identity of the return value is not guaranteed. - dimension: int32, 0 <= dimension < rank(input). Describes which dimension of the input Tensor to reduce across. For vectors, use dimension = 0. )doc""); @@ -1272,8 +1270,6 @@ REGISTER_OP(""ArgMin"") .Doc(R""doc( Returns the index with the smallest value across dimensions of a tensor. -Note: in case of ties the identity of the return value is not guaranteed. - dimension: int32, 0 <= dimension < rank(input). Describes which dimension of the input Tensor to reduce across. For vectors, use dimension = 0. )doc""); ",0,train b07f8211409f2b2e46ab539291e824f2b7865885,tensorflow/tensorflow,remove unused sparse_ops import,nn_grad.py,"@@ -27,7 +27,6 @@ from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops -from tensorflow.python.ops import sparse_ops @ops.RegisterGradient(""Conv2DBackpropInput"") ",0,train 52aeafdf04af9f95500067dc353fd80728032b63,tensorflow/tensorflow,documenting that init_op will not be run when loading from checkpoint (#18051),session_manager.py,"@@ -229,10 +229,14 @@ class SessionManager(object): up to `max_wait_secs`, for recovery to succeed. If the model cannot be recovered successfully then it is initialized by - either running the provided `init_op`, or calling the provided `init_fn`. - The local_init_op is also run after init_op and init_fn, regardless of + running the `init_op` and calling `init_fn` if they are provided. + The `local_init_op` is also run after init_op and init_fn, regardless of whether the model was recovered successfully, but only if - ready_for_local_init_op passes. + `ready_for_local_init_op` passes. + + If the model is recovered from a checkpoint it is assumed that all + global variables have been initialized, in particular neither `init_op` + nor `init_fn` will be executed. It is an error if the model cannot be recovered and no `init_op` or `init_fn` or `local_init_op` are passed. 
",0,train cdc13381936ce47a06318df8ec7ace48330940f3,tensorflow/tensorflow,"Add gemmlowp label for SquaredDifference and Sum ops PiperOrigin-RevId: 258267779",reduce.cc,"@@ -523,6 +523,7 @@ TfLiteStatus EvalGeneric(TfLiteContext* context, TfLiteNode* node) { TfLiteStatus EvalSum(TfLiteContext* context, TfLiteNode* node) { OpContext op_context(context, node); + gemmlowp::ScopedProfilingLabel label(""Sum""); const auto& input = op_context.input; const auto& output = op_context.output; const bool same_scale = ",0,train cdc13381936ce47a06318df8ec7ace48330940f3,tensorflow/tensorflow,"Add gemmlowp label for SquaredDifference and Sum ops PiperOrigin-RevId: 258267779",squared_difference.cc,"@@ -95,6 +95,7 @@ void EvalSquaredDifference(TfLiteContext* context, TfLiteNode* node, TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { OpData* data = reinterpret_cast(node->user_data); + gemmlowp::ScopedProfilingLabel label(""SquaredDifference""); const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); ",0,train 48b24214dd5da842bd00414b46f3e46319c777ee,tensorflow/tensorflow,"Update model in keras dist strat learning phase test to return consistent values. PiperOrigin-RevId: 216461637",keras_test.py,"@@ -592,33 +592,37 @@ class TestDistributionStrategyWithDatasets(test.TestCase, # meaningful values. Currently we don't pass the learning phase if the # Lambda layer uses the learning phase. with self.cached_session(): - x = keras.layers.Input(shape=(16,), name='input') - y = keras.layers.Dense(16)(x) + x = keras.layers.Input(shape=(1,), name='input') + y = keras.layers.Dense(1, kernel_initializer='ones')(x) z = keras.layers.Dropout(0.9999)(y) model = keras.Model(x, z) + initial_weights = model.get_weights() optimizer = gradient_descent.GradientDescentOptimizer(0.005) loss = 'mse' metrics = ['acc'] - strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0', - '/device:CPU:0']) + strategy = mirrored_strategy.MirroredStrategy( + ['/device:GPU:0', '/device:GPU:1']) model.compile(optimizer, loss, metrics=metrics, distribute=strategy) - inputs = np.random.rand(10, 16) - targets = np.ones((10, 16), dtype=np.float32) + inputs = np.ones((10, 1), dtype=np.float32) + targets = np.ones((10, 1), dtype=np.float32) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(8) - - hist = model.fit(dataset, epochs=5, steps_per_epoch=20, verbose=1) - self.assertEqual(hist.history['acc'][0], 1) + dataset = dataset.repeat().batch(8) + hist = model.fit(dataset, epochs=1, steps_per_epoch=20, verbose=1) + self.assertAlmostEqual(hist.history['acc'][0], 0, 0) + model.set_weights(initial_weights) evaluate_output = model.evaluate(dataset, steps=20) - self.assertEqual(evaluate_output[1], 0) - - predict_output = model.predict(dataset, steps=1) - self.assertNotEqual(np.mean(predict_output), 0) + self.assertAlmostEqual(evaluate_output[1], 1, 0) + + inputs = np.ones((10, 1), dtype=np.float32) + predict_dataset = dataset_ops.Dataset.from_tensor_slices(inputs) + predict_dataset = predict_dataset.repeat().batch(5) + output = model.predict(predict_dataset, steps=10) + ref_output = np.ones((50, 1), dtype=np.float32) + self.assertArrayNear(output[0], ref_output, 1e-1) class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase): ",0,train b5bfebf6669982ccf818c3e9a69197ceca9dc456,tensorflow/tensorflow,"Comment style nitpick for StrategyExtendV2. 
PiperOrigin-RevId: 282941111 Change-Id: Ide992cfe95dd02f5ecb1627c758326d577c49f9e",distribute_lib.py,"@@ -1164,8 +1164,8 @@ class StrategyExtendedV2(object): *Replica context vs. Cross-replica context* - _replica context_ is when we are in some function that is being called once - for each replica. Otherwise we are in cross-replica context, which is + A _replica context_ applies when we are in some function that is being called + once for each replica. Otherwise we are in cross-replica context, which is useful for calling `tf.distribute.Strategy` methods which operate across the replicas (like `reduce_to()`). By default you start in a replica context (the ""default single replica context"") and then some methods can switch you ",0,train 119161fed5d4c2ed38895aa19bcfc5893bd58995,tensorflow/tensorflow,"Better handling when `operators` is `None` instead of `[]`. PiperOrigin-RevId: 368179066 Change-Id: I337a3cead57ba2b67b24499a750c85a1899e478f",visualize.py,"@@ -293,7 +293,7 @@ def GenerateGraph(subgraph_idx, g, opcode_mapper): second = {} pixel_mult = 200 # TODO(aselle): multiplier for initial placement width_mult = 170 # TODO(aselle): multiplier for initial placement - for op_index, op in enumerate(g[""operators""]): + for op_index, op in enumerate(g[""operators""] or []): for tensor_input_position, tensor_index in enumerate(op[""inputs""]): if tensor_index not in first: @@ -487,8 +487,9 @@ def CreateHtmlFile(tflite_input, html_output): html += GenerateTableHtml(g[""tensors""], tensor_keys_to_display) # Print the ops. - html += ""
<h2>Ops</h2>\n"" - html += GenerateTableHtml(g[""operators""], op_keys_to_display) + if g[""operators""]: + html += ""<h2>Ops</h2>
\n"" + html += GenerateTableHtml(g[""operators""], op_keys_to_display) # Visual graph. html += ""\n"" % ( ",0,train 085a8afe4e67c036d2e21c1c178d32e5a6b5b401,tensorflow/tensorflow,"Allow MLIR bridge to run in the fallback mode if user sets the mlir_bridge_safe_mode. PiperOrigin-RevId: 362441574 Change-Id: Iab408822ab29c216df061f1fe08a6acf7b78a459",mlir_bridge_rollout_policy.h,"@@ -35,6 +35,9 @@ enum class MlirBridgeRolloutPolicy { // features in the model, the MLIR bridge should be run. If the MLIR Bridge // errors, the fallback path should be used whenever possible. kEnabledAfterGraphAnalysis, + // The bridge was fallback enabled in a safe mode and passed all graph + // analysis checks. + kEnabledAfterGraphAnalysisSafeModeFallback }; // Analyzes the user requested policy as well as the contents of the graph and ",0,train 085a8afe4e67c036d2e21c1c178d32e5a6b5b401,tensorflow/tensorflow,"Allow MLIR bridge to run in the fallback mode if user sets the mlir_bridge_safe_mode. PiperOrigin-RevId: 362441574 Change-Id: Iab408822ab29c216df061f1fe08a6acf7b78a459",mlir_bridge_pass.cc,"@@ -103,6 +103,8 @@ MlirOptimizationPassState MlirBridgePass::GetPassState( return MlirOptimizationPassState::Enabled; case MlirBridgeRolloutPolicy::kEnabledAfterGraphAnalysis: return MlirOptimizationPassState::ShadowEnabled; + case MlirBridgeRolloutPolicy::kEnabledAfterGraphAnalysisSafeModeFallback: + return MlirOptimizationPassState::FallbackEnabled; case MlirBridgeRolloutPolicy::kDisabledByUser: case MlirBridgeRolloutPolicy::kDisabledAfterGraphAnalysis: return MlirOptimizationPassState::Disabled; @@ -173,9 +175,16 @@ MlirOptimizationPassState MlirBridgeV1CompatPass::GetPassState( MlirBridgeRolloutPolicy policy = GetMlirBridgeRolloutPolicy( graph, /*function_library=*/&function_library, config_proto, /*uses_uninitialized_resource_args=*/false); - return (policy == MlirBridgeRolloutPolicy::kEnabledByUser) - ? MlirOptimizationPassState::Enabled - : MlirOptimizationPassState::Disabled; + switch (policy) { + case MlirBridgeRolloutPolicy::kEnabledByUser: + return MlirOptimizationPassState::Enabled; + case MlirBridgeRolloutPolicy::kEnabledAfterGraphAnalysisSafeModeFallback: + return MlirOptimizationPassState::FallbackEnabled; + case MlirBridgeRolloutPolicy::kEnabledAfterGraphAnalysis: + case MlirBridgeRolloutPolicy::kDisabledByUser: + case MlirBridgeRolloutPolicy::kDisabledAfterGraphAnalysis: + return MlirOptimizationPassState::Disabled; + } } Status MlirBridgeV1CompatPass::Run(const GraphOptimizationPassOptions& options, ",0,train e424ba4a6d6e2c10f78f7f899de3c5d8dfb2e8c9,tensorflow/tensorflow,"Track symbolic shapes through shapeN operations PiperOrigin-RevId: 177029912",shape_refiner.cc,"@@ -707,6 +707,8 @@ Status ShapeRefiner::ConstantPartialShape(InferenceContext* target_context, *result = target_context->Scalar(); } else if (src_op == ""Shape"") { *result = src_context->input(0); + } else if (src_op == ""ShapeN"") { + *result = src_context->input(input_edge->src_output()); } else if (src_op == ""Pack"") { std::vector dims; // Pack is concatenating its input scalars to form the shape tensor vector. 
",0,train e424ba4a6d6e2c10f78f7f899de3c5d8dfb2e8c9,tensorflow/tensorflow,"Track symbolic shapes through shapeN operations PiperOrigin-RevId: 177029912",graph_properties_test.cc,"@@ -825,6 +825,32 @@ TEST_F(GraphPropertiesTest, DoNotValidateColocationConstraints) { TF_EXPECT_OK(properties.InferStatically()); } +TEST_F(GraphPropertiesTest, ShapeTracking) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output a = + ops::Placeholder(s.WithOpName(""a""), DT_FLOAT, + ops::Placeholder::Shape(PartialTensorShape({-1, -1}))); + Output b = + ops::Placeholder(s.WithOpName(""b""), DT_FLOAT, + ops::Placeholder::Shape(PartialTensorShape({-1}))); + Output zero = ops::Const(s.WithOpName(""zero""), 0.0f, {}); + auto shp = ops::ShapeN(s.WithOpName(""shapes""), {a, b}); + Output o1 = ops::Fill(s.WithOpName(""o1""), shp[0], zero); + Output o2 = ops::Fill(s.WithOpName(""o2""), shp[1], zero); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphProperties properties(item); + TF_CHECK_OK(properties.InferStatically()); + const auto shape_a = properties.GetOutputProperties(""a"").at(0).shape(); + const auto shape_b = properties.GetOutputProperties(""b"").at(0).shape(); + const auto shape_o1 = properties.GetOutputProperties(""o1"").at(0).shape(); + const auto shape_o2 = properties.GetOutputProperties(""o2"").at(0).shape(); + EXPECT_EQ(shape_a.DebugString(), shape_o1.DebugString()); + EXPECT_EQ(shape_b.DebugString(), shape_o2.DebugString()); +} + } // namespace } // namespace grappler } // namespace tensorflow ",0,train 02f55400f87b22f7ea0849c39022792d1e381afb,tensorflow/tensorflow,"custom_gradient functions should be able to return their inputs PiperOrigin-RevId: 173723462",backprop_test.py,"@@ -569,5 +569,17 @@ class BackpropTest(test.TestCase): var.assign_sub(lr*grad) self.assertAllEqual(losses, [4.0, 3., 2., 1., 0.]) + def testCustomGradientIdentity(self): + + @custom_gradient.custom_gradient + def my_identity(x): + + def grad(dresult): + return [2 * dresult] + + return x, grad + + self.assertAllEqual(backprop.gradients_function(my_identity)(1.0)[0], 2.0) + if __name__ == '__main__': test.main() ",0,test 02f55400f87b22f7ea0849c39022792d1e381afb,tensorflow/tensorflow,"custom_gradient functions should be able to return their inputs PiperOrigin-RevId: 173723462",custom_gradient.py,"@@ -22,6 +22,7 @@ from tensorflow.python.eager import context from tensorflow.python.eager import tape from tensorflow.python.framework import ops as tf_ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops from tensorflow.python.util import nest from tensorflow.python.util import tf_decorator @@ -72,17 +73,19 @@ def custom_gradient(f): with tape.stop_recording(): result, grad_fn = f(*args, **kwargs) + flat_result = nest.flatten(result) + # TODO(apassos) consider removing the identity below. 
+ flat_result = [gen_array_ops.identity(x) for x in flat_result] def actual_grad_fn(*outputs): return nest.flatten(grad_fn(*outputs)) - flat_result = nest.flatten(result) tape.record_operation( f.__name__, flat_result, input_tensors, actual_grad_fn) flat_result = list(flat_result) - return result + return nest.pack_sequence_as(result, flat_result) return tf_decorator.make_decorator(f, decorated) ",0,test 8dee064f1b06ca3cf225daa16c9a2a75ddd323f1,tensorflow/tensorflow,"Call Py_CLEAR on dead fields during TF_RESOURCE-to-ndarray conversion Change: 152338333",tf_session_helper.cc,"@@ -375,6 +375,8 @@ Status GetPyArrayDescrForTensor(const TF_Tensor* tensor, PyObject* fields = PyList_New(1); PyList_SetItem(fields, 0, field); int convert_result = PyArray_DescrConverter(fields, descr); + Py_CLEAR(field); + Py_CLEAR(fields); if (convert_result != 1) { return errors::Internal(""Failed to create numpy array description for "", ""TF_RESOURCE-type tensor""); ",0,train 5a6117908e8ddf563333459d5eea825db51cc6d0,tensorflow/tensorflow,"The behavior of outer_dimensions was different than before when the selected indices had a 0. Making it emulate the old behavior because gather stopped working for length 0 Tensors. e.g.: Gathering row 1 from a Tensor of shape [2, 0] should result in a [1, 0] Tensor, but instead resulted in an error like: AttributeError: index 1 is not in [0, 2) Change: 119998790",tensor.cc,"@@ -714,34 +714,43 @@ void Tensor::FillDescription(TensorDescription* description) const { } } -gtl::InlinedVector Tensor::ComputeFlatInnerDims( +gtl::InlinedVector Tensor::ComputeFlatInnerDims( int64 num_out_dims) const { - gtl::InlinedVector out_dims(num_out_dims, 0); + if (num_out_dims == dims()) { + return shape_.dim_sizes(); + } + gtl::InlinedVector out_dims(num_out_dims, 0); const int64 num_elements = NumElements(); - if (num_elements != 0) { - int64 prod_out_dims = 1; - for (int64 out_dim = num_out_dims - 1; out_dim > 0; --out_dim) { - const int64 in_dim = out_dim + (dims() - num_out_dims); - out_dims[out_dim] = - (in_dim >= dims() || in_dim < 0) ? 1 : dim_size(in_dim); - prod_out_dims *= out_dims[out_dim]; - } + int64 prod_out_dims = 1; + for (int64 out_dim = num_out_dims - 1; out_dim > 0; --out_dim) { + const int64 in_dim = out_dim + (dims() - num_out_dims); + out_dims[out_dim] = (in_dim >= dims() || in_dim < 0) ? 1 : dim_size(in_dim); + prod_out_dims *= out_dims[out_dim]; + } + if (prod_out_dims != 0) { out_dims[0] = num_elements / prod_out_dims; + } else { + out_dims[0] = 0; } return out_dims; } -gtl::InlinedVector Tensor::ComputeFlatOuterDims( +gtl::InlinedVector Tensor::ComputeFlatOuterDims( int64 num_out_dims) const { - gtl::InlinedVector out_dims(num_out_dims, 0); + if (num_out_dims == dims()) { + return shape_.dim_sizes(); + } + gtl::InlinedVector out_dims(num_out_dims, 0); const int64 num_elements = NumElements(); - if (num_elements != 0) { - int64 prod_out_dims = 1; - for (int64 out_dim = 0; out_dim < num_out_dims - 1; ++out_dim) { - out_dims[out_dim] = out_dim >= dims() ? 1 : dim_size(out_dim); - prod_out_dims *= out_dims[out_dim]; - } + int64 prod_out_dims = 1; + for (int64 out_dim = 0; out_dim < num_out_dims - 1; ++out_dim) { + out_dims[out_dim] = out_dim >= dims() ? 
1 : dim_size(out_dim); + prod_out_dims *= out_dims[out_dim]; + } + if (prod_out_dims != 0) { out_dims[num_out_dims - 1] = num_elements / prod_out_dims; + } else { + out_dims[num_out_dims - 1] = 0; } return out_dims; } ",0,test 5a6117908e8ddf563333459d5eea825db51cc6d0,tensorflow/tensorflow,"The behavior of outer_dimensions was different than before when the selected indices had a 0. Making it emulate the old behavior because gather stopped working for length 0 Tensors. e.g.: Gathering row 1 from a Tensor of shape [2, 0] should result in a [1, 0] Tensor, but instead resulted in an error like: AttributeError: index 1 is not in [0, 2) Change: 119998790",tensor.h,"@@ -361,8 +361,11 @@ class Tensor { void FillDimsAndValidateCompatibleShape( gtl::ArraySlice new_sizes, Eigen::array* dims) const; - gtl::InlinedVector ComputeFlatInnerDims(int64 num_out_dims) const; - gtl::InlinedVector ComputeFlatOuterDims(int64 num_out_dims) const; + + // TODO(rmlarsen): These shouldn't hardcode '4' so that it lines up with + // TensorShape's InlineVector. + gtl::InlinedVector ComputeFlatInnerDims(int64 num_out_dims) const; + gtl::InlinedVector ComputeFlatOuterDims(int64 num_out_dims) const; TensorShape shape_; TensorBuffer* buf_; ",0,test 5a6117908e8ddf563333459d5eea825db51cc6d0,tensorflow/tensorflow,"The behavior of outer_dimensions was different than before when the selected indices had a 0. Making it emulate the old behavior because gather stopped working for length 0 Tensors. e.g.: Gathering row 1 from a Tensor of shape [2, 0] should result in a [1, 0] Tensor, but instead resulted in an error like: AttributeError: index 1 is not in [0, 2) Change: 119998790",tensor_test.cc,"@@ -267,6 +267,46 @@ TEST(Tensor_Float, Reshape) { EXPECT_EQ(flat_outer_dims(0, 0, 0, 0, 0), 0.01f); EXPECT_EQ(flat_outer_dims(1, 2, 3, 4, 0), 0.02f); } + + Tensor zero_t(DT_FLOAT, TensorShape({3, 0, 2, 0, 5})); + { + auto flat_outer_dims = zero_t.flat_outer_dims(); + EXPECT_EQ(3, flat_outer_dims.dimension(0)); + EXPECT_EQ(0, flat_outer_dims.dimension(1)); + } + { + auto flat_outer_dims = zero_t.flat_outer_dims(); + EXPECT_EQ(3, flat_outer_dims.dimension(0)); + EXPECT_EQ(0, flat_outer_dims.dimension(1)); + EXPECT_EQ(0, flat_outer_dims.dimension(2)); + } + { + auto flat_outer_dims = zero_t.flat_outer_dims(); + EXPECT_EQ(3, flat_outer_dims.dimension(0)); + EXPECT_EQ(0, flat_outer_dims.dimension(1)); + EXPECT_EQ(2, flat_outer_dims.dimension(2)); + EXPECT_EQ(0, flat_outer_dims.dimension(3)); + EXPECT_EQ(5, flat_outer_dims.dimension(4)); + } + { + auto flat_inner_dims = zero_t.flat_inner_dims(); + EXPECT_EQ(0, flat_inner_dims.dimension(0)); + EXPECT_EQ(5, flat_inner_dims.dimension(1)); + } + { + auto flat_inner_dims = zero_t.flat_inner_dims(); + EXPECT_EQ(0, flat_inner_dims.dimension(0)); + EXPECT_EQ(0, flat_inner_dims.dimension(1)); + EXPECT_EQ(5, flat_inner_dims.dimension(2)); + } + { + auto flat_inner_dims = zero_t.flat_inner_dims(); + EXPECT_EQ(3, flat_inner_dims.dimension(0)); + EXPECT_EQ(0, flat_inner_dims.dimension(1)); + EXPECT_EQ(2, flat_inner_dims.dimension(2)); + EXPECT_EQ(0, flat_inner_dims.dimension(3)); + EXPECT_EQ(5, flat_inner_dims.dimension(4)); + } } TEST(Tensor_Scalar, Basics) { ",0,test 5a6117908e8ddf563333459d5eea825db51cc6d0,tensorflow/tensorflow,"The behavior of outer_dimensions was different than before when the selected indices had a 0. Making it emulate the old behavior because gather stopped working for length 0 Tensors. 
e.g.: Gathering row 1 from a Tensor of shape [2, 0] should result in a [1, 0] Tensor, but instead resulted in an error like: AttributeError: index 1 is not in [0, 2) Change: 119998790",gather_op_test.cc,"@@ -78,6 +78,19 @@ TEST_F(GatherOpTest, Simple_TwoD32) { test::ExpectTensorEqual(expected, *GetOutput(0)); } +TEST_F(GatherOpTest, ZeroSize_TwoD32) { + MakeOp(DT_INT32); + + // Feed and run + AddInputFromArray(TensorShape({5, 0}), {}); + AddInputFromArray(TensorShape({4}), {0, 4, 0, 2}); + TF_ASSERT_OK(RunOpKernel()); + + // Check the output. + Tensor expected(allocator(), DT_FLOAT, TensorShape({4, 0})); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + TEST_F(GatherOpTest, Simple_TwoD64) { MakeOp(DT_INT64); ",0,test 2cb73e33cefd00b51672cf59c588b95c0af79223,tensorflow/tensorflow,"[MLIR:HLO] Extend CustomCall to support multiple outputs. - Extend MHLO CustomCall to have multiple tensors as results. - Extend LHLO CustomCall to have multiple memrefs for output operands. - Fix HLO->LHLO and XLA HLO->LHLO mapping for CustomCall to setup the operand_segment_sizes attribute correctly. PiperOrigin-RevId: 342067762 Change-Id: Ic7cc31683168c9e0802a19d90831333abddbe7bf",hlo_legalize_to_lhlo.cc,"@@ -165,6 +165,32 @@ class HloToLhloOpConverter : public BaseOpConversion { } }; +struct HloToLhloCustomCallOpConverter + : public BaseOpConversion { + public: + using BaseOpConversion::BaseOpConversion; + + LogicalResult matchAndRewrite( + mhlo::CustomCallOp hloOp, ArrayRef operands, + ConversionPatternRewriter& rewriter) const final { + Operation* op = hloOp.getOperation(); + SmallVector buffer_args(operands.begin(), operands.end()); + if (failed(ConvertResults(op, buffer_args, rewriter))) return failure(); + + auto lhloOp = rewriter.create( + op->getLoc(), llvm::None, buffer_args, op->getAttrs()); + // Setup AttrSizedOperandSegments attribute to indicate number of operands + // for args and outputs. + const int32_t segments[2] = {static_cast(operands.size()), + static_cast(op->getNumResults())}; + lhloOp.setAttr(lhloOp.getOperandSegmentSizeAttr(), + rewriter.getI32VectorAttr(segments)); + + rewriter.replaceOp(op, ArrayRef(buffer_args).slice(operands.size())); + return success(); + } +}; + struct HloToLhloDynamicBroadcastInDimOpConverter : public BaseOpConversion { public: @@ -572,6 +598,7 @@ void populateHLOToLHLOConversionPattern(MLIRContext* context, OwningRewritePatternList* patterns) { // clang-format off patterns->insert< + HloToLhloCustomCallOpConverter, HloToLhloDotGeneralOpConverter, HloToLhloDynamicBroadcastInDimOpConverter, HloToLhloDynamicReshapeConverter, @@ -588,7 +615,6 @@ void populateHLOToLHLOConversionPattern(MLIRContext* context, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, - HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, ",0,test 2cb73e33cefd00b51672cf59c588b95c0af79223,tensorflow/tensorflow,"[MLIR:HLO] Extend CustomCall to support multiple outputs. - Extend MHLO CustomCall to have multiple tensors as results. - Extend LHLO CustomCall to have multiple memrefs for output operands. - Fix HLO->LHLO and XLA HLO->LHLO mapping for CustomCall to setup the operand_segment_sizes attribute correctly. 
PiperOrigin-RevId: 342067762 Change-Id: Ic7cc31683168c9e0802a19d90831333abddbe7bf",mlir_hlo_builder.cc,"@@ -149,7 +149,7 @@ StatusOr MlirHloBuilder::CustomCallInternal( loc_, ty, GetValues(operands), builder_.getStringAttr(call_target_name), /*has_side_effect=*/builder_.getBoolAttr(has_side_effect), builder_.getStringAttr(opaque)); - return MakeXlaOp(op); + return MakeXlaOp(op.getResult(0)); } StatusOr MlirHloBuilder::ReduceInternal( ",0,test 2cb73e33cefd00b51672cf59c588b95c0af79223,tensorflow/tensorflow,"[MLIR:HLO] Extend CustomCall to support multiple outputs. - Extend MHLO CustomCall to have multiple tensors as results. - Extend LHLO CustomCall to have multiple memrefs for output operands. - Fix HLO->LHLO and XLA HLO->LHLO mapping for CustomCall to setup the operand_segment_sizes attribute correctly. PiperOrigin-RevId: 342067762 Change-Id: Ic7cc31683168c9e0802a19d90831333abddbe7bf",mlir_hlo_to_hlo.cc,"@@ -770,11 +770,12 @@ LogicalResult ExportXlaOp(ConvertOp op, OpLoweringContext ctx) { LogicalResult ExportXlaOp(CustomCallOp op, OpLoweringContext ctx) { // XLA client builder API does not support generating custom call instructions // with side effect. - if (op.has_side_effect()) return failure(); + if (op.has_side_effect() || op.getNumResults() != 1) return failure(); + Value result = op.getResult(0); auto& value_map = *ctx.values; - value_map[op] = xla::CustomCall( + value_map[result] = xla::CustomCall( ctx.builder, std::string(op.call_target_name()), GetTuple(op.args(), ctx), - xla::TypeToShape(op.getType()), std::string(op.backend_config())); + xla::TypeToShape(result.getType()), std::string(op.backend_config())); return success(); } ",0,test 2cb73e33cefd00b51672cf59c588b95c0af79223,tensorflow/tensorflow,"[MLIR:HLO] Extend CustomCall to support multiple outputs. - Extend MHLO CustomCall to have multiple tensors as results. - Extend LHLO CustomCall to have multiple memrefs for output operands. - Fix HLO->LHLO and XLA HLO->LHLO mapping for CustomCall to setup the operand_segment_sizes attribute correctly. PiperOrigin-RevId: 342067762 Change-Id: Ic7cc31683168c9e0802a19d90831333abddbe7bf",legalize_tf.cc,"@@ -5155,7 +5155,7 @@ class ConvertXlaShardingOp : public OpRewritePattern { /*has_side_effect=*/rewriter.getBoolAttr(false), /*backend_config=*/rewriter.getStringAttr("""")); custom_call.setAttr(kShardingAttr, op._XlaShardingAttr()); - rewriter.replaceOp(op, custom_call.getResult()); + rewriter.replaceOp(op, custom_call.getResult(0)); return success(); } ",0,test 2cb73e33cefd00b51672cf59c588b95c0af79223,tensorflow/tensorflow,"[MLIR:HLO] Extend CustomCall to support multiple outputs. - Extend MHLO CustomCall to have multiple tensors as results. - Extend LHLO CustomCall to have multiple memrefs for output operands. - Fix HLO->LHLO and XLA HLO->LHLO mapping for CustomCall to setup the operand_segment_sizes attribute correctly. 
PiperOrigin-RevId: 342067762 Change-Id: Ic7cc31683168c9e0802a19d90831333abddbe7bf",mhlo_to_lhlo_with_xla.cc,"@@ -188,23 +188,31 @@ class XlaHloToLhloPass } // namespace +Status LhloDialectEmitter::CreateOperands( + HloInstruction* instr, llvm::SmallVectorImpl& operands, + size_t& num_arguments, size_t& num_results) { + for (const HloInstruction* operand : instr->operands()) { + TF_RETURN_IF_ERROR(GetOrCreateView(operand, &operands)); + } + num_arguments = operands.size(); + TF_RETURN_IF_ERROR(GetOrCreateView(instr, &operands)); + num_results = operands.size() - num_arguments; + return Status::OK(); +} + template -StatusOr LhloDialectEmitter::CreateOpWithoutAttrs( - HloInstruction* instr) { +StatusOr LhloDialectEmitter::CreateOpWithoutAttrs(HloInstruction* instr, + size_t& num_arguments, + size_t& num_results) { Location loc = getLocation(instr); std::pair attrs[] = { {Identifier::get(""name"", builder_.getContext()), builder_.getStringAttr(instr->name())}, }; - ArrayRef rets{}; - llvm::SmallVector operands; - for (const HloInstruction* operand : instr->operands()) { - TF_RETURN_IF_ERROR(GetOrCreateView(operand, &operands)); - } - TF_RETURN_IF_ERROR(GetOrCreateView(instr, &operands)); - - return builder_.create(loc, rets, operands, attrs); + TF_RETURN_IF_ERROR( + CreateOperands(instr, operands, num_arguments, num_results)); + return builder_.create(loc, llvm::None, operands, attrs); } StatusOr LhloDialectEmitter::EmitOp(HloInstruction* instr) { @@ -479,13 +487,19 @@ StatusOr LhloDialectEmitter::EmitSelectAndScatterOp( StatusOr LhloDialectEmitter::EmitCustomCallOp( HloInstruction* instr) { + size_t num_arguments, num_results; TF_ASSIGN_OR_RETURN(auto custom_call, - CreateOpWithoutAttrs(instr)); + CreateOpWithoutAttrs( + instr, num_arguments, num_results)); auto* custom_call_instr = ::xla::Cast<::xla::HloCustomCallInstruction>(instr); custom_call.call_target_nameAttr( builder_.getStringAttr(custom_call_instr->custom_call_target())); custom_call.backend_configAttr( builder_.getStringAttr(custom_call_instr->opaque())); + const int32_t segments[2] = {static_cast(num_arguments), + static_cast(num_results)}; + custom_call.setAttr(lmhlo::CustomCallOp::getOperandSegmentSizeAttr(), + builder_.getI32VectorAttr(segments)); return custom_call; } ",0,test 2cb73e33cefd00b51672cf59c588b95c0af79223,tensorflow/tensorflow,"[MLIR:HLO] Extend CustomCall to support multiple outputs. - Extend MHLO CustomCall to have multiple tensors as results. - Extend LHLO CustomCall to have multiple memrefs for output operands. - Fix HLO->LHLO and XLA HLO->LHLO mapping for CustomCall to setup the operand_segment_sizes attribute correctly. 
PiperOrigin-RevId: 342067762 Change-Id: Ic7cc31683168c9e0802a19d90831333abddbe7bf",mhlo_to_lhlo_with_xla.h,"@@ -58,8 +58,20 @@ class LhloDialectEmitter : public ::xla::DfsHloVisitorWithDefault { ::xla::StatusOr EmitCustomCallOp( ::xla::HloInstruction* instr); + ::xla::Status CreateOperands(::xla::HloInstruction* instr, + SmallVectorImpl& operands, + size_t& num_arguments, size_t& num_results); + + template + ::xla::StatusOr CreateOpWithoutAttrs(::xla::HloInstruction* instr) { + size_t unused; + return CreateOpWithoutAttrs(instr, unused, unused); + } + template - ::xla::StatusOr CreateOpWithoutAttrs(::xla::HloInstruction* instr); + ::xla::StatusOr CreateOpWithoutAttrs(::xla::HloInstruction* instr, + size_t& num_arguments, + size_t& num_results); template DenseIntElementsAttr GetI64DenseElementsAttr(const T& container) { @@ -117,25 +129,25 @@ class LhloDialectEmitter : public ::xla::DfsHloVisitorWithDefault { // This map provides access to MLIR buffers for each HLO buffer allocation. // The MLIR buffers are all `memref<{size}xi8>` and correspond to function - // parameters. It is populated at the beginning of the processing with all the - // buffer allocations and is unchanged afterward. Every HLOInstruction is - // using a ""slice"" of the buffer allocation and providing shape, layout, and - // Dtype. An MLIR view is used separately to model slices into the allocations - // (see below). + // parameters. It is populated at the beginning of the processing with all + // the buffer allocations and is unchanged afterward. Every HLOInstruction + // is using a ""slice"" of the buffer allocation and providing shape, layout, + // and Dtype. An MLIR view is used separately to model slices into the + // allocations (see below). llvm::DenseMap allocations_; // This map provides access to MLIR buffers for each HLO instruction, keyed // instruction identity. A slice is contained in a BufferAllocation, and has // an offset and a size. // - // As for why we don't use HloInstruction*, see GetOrCreateView(), but mostly - // we want to leverage better of the aliased buffers. + // As for why we don't use HloInstruction*, see GetOrCreateView(), but + // mostly we want to leverage better of the aliased buffers. // // If the HloInstruction is a tuple, all leaf nodes are stored flattened. // Otherwise, there will be a single buffer. // - // An MLIR buffer is either an input parameter, or a ViewOp in the case where - // the slice is only part of its allocation. + // An MLIR buffer is either an input parameter, or a ViewOp in the case + // where the slice is only part of its allocation. // // `slices_` is populated lazily in the `GetOrCreateView()` helper as we // process every instruction. @@ -153,7 +165,8 @@ class LhloDialectEmitter : public ::xla::DfsHloVisitorWithDefault { // computation. ModuleOp module_; - // The builder keeps track of the current insertion point in the MLIR module. + // The builder keeps track of the current insertion point in the MLIR + // module. OpBuilder builder_; // Convenient ""cached"" access to this widely used MLIR type (i8). Type i8_type_; ",0,test 2cb73e33cefd00b51672cf59c588b95c0af79223,tensorflow/tensorflow,"[MLIR:HLO] Extend CustomCall to support multiple outputs. - Extend MHLO CustomCall to have multiple tensors as results. - Extend LHLO CustomCall to have multiple memrefs for output operands. - Fix HLO->LHLO and XLA HLO->LHLO mapping for CustomCall to setup the operand_segment_sizes attribute correctly. 
PiperOrigin-RevId: 342067762 Change-Id: Ic7cc31683168c9e0802a19d90831333abddbe7bf",ir_emitter_unnested.cc,"@@ -698,7 +698,9 @@ Status IrEmitterUnnested::EmitSliceToDynamicFromMlir( const Shape& input_shape = TypeToShape(slice_to_dynamic.args().front().getType()); - const Shape& data_shape = TypeToShape(slice_to_dynamic.output().getType()); + TF_RET_CHECK(slice_to_dynamic.output().size() == 1); + const Shape& data_shape = + TypeToShape(slice_to_dynamic.output().front().getType()); // TODO(jurahul): data_shape here is the static shape of the output (which has // a dynamic shape in XLA). Currently, we are mapping that to a static shaped ",0,test d26ee9801c8117f7fd6297a05a82eab98023a2c3,tensorflow/tensorflow,bug fix in the ROCm python implementation for gpu_lstm op,recurrent_v2.py,"@@ -1380,6 +1380,8 @@ def gpu_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, mask, # o is output gate weights. # c is cell gate weights. weights = [weights[x] for x in (0, 1, 3, 2, 4, 5, 7, 6)] + # full_bias is a tensor of shape (8*n,) + full_bias = array_ops.split(full_bias, 8, axis=0) full_bias = [full_bias[x] for x in (0, 1, 3, 2, 4, 5, 7, 6)] params = _canonical_to_params( ",0,train e6ab20a481029d8839114867bf070ac6ebf8d0f3,tensorflow/tensorflow,"Skips another two test cases in ttf test where we don't have cpu kernels for fft. Change: 110621369",fft_ops_test.py,"@@ -74,16 +74,18 @@ class FFT2DOpsTest(tf.test.TestCase): self._Compare(gen(shape)) def testEmpty(self): - x = np.zeros([40, 0]).astype(np.complex64) - self.assertEqual(x.shape, self._tfFFT2D(x).shape) - self.assertEqual(x.shape, self._tfIFFT2D(x).shape) + if tf.test.IsBuiltWithCuda(): + x = np.zeros([40, 0]).astype(np.complex64) + self.assertEqual(x.shape, self._tfFFT2D(x).shape) + self.assertEqual(x.shape, self._tfIFFT2D(x).shape) def testError(self): - x = np.zeros([1, 2, 3]).astype(np.complex64) - with self.assertRaisesOpError(""Input is not a matrix""): - self._tfFFT2D(x) - with self.assertRaisesOpError(""Input is not a matrix""): - self._tfIFFT2D(x) + if tf.test.IsBuiltWithCuda(): + x = np.zeros([1, 2, 3]).astype(np.complex64) + with self.assertRaisesOpError(""Input is not a matrix""): + self._tfFFT2D(x) + with self.assertRaisesOpError(""Input is not a matrix""): + self._tfIFFT2D(x) if __name__ == ""__main__"": ",0,train 4f8ce7437431e9a1a47535ff05ef5011a694f244,tensorflow/tensorflow,"[TF2XLA] Deprecate xla.experimental.compile PiperOrigin-RevId: 325151668 Change-Id: I1a0ac5d58e8237cf47785034086c7cdc240ba116",xla.py,"@@ -37,6 +37,7 @@ from tensorflow.python.util import compat from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect from tensorflow.python.util.compat import collections_abc +from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.tf_export import tf_export _XLA_COMPILE_ATTR = '_xla_compile_id' @@ -64,6 +65,10 @@ _UNSUPPORTED_OPS = set([ @tf_export('xla.experimental.compile') +@deprecated( + None, 'xla.experimental.compile is deprecated. Consider using ' + 'tf.function(experimental_compile=True)', + warn_once=True) def compile(computation, inputs=None): # pylint: disable=redefined-builtin """"""Builds an operator that compiles and runs `computation` with XLA. ",0,train d604689ea7a24dfc4f8994825b3ca9e0c63ddc9b,tensorflow/tensorflow,"Add some missing dependencies so that the TPU version of TensorFlow builds PiperOrigin-RevId: 323477747 Change-Id: I13393c728bda8f6c541955513a7e6315799ec844",tpu_compilation_device.cc,"@@ -18,7 +18,14 @@ limitations under the License. 
#include ""tensorflow/core/tpu/tpu_node_device_util.h"" namespace tensorflow { +namespace { -REGISTER_XLA_BACKEND(DEVICE_TPU_XLA_JIT, kTpuAllTypes, TpuOpFilter); +bool RegisterTpuXlaBackend() { + REGISTER_XLA_BACKEND(DEVICE_TPU_XLA_JIT, kTpuAllTypes, TpuOpFilter); + return true; +} +static bool tpu_xla_backend_registered = RegisterTpuXlaBackend(); + +} // namespace } // namespace tensorflow ",0,train b6ed9186089de852c933244a7d772f836cc3eb27,tensorflow/tensorflow,"Update tensorboard dependency to 1.13.x TensorBoard release: https://pypi.org/project/tensorboard/1.13.0/ PiperOrigin-RevId: 235563447",setup.py,"@@ -57,7 +57,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.14.5, < 2.0', 'six >= 1.10.0', 'protobuf >= 3.6.1', - 'tensorboard >= 1.12.0, < 1.13.0', + 'tensorboard >= 1.13.0, < 1.14.0', 'tensorflow_estimator >= 1.13.0rc0, < 1.14.0rc0', 'termcolor >= 1.1.0', ] ",0,test 8cdd551adad84cb10631c72ad8b931061b350166,tensorflow/tensorflow,Adding ROCm support to optional_ops,optional_ops.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ #define EIGEN_USE_THREADS -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU #include ""tensorflow/core/kernels/data/optional_ops.h"" @@ -34,4 +34,4 @@ REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION(ADD_VARIANT_BINARY_OP, DEVICE_GPU, } // namespace data } // namespace tensorflow -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM ",0,test 023ab92b0a2b63392214443c20f3279bb75845d4,tensorflow/tensorflow,refactoring image_resize_state,resize_area_op.cc,"@@ -19,7 +19,6 @@ limitations under the License. #include #include -#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/register_types.h"" #include ""tensorflow/core/framework/tensor.h"" @@ -28,6 +27,7 @@ limitations under the License. #include ""tensorflow/core/lib/core/status.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/util/image_resizer_state.h"" +#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" namespace tensorflow { @@ -144,17 +144,17 @@ class ResizeAreaOp : public OpKernel { } void Compute(OpKernelContext* context) override { - const Tensor& input = context->input(0); // The op always did the correct thing with regard to pixel centers, so we // always pass false here for half_pixel_centers since ImageResizerState // enforces that if align_corners_ is true, half_pixel_centers must be // false. ImageResizerState st(align_corners_, /*unused half_pixel_centers=*/false); - st.ValidateAndCreateOutput(context, input); + st.ValidateAndCreateOutput(context); if (!context->status().ok()) return; - typename TTypes::ConstTensor input_data(input.tensor()); + typename TTypes::ConstTensor input_data( + context->input(0).tensor()); // Precompute values used when iterating over x coordinates within a row. // Note that it may be useful to cache x_interps for a given ",0,train 023ab92b0a2b63392214443c20f3279bb75845d4,tensorflow/tensorflow,refactoring image_resize_state,resize_bicubic_op.cc,"@@ -21,7 +21,6 @@ limitations under the License. 
#include #include -#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/register_types.h"" #include ""tensorflow/core/framework/tensor.h"" @@ -30,6 +29,7 @@ limitations under the License. #include ""tensorflow/core/lib/core/status.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/util/image_resizer_state.h"" +#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" namespace tensorflow { namespace { @@ -557,13 +557,13 @@ class ResizeBicubicOp : public OpKernel { } void Compute(OpKernelContext* context) override { - const Tensor& input = context->input(0); ImageResizerState st(align_corners_, half_pixel_centers_); - st.ValidateAndCreateOutput(context, input); + st.ValidateAndCreateOutput(context); if (!context->status().ok()) return; - typename TTypes::ConstTensor input_data(input.tensor()); + typename TTypes::ConstTensor input_data( + context->input(0).tensor()); TTypes::Tensor output_data = st.output->tensor(); interpolate_with_caching(input_data, st, half_pixel_centers_, @@ -587,16 +587,15 @@ class ResizeBicubicOpGrad : public OpKernel { void Compute(OpKernelContext* context) override { // Validate input. - // First argument is gradient with respect to resized image. - const Tensor& input = context->input(0); - const Tensor& original_image = context->input(1); - ImageResizerGradientState st(align_corners_, half_pixel_centers_); - st.ValidateAndCreateOutput(context, input, original_image); + st.ValidateAndCreateOutput(context); if (!context->status().ok()) return; - TTypes::ConstTensor input_grad = input.tensor(); + // First argument is gradient with respect to resized image. + TTypes::ConstTensor input_grad = + context->input(0).tensor(); + typename TTypes::Tensor output_grad(st.output->tensor()); ResizeBicubicGrad(input_grad, st, half_pixel_centers_, output_grad); ",0,train 023ab92b0a2b63392214443c20f3279bb75845d4,tensorflow/tensorflow,refactoring image_resize_state,resize_bilinear_op.cc,"@@ -28,7 +28,6 @@ limitations under the License. #include -#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/register_types.h"" #include ""tensorflow/core/framework/tensor.h"" @@ -38,6 +37,7 @@ limitations under the License. #include ""tensorflow/core/lib/core/status.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/util/image_resizer_state.h"" +#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" namespace tensorflow { @@ -54,16 +54,16 @@ class ResizeBilinearOp : public OpKernel { } void Compute(OpKernelContext* context) override { - const Tensor& input = context->input(0); ImageResizerState st(align_corners_, half_pixel_centers_); - st.ValidateAndCreateOutput(context, input); + st.ValidateAndCreateOutput(context); if (!context->status().ok()) return; // Return if the output is empty. if (st.output->NumElements() == 0) return; - typename TTypes::ConstTensor image_data(input.tensor()); + typename TTypes::ConstTensor image_data( + context->input(0).tensor()); TTypes::Tensor output_data = st.output->tensor(); functor::ResizeBilinear()( @@ -370,16 +370,14 @@ class ResizeBilinearOpGrad : public OpKernel { void Compute(OpKernelContext* context) override { // Validate input. - // First argument is gradient with respect to resized image. 
- const Tensor& input = context->input(0); - const Tensor& original_image = context->input(1); - ImageResizerGradientState st(align_corners_, half_pixel_centers_); - st.ValidateAndCreateOutput(context, input, original_image); + st.ValidateAndCreateOutput(context); if (!context->status().ok()) return; - TTypes::ConstTensor input_grad = input.tensor(); + // First argument is gradient with respect to resized image. + TTypes::ConstTensor input_grad = + context->input(0).tensor(); if (!std::is_same::value && !std::is_same::value) { ",0,train 023ab92b0a2b63392214443c20f3279bb75845d4,tensorflow/tensorflow,refactoring image_resize_state,resize_nearest_neighbor_op.cc,"@@ -20,7 +20,6 @@ limitations under the License. #include -#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/register_types.h"" #include ""tensorflow/core/framework/tensor.h"" @@ -29,6 +28,7 @@ limitations under the License. #include ""tensorflow/core/lib/core/status.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/util/image_resizer_state.h"" +#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" namespace tensorflow { @@ -46,9 +46,8 @@ class ResizeNearestNeighborOp : public OpKernel { } void Compute(OpKernelContext* context) override { - const Tensor& input = context->input(0); ImageResizerState st(align_corners_, half_pixel_centers_); - st.ValidateAndCreateOutput(context, input); + st.ValidateAndCreateOutput(context); if (!context->status().ok()) return; @@ -59,7 +58,8 @@ class ResizeNearestNeighborOp : public OpKernel { // Return if the output is empty. if (st.output->NumElements() == 0) return; - typename TTypes::ConstTensor input_data(input.tensor()); + typename TTypes::ConstTensor input_data( + context->input(0).tensor()); typename TTypes::Tensor output_data(st.output->tensor()); bool status; ",0,train 023ab92b0a2b63392214443c20f3279bb75845d4,tensorflow/tensorflow,refactoring image_resize_state,quantized_resize_bilinear_op.cc,"@@ -700,19 +700,19 @@ class QuantizedResizeBilinearOp : public OpKernel { } void Compute(OpKernelContext* context) override { - const Tensor& input = context->input(0); const float in_min = context->input(2).flat()(0); const float in_max = context->input(3).flat()(0); ImageResizerState st(align_corners_, false); - st.ValidateAndCreateOutput(context, input); + st.ValidateAndCreateOutput(context); if (!context->status().ok()) return; // Return if the output is empty. if (st.output->NumElements() == 0) return; - typename TTypes::ConstTensor image_data(input.tensor()); + typename TTypes::ConstTensor image_data( + context->input(0).tensor()); typename TTypes::Tensor output_data(st.output->tensor()); ResizeBilinear(image_data, st.height_scale, st.width_scale, in_min, ",0,train 023ab92b0a2b63392214443c20f3279bb75845d4,tensorflow/tensorflow,refactoring image_resize_state,image_resizer_state.h,"@@ -27,13 +27,13 @@ limitations under the License. 
#include #include -#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" #include ""tensorflow/core/framework/bounds_check.h"" #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/register_types.h"" #include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/tensor_shape.h"" #include ""tensorflow/core/framework/types.h"" +#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" namespace tensorflow { @@ -76,16 +76,38 @@ struct ImageResizerState { // height_scale and width_scale, and calculates the output size. // If any of these operations fails, it sets an error status in // the context, which the caller must check. - void ValidateAndCalculateOutputSize(OpKernelContext* context, - const Tensor& input) { + void ValidateAndCalculateOutputSize(OpKernelContext* context) { OP_REQUIRES( context, !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_), errors::InvalidArgument(""If half_pixel_centers is True, "" ""align_corners must be False."")); - OP_REQUIRES(context, input.dims() == 4, + + const TensorShape& input_shape = context->input(0).shape(); + OP_REQUIRES(context, input_shape.dims() == 4, errors::InvalidArgument(""input must be 4-dimensional"", - input.shape().DebugString())); + input_shape.DebugString())); + batch_size = input_shape.dim_size(0); + channels = input_shape.dim_size(3); + OP_REQUIRES( + context, channels > 0, + errors::InvalidArgument(""image must have at least one channel"")); + + // Verify and assign `in_height` and `in_width`. + OP_REQUIRES( + context, input_shape.dim_size(1) > 0 && input_shape.dim_size(2) > 0, + errors::InvalidArgument(""input image must be of non-zero size"")); + OP_REQUIRES( + context, + FastBoundsCheck(input_shape.dim_size(1), + std::numeric_limits::max()) && + FastBoundsCheck(input_shape.dim_size(2), + std::numeric_limits::max()), + errors::InvalidArgument(""input sizes must be between 0 and max int32"")); + in_height = static_cast(input_shape.dim_size(1)); + in_width = static_cast(input_shape.dim_size(2)); + + // Verify the output tensor's shape. const Tensor& shape_t = context->input(1); OP_REQUIRES(context, shape_t.dims() == 1, errors::InvalidArgument(""shape_t must be 1-dimensional"", @@ -93,28 +115,14 @@ struct ImageResizerState { OP_REQUIRES(context, shape_t.NumElements() == 2, errors::InvalidArgument(""shape_t must have two elements"", shape_t.shape().DebugString())); + + // Verify and assign `out_height` and `out_width`. 
auto Svec = shape_t.vec(); - batch_size = input.dim_size(0); out_height = internal::SubtleMustCopy(Svec(0)); out_width = internal::SubtleMustCopy(Svec(1)); - OP_REQUIRES( - context, - FastBoundsCheck(input.dim_size(1), std::numeric_limits::max()) && - FastBoundsCheck(input.dim_size(2), - std::numeric_limits::max()), - errors::InvalidArgument(""input sizes must be between 0 and max int32"")); - - in_height = static_cast(input.dim_size(1)); - in_width = static_cast(input.dim_size(2)); - channels = input.dim_size(3); OP_REQUIRES(context, out_height > 0 && out_width > 0, errors::InvalidArgument(""output dimensions must be positive"")); - OP_REQUIRES( - context, channels > 0, - errors::InvalidArgument(""image must have at least one channel"")); - OP_REQUIRES( - context, input.dim_size(1) > 0 && input.dim_size(2) > 0, - errors::InvalidArgument(""input image must be of non-zero size"")); + height_scale = CalculateResizeScale(in_height, out_height, align_corners_); width_scale = CalculateResizeScale(in_width, out_width, align_corners_); @@ -132,14 +140,14 @@ struct ImageResizerState { } // Calculates all the required variables, and allocates the output. - void ValidateAndCreateOutput(OpKernelContext* context, const Tensor& input) { - ValidateAndCalculateOutputSize(context, input); + void ValidateAndCreateOutput(OpKernelContext* context) { + ValidateAndCalculateOutputSize(context); if (!context->status().ok()) return; - OP_REQUIRES_OK(context, context->allocate_output( - 0, - TensorShape({input.dim_size(0), out_height, - out_width, input.dim_size(3)}), - &output)); + OP_REQUIRES_OK( + context, + context->allocate_output( + 0, TensorShape({batch_size, out_height, out_width, channels}), + &output)); } int64 batch_size; @@ -163,34 +171,29 @@ struct ImageResizerGradientState { : align_corners_(align_corners), half_pixel_centers_(half_pixel_centers) {} - void ValidateAndCreateOutput(OpKernelContext* context, const Tensor& input, - const Tensor& original_image) { + void ValidateAndCreateOutput(OpKernelContext* context) { OP_REQUIRES( context, !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_), errors::InvalidArgument(""If half_pixel_centers is True, "" ""align_corners must be False."")); + const Tensor& input = context->input(0); OP_REQUIRES(context, input.dims() == 4, errors::InvalidArgument(""input_grad must be 4-dimensional"", input.shape().DebugString())); + // Resizers always produce float images, so input gradient must // always be a float. OP_REQUIRES(context, input.dtype() == DT_FLOAT, errors::InvalidArgument(""input_grad must be of type float"", DataTypeString(input.dtype()))); - OP_REQUIRES(context, original_image.dims() == 4, - errors::InvalidArgument(""original_image must be 4-dimensional"", - original_image.shape().DebugString())); - - // Allocate output and initialize to zeros. batch_size = input.dim_size(0); channels = input.dim_size(3); + resized_height = input.dim_size(1); resized_width = input.dim_size(2); - original_height = original_image.dim_size(1); - original_width = original_image.dim_size(2); // The following check is also carried out for the forward op. 
It is added // here to prevent a divide-by-zero exception when either height_scale or @@ -198,6 +201,13 @@ struct ImageResizerGradientState { OP_REQUIRES(context, resized_height > 0 && resized_width > 0, errors::InvalidArgument(""resized dimensions must be positive"")); + const TensorShape& output_shape = context->input(1).shape(); + OP_REQUIRES(context, output_shape.dims() == 4, + errors::InvalidArgument(""original_image must be 4-dimensional"", + output_shape.DebugString())); + original_height = output_shape.dim_size(1); + original_width = output_shape.dim_size(2); + // The following check is also carried out for the forward op. It is added // here to prevent either height_scale or width_scale from being set to // zero, which would cause a divide-by-zero exception in the deterministic @@ -217,7 +227,7 @@ struct ImageResizerGradientState { CalculateResizeScale(original_height, resized_height, align_corners_); width_scale = CalculateResizeScale(original_width, resized_width, align_corners_); - output = nullptr; + OP_REQUIRES_OK(context, context->allocate_output( 0, TensorShape({batch_size, original_height, @@ -233,7 +243,7 @@ struct ImageResizerGradientState { int64 original_width; float height_scale; float width_scale; - Tensor* output; + Tensor* output = nullptr; private: bool align_corners_; ",0,train ddad9749b93f896ba6cafa3e95f52f6562657d0b,tensorflow/tensorflow,TRT Test ConvertTopK in dynamic shape mode,convert_nodes_test.cc,"@@ -1642,13 +1642,18 @@ class OpConverterTest : public ::testing::Test { } // Helper method to run both validation and conversion, and check the output - // shape. + // shapes. void RunValidationAndConversion( const NodeDef& node_def, const Status& status, const char* output_name, const std::vector>& exp_out_dims) { RunValidationAndConversion(node_def, status.code(), status.error_message().c_str(), true); if (status.ok()) { + // TODO(tfeher): Enable this check in explicit_batch_mode. + // In dynamic shape mode the output dims cannot be tested here. In that + // case we need to wait for the concrate input shapes to be defined (by + // setBindingDimensions before enqueue) before we can check the output + // dims. if (converter_->use_implicit_batch()) { for (int i = 0; i < exp_out_dims.size(); i++) { TRT_TensorOrWeights output; @@ -1656,14 +1661,7 @@ class OpConverterTest : public ::testing::Test { TF_EXPECT_OK(GetTensorOrWeights(name.c_str(), &output)); ASSERT_TRUE(output.is_tensor()); if (!exp_out_dims[i].empty()) { - // We only check output shape implicit batch mode. In dynamic shape - // mode we need to wait for the concrate input shapes to be defined - // (by setBindingDimensions before enqueue) before we can check - // whether the output dims are equal. - // - // TODO(tfeher): Enable this check in explicit_batch_mode. - - // Removing batch dim + // Removing batch dim. auto out_dims = std::vector(exp_out_dims[i].begin() + 1, exp_out_dims[i].end()); VLOG(2) << ""Testing output shape for tensor "" << name; @@ -5111,51 +5109,33 @@ TEST_P(OpConverter_FP32_Test, ConvertPool) { } } -TEST_F(OpConverterTest, ConvertTopK) { - // TODO(tmorris): This test isn't setting the input dtype properly. TopK with - // int32 is unsupported by TRT. - for (const auto dtype : {DT_FLOAT}) { - // Get the NodeDef for TopKV2. 
- Scope s = Scope::NewRootScope(); - auto input = ops::Placeholder(s.WithOpName(""input""), dtype); - auto weights = ops::Placeholder(s.WithOpName(""weights""), DT_INT32); - auto topk = ops::TopK(s.WithOpName(""my_topk""), input, weights); - const NodeDef& node_def = topk.operation.node()->def(); - { - // K is a tensor, should fail. - Reset(); - nvinfer1::DataType trt_type; - TF_ASSERT_OK(TfTypeToTrtType(dtype, &trt_type)); - AddTestTensor(""input"", {1, 2, 3}, /*batch_size=*/1, trt_type); - AddTestTensor(""weights"", {2}); - RunValidationAndConversion( - node_def, error::UNIMPLEMENTED, - ""The input \""k\"" for TopKV2 must be a constant, at my_topk""); - } - { - // Ok. - Reset(); - AddTestTensor(""input"", {1, 2, 5}); - AddTestWeights(""weights"", {1}, {2}); - RunValidationAndConversion(node_def); - TRT_TensorOrWeights outputs[2]; - TF_EXPECT_OK(GetTensorOrWeights(""my_topk"", &outputs[0])); - TF_EXPECT_OK(GetTensorOrWeights(""my_topk:1"", &outputs[1])); - for (auto& output : outputs) { - ASSERT_TRUE(output.is_tensor()); - ExpectTrtDimsEqualsArray({1, 2, 2}, output.tensor()->getDimensions()); - } - - const DataVec input_data{ - {""input"", AsTensor({-9, 3, 5, 1, 6, -5, 7, 1, 0, -1})}}; - DataVec output_data{{""my_topk"", ConstructTensor(4)}, - {""my_topk:1"", ConstructTensor(4)}}; - TF_EXPECT_OK(BuildAndRun(input_data, &output_data)); - EXPECT_THAT(GetSpanForData(output_data[0]), - ElementsAre(6, 5, 7, 1)); - EXPECT_THAT(GetSpanForData(output_data[1]), - ElementsAre(4, 2, 1, 2)); - } +TEST_P(OpConverter_FP32_FP16_Test, ConvertTopK) { + // Get the NodeDef for TopKV2. + Scope s = Scope::NewRootScope(); + auto input = ops::Placeholder(s.WithOpName(""input""), tf_type_); + auto weights = ops::Placeholder(s.WithOpName(""weights""), DT_INT32); + auto topk = ops::TopK(s.WithOpName(""my_topk""), input, weights); + const NodeDef& node_def = topk.operation.node()->def(); + { + // K is a tensor, should fail. + Reset(); + AddTestTensor(""input"", {1, 1, 2, 3}); + AddTestTensor(""weights"", {1}, DT_INT32, {}); + RunValidationAndConversion( + node_def, error::UNIMPLEMENTED, + ""The input \""k\"" for TopKV2 must be a constant, at my_topk""); + } + { + // Ok. + Reset(); + AddTestTensor(""input"", {1, 1, 2, 5}, {-9, 3, 5, 1, 6, -5, 7, 1, 0, -1}); + AddTestWeights(""weights"", {1}, {2}); + std::vector> expected_output_dims{{1, 1, 2, 2}, + {1, 1, 2, 2}}; + TestOpConverterMultiOut(""my_topk"", node_def, expected_output_dims, + Status::OK(), Status::OK(), + {ElementsAre(6, 5, 7, 1), ElementsAre(4, 2, 1, 2)}, + {tf_type_, DT_INT32}); } } ",0,test 3cb03d093610e51cf2d36bfbf43c446a5de52941,tensorflow/tensorflow,"Update GraphDef version to 95. PiperOrigin-RevId: 257935668",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 94 // Updated: 2019/7/12 +#define TF_GRAPH_DEF_VERSION 95 // Updated: 2019/7/13 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). 
// ",0,train 459afb493f51095a3e1bfe63c01c982555bf4382,tensorflow/tensorflow,"VLOG(1) buffer allocation stats from gpu_compiler PiperOrigin-RevId: 315411231 Change-Id: Id96519dd8ec69ddf1afadbe81c5a12c47118c778",gpu_compiler.cc,"@@ -499,6 +499,8 @@ StatusOr> GpuCompiler::RunBackend( /*allocate_buffers_for_constants=*/true, /*colorer=*/BufferAssigner::DefaultColorer(), /*must_not_live_out=*/{}, GetCanShareBuffer())); + VLOG(1) << ""Buffer Assignment Stats "" + << buffer_assignment->GetStats().ToString(); DumpHloModuleIfEnabled(*module, *buffer_assignment, ""after_optimizations""); IrEmitterContext ir_emitter_context( ",0,train f38dd432f4300de2a34374caab2616d1f82e5ce6,tensorflow/tensorflow,use a tuple for batch/time_id,cudnn_rnn_ops.py,"@@ -1119,8 +1119,7 @@ def _cudnn_rnn(inputs, args[""num_proj""] = 0 if num_proj is None else num_proj outputs, output_h, output_c, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv3(**args) elif time_major is False or num_proj: - batch_id = 1 if time_major else 0 - time_id = 0 if time_major else 1 + batch_id, time_id = (1, 0) if time_major else (0, 1) batch_size = array_ops.shape(inputs)[batch_id] max_time = array_ops.shape(inputs)[time_id] sequence_lengths = array_ops.fill([batch_size], max_time) ",0,train 7bf9fa38b6744049b617e722b3f03f2deed1d51f,tensorflow/tensorflow,"Fix interfaces incompatibilities between Classifier and Estimator. Change: 131316326",classifier.py,"@@ -42,7 +42,8 @@ class Classifier(estimator.Estimator): CLASS_OUTPUT = 'classes' PROBABILITY_OUTPUT = 'probabilities' - def __init__(self, model_fn, n_classes, model_dir=None, config=None): + def __init__(self, model_fn, n_classes, model_dir=None, config=None, + params=None): """"""Constructor for Classifier. Args: @@ -52,11 +53,17 @@ class Classifier(estimator.Estimator): also be used to load checkpoints from the directory into a estimator to continue training a previously saved model. config: Configuration object (optional) + params: `dict` of hyper parameters that will be passed into `model_fn`. """""" self._n_classes = n_classes self._logits_fn = model_fn - super(Classifier, self).__init__(model_fn=self._classifier_model, - model_dir=model_dir, config=config) + if params: + model_fn = self._classifier_model_with_params + else: + model_fn = self._classifier_model + super(Classifier, self).__init__(model_fn=model_fn, + model_dir=model_dir, config=config, + params=params) def evaluate(self, x=None, @@ -161,7 +168,15 @@ class Classifier(estimator.Estimator): return predictions[self.PROBABILITY_OUTPUT] def _classifier_model(self, features, targets, mode): - logits, loss, train_op = self._logits_fn(features, targets, mode) + return self._convert_to_estimator_model_result( + self._logits_fn(features, targets, mode)) + + def _classifier_model_with_params(self, features, targets, mode, params): + return self._convert_to_estimator_model_result( + self._logits_fn(features, targets, mode, params)) + + def _convert_to_estimator_model_result(self, logits_fn_result): + logits, loss, train_op = logits_fn_result return { 'classes': math_ops.argmax(logits, len(logits.get_shape()) - 1), 'probabilities': nn.softmax(logits) ",0,train 7bf9fa38b6744049b617e722b3f03f2deed1d51f,tensorflow/tensorflow,"Fix interfaces incompatibilities between Classifier and Estimator. 
Change: 131316326",classifier_test.py,"@@ -46,19 +46,27 @@ def logistic_model_fn(features, target, unused_mode): return prediction, loss, train_op +def logistic_model_params_fn(features, target, unused_mode, params): + target = tf.one_hot(target, 3, 1, 0) + prediction, loss = tf.contrib.learn.models.logistic_regression_zero_init( + features, target) + train_op = tf.contrib.layers.optimize_loss( + loss, tf.contrib.framework.get_global_step(), optimizer='Adagrad', + learning_rate=params['learning_rate']) + return prediction, loss, train_op + + class ClassifierTest(tf.test.TestCase): def testIrisAll(self): - iris = tf.contrib.learn.datasets.load_iris() est = tf.contrib.learn.Classifier(model_fn=logistic_model_fn, n_classes=3) - est.fit(iris.data, iris.target, steps=100) - scores = est.evaluate(x=iris.data, y=iris.target, name='eval') - predictions = est.predict(x=iris.data) - predictions_proba = est.predict_proba(x=iris.data) - self.assertEqual(predictions.shape[0], iris.target.shape[0]) - self.assertAllEqual(predictions, np.argmax(predictions_proba, axis=1)) - other_score = _sklearn.accuracy_score(iris.target, predictions) - self.assertAllClose(other_score, scores['accuracy']) + self._runIrisAll(est) + + def testIrisAllWithParams(self): + est = tf.contrib.learn.Classifier(model_fn=logistic_model_params_fn, + n_classes=3, + params={'learning_rate': 0.01}) + self._runIrisAll(est) def testIrisPredictAsIterable(self): iris = tf.contrib.learn.datasets.load_iris() @@ -89,6 +97,17 @@ class ClassifierTest(tf.test.TestCase): predictions = list(est.predict(input_fn=predict_input_fn, as_iterable=True)) self.assertEqual(len(predictions), iris.target.shape[0]) + def _runIrisAll(self, est): + iris = tf.contrib.learn.datasets.load_iris() + est.fit(iris.data, iris.target, steps=100) + scores = est.evaluate(x=iris.data, y=iris.target, name='eval') + predictions = est.predict(x=iris.data) + predictions_proba = est.predict_proba(x=iris.data) + self.assertEqual(predictions.shape[0], iris.target.shape[0]) + self.assertAllEqual(predictions, np.argmax(predictions_proba, axis=1)) + other_score = _sklearn.accuracy_score(iris.target, predictions) + self.assertAllClose(other_score, scores['accuracy']) + if __name__ == '__main__': tf.test.main() ",0,train cd3a6effe4f7bbaa3857bfe6432a361a7676507f,tensorflow/tensorflow,"Fix documentation for the real shape of the output of crf_log_likelihood. PiperOrigin-RevId: 185552171",crf.py,"@@ -166,8 +166,8 @@ def crf_log_likelihood(inputs, sequence_lengths: A [batch_size] vector of true sequence lengths. transition_params: A [num_tags, num_tags] transition matrix, if available. Returns: - log_likelihood: A scalar containing the log-likelihood of the given sequence - of tag indices. + log_likelihood: A [batch_size] `Tensor` containing the log-likelihood of + each example, given the sequence of tag indices. transition_params: A [num_tags, num_tags] transition matrix. This is either provided by the caller or created in this function. """""" @@ -182,7 +182,7 @@ def crf_log_likelihood(inputs, transition_params) log_norm = crf_log_norm(inputs, sequence_lengths, transition_params) - # Normalize the scores to get the log-likelihood. + # Normalize the scores to get the log-likelihood per example. log_likelihood = sequence_scores - log_norm return log_likelihood, transition_params ",0,test 76ca9f1060fa7c789bcddcdb756a3b598cb634bd,tensorflow/tensorflow,"Add code-fences to doctest blocks. Most >>> blocks already have ``` fences. Doctest runs them with or without the fences. 
This change adds the ``` anywhere they're missing when api docs are generated from the docstrings. This will ensure that they look right when viewed as markdown. + fix docstring for `constant_initializer`: you can't have blank lines inside a doctest block. This prevents the rendering from getting corrupted. PiperOrigin-RevId: 267009925",init_ops.py,"@@ -173,50 +173,42 @@ class Constant(Initializer): of the `value` list, even reshaped, as shown in the two commented lines below the `value` list initialization. - ```python - >>> import numpy as np - >>> import tensorflow as tf - - >>> value = [0, 1, 2, 3, 4, 5, 6, 7] - >>> # value = np.array(value) - >>> # value = value.reshape([2, 4]) - >>> init = tf.compat.v1.constant_initializer(value) - - >>> print('fitting shape:') - >>> with tf.compat.v1.Session(): - >>> x = tf.compat.v1.get_variable('x', shape=[2, 4], initializer=init) - >>> x.initializer.run() - >>> print(x.eval()) - - fitting shape: - [[ 0. 1. 2. 3.] - [ 4. 5. 6. 7.]] - - >>> print('larger shape:') - >>> with tf.compat.v1.Session(): - >>> x = tf.compat.v1.get_variable('x', shape=[3, 4], initializer=init) - >>> x.initializer.run() - >>> print(x.eval()) - - larger shape: - [[ 0. 1. 2. 3.] - [ 4. 5. 6. 7.] - [ 7. 7. 7. 7.]] - - >>> print('smaller shape:') - >>> with tf.compat.v1.Session(): - >>> x = tf.compat.v1.get_variable('x', shape=[2, 3], initializer=init) - - ValueError: Too many elements provided. Needed at most 6, but received 8 - - >>> print('shape verification:') - >>> init_verify = tf.compat.v1.constant_initializer(value, - verify_shape=True) - >>> with tf.compat.v1.Session(): - >>> x = tf.compat.v1.get_variable('x', shape=[3, 4], - initializer=init_verify) - - TypeError: Expected Tensor's shape: (3, 4), got (8,). + ``` + >>> value = [0, 1, 2, 3, 4, 5, 6, 7] + >>> # value = np.array(value) + >>> # value = value.reshape([2, 4]) + >>> init = tf.compat.v1.constant_initializer(value) + >>> + >>> # fitting shape + >>> with tf.compat.v1.Session(): + ... x = tf.compat.v1.get_variable('x', shape=[2, 4], initializer=init) + ... x.initializer.run() + ... print(x.eval()) + [[0. 1. 2. 3.] + [4. 5. 6. 7.]] + >>> + >>> # Larger shape + >>> with tf.compat.v1.Session(): + ... x = tf.compat.v1.get_variable('x', shape=[3, 4], initializer=init) + ... x.initializer.run() + ... print(x.eval()) + [[ 0. 1. 2. 3.] + [ 4. 5. 6. 7.] + [ 7. 7. 7. 7.]] + >>> + >>> # Smaller shape + >>> with tf.compat.v1.Session(): + ... x = tf.compat.v1.get_variable('x', shape=[2, 3], initializer=init) + ValueError: Too many elements provided. Needed at most 6, but received 8 + >>> + >>> # Shape verification + >>> init_verify = tf.compat.v1.constant_initializer(value, + verify_shape=True) + >>> with tf.compat.v1.Session(): + ... x = tf.compat.v1.get_variable('x', shape=[3, 4], + ... initializer=init_verify) + TypeError: Expected Tensor's shape: (3, 4), got (8,). + >>> ``` """""" ",0,test 76ca9f1060fa7c789bcddcdb756a3b598cb634bd,tensorflow/tensorflow,"Add code-fences to doctest blocks. Most >>> blocks already have ``` fences. Doctest runs them with or without the fences. This change adds the ``` anywhere they're missing when api docs are generated from the docstrings. This will ensure that they look right when viewed as markdown. + fix docstring for `constant_initializer`: you can't have blank lines inside a doctest block. This prevents the rendering from getting corrupted. PiperOrigin-RevId: 267009925",init_ops_v2.py,"@@ -150,40 +150,31 @@ class Constant(Initializer): below the `value` list initialization. 
```python - >>> import numpy as np - >>> import tensorflow as tf - - >>> value = [0, 1, 2, 3, 4, 5, 6, 7] - >>> # value = np.array(value) - >>> # value = value.reshape([2, 4]) - >>> init = tf.compat.v1.constant_initializer(value) - - >>> print('fitting shape:') - >>> with tf.compat.v1.Session(): - >>> x = tf.compat.v1.get_variable('x', shape=[2, 4], initializer=init) - >>> x.initializer.run() - >>> print(x.eval()) - - fitting shape: - [[ 0. 1. 2. 3.] - [ 4. 5. 6. 7.]] - - >>> print('larger shape:') - >>> with tf.compat.v1.Session(): - >>> x = tf.compat.v1.get_variable('x', shape=[3, 4], initializer=init) - >>> x.initializer.run() - >>> print(x.eval()) - - larger shape: - [[ 0. 1. 2. 3.] - [ 4. 5. 6. 7.] - [ 7. 7. 7. 7.]] - - >>> print('smaller shape:') - >>> with tf.compat.v1.Session(): - >>> x = tf.compat.v1.get_variable('x', shape=[2, 3], initializer=init) - - ValueError: Too many elements provided. Needed at most 6, but received 8 + >>> value = [0, 1, 2, 3, 4, 5, 6, 7] + >>> # value = np.array(value) + >>> # value = value.reshape([2, 4]) + >>> init = tf.compat.v1.constant_initializer(value) + >>> + >>> # Fitting shape + >>> with tf.compat.v1.Session(): + ... x = tf.compat.v1.get_variable('x', shape=[2, 4], initializer=init) + ... x.initializer.run() + ... print(x.eval()) + [[0. 1. 2. 3.] + [4. 5. 6. 7.]] + >>> # Larger shape + >>> with tf.compat.v1.Session(): + ... x = tf.compat.v1.get_variable('x', shape=[3, 4], initializer=init) + ... x.initializer.run() + ... print(x.eval()) + [[ 0. 1. 2. 3.] + [ 4. 5. 6. 7.] + [ 7. 7. 7. 7.]] + >>> # Smaller shape + >>> with tf.compat.v1.Session(): + ... x = tf.compat.v1.get_variable('x', shape=[2, 3], initializer=init) + ValueError: Too many elements provided. Needed at most 6, but received 8 + ``` """""" ",0,test 76d204f38757c2b4a3a82020b90b7e739b2c90b6,tensorflow/tensorflow,"Keep side effectful ops in grappler function items PiperOrigin-RevId: 221653198",op_types.cc,"@@ -571,6 +571,10 @@ bool IsFreeOfSideEffect(const NodeDef& node) { if (node.op().find(""Queue"") != string::npos) { return false; } + // Sending a tensor via a network is a side effect. + if (IsSend(node)) { + return false; + } return !ModifiesInputsInPlace(node); } ",0,train 76d204f38757c2b4a3a82020b90b7e739b2c90b6,tensorflow/tensorflow,"Keep side effectful ops in grappler function items PiperOrigin-RevId: 221653198",functions.cc,"@@ -347,12 +347,6 @@ GrapplerFunctionItem::GrapplerFunctionItem( fetch.push_back(output_tensor); } } - // Stateful and Send (it's not stateful) nodes must be preserved in the graph. - for (const NodeDef& node : graph.node()) { - if (IsSend(node)) { - keep_ops.push_back(node.name()); - } - } } const string& GrapplerFunctionItem::description() const { return description_; } @@ -584,8 +578,8 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func, TF_RETURN_IF_ERROR(RegisterFunctionBodyOutputs(*registration, func_def_node, &connectivity)); - // Stateful and Send nodes must be preserved in a function body - if (registration->op_def.is_stateful() || IsSend(func_def_node)) { + // Ops with side effects must be preserved in a function body. 
+ if (!IsFreeOfSideEffect(func_def_node)) { keep_nodes.push_back(func_def_node.name()); } } ",0,train 76d204f38757c2b4a3a82020b90b7e739b2c90b6,tensorflow/tensorflow,"Keep side effectful ops in grappler function items PiperOrigin-RevId: 221653198",functions.h,"@@ -142,12 +142,6 @@ class GrapplerFunctionItemInstantiation { class GrapplerFunctionItem : public GrapplerItem { public: GrapplerFunctionItem() = default; - GrapplerFunctionItem(string func_name, string description, - AttrSlice func_attr, - std::vector input_arg_expansions, - std::vector output_arg_expansions, - std::vector keep_nodes, int graph_def_version, - bool is_stateful, GraphDef&& function_body); const string& description() const; @@ -170,12 +164,22 @@ class GrapplerFunctionItem : public GrapplerItem { GrapplerFunctionItem& SwapFunctionBody(GraphDef&& other); private: + friend Status MakeGrapplerFunctionItem(const FunctionDef&, const AttrSlice&, + const FunctionLibraryDefinition&, int, + GrapplerFunctionItem*); friend Status ReplaceInputWithConst(const NodeDef&, int, GrapplerFunctionItem*); friend Status RemoveUnusedOutputs( const gtl::FlatSet& active_outputs, GrapplerFunctionItem* item, std::vector>* output_mapping); + GrapplerFunctionItem(string func_name, string description, + AttrSlice func_attr, + std::vector input_arg_expansions, + std::vector output_arg_expansions, + std::vector keep_nodes, int graph_def_version, + bool is_stateful, GraphDef&& function_body); + string description_; AttrSlice func_attr_; // Attributes specific to function definition that // produced this item (FuncDef.attr field). ",0,train 76d204f38757c2b4a3a82020b90b7e739b2c90b6,tensorflow/tensorflow,"Keep side effectful ops in grappler function items PiperOrigin-RevId: 221653198",functions_test.cc,"@@ -576,6 +576,33 @@ TEST_F(FunctionsTest, FromFunctionDefWithoutInput) { EXPECT_EQ(""two"", cast.input(0)); } +TEST_F(FunctionsTest, FromFunctionDefWithSideEffectfulOps) { + const Tensor kOne = test::AsScalar(1.0); + FunctionDef func = FunctionDefHelper::Define( + /* Name */ ""SideEffects"", + /* Args */ {""x: Ref(float)""}, + /* Return values */ {}, + /* Attr def */ {}, + /* Nodes */ + {{{""one""}, ""Const"", {}, {{""value"", kOne}, {""dtype"", DT_FLOAT}}}, + {{""update""}, ""AssignAdd"", {""x"", ""one""}, {{""T"", DT_FLOAT}}}}); + + protobuf::Map func_instantiation_attr; + FunctionLibraryDefinition flib(OpRegistry::Global(), FunctionDefLibrary()); + + GrapplerFunctionItem item; + TF_EXPECT_OK(MakeGrapplerFunctionItem(func, + AttrSlice(&func_instantiation_attr), + flib, TF_GRAPH_DEF_VERSION, &item)); + + EXPECT_EQ(""SideEffects"", item.id); + EXPECT_EQ(3, item.function_body().node_size()); + EXPECT_EQ(1, item.input_size()); + EXPECT_EQ(0, item.output_size()); + ASSERT_EQ(1, item.keep_ops.size()); + EXPECT_EQ(""update"", item.keep_ops[0]); +} + TEST_F(FunctionsTest, MakeFunctionDef) { const Tensor kTwo = test::AsScalar(2); FunctionDef func = FunctionDefHelper::Define( ",0,train 0d39eea6e7e2b02a8812c46532af6bfcb5604865,tensorflow/tensorflow,"[NFC] Eliminate references to HLO Inst from CollectivePermute Thunk. - Introduce a CollectivePermuteConfig object to hold relevant properties needed for execution of the Thunk and use that in the thunk object. PiperOrigin-RevId: 335650568 Change-Id: I6c326100e2540fd7f80a9f2b5357ef4a781a6683",collective_permute_thunk.cc,"@@ -24,6 +24,7 @@ limitations under the License. 
#include ""absl/memory/memory.h"" #include ""tensorflow/compiler/xla/refcounting_hash_map.h"" #include ""tensorflow/compiler/xla/service/hlo_casting_utils.h"" +#include ""tensorflow/compiler/xla/service/hlo_instruction.h"" #include ""tensorflow/compiler/xla/service/hlo_instructions.h"" #include ""tensorflow/compiler/xla/statusor.h"" #include ""tensorflow/core/lib/core/blocking_counter.h"" @@ -217,16 +218,23 @@ RefcountingHashMap& GlobalRendezvousMap() { } // anonymous namespace +CollectivePermuteConfig GetCollectivePermuteConfig( + const HloInstruction* instr) { + CollectivePermuteConfig config; + auto* collective_permute = Cast(instr); + config.source_target_pairs = collective_permute->source_target_pairs(); + return config; +} + CollectivePermuteThunk::CollectivePermuteThunk( - ThunkInfo thunk_info, const BufferAllocation::Slice& src, - const BufferAllocation::Slice& dest) + ThunkInfo thunk_info, CollectivePermuteConfig&& config, + const BufferAllocation::Slice& src, const BufferAllocation::Slice& dest) : Thunk(kCollectivePermute, thunk_info), - hlo_instruction_(thunk_info.hlo_instruction), + config_(std::move(config)), src_(src), dest_(dest) {} Status CollectivePermuteThunk::ExecuteOnStream(const ExecuteParams& params) { - auto* instr = Cast(hlo_instruction_); auto op_profiler = params.profiler->MakeScopedInstructionProfiler(profile_index()); @@ -245,7 +253,7 @@ Status CollectivePermuteThunk::ExecuteOnStream(const ExecuteParams& params) { // Figure out which replicas our data is copied to. std::vector dest_replicas; - for (const auto& src_dest : instr->source_target_pairs()) { + for (const auto& src_dest : config_.source_target_pairs) { if (src_dest.first == replica_id) { dest_replicas.push_back(src_dest.second); } @@ -260,7 +268,7 @@ Status CollectivePermuteThunk::ExecuteOnStream(const ExecuteParams& params) { // If no replica writes into us (i.e. we aren't the target of any copies), our // contract is that we zero our output. - if (absl::c_none_of(instr->source_target_pairs(), + if (absl::c_none_of(config_.source_target_pairs, [&](std::pair src_dest) { return src_dest.second == replica_id; })) { ",0,train 0d39eea6e7e2b02a8812c46532af6bfcb5604865,tensorflow/tensorflow,"[NFC] Eliminate references to HLO Inst from CollectivePermute Thunk. - Introduce a CollectivePermuteConfig object to hold relevant properties needed for execution of the Thunk and use that in the thunk object. PiperOrigin-RevId: 335650568 Change-Id: I6c326100e2540fd7f80a9f2b5357ef4a781a6683",collective_permute_thunk.h,"@@ -19,21 +19,28 @@ limitations under the License. #include ""tensorflow/compiler/xla/service/buffer_assignment.h"" #include ""tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h"" #include ""tensorflow/compiler/xla/service/gpu/thunk.h"" +#include ""tensorflow/compiler/xla/service/hlo_instruction.h"" namespace xla { namespace gpu { +struct CollectivePermuteConfig { + std::vector> source_target_pairs; +}; + +CollectivePermuteConfig GetCollectivePermuteConfig(const HloInstruction* instr); + // Thunk that implements the collective-permute HLO. 
class CollectivePermuteThunk : public Thunk { public: - CollectivePermuteThunk(ThunkInfo thunk_info, + CollectivePermuteThunk(ThunkInfo thunk_info, CollectivePermuteConfig&& config, const BufferAllocation::Slice& src, const BufferAllocation::Slice& dest); Status ExecuteOnStream(const ExecuteParams& params) override; private: - const HloInstruction* hlo_instruction_; + CollectivePermuteConfig config_; BufferAllocation::Slice src_; BufferAllocation::Slice dest_; }; ",0,train 0d39eea6e7e2b02a8812c46532af6bfcb5604865,tensorflow/tensorflow,"[NFC] Eliminate references to HLO Inst from CollectivePermute Thunk. - Introduce a CollectivePermuteConfig object to hold relevant properties needed for execution of the Thunk and use that in the thunk object. PiperOrigin-RevId: 335650568 Change-Id: I6c326100e2540fd7f80a9f2b5357ef4a781a6683",ir_emitter_unnested.cc,"@@ -1623,9 +1623,10 @@ Status IrEmitterUnnested::HandleReplicaId(HloInstruction* hlo) { } Status IrEmitterUnnested::HandleCollectivePermute(HloInstruction* hlo) { + CollectivePermuteConfig config = GetCollectivePermuteConfig(hlo); AddThunkToThunkSequence(absl::make_unique( - GetThunkInfo(hlo), GetAllocationSlice(*hlo->operand(0)), - GetAllocationSlice(*hlo))); + GetThunkInfo(hlo), std::move(config), + GetAllocationSlice(*hlo->operand(0)), GetAllocationSlice(*hlo))); return Status::OK(); } ",0,train 003110094e8daa14306f872ceb8596e14d1f69d1,tensorflow/tensorflow,"[JAX] Make C++ jit code tolerant to jax.interpreters.xla._DeviceArray not existing. Change in preparation for deleting jax.interpreters.xla._DeviceArray. PiperOrigin-RevId: 407608971 Change-Id: I857aab2517dc2c08d36eec9fbb6a005d7289e253",jax_jit.cc,"@@ -566,7 +566,10 @@ xla::Status ComputeSignature(bool jax_enable_x64, xla::PyClient& pyclient, }; static const auto& types = *[]() -> PythonTypes* { py::module xla_module(py::module::import(""jax.interpreters.xla"")); - py::object device_array(xla_module.attr(""_DeviceArray"")); + py::object device_array; + if (py::hasattr(xla_module, ""_DeviceArray"")) { + device_array = xla_module.attr(""_DeviceArray""); + } return new PythonTypes{device_array}; }(); // When the jitted function is not committed, we first check whether any ",0,train 65d193342103d972328934044df9e285438904b9,tensorflow/tensorflow,"Reformatted the error messages to f-strings or .format() command. PiperOrigin-RevId: 430567942 Change-Id: I614475150d63cd4e68695609cf69a0317ceb2944",feature_column.py,"@@ -104,12 +104,12 @@ def embedding_column(categorical_column, """""" if isinstance(categorical_column, _DENYLISTED_CATEGORICAL_COLUMNS_V2): raise TypeError('categorical_column for tpu ' - ' embedding_column was denylisted type %s' % - type(categorical_column)) + ' embedding_column was ' + f'denylisted type {type(categorical_column)}') if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS): raise TypeError( 'categorical_column for tpu ' - ' embedding_column must be type %s, got %s.' 
% (' or '.join([ + ' embedding_column must be type {}, got {}.'.format(' or '.join([ cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS ]), type(categorical_column))) if (dimension is None) or (dimension < 1): @@ -221,14 +221,15 @@ def shared_embedding_columns(categorical_columns, for categorical_column in categorical_columns: if isinstance(categorical_column, _DENYLISTED_CATEGORICAL_COLUMNS_V2): raise TypeError('categorical_column for tpu ' - ' embedding_column was denylisted type %s' % - type(categorical_column)) + ' embedding_column was denylisted type ' + f'{type(categorical_column)}') if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS): raise TypeError( 'categorical_column for tpu ' - ' shared_embedding_columns must be type %s, got %s.' % (' or '.join([ - cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS - ]), type(categorical_column))) + ' shared_embedding_columns must be type {}, got {}.'.format( + ' or '.join( + [cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS]), + type(categorical_column))) if not max_sequence_lengths: max_sequence_lengths = [0] * len(categorical_columns) @@ -711,7 +712,7 @@ def split_sequence_columns(feature_columns): if not isinstance(column, (_TPUEmbeddingColumn, _TPUSharedEmbeddingColumn)): raise TypeError( 'column must be a _TPUEmbeddingColumn or _TPUSharedEmbeddingColumn ' - 'but got %s instead.' % (type(column))) + f'but got {type(column)} instead.') if column.is_sequence_column(): sequence_columns.append(column) else: ",0,train 65d193342103d972328934044df9e285438904b9,tensorflow/tensorflow,"Reformatted the error messages to f-strings or .format() command. PiperOrigin-RevId: 430567942 Change-Id: I614475150d63cd4e68695609cf69a0317ceb2944",feature_column_v2.py,"@@ -144,7 +144,7 @@ def embedding_column_v2(categorical_column, if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS_V2): raise TypeError( 'categorical_column for tpu ' - ' embedding_column must be type %s, got %s.' % (' or '.join([ + 'embedding_column must be type {}, got {}.'.format(' or '.join([ cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS_V2 ]), type(categorical_column))) if (dimension is None) or (dimension < 1): @@ -163,8 +163,8 @@ def embedding_column_v2(categorical_column, if (embedding_lookup_device and embedding_lookup_device not in _ALLOWED_DEVICES): - raise ValueError('If set, embedding_lookup_device must be in ', - _ALLOWED_DEVICES) + raise ValueError( + f'If set, embedding_lookup_device must be in {_ALLOWED_DEVICES}') if embedding_lookup_device == 'cpu': embedding_lookup_device = EmbeddingDevice.CPU @@ -314,9 +314,10 @@ def shared_embedding_columns_v2(categorical_columns, if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS_V2): raise TypeError( 'categorical_column for tpu ' - ' shared_embedding_columns must be type %s, got %s.' 
% (' or '.join([ - cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS_V2 - ]), type(categorical_column))) + ' shared_embedding_columns must be type {}, got {}.'.format( + ' or '.join( + [cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS_V2]), + type(categorical_column))) if not max_sequence_lengths: max_sequence_lengths = [0] * len(categorical_columns) @@ -364,8 +365,8 @@ def shared_embedding_columns_v2(categorical_columns, if (embedding_lookup_device and embedding_lookup_device not in _ALLOWED_DEVICES): - raise ValueError('If set, embedding_lookup_device must be in ', - _ALLOWED_DEVICES) + raise ValueError( + f'If set, embedding_lookup_device must be in {_ALLOWED_DEVICES}') if embedding_lookup_device == 'cpu': embedding_lookup_device = EmbeddingDevice.CPU @@ -779,7 +780,7 @@ def split_sequence_columns_v2(feature_columns): _TPUSharedEmbeddingColumnV2)): raise TypeError( 'column must be a _TPUEmbeddingColumnV2 or ' - '_TPUSharedEmbeddingColumnV2 but got %s instead.' % (type(column))) + f'_TPUSharedEmbeddingColumnV2 but got {type(column)} instead.') if column.is_sequence_column(): sequence_columns.append(column) else: ",0,train 65d193342103d972328934044df9e285438904b9,tensorflow/tensorflow,"Reformatted the error messages to f-strings or .format() command. PiperOrigin-RevId: 430567942 Change-Id: I614475150d63cd4e68695609cf69a0317ceb2944",tpu_feed.py,"@@ -185,8 +185,8 @@ class InfeedQueue(object): ""number of tuple elements cannot be inferred from InfeedQueue "" ""constructor"") if number_of_tuple_elements <= 0: - raise ValueError(""number_of_tuple_elements %d must be > 0"" % - number_of_tuple_elements) + raise ValueError(f""number_of_tuple_elements {number_of_tuple_elements} "" + ""must be > 0"") # Make an empty sharding policy for each tuple element. self._sharding_policies = [ tpu_sharding.ShardingPolicy() for _ in range(number_of_tuple_elements) @@ -241,22 +241,24 @@ class InfeedQueue(object): dtype. """""" if len(tuple_types) != self.number_of_tuple_elements: - raise ValueError(""tuple_types is %s, but must be a list of length %d"" % - (str(tuple_types), self.number_of_tuple_elements)) + raise ValueError( + f""tuple_types is {str(tuple_types)}, but must be a list of "" + f""length {self.number_of_tuple_elements}"" + ) if self._frozen: for (frozen, updated) in zip(self._tuple_types, tuple_types): if frozen != updated: raise ValueError( ""Trying to update InfeedQueue with frozen configuration with an "" - ""incompatible type. Frozen types are %s, updated types are %s"" % ( - str(self._tuple_types), str(tuple_types))) + f""incompatible type. Frozen types are {str(self._tuple_types)}, "" + f""updated types are {str(tuple_types)}"") else: try: self._tuple_types = [dtypes.as_dtype(t) for t in tuple_types] except (TypeError) as e: raise TypeError( - ""tuple_types is %s, but must be a list of elements each "" - ""convertible to dtype: got error %s"" % (str(tuple_types), str(e))) + f""tuple_types is {str(tuple_types)}, but must be a list of "" + f""elements each convertible to dtype: got error {str(e)}"") from e @property def tuple_shapes(self): @@ -280,22 +282,26 @@ class InfeedQueue(object): a TensorShape. 
"""""" if len(tuple_shapes) != self.number_of_tuple_elements: - raise ValueError(""tuple_shapes is %s, but must be a list of length %d"" % - (str(tuple_shapes), self.number_of_tuple_elements)) + raise ValueError( + f""tuple_shapes is {str(tuple_shapes)}, but must be a list of "" + f""length {self.number_of_tuple_elements}"" + ) try: tuple_shapes = [tensor_shape.as_shape(shape) for shape in tuple_shapes] except (ValueError, TypeError) as e: raise TypeError( - ""tuple_shapes is %s, but must be a list of elements each "" - ""convertible to TensorShape: got error %s"" % (str(tuple_shapes), - str(e))) + f""tuple_shapes is {str(tuple_shapes)}, but must be a list of "" + ""elements each convertible to TensorShape: got error "" + f""{str(e)}"") from e if self._frozen: for (frozen, updated) in zip(self._tuple_shapes, tuple_shapes): if frozen != updated: raise ValueError( ""Trying to update InfeedQueue with frozen configuration with an "" - ""incompatible shape. Frozen shapes are %s, updated shapes are %s"" - % (str(self._tuple_shapes), str(tuple_shapes))) + ""incompatible shape. Frozen shapes are "" + f""{str(self._tuple_shapes)}, updated shapes are "" + f""{str(tuple_shapes)}"") + else: self._tuple_shapes = tuple_shapes self._validate() @@ -335,9 +341,8 @@ class InfeedQueue(object): range for the corresponding tuple element shape. """""" if len(shard_dimensions) != self.number_of_tuple_elements: - raise ValueError(""shard_dimensions is %s, but must be a list of length %d"" - % (str(shard_dimensions), - self.number_of_tuple_elements)) + raise ValueError(f""shard_dimensions is {str(shard_dimensions)}, but must "" + f""be a list of length {self.number_of_tuple_elements}"") for (policy, dimension) in zip(self._sharding_policies, shard_dimensions): policy.set_shard_dimension(dimension) self._validate() @@ -383,8 +388,8 @@ class InfeedQueue(object): self.number_of_tuple_elements """""" if len(input_tensors) != self.number_of_tuple_elements: - raise ValueError(""input_tensors is %s, but should be a list of %d Tensors"" - % (str(input_tensors), self.number_of_tuple_elements)) + raise ValueError(f""input_tensors is {str(input_tensors)}, but should be "" + f""a list of {self.number_of_tuple_elements} Tensors"") self.set_tuple_shapes([t.shape for t in input_tensors]) self.set_tuple_types([t.dtype for t in input_tensors]) @@ -417,9 +422,9 @@ class InfeedQueue(object): for t in input_tensors: if len(t) != self.number_of_tuple_elements: raise ValueError( - ""input_tensors is %s but must be a list of lists, where each inner"" - "" list has length number_of_tuple_elements=%d"" % ( - str(input_tensors), self.number_of_tuple_elements)) + f""input_tensors is {str(input_tensors)} but must be a list of "" + ""lists, where each inner list has length "" + f""number_of_tuple_elements={self.number_of_tuple_elements}"") # Transpose the inputs to make a list of shard shapes for each tuple # element. 
sharded_shapes = [[t[i].shape @@ -435,8 +440,8 @@ class InfeedQueue(object): for (t1, t2) in zip(input_tensors[0], input_tensors[i]): if t1.dtype != t2.dtype: raise TypeError( - ""types of the tuple elements of input_tensors %s are not "" - ""consistent"" % str(input_tensors)) + ""types of the tuple elements of input_tensors "" + f""{str(input_tensors)} are not consistent"") self.set_tuple_types([t.dtype for t in input_tensors[0]]) def freeze(self): @@ -548,8 +553,8 @@ class InfeedQueue(object): for i in range(1, self.number_of_tuple_elements): if devices[0] != devices[i]: raise ValueError( - ""input devices for shard %d are %s, but should all be the same"" % - (index, str(devices))) + f""input devices for shard {index} are {str(devices)}, but should "" + ""all be the same"") with ops.colocate_with(inputs[0]): return tpu_ops.infeed_enqueue_tuple( inputs=inputs, ",0,train 72028307fdd8b00559ed631a409c9237ff0c24b8,tensorflow/tensorflow,"fix the GitHub Issue #43789 that file_io.delete_recursively_v2 not compatible while calling on files on cloud storage. PiperOrigin-RevId: 336951308 Change-Id: Ieb43a96b1f6c6fc481785cd4c60b1a5c31cb5c1c",multi_worker_callback_tf2_test.py,"@@ -205,7 +205,7 @@ class KerasCallbackMultiProcessTest(parameterized.TestCase, test.TestCase): raise multi_process_runner.get_barrier().wait() - backup_filepath = os.path.join(bar_dir, 'chief', 'checkpoint') + backup_filepath = os.path.join(bar_dir, 'checkpoint') test_obj.assertTrue(file_io.file_exists_v2(backup_filepath)) test_obj.assertTrue(file_io.file_exists_v2(saving_filepath)) ",0,train 72028307fdd8b00559ed631a409c9237ff0c24b8,tensorflow/tensorflow,"fix the GitHub Issue #43789 that file_io.delete_recursively_v2 not compatible while calling on files on cloud storage. PiperOrigin-RevId: 336951308 Change-Id: Ieb43a96b1f6c6fc481785cd4c60b1a5c31cb5c1c",worker_training_state.py,"@@ -73,17 +73,15 @@ class WorkerTrainingState(object): # workers need to perform `save()`. # But all workers should restore from the same checkpoint_dir as passed in # read_checkpoint_manager. - self.read_checkpoint_manager = checkpoint_management.CheckpointManager( - checkpoint, - directory=os.path.join(checkpoint_dir, 'chief'), - max_to_keep=1) - write_checkpoint_dir = distributed_file_utils.write_dirpath( + self.write_checkpoint_dir = distributed_file_utils.write_dirpath( checkpoint_dir, self._model.distribute_strategy) - if write_checkpoint_dir == checkpoint_dir: - self.write_checkpoint_manager = self.read_checkpoint_manager + self.write_checkpoint_manager = checkpoint_management.CheckpointManager( + checkpoint, directory=self.write_checkpoint_dir, max_to_keep=1) + if self.write_checkpoint_dir == checkpoint_dir: + self.read_checkpoint_manager = self.write_checkpoint_manager else: - self.write_checkpoint_manager = checkpoint_management.CheckpointManager( - checkpoint, directory=write_checkpoint_dir, max_to_keep=1) + self.read_checkpoint_manager = checkpoint_management.CheckpointManager( + checkpoint, directory=checkpoint_dir, max_to_keep=1) def back_up(self, epoch): """"""Back up the current state of training into a checkpoint file. @@ -113,8 +111,13 @@ class WorkerTrainingState(object): Delete the backup directories which should not exist after `fit()` successfully finishes. 
"""""" - if self.write_checkpoint_manager is self.read_checkpoint_manager: - file_io.delete_recursively_v2(self.write_checkpoint_manager.directory) + # pylint: disable=protected-access + for pathname in file_io.get_matching_files_v2( + self.write_checkpoint_manager._prefix + '*'): + file_io.delete_recursively_v2(pathname) + for pathname in file_io.get_matching_files_v2( + os.path.join(self.write_checkpoint_manager.directory, 'checkpoint')): + file_io.delete_recursively_v2(pathname) def maybe_load_initial_epoch_from_ckpt(self, initial_epoch, mode): """"""Maybe load initial epoch from ckpt considering possible worker recovery. ",0,train 8d32eb3bd10aceea68118556e500e87f5565a983,tensorflow/tensorflow,"Handle tf.Case in tf-tensor-list-ops-decomposition pass. PiperOrigin-RevId: 316490068 Change-Id: I8f9502c3b8361e767b6333428cffa68fe3d8a3ad",tensor_list_ops_decomposition.cc,"@@ -216,59 +216,62 @@ LogicalResult HandleWhileOp( return success(); } -LogicalResult HandleIfOp(TF::IfOp if_op, ModuleOp module, - llvm::SmallDenseMap* buffer_to_size, - llvm::StringMap* - decomposed_partitioned_call_callees) { +template +LogicalResult HandleCaseOrIfOp( + CaseOrIfOp op, ArrayRef branches, ModuleOp module, + llvm::SmallDenseMap* buffer_to_size, + llvm::StringMap* + decomposed_partitioned_call_callees) { // Rewrite the branches. - auto then_branch = module.lookupSymbol(if_op.then_branch()); - auto else_branch = module.lookupSymbol(if_op.else_branch()); - llvm::SmallDenseMap then_map; - llvm::SmallDenseMap else_map; + SmallVector, 2> branch_maps; + branch_maps.resize(branches.size()); auto find_arg_buffer_type = [&](int64_t index) -> llvm::Optional { - auto it = buffer_to_size->find(if_op.getOperand(index + 1)); + auto it = buffer_to_size->find(op.getOperand(index + 1)); if (it == buffer_to_size->end()) return llvm::None; return it->getFirst().getType(); }; auto arg_buffer_size_is_fixed = [&](int64_t index) { - return (*buffer_to_size)[if_op.getOperand(index + 1)].fixed; + return (*buffer_to_size)[op.getOperand(index + 1)].fixed; }; - OpBuilder builder(if_op); - ModifyFunctionSignature(then_branch, cutil::GetSizeType(builder), &then_map, - find_arg_buffer_type, arg_buffer_size_is_fixed); - ModifyFunctionSignature(else_branch, cutil::GetSizeType(builder), &else_map, - find_arg_buffer_type, arg_buffer_size_is_fixed); - const bool arg_no_changed = then_map.empty(); - if (failed(DecomposeTensorListOpsInternal( - &then_branch.front(), module, &then_map, - decomposed_partitioned_call_callees)) || - failed(DecomposeTensorListOpsInternal( - &else_branch.front(), module, &else_map, - decomposed_partitioned_call_callees))) { - return failure(); + OpBuilder builder(op); + for (const auto& pair : llvm::zip(branches, branch_maps)) { + FuncOp branch = std::get<0>(pair); + llvm::SmallDenseMap& branch_map = std::get<1>(pair); + ModifyFunctionSignature(branch, cutil::GetSizeType(builder), &branch_map, + find_arg_buffer_type, arg_buffer_size_is_fixed); + + if (failed(DecomposeTensorListOpsInternal( + &branch.front(), module, &branch_map, + decomposed_partitioned_call_callees))) + return failure(); } + + const bool arg_no_changed = branch_maps.front().empty(); auto output_buffer_to_size = - AddTensorListSizesToReturn(then_branch, then_map); - AddTensorListSizesToReturn(else_branch, else_map); + AddTensorListSizesToReturn(branches.front(), branch_maps.front()); + for (const auto& pair : llvm::drop_begin(llvm::zip(branches, branch_maps), 1)) + AddTensorListSizesToReturn(std::get<0>(pair), std::get<1>(pair)); + if 
(output_buffer_to_size.empty() && arg_no_changed) return success(); - // Recreate the If op. - auto new_if_operands = llvm::to_vector<8>(if_op.getOperands()); - for (int64_t i = 1; i < if_op.getNumOperands(); ++i) { - auto it = buffer_to_size->find(if_op.getOperand(i)); + + // Recreate the op. + auto new_operands = llvm::to_vector<8>(op.getOperands()); + for (int64_t i = 1; i < op.getNumOperands(); ++i) { + auto it = buffer_to_size->find(op.getOperand(i)); if (it == buffer_to_size->end()) continue; - new_if_operands.push_back(it->getSecond().size); + new_operands.push_back(it->getSecond().size); } - auto new_if = OpBuilder(if_op).create( - if_op.getLoc(), then_branch.getType().getResults(), new_if_operands, - if_op.getAttrs()); + FuncOp first_branch = branches.front(); + auto new_op = OpBuilder(op).create( + op.getLoc(), first_branch.getType().getResults(), new_operands, + op.getAttrs()); for (const auto& entry : output_buffer_to_size) { - (*buffer_to_size)[new_if.getResult(std::get<0>(entry))] = { - new_if.getResult(std::get<1>(entry)), std::get<2>(entry)}; + (*buffer_to_size)[new_op.getResult(std::get<0>(entry))] = { + new_op.getResult(std::get<1>(entry)), std::get<2>(entry)}; } - if_op.replaceAllUsesWith( - new_if.getResults().take_front(if_op.getNumResults())); - if_op.erase(); + op.replaceAllUsesWith(new_op.getResults().take_front(op.getNumResults())); + op.erase(); return success(); } @@ -710,8 +713,22 @@ LogicalResult DecomposeTensorListOpsInternal( return failure(); } } else if (auto if_op = llvm::dyn_cast(&op)) { - if (failed(HandleIfOp(if_op, module, buffer_to_size, - decomposed_partitioned_call_callees))) { + auto then_branch = module.lookupSymbol(if_op.then_branch()); + auto else_branch = module.lookupSymbol(if_op.else_branch()); + + if (failed(HandleCaseOrIfOp(if_op, {then_branch, else_branch}, module, + buffer_to_size, + decomposed_partitioned_call_callees))) { + return failure(); + } + } else if (auto case_op = llvm::dyn_cast(&op)) { + SmallVector branches; + for (auto branch_symbol : case_op.branches()) { + branches.push_back(module.lookupSymbol( + branch_symbol.cast())); + } + if (failed(HandleCaseOrIfOp(case_op, branches, module, buffer_to_size, + decomposed_partitioned_call_callees))) { return failure(); } } else if (auto pcall = llvm::dyn_cast(&op)) { ",0,train 8de821fc169fb9bad8be681801e8551171f8e44a,tensorflow/tensorflow,"make_vjp in eager PiperOrigin-RevId: 172363016",backprop.py,"@@ -581,6 +581,62 @@ def val_and_grad_function(f, params=None): return decorated +def make_vjp(f, params=None): + """"""Returns a function that computes f and is vjp w.r.t. params. + + The term ""vjp"" here is an abbreviation for vector-jacobian product. + + Args: + f: the function to be differentiated. + params: the parameters (numbers or names) to differentiate with respect to. + A value of None will differentiate with respect to all parameters. + + Returns: + A function, which when called, returns a tuple (value, vjp), where: + - value is the result of calling f. + - vjp is a function, which takes a vector as an argument and + returns the product of that vector with the Jacobian of f. + Providing no argument to vjp is equivalent to providing a + vector of ones. 
+ + For example, + ```python + def f(x): + return x * x + + wrapped_fn = tfe.make_vjp(f) + result, vjp = wrapped_fn(tf.constant(3.0)) + # result is 9.0 + vjp() # the vjp function rturns 6.0 + + """""" + + parameter_positions = _get_arg_spec(f, params) + + def decorated(*args, **kwds): + """"""Computes the value and gradient of the decorated function."""""" + assert not kwds, ""The gradient function can't take keyword arguments."" + tape.push_new_tape() + sources = [] + args = [ + ops.convert_to_tensor(args[i]) if i in parameter_positions else args[i] + for i in range(len(args)) + ] + args = _ensure_unique_tensor_objects(parameter_positions, args) + for i in parameter_positions: + sources.append(args[i]) + tape.watch(args[i]) + result = f(*args) + t = tape.pop_tape() + def vjp(dy=None): + return imperative_grad.imperative_grad( + _default_vspace, t, nest.flatten(result), sources, + output_gradients=nest.flatten(dy) if dy is not None else None) + return result, vjp + + return decorated + + def _aggregate_grads(gradients): """"""Aggregate gradients from multiple sources. ",0,test 8de821fc169fb9bad8be681801e8551171f8e44a,tensorflow/tensorflow,"make_vjp in eager PiperOrigin-RevId: 172363016",backprop_test.py,"@@ -168,6 +168,16 @@ class BackpropTest(test.TestCase): grad = backprop.gradients_function(second, [0])(f)[0] self.assertAllEqual([[0.0]], grad.numpy()) + def testMakeVJP(self): + + def f(x): + return x * x + + wrapped_fn = backprop.make_vjp(f) + result, vjp = wrapped_fn(constant_op.constant(3.0)) + self.assertEqual(result.numpy(), 9.0) + self.assertEqual(vjp(2.0)[0].numpy(), 12.0) + def testGradGrad(self): def sq(x): ",0,test 3c5ef53e374cf029a3d595fac6a83d3d337568e2,tensorflow/tensorflow,[3] Review comments handled,conv.cc,"@@ -171,7 +171,7 @@ bool IsIm2ColRequired(TfLiteTensor* input, TfLiteConvParams* params, switch (kernel_type) { case kReference: - if (input->type == kTfLiteFloat32) { + if (is_hybrid) { return true; } else { return false; ",0,train a376886b16f2bb3cc268594cf23dadb826d12d48,tensorflow/tensorflow,"Added more constraints in Texture3D creation check. Moved check for one layer support check. PiperOrigin-RevId: 288365835 Change-Id: I19e6026a5c1e2bc9a1d2f82c2b4075eb41878f4e",tensor.cc,"@@ -297,10 +297,19 @@ bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device, return shape.b * shape.w * shape.h * depth <= device.GetInfo().image_buffer_max_size; case TensorStorageType::TEXTURE_3D: + if (device.cl_version() < OpenCLVersion::CL_1_2 && depth == 1) { + // clCreateImage3D (that used in CL 1.0/1.1) can not create image with + // depth = 1 by specification; + return false; + } return shape.w * shape.b <= device.GetInfo().image3d_max_width && shape.h <= device.GetInfo().image3d_max_height && depth <= device.GetInfo().image3d_max_depth; case TensorStorageType::TEXTURE_ARRAY: + // Bug on some Adreno. b/131099086 + if (depth == 1 && !device.SupportsOneLayerTextureArray()) { + return false; + } return shape.w * shape.b <= device.GetInfo().image2d_max_width && shape.h <= device.GetInfo().image2d_max_height && depth <= device.GetInfo().image_array_max_layers; @@ -412,12 +421,7 @@ Status AllocateTensorMemory(const CLContext& context, const CLDevice& device, desc.image_width = shape.w * shape.b; desc.image_height = shape.h; desc.image_depth = 0; - int layers_count = depth; - // Adreno bug. 
b/131099086 - if (layers_count == 1 && !device.SupportsOneLayerTextureArray()) { - layers_count = 2; - } - desc.image_array_size = layers_count; + desc.image_array_size = depth; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; desc.num_mip_levels = 0; ",0,train 52581df7928b137e4831c26696fea8634dbaefb0,tensorflow/tensorflow,"Support zero shapes for random_poisson. This matches random_uniform. PiperOrigin-RevId: 159771215",random_poisson_op.cc,"@@ -303,10 +303,6 @@ class RandomPoissonOp : public OpKernel { const auto rate_flat = rate_t.flat().data(); const int64 num_rate = rate_t.NumElements(); - OP_REQUIRES( - ctx, num_rate > 0, - errors::InvalidArgument( - ""Input rate should have non-zero element count, got: "", num_rate)); auto samples_flat = samples_t->flat().data(); random::PhiloxRandom rng = generator_.ReserveRandomOutputs( num_samples * num_rate, kReservedSamplesPerOutput); ",0,train 52581df7928b137e4831c26696fea8634dbaefb0,tensorflow/tensorflow,"Support zero shapes for random_poisson. This matches random_uniform. PiperOrigin-RevId: 159771215",random_poisson_test.py,"@@ -131,8 +131,14 @@ class RandomPoissonTest(test.TestCase): # be at least 1 if they are different. self.assertGreaterEqual(np.linalg.norm(diff.eval()), 1) + def testZeroShape(self): + with self.test_session(): + rnd = random_ops.random_poisson([], [], seed=12345) + self.assertEqual([0], rnd.get_shape().as_list()) + self.assertAllClose(np.array([], dtype=np.float32), rnd.eval()) + def testShape(self): - # Fully known shape. + # Fully known shape rnd = random_ops.random_poisson(2.0, [150], seed=12345) self.assertEqual([150], rnd.get_shape().as_list()) rnd = random_ops.random_poisson( ",0,train c90a24463ded1d22fc7b029a37d481dc3a626da6,tensorflow/tensorflow,"Make GRPC client events wait for requirement events before waiting for itself. PiperOrigin-RevId: 281374353 Change-Id: I84483a658683b52091f488b7ed47a5a4d7833f3b",grpc_tpu_driver.cc,"@@ -215,10 +215,13 @@ class GrpcTpuStream { friend class GrpcTpuDriver; struct EventInfo { + bool all_deps_done = false; bool done = false; // response received bool deleted = false; // deleted by the user Status status; absl::InlinedVector, 1> callbacks; + // Most events should have <= 2 requirement events. + absl::InlinedVector deps; }; struct TransferInfo { @@ -491,13 +494,22 @@ GrpcTpuStream::~GrpcTpuStream() { void GrpcTpuStream::InitializeRequest(StreamRequest::Entry* req, absl::Span wait_for) { auto operation_id = driver_->NewOperationId(); + EventInfo event_info; + req->set_operation_id(operation_id.AsInt()); - for (auto* event : wait_for) { - auto grpc_event = static_cast(event); - req->add_wait_for_id(grpc_event->id().AsInt()); + if (wait_for.empty()) { + event_info.all_deps_done = true; + } else { + event_info.deps.reserve(wait_for.size()); + for (auto* event : wait_for) { + auto grpc_event = static_cast(event); + req->add_wait_for_id(grpc_event->id().AsInt()); + event_info.deps.push_back(grpc_event->id()); + } } + absl::MutexLock lock(&events_mutex_); - events_[EventId::FromInt(req->operation_id())] = EventInfo(); + events_[operation_id] = event_info; } void GrpcTpuStream::UpdateEventStatus(EventId id, Status status) { @@ -551,16 +563,46 @@ void GrpcTpuStream::DeleteEvent(EventId id) { absl::optional GrpcTpuStream::WaitForEvent(EventId id, absl::Duration duration) { - absl::MutexLock lock(&events_mutex_); + events_mutex_.Lock(); + auto it = events_.find(id); + + if (it == events_.end()) { + // This event has already been marked as done and deleted. Assume success. 
+ events_mutex_.Unlock(); + return Status::OK(); + } + + if (!it->second.all_deps_done) { + absl::InlinedVector deps = it->second.deps; + events_mutex_.Unlock(); + for (auto dep : deps) { + // If a requirement event timed out, no point in any further waiting. + if (!WaitForEvent(dep, duration)) { + return absl::nullopt; + } + } + events_mutex_.Lock(); + } + + // Set the flag here, as we're guaranteed they have all completed at this + // point. This helps terminate recursion on a chain of completed events as + // soon as possible, at this event. + it = events_.find(id); + if (it != events_.end()) { + it->second.all_deps_done = true; + } + auto done = [this, id]() { events_mutex_.AssertHeld(); return !events_.contains(id) || events_[id].done; }; - if (events_mutex_.AwaitWithTimeout(absl::Condition(&done), duration)) { - return events_.contains(id) ? events_[id].status : Status(); + auto status = events_.contains(id) ? events_[id].status : Status::OK(); + events_mutex_.Unlock(); + return status; } - return absl::optional(); + events_mutex_.Unlock(); + return absl::nullopt; } void GrpcTpuStream::AddEventCallback(EventId id, ",0,train c90a24463ded1d22fc7b029a37d481dc3a626da6,tensorflow/tensorflow,"Make GRPC client events wait for requirement events before waiting for itself. PiperOrigin-RevId: 281374353 Change-Id: I84483a658683b52091f488b7ed47a5a4d7833f3b",tpu_driver.h,"@@ -52,8 +52,9 @@ class Event { public: virtual ~Event() {} - // Block until the event completes and returns the result status. + // Blocks until the event completes and returns the result status. virtual xla::Status Await() = 0; + // Returns an empty result if the wait times out. virtual absl::optional AwaitWithTimeout( absl::Duration duration) = 0; ",0,train 680966059e7a5ddc70f1ec4f10e7b19c64c60e4b,tensorflow/tensorflow,"Added a check for output_buffer_size <= 1 for ZlibOutputBuffer. Also adding some tests for Zlib compression reading / writing. Change: 132370925",record_reader_writer_test.cc,"@@ -67,4 +67,42 @@ TEST(RecordReaderWriterTest, TestBasics) { } } +TEST(RecordReaderWriterTest, TestZlib) { + Env* env = Env::Default(); + string fname = testing::TmpDir() + ""/record_reader_writer_zlib_test""; + + for (auto buf_size : BufferSizes()) { + // Zlib compression needs output buffer size > 1. + if (buf_size == 1) continue; + { + std::unique_ptr file; + TF_CHECK_OK(env->NewWritableFile(fname, &file)); + + io::RecordWriterOptions options; + options.compression_type = io::RecordWriterOptions::ZLIB_COMPRESSION; + options.zlib_options.output_buffer_size = buf_size; + io::RecordWriter writer(file.get(), options); + writer.WriteRecord(""abc""); + writer.WriteRecord(""defg""); + TF_CHECK_OK(writer.Flush()); + } + + { + std::unique_ptr read_file; + // Read it back with the RecordReader. + TF_CHECK_OK(env->NewRandomAccessFile(fname, &read_file)); + io::RecordReaderOptions options; + options.compression_type = io::RecordReaderOptions::ZLIB_COMPRESSION; + options.zlib_options.input_buffer_size = buf_size; + io::RecordReader reader(read_file.get(), options); + uint64 offset = 0; + string record; + TF_CHECK_OK(reader.ReadRecord(&offset, &record)); + EXPECT_EQ(""abc"", record); + TF_CHECK_OK(reader.ReadRecord(&offset, &record)); + EXPECT_EQ(""defg"", record); + } + } +} + } // namespace tensorflow ",0,train 680966059e7a5ddc70f1ec4f10e7b19c64c60e4b,tensorflow/tensorflow,"Added a check for output_buffer_size <= 1 for ZlibOutputBuffer. Also adding some tests for Zlib compression reading / writing. 
Change: 132370925",record_writer.cc,"@@ -33,6 +33,11 @@ RecordWriter::RecordWriter(WritableFile* dest, zlib_output_buffer_.reset(new ZlibOutputBuffer( dest_, options.zlib_options.input_buffer_size, options.zlib_options.output_buffer_size, options.zlib_options)); + Status s = zlib_output_buffer_->Init(); + if (!s.ok()) { + LOG(FATAL) << ""Failed to initialize Zlib inputbuffer. Error: "" + << s.ToString(); + } #endif // IS_SLIM_BUILD } else if (options.compression_type == RecordWriterOptions::NONE) { // Nothing to do ",0,train 680966059e7a5ddc70f1ec4f10e7b19c64c60e4b,tensorflow/tensorflow,"Added a check for output_buffer_size <= 1 for ZlibOutputBuffer. Also adding some tests for Zlib compression reading / writing. Change: 132370925",zlib_buffers_test.cc,"@@ -73,6 +73,7 @@ void TestAllCombinations(CompressionOptions input_options, ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size, output_options); + TF_CHECK_OK(out.Init()); TF_CHECK_OK(out.Write(StringPiece(data))); TF_CHECK_OK(out.Close()); @@ -120,6 +121,7 @@ void TestMultipleWrites(uint8 input_buf_size, uint8 output_buf_size, TF_CHECK_OK(env->NewWritableFile(fname, &file_writer)); ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size, output_options); + TF_CHECK_OK(out.Init()); for (int i = 0; i < num_writes; i++) { TF_CHECK_OK(out.Write(StringPiece(data))); @@ -172,6 +174,7 @@ TEST(ZlibInputStream, FailsToReadIfWindowBitsAreIncompatible) { string result; ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size, output_options); + TF_CHECK_OK(out.Init()); TF_CHECK_OK(out.Write(StringPiece(data))); TF_CHECK_OK(out.Close()); ",0,train 680966059e7a5ddc70f1ec4f10e7b19c64c60e4b,tensorflow/tensorflow,"Added a check for output_buffer_size <= 1 for ZlibOutputBuffer. Also adding some tests for Zlib compression reading / writing. Change: 132370925",zlib_outputbuffer.cc,"@@ -15,6 +15,8 @@ limitations under the License. #include ""tensorflow/core/lib/io/zlib_outputbuffer.h"" +#include ""tensorflow/core/lib/core/errors.h"" + namespace tensorflow { namespace io { @@ -25,35 +27,45 @@ ZlibOutputBuffer::ZlibOutputBuffer( const ZlibCompressionOptions& zlib_options) // size of z_stream.next_out buffer : file_(file), + init_status_(), input_buffer_capacity_(input_buffer_bytes), output_buffer_capacity_(output_buffer_bytes), z_stream_input_(new Bytef[input_buffer_bytes]), z_stream_output_(new Bytef[output_buffer_bytes]), zlib_options_(zlib_options), - z_stream_(new z_stream) { + z_stream_(new z_stream) {} + +ZlibOutputBuffer::~ZlibOutputBuffer() { + if (z_stream_.get()) { + LOG(WARNING) << ""ZlibOutputBuffer::Close() not called. Possible data loss""; + } +} + +Status ZlibOutputBuffer::Init() { + // Output buffer size should be greater than 1 because deflation needs atleast + // one byte for book keeping etc. 
+ if (output_buffer_capacity_ <= 1) { + return errors::InvalidArgument( + ""output_buffer_bytes should be greater than "" + ""1""); + } memset(z_stream_.get(), 0, sizeof(z_stream)); z_stream_->zalloc = Z_NULL; z_stream_->zfree = Z_NULL; z_stream_->opaque = Z_NULL; int status = - deflateInit2(z_stream_.get(), zlib_options.compression_level, - zlib_options.compression_method, zlib_options.window_bits, - zlib_options.mem_level, zlib_options.compression_strategy); + deflateInit2(z_stream_.get(), zlib_options_.compression_level, + zlib_options_.compression_method, zlib_options_.window_bits, + zlib_options_.mem_level, zlib_options_.compression_strategy); if (status != Z_OK) { - LOG(FATAL) << ""deflateInit failed with status "" << status; z_stream_.reset(NULL); - } else { - z_stream_->next_in = z_stream_input_.get(); - z_stream_->next_out = z_stream_output_.get(); - z_stream_->avail_in = 0; - z_stream_->avail_out = output_buffer_capacity_; - } -} - -ZlibOutputBuffer::~ZlibOutputBuffer() { - if (z_stream_.get()) { - LOG(WARNING) << ""ZlibOutputBuffer::Close() not called. Possible data loss""; + return errors::InvalidArgument(""deflateInit failed with status"", status); } + z_stream_->next_in = z_stream_input_.get(); + z_stream_->next_out = z_stream_output_.get(); + z_stream_->avail_in = 0; + z_stream_->avail_out = output_buffer_capacity_; + return Status::OK(); } int32 ZlibOutputBuffer::AvailableInputSpace() const { ",0,train 680966059e7a5ddc70f1ec4f10e7b19c64c60e4b,tensorflow/tensorflow,"Added a check for output_buffer_size <= 1 for ZlibOutputBuffer. Also adding some tests for Zlib compression reading / writing. Change: 132370925",zlib_outputbuffer.h,"@@ -45,6 +45,7 @@ class ZlibOutputBuffer { // 2. the deflated output // with sizes `input_buffer_bytes` and `output_buffer_bytes` respectively. // Does not take ownership of `file`. + // output_buffer_bytes should be greater than 1. ZlibOutputBuffer( WritableFile* file, int32 input_buffer_bytes, // size of z_stream.next_in buffer @@ -53,6 +54,10 @@ class ZlibOutputBuffer { ~ZlibOutputBuffer(); + // Initializes some state necessary for the output buffer. This call is + // required before any other operation on the buffer. + Status Init(); + // Adds `data` to the compression pipeline. // // The input data is buffered in `z_stream_input_` and is compressed in bulk @@ -78,6 +83,7 @@ class ZlibOutputBuffer { private: WritableFile* file_; // Not owned + Status init_status_; size_t input_buffer_capacity_; size_t output_buffer_capacity_; ",0,train af08f0ae55a7e4cc9f539dbd41266ac41903b7ef,tensorflow/tensorflow,"Avoid implicit double promotion in portable_tensor_utils.cc PiperOrigin-RevId: 389050862 Change-Id: Ife07730df682baec4882ecb72db182173a3a8d7a",portable_tensor_utils.cc,"@@ -484,9 +484,9 @@ void PortableApplyLayerNormFloat(const int16_t* input, float stddev_inv = 0.0f; const float variance = sum_sq / n_input - mean * mean; if (variance == 0) { - stddev_inv = 1.0f / sqrt(1e-8f); + stddev_inv = 1.0f / std::sqrt(1e-8f); } else { - stddev_inv = 1.0f / sqrt(variance); + stddev_inv = 1.0f / std::sqrt(variance); } for (int i = 0; i < n_input; ++i) { const int index = batch * n_input + i; ",0,train b1c9e600e02b93885dbebfa5dae92436c63d6c03,tensorflow/tensorflow,"[XLA] Add range check for xla::Array<> indexing. 
PiperOrigin-RevId: 356981991 Change-Id: I73343a8776b0df0f2570bcd596247164c8588cb9",array.h,"@@ -561,6 +561,7 @@ class Array { index *= sizes_[i]; index += indexes[i]; } + DCHECK_LT(index, this->num_elements()); return index; } ",0,train 82e53ebecdac677b37cd9316f8d1be5b1627eec3,tensorflow/tensorflow,"Fix sparse case of ProximalGradientDescent not being plumbed correctly Change: 127908886",proximal_gradient_descent.py,"@@ -68,9 +68,14 @@ class ProximalGradientDescentOptimizer(optimizer.Optimizer): use_locking=self._use_locking).op def _apply_sparse(self, grad, var): - delta = ops.IndexedSlices(grad.values * self._learning_rate_tensor, - grad.indices, grad.dense_shape) - return var.scatter_sub(delta, use_locking=self._use_locking) + return training_ops.sparse_apply_proximal_gradient_descent( + var, + self._learning_rate_tensor, + self._l1_regularization_strength_tensor, + self._l2_regularization_strength_tensor, + grad.values, + grad.indices, + use_locking=self._use_locking).op def _prepare(self): self._learning_rate_tensor = ops.convert_to_tensor(self._learning_rate, ",0,train f5a9d24c847ffcc7ae09e850aad39e2cb55ae4f3,tensorflow/tensorflow,"Added shape inference for tf_device.LaunchOp. PiperOrigin-RevId: 317875384 Change-Id: Idb070c9e92d07ee19cd8ed26c1beec3de86f43df",shape_inference.cc,"@@ -215,6 +215,10 @@ bool InferShapeForNonTFDialectOperation(Operation* op, Dialect* tf_dialect) { return InferShapeForPassThroughOps( tensor_cast.getOperation()->getOperands(), op, tf_dialect); } + if (auto launch_op = dyn_cast(op)) { + return InferShapeForPassThroughOps( + launch_op.GetBody().getTerminator()->getOperands(), op, tf_dialect); + } return false; } ",0,test 8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR. Keep track of Tensorflow devices available at runtime in class available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information. PiperOrigin-RevId: 300227160 Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",tf_ops.cc,"@@ -57,6 +57,7 @@ limitations under the License. #include ""mlir/Support/LogicalResult.h"" // TF:llvm-project #include ""mlir/Support/STLExtras.h"" // TF:llvm-project #include ""mlir/Transforms/InliningUtils.h"" // TF:llvm-project +#include ""tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h"" #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_types.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/util/tensor_format.h"" @@ -1000,6 +1001,11 @@ LogicalResult Conv2DOp::UpdateDataFormat(StringRef data_format) { return success(); } +StringRef Conv2DOp::GetOptimalLayout(const RuntimeDevices &devices) { + // TODO(ezhulenev): Implement optimal layout selection. + return """"; +} + //===----------------------------------------------------------------------===// // Conv2dBackpropInputOp //===----------------------------------------------------------------------===// ",0,test 8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR. Keep track of Tensorflow devices available at runtime in class available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information. PiperOrigin-RevId: 300227160 Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",tf_ops.h,"@@ -29,6 +29,7 @@ limitations under the License. 
#include ""mlir/IR/OpImplementation.h"" // TF:llvm-project #include ""mlir/IR/StandardTypes.h"" // TF:llvm-project #include ""mlir/IR/TypeUtilities.h"" // TF:llvm-project +#include ""tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h"" #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h"" #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_types.h"" #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_verifiers.h"" ",0,test 8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR. Keep track of Tensorflow devices available at runtime in class available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information. PiperOrigin-RevId: 300227160 Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",tf_structs.cc,"@@ -20,4 +20,27 @@ namespace mlir { // NOLINTNEXTLINE #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_structs.cc.inc"" +namespace TF { + +void RuntimeDevices::AddDevice(const ParsedName& device) { + device_names_.push_back(device); +} + +void RuntimeDevices::AddGpuDevice(const ParsedName& device, + const GpuDeviceMetadata& metadata) { + device_names_.push_back(device); + gpu_metadata_.insert({DeviceNameUtils::ParsedNameToString(device), metadata}); +} + +llvm::Optional RuntimeDevices::GetGpuDeviceMetadata( + const ParsedName& device) const { + auto it = gpu_metadata_.find(DeviceNameUtils::ParsedNameToString(device)); + if (it != gpu_metadata_.end()) { + return it->second; + } else { + return llvm::None; + } +} + +} // namespace TF } // namespace mlir ",0,test 8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR. Keep track of Tensorflow devices available at runtime in class available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information. PiperOrigin-RevId: 300227160 Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",tf_structs.h,"@@ -18,16 +18,50 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_STRUCTS_H_ #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_STRUCTS_H_ +#include ""llvm/ADT/StringMap.h"" #include ""mlir/IR/Diagnostics.h"" // TF:llvm-project #include ""mlir/IR/Location.h"" // TF:llvm-project #include ""mlir/IR/Operation.h"" // TF:llvm-project #include ""mlir/IR/StandardTypes.h"" // TF:llvm-project #include ""mlir/IR/Types.h"" // TF:llvm-project +#include ""tensorflow/core/util/device_name_utils.h"" namespace mlir { #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h.inc"" -} // end namespace mlir +namespace TF { + +// Tensorflow devices available at runtime with corresponding metadata if it is +// available. It's completely valid to have a device without any metadata +// attached to it. +class RuntimeDevices { + using DeviceNameUtils = ::tensorflow::DeviceNameUtils; + using ParsedName = ::tensorflow::DeviceNameUtils::ParsedName; + + public: + // Adds a device with and empty metadata. Device can be of any type. + void AddDevice(const ParsedName& device); + + // Adds a GPU device with GPU specific metadata. + void AddGpuDevice(const ParsedName& device, + const GpuDeviceMetadata& metadata); + + llvm::ArrayRef device_names() const { return device_names_; } + size_t NumDevices() const { return device_names_.size(); } + + // Returns GPU device metadata if it is available, otherwise returns None. 
+ llvm::Optional GetGpuDeviceMetadata( + const ParsedName& device) const; + + private: + llvm::SmallVector device_names_; + // TODO(ezhulenev): Add DenseMapInfo specialization to be able to + // use ParsedName as a key in a DenseMap. + llvm::StringMap gpu_metadata_; +}; + +} // namespace TF +} // namespace mlir #endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_STRUCTS_H_ ",0,test 8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR. Keep track of Tensorflow devices available at runtime in class available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information. PiperOrigin-RevId: 300227160 Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",tpu_rewrite_pass.cc,"@@ -723,13 +723,14 @@ LogicalResult Rewrite( } void TPURewritePass::runOnModule() { - llvm::SmallVector devices; + mlir::TF::RuntimeDevices devices; if (failed(tensorflow::GetDevicesFromOp(getModule(), &devices))) return signalPassFailure(); OpBuilder builder(&getContext()); auto result = getModule().walk([&](tf_device::LaunchFuncOp op) { - if (failed(Rewrite(op, devices, &builder))) return WalkResult::interrupt(); + if (failed(Rewrite(op, devices.device_names(), &builder))) + return WalkResult::interrupt(); return WalkResult::advance(); }); ",0,test 8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR. Keep track of Tensorflow devices available at runtime in class available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information. PiperOrigin-RevId: 300227160 Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",device_util.cc,"@@ -37,8 +37,6 @@ constexpr char kDevicesAttr[] = ""tf.devices""; namespace { -using DeviceNames = llvm::SmallVectorImpl; - // Parse GPU compute capability from physical device description. If compute // capability is not found in device description, return an empty dictionary // attribute. @@ -58,11 +56,13 @@ mlir::DictionaryAttr ParseGpuDeviceMetadata(const Device& device, return builder->getDictionaryAttr({}); } -// Get device names from an array of string attributes. +// Get devices from an array of string attributes. +// TODO(ezhulenev): Update all tests to use dictionary attribute for +// `tf.devices` and remove this function. mlir::LogicalResult GetDevicesFromOp(mlir::Operation* op, mlir::ArrayAttr array_attr, - DeviceNames* devices) { - devices->resize(array_attr.size()); + mlir::TF::RuntimeDevices* devices) { + DeviceNameUtils::ParsedName device; for (auto& kv : llvm::enumerate(array_attr)) { const int idx = kv.index(); @@ -72,30 +72,39 @@ mlir::LogicalResult GetDevicesFromOp(mlir::Operation* op, return op->emitOpError(llvm::formatv( ""bad '{0}' attribute at index {1}, not a string"", kDevicesAttr, idx)); - if (!DeviceNameUtils::ParseFullName(string_attr.getValue().str(), - &(*devices)[idx])) + if (DeviceNameUtils::ParseFullName(string_attr.getValue().str(), &device)) { + devices->AddDevice(device); + } else { return op->emitOpError( llvm::formatv(""bad '{0}' attribute, '{1}', not a valid device"", kDevicesAttr, string_attr.getValue())); + } } return mlir::success(); } -// Get device names from a metadata dictionary. +// Get devices from a dictionary attribute. 
mlir::LogicalResult GetDevicesFromOp(mlir::Operation* op, mlir::DictionaryAttr dict_attr, - DeviceNames* devices) { - devices->resize(dict_attr.size()); + mlir::TF::RuntimeDevices* devices) { + DeviceNameUtils::ParsedName device; // Parse device names and metadata from dictionary attribute. - for (auto& kv : llvm::enumerate(dict_attr)) { - const mlir::Identifier name = kv.value().first; + for (auto& kv : dict_attr) { + const mlir::Identifier name = kv.first; + const mlir::Attribute attr = kv.second; - if (!DeviceNameUtils::ParseFullName(name.str(), &(*devices)[kv.index()])) + if (!DeviceNameUtils::ParseFullName(name.str(), &device)) return op->emitOpError( llvm::formatv(""bad '{0}' attribute, '{1}', not a valid device"", kDevicesAttr, name.strref())); + + if (auto gpu_metadata = attr.dyn_cast()) { + devices->AddGpuDevice(device, gpu_metadata); + } else { + devices->AddDevice(device); + } } return mlir::success(); @@ -131,7 +140,7 @@ void AddDevicesToOp(mlir::Operation* op, const DeviceSet* device_set) { } mlir::LogicalResult GetDevicesFromOp(mlir::Operation* op, - DeviceNames* devices) { + mlir::TF::RuntimeDevices* devices) { auto devices_attr = op->getAttr(kDevicesAttr); if (!devices_attr) return mlir::success(); @@ -146,15 +155,4 @@ mlir::LogicalResult GetDevicesFromOp(mlir::Operation* op, llvm::formatv(""unsupported '{0}' attribute"", kDevicesAttr)); } -llvm::Optional GetGpuDeviceMetadata( - mlir::Operation* op, const DeviceNameUtils::ParsedName& device) { - auto metadata = op->getAttrOfType(kDevicesAttr); - if (!metadata) return llvm::None; - - auto device_attr = metadata.get(DeviceNameUtils::ParsedNameToString(device)); - if (!device_attr) return llvm::None; - - return device_attr.dyn_cast(); -} - } // namespace tensorflow ",0,test 8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR. Keep track of Tensorflow devices available at runtime in class available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information. PiperOrigin-RevId: 300227160 Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",device_util.h,"@@ -36,16 +36,10 @@ namespace tensorflow { // (1) GpuDeviceMetadata: GPU device compute capability. void AddDevicesToOp(mlir::Operation* op, const DeviceSet* device_set); -// Collects devices as DeviceNameUtils::ParsedName from an op `tf.devices` -// attribute. A failure will be returned if device name is not valid. -mlir::LogicalResult GetDevicesFromOp( - mlir::Operation* op, - llvm::SmallVectorImpl* devices); - -// Returns GPU device metadata for the parsed device name if it exists in the -// device metadata attributes, returns None otherwise. -llvm::Optional GetGpuDeviceMetadata( - mlir::Operation* op, const DeviceNameUtils::ParsedName& device); +// Collects devices information from an op `tf.devices` attributes. Returns +// failure if can't parse device metadata from the attribute. +mlir::LogicalResult GetDevicesFromOp(mlir::Operation* op, + mlir::TF::RuntimeDevices* devices); } // namespace tensorflow ",0,test 8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR. Keep track of Tensorflow devices available at runtime in class available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information. 
PiperOrigin-RevId: 300227160 Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",device_util_test.cc,"@@ -113,7 +113,7 @@ TEST(DeviceUtilTest, GetDevicesFromOpNoDevicesAttribute) { mlir::OwningModuleRef module_ref = mlir::ModuleOp::create(mlir::UnknownLoc::get(&context)); - llvm::SmallVector devices; + mlir::TF::RuntimeDevices devices; EXPECT_TRUE(mlir::succeeded(GetDevicesFromOp(*module_ref, &devices))); } @@ -124,7 +124,7 @@ TEST(DeviceUtilTest, GetDevicesFromOpBadDevicesAttributeType) { mlir::Builder builder(*module_ref); module_ref->setAttr(""tf.devices"", builder.getBoolAttr(false)); - llvm::SmallVector devices; + mlir::TF::RuntimeDevices devices; EXPECT_TRUE(mlir::failed(GetDevicesFromOp(*module_ref, &devices))); } @@ -135,7 +135,7 @@ TEST(DeviceUtilTest, GetDevicesFromOpBadDevicesAttributeArraySubtype) { mlir::Builder builder(*module_ref); module_ref->setAttr(""tf.devices"", builder.getI32ArrayAttr({8})); - llvm::SmallVector devices; + mlir::TF::RuntimeDevices devices; EXPECT_TRUE(mlir::failed(GetDevicesFromOp(*module_ref, &devices))); } @@ -148,7 +148,7 @@ TEST(DeviceUtilTest, GetDevicesFromOpBadDevicesInDevicesAttribute) { builder.getDictionaryAttr(builder.getNamedAttr( ""bad_device"", builder.getDictionaryAttr({})))); - llvm::SmallVector devices; + mlir::TF::RuntimeDevices devices; EXPECT_TRUE(mlir::failed(GetDevicesFromOp(*module_ref, &devices))); } @@ -163,10 +163,12 @@ TEST(DeviceUtilTest, GetDevicesFromOpValidDeviceInDevicesAttribute) { builder.getDictionaryAttr({}))}); module_ref->setAttr(""tf.devices"", device_dict); - llvm::SmallVector devices; + mlir::TF::RuntimeDevices devices; EXPECT_TRUE(mlir::succeeded(GetDevicesFromOp(*module_ref, &devices))); - ASSERT_EQ(devices.size(), 1); - EXPECT_EQ(DeviceNameUtils::ParsedNameToString(devices[0]), + + ASSERT_EQ(devices.NumDevices(), 1); + ASSERT_EQ(devices.device_names().size(), 1); + ASSERT_EQ(DeviceNameUtils::ParsedNameToString(devices.device_names()[0]), ""/job:worker/replica:0/task:0/device:CPU:0""); } @@ -188,15 +190,18 @@ TEST(DeviceUtilTest, GetGpuDeviceMetadata) { module_ref->setAttr(""tf.devices"", builder.getDictionaryAttr(metadata)); + mlir::TF::RuntimeDevices devices; + EXPECT_TRUE(mlir::succeeded(GetDevicesFromOp(*module_ref, &devices))); + DeviceNameUtils::ParsedName parsed_name; DeviceNameUtils::ParseFullName(gpu0, &parsed_name); - auto meta_0 = GetGpuDeviceMetadata(*module_ref, parsed_name); + auto meta_0 = devices.GetGpuDeviceMetadata(parsed_name); ASSERT_TRUE(meta_0.hasValue()); ASSERT_EQ(meta_0->cc_major().getInt(), 1); ASSERT_EQ(meta_0->cc_minor().getInt(), 2); DeviceNameUtils::ParseFullName(gpu1, &parsed_name); - auto meta_1 = GetGpuDeviceMetadata(*module_ref, parsed_name); + auto meta_1 = devices.GetGpuDeviceMetadata(parsed_name); ASSERT_FALSE(meta_1.hasValue()); } ",0,test 1fd93618d4e40ee126cc4406ade66782a6f632a8,tensorflow/tensorflow,"[tf.data service] Add a Lint check to keep policies in sync. PiperOrigin-RevId: 386998531 Change-Id: I50496ce7022745318a84dd6fe9b61397c1295087",auto_shard_test.py,"@@ -501,12 +501,12 @@ class AutoShardTest(data_service_test_base.TestBase, _ = _make_service_cluster( num_workers=5, local_shard_index=1, worker_addresses=worker_addresses) - # TODO(b/186023347): Use Lint to keep the policies in sync. 
@combinations.generate( combinations.times( test_base.default_test_combinations(), combinations.combine(sharding_policy=list(ShardingPolicy)))) def testEnumerateShardingPolicies(self, sharding_policy): + """"""Verifies tf.data service handles every sharding policy with no errors."""""" cluster = _make_service_cluster(num_workers=5, local_shard_index=3) dataset = dataset_ops.Dataset.list_files(self._filenames, shuffle=False) dataset = dataset.flat_map(readers.TFRecordDataset) ",0,train 1fd93618d4e40ee126cc4406ade66782a6f632a8,tensorflow/tensorflow,"[tf.data service] Add a Lint check to keep policies in sync. PiperOrigin-RevId: 386998531 Change-Id: I50496ce7022745318a84dd6fe9b61397c1295087",data_service_ops.py,"@@ -89,12 +89,14 @@ class ShardingPolicy(enum.IntEnum): placeholder to replace with `shard(num_workers, worker_index)`. """""" + # LINT.IfChange(tf_data_service_sharding_policy) OFF = 0 DYNAMIC = 1 FILE = 2 DATA = 3 FILE_OR_DATA = 4 HINT = 5 + # LINT.ThenChange() def _to_proto(self): """"""Converts the policy to ProcessingModeDef proto enum."""""" ",0,train 1fd93618d4e40ee126cc4406ade66782a6f632a8,tensorflow/tensorflow,"[tf.data service] Add a Lint check to keep policies in sync. PiperOrigin-RevId: 386998531 Change-Id: I50496ce7022745318a84dd6fe9b61397c1295087",distribute_options.py,"@@ -45,11 +45,14 @@ class AutoShardPolicy(enum.IntEnum): HINT: Looks for the presence of `shard(SHARD_HINT, ...)` which is treated as a placeholder to replace with `shard(num_workers, worker_index)`. """""" + + # LINT.IfChange OFF = -1 AUTO = 0 FILE = 1 DATA = 2 HINT = 3 + # LINT.ThenChange(//tensorflow/python/data/experimental/ops/data_service_ops.py:tf_data_service_sharding_policy) @classmethod def _to_proto(cls, obj): ",0,train eef4e3acc83c73963258ce848d1b9bad8021e036,tensorflow/tensorflow,Added pytest for Bfloat16,sparse_xent_op_test.py,"@@ -182,6 +182,23 @@ class SparseXentTest(test.TestCase): np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float64), np.array([0, 3]).astype(label_dtype)) + def testBfloat(self): + for label_dtype in np.int32, np.int64: + np_features = np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]] + ).astype(np.float32) + np_labels = np.array([0, 3]).astype(label_dtype) + np_loss, np_backprop = self._npXent(np_features, np_labels) + + bf_np_features = math_ops.cast(np_features, dtypes.bfloat16) + bf_np_loss = math_ops.cast(np_loss, dtypes.bfloat16) + bf_np_backprop = math_ops.cast(np_backprop, dtypes.bfloat16) + with self.cached_session(use_gpu=False) as sess: + loss, backprop = gen_nn_ops.sparse_softmax_cross_entropy_with_logits( + bf_np_features, np_labels) + tf_loss, tf_backprop = self.evaluate([loss, backprop]) + self.assertAllCloseAccordingToType(bf_np_loss, tf_loss) + self.assertAllCloseAccordingToType(bf_np_backprop, tf_backprop) + def testHalf(self): for label_dtype in np.int32, np.int64: self._testXent( ",0,train 731ba067d07db04ecffb176f4310ce0f163dac14,tensorflow/tensorflow,"[XLA:CPU] Avoid UB in cpu_runtime PiperOrigin-RevId: 420339155 Change-Id: I91ee2affba5af35e731503ede3ea6669d093d713",cpu_runtime.cc,"@@ -587,13 +587,29 @@ class CpuAllReduceRendezvous } } + template + struct SumProductTypeForReductionStep { + using type = T; + }; + + template + struct SumProductTypeForReductionStep { + using type = typename std::make_unsigned_t; + }; + template T PerformReductionStep(xla::ReductionKind reduction_kind, T a, T b) { + using SumProductType = typename SumProductTypeForReductionStep< + T, std::is_integral::value && std::is_signed::value>::type; switch (reduction_kind) 
{ case xla::ReductionKind::SUM: - return a + b; + return absl::bit_cast( + static_cast(absl::bit_cast(a) + + absl::bit_cast(b))); case xla::ReductionKind::PRODUCT: - return a * b; + return absl::bit_cast( + static_cast(absl::bit_cast(a) * + absl::bit_cast(b))); case xla::ReductionKind::MIN: return std::min(a, b); case xla::ReductionKind::MAX: ",0,test 2b482a4bd7bd69cee5775278ab7b4d22563b6e81,tensorflow/tensorflow,Keras: Replace trivial control flow with tf.where,metrics.py,"@@ -12,13 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -# pylint: disable=unused-import # pylint: disable=g-classes-have-attributes # pylint: disable=g-doc-return-or-yield """"""Built-in metrics."""""" import abc -import math import types import warnings @@ -33,7 +31,6 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape -from tensorflow.python.framework import tensor_spec from tensorflow.python.keras import activations from tensorflow.python.keras import backend from tensorflow.python.keras.engine import base_layer @@ -56,7 +53,6 @@ from tensorflow.python.keras.saving.saved_model import metric_serialization from tensorflow.python.keras.utils import generic_utils from tensorflow.python.keras.utils import losses_utils from tensorflow.python.keras.utils import metrics_utils -from tensorflow.python.keras.utils import tf_inspect from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object from tensorflow.python.keras.utils.generic_utils import serialize_keras_object from tensorflow.python.keras.utils.generic_utils import to_list @@ -64,13 +60,11 @@ from tensorflow.python.keras.utils.tf_utils import is_tensor_or_variable from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import confusion_matrix -from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import variables as variables_module from tensorflow.python.ops import weights_broadcast_ops -from tensorflow.python.training.tracking import base as trackable from tensorflow.python.util import dispatch from tensorflow.python.util import nest from tensorflow.python.util.tf_export import keras_export @@ -1575,13 +1569,11 @@ class SensitivitySpecificityBase(Metric, metaclass=abc.ABCMeta): Returns maximal dependent value, if no value satiesfies the constraint 0.0. 
"""""" - feasible = array_ops.where(predicate(constrained, self.value)) + feasible = array_ops.where_v2(predicate(constrained, self.value)) feasible_exists = math_ops.greater(array_ops.size(feasible), 0) + max_dependent = math_ops.reduce_max(array_ops.gather(dependent, feasible)) - def get_max(): - return math_ops.reduce_max(array_ops.gather(dependent, feasible)) - - return control_flow_ops.cond(feasible_exists, get_max, lambda: 0.0) + return array_ops.where_v2(feasible_exists, max_dependent, 0.0) @keras_export('keras.metrics.SensitivityAtSpecificity') ",0,train 2b482a4bd7bd69cee5775278ab7b4d22563b6e81,tensorflow/tensorflow,Keras: Replace trivial control flow with tf.where,learning_rate_schedule.py,"@@ -20,6 +20,7 @@ import math from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.keras.utils import generic_utils +from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops @@ -416,9 +417,9 @@ class PolynomialDecay(LearningRateSchedule): if self.cycle: # Find the first multiple of decay_steps that is bigger than # global_step. If global_step is zero set the multiplier to 1 - multiplier = control_flow_ops.cond( - math_ops.equal(global_step_recomp, 0), lambda: 1.0, - lambda: math_ops.ceil(global_step_recomp / self.decay_steps)) + multiplier = array_ops.where_v2( + math_ops.equal(global_step_recomp, 0), 1.0, + math_ops.ceil(global_step_recomp / self.decay_steps)) decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier) else: # Make sure that the global_step used is not bigger than decay_steps. ",0,train 2b482a4bd7bd69cee5775278ab7b4d22563b6e81,tensorflow/tensorflow,Keras: Replace trivial control flow with tf.where,metrics_utils.py,"@@ -377,9 +377,8 @@ def update_confusion_matrix_variables(variables_to_update, num_labels = 1 else: num_labels = gen_math_ops.Prod(input=pred_shape[1:], axis=0) - thresh_label_tile = control_flow_ops.cond( - one_thresh, lambda: num_labels, - lambda: math_ops.cast(1, dtype=dtypes.int32)) + thresh_label_tile = array_ops.where_v2( + one_thresh, num_labels, array_ops.ones([], dtype=dtypes.int32)) # Reshape predictions and labels, adding a dim for thresholding. if multi_label: ",0,train 50eb1f5f289a77298b72a87d4aa74274e28c5a98,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-06-02 PiperOrigin-RevId: 377011648 Change-Id: Iff283dd9876abc4676061ef412769393df73fbd7",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 6, 1) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 6, 2) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,test aa0695e115a51d2ec1998c68463f83a9121f0ee1,tensorflow/tensorflow,"The original motivation seems to have been that inside fusions we don't have layouts and thus cannot replace reshapes and transposes with bitcasts. Therefore we preferred not fusing potential bitcasts. Now that we run layout assignment before fusion, we do already replace potential bitcasts with bitcasts, so this check is obsolete. 
PiperOrigin-RevId: 302648869 Change-Id: I2b07192afc06be3a7220797571c19e9e747482c9",gpu_fusible.cc,"@@ -239,13 +239,6 @@ bool IsProducerConsumerFusible(const HloInstruction& producer, !LayoutsAreReduceInputFusionFriendly(producer, consumer)) { return false; } - // We can't fuse library calls, so if a user of such an op could become a - // bitcast, leave it unfused. See `xla::InstructionFusion::ShouldFuse` for - // further rationale. - if (producer.CouldBeBitcast() && - ImplementedAsLibraryCall(*producer.operand(0))) { - return false; - } // Fuse scalar constants into loop fusion nodes. This reduces the number of // parameters and makes matching scalar broadcasts easier. // ",0,train aa0695e115a51d2ec1998c68463f83a9121f0ee1,tensorflow/tensorflow,"The original motivation seems to have been that inside fusions we don't have layouts and thus cannot replace reshapes and transposes with bitcasts. Therefore we preferred not fusing potential bitcasts. Now that we run layout assignment before fusion, we do already replace potential bitcasts with bitcasts, so this check is obsolete. PiperOrigin-RevId: 302648869 Change-Id: I2b07192afc06be3a7220797571c19e9e747482c9",instruction_fusion_test.cc,"@@ -109,21 +109,23 @@ TEST_F(InstructionFusionTest, EXPECT_THAT(computation->root_instruction(), op::Fusion()); } -TEST_F(InstructionFusionTest, PotentialBitcastReshapeOfDotUnfused) { +TEST_F(InstructionFusionTest, PotentialBitcastReshapeOfDotFused) { HloComputation::Builder builder(TestName()); auto param0 = builder.AddInstruction(HloInstruction::CreateParameter( - 0, ShapeUtil::MakeShape(S32, {1, 1}), ""0"")); + 0, ShapeUtil::MakeShape(F32, {1, 1}), ""0"")); auto dot1 = builder.AddInstruction( - CreateCanonicalDot(ShapeUtil::MakeShape(S32, {1, 1}), param0, param0)); + CreateCanonicalDot(ShapeUtil::MakeShape(F32, {1, 1}), param0, param0)); auto reshape2 = builder.AddInstruction(HloInstruction::CreateReshape( - ShapeUtil::MakeShape(S32, {1, 1, 1}), dot1)); + ShapeUtil::MakeShape(F32, {1, 1, 1}), dot1)); + auto log = builder.AddInstruction(HloInstruction::CreateUnary( + reshape2->shape(), xla::HloOpcode::kLog, reshape2)); auto module = CreateNewVerifiedModule(); auto computation = module->AddEntryComputation(builder.Build()); - EXPECT_EQ(reshape2, computation->root_instruction()); - EXPECT_FALSE(GpuInstructionFusion(/*may_duplicate=*/true) - .Run(module.get()) - .ValueOrDie()); + EXPECT_EQ(log, computation->root_instruction()); + EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); } TEST_F(InstructionFusionTest, PotentialBitcastTransposeOfDotUnfused) { ",0,train f4ebdaba60d2fb2698d14ee87960e3a6294be196,tensorflow/tensorflow,"Add missing license header Change: 117999025",optimizers_test.py,"@@ -1,3 +1,17 @@ +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== """"""Tests for optimizers."""""" from __future__ import absolute_import ",0,test 937c279fd9758ce75c43ad8d2d828475b496334a,tensorflow/tensorflow,"[tf.data] Fix MultiDeviceIterator initialization to use correct FLR. In the recent change to instantiate functions at iterator initialization time, I forgot to make the corresponding change was to MultiDeviceIterator. This change fixes that problem and unbreaks MultiDeviceIterator. PiperOrigin-RevId: 209838406",prefetching_kernels.cc,"@@ -548,7 +548,9 @@ class MultiDeviceIterator : public ResourceBase { devices_(devices), flib_def_(std::move(flib_def)), pflr_(std::move(pflr)), - lib_(lib) {} + lib_(lib) { + CHECK_NOTNULL(lib_); + } string DebugString() override { return strings::StrCat(""MultiDeviceIterator for "", devices_.size(), @@ -600,6 +602,11 @@ class MultiDeviceIterator : public ResourceBase { return lib_def_; } + FunctionLibraryRuntime* const lib() { + tf_shared_lock l(mu_); + return lib_; + } + private: // A private class that uses a background thread to keep a per device buffer // full. @@ -930,8 +937,10 @@ class MultiDeviceIteratorInitOp : public OpKernel { core::ScopedUnref unref(resource); std::unique_ptr iterator; - OP_REQUIRES_OK(ctx, dataset->MakeIterator(IteratorContext(ctx), ""Iterator"", - &iterator)); + IteratorContext iter_ctx(ctx); + iter_ctx.set_lib(resource->lib()); + OP_REQUIRES_OK( + ctx, dataset->MakeIterator(std::move(iter_ctx), ""Iterator"", &iterator)); int64 incarnation_id; OP_REQUIRES_OK(ctx, resource->Init(std::move(iterator), max_buffer_size, &incarnation_id)); ",0,train ddbb2c52db5cfab02b80b2ef563d8d6251dcfe77,tensorflow/tensorflow,"Fix a crash in Quantize() when tf.contrib.framework.get_name_scope() == None. PiperOrigin-RevId: 191068059",quantize.py,"@@ -416,7 +416,9 @@ def _InsertQuantOp(context, # name_prefix starts with 'TPUReplicate/loop/'; without dropping it # variables are created as TPUReplicate/loop/TPUReplicate/loop/..., which # breaks things later. - name_prefix = common.DropStringPrefix(name_prefix, ops.get_name_scope() + '/') + name_scope = ops.get_name_scope() + if name_scope: + name_prefix = common.DropStringPrefix(name_prefix, name_scope + '/') inputs = producer.outputs[0] # Prevent ops from being quantized multiple times. Bypass ops can sometimes ",0,train ddbb2c52db5cfab02b80b2ef563d8d6251dcfe77,tensorflow/tensorflow,"Fix a crash in Quantize() when tf.contrib.framework.get_name_scope() == None. PiperOrigin-RevId: 191068059",quantize_test.py,"@@ -247,6 +247,27 @@ class QuantizeTest(test_util.TensorFlowTestCase): self.assertTrue(not op.name.startswith('name_scope/name_scope/'), 'Broken op: %s' % op.name) + def testWithNullNameScope(self): + self._RunTestOverParameters(self._TestWithNullNameScope) + + def _TestWithNullNameScope(self, is_training): + graph = ops.Graph() + with graph.as_default(): + with graph.name_scope(None): + batch_size, height, width, depth = 5, 128, 128, 3 + input1 = array_ops.zeros((batch_size, height, width, depth)) + _ = conv2d( + input1, + 32, [5, 5], + stride=2, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=None, + scope='test') + + quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8) + # Passes if Quantize() does not crash. + def _WeightInit(self, stddev): """"""Returns truncated normal variable initializer. ",0,train 719ad3bfde3eae7169229853d3844155aa49f62f,tensorflow/tensorflow,"Ruy: Minor fix to x86 (AVX-512) code. 
Minor bug made possible by very poor type checking. PiperOrigin-RevId: 260925827",pack_avx512.cc,"@@ -337,8 +337,8 @@ inline void HalfPackFloatAvx512(const float* src_ptr, const float* zerobuf, // available_src_rows = std::max(0, std::min(8, src_rows - k - 8 * m)); // but treat each case separately. if (available_src_rows > 7) { - __m512i t0, t1, t2, t3; - __m512i r0, r1, r2, r3; + __m512 t0, t1, t2, t3; + __m512 r0, r1, r2, r3; t0 = LoaduTwo(src_ptr0, src_ptr4); t1 = LoaduTwo(src_ptr1, src_ptr5); @@ -376,8 +376,8 @@ inline void HalfPackFloatAvx512(const float* src_ptr, const float* zerobuf, const __mmask8 row_mask = (static_cast(1) << available_src_rows) - 1; - __m512i t0, t1, t2, t3; - __m512i r0, r1, r2, r3; + __m512 t0, t1, t2, t3; + __m512 r0, r1, r2, r3; t0 = MaskLoaduTwo(row_mask, src_ptr0, src_ptr4); t1 = MaskLoaduTwo(row_mask, src_ptr1, src_ptr5); ",0,train cd63c718be123324b6c39e0f8fbe453319799746,tensorflow/tensorflow,"[update] fix naming stuff in tensorflow/contrib/tensorrt/convert/convert_nodes.cc",convert_nodes.cc,"@@ -120,7 +120,7 @@ class TRT_ShapedWeights { type_(type), values_(values), owned_values_(owned_values ? *owned_values : std::vector({})), - dummy_flag_(false) { + empty_weight_flag_(false) { // Note: this->shape.type[] is not used } @@ -129,14 +129,14 @@ class TRT_ShapedWeights { type_(type), values_(nullptr), owned_values_(), - dummy_flag_(true) {} + empty_weight_flag_(true) {} TRT_ShapedWeights(const TRT_ShapedWeights& rhs) : shape_(rhs.shape_), type_(rhs.type_), values_(rhs.values_), owned_values_(rhs.owned_values_), - dummy_flag_(rhs.dummy_flag_) {} + empty_weight_flag_(rhs.empty_weight_flag_) {} int64_t count() const { int64_t c = 1; @@ -147,7 +147,7 @@ class TRT_ShapedWeights { nvinfer1::Weights GetWeightsForTRT() const { nvinfer1::DataType trt_type(nvinfer1::DataType::kFLOAT); TF_CHECK_OK(ConvertDType(type_, &trt_type)); - if (dummy_flag_) return nvinfer1::Weights{trt_type, nullptr, 0}; + if (empty_weight_flag_) return nvinfer1::Weights{trt_type, nullptr, 0}; // Note: this->shape.type[] is not used return nvinfer1::Weights{trt_type, GetValues(), GetShapeSize(shape_)}; @@ -178,39 +178,39 @@ class TRT_ShapedWeights { private: const void* values_; std::vector owned_values_; - bool dummy_flag_; + bool empty_weight_flag_; }; class TRT_TensorOrWeights { public: explicit TRT_TensorOrWeights(nvinfer1::ITensor* tensor) - : _tensor_(tensor), _weights_(DT_FLOAT), _variant_(TRT_NODE_TENSOR) {} + : tensor_(tensor), weights_(DT_FLOAT), variant_(TRT_NODE_TENSOR) {} explicit TRT_TensorOrWeights(const TRT_ShapedWeights& weights) - : _tensor_(nullptr), _weights_(weights), _variant_(TRT_NODE_WEIGHTS) {} + : tensor_(nullptr), weights_(weights), variant_(TRT_NODE_WEIGHTS) {} TRT_TensorOrWeights(const TRT_TensorOrWeights& rhs) - : _tensor_(rhs._tensor_), - _weights_(rhs._weights_), - _variant_(rhs._variant_) {} + : tensor_(rhs.tensor_), + weights_(rhs.weights_), + variant_(rhs.variant_) {} ~TRT_TensorOrWeights() {} - bool is_tensor() const { return _variant_ == TRT_NODE_TENSOR; } - bool is_weights() const { return _variant_ == TRT_NODE_WEIGHTS; } + bool is_tensor() const { return variant_ == TRT_NODE_TENSOR; } + bool is_weights() const { return variant_ == TRT_NODE_WEIGHTS; } nvinfer1::ITensor* tensor() { CHECK_EQ(is_tensor(), true); - return _tensor_; + return tensor_; } const nvinfer1::ITensor* tensor() const { CHECK_EQ(is_tensor(), true); - return _tensor_; + return tensor_; } TRT_ShapedWeights& weights() { CHECK_EQ(is_weights(), true); - return _weights_; + return weights_; } 
const TRT_ShapedWeights& weights() const { CHECK_EQ(is_weights(), true); - return _weights_; + return weights_; } nvinfer1::Dims shape() const { if (is_tensor()) { @@ -221,69 +221,35 @@ class TRT_TensorOrWeights { } private: - nvinfer1::ITensor* _tensor_; - TRT_ShapedWeights _weights_; - enum { TRT_NODE_TENSOR, TRT_NODE_WEIGHTS } _variant_; -}; - -class TRT_LayerOrWeights { - public: - explicit TRT_LayerOrWeights(nvinfer1::ILayer* layer) - : _layer_(layer), _variant_(TRT_NODE_LAYER) {} - explicit TRT_LayerOrWeights(const TRT_ShapedWeights& weights) - : _weights_(weights), _variant_(TRT_NODE_WEIGHTS) {} - bool is_layer() const { return _variant_ == TRT_NODE_LAYER; } - bool is_weights() const { return _variant_ == TRT_NODE_WEIGHTS; } - nvinfer1::ILayer* layer() { - CHECK_EQ(this->is_layer(), true); - return _layer_; - } - TRT_ShapedWeights& weights() { - CHECK_EQ(this->is_weights(), true); - return _weights_; - } - TRT_TensorOrWeights output(int index = 0) const { - if (this->is_layer()) { - nvinfer1::ITensor* tensor = _layer_->getOutput(index); - return TRT_TensorOrWeights(tensor); - } else { - CHECK_EQ(index, 0); - return TRT_TensorOrWeights(_weights_); - } - } - - private: - union { - nvinfer1::ILayer* _layer_; - TRT_ShapedWeights _weights_; - }; - enum { TRT_NODE_LAYER, TRT_NODE_WEIGHTS } _variant_; + nvinfer1::ITensor* tensor_; + TRT_ShapedWeights weights_; + enum { TRT_NODE_TENSOR, TRT_NODE_WEIGHTS } variant_; }; class TFAttrs { public: explicit TFAttrs(const tensorflow::NodeDef& tf_node) { for (const auto& attr : tf_node.attr()) { - _attrs.insert({attr.first, &attr.second}); + attrs_.insert({attr.first, &attr.second}); } } - bool count(string key) const { return _attrs.count(key); } + bool count(string key) const { return attrs_.count(key); } tensorflow::AttrValue const* at(string key) const { - if (!_attrs.count(key)) { + if (!attrs_.count(key)) { LOG(FATAL) << ""Attribute not found: "" << key; } - return _attrs.at(key); + return attrs_.at(key); } template T get(string key) const; template T get(string key, const T& default_value) const { - return _attrs.count(key) ? this->get(key) : default_value; + return attrs_.count(key) ? this->get(key) : default_value; } private: typedef std::map AttrMap; - AttrMap _attrs; + AttrMap attrs_; }; template <> @@ -385,10 +351,10 @@ using OpConverter = std::vector*)>; class Converter { - std::unordered_map _trt_tensors; - std::unordered_map _op_registry; - nvinfer1::INetworkDefinition* _trt_network; - std::list> _temp_bufs; + std::unordered_map trt_tensors_; + std::unordered_map op_registry_; + nvinfer1::INetworkDefinition* trt_network_; + std::list> temp_bufs_; void register_op_converters(); @@ -397,14 +363,14 @@ class Converter { std::vector inputs; for (const auto& input_name : node_def.input()) { VLOG(2) << ""Retrieve input: "" << input_name; - inputs.push_back(_trt_tensors.at(input_name)); + inputs.push_back(trt_tensors_.at(input_name)); } return inputs; } public: explicit Converter(nvinfer1::INetworkDefinition* trt_network) - : _trt_network(trt_network) { + : trt_network_(trt_network) { this->register_op_converters(); } @@ -412,8 +378,8 @@ class Converter { nvinfer1::Dims shape) { TRT_ShapedWeights weights(type, nullptr, shape); // TODO(jie): check weights size_bytes. 
0 means type error - _temp_bufs.push_back(std::vector(weights.size_bytes())); - weights.SetValues(_temp_bufs.back().data()); + temp_bufs_.push_back(std::vector(weights.size_bytes())); + weights.SetValues(temp_bufs_.back().data()); return weights; } @@ -424,11 +390,11 @@ class Converter { tensorflow::Status convert_node(const tensorflow::NodeDef& node_def) { std::vector inputs = this->get_inputs(node_def); string op = node_def.op(); - if (!_op_registry.count(op)) { + if (!op_registry_.count(op)) { return tensorflow::errors::Unimplemented( ""No converter registered for op: "" + op); } - OpConverter op_converter = _op_registry.at(op); + OpConverter op_converter = op_registry_.at(op); std::vector outputs; TF_RETURN_IF_ERROR(op_converter(*this, node_def, inputs, &outputs)); for (size_t i = 0; i < outputs.size(); ++i) { @@ -440,7 +406,7 @@ class Converter { output.tensor()->setName(output_name.c_str()); } VLOG(2) << ""Write out tensor: "" << output_name; - if (!_trt_tensors.insert({output_name, output}).second) { + if (!trt_tensors_.insert({output_name, output}).second) { return tensorflow::errors::AlreadyExists( ""Output tensor already exists for op: "" + op); } @@ -448,17 +414,17 @@ class Converter { return tensorflow::Status::OK(); } - nvinfer1::INetworkDefinition* network() { return _trt_network; } + nvinfer1::INetworkDefinition* network() { return trt_network_; } TRT_TensorOrWeights get_tensor(string name) { - if (!_trt_tensors.count(name)) { + if (!trt_tensors_.count(name)) { return TRT_TensorOrWeights(nullptr); } - return _trt_tensors.at(name); + return trt_tensors_.at(name); } bool insert_input_tensor(string name, nvinfer1::ITensor* tensor) { - return _trt_tensors.insert({name, TRT_TensorOrWeights(tensor)}).second; + return trt_tensors_.insert({name, TRT_TensorOrWeights(tensor)}).second; } nvinfer1::ITensor* TransposeTensor(nvinfer1::ITensor* input_tensor, @@ -1428,25 +1394,25 @@ tensorflow::Status ConvertPad(Converter& ctx, void Converter::register_op_converters() { // vgg_16 slim implementation - _op_registry[""Placeholder""] = ConvertPlaceholder; - _op_registry[""Conv2D""] = ConvertConv2D; - _op_registry[""Relu""] = ConvertActivation; - _op_registry[""MaxPool""] = ConvertPool; + op_registry_[""Placeholder""] = ConvertPlaceholder; + op_registry_[""Conv2D""] = ConvertConv2D; + op_registry_[""Relu""] = ConvertActivation; + op_registry_[""MaxPool""] = ConvertPool; // This could be really handled as ConvertBinary - _op_registry[""BiasAdd""] = ConvertScale; - _op_registry[""Const""] = ConvertConst; - // _op_registry[""MatMul""] = ConvertFullyConnected; // Not used in vgg + op_registry_[""BiasAdd""] = ConvertScale; + op_registry_[""Const""] = ConvertConst; + // op_registry_[""MatMul""] = ConvertFullyConnected; // Not used in vgg // TODO(ben,jie): this is a temp hack. 
- _op_registry[""Identity""] = ConvertIdentity; // Identity should be removed - // _op_registry[""AvgPool""] = ConvertPool; + op_registry_[""Identity""] = ConvertIdentity; // Identity should be removed + // op_registry_[""AvgPool""] = ConvertPool; // resnet_50_v1 slim implementation - _op_registry[""Add""] = ConvertBinary; - _op_registry[""Mul""] = ConvertBinary; - _op_registry[""Sub""] = ConvertBinary; - _op_registry[""Rsqrt""] = ConvertUnary; - _op_registry[""Mean""] = ConvertReduce; - _op_registry[""Pad""] = ConvertPad; + op_registry_[""Add""] = ConvertBinary; + op_registry_[""Mul""] = ConvertBinary; + op_registry_[""Sub""] = ConvertBinary; + op_registry_[""Rsqrt""] = ConvertUnary; + op_registry_[""Mean""] = ConvertReduce; + op_registry_[""Pad""] = ConvertPad; // TODO(ben,jie): Add more ops } @@ -1595,6 +1561,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( } VLOG(2) << ""Finished output""; + // TODO(jie): static_id is not thread safe. static int static_id = 0; // Build the engine ",0,train 5a297f5efea725fc5889132bafc39707f57aa7ec,tensorflow/tensorflow,"Update profiler API to allow user using it in graph mode. PiperOrigin-RevId: 233492307",c_api_experimental.cc,"@@ -25,7 +25,7 @@ void TFE_OpConsumeInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) { op->operation.ConsumeInput(h->handle); } -TFE_Profiler* TFE_NewProfiler(TFE_Context* ctx) { +TFE_Profiler* TFE_NewProfiler(TFE_ProfilerContext* ctx) { return new TFE_Profiler(ctx); } @@ -50,17 +50,21 @@ void TFE_ProfilerSerializeToString(TFE_Context* ctx, TFE_Profiler* profiler, }; } -TFE_ProfilerServerOptions* TFE_NewProfilerServerOptions() { - return new TFE_ProfilerServerOptions; +TFE_ProfilerContext* TFE_NewProfilerContext() { + return new TFE_ProfilerContext; } -void TFE_ProfilerServerOptionsSetEagerContext( - TFE_ProfilerServerOptions* options, TFE_Context* ctx) { - options->profiler_context.eager_context = &ctx->context; +void TFE_ProfilerContextSetEagerContext(TFE_ProfilerContext* profiler_context, + TFE_Context* eager_context) { + profiler_context->profiler_context.eager_context = &eager_context->context; } -void TFE_StartProfilerServer(TFE_ProfilerServerOptions* options, int port) { +void TFE_DeleteProfilerContext(TFE_ProfilerContext* profiler_context) { + delete profiler_context; +} + +void TFE_StartProfilerServer(TFE_ProfilerContext* context, int port) { // Release child thread intentionally. The child thread can be terminate by // terminating the main thread. - tensorflow::StartProfilerServer(&options->profiler_context, port).release(); + tensorflow::StartProfilerServer(&context->profiler_context, port).release(); } ",0,train 5a297f5efea725fc5889132bafc39707f57aa7ec,tensorflow/tensorflow,"Update profiler API to allow user using it in graph mode. PiperOrigin-RevId: 233492307",c_api_experimental.h,"@@ -25,6 +25,8 @@ extern ""C"" { TF_CAPI_EXPORT extern void TFE_OpConsumeInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status); +typedef struct TFE_ProfilerContext TFE_ProfilerContext; + // A profiler which will start profiling when creating the object and will stop // when the object is destroyed. It will profile all operations run under the // given TFE_Context. Multiple instance of it can be created, but at most one @@ -32,7 +34,7 @@ TF_CAPI_EXPORT extern void TFE_OpConsumeInput(TFE_Op* op, TFE_TensorHandle* h, // Thread-safety: TFE_Profiler is thread-safe. 
typedef struct TFE_Profiler TFE_Profiler; -TF_CAPI_EXPORT extern TFE_Profiler* TFE_NewProfiler(TFE_Context* ctx); +TF_CAPI_EXPORT extern TFE_Profiler* TFE_NewProfiler(TFE_ProfilerContext* ctx); TF_CAPI_EXPORT extern bool TFE_ProfilerIsOk(TFE_Profiler* profiler); TF_CAPI_EXPORT extern void TFE_DeleteProfiler(TFE_Profiler* profiler); @@ -43,15 +45,16 @@ TF_CAPI_EXPORT extern void TFE_ProfilerSerializeToString(TFE_Context* ctx, TF_Buffer* buf, TF_Status* status); -typedef struct TFE_ProfilerServerOptions TFE_ProfilerServerOptions; - -// Return a new Profiler server options object. -TF_CAPI_EXPORT extern TFE_ProfilerServerOptions* TFE_NewProfilerServerOptions( - void); +// Return a new profiler context object. +TF_CAPI_EXPORT extern TFE_ProfilerContext* TFE_NewProfilerContext(void); // Set the eager context in TFE_ProfilerServerOptions -TF_CAPI_EXPORT extern void TFE_ProfilerServerOptionsSetEagerContext( - TFE_ProfilerServerOptions* options, TFE_Context* ctx); +TF_CAPI_EXPORT extern void TFE_ProfilerContextSetEagerContext( + TFE_ProfilerContext* profiler_context, TFE_Context* eager_context); + +// Destroy a profiler context object. +TF_CAPI_EXPORT extern void TFE_DeleteProfilerContext( + TFE_ProfilerContext* profiler_context); // Start a profiler grpc server which listens to specified port. It will start // the server on its own thread. It can be shutdown by terminating tensorflow. @@ -61,8 +64,8 @@ TF_CAPI_EXPORT extern void TFE_ProfilerServerOptionsSetEagerContext( // tensorflow/contrib/tpu/profiler/capture_tpu_profile to capture tracable // file following // https://cloud.google.com/tpu/docs/cloud-tpu-tools#capture_trace. -TF_CAPI_EXPORT extern void TFE_StartProfilerServer( - TFE_ProfilerServerOptions* options, int port); +TF_CAPI_EXPORT extern void TFE_StartProfilerServer(TFE_ProfilerContext* context, + int port); #ifdef __cplusplus } /* end extern ""C"" */ ",0,train 5a297f5efea725fc5889132bafc39707f57aa7ec,tensorflow/tensorflow,"Update profiler API to allow user using it in graph mode. 
PiperOrigin-RevId: 233492307",c_api_experimental_test.cc,"@@ -41,9 +41,12 @@ void ExecuteWithProfiling(bool async) { TFE_ContextOptions* opts = TFE_NewContextOptions(); TFE_ContextOptionsSetAsync(opts, static_cast(async)); TFE_Context* ctx = TFE_NewContext(opts, status); - TFE_Profiler* profiler = TFE_NewProfiler(ctx); + TFE_ProfilerContext* profiler_context = TFE_NewProfilerContext(); + TFE_ProfilerContextSetEagerContext(profiler_context, ctx); + TFE_Profiler* profiler = TFE_NewProfiler(profiler_context); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteContextOptions(opts); + TFE_DeleteProfilerContext(profiler_context); TFE_TensorHandle* m = TestMatrixTensorHandle(); TFE_Op* matmul = MatMulOp(ctx, m, m); @@ -108,14 +111,18 @@ TEST(CAPI, MultipleProfilerSession) { CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteContextOptions(opts); - TFE_Profiler* profiler1 = TFE_NewProfiler(ctx); + TFE_ProfilerContext* profiler_context = TFE_NewProfilerContext(); + TFE_ProfilerContextSetEagerContext(profiler_context, ctx); + + TFE_Profiler* profiler1 = TFE_NewProfiler(profiler_context); EXPECT_TRUE(TFE_ProfilerIsOk(profiler1)); - TFE_Profiler* profiler2 = TFE_NewProfiler(ctx); + TFE_Profiler* profiler2 = TFE_NewProfiler(profiler_context); EXPECT_FALSE(TFE_ProfilerIsOk(profiler2)); TFE_DeleteProfiler(profiler1); TFE_DeleteProfiler(profiler2); + TFE_DeleteProfilerContext(profiler_context); } } // namespace ",0,train 5a297f5efea725fc5889132bafc39707f57aa7ec,tensorflow/tensorflow,"Update profiler API to allow user using it in graph mode. PiperOrigin-RevId: 233492307",c_api_internal.h,"@@ -107,20 +107,18 @@ struct TFE_Op { tensorflow::EagerOperation operation; }; +struct TFE_ProfilerContext { + tensorflow::ProfilerContext profiler_context; +}; + struct TFE_Profiler { - TFE_Profiler(TFE_Context* ctx) { - tensorflow::ProfilerContext profiler_context; - profiler_context.eager_context = &ctx->context; - profiler = tensorflow::ProfilerSession::Create(&profiler_context); + TFE_Profiler(TFE_ProfilerContext* ctx) { + profiler = tensorflow::ProfilerSession::Create(&ctx->profiler_context); } std::unique_ptr profiler; }; -struct TFE_ProfilerServerOptions { - tensorflow::ProfilerContext profiler_context; -}; - namespace tensorflow { // Set an AttrValue on the op. Doesn't handle the list types. void SetOpAttrValueScalar(TFE_Context* ctx, TFE_Op* op, ",0,train 5a297f5efea725fc5889132bafc39707f57aa7ec,tensorflow/tensorflow,"Update profiler API to allow user using it in graph mode. PiperOrigin-RevId: 233492307",profiler.py,"@@ -46,7 +46,13 @@ def start(): if _profiler is not None: raise AssertionError('Another profiler is running.') with _profiler_lock: - _profiler = pywrap_tensorflow.TFE_NewProfiler(context.context()._handle) # pylint: disable=protected-access + profiler_context = pywrap_tensorflow.TFE_NewProfilerContext() + if context.default_execution_mode == context.EAGER_MODE: + pywrap_tensorflow.TFE_ProfilerContextSetEagerContext( + profiler_context, + context.context()._handle) # pylint: disable=protected-access + _profiler = pywrap_tensorflow.TFE_NewProfiler(profiler_context) + pywrap_tensorflow.TFE_DeleteProfilerContext(profiler_context) if not pywrap_tensorflow.TFE_ProfilerIsOk(_profiler): logging.warning('Another profiler session is running which is probably ' 'created by profiler server. Please avoid using profiler ' @@ -93,12 +99,13 @@ def start_profiler_server(port): Args: port: port profiler server listens to. 
"""""" - opts = pywrap_tensorflow.TFE_NewProfilerServerOptions() + profiler_context = pywrap_tensorflow.TFE_NewProfilerContext() if context.default_execution_mode == context.EAGER_MODE: - pywrap_tensorflow.TFE_ProfilerServerOptionsSetEagerContext( - opts, + pywrap_tensorflow.TFE_ProfilerContextSetEagerContext( + profiler_context, context.context()._handle) # pylint: disable=protected-access - pywrap_tensorflow.TFE_StartProfilerServer(opts, port) + pywrap_tensorflow.TFE_StartProfilerServer(profiler_context, port) + pywrap_tensorflow.TFE_DeleteProfilerContext(profiler_context) class Profiler(object): ",0,train 28db548cfd2d238e63c3fb049119fca8369abdbe,tensorflow/tensorflow,Fixing a bug in conv+add fusion,mkl_conv_ops.cc,"@@ -24,8 +24,8 @@ limitations under the License. #include #include -#include ""mkldnn.hpp"" #include ""absl/strings/str_join.h"" +#include ""mkldnn.hpp"" #include ""tensorflow/core/framework/bounds_check.h"" #include ""tensorflow/core/framework/numeric_op.h"" #include ""tensorflow/core/framework/op_kernel.h"" @@ -944,23 +944,18 @@ class MklConvOp : public OpKernel { if (native_format) { // Forward the summand tensor to the output only if it has no other // references, otherwise make a copy of it. - if (!context->forward_input_to_output_with_shape( + if (context->forward_input_to_output_with_shape( kInputIndex_Add, kOutputIndex_Dst, output_tf_shape, output_tensor)) { - AllocateOutputSetMklShape(context, kOutputIndex_Dst, output_tensor, - output_tf_shape, *output_mkl_shape, - native_format); - bool result = - (*output_tensor)->CopyFrom(add_tensor, add_tensor.shape()); - DCHECK(result); + return; } - return; } // Check if reorder is needed if (add_mkl_shape == *output_mkl_shape && ForwardMklTensorInToOutWithMklShape(context, kInputIndex_Add, kOutputIndex_Dst, output_tensor, - add_mkl_shape, false)) { + add_mkl_shape, false) && + !native_format) { return; } else { AllocateOutputSetMklShape(context, kOutputIndex_Dst, output_tensor, @@ -987,6 +982,13 @@ class MklConvOp : public OpKernel { const_cast(add_tensor.flat().data())); void* dst_buf = static_cast((*output_tensor)->flat().data()); + if (native_format) { + // We are simply deep copying the add_tensor to output_tensor without + // changing memory layout, hence using same memory descriptor. + ADD_MD = DST_MD = + memory::desc({add_tensor.NumElements()}, MklDnnType(), + mkldnn::memory::format_tag::x); + } fuse_add_src_.reset( new MEMORY_CONSTRUCTOR(ADD_MD, this->cpu_engine_, add_buf)); fuse_add_dst_.reset( ",0,train 673b993983f37f332ff70cdb642305f69089337d,tensorflow/tensorflow,"Ensuring that the Switch op used as a pivot is always placed on the CPU. For this we set a private attribute _PivotSwitch while creating this op and then make sure that the device overwriting logic in GraphPartition isn't executed for this op. Note: Had to fix up control_flow_ops_py_test so that we don't expect a GPU graph when we don't get one. The reason is that now since we already know the switch_pred is going to be placed on CPU, the placer ensures that its input is placed on the CPU as well and we end up saving a copy. This means there is no GPU graph when we partition. PiperOrigin-RevId: 338246477 Change-Id: I5641c9ae1b2d593a2996947bafe92b22cb63371d",lower_if_op.cc,"@@ -148,13 +148,22 @@ Status CondBuilder::SetColocationAndFinalize(NodeBuilder node_builder, Status CondBuilder::CreatePivotNodes() { // Construct the basic cond body (consisting of feeding in the predicate to // create pivot nodes). 
+ + // This is a special pivot switch node for lowering. We mark this with a + // special _PivotSwitch attr on it as later on in the graph partitioner we + // do some special placement for Switch nodes and its necessary to distinguish + // between a ""normal"" Switch node and one of these pivot switches. We would + // like to place this node on the CPU always as the pred_ will be on the CPU + // as well (either a CPU op output or a GPU op with HostMemory annotation). + // TODO(b/171321391): Fix this for NUMA cases. Node* switch_pred; TF_RETURN_IF_ERROR( SetColocationAndFinalize(NodeBuilder(NewName(""switch_pred""), ""Switch"", graph_->op_registry(), &debug_info_) .Input(NodeOut(pred_)) .Input(NodeOut(pred_)) - .Device(if_op_->requested_device()), + .Attr(""_PivotSwitch"", true) + .Device(""/CPU:0""), graph_, &switch_pred)); control_predecessor_ = switch_pred; TF_RETURN_IF_ERROR( ",0,train 673b993983f37f332ff70cdb642305f69089337d,tensorflow/tensorflow,"Ensuring that the Switch op used as a pivot is always placed on the CPU. For this we set a private attribute _PivotSwitch while creating this op and then make sure that the device overwriting logic in GraphPartition isn't executed for this op. Note: Had to fix up control_flow_ops_py_test so that we don't expect a GPU graph when we don't get one. The reason is that now since we already know the switch_pred is going to be placed on CPU, the placer ensures that its input is placed on the CPU as well and we end up saving a copy. This means there is no GPU graph when we partition. PiperOrigin-RevId: 338246477 Change-Id: I5641c9ae1b2d593a2996947bafe92b22cb63371d",lower_if_op_test.cc,"@@ -147,6 +147,115 @@ TEST(LowerIfOpTest, Simple) { } } +TEST(LowerIfOpTest, GPUPlacement) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + + // Add test functions for then and else branch. + FunctionDefLibrary f_lib_proto; + *(f_lib_proto.add_function()) = test::function::XTimesTwo(); + *(f_lib_proto.add_function()) = test::function::XTimesFour(); + + // Construct simple conditional that switches on `pred` and operates only on + // single input `A`. + Scope root = Scope::NewRootScope().ExitOnError(); + TF_ASSERT_OK(root.graph()->AddFunctionLibrary(f_lib_proto)); + auto a = ops::Placeholder(root.WithOpName(""A""), DT_INT32); + auto x = ops::Placeholder(root.WithOpName(""X""), DT_INT32); + auto y = ops::Placeholder(root.WithOpName(""Y""), DT_INT32); + Node* pred; + TF_ASSERT_OK(NodeBuilder(""greater"", ""Greater"", &root.graph()->flib_def()) + .Input(x.node()) + .Input(y.node()) + .Device(""/GPU:0"") + .Finalize(root.graph(), &pred)); + Node* written_if; + std::vector inputs({NodeBuilder::NodeOut(a.node())}); + TF_ASSERT_OK( + NodeBuilder(""if"", ""If"", &root.graph()->flib_def()) + .Input(pred) + .Input(inputs) + .Attr(""then_branch"", FuncAttr(""XTimesTwo"")) + .Attr(""else_branch"", FuncAttr(""XTimesFour"")) + .Attr(LowerFunctionalOpsPass::kLowerUsingSwitchMergeAttr, true) + .Attr(""Tout"", {DT_INT32}) + .Device(""/GPU:0"") + .Finalize(root.graph(), &written_if)); + TF_ASSERT_OK(root.DoShapeInference(written_if)); + TF_ASSERT_OK(root.ToGraph(graph.get())); + + // The input graph has no switch or merge nodes. 
+ int node_called_if_count = 0; + for (const auto* op : graph->op_nodes()) { + ASSERT_FALSE(op->IsSwitch()); + ASSERT_FALSE(op->IsMerge()); + if (op->name() == ""if"") { + ++node_called_if_count; + } + } + ASSERT_EQ(node_called_if_count, 1); + + TF_ASSERT_OK(Rewrite(&graph)); + + // Verify the resultant graph has switch and merge nodes, and a node called + // `if` (but not If nodes). + int switch_count = 0; + int merge_count = 0; + node_called_if_count = 0; + for (const auto* op : graph->op_nodes()) { + if (op->IsSwitch()) { + ++switch_count; + } + if (op->IsMerge()) { + ++merge_count; + } + ASSERT_NE(op->type_string(), ""If""); + if (op->name() == ""if"") { + ++node_called_if_count; + } + } + // One switch for predicate and one for input (A). + ASSERT_EQ(switch_count, 2); + // One merge for the single output value of then and else, and one more merge + // to enforce then and else function call execution (`branch_executed` node). + ASSERT_EQ(merge_count, 2); + ASSERT_EQ(node_called_if_count, 1); + + // Verify execution. + ClientSession session(root, SessionOptionsWithInlining()); + { + RunMetadata metadata; + RunOptions options; + options.set_output_partition_graphs(true); + ClientSession::FeedType feeds; + feeds.emplace(Output(x.node()), Input::Initializer(5)); + feeds.emplace(Output(y.node()), Input::Initializer(10)); + feeds.emplace(Output(a.node()), Input::Initializer(10)); + std::vector out_tensors; + TF_ASSERT_OK(session.Run(options, feeds, {Output(written_if)}, {}, + &out_tensors, &metadata)); + GraphDef cpu_graph = metadata.partition_graphs(1); + int num_cpu_switch = 0; + for (const auto& node : cpu_graph.node()) { + if (node.op() == ""Switch"") { + ++num_cpu_switch; + } + } + EXPECT_EQ(num_cpu_switch, 2); + EXPECT_EQ(out_tensors.size(), 1); + EXPECT_EQ(out_tensors[0].scalar()(), 40); + } + { + ClientSession::FeedType feeds; + feeds.emplace(Output(x.node()), Input::Initializer(10)); + feeds.emplace(Output(y.node()), Input::Initializer(5)); + feeds.emplace(Output(a.node()), Input::Initializer(10)); + std::vector out_tensors; + TF_ASSERT_OK(session.Run(feeds, {Output(written_if)}, &out_tensors)); + EXPECT_EQ(out_tensors.size(), 1); + EXPECT_EQ(out_tensors[0].scalar()(), 20); + } +} + TEST(LowerIfOpTest, BranchFunctionsWithoutOutputs) { using ::tensorflow::test::function::GDef; using ::tensorflow::test::function::NDef; ",0,train 673b993983f37f332ff70cdb642305f69089337d,tensorflow/tensorflow,"Ensuring that the Switch op used as a pivot is always placed on the CPU. For this we set a private attribute _PivotSwitch while creating this op and then make sure that the device overwriting logic in GraphPartition isn't executed for this op. Note: Had to fix up control_flow_ops_py_test so that we don't expect a GPU graph when we don't get one. The reason is that now since we already know the switch_pred is going to be placed on CPU, the placer ensures that its input is placed on the CPU as well and we end up saving a copy. This means there is no GPU graph when we partition. PiperOrigin-RevId: 338246477 Change-Id: I5641c9ae1b2d593a2996947bafe92b22cb63371d",graph_partition.cc,"@@ -371,6 +371,13 @@ NodeDef* AddControlTrigger(const PartitionOptions& opts, GraphDef* gdef, void OptimizeControlFlowColocation(Graph* graph) { auto visit = [](Node* node) { if (IsSwitch(node)) { + // Pivot Switch nodes (which are also of type Switch) are already placed + // on the CPU and colocated with its inputs that are also already on the + // CPU (or might be placed on GPU but in host memory). 
+ if (HasNodeAttr(node->def(), ""_PivotSwitch"")) { + DCHECK(node->requested_device().find(""CPU"") != string::npos); + return; + } for (const Edge* in_edge : node->in_edges()) { if (in_edge->dst_input() == 0) { // Colocate with the data input. ",0,train 673b993983f37f332ff70cdb642305f69089337d,tensorflow/tensorflow,"Ensuring that the Switch op used as a pivot is always placed on the CPU. For this we set a private attribute _PivotSwitch while creating this op and then make sure that the device overwriting logic in GraphPartition isn't executed for this op. Note: Had to fix up control_flow_ops_py_test so that we don't expect a GPU graph when we don't get one. The reason is that now since we already know the switch_pred is going to be placed on CPU, the placer ensures that its input is placed on the CPU as well and we end up saving a copy. This means there is no GPU graph when we partition. PiperOrigin-RevId: 338246477 Change-Id: I5641c9ae1b2d593a2996947bafe92b22cb63371d",control_flow_ops_py_test.py,"@@ -730,6 +730,8 @@ class ControlFlowTest(test.TestCase, parameterized.TestCase): g for g in run_metadata.partition_graphs if device_str in g.node[0].device ] + if not device_graphs: + return 0 self.assertLen(device_graphs, 1) switch_nodes = [ n for n in device_graphs[0].node @@ -759,7 +761,6 @@ class ControlFlowTest(test.TestCase, parameterized.TestCase): options = config_pb2.RunOptions(output_partition_graphs=True) sess.run( r, feed_dict={x: -10.}, options=options, run_metadata=run_metadata) - self.assertLen(run_metadata.partition_graphs, 2) # Check that the Switch for `arg` gets placed on CPU. self.assertEqual( self._count_matching_switch_nodes_on_device(run_metadata, ""CPU"", ",0,train b55300d9d569cd5b1b2c30bff9ca6a6cb129ba32,tensorflow/tensorflow,"[XLA:GPU] Ban bad CUDNN algo PiperOrigin-RevId: 383932719 Change-Id: I3a5b29ac56d621388cb430a52ce7879cdffe87e4",gpu_conv_algorithm_picker.cc,"@@ -397,7 +397,7 @@ GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda( const bool crash_on_checking_failure = debug_options.xla_gpu_crash_on_verification_failures(); - const auto canonical_hlo = + std::string canonical_hlo = std::get<1>(AutotuneCacheKeyfromInstruction(instr, stream_exec_)); string blas_version; ",0,train b55300d9d569cd5b1b2c30bff9ca6a6cb129ba32,tensorflow/tensorflow,"[XLA:GPU] Ban bad CUDNN algo PiperOrigin-RevId: 383932719 Change-Id: I3a5b29ac56d621388cb430a52ce7879cdffe87e4",hlo_algorithm_denylist.cc,"@@ -39,6 +39,14 @@ constexpr char kDefaultDenylist[] = R""pb( algos { id: 7 tensor_ops: true } blas_version: ""10201"" } + entries { + hlo: ""(f16[3,3,256,256]{2,1,0,3}, u8[0]{0}) custom-call(f16[2048,7,7,256]{3,2,1,0}, f16[2048,7,7,256]{3,2,1,0}), window={size=3x3 pad=1_1x1_1}, dim_labels=b01f_01io->b01f, custom_call_target=\""__cudnn$convBackwardFilter\"", backend_config=\""{\\\""algorithm\\\"":\\\""0\\\"",\\\""tensor_ops_enabled\\\"":false,\\\""conv_result_scale\\\"":1,\\\""activation_mode\\\"":\\\""0\\\"",\\\""side_input_scale\\\"":0}\"""" + cc { major: 7 } + cudnn_version { major: 8 minor: 2 patch: 1 } algos + [ { id: 0 tensor_ops: true } + , { id: 0 }] + blas_version: ""11402"" + } )pb""; absl::Span GetDisabledConvAlgorithms( ",0,train f9d1a8c9625fb7b4c398516e4756500c1163febf,tensorflow/tensorflow,"[NFC] Replace std::clamp with inline implementation. std::clamp is not in C++14, thus replace it with std::min and std::max. PiperOrigin-RevId: 270604708",UniformSupport.h,"@@ -145,7 +145,7 @@ private: // Round to nearest integer with halfway cases rounded away from zero. 
const double scaledRounded = std::round(scaled); const double clamped = - std::clamp(scaledRounded, clampMinDouble, clampMaxDouble); + std::min(std::max(scaledRounded, clampMinDouble), clampMaxDouble); uint64_t signlessResult; if (isSigned) { ",0,train c59869928608498b9161c8075874a2d4c9e9a405,tensorflow/tensorflow,"Set up device map for eager context on device worker to allow invoke multi-device function on remote worker. Added a new test in remote_test.py. Without remote device map, the test will report following error: InvalidArgumentError: /job:worker/replica:0/task:1/device:GPU:0 unknown device. Additional GRPC error information: {""created"":""@1558735306.239191376"",""description"":""Error received from peer ipv6:[::1]:17007"",""file"":""third_party/grpc/src/core/lib/surface/call.cc"",""file_line"":1046,""grpc_message"":""/job:worker/replica:0/task:1/device:GPU:0 unknown device."",""grpc_status"":3} [Op:__inference_remote_function_43] PiperOrigin-RevId: 250813680",remote_test.py,"@@ -203,7 +203,7 @@ class RemoteExecutionTest(test.TestCase): """"""Basic server connection."""""" remote.connect_to_remote_host(self._cached_server1_target) - with ops.device(""job:worker/replica:0/task:1/device:CPU:0""): + with ops.device(""job:worker/replica:0/task:0/device:CPU:0""): x1 = array_ops.ones([2, 2]) x2 = array_ops.ones([2, 2]) y = math_ops.matmul(x1, x2) ",0,train c59869928608498b9161c8075874a2d4c9e9a405,tensorflow/tensorflow,"Set up device map for eager context on device worker to allow invoke multi-device function on remote worker. Added a new test in remote_test.py. Without remote device map, the test will report following error: InvalidArgumentError: /job:worker/replica:0/task:1/device:GPU:0 unknown device. Additional GRPC error information: {""created"":""@1558735306.239191376"",""description"":""Error received from peer ipv6:[::1]:17007"",""file"":""third_party/grpc/src/core/lib/surface/call.cc"",""file_line"":1046,""grpc_message"":""/job:worker/replica:0/task:1/device:GPU:0 unknown device."",""grpc_status"":3} [Op:__inference_remote_function_43] PiperOrigin-RevId: 250813680",context.cc,"@@ -66,8 +66,10 @@ EagerContext::EagerContext( bool async, const DeviceMgr* device_mgr, bool device_mgr_owned, Rendezvous* rendezvous, const CustomKernelCreator* custom_kernel_creator, DistributedFunctionLibraryRuntime* cluster_flr, - std::function rendezvous_creator) + std::function rendezvous_creator, + const DeviceMgr* remote_device_mgr) : policy_(default_policy), + remote_unowned_device_manager_(remote_device_mgr), devices_(device_mgr->ListDevices()), rendezvous_(rendezvous), rendezvous_creator_(std::move(rendezvous_creator)), @@ -117,8 +119,8 @@ void EagerContext::InitDeviceMapAndAsync() { devices_map_[device->name()] = device; } - if (remote_device_manager_ != nullptr) { - for (auto* device : remote_device_manager_->ListDevices()) { + if (remote_device_mgr() != nullptr) { + for (auto* device : remote_device_mgr()->ListDevices()) { if (devices_map_.find(device->name()) == devices_map_.end()) { devices_map_[device->name()] = device; devices_.push_back(device); @@ -332,6 +334,7 @@ ScopedStepContainer* EagerContext::StepContainer() { } Status EagerContext::MaybeRegisterFunctionRemotely(const FunctionDef& fdef) { + // Only client context can register function on remote worker context. 
if (remote_device_manager_ == nullptr) return Status::OK(); #if !defined(IS_MOBILE_PLATFORM) BlockingCounter blocking_counter(static_cast(remote_contexts_.size())); @@ -487,6 +490,10 @@ Status GetTaskName(Device* d, string* task_name) { Status EagerContext::GetClientAndContextID(Device* device, eager::EagerClient** client, uint64* context_id) { + if (remote_eager_workers_ == nullptr) { + return errors::Internal( + ""Haven't set up remote eager worker in this eager context yet.""); + } auto it = device_to_client_cache_.find(device); if (it != device_to_client_cache_.end()) { *client = it->second.first; ",0,train c59869928608498b9161c8075874a2d4c9e9a405,tensorflow/tensorflow,"Set up device map for eager context on device worker to allow invoke multi-device function on remote worker. Added a new test in remote_test.py. Without remote device map, the test will report following error: InvalidArgumentError: /job:worker/replica:0/task:1/device:GPU:0 unknown device. Additional GRPC error information: {""created"":""@1558735306.239191376"",""description"":""Error received from peer ipv6:[::1]:17007"",""file"":""third_party/grpc/src/core/lib/surface/call.cc"",""file_line"":1046,""grpc_message"":""/job:worker/replica:0/task:1/device:GPU:0 unknown device."",""grpc_status"":3} [Op:__inference_remote_function_43] PiperOrigin-RevId: 250813680",context.h,"@@ -94,7 +94,8 @@ class EagerContext : public core::RefCounted { bool async, const DeviceMgr* device_mgr, bool device_mgr_owned, Rendezvous* rendezvous, const CustomKernelCreator* custom_kernel_creator, DistributedFunctionLibraryRuntime* cluster_flr = nullptr, - std::function rendezvous_creator = nullptr); + std::function rendezvous_creator = nullptr, + const DeviceMgr* remote_device_mgr = nullptr); ~EagerContext(); @@ -206,7 +207,8 @@ class EagerContext : public core::RefCounted { : local_unowned_device_manager_; } const tensorflow::DeviceMgr* remote_device_mgr() const { - return remote_device_manager_.get(); + return (remote_device_manager_ != nullptr) ? remote_device_manager_.get() + : remote_unowned_device_manager_; } // TODO(apassos) remove the need for this @@ -292,7 +294,11 @@ class EagerContext : public core::RefCounted { // Only one of the below is set. std::unique_ptr local_device_manager_; const DeviceMgr* local_unowned_device_manager_; + + // Only one of the below is set. remote_unowned_device_manager_ is set on + // remote worker to allow running multi-device function on remote worker. std::unique_ptr remote_device_manager_; + const DeviceMgr* remote_unowned_device_manager_; // Devices owned by device_manager std::vector devices_; ",0,train c59869928608498b9161c8075874a2d4c9e9a405,tensorflow/tensorflow,"Set up device map for eager context on device worker to allow invoke multi-device function on remote worker. Added a new test in remote_test.py. Without remote device map, the test will report following error: InvalidArgumentError: /job:worker/replica:0/task:1/device:GPU:0 unknown device. 
Additional GRPC error information: {""created"":""@1558735306.239191376"",""description"":""Error received from peer ipv6:[::1]:17007"",""file"":""third_party/grpc/src/core/lib/surface/call.cc"",""file_line"":1046,""grpc_message"":""/job:worker/replica:0/task:1/device:GPU:0 unknown device."",""grpc_status"":3} [Op:__inference_remote_function_43] PiperOrigin-RevId: 250813680",eager_service_impl.cc,"@@ -121,7 +121,8 @@ Status EagerServiceImpl::CreateContext(const CreateContextRequest* request, SessionOptions(), tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, request->async(), device_mgr, false, r, nullptr, - worker_session->cluster_flr.get(), std::move(rendezvous_creator)); + worker_session->cluster_flr.get(), std::move(rendezvous_creator), + worker_session->remote_device_mgr()); std::vector device_attributes; device_mgr->ListDeviceAttributes(&device_attributes); ",0,train c59869928608498b9161c8075874a2d4c9e9a405,tensorflow/tensorflow,"Set up device map for eager context on device worker to allow invoke multi-device function on remote worker. Added a new test in remote_test.py. Without remote device map, the test will report following error: InvalidArgumentError: /job:worker/replica:0/task:1/device:GPU:0 unknown device. Additional GRPC error information: {""created"":""@1558735306.239191376"",""description"":""Error received from peer ipv6:[::1]:17007"",""file"":""third_party/grpc/src/core/lib/surface/call.cc"",""file_line"":1046,""grpc_message"":""/job:worker/replica:0/task:1/device:GPU:0 unknown device."",""grpc_status"":3} [Op:__inference_remote_function_43] PiperOrigin-RevId: 250813680",worker_session.h,"@@ -49,6 +49,8 @@ struct WorkerSession { return device_mgr_ ? device_mgr_.get() : borrowed_device_mgr_; } + DeviceMgr* remote_device_mgr() { return remote_device_mgr_.get(); } + // graph_mgr keeps track of the registered graphs of this session. // // Note: graph_mgr must be deleted before rendezvous_mgr! ",0,train c59869928608498b9161c8075874a2d4c9e9a405,tensorflow/tensorflow,"Set up device map for eager context on device worker to allow invoke multi-device function on remote worker. Added a new test in remote_test.py. Without remote device map, the test will report following error: InvalidArgumentError: /job:worker/replica:0/task:1/device:GPU:0 unknown device. Additional GRPC error information: {""created"":""@1558735306.239191376"",""description"":""Error received from peer ipv6:[::1]:17007"",""file"":""third_party/grpc/src/core/lib/surface/call.cc"",""file_line"":1046,""grpc_message"":""/job:worker/replica:0/task:1/device:GPU:0 unknown device."",""grpc_status"":3} [Op:__inference_remote_function_43] PiperOrigin-RevId: 250813680",remote.py,"@@ -24,6 +24,7 @@ from tensorflow.core.protobuf.cluster_pb2 import ClusterDef from tensorflow.core.protobuf.tensorflow_server_pb2 import ServerDef from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context +from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export @@ -51,34 +52,37 @@ def connect_to_remote_host(remote_host=None, job_name=""worker""): ``` Args: - remote_host: The addr of the remote server in host-port format. + remote_host: a single or a list the remote server addr in host-port format. job_name: The job name under which the new server will be accessible. Raises: ValueError: if remote_host is None. 
"""""" - if remote_host is None: - raise ValueError(""Must provide an remote_host"") + if not remote_host: + raise ValueError(""Must provide at least one remote_host"") + remote_host = nest.flatten(remote_host) grpc_prefix = ""grpc://"" - if remote_host.startswith(grpc_prefix): - remote_host = remote_host[len(grpc_prefix):] local_port = pywrap_tensorflow.TF_PickUnusedPortOrDie() cluster_def = ClusterDef() job_def = cluster_def.job.add() - job_def.name = job_name + job_def.name = ""localhost"" # TODO(fishx): Update this to make sure remote worker has valid ip address # to connect with local. job_def.tasks[0] = ""localhost:{}"".format(local_port) - job_def.tasks[1] = remote_host + + job_def = cluster_def.job.add() + job_def.name = job_name + for i in range(len(remote_host)): + if remote_host[i].startswith(grpc_prefix): + job_def.tasks[i] = remote_host[i][len(grpc_prefix):] + else: + job_def.tasks[i] = remote_host[i] server_def = ServerDef( - cluster=cluster_def, - job_name=job_name, - task_index=0, - protocol=""grpc"") + cluster=cluster_def, job_name=""localhost"", task_index=0, protocol=""grpc"") # TODO(nareshmodi): Make this default since it works in more situations. os.environ[""TF_EAGER_REMOTE_USE_SEND_TENSOR_RPC""] = ""1"" ",0,train c59869928608498b9161c8075874a2d4c9e9a405,tensorflow/tensorflow,"Set up device map for eager context on device worker to allow invoke multi-device function on remote worker. Added a new test in remote_test.py. Without remote device map, the test will report following error: InvalidArgumentError: /job:worker/replica:0/task:1/device:GPU:0 unknown device. Additional GRPC error information: {""created"":""@1558735306.239191376"",""description"":""Error received from peer ipv6:[::1]:17007"",""file"":""third_party/grpc/src/core/lib/surface/call.cc"",""file_line"":1046,""grpc_message"":""/job:worker/replica:0/task:1/device:GPU:0 unknown device."",""grpc_status"":3} [Op:__inference_remote_function_43] PiperOrigin-RevId: 250813680",remote_test.py,"@@ -28,10 +28,10 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import variables -class RemoteTest(test.TestCase): +class SingleWorkerTest(test.TestCase): def setUp(self): - super(RemoteTest, self).setUp() + super(SingleWorkerTest, self).setUp() workers, _ = test_util.create_local_cluster(1, 0) remote.connect_to_remote_host(workers[0].target) @@ -40,9 +40,9 @@ class RemoteTest(test.TestCase): @def_function.function def basic(i): - with ops.device('/job:worker/replica:0/task:0/cpu:0'): + with ops.device('/job:localhost/replica:0/task:0/cpu:0'): a = constant_op.constant([2]) + i - with ops.device('/job:worker/replica:0/task:1/cpu:0'): + with ops.device('/job:worker/replica:0/task:0/cpu:0'): b = constant_op.constant([1]) return a + b @@ -51,7 +51,7 @@ class RemoteTest(test.TestCase): self.assertAllEqual(basic(constant_op.constant([1])).numpy(), [4]) def testMultiDeviceFunctionVariable(self): - with ops.device('/job:worker/replica:0/task:1/cpu:0'): + with ops.device('/job:worker/replica:0/task:0/cpu:0'): variable_b = variables.Variable(1) @def_function.function @@ -61,7 +61,7 @@ class RemoteTest(test.TestCase): self.assertAllEqual(with_variable(constant_op.constant([2])).numpy(), [3]) def testMultiDeviceFunctionRemoteOutput(self): - with ops.device('/job:worker/replica:0/task:1/cpu:0'): + with ops.device('/job:worker/replica:0/task:0/cpu:0'): variable_b = variables.Variable(1) @def_function.function @@ -83,7 +83,7 @@ class RemoteTest(test.TestCase): return i + constant_op.constant([2]) with 
self.assertRaises(errors.InvalidArgumentError) as cm: - with ops.device('/job:worker/replica:0/task:1/cpu:0'): + with ops.device('/job:worker/replica:0/task:0/cpu:0'): self.assertAllEqual( ambiguous_device(constant_op.constant([2])).numpy(), [3]) @@ -91,5 +91,28 @@ class RemoteTest(test.TestCase): cm.exception.message) +class MultiWorkersTest(test.TestCase): + + def setUp(self): + super(MultiWorkersTest, self).setUp() + + workers, _ = test_util.create_local_cluster(2, 0) + remote.connect_to_remote_host([workers[0].target, workers[1].target]) + + def testMultiDeviceFunctionOnRemoteDevice(self): + with ops.device('/job:worker/replica:0/task:1'): + variable_b = variables.Variable(1.0) + + @def_function.function + def remote_function(i): + with ops.device('/job:worker/replica:0/task:0'): + a = i + variable_b + c = a + 1.0 + return c + + with ops.device('/job:worker/replica:0/task:0'): + self.assertAllEqual(remote_function(constant_op.constant([1.0])), [3.0]) + + if __name__ == '__main__': test.main() ",0,train 7ae5b12f4714455028b7422dcfe3f0dd3172b4d3,tensorflow/tensorflow,"Add more information in CheckpointManager's summary line. PiperOrigin-RevId: 307162471 Change-Id: Ifdc5e6ab7800f80b35968e6650918277f0178f85",checkpoint_management.py,"@@ -511,7 +511,7 @@ def meta_graph_filename(checkpoint_filename, meta_graph_suffix=""meta""): # TODO(allenl): Allow tf.keras.Model instances in the constructor directly? @tf_export(""train.CheckpointManager"") class CheckpointManager(object): - """"""Deletes old checkpoints. + """"""Manages multiple checkpoints by keeping some and deleting unneeded ones. Example usage: ",0,train b438b8de6af3572fbdce6ea4097d9e7935b55f30,tensorflow/tensorflow,"mlir-hlo-opt: set preloadDialectsInContext to false. This requires specifying dependent dialects in several passes. PiperOrigin-RevId: 365758084 Change-Id: If32ab0b12173c0a5e7706e9c7a0388de513c59c8",hlo_legalize_to_lhlo.cc,"@@ -20,6 +20,7 @@ limitations under the License. #include ""mlir-hlo/Dialect/mhlo/transforms/map_hlo_to_lhlo_op.h"" #include ""mlir-hlo/Dialect/mhlo/transforms/passes.h"" #include ""mlir-hlo/Dialect/mhlo/transforms/rewriters.h"" +#include ""mlir/Dialect/MemRef/IR/MemRef.h"" #include ""mlir/Dialect/Shape/IR/Shape.h"" #include ""mlir/Dialect/Shape/Transforms/Passes.h"" #include ""mlir/Dialect/StandardOps/IR/Ops.h"" @@ -564,7 +565,8 @@ class HloToLhloTensorStoreOpLegacyConverter struct HloLegalizeToLhlo : public PassWrapper> { void getDependentDialects(DialectRegistry& registry) const override { - registry.insert(); + registry.insert(); } public: ",0,train b438b8de6af3572fbdce6ea4097d9e7935b55f30,tensorflow/tensorflow,"mlir-hlo-opt: set preloadDialectsInContext to false. This requires specifying dependent dialects in several passes. PiperOrigin-RevId: 365758084 Change-Id: If32ab0b12173c0a5e7706e9c7a0388de513c59c8",legalize_to_linalg.cc,"@@ -27,6 +27,7 @@ limitations under the License. 
#include ""mlir/Dialect/Linalg/IR/LinalgOps.h"" #include ""mlir/Dialect/Linalg/IR/LinalgTypes.h"" #include ""mlir/Dialect/Math/IR/Math.h"" +#include ""mlir/Dialect/MemRef/IR/MemRef.h"" #include ""mlir/Dialect/SCF/SCF.h"" #include ""mlir/Dialect/StandardOps/IR/Ops.h"" #include ""mlir/Dialect/Tensor/IR/Tensor.h"" @@ -1965,7 +1966,9 @@ void populateLHLOToLinalgConversionPattern(MLIRContext* context, struct LhloLegalizeToLinalgPass : public PassWrapper { void getDependentDialects(DialectRegistry& registry) const override { - registry.insert(); + registry + .insert(); } void runOnFunction() override { @@ -1986,8 +1989,9 @@ struct LhloLegalizeToLinalgPass struct HloLegalizeToLinalgPass : public PassWrapper { void getDependentDialects(DialectRegistry& registry) const override { - registry.insert(); + registry + .insert(); } void runOnFunction() override { ",0,train b438b8de6af3572fbdce6ea4097d9e7935b55f30,tensorflow/tensorflow,"mlir-hlo-opt: set preloadDialectsInContext to false. This requires specifying dependent dialects in several passes. PiperOrigin-RevId: 365758084 Change-Id: If32ab0b12173c0a5e7706e9c7a0388de513c59c8",lhlo_legalize_to_gpu.cc,"@@ -24,6 +24,7 @@ limitations under the License. #include ""mlir/Dialect/GPU/GPUDialect.h"" #include ""mlir/Dialect/Linalg/IR/LinalgOps.h"" #include ""mlir/Dialect/Linalg/IR/LinalgTypes.h"" +#include ""mlir/Dialect/MemRef/IR/MemRef.h"" #include ""mlir/Dialect/SCF/SCF.h"" #include ""mlir/Dialect/StandardOps/IR/Ops.h"" #include ""mlir/IR/Attributes.h"" @@ -174,7 +175,7 @@ struct LhloLegalizeToGpuPass : public PassWrapper { void getDependentDialects(DialectRegistry& registry) const override { registry.insert(); + memref::MemRefDialect, scf::SCFDialect>(); } void runOnFunction() override { ",0,train b438b8de6af3572fbdce6ea4097d9e7935b55f30,tensorflow/tensorflow,"mlir-hlo-opt: set preloadDialectsInContext to false. This requires specifying dependent dialects in several passes. PiperOrigin-RevId: 365758084 Change-Id: If32ab0b12173c0a5e7706e9c7a0388de513c59c8",test_infer_shaped_type_pass.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include ""mlir/Dialect/Shape/IR/Shape.h"" #include ""mlir/IR/Attributes.h"" #include ""mlir/IR/Identifier.h"" #include ""mlir/IR/MLIRContext.h"" @@ -83,6 +84,9 @@ struct ReifyReturnTypeShapesPattern : public RewritePattern { struct TestInferShapedTypeMethodsPass : public PassWrapper { + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } void runOnFunction() override { OwningRewritePatternList patterns(&getContext()); patterns.insert(&getContext()); ",0,train b438b8de6af3572fbdce6ea4097d9e7935b55f30,tensorflow/tensorflow,"mlir-hlo-opt: set preloadDialectsInContext to false. This requires specifying dependent dialects in several passes. PiperOrigin-RevId: 365758084 Change-Id: If32ab0b12173c0a5e7706e9c7a0388de513c59c8",transform_unranked_hlo.cc,"@@ -528,7 +528,7 @@ struct ConvertUnrankedDynamicBroadcastSelectOp struct TransformUnrankedHloPass : public PassWrapper { void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); } ",0,train b438b8de6af3572fbdce6ea4097d9e7935b55f30,tensorflow/tensorflow,"mlir-hlo-opt: set preloadDialectsInContext to false. This requires specifying dependent dialects in several passes. 
PiperOrigin-RevId: 365758084 Change-Id: If32ab0b12173c0a5e7706e9c7a0388de513c59c8",unfuse_batch_norm_pass.cc,"@@ -15,7 +15,9 @@ limitations under the License. #include ""mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"" #include ""mlir-hlo/Dialect/mhlo/transforms/rewriters.h"" +#include ""mlir/Dialect/MemRef/IR/MemRef.h"" #include ""mlir/Dialect/StandardOps/IR/Ops.h"" +#include ""mlir/IR/Dialect.h"" #include ""mlir/IR/MLIRContext.h"" #include ""mlir/IR/Operation.h"" #include ""mlir/Pass/Pass.h"" @@ -29,6 +31,9 @@ namespace { struct TestUnfuseBatchNormPass : public PassWrapper> { + void getDependentDialects(DialectRegistry& registry) const override { + registry.insert(); + } void runOnOperation() override { OwningRewritePatternList patterns(&getContext()); PopulateUnfuseBatchNormPatterns(&getContext(), &patterns); ",0,train 0b87efcbae8cde976a302415c6df2189958e7a8b,tensorflow/tensorflow,"Add a command line parameter to toco to change the way toco rescales input and output tensors. PiperOrigin-RevId: 191825756",args.h,"@@ -202,6 +202,7 @@ struct ParsedModelFlags { Arg input_shape; Arg rnn_states; Arg model_checks; + Arg change_concat_input_ranges = Arg(true); // Debugging output options. // TODO(benoitjacob): these shouldn't be ModelFlags. Arg graphviz_first_array; ",0,test 0b87efcbae8cde976a302415c6df2189958e7a8b,tensorflow/tensorflow,"Add a command line parameter to toco to change the way toco rescales input and output tensors. PiperOrigin-RevId: 191825756",hardcode_min_max.cc,"@@ -95,30 +95,37 @@ bool HardcodeMinMaxForConcatenation(Model* model, Operator* op) { overall_minmax.min = overall_min; overall_minmax.max = overall_max; bool changed = false; - for (const auto& input : op->inputs) { - auto& array = model->GetArray(input); - if (!array.minmax) { - changed = true; - } else if (!(overall_minmax == array.GetMinMax())) { - changed = true; - LOG(WARNING) - << ""Tweaking the MinMax of array "" << input << "", which is "" - << ""an input to "" << LogName(*op) << "", because we want all inputs "" - << ""and outputs of a Concatenation operator to have the same MinMax "" - << ""so that it can be implemented as a pure byte-copy, no "" - ""arithmetic.""; + if (model->flags.change_concat_input_ranges()) { + for (const auto& input : op->inputs) { + auto& array = model->GetArray(input); + if (!array.minmax) { + changed = true; + } else if (!(overall_minmax == array.GetMinMax())) { + changed = true; + LOG(WARNING) + << ""Tweaking the MinMax of array "" << input << "", which is "" + << ""an input to "" << LogName(*op) << "", because we want all inputs "" + << ""and outputs of a Concatenation operator to have the same "" + << ""MinMax so that it can be implemented as a pure byte-copy, no "" + ""arithmetic.""; + } + array.GetOrCreateMinMax() = overall_minmax; } - array.GetOrCreateMinMax() = overall_minmax; } if (!output.minmax) { changed = true; } else if (!(overall_minmax == output.GetMinMax())) { - changed = true; - LOG(WARNING) - << ""Tweaking the MinMax of the output array of "" << LogName(*op) - << "", because we want all inputs "" - << ""and outputs of a Concatenation operator to have the same MinMax "" - << ""so that it can be implemented as a pure byte-copy, no arithmetic.""; + if (model->flags.change_concat_input_ranges()) { + changed = true; + LOG(WARNING) + << ""Tweaking the MinMax of the output array of "" << LogName(*op) + << "", because we want all inputs "" + << ""and outputs of a Concatenation operator to have the same MinMax "" + << ""so that it can be implemented as a pure byte-copy, no "" + << 
""arithmetic.""; + } else { + return false; + } } output.GetOrCreateMinMax() = overall_minmax; ",0,test 0b87efcbae8cde976a302415c6df2189958e7a8b,tensorflow/tensorflow,"Add a command line parameter to toco to change the way toco rescales input and output tensors. PiperOrigin-RevId: 191825756",quantize.cc,"@@ -431,7 +431,8 @@ bool ChooseQuantizationForOperatorOutput( (op.type == OperatorType::kSpaceToDepth) || (op.type == OperatorType::kTensorFlowReshape) || (op.type == OperatorType::kTensorFlowSplit) || - (op.type == OperatorType::kConcatenation)) { + (op.type == OperatorType::kConcatenation && + model->flags.change_concat_input_ranges())) { int data_input_index = 0; if (op.type == OperatorType::kTensorFlowSplit) { data_input_index = 1; ",0,test 0b87efcbae8cde976a302415c6df2189958e7a8b,tensorflow/tensorflow,"Add a command line parameter to toco to change the way toco rescales input and output tensors. PiperOrigin-RevId: 191825756",model_cmdline_flags.cc,"@@ -165,6 +165,11 @@ bool ParseModelFlagsFromCommandLineFlags( ""Path to an optional file containing a serialized ModelFlags proto. "" ""Options specified on the command line will override the values in "" ""the proto.""), + Flag(""change_concat_input_ranges"", + parsed_flags.change_concat_input_ranges.bind(), + parsed_flags.change_concat_input_ranges.default_value(), + ""Boolean to change the behavior of min/max ranges for inputs and"" + "" output of the concat operators.""), }; bool asked_for_help = *argc == 2 && (!strcmp(argv[1], ""--help"") || !strcmp(argv[1], ""-help"")); @@ -399,6 +404,8 @@ void ReadModelFlagsFromCommandLineFlags( parsed_model_flags.allow_nonascii_arrays.value()); model_flags->set_allow_nonexistent_arrays( parsed_model_flags.allow_nonexistent_arrays.value()); + model_flags->set_change_concat_input_ranges( + parsed_model_flags.change_concat_input_ranges.value()); if (parsed_model_flags.arrays_extra_info_file.specified()) { string arrays_extra_info_file_contents; ",0,test 0b87efcbae8cde976a302415c6df2189958e7a8b,tensorflow/tensorflow,"Add a command line parameter to toco to change the way toco rescales input and output tensors. PiperOrigin-RevId: 191825756",tooling_util.cc,"@@ -1413,7 +1413,8 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { CHECK(input_array.shape().dims_size()); } } - + model->flags.set_change_concat_input_ranges( + model_flags.change_concat_input_ranges()); model->flags.set_allow_nonascii_arrays(model_flags.allow_nonascii_arrays()); model->flags.set_allow_nonexistent_arrays( model_flags.allow_nonexistent_arrays()); ",0,test 8d7447799904c2ac16a99154519f80d979eab0eb,tensorflow/tensorflow,"Refactor kernel thunk's launch dimension setting - part 6/8. Move SetThunkLaunchDimensions() to right after KernelThunk construction. Launch dimension will be passed to KernelThunk's constructor as a parameter. PiperOrigin-RevId: 386347660 Change-Id: I560d7f695c4e50a54a156584c4082a1cd73a5e14",ir_emitter_unnested.cc,"@@ -1853,9 +1853,20 @@ Status IrEmitterUnnested::EmitFusion(mlir::Operation* op) { // emit it in a separate kernel. Treat it like a loop fusion, writing to // the output buffer. 
{ + auto unroll_factor = + ComputeMaxUnrollFactor(fusion_op, hlo_module_config_); + const Shape& element_shape = root->shape(); + TF_ASSIGN_OR_RETURN( + LaunchDimensions launch_dimensions, + CalculateLaunchDimensions(element_shape, + ir_emitter_context_->gpu_device_info(), + {unroll_factor, /*few_waves=*/false})); + std::vector ir_arrays; TF_ASSIGN_OR_RETURN(auto operand_thunk, BuildKernelThunk(op, Thunk::ThunkInfo(), &ir_arrays)); + SetThunkLaunchDimensions(launch_dimensions, operand_thunk.get(), + ir_emitter_context_->llvm_module()); thunks.push_back(std::move(operand_thunk)); GpuElementalIrEmitter operand_elemental_emitter( @@ -1874,16 +1885,6 @@ Status IrEmitterUnnested::EmitFusion(mlir::Operation* op) { TF_ASSIGN_OR_RETURN(auto generator, operand_fused_emitter.GetGenerator(root->operand(0))); - auto unroll_factor = - ComputeMaxUnrollFactor(fusion_op, hlo_module_config_); - const Shape& element_shape = root->shape(); - TF_ASSIGN_OR_RETURN( - LaunchDimensions launch_dimensions, - CalculateLaunchDimensions(element_shape, - ir_emitter_context_->gpu_device_info(), - {unroll_factor, /*few_waves=*/false})); - SetThunkLaunchDimensions(launch_dimensions, thunks.back().get(), - ir_emitter_context_->llvm_module()); TF_RETURN_IF_ERROR( ParallelLoopEmitter(generator, ir_arrays.back(), launch_dimensions, &b_, {unroll_factor}) @@ -1968,37 +1969,33 @@ Status IrEmitterUnnested::EmitFusion(mlir::Operation* op) { // touching the un-updated elements. CHECK_EQ(1, GetHloOutputs(op).size()); - // Set up kernel thunk and fused ir emitter. - std::vector ir_arrays; - TF_ASSIGN_OR_RETURN( - auto fusion_thunk, - BuildKernelThunk(fusion_op, GetThunkInfo(op), &ir_arrays)); - TF_ASSIGN_OR_RETURN( const HloComputation* fused_computation, GetOrCreateSubComputationFromRegion(&fusion_op.region(), /*is_fusion=*/true)); - GpuElementalIrEmitter elemental_emitter(hlo_module_config_, - ir_emitter_context_->llvm_module(), - &b_, GetNestedComputer()); - // Shape of the dynamic-update-slice's ""update"" operand. Shape update_shape = fused_computation->root_instruction()->operand(1)->shape(); - // Array to write into. Because this is an in-place operation, this is the - // same as operand 0's array. - const IrArray& output_array = ir_arrays.back(); - TF_ASSIGN_OR_RETURN( LaunchDimensions launch_dimensions, CalculateLaunchDimensions(update_shape, ir_emitter_context_->gpu_device_info())); + + // Set up kernel thunk and fused ir emitter. + std::vector ir_arrays; + TF_ASSIGN_OR_RETURN( + auto fusion_thunk, + BuildKernelThunk(fusion_op, GetThunkInfo(op), &ir_arrays)); SetThunkLaunchDimensions(launch_dimensions, fusion_thunk.get(), ir_emitter_context_->llvm_module()); AddThunkToThunkSequence(std::move(fusion_thunk)); + GpuElementalIrEmitter elemental_emitter(hlo_module_config_, + ir_emitter_context_->llvm_module(), + &b_, GetNestedComputer()); + FusedIrEmitter fused_emitter(&elemental_emitter); for (int i = 0; i < fused_computation->num_parameters(); i++) { @@ -2011,6 +2008,10 @@ Status IrEmitterUnnested::EmitFusion(mlir::Operation* op) { }); } + // Array to write into. Because this is an in-place operation, this is the + // same as operand 0's array. + const IrArray& output_array = ir_arrays.back(); + return llvm_ir::EmitParallelFusedDynamicUpdateSliceInPlace( fused_computation, output_array, &fused_emitter, launch_dimensions, &b_); ",0,train 388f9fa67d1f3300f25b491e69ece14f1299997d,tensorflow/tensorflow,"Tweak, add some examples and comparisons with alternatives to the docstring for tf.nn.sigmoid_cross_entropy_with_logits. 
I've seen some confusion about the wording, which previously made it seem like this symbol was not appropriate for binary classification with mutually exclusive classes. PiperOrigin-RevId: 363897491 Change-Id: I8b65b6a225320c12e4dee6fb3ef1e2ed0e8a6a02",nn_impl.py,"@@ -117,48 +117,7 @@ def sigmoid_cross_entropy_with_logits( # pylint: disable=invalid-name labels=None, logits=None, name=None): - """"""Computes sigmoid cross entropy given `logits`. - - Measures the probability error in discrete classification tasks in which each - class is independent and not mutually exclusive. For instance, one could - perform multilabel classification where a picture can contain both an elephant - and a dog at the same time. - - For brevity, let `x = logits`, `z = labels`. The logistic loss is - - z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) - = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x))) - = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x))) - = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)) - = (1 - z) * x + log(1 + exp(-x)) - = x - x * z + log(1 + exp(-x)) - - For x < 0, to avoid overflow in exp(-x), we reformulate the above - - x - x * z + log(1 + exp(-x)) - = log(exp(x)) - x * z + log(1 + exp(-x)) - = - x * z + log(1 + exp(x)) - - Hence, to ensure stability and avoid overflow, the implementation uses this - equivalent formulation - - max(x, 0) - x * z + log(1 + exp(-abs(x))) - - `logits` and `labels` must have the same type and shape. - - Args: - _sentinel: Used to prevent positional parameters. Internal, do not use. - labels: A `Tensor` of the same type and shape as `logits`. - logits: A `Tensor` of type `float32` or `float64`. - name: A name for the operation (optional). - - Returns: - A `Tensor` of the same shape as `logits` with the componentwise - logistic losses. - - Raises: - ValueError: If `logits` and `labels` do not have the same shape. - """""" + """"""See sigmoid_cross_entropy_with_logits_v2."""""" # pylint: disable=protected-access nn_ops._ensure_xent_args(""sigmoid_cross_entropy_with_logits"", _sentinel, labels, logits) @@ -199,12 +158,13 @@ def sigmoid_cross_entropy_with_logits_v2( # pylint: disable=invalid-name labels=None, logits=None, name=None): - """"""Computes sigmoid cross entropy given `logits`. + r""""""Computes sigmoid cross entropy given `logits`. - Measures the probability error in discrete classification tasks in which each - class is independent and not mutually exclusive. For instance, one could - perform multilabel classification where a picture can contain both an elephant - and a dog at the same time. + Measures the probability error in tasks with two outcomes in which each + outcome is independent and need not have a fully certain label. For instance, + one could perform a regression where the probability of an event happening is + known and used as a label. This loss may also be used for binary + classification, where labels are either zero or one. For brevity, let `x = logits`, `z = labels`. The logistic loss is @@ -228,9 +188,51 @@ def sigmoid_cross_entropy_with_logits_v2( # pylint: disable=invalid-name `logits` and `labels` must have the same type and shape. + >>> logits = tf.constant([1., -1., 0., 1., -1., 0., 0.]) + >>> labels = tf.constant([0., 0., 0., 1., 1., 1., 0.5]) + >>> tf.nn.sigmoid_cross_entropy_with_logits( + ... 
labels=labels, logits=logits).numpy() + array([1.3132617, 0.3132617, 0.6931472, 0.3132617, 1.3132617, 0.6931472, + 0.6931472], dtype=float32) + + Compared to the losses which handle multiple outcomes, + `tf.nn.softmax_cross_entropy_with_logits` for general multi-class + classification and `tf.nn.sparse_softmax_cross_entropy_with_logits` for more + efficient multi-class classification with hard labels, + `sigmoid_cross_entropy_with_logits` is a slight simplification for binary + classification: + + sigmoid(x) = softmax([x, 0])[0] + + $$\frac{1}{1 + e^{-x}} = \frac{e^x}{e^x + e^0}$$ + + While `sigmoid_cross_entropy_with_logits` works for soft binary labels + (probabilities between 0 and 1), it can also be used for binary classification + where the labels are hard. There is an equivalence between all three symbols + in this case, with a probability 0 indicating the second class or 1 indicating + the first class: + + >>> sigmoid_logits = tf.constant([1., -1., 0.]) + >>> softmax_logits = tf.stack([sigmoid_logits, tf.zeros_like(sigmoid_logits)], + ... axis=-1) + >>> soft_binary_labels = tf.constant([1., 1., 0.]) + >>> soft_multiclass_labels = tf.stack( + ... [soft_binary_labels, 1. - soft_binary_labels], axis=-1) + >>> hard_labels = tf.constant([0, 0, 1]) + >>> tf.nn.sparse_softmax_cross_entropy_with_logits( + ... labels=hard_labels, logits=softmax_logits).numpy() + array([0.31326166, 1.3132616 , 0.6931472 ], dtype=float32) + >>> tf.nn.softmax_cross_entropy_with_logits( + ... labels=soft_multiclass_labels, logits=softmax_logits).numpy() + array([0.31326166, 1.3132616, 0.6931472], dtype=float32) + >>> tf.nn.sigmoid_cross_entropy_with_logits( + ... labels=soft_binary_labels, logits=sigmoid_logits).numpy() + array([0.31326166, 1.3132616, 0.6931472], dtype=float32) + Args: - labels: A `Tensor` of the same type and shape as `logits`. - logits: A `Tensor` of type `float32` or `float64`. + labels: A `Tensor` of the same type and shape as `logits`. Between 0 and 1, + inclusive. + logits: A `Tensor` of type `float32` or `float64`. Any real number. name: A name for the operation (optional). Returns: @@ -244,6 +246,10 @@ def sigmoid_cross_entropy_with_logits_v2( # pylint: disable=invalid-name logits=logits, labels=labels, name=name) +sigmoid_cross_entropy_with_logits.__doc__ = ( + sigmoid_cross_entropy_with_logits_v2.__doc__) + + @tf_export(""nn.weighted_cross_entropy_with_logits"", v1=[]) @dispatch.add_dispatch_support def weighted_cross_entropy_with_logits_v2(labels, logits, pos_weight, ",0,train fcfef1062556eb199faa2c2759aa93d15e6bfa3f,tensorflow/tensorflow,"[tf.data] Graduate the experiment `enable_bufferio_v2` and make it default for all tf.data input pipelines. PiperOrigin-RevId: 425725025 Change-Id: Id37818f2bbea416d9672302b7594ea3964cd4e29",dataset_utils.cc,"@@ -890,7 +890,6 @@ absl::flat_hash_map DatasetExperimentRegistry::Experiments() { namespace { REGISTER_DATASET_EXPERIMENT(""initial_parallelism_value"", 50); -REGISTER_DATASET_EXPERIMENT(""enable_bufferedio_v2"", 100); REGISTER_DATASET_EXPERIMENT(""inject_prefetch"", 100); REGISTER_DATASET_EXPERIMENT(""max_parallelism"", 100); REGISTER_DATASET_EXPERIMENT(""max_parallelism_v2"", 100); ",0,train 1aa8056e084494f706fb492013cf328212e782ee,tensorflow/tensorflow,"Update def_function_xla_jit_test.py Use assertEqual instead of assertTrue",def_function_xla_jit_test.py,"@@ -112,8 +112,8 @@ class DefFunctionTest(xla_test.XLATestCase): # Check that the must-compile attribute gets correctly propagated to the # created derivatives. 
- self.assertTrue(backward.function_def.attr['_XlaMustCompile']) - self.assertTrue(forward.definition.attr['_XlaMustCompile']) + self.assertEqual(backward.function_def.attr['_XlaMustCompile']) + self.assertEqual(forward.definition.attr['_XlaMustCompile']) # Calling function with jit_compile=True from # jit_compile=False should compile the inner func. @@ -1179,7 +1179,7 @@ class DefFunctionTest(xla_test.XLATestCase): stage='hlo') # Test that reduction occurs only once. - self.assertTrue(hlo.count('reduce'), 1) + self.assertEqual(hlo.count('reduce'), 1) if __name__ == '__main__': ",0,test b2a4a7da1a07830bfd4618603637520e8a26bd3f,tensorflow/tensorflow,"Java: Avoid some compiler and deprecation warnings Change: 149554731",exception_jni.h,"@@ -22,7 +22,7 @@ limitations under the License. extern ""C"" { #endif -class TF_Status; +struct TF_Status; extern const char kIllegalArgumentException[]; extern const char kIllegalStateException[]; ",0,train a1a3b0c6c3abe0df0c8e017d9f134db4731484b9,tensorflow/tensorflow,"Android: show inference stats on debug screen in demo (accessed with volume keys). Change: 143149923",tensorflow_inference_jni.cc,"@@ -53,6 +53,9 @@ struct SessionVariables { int num_runs = 0; int64 timing_total_us = 0; + bool log_stats = false; + StatSummarizer* summarizer = nullptr; + InputMap input_tensors; std::vector output_tensor_names; std::vector output_tensors; @@ -129,6 +132,10 @@ JNIEXPORT jint JNICALL TENSORFLOW_METHOD(initializeTensorFlow)( LOG(INFO) << ""GraphDef loaded from "" << model_str << "" with "" << tensorflow_graph.node_size() << "" nodes.""; + // Whether or not stat logging is currently enabled, the StatSummarizer must + // be initialized here with the GraphDef while it is available. + vars->summarizer = new StatSummarizer(tensorflow_graph); + LOG(INFO) << ""Creating TensorFlow graph from GraphDef.""; tensorflow::Status s = session->Create(tensorflow_graph); @@ -193,8 +200,28 @@ JNIEXPORT jint JNICALL TENSORFLOW_METHOD(runInference)( } vars->output_tensors.clear(); - s = vars->session->Run(input_tensors, vars->output_tensor_names, {}, - &(vars->output_tensors)); + + if (vars->log_stats) { + RunOptions run_options; + run_options.set_trace_level(RunOptions::FULL_TRACE); + RunMetadata run_metadata; + + s = vars->session->Run(run_options, input_tensors, + vars->output_tensor_names, {}, + &(vars->output_tensors), &run_metadata); + + assert(run_metadata.has_step_stats()); + const StepStats& step_stats = run_metadata.step_stats(); + vars->summarizer->ProcessStepStats(step_stats); + + // Print the full output string, not just the abbreviated one returned by + // getStatString(). + vars->summarizer->PrintStepStats(); + } else { + s = vars->session->Run(input_tensors, vars->output_tensor_names, {}, + &(vars->output_tensors)); + } + end_time = CurrentWallTimeUs(); const int64 elapsed_time_inf = end_time - start_time; vars->timing_total_us += elapsed_time_inf; @@ -208,6 +235,24 @@ JNIEXPORT jint JNICALL TENSORFLOW_METHOD(runInference)( return s.code(); } +JNIEXPORT void JNICALL TENSORFLOW_METHOD(enableStatLogging)( + JNIEnv* env, jobject thiz, jboolean enableStatLogging) { + SessionVariables* vars = GetSessionVars(env, thiz); + vars->log_stats = enableStatLogging; +} + +JNIEXPORT jstring JNICALL TENSORFLOW_METHOD(getStatString)(JNIEnv* env, + jobject thiz) { + // Return an abbreviated stat string suitable for displaying on screen. 
+ SessionVariables* vars = GetSessionVars(env, thiz); + std::stringstream ss; + ss << vars->summarizer->GetStatsByMetric(""Top 10 CPU"", + StatSummarizer::BY_TIME, 10); + ss << vars->summarizer->GetStatsByNodeType(); + ss << vars->summarizer->ShortSummary(); + return env->NewStringUTF(ss.str().c_str()); +} + JNIEXPORT jint JNICALL TENSORFLOW_METHOD(close)(JNIEnv* env, jobject thiz) { SessionVariables* vars = GetSessionVars(env, thiz); @@ -216,6 +261,8 @@ JNIEXPORT jint JNICALL TENSORFLOW_METHOD(close)(JNIEnv* env, jobject thiz) { LOG(ERROR) << ""Error closing session: "" << s; } + delete vars->summarizer; + mutex_lock l(mutex_); std::map& sessions = *GetSessionsSingleton(); sessions.erase(vars->id); ",0,train a1a3b0c6c3abe0df0c8e017d9f134db4731484b9,tensorflow/tensorflow,"Android: show inference stats on debug screen in demo (accessed with volume keys). Change: 143149923",tensorflow_inference_jni.h,"@@ -48,6 +48,12 @@ JNIEXPORT jint JNICALL TENSORFLOW_METHOD(initializeTensorFlow)( JNIEXPORT jint JNICALL TENSORFLOW_METHOD(runInference)( JNIEnv* env, jobject thiz, jobjectArray output_name_strings); +JNIEXPORT void JNICALL TENSORFLOW_METHOD(enableStatLogging)( + JNIEnv* env, jobject thiz, jboolean enableStatLogging); + +JNIEXPORT jstring JNICALL TENSORFLOW_METHOD(getStatString)(JNIEnv* env, + jobject thiz); + JNIEXPORT jint JNICALL TENSORFLOW_METHOD(close)(JNIEnv* env, jobject thiz); FILL_NODE_SIGNATURE(Float, float); ",0,train a1a3b0c6c3abe0df0c8e017d9f134db4731484b9,tensorflow/tensorflow,"Android: show inference stats on debug screen in demo (accessed with volume keys). Change: 143149923",stat_summarizer.h,"@@ -113,6 +113,15 @@ class Stat { // See tensorflow/examples/android/jni/tensorflow_jni.cc for an example usage. class StatSummarizer { public: + enum SortingMetric { + BY_NAME, + BY_DEFINITION_ORDER, + BY_RUN_ORDER, + BY_TIME, + BY_MEMORY, + BY_TYPE, + }; + explicit StatSummarizer(const tensorflow::GraphDef& tensorflow_graph); // Adds another run's StepStats output to the aggregate counts. @@ -122,6 +131,8 @@ class StatSummarizer { // format which can be pasted into a spreadsheet for further analysis. std::string GetOutputString() const; + std::string ShortSummary() const; + // Prints the string returned by GetOutputString(). void PrintStepStats() const; @@ -130,6 +141,10 @@ class StatSummarizer { std::string GetStatsByNodeType() const; + std::string GetStatsByMetric(const string& title, + SortingMetric sorting_metric, + int num_stats) const; + void Reset() { run_total_us_.Reset(); memory_.Reset(); @@ -153,31 +168,16 @@ class StatSummarizer { std::vector outputs; }; - enum SortingMetric { - BY_NAME, - BY_DEFINITION_ORDER, - BY_RUN_ORDER, - BY_TIME, - BY_MEMORY, - BY_TYPE, - }; - void Validate(const Detail* detail, const NodeExecStats& ns) const; void OrderNodesByMetric(SortingMetric sorting_metric, std::vector* details) const; - std::string GetStatsByMetric(const string& title, - SortingMetric sorting_metric, - int num_stats) const; - std::string HeaderString(const string& title) const; std::string ColumnString(const Detail& detail, const int64 cumulative_stat_on_node, const Stat& stat) const; - std::string ShortSummary() const; - Stat run_total_us_; Stat memory_; ",0,train 0e52356c86cb6cdc500592124e9a21f1556934bf,tensorflow/tensorflow,"Add derived type attributes for TensorFlow ops generated by TableGen Motivation for this change is to remove redundant TF type attributes for TensorFlow ops. For example, tf$T: ""tfdtype$DT_FLOAT"". 
Type attributes can be derived using the MLIR operand or result MLIR types, attribute names and their mapping. This will also allow constant folding of instructions generated within MLIR (and not imported from TensorFlow) without adding type attributes for the instruction. Derived attributes are populated while exporting MLIR to TF GraphDef using auto-generated populators. Populators are only available for the ops that are generated by the TableGen. Also, fixed Operator::getNumArgs method to exclude derived attributes as they are not part of the arguments. TESTED with unit test PiperOrigin-RevId: 232531561",Operator.h,"@@ -29,6 +29,7 @@ #include ""llvm/ADT/PointerUnion.h"" #include ""llvm/ADT/SmallVector.h"" #include ""llvm/ADT/StringRef.h"" +#include ""llvm/Support/SMLoc.h"" namespace llvm { class CodeInit; @@ -54,6 +55,9 @@ public: // Returns the TableGen definition name split around '_'. const SmallVectorImpl &getSplitDefName() const; + // Returns dialect name of the op. + StringRef getDialectName() const; + // Returns the C++ class name of the op. StringRef getCppClassName() const; @@ -69,15 +73,16 @@ public: StringRef getResultName(int index) const; // Op attribute interators. - using attribute_iterator = NamedAttribute *; - attribute_iterator attribute_begin(); - attribute_iterator attribute_end(); - llvm::iterator_range getAttributes(); + using attribute_iterator = const NamedAttribute *; + attribute_iterator attribute_begin() const; + attribute_iterator attribute_end() const; + llvm::iterator_range getAttributes() const; // Op attribute accessors. int getNumAttributes() const { return attributes.size(); } // Returns the total number of native attributes. int getNumNativeAttributes() const; + int getNumDerivedAttributes() const; NamedAttribute &getAttribute(int index) { return attributes[index]; } const NamedAttribute &getAttribute(int index) const; @@ -96,7 +101,9 @@ public: Argument getArg(int index); StringRef getArgName(int index) const; // Returns the total number of arguments. - int getNumArgs() const { return operands.size() + attributes.size(); } + int getNumArgs() const { return getNumOperands() + getNumNativeAttributes(); } + + ArrayRef getLoc() const; // Query functions for the documentation of the operator. bool hasDescription() const; ",0,train 8b2dfadb82db849fa6f61879c994ed084612a7d6,tensorflow/tensorflow,"Log all the model's metrics with the SidecarEvaluator, not only the compiled metrics. PiperOrigin-RevId: 353804173 Change-Id: Icf46f0063a5327fd70c1571c2f617e78ff5ffc69",sidecar_evaluator.py,"@@ -211,14 +211,16 @@ class SidecarEvaluator(object): # TODO(rchao): Support arbitrary callback for extensibility. self.model.evaluate(self.data, steps=self.steps) - logging.info('End of evaluation. Accuracy: %r', [ - metric.result().numpy() - for metric in self.model.compiled_metrics.metrics - ]) + logging.info( + 'End of evaluation. Metrics: %s', ' '.join([ + '{}={}'.format(metric.name, + metric.result().numpy()) + for metric in self.model.metrics + ])) if self._summary_writer: with summary_ops_v2.record_if(True), self._summary_writer.as_default(): - for metric in self.model.compiled_metrics.metrics: + for metric in self.model.metrics: summary_ops_v2.scalar( metric.name, metric.result(), ",0,train 8b2dfadb82db849fa6f61879c994ed084612a7d6,tensorflow/tensorflow,"Log all the model's metrics with the SidecarEvaluator, not only the compiled metrics. 
PiperOrigin-RevId: 353804173 Change-Id: Icf46f0063a5327fd70c1571c2f617e78ff5ffc69",sidecar_evaluator_test.py,"@@ -58,12 +58,14 @@ class SidecarEvaluatorTest(test.TestCase): # Asserts the content of the summary file. event_pb_written = False + event_tags = [] for event_pb in summary_iterator.summary_iterator( os.path.join(log_dir, summary_files[0])): if event_pb.step > 0: self.assertEqual(event_pb.step, 32) - self.assertEqual(event_pb.summary.value[0].tag, 'categorical_accuracy') + event_tags.append(event_pb.summary.value[0].tag) event_pb_written = True + self.assertCountEqual(event_tags, ['categorical_accuracy', 'loss']) # Verifying at least one non-zeroth step is written to summary. self.assertTrue(event_pb_written) ",0,train e06673958eb2c37a263de5dfdf7b487439f427d4,tensorflow/tensorflow,"[lite] Update Graph partitioning to handle control dependency. Add has_side_effect to TfLiteNode which will be set during InterpreterBuilder. Currently this adds control dependency between stateful ops. stateful ops are determined to be any op that have input/output of type resource and selected subset for some ops. PiperOrigin-RevId: 367756651 Change-Id: Ib8cc6c875dbeafd8cd8471aed1051613a9cc8914",common.h,"@@ -456,8 +456,8 @@ typedef struct TfLiteTensor { } TfLiteTensor; // A structure representing an instance of a node. -// This structure only exhibits the inputs, outputs and user defined data, not -// other features like the type. +// This structure only exhibits the inputs, outputs, user defined data and some +// node properties (like statefulness), not other features like the type. typedef struct TfLiteNode { // Inputs to this node expressed as indices into the simulator's tensors. TfLiteIntArray* inputs; @@ -490,6 +490,9 @@ typedef struct TfLiteNode { // created by calling `interpreter.ModifyGraphWithDelegate`. // WARNING: This is an experimental interface that is subject to change. struct TfLiteDelegate* delegate; + + // Whether this op might have side effect (e.g. stateful op). + bool might_have_side_effect; } TfLiteNode; #else // defined(TF_LITE_STATIC_MEMORY)? // NOTE: This flag is opt-in only at compile time. ",0,train e06673958eb2c37a263de5dfdf7b487439f427d4,tensorflow/tensorflow,"[lite] Update Graph partitioning to handle control dependency. Add has_side_effect to TfLiteNode which will be set during InterpreterBuilder. Currently this adds control dependency between stateful ops. stateful ops are determined to be any op that have input/output of type resource and selected subset for some ops. PiperOrigin-RevId: 367756651 Change-Id: Ib8cc6c875dbeafd8cd8471aed1051613a9cc8914",subgraph.cc,"@@ -30,6 +30,7 @@ limitations under the License. #include ""tensorflow/lite/allocation.h"" #include ""tensorflow/lite/arena_planner.h"" #include ""tensorflow/lite/builtin_ops.h"" +#include ""tensorflow/lite/c/c_api_types.h"" #include ""tensorflow/lite/c/common.h"" #include ""tensorflow/lite/context_util.h"" #include ""tensorflow/lite/core/api/error_reporter.h"" @@ -786,6 +787,7 @@ TfLiteStatus Subgraph::AddNodeWithParameters( node.custom_initial_data = nullptr; node.custom_initial_data_size = 0; } + node.might_have_side_effect = OpMightHaveSideEffect(&node, registration); node.delegate = nullptr; // Copying of registration is required to support unresolved custom ops. @@ -794,6 +796,37 @@ TfLiteStatus Subgraph::AddNodeWithParameters( return kTfLiteOk; } +namespace { +// Returns true if any tensor identified by indexes in 'tensor_indexes' is +// of type 'kTfLiteResource'. False otherwise. 
+bool AnyTensorOfTypeResource(const std::vector& tensors, + const TfLiteIntArray* tensor_indexes) { + for (int i = 0; i < tensor_indexes->size; ++i) { + int tensor_index = tensor_indexes->data[i]; + if (tensor_index >= 0 && tensor_index < tensors.size() && + tensors[tensor_index].type == kTfLiteResource) + return true; + } + return false; +} + +} // namespace + +bool Subgraph::OpMightHaveSideEffect( + const TfLiteNode* node, const TfLiteRegistration* registration) const { + // Check if any of the input tensors are of type resource. + if (AnyTensorOfTypeResource(tensors_, node->inputs)) return true; + // Check if any of the output tensors are of type resource. + if (AnyTensorOfTypeResource(tensors_, node->outputs)) return true; + // Consider control flow ops has side effect, some ops in the control flow + // subgraph can have side effect. + if (registration->builtin_code == kTfLiteBuiltinIf || + registration->builtin_code == kTfLiteBuiltinWhile || + registration->builtin_code == kTfLiteBuiltinCallOnce) + return true; + return false; +} + TfLiteStatus Subgraph::ResizeInputTensor(int tensor_index, const std::vector& dims) { const bool delegates_applied = !pre_delegation_execution_plan_.empty(); ",0,train e06673958eb2c37a263de5dfdf7b487439f427d4,tensorflow/tensorflow,"[lite] Update Graph partitioning to handle control dependency. Add has_side_effect to TfLiteNode which will be set during InterpreterBuilder. Currently this adds control dependency between stateful ops. stateful ops are determined to be any op that have input/output of type resource and selected subset for some ops. PiperOrigin-RevId: 367756651 Change-Id: Ib8cc6c875dbeafd8cd8471aed1051613a9cc8914",subgraph.h,"@@ -615,6 +615,15 @@ class Subgraph { // Enables preserving intermediates for debugging. TfLiteStatus PreserveAllTensorsExperimental(); + // Returns true if 'node' could have side effect (e.g. stateful op). + // Note that any node that might update other tensors beside op's output + // are considered to have side effect. + // So control flow ops like 'If' and 'While' are considered to have + // side effect because they can have ops that have side effect in the + // condition and body subgraphs. + bool OpMightHaveSideEffect(const TfLiteNode* node, + const TfLiteRegistration* registration) const; + // The state of the Interpreter. enum State { // The interpreter isn't ready to be invoked. ",0,train e06673958eb2c37a263de5dfdf7b487439f427d4,tensorflow/tensorflow,"[lite] Update Graph partitioning to handle control dependency. Add has_side_effect to TfLiteNode which will be set during InterpreterBuilder. Currently this adds control dependency between stateful ops. stateful ops are determined to be any op that have input/output of type resource and selected subset for some ops. PiperOrigin-RevId: 367756651 Change-Id: Ib8cc6c875dbeafd8cd8471aed1051613a9cc8914",graph_info.cc,"@@ -56,6 +56,22 @@ class PartitionGraphIntoIndependentNodeSubsetsImpl { tensor_epochs_.resize(info_->num_tensors(), kEpochAlwaysReady); node_epochs_.clear(); node_epochs_.resize(info_->num_execution_nodes(), kEpochNotReady); + control_deps_.clear(); + control_deps_.resize(info_->num_execution_nodes()); + // Add control dependency between stateful ops. + // TODO(b/149099381): Revisit better way for adding control dependency. + int last_op_with_side_effect = -1; + for (int i = 0; i < info_->num_execution_nodes(); ++i) { + const auto& node = info_->node(i); + // Set default value. 
+ control_deps_[i] = -1; + if (node.might_have_side_effect) { + if (last_op_with_side_effect != -1) { + control_deps_[i] = last_op_with_side_effect; + } + last_op_with_side_effect = i; + } + } // Set computed tensors to be kEpochNotReady (initializer set everything to // AlwaysReady). for (int node_index = 0; node_index < info_->num_execution_nodes(); @@ -134,6 +150,12 @@ class PartitionGraphIntoIndependentNodeSubsetsImpl { return false; } } + // If any of the nodes that current node depend on is not assigned + // any epochs then don't process this node. + if (control_deps_[node_index] != -1 && + node_epochs_[control_deps_[node_index]] == kEpochNotReady) { + return false; + } int original_node_idx = info_->node_index(node_index); // When we are starting a new epoch, the first ready node defines @@ -209,6 +231,10 @@ class PartitionGraphIntoIndependentNodeSubsetsImpl { // Maps from tensor index to the epoch in which it is assigned. Also special // negative values of kEpochNotReady if not assigned. std::vector node_epochs_; + // For each node the node id that this op depends on. + // TODO(b/149099381): This should be a list, but we are now chaining + // dependency between previous ops. + std::vector control_deps_; }; // LINT.ThenChange(//tensorflow/lite/delegates/utils.h) ",0,train e06673958eb2c37a263de5dfdf7b487439f427d4,tensorflow/tensorflow,"[lite] Update Graph partitioning to handle control dependency. Add has_side_effect to TfLiteNode which will be set during InterpreterBuilder. Currently this adds control dependency between stateful ops. stateful ops are determined to be any op that have input/output of type resource and selected subset for some ops. PiperOrigin-RevId: 367756651 Change-Id: Ib8cc6c875dbeafd8cd8471aed1051613a9cc8914",graph_info_test.cc,"@@ -66,12 +66,13 @@ class SimpleTestGraph : public GraphInfo { const std::vector& outputs() const override { return outputs_; } const std::vector& variables() const override { return variables_; } - void AddNode(const std::vector& inputs, - const std::vector& outputs) { + void AddNode(const std::vector& inputs, const std::vector& outputs, + bool might_have_side_effect = false) { nodes_.push_back(TfLiteNode()); TfLiteNode& node = nodes_.back(); node.inputs = ConvertVector(inputs); node.outputs = ConvertVector(outputs); + node.might_have_side_effect = might_have_side_effect; } void AddTensors(int count) { tensors_.resize(count + tensors_.size()); } @@ -342,6 +343,50 @@ TEST(PartitionTest, Nodes3PartitionNodes2) { {expected_subgraph0, expected_subgraph1, expected_subgraph2}); } +// Test correct partition for graph with control dependency. +// Graph for test is like +// varhandleOp -> ReadVariableOp -> Add -> AssignVariableOp +// |_________________________^ ^^ +// |------------------------->ReadVariableOp -> (Output) +// ^^ is control dependency, in this case we don't want to invoke the +// last ReadVariableOp before AssignVariableOp finishes executing. +// '>' and '^' represents data dependency. +TEST(PartitionTest, Nodes4PartitionNodes3_WithControlDependency) { + SimpleTestGraph graph; + // Construct graph. 
+ { + graph.AddTensors(5); + graph.AddNode({0}, {1}, true); + graph.AddNode({1}, {2}, true); + graph.AddNode({2}, {3}, false); + graph.AddNode({1, 3}, {}, true); + graph.AddNode({1}, {4}, true); + } + graph.SetInputsAndOutputs({0}, {4}); + std::vector nodes_to_partition = {0, 1, 3, 4}; + std::vector generated_subgraphs; + PartitionGraph(graph, nodes_to_partition, &generated_subgraphs); + + NodeSubset expected_subgraph0; + expected_subgraph0.type = NodeSubset::kTfPartition; + expected_subgraph0.nodes = {0, 1}; + expected_subgraph0.input_tensors = {0}; + expected_subgraph0.output_tensors = {1, 2}; + NodeSubset expected_subgraph1; + expected_subgraph1.type = NodeSubset::kTfNonPartition; + expected_subgraph1.nodes = {2}; + expected_subgraph1.input_tensors = {2}; + expected_subgraph1.output_tensors = {3}; + NodeSubset expected_subgraph2; + expected_subgraph2.type = NodeSubset::kTfPartition; + expected_subgraph2.nodes = {3, 4}; + expected_subgraph2.input_tensors = {1, 3}; + expected_subgraph2.output_tensors = {4}; + CheckPartitionSubgraphs( + generated_subgraphs, + {expected_subgraph0, expected_subgraph1, expected_subgraph2}); +} + } // namespace } // namespace tflite ",0,train e06673958eb2c37a263de5dfdf7b487439f427d4,tensorflow/tensorflow,"[lite] Update Graph partitioning to handle control dependency. Add has_side_effect to TfLiteNode which will be set during InterpreterBuilder. Currently this adds control dependency between stateful ops. stateful ops are determined to be any op that have input/output of type resource and selected subset for some ops. PiperOrigin-RevId: 367756651 Change-Id: Ib8cc6c875dbeafd8cd8471aed1051613a9cc8914",interpreter_builder.cc,"@@ -27,6 +27,7 @@ limitations under the License. #include #include ""flatbuffers/flatbuffers.h"" // from @flatbuffers +#include ""tensorflow/lite/c/c_api_types.h"" #include ""tensorflow/lite/core/api/error_reporter.h"" #include ""tensorflow/lite/core/api/flatbuffer_conversions.h"" #include ""tensorflow/lite/core/api/op_resolver.h"" @@ -764,10 +765,12 @@ TfLiteStatus InterpreterBuilder::operator()( FlatBufferIntArrayToVector(subgraph->outputs())); // Finally setup nodes and tensors - if (ParseNodes(operators, modified_subgraph) != kTfLiteOk) - return cleanup_and_error(); + // Parse tensors before nodes as ParseNodes checks input tensors for the + // nodes. if (ParseTensors(buffers, tensors, modified_subgraph) != kTfLiteOk) return cleanup_and_error(); + if (ParseNodes(operators, modified_subgraph) != kTfLiteOk) + return cleanup_and_error(); std::vector variables; for (int i = 0; i < modified_subgraph->tensors_size(); ++i) { ",0,train b36f214ee82bb51d1f1c1fe9c74cc5ce81df400f,tensorflow/tensorflow,"Simplify GetCpuFlags. Now that the dotprod detection is just a getauxval, it's cheap, not worth caching in a context object, so use ruy:detect_arm directly. Also note that neon_tensor_utils.cc is already using ruy:detect_arm. PiperOrigin-RevId: 306682916 Change-Id: I5b81be71035c5d0cfb22006a5edb2dcafa335bcd",depthwiseconv_quantized_test.cc,"@@ -1074,13 +1074,9 @@ void TestOneDepthwiseConv3x3Filter( void TestOneNeonDot3x3(const TestParam& test_param) { #if defined(__aarch64__) && !defined(GOOGLE_L4T) && defined(__ANDROID__) && \ defined(__clang__) - CpuBackendContext backend_context; - ruy::Context* ruy_context = backend_context.ruy_context(); - const auto ruy_paths = ruy_context != nullptr - ? 
ruy_context->GetRuntimeEnabledPaths() - : ruy::Path::kNone; - const bool has_dot_product_instructions = - (ruy_paths & ruy::Path::kNeonDotprod) != ruy::Path::kNone; + CpuFlags cpu_flags; + GetCpuFlags(&cpu_flags); + const bool has_dot_product_instructions = cpu_flags.neon_dotprod; if (test_param.forced_invocation == DepthwiseConvImplementation::kUseNeon3x3DotProduct && !has_dot_product_instructions) { ",0,train b36f214ee82bb51d1f1c1fe9c74cc5ce81df400f,tensorflow/tensorflow,"Simplify GetCpuFlags. Now that the dotprod detection is just a getauxval, it's cheap, not worth caching in a context object, so use ruy:detect_arm directly. Also note that neon_tensor_utils.cc is already using ruy:detect_arm. PiperOrigin-RevId: 306682916 Change-Id: I5b81be71035c5d0cfb22006a5edb2dcafa335bcd",cpu_check.h,"@@ -15,8 +15,7 @@ limitations under the License. #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_CPU_CHECK_H_ #define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_CPU_CHECK_H_ -#include ""tensorflow/lite/kernels/cpu_backend_context.h"" -#include ""tensorflow/lite/kernels/internal/optimized/neon_check.h"" +#include ""ruy/detect_arm.h"" // from @ruy namespace tflite { @@ -24,16 +23,8 @@ struct CpuFlags { bool neon_dotprod = false; }; -inline void GetCpuFlags(CpuBackendContext* cpu_backend_context, - CpuFlags* cpu_flags) { -#if RUY_PLATFORM(ARM) - ruy::Context* ruy_context = cpu_backend_context->ruy_context(); - cpu_flags->neon_dotprod = - ruy_context != nullptr && (ruy_context->GetRuntimeEnabledPaths() & - ruy::Path::kNeonDotprod) != ruy::Path::kNone; -#else - cpu_flags->neon_dotprod = false; -#endif +inline void GetCpuFlags(CpuFlags* cpu_flags) { + cpu_flags->neon_dotprod = ruy::DetectDotprod(); } } // namespace tflite ",0,train b36f214ee82bb51d1f1c1fe9c74cc5ce81df400f,tensorflow/tensorflow,"Simplify GetCpuFlags. Now that the dotprod detection is just a getauxval, it's cheap, not worth caching in a context object, so use ruy:detect_arm directly. Also note that neon_tensor_utils.cc is already using ruy:detect_arm. PiperOrigin-RevId: 306682916 Change-Id: I5b81be71035c5d0cfb22006a5edb2dcafa335bcd",depthwiseconv_multithread.h,"@@ -144,7 +144,7 @@ inline void DepthwiseConv(const DepthwiseParams& params, const int output_height = output_shape.Dims(1); CpuFlags cpu_flags; - GetCpuFlags(cpu_backend_context, &cpu_flags); + GetCpuFlags(&cpu_flags); if (thread_count == 1) { DepthwiseConvImpl(params, input_shape, input_data, filter_shape, ",0,train b36f214ee82bb51d1f1c1fe9c74cc5ce81df400f,tensorflow/tensorflow,"Simplify GetCpuFlags. Now that the dotprod detection is just a getauxval, it's cheap, not worth caching in a context object, so use ruy:detect_arm directly. Also note that neon_tensor_utils.cc is already using ruy:detect_arm. PiperOrigin-RevId: 306682916 Change-Id: I5b81be71035c5d0cfb22006a5edb2dcafa335bcd",depthwise_conv.h,"@@ -1810,13 +1810,10 @@ inline void DepthwiseConvWithRounding( // Jetson TX-2. This compiler does not support the offsetof() macro. #if defined(__aarch64__) && !defined(GOOGLE_L4T) #if defined(__ANDROID__) && defined(__clang__) - ruy::Context* ruy_context = cpu_backend_context.ruy_context(); - const auto ruy_paths = ruy_context != nullptr - ? ruy_context->GetRuntimeEnabledPaths() - : ruy::Path::kNone; + CpuFlags cpu_flags; + GetCpuFlags(&cpu_flags); // TODO(b/150208140): Re-enable once erroneous activation in test is resolved. 
- const bool has_dot_product_instructions = - false && (ruy_paths & ruy::Path::kNeonDotprod) != ruy::Path::kNone; + const bool has_dot_product_instructions = false && cpu_flags.neon_dotprod; // Dispatch to dot-product 3x3 kernels when supported. if (has_dot_product_instructions) { ",0,train b36f214ee82bb51d1f1c1fe9c74cc5ce81df400f,tensorflow/tensorflow,"Simplify GetCpuFlags. Now that the dotprod detection is just a getauxval, it's cheap, not worth caching in a context object, so use ruy:detect_arm directly. Also note that neon_tensor_utils.cc is already using ruy:detect_arm. PiperOrigin-RevId: 306682916 Change-Id: I5b81be71035c5d0cfb22006a5edb2dcafa335bcd",neon_tensor_utils.h,"@@ -20,6 +20,7 @@ limitations under the License. #include ""tensorflow/lite/c/builtin_op_data.h"" #include ""tensorflow/lite/kernels/cpu_backend_context.h"" #include ""tensorflow/lite/kernels/internal/optimized/cpu_check.h"" +#include ""tensorflow/lite/kernels/internal/optimized/neon_check.h"" #include ""tensorflow/lite/kernels/internal/optimized/neon_tensor_utils_impl.h"" #include ""tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h"" ",0,train aabcdcbdff5e68cdbf734ae3ed297242c5b6dfdf,tensorflow/tensorflow,"preserve the argument order when inserting the fake quant ops Previously, it relies on pointer values to determine the inserting order. This will introduce test flakiness. This CL makes the order deterministic by using the op visited order. PiperOrigin-RevId: 256266368",quantization_driver.cc,"@@ -267,7 +267,9 @@ class QuantizationDriver { } cached.first->second = InitializeState(op, index, in, /*as_result=*/false); if (is_argument) { - arg_states_[llvm::cast(in)] = cached.first->second; + auto *arg = llvm::cast(in); + arg_states_[arg] = cached.first->second; + args_.push_back(arg); } } @@ -299,11 +301,15 @@ class QuantizationDriver { // the values from `operand_states_` and `result_state_`. std::unordered_map rescale_states_; - // Maps of indexes to the propagation state vector from the ops results and - // op operands. Both maps are unmodified after initialization. + // Maps of indexes to the propagation state vector from the ops operands, + // results and arguments. llvm::DenseMap operand_states_; llvm::DenseMap result_states_; llvm::DenseMap arg_states_; + + // This vector is to preserve the arguments order, so the newly inserted + // quantized ops for the arguments are deterministically ordered. 
+ llvm::SmallVector args_; }; #include ""tensorflow/compiler/mlir/lite/utils/generated_op_quant_spec_getters.inc"" @@ -656,10 +662,7 @@ bool QuantizationDriver::PropagateParams() { } void QuantizationDriver::Finalize() { - std::map sorted_states(arg_states_.begin(), - arg_states_.end()); - for (auto it : sorted_states) { - BlockArgument *arg = it.first; + for (auto *arg : args_) { auto &state = GetArgQuantState(arg); auto &requantize = GetArgRequantizeState(arg); if (state.IsEmpty() || ",0,train 22bf3df6dc91fbb80a581c1fbce30cb9d2d411e9,tensorflow/tensorflow,"[MLIR][CHLO] Use CHLO lowering for `is_inf` op PiperOrigin-RevId: 355189054 Change-Id: I28304ff8ed9f564a9698fb5609c19d5d19956e86",transform_unranked_hlo.cc,"@@ -54,8 +54,8 @@ namespace { #define MAP_CHLO_OPERATION_CWISE_UNARY(fn, sep) \ fn(AcosOp) sep fn(AcoshOp) sep fn(AsinOp) sep fn(AsinhOp) sep fn(AtanOp) \ sep fn(AtanhOp) sep fn(ConjOp) sep fn(CoshOp) sep fn(DigammaOp) \ - sep fn(ErfOp) sep fn(ErfcOp) sep fn(LgammaOp) sep fn(SinhOp) \ - sep fn(TanOp) + sep fn(ErfOp) sep fn(ErfcOp) sep fn(IsInfOp) sep fn(LgammaOp) \ + sep fn(SinhOp) sep fn(TanOp) template inline void AddLegalOpOnRankedTensor(ConversionTarget *target) { ",0,train 22bf3df6dc91fbb80a581c1fbce30cb9d2d411e9,tensorflow/tensorflow,"[MLIR][CHLO] Use CHLO lowering for `is_inf` op PiperOrigin-RevId: 355189054 Change-Id: I28304ff8ed9f564a9698fb5609c19d5d19956e86",lower_tf.cc,"@@ -1552,7 +1552,6 @@ void PopulateTFLoweringBeforeHLOPatterns(MLIRContext *context, LowerExpm1Op, LowerFakeQuantWithMinMaxArgs, LowerFillOp, - LowerIsInfOp, LowerIsNanOp, LowerL2LossOp, LowerMulNoNanOp, ",0,train d86e84eff8640f7f30818c52c3345a67cf5acb38,tensorflow/tensorflow,Rexert last part.,core.py,"@@ -659,8 +659,8 @@ class Lambda(Layer): `Lambda` layer is saving and inspecting a Model. `Lambda` layers are saved by serializing the Python bytecode, whereas subclassed Layers can be saved via overriding their `get_config` method. Overriding - `get_config` improves the portability of and the ability to inspect, - visualize and reason about them. + `get_config` improves the portability of Models. Models that rely on + subclassed Layers are also often easier to visualize and reason about. Examples: ",0,train 4bfe1dce6437e5883c2fdf232b859f8b88471083,tensorflow/tensorflow,"Small additions to DistributedStrategy's API docs PiperOrigin-RevId: 308949260 Change-Id: Ib77b03bbcc38083ce64504e29f84c2cfc8073f85",distribute_lib.py,"@@ -520,7 +520,10 @@ class StrategyBase(object): """"""A state & compute distribution policy on a list of devices. See [the guide](https://www.tensorflow.org/guide/distributed_training) - for overview and examples. + for overview and examples. See `tf.distribute.StrategyExtended` and + [`tf.distribute`](https://www.tensorflow.org/api_docs/python/tf/distribute) + for a glossory of concepts mentioned on this page such as ""per-replica"", + _replica_, and _reduce_. In short: @@ -736,12 +739,16 @@ class StrategyBase(object): # Iterate over the distributed dataset for x in dist_dataset: # process dataset elements - strategy.run(train_step, args=(x,)) + strategy.run(replica_fn, args=(x,)) ``` - We will assume that the input dataset is batched by the - global batch size. With this assumption, we will make a best effort to - divide each batch across all the replicas (one or more workers). 
+ In the code snippet above, the dataset `dist_dataset` is batched by + GLOBAL_BATCH_SIZE, and we iterate through it using `for x in dist_dataset`, + where x is one batch of data of GLOBAL_BATCH_SIZE containing N batches of + data of per-replica batch size, corresponding to N replicas. + `tf.distribute.Strategy.run` will take care of feeding + the right per-replica batch to the right `replica_fn` execution on each + replica. In a multi-worker setting, we will first attempt to distribute the dataset by attempting to detect whether the dataset is being created out of @@ -892,8 +899,13 @@ class StrategyBase(object): `tf.distribute.DistributedValues` containing tensors or composite tensors. IMPORTANT: Depending on the implementation of `tf.distribute.Strategy` and - whether eager execution is enabled, `fn` may be called one or more times ( - once for each replica). + whether eager execution is enabled, `fn` may be called one or more times. If + `fn` is annotated with `tf.function` or `tf.distribute.Strategy.run` is + called inside a `tf.function`, eager execution is disabled and `fn` is + called once (or once per replica, if you are using MirroredStrategy) to + generate a Tensorflow graph, which will then be reused for execution with + new inputs. Otherwise, if eager execution is enabled, `fn` will be called + every step just like regular python code. Example usage: ",0,train c2830904c770c4343b3581d91f0f14c08c2a727b,tensorflow/tensorflow,use `delete[]` instead of `delete`,gradient_checker_test.cc,"@@ -52,7 +52,7 @@ void CompareNumericalAndManualGradients( for (int j = 0; j < num_grad; j++) { ASSERT_NEAR(dnumerical[j], expected_grad[j], abs_error); } - delete dnumerical; + delete[] dnumerical; TF_DeleteTensor(numerical_tensor); } ",0,train 913d597ef1d3f278084c2217d2ec82826f475c0d,tensorflow/tensorflow,"Deprecate Network `state_updates` property. PiperOrigin-RevId: 310024527 Change-Id: Ic87294583db713b4d1799e51b13689f9dbd3be25",network.py,"@@ -59,9 +59,11 @@ from tensorflow.python.training.tracking import data_structures from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils from tensorflow.python.training.tracking import tracking from tensorflow.python.training.tracking import util as trackable_utils +from tensorflow.python.util import deprecation from tensorflow.python.util import nest from tensorflow.python.util import serialization from tensorflow.python.util import tf_inspect +from tensorflow.tools.docs import doc_controls # pylint: disable=g-import-not-at-top @@ -524,8 +526,15 @@ class Network(base_layer.Layer): layer.reset_states() @property + @deprecation.deprecated( + date=None, + instructions='This property should not be used in TensorFlow 2.0, ' + 'as updates are applied automatically.') + @doc_controls.do_not_generate_docs def state_updates(self): - """"""Returns the `updates` from all layers that are stateful. + """"""Deprecated, do NOT use! + + Returns the `updates` from all layers that are stateful. This is useful for separating training updates and state updates, e.g. 
when we need to update a layer's internal state ",0,train 3b5b75e304e2801b3a374f9a328bb3ebba23083e,tensorflow/tensorflow,"Add strides attribute to HLO Slice Op PiperOrigin-RevId: 268957427",hlo_function_importer.cc,"@@ -327,7 +327,8 @@ StatusOr HloFunctionImporter::ImportInstruction( ->create( loc, result_type, operands[0], ConvertDimensions(instruction->slice_starts()), - ConvertDimensions(instruction->slice_limits())) + ConvertDimensions(instruction->slice_limits()), + ConvertDimensions(instruction->slice_strides())) .getOperation(); } case HloOpcode::kConcatenate: { ",0,train 2109a2b3d9fb5bf34ca09e06ff9ca990e9b8fbc7,tensorflow/tensorflow,"Minor cleanup PiperOrigin-RevId: 166070170",multioutput_fusion_test.cc,"@@ -42,17 +42,15 @@ limitations under the License. #include ""tensorflow/core/platform/test_benchmark.h"" #include ""tensorflow/core/platform/types.h"" -using tensorflow::gtl::ArraySlice; - namespace xla { namespace { -class MultiOutputFusionTest : public HloTestBase { - public: - ErrorSpec error_spec_{0.0001, 1e-2}; +using ::tensorflow::gtl::ArraySlice; +class MultiOutputFusionTest : public HloTestBase { protected: - MultiOutputFusionTest() {} + MultiOutputFusionTest() { error_spec_ = ErrorSpec{0.0001, 1e-2}; } + void RunTest2D(bool manual_fusion, int64 size) { auto builder = HloComputation::Builder(TestName()); auto hlo_module = CreateNewModule(); ",0,train 2109a2b3d9fb5bf34ca09e06ff9ca990e9b8fbc7,tensorflow/tensorflow,"Minor cleanup PiperOrigin-RevId: 166070170",heap_test.cc,"@@ -15,15 +15,10 @@ limitations under the License. #include ""tensorflow/contrib/nearest_neighbor/kernels/heap.h"" -#include - #include ""tensorflow/core/kernels/ops_testutil.h"" -using std::vector; - -using tensorflow::nearest_neighbor::SimpleHeap; -using tensorflow::nearest_neighbor::AugmentedHeap; - +namespace tensorflow { +namespace nearest_neighbor { namespace { TEST(HeapTest, SimpleHeapTest1) { @@ -189,3 +184,5 @@ TEST(HeapTest, AugmentedHeapTest1) { } } // namespace +} // namespace nearest_neighbor +} // namespace tensorflow ",0,train 2109a2b3d9fb5bf34ca09e06ff9ca990e9b8fbc7,tensorflow/tensorflow,"Minor cleanup PiperOrigin-RevId: 166070170",hyperplane_lsh_probes.cc,"@@ -101,8 +101,8 @@ class HyperplaneLSHProbesOp : public OpKernel { int batch_size = products_tensor.dim_size(0); - Tensor* probes_tensor = NULL; - Tensor* tables_tensor = NULL; + Tensor* probes_tensor = nullptr; + Tensor* tables_tensor = nullptr; TensorShape output_shape({batch_size, num_probes}); OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &probes_tensor)); ",0,train 61df29fa97cf82f3d1ef129a70bb5fa3ed99fe3a,tensorflow/tensorflow,Update version string to 1.6.0-rc1,setup.py,"@@ -20,7 +20,7 @@ from __future__ import print_function from setuptools import setup -_VERSION = '1.6.0-rc0' +_VERSION = '1.6.0-rc1' CONSOLE_SCRIPTS = [ 'capture_tpu_profile=cloud_tpu_profiler.main:run_main', ",0,train 61df29fa97cf82f3d1ef129a70bb5fa3ed99fe3a,tensorflow/tensorflow,Update version string to 1.6.0-rc1,version.h,"@@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. 
""-alpha"", ""-alpha.1"", // ""-beta"", ""-rc"", ""-rc.1"") -#define TF_VERSION_SUFFIX ""-rc0"" +#define TF_VERSION_SUFFIX ""-rc1"" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) ",0,train 61df29fa97cf82f3d1ef129a70bb5fa3ed99fe3a,tensorflow/tensorflow,Update version string to 1.6.0-rc1,setup.py,"@@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.6.0-rc0' +_VERSION = '1.6.0-rc1' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', ",0,train a1e51910493309d90819fc23351d8103112cf501,tensorflow/tensorflow,"[XLA GPU] [NFC] Simplify and document getters of KernelMappingScheme PiperOrigin-RevId: 265975853",ir_emitter_unnested.cc,"@@ -2627,9 +2627,9 @@ void IrEmitterUnnested::EmitHlo021Tile( constexpr int kNumRows = 4; KernelMappingScheme mapping_scheme( reduced_output_dims, /*tile_size_y=*/kWarpSize, - /*tile_size_x=*/kWarpSize, /*req_block_sizes=*/{1, 1, 1}, + /*tile_size_x=*/kWarpSize, /*block_size_z=*/1, /*num_threads_y=*/kNumRows, - /*num_threads_x=*/kWarpSize, &b_); + /*num_threads_x=*/kWarpSize, /*is_dilated_x=*/false, &b_); KernelCodegenInfo kernel_info(&mapping_scheme); std::vector param_arrays; @@ -3062,7 +3062,7 @@ bool IsUnrollingColumnReductionBeneficial(const HloInstruction* unnested_hlo, } // namespace -std::tuple +std::pair IrEmitterUnnested::ComputeMappingSchemeAndReductionKind( const HloInstruction* unnested_hlo, const HloInstruction* first_reduce) { const Shape& input_shape = first_reduce->operand(0)->shape(); @@ -3121,12 +3121,10 @@ IrEmitterUnnested::ComputeMappingSchemeAndReductionKind( tile_size_y = kNumElementsPerPartialSum; } - DimensionVector req_block_sizes{block_size_z, 1, 1}; llvm_ir::KernelMappingScheme mapping_scheme( - dims_in_elem, tile_size_y, tile_size_x, req_block_sizes, num_threads_y, - num_threads_x, &b_); - mapping_scheme.SetDilatedX(dilated_x); - return std::make_tuple(mapping_scheme, is_row_reduction); + dims_in_elem, tile_size_y, tile_size_x, block_size_z, num_threads_y, + num_threads_x, dilated_x, &b_); + return std::make_pair(mapping_scheme, is_row_reduction); } Status IrEmitterUnnested::EmitReductionFromOrToContiguousDimensions( @@ -3197,11 +3195,11 @@ Status IrEmitterUnnested::EmitReductionFromOrToContiguousDimensions( ""doesn't set the input layout of "" << first_reduce->ToString(); - bool is_row_reduction; - llvm_ir::KernelMappingScheme mapping_scheme; - std::tie(mapping_scheme, is_row_reduction) = + auto mapping_scheme_pair = ComputeMappingSchemeAndReductionKind(unnested_hlo, first_reduce); - ReductionCodegenInfo reduction_info(&mapping_scheme, is_row_reduction); + bool is_row_reduction = mapping_scheme_pair.second; + ReductionCodegenInfo reduction_info(&mapping_scheme_pair.first, + is_row_reduction); EmitElementFunction emit_reduction_tile = [&](const llvm_ir::IrArray::Index& index, llvm::Value* y_loc, llvm::Value* x_loc, int64 x_iter_num) { @@ -3216,9 +3214,9 @@ Status IrEmitterUnnested::EmitReductionFromOrToContiguousDimensions( [&](llvm::Value* y, llvm::Value* x, const IrArray::Index& index, const string& loop_name, llvm::Value* tile_height, llvm::Value* tile_width, KernelSupportLibrary* ksl) { - EmitTiledElementalCodeWithBoundsCheck(&mapping_scheme, index, loop_name, - ksl, &b_, y, x, tile_height, - tile_width, emit_reduction_tile); + EmitTiledElementalCodeWithBoundsCheck( + &mapping_scheme_pair.first, index, loop_name, ksl, &b_, y, x, + tile_height, 
tile_width, emit_reduction_tile); }, /*block_prologue_generator=*/ [&](HloInstruction* hlo, KernelCodegenInfo* kernel_info) { ",0,test a1e51910493309d90819fc23351d8103112cf501,tensorflow/tensorflow,"[XLA GPU] [NFC] Simplify and document getters of KernelMappingScheme PiperOrigin-RevId: 265975853",ir_emitter_unnested.h,"@@ -212,7 +212,7 @@ class IrEmitterUnnested : public IrEmitter, // and first_reduce are the same instruction. For a kInput fusion, // unnested_hlo is the fusion instruction while first_reduce is the first // reduce op. - std::tuple + std::pair ComputeMappingSchemeAndReductionKind(const HloInstruction* unnested_hlo, const HloInstruction* first_reduce); ",0,test a1e51910493309d90819fc23351d8103112cf501,tensorflow/tensorflow,"[XLA GPU] [NFC] Simplify and document getters of KernelMappingScheme PiperOrigin-RevId: 265975853",kernel_tiling.cc,"@@ -103,29 +103,36 @@ absl::optional > FindTranspose021(const Shape& a, return absl::nullopt; } -KernelMappingScheme::KernelMappingScheme( - absl::Span dims_in_elems, int64 tile_size_y, int64 tile_size_x, - absl::Span req_block_sizes, int64 num_threads_y, - int64 num_threads_x, llvm::IRBuilder<>* b) +KernelMappingScheme::KernelMappingScheme(absl::Span dims_in_elems, + int64 tile_size_y, int64 tile_size_x, + int64 block_size_z, + int64 num_threads_y, + int64 num_threads_x, bool is_dilated_x, + llvm::IRBuilder<>* b) : b_(b), - dims_in_elems_{dims_in_elems.at(0), dims_in_elems.at(1), - dims_in_elems.at(2)}, + dims_in_elems_{dims_in_elems[0], dims_in_elems[1], dims_in_elems[2]}, tile_sizes_{1, tile_size_y, tile_size_x}, - dims_in_tiles_(ElementWiseCeilOfRatio(dims_in_elems_, tile_sizes_)), - block_sizes_{std::min(req_block_sizes.at(0), dims_in_tiles_.at(0)), - std::min(req_block_sizes.at(1), dims_in_tiles_.at(1)), - std::min(req_block_sizes.at(2), dims_in_tiles_.at(2))}, - dims_in_blocks_(ElementWiseCeilOfRatio(dims_in_tiles_, block_sizes_)), + dims_in_tiles_{dims_in_elems[0], + CeilOfRatio(dims_in_elems[1], tile_size_y), + CeilOfRatio(dims_in_elems[2], tile_size_x)}, + block_sizes_{block_size_z, 1, 1}, + dims_in_blocks_{CeilOfRatio(dims_in_elems[0], block_sizes_[0]), + dims_in_tiles_[1], dims_in_tiles_[2]}, num_threads_x_(num_threads_x), num_threads_y_(num_threads_y), - dilated_x_(true) { - DCHECK_EQ(req_block_sizes.size(), 3); + dilated_x_(is_dilated_x) { DCHECK_EQ(tile_size_y % num_threads_y_, 0); DCHECK_EQ(tile_size_x % num_threads_x_, 0); + CHECK_EQ((dims_in_elems[0] % block_size_z), 0); VLOG(10) << ""dims_in_elems_ = ["" << absl::StrJoin(dims_in_elems_, "","") << ""]""; VLOG(10) << ""dims_in_tiles_ = ["" << absl::StrJoin(dims_in_tiles_, "","") << ""]""; VLOG(10) << ""dims_in_blocks_ = ["" << absl::StrJoin(dims_in_blocks_, "","") << ""]""; + if (!dilated_x_) { + // dilated_x_=false is for the purpose of vectorization, which requires + // GetTileSizeForDimension(DimX) to be a multiplier of num_threads_x_. + CHECK_EQ(GetTileSizeForDimension(DimX) % num_threads_x_, 0); + } } IrArray::Index KernelMappingScheme::GetUnnormalizedIndex( ",0,test a1e51910493309d90819fc23351d8103112cf501,tensorflow/tensorflow,"[XLA GPU] [NFC] Simplify and document getters of KernelMappingScheme PiperOrigin-RevId: 265975853",kernel_tiling.h,"@@ -90,23 +90,24 @@ class KernelMappingScheme { enum { DimZ = 0, DimY, DimX, DimTot }; public: - KernelMappingScheme() {} // dims_in_elems: the normalized tensor dimensions. - // req_block_sizes: the requested block size in number of tiles for each - // dimension. 
The actual block size is set to min(req_block_size, - // dims_in_number_of_blocks). KernelMappingScheme(absl::Span dims_in_elems, int64 tile_size_y, - int64 tile_size_x, - absl::Span req_block_sizes, + int64 tile_size_x, int64 block_size_z, int64 num_threads_y, int64 num_threads_x, - llvm::IRBuilder<>* b); + bool is_dilated_x, llvm::IRBuilder<>* b); + // Number of elements in each dimension (Z/Y/X respectively). absl::Span GetDimensionsInElements() const { return dims_in_elems_; } + + // Ratio of elements in each dimension over tile sizes for Z/Y/X + // respectively. absl::Span GetDimensionsInTiles() const { return dims_in_tiles_; } + + // Ratio of dimensions per tile over block sizes. absl::Span GetDimensionsInBlocks() const { return dims_in_blocks_; } @@ -147,14 +148,6 @@ class KernelMappingScheme { } bool DilatedX() const { return dilated_x_; } - void SetDilatedX(bool v) { - dilated_x_ = v; - if (!dilated_x_) { - // dilated_x_=false is for the purpose of vectorization, which requires - // GetTileSizeForDimension(DimX) to be a multiplier of num_threads_x_. - CHECK_EQ(GetTileSizeForDimension(DimX) % num_threads_x_, 0); - } - } IrArray::Index EmitBlockIndex(llvm::Type* index_ty); // Returns the index for the first tile in the block with the given block ",0,test d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,gpu_prim.h,"@@ -31,17 +31,27 @@ limitations under the license, the license you must see. #include ""third_party/gpus/cuda/include/cusparse.h"" namespace gpuprim = ::cub; + +// Required for sorting Eigen::half +namespace cub { +template <> +struct NumericTraits + : BaseTraits {}; +} // namespace cub + #elif TENSORFLOW_USE_ROCM #include ""rocm/include/hipcub/hipcub.hpp"" namespace gpuprim = ::hipcub; +// Required for sorting Eigen::half namespace rocprim { namespace detail { template <> struct radix_key_codec_base - : radix_key_codec_floating {}; + : radix_key_codec_floating {}; }; // namespace detail }; // namespace rocprim -#endif // GOOGLE_CUDA + +#endif // TENSORFLOW_USE_ROCM #endif // TENSORFLOW_CORE_KERNELS_GPU_PRIM_H_ ",0,train d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,in_topk_op_test.cc,"@@ -76,9 +76,9 @@ static Graph* InTopK(int num_targets, int num_classes, T top_k) { BM_InTopK(int64, 64, 1000, 10, cpu); BM_InTopK(int64, 64, 10000, 10, cpu); -#ifdef GOOGLE_CUDA +#ifdef GOOGLE_CUDA || TENSORFLOW_USE_ROCM BM_InTopK(int64, 64, 1000, 10, gpu); BM_InTopK(int64, 64, 10000, 10, gpu); -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // namespace tensorflow ",0,train d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op.cc,"@@ -244,7 +244,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS); #undef REGISTER_KERNELS_NAME #undef REGISTER_KERNELS -#ifdef GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM namespace functor { #define DECLARE_GPU_SPEC(T) \ @@ -277,6 +277,6 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_KERNELS); TF_CALL_INTEGRAL_TYPES(REGISTER_KERNELS); #undef REGISTER_KERNELS -#endif // end GOOGLE_CUDA +#endif // end GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // end namespace tensorflow ",0,train d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu.h,"@@ -15,11 +15,12 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_KERNELS_TOPK_OP_GPU_H_ #define TENSORFLOW_CORE_KERNELS_TOPK_OP_GPU_H_ -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU #include +#include #include #include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" @@ -34,15 +35,6 @@ limitations under the License. #include ""tensorflow/core/platform/types.h"" #include ""tensorflow/core/util/gpu_kernel_helper.h"" -#if GOOGLE_CUDA -// Required for sorting Eigen::half -namespace cub { -template <> -struct NumericTraits - : BaseTraits {}; -} // namespace cub -#endif // GOOGLE_CUDA - namespace tensorflow { typedef Eigen::GpuDevice GPUDevice; @@ -93,7 +85,7 @@ struct IndirectLinearData { Entry* const backing_data; }; -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM template struct StridedData { typedef impl::Entry Entry; @@ -115,6 +107,7 @@ template ::Entry Entry; const Data data; + __device__ IndexedHeap(const Data& d) : data(d) {} __device__ bool is_above(int left, int right) { T left_value = data.get_value(left); @@ -337,12 +330,21 @@ __device__ void mergeShards(int num_shards, int k, } } +#if GOOGLE_CUDA extern __shared__ char shared_memory[]; +#endif template -__global__ void TopKKernel(const T* __restrict__ input, int length, int k, - bool sorted, T* __restrict__ output, - int* __restrict__ indices) { +#if TENSORFLOW_USE_ROCM +__attribute__((amdgpu_flat_work_group_size(1, 256))) +#endif +__global__ void TopKKernel( + const T* __restrict__ input, int length, int k, bool sorted, + T* __restrict__ output, int* __restrict__ indices) { +#if TENSORFLOW_USE_ROCM + HIP_DYNAMIC_SHARED(char, shared_memory); +#endif + const int batch_index = blockIdx.x; const T* batch_input = input + batch_index * length; @@ -370,7 +372,7 @@ __global__ void TopKKernel(const T* __restrict__ input, int length, int k, } template -cudaError LaunchTopKKernel(const cudaStream_t& stream, int num_shards, +cudaError LaunchTopKKernel(const gpuStream_t& stream, int num_shards, const T* input, int batch_size, int length, int k, bool sorted, T* output, int* indices) { // This code assumes that k is small enough that the computation @@ -395,9 +397,17 @@ cudaError LaunchTopKKernel(const cudaStream_t& stream, int num_shards, } if (num_shards <= 0) { num_shards = 1; +#if GOOGLE_CUDA } else if (num_shards > 1024) { num_shards = 1024; } +#else + // ROCm can't execute with 1024 and requires an explicit + // amdgpu_flat_work_group_size attribute with >256 + } else if (num_shards > 256) { + num_shards = 256; + } +#endif } // We are limited by the amount of shared memory we have per block. 
auto shared_memory_size = (num_shards + 1) * k * sizeof(Entry); @@ -448,9 +458,9 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows, input_indices_t.device(d) = input_indices_t.generate(ColumnIndexCreator(num_cols)); - cub::CountingInputIterator counting_iter(0); - cub::TransformInputIterator> + gpuprim::CountingInputIterator counting_iter(0); + gpuprim::TransformInputIterator> segment_offsets_t(counting_iter, SegmentOffsetCreator(num_cols)); Tensor temp_values; @@ -472,7 +482,7 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows, sorted_values_ptr = temp_values.flat().data(); } - auto err = cub::DeviceSegmentedRadixSort::SortPairsDescending( + auto err = gpuprim::DeviceSegmentedRadixSort::SortPairsDescending( /* d_temp_storage */ nullptr, /* temp_storage_bytes */ temp_storage_bytes, /* d_keys_in */ input, @@ -489,7 +499,8 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows, if (err != cudaSuccess) { return errors::Internal( ""TopKOp: Could not launch "" - ""cub::DeviceSegmentedRadixSort::SortPairsDescending to calculate "" + ""cub::gpuprim::DeviceSegmentedRadixSort::SortPairsDescending to "" + ""calculate "" ""temp_storage_bytes, status: "", cudaGetErrorString(err)); } @@ -497,7 +508,7 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows, TF_RETURN_IF_ERROR(ctx->allocate_temp( DT_INT8, TensorShape({static_cast(temp_storage_bytes)}), &temp_storage)); - err = cub::DeviceSegmentedRadixSort::SortPairsDescending( + err = gpuprim::DeviceSegmentedRadixSort::SortPairsDescending( /* d_temp_storage */ temp_storage.flat().data(), /* temp_storage_bytes */ temp_storage_bytes, /* d_keys_in */ input, @@ -514,7 +525,8 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows, if (err != cudaSuccess) { return errors::Internal( ""TopKOp: Could not launch "" - ""cub::DeviceSegmentedRadixSort::SortPairsDescending to sort input, "" + ""cub::gpuprim::DeviceSegmentedRadixSort::SortPairsDescending to sort "" + ""input, "" ""temp_storage_bytes: "", temp_storage_bytes, "", status: "", cudaGetErrorString(err)); } @@ -567,6 +579,6 @@ struct TopKFunctor { } // end namespace functor } // namespace tensorflow -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #endif // TENSORFLOW_CORE_KERNELS_TOPK_OP_GPU_H_ ",0,train d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_double.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU #include ""tensorflow/core/kernels/topk_op.h"" @@ -25,4 +25,4 @@ using Eigen::GpuDevice; template struct functor::TopKFunctor; } // namespace tensorflow -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM ",0,train d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_float.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU #include ""tensorflow/core/kernels/topk_op.h"" @@ -25,4 +25,4 @@ using Eigen::GpuDevice; template struct functor::TopKFunctor; } // namespace tensorflow -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM ",0,train d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_half.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU #include ""tensorflow/core/kernels/topk_op.h"" @@ -25,4 +25,4 @@ using Eigen::GpuDevice; template struct functor::TopKFunctor; } // namespace tensorflow -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM ",0,train d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_int16.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU #include ""tensorflow/core/kernels/topk_op.h"" @@ -25,4 +25,4 @@ using Eigen::GpuDevice; template struct functor::TopKFunctor; } // namespace tensorflow -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM ",0,train d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_int32.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU #include ""tensorflow/core/kernels/topk_op.h"" @@ -25,4 +25,4 @@ using Eigen::GpuDevice; template struct functor::TopKFunctor; } // namespace tensorflow -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM ",0,train d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_int64.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU #include ""tensorflow/core/kernels/topk_op.h"" @@ -25,4 +25,4 @@ using Eigen::GpuDevice; template struct functor::TopKFunctor; } // namespace tensorflow -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM ",0,train d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_int8.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU #include ""tensorflow/core/kernels/topk_op.h"" @@ -25,4 +25,4 @@ using Eigen::GpuDevice; template struct functor::TopKFunctor; } // namespace tensorflow -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM ",0,train d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_uint16.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU #include ""tensorflow/core/kernels/topk_op.h"" @@ -27,4 +27,4 @@ template struct functor::TopKFunctor; } // namespace tensorflow -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM ",0,train d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_uint8.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU #include ""tensorflow/core/kernels/topk_op.h"" @@ -25,4 +25,4 @@ using Eigen::GpuDevice; template struct functor::TopKFunctor; } // namespace tensorflow -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM ",0,train d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_test.py,"@@ -102,11 +102,13 @@ class TopKTest(test.TestCase): self._validateTopK(inputs, 2, [[0.4, 0.3], [0.4, 0.3]], [[3, 1], [2, 1]]) def testTop3(self): - k = 5 - inputs = np.random.permutation(np.linspace(0, 100, 6140, dtype=np.float64)) - indices = np.argsort(-inputs)[:k] - values = -np.sort(-inputs)[:k] - self._validateTopK(inputs, k, values, indices) + for k in range(3, 11, 2): + for dim in range(512, 12288, 512): + inputs = np.random.permutation(np.linspace(0, 100, dim, + dtype=np.float64)) + indices = np.argsort(-inputs)[:k] + values = -np.sort(-inputs)[:k] + self._validateTopK(inputs, k, values, indices) def testTop1AllNan(self): inputs = [[np.NaN, np.NaN], [np.NaN, np.NaN]] ",0,train b758593dc6e0b88d704233a3ab8ae6c28d54575d,tensorflow/tensorflow,"Rollback of #37837 PiperOrigin-RevId: 304698537 Change-Id: Ib68e9dcc719add6091e2a7af1b6d15d8c6aadf03",def_function.py,"@@ -422,19 +422,6 @@ class Function(object): self._input_signature = input_signature self._call_counter = _CallCounter(FREQUENT_TRACING_WARNING_MAX_CALL_HISTORY) - def __getstate__(self): - """"""Custom pickling, to omit unpickleable objects."""""" - result = self.__dict__.copy() - del result[""_lock""] - del result[""_descriptor_cache""] - return result - - def __setstate__(self, state): - """"""Restore from pickled state."""""" - self.__dict__ = state - self._lock = threading.Lock() - self._descriptor_cache = weakref.WeakKeyDictionary() - def _defun_with_scope(self, scope): """"""Creates a defun wrapped inside a variable creator scope."""""" ",0,test b758593dc6e0b88d704233a3ab8ae6c28d54575d,tensorflow/tensorflow,"Rollback of #37837 PiperOrigin-RevId: 304698537 Change-Id: Ib68e9dcc719add6091e2a7af1b6d15d8c6aadf03",def_function_test.py,"@@ -19,7 +19,6 @@ from __future__ import print_function import functools import 
itertools -import pickle import re import weakref @@ -69,10 +68,6 @@ class _ModelWithOptimizer(training.Model): return {'loss': loss} -def undecorated_function(x): - return x * 3. - - class _HasDecoratedMethod(object): @def_function.function @@ -752,41 +747,6 @@ class DefFunctionTest(test.TestCase, parameterized.TestCase): # If the graph is deleted, then an exception is raised on reading `captures` self.assertEmpty(graph.captures) - @parameterized.parameters(*itertools.product( - (None, (tensor_spec.TensorSpec([]),)), # input_signature - (True, False), # autograph - (None, converter.Feature.ALL), # autograph_options - (None, 'foo.bar'), # implements - (None, True, False), # relax_shapes - )) - def test_pickle(self, input_signature, autograph, autograph_options, - implements, relax_shapes): - """"""@function objects can be pickled and unpickled."""""" - # Can't pickle functions in __main__: - from tensorflow.python.eager.def_function_test import undecorated_function - original_py_function = undecorated_function - - func = def_function.function( - func=original_py_function, - input_signature=input_signature, - autograph=autograph, - experimental_implements=implements, - experimental_autograph_options=autograph_options, - experimental_relax_shapes=relax_shapes, - ) - - cloned = pickle.loads(pickle.dumps(func)) - - self.assertEqual(func._name, cloned._name) - self.assertEqual(input_signature, cloned._input_signature) - self.assertEqual(autograph, cloned._autograph) - self.assertEqual(implements, cloned._implements) - self.assertEqual(autograph_options, cloned._experimental_autograph_options) - self.assertEqual(relax_shapes, cloned._experimental_relax_shapes) - - x = array_ops.ones([]) - self.assertEqual(self.evaluate(cloned(x)), self.evaluate(func(x))) - if __name__ == '__main__': ops.enable_eager_execution() ",0,test 24b2d0252bba21953e1921d8bccf850cbdfbcb09,tensorflow/tensorflow,"Add unidirectional sequence rnn op_def to graphdef_to_flatbuffer and also add a e2e test. PiperOrigin-RevId: 297050448 Change-Id: Ifa7249a5e4585f61ea9833f11ea28a9f2f9e0363",graphdef_to_tfl_flatbuffer.cc,"@@ -87,6 +87,17 @@ const char kUnidirectionalSequenceLstmOp[] = ""'LastState' type: DT_FLOAT } output_arg: { name: 'Output' type: DT_FLOAT} "" ""attr : { name: '_tflite_input_indices' type: 'list(int)'}""; +const char kUnidirectionalSequenceRnnOp[] = + ""name: 'UnidirectionalSequenceRnn' input_arg: {name: 'Input' type: "" + ""DT_FLOAT} input_arg: { name: 'Weights' type: DT_FLOAT } "" + ""input_arg: { name: 'RecurrentWeights' type: DT_FLOAT } input_arg: { "" + ""name: 'Bias' type: DT_FLOAT} "" + ""input_arg: { name: 'HiddenState' type: DT_FLOAT} "" + ""output_arg: { name: "" + ""'LastState' type: DT_FLOAT } output_arg: { name: 'Output' type: "" + ""DT_FLOAT} "" + ""attr : { name: '_tflite_input_indices' type: 'list(int)'}""; + // Converts the toco::IODataType to tensorflow::DataType. Only contains the // conversion mapping for constants defined in TFLite Python API. 
DataType ConvertIODataTypeToDataType(toco::IODataType dtype) { @@ -285,6 +296,7 @@ Status ConvertGraphDefToTFLiteFlatBuffer(const toco::ModelFlags& model_flags, toco_flags.custom_opdefs().end()); extra_tf_opdefs.push_back(kDetectionPostProcessOp); extra_tf_opdefs.push_back(kUnidirectionalSequenceLstmOp); + extra_tf_opdefs.push_back(kUnidirectionalSequenceRnnOp); TF_RETURN_IF_ERROR(RegisterCustomBuiltinOps(extra_tf_opdefs)); TF_ASSIGN_OR_RETURN( ",0,test 24b2d0252bba21953e1921d8bccf850cbdfbcb09,tensorflow/tensorflow,"Add unidirectional sequence rnn op_def to graphdef_to_flatbuffer and also add a e2e test. PiperOrigin-RevId: 297050448 Change-Id: Ifa7249a5e4585f61ea9833f11ea28a9f2f9e0363",unidirectional_sequence_rnn_test.py,"@@ -249,6 +249,10 @@ class UnidirectionalSequenceRnnTest(test_util.TensorFlowTestCase): result = self.tfliteInvoke(new_sess, test_inputs, x, output_class, False) self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-2)) + # Test MLIR-converted model. + result = self.tfliteInvoke(new_sess, test_inputs, x, output_class, True) + self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-2)) + @test_util.enable_control_flow_v2 def testDynamicRnnMultiRnnCell(self): sess = tf.compat.v1.Session(config=CONFIG) @@ -269,6 +273,10 @@ class UnidirectionalSequenceRnnTest(test_util.TensorFlowTestCase): result = self.tfliteInvoke(new_sess, test_inputs, x, output_class, False) self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-2)) + # Test MLIR-converted model. + result = self.tfliteInvoke(new_sess, test_inputs, x, output_class, True) + self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-2)) + if __name__ == ""__main__"": test.main() ",0,test 3f1d5d5670e42ea4b01795c787e847dc799b9e54,tensorflow/tensorflow,"Fixes issues with the `tf.train.Saver` in the 0.7.0 release. 1. The cifar10_train.py example model was emitting warnings, because of non-Variable objects in the `tf.moving_average_variables()` collection. This change fixes that by only adding `Variable`-typed objects to that collection in `moving_averages.py` (which better agrees with the definition in `tf.GraphKeys.MOVING_AVERAGES_VARIABLES`). 2. Saver.save() now calls `tf.gfile.MakeDirs(os.path.dirname(save_path))`, which fails if `save_path` does not contain a directory component. This change fixes the implementation of `tf.gfile.MakeDirs('')` to be a no-op (which better matches the internal library that it is shadowing). Fixes #1123. Fixes #1135. Change: 114895020",_gfile.py,"@@ -282,17 +282,16 @@ def MakeDirs(path, mode=0o755): # pylint: disable=invalid-name """"""Recursively create the directory ""path"" with the given mode. Args: - path: The directory path + path: The directory path. mode: The file mode for the created directories - Returns: - None - - Raises: OSError: if the path already exists """""" - os.makedirs(path, mode) + # NOTE(mrry): MakeDirs("""") should be a no-op to match other + # implementations of tf.gfile. + if path: + os.makedirs(path, mode) def RmDir(directory): # pylint: disable=invalid-name ",0,train 3f1d5d5670e42ea4b01795c787e847dc799b9e54,tensorflow/tensorflow,"Fixes issues with the `tf.train.Saver` in the 0.7.0 release. 1. The cifar10_train.py example model was emitting warnings, because of non-Variable objects in the `tf.moving_average_variables()` collection. 
This change fixes that by only adding `Variable`-typed objects to that collection in `moving_averages.py` (which better agrees with the definition in `tf.GraphKeys.MOVING_AVERAGES_VARIABLES`). 2. Saver.save() now calls `tf.gfile.MakeDirs(os.path.dirname(save_path))`, which fails if `save_path` does not contain a directory component. This change fixes the implementation of `tf.gfile.MakeDirs('')` to be a no-op (which better matches the internal library that it is shadowing). Fixes #1123. Fixes #1135. Change: 114895020",gfile_test.py,"@@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import contextlib import os import shutil import time @@ -148,6 +149,22 @@ class FunctionTests(_BaseTest, googletest.TestCase): gfile.DeleteRecursively(self.tmp + ""test_dir"") self.assertFalse(gfile.Exists(self.tmp + ""test_dir"")) + @contextlib.contextmanager + def _working_directory(self, wd): + original_cwd = os.getcwd() + os.chdir(wd) + try: + yield + finally: + os.chdir(original_cwd) + + def testMakeDirsWithEmptyString(self): + gfile.MakeDirs(self.tmp + ""test_dir"") + with self._working_directory(self.tmp + ""test_dir""): + gfile.MakeDirs("""") + # Should succeed because MakeDirs("""") is a no-op. + gfile.RmDir(self.tmp + ""test_dir"") + def testErrors(self): self.assertRaises( OSError, lambda: gfile.RmDir(self.tmp + ""dir_doesnt_exist"")) ",0,train 3f1d5d5670e42ea4b01795c787e847dc799b9e54,tensorflow/tensorflow,"Fixes issues with the `tf.train.Saver` in the 0.7.0 release. 1. The cifar10_train.py example model was emitting warnings, because of non-Variable objects in the `tf.moving_average_variables()` collection. This change fixes that by only adding `Variable`-typed objects to that collection in `moving_averages.py` (which better agrees with the definition in `tf.GraphKeys.MOVING_AVERAGES_VARIABLES`). 2. Saver.save() now calls `tf.gfile.MakeDirs(os.path.dirname(save_path))`, which fails if `save_path` does not contain a directory component. This change fixes the implementation of `tf.gfile.MakeDirs('')` to be a no-op (which better matches the internal library that it is shadowing). Fixes #1123. Fixes #1135. Change: 114895020",moving_averages.py,"@@ -269,12 +269,14 @@ class ExponentialMovingAverage(object): avg = slot_creator.create_slot( var, var.initialized_value(), self._name, colocate_with_primary=True) + # NOTE(mrry): We only add `tf.Variable` objects to the + # `MOVING_AVERAGE_VARIABLES` collection. + ops.add_to_collection(ops.GraphKeys.MOVING_AVERAGE_VARIABLES, var) else: avg = slot_creator.create_zeros_slot( var, self._name, colocate_with_primary=(var.op.type == ""Variable"")) self._averages[var] = avg - ops.add_to_collection(ops.GraphKeys.MOVING_AVERAGE_VARIABLES, var) with ops.name_scope(self._name) as scope: decay = ops.convert_to_tensor(self._decay, name=""decay"") ",0,train 3f1d5d5670e42ea4b01795c787e847dc799b9e54,tensorflow/tensorflow,"Fixes issues with the `tf.train.Saver` in the 0.7.0 release. 1. The cifar10_train.py example model was emitting warnings, because of non-Variable objects in the `tf.moving_average_variables()` collection. This change fixes that by only adding `Variable`-typed objects to that collection in `moving_averages.py` (which better agrees with the definition in `tf.GraphKeys.MOVING_AVERAGES_VARIABLES`). 2. Saver.save() now calls `tf.gfile.MakeDirs(os.path.dirname(save_path))`, which fails if `save_path` does not contain a directory component. 
This change fixes the implementation of `tf.gfile.MakeDirs('')` to be a no-op (which better matches the internal library that it is shadowing). Fixes #1123. Fixes #1135. Change: 114895020",moving_averages_test.py,"@@ -87,6 +87,8 @@ class ExponentialMovingAverageTest(tf.test.TestCase): avg1 = ema.average(var1) avg2 = ema.average(tensor2) + self.assertItemsEqual([var0, var1], tf.moving_average_variables()) + self.assertFalse(avg0 in tf.trainable_variables()) self.assertFalse(avg1 in tf.trainable_variables()) self.assertFalse(avg2 in tf.trainable_variables()) ",0,train 09b8ed34f47dbd6921304f2d4ceb3669c1e089e6,tensorflow/tensorflow,"Add @ebrevdo's temporary fix for int32 overflow issue, and add a test case for it Fix imports",core.py,"@@ -26,6 +26,7 @@ import warnings import numpy as np from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -580,9 +581,21 @@ class Flatten(Layer): permutation.append(1) inputs = array_ops.transpose(inputs, perm=permutation) - outputs = array_ops.reshape( - inputs, (tensor_shape.dimension_value(inputs.shape[0]) or - array_ops.shape(inputs)[0], -1)) + input_shape = inputs.shape + if input_shape[1:].is_fully_defined(): + flattened_dim = tensor_shape.dimension_value( + np.prod(input_shape[1:], dtype=int)) + # Temporary fix for integer overflow issue. + if flattened_dim > np.iinfo(np.int32).max: + shape_dtype = dtypes.int64 + else: + shape_dtype = dtypes.int32 + outputs = array_ops.reshape( + inputs, constant_op.constant((-1, flattened_dim), shape_dtype)) + else: + outputs = array_ops.reshape( + inputs, (tensor_shape.dimension_value(inputs.shape[0]) or + array_ops.shape(inputs)[0], -1)) if not context.executing_eagerly(): outputs.set_shape(self.compute_output_shape(inputs.shape)) return outputs ",0,train 09b8ed34f47dbd6921304f2d4ceb3669c1e089e6,tensorflow/tensorflow,"Add @ebrevdo's temporary fix for int32 overflow issue, and add a test case for it Fix imports",core_test.py,"@@ -556,6 +556,12 @@ class FlattenTest(test.TestCase): self.assertEqual(list(np_output.shape), [5, 6]) self.assertEqual(y.get_shape().as_list(), [5, None]) + @test_util.run_deprecated_v1 + def testFlattenLargeDim(self): + x = array_ops.placeholder(shape=(None, 21316, 21316, 80), dtype='float32') + y = core_layers.Flatten()(x) + self.assertEqual(y.shape.as_list(), [None, 21316 * 21316 * 80]) + if __name__ == '__main__': test.main() ",0,train 5220e565b7cc32a5f757896c76c7d57c33bcd323,tensorflow/tensorflow,"Don't use tensorflow::Edge after freeing it Even with this bug we were accidentally doing the right thing (so the test case doesn't actually fail without the fix): deleting an Edge sets its input and output indices to kControlSlot-1 so we'd normally expect to fail when there is a control edge out of the TF cluster (because a control edge would be recognized as a data edge). But AddEdge(x, -1, y, -1) seems to do the right thing for both control and data edges. PiperOrigin-RevId: 214831204",build_xla_ops_pass.cc,"@@ -112,16 +112,9 @@ static void MoveOutgoingEdges(Graph* g, Node* old_node, Node* new_node) { std::vector out_edges(old_node->out_edges().begin(), old_node->out_edges().end()); for (const Edge* edge : out_edges) { - Node* dst = edge->dst(); - int src_output = edge->src_output(); - int dst_input = edge->dst_input(); + // TODO(sanjoy): This does not update NodeDef inputs. 
+ g->AddEdge(new_node, edge->src_output(), edge->dst(), edge->dst_input()); g->RemoveEdge(edge); - - if (edge->IsControlEdge()) { - g->AddControlEdge(new_node, dst); - } else { - g->AddEdge(new_node, src_output, dst, dst_input); - } } } ",0,train 5220e565b7cc32a5f757896c76c7d57c33bcd323,tensorflow/tensorflow,"Don't use tensorflow::Edge after freeing it Even with this bug we were accidentally doing the right thing (so the test case doesn't actually fail without the fix): deleting an Edge sets its input and output indices to kControlSlot-1 so we'd normally expect to fail when there is a control edge out of the TF cluster (because a control edge would be recognized as a data edge). But AddEdge(x, -1, y, -1) seems to do the right thing for both control and data edges. PiperOrigin-RevId: 214831204",build_xla_ops_pass_test.cc,"@@ -0,0 +1,112 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""tensorflow/compiler/jit/build_xla_ops_pass.h"" + +#include ""tensorflow/cc/framework/ops.h"" +#include ""tensorflow/cc/ops/array_ops.h"" +#include ""tensorflow/cc/ops/resource_variable_ops.h"" +#include ""tensorflow/cc/ops/standard_ops.h"" +#include ""tensorflow/compiler/jit/defs.h"" +#include ""tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"" +#include ""tensorflow/compiler/jit/node_matchers.h"" +#include ""tensorflow/core/graph/algorithm.h"" +#include ""tensorflow/core/grappler/optimizers/data/graph_utils.h"" +#include ""tensorflow/core/lib/core/status_test_util.h"" +#include ""tensorflow/core/platform/test.h"" + +namespace tensorflow { +namespace { + +using ::tensorflow::testing::FindNodeByName; +using ::tensorflow::testing::matchers::CtrlDeps; +using ::tensorflow::testing::matchers::NodeWith; +using ::tensorflow::testing::matchers::Op; + +Status BuildXlaOps(const Scope& s, std::unique_ptr* result) { + auto graph = absl::make_unique(OpRegistry::Global()); + TF_RETURN_IF_ERROR(s.ToGraph(graph.get())); + + // Assign all nodes to the CPU device. 
+ static const char* kCpuDevice = ""/job:localhost/replica:0/task:0/cpu:0""; + for (Node* n : graph->nodes()) { + if (n->assigned_device_name().empty()) { + n->set_assigned_device_name(kCpuDevice); + } + } + + GraphOptimizationPassOptions opt_options; + opt_options.graph = &graph; + BuildXlaOpsPass pass; + TF_RETURN_IF_ERROR(pass.Run(opt_options)); + *result = std::move(graph); + return Status::OK(); +} + +Status MakeXlaCompiledKernel(Graph* graph, const string& callee_name, + const string& node_name, Node** result) { + NodeDef call_node; + call_node.set_name(node_name); + call_node.set_op(callee_name); + AddNodeAttr(kXlaCompiledKernelAttr, true, &call_node); + AddNodeAttr(kXlaNumConstantArgsAttr, 0, &call_node); + AddNodeAttr(kXlaNumResourceArgsAttr, 0, &call_node); + Status s; + *result = graph->AddNode(call_node, &s); + return s; +} + +Node* MakeWrite(const Scope& scope, const string& id) { + Output var_handle = + ops::VarHandleOp(scope.WithOpName(""Var"" + id), DT_FLOAT, TensorShape({})); + Output value_to_write = + ops::Const(scope.WithOpName(""ValueToAssign"" + id), 1.0f); + ops::AssignVariableOp assign_op(scope.WithOpName(""Assignee"" + id), var_handle, + value_to_write); + return assign_op.operation.node(); +} + +FunctionDefLibrary CreateFunctionDefLibWithConstFunction(const string& name) { + FunctionDefLibrary flib_def; + FunctionDef func = FunctionDefHelper::Create( + /*function_name=*/name, /*in_def=*/{}, /*out_def=*/{""out: float""}, + /*attr_def*/ + {}, /*node_def=*/{FunctionDefHelper::Const(""one"", 1.0f)}, + /*ret_def=*/{{""out"", ""out:output:0""}}); + *flib_def.add_function() = std::move(func); + return flib_def; +} + +TEST(BuildXlaOps, ControlDepsPreserved) { + Scope root = Scope::NewRootScope().ExitOnError(); + + FunctionDefLibrary flib_def = + CreateFunctionDefLibWithConstFunction(""cluster_0""); + TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def)); + Node* call; + TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), ""cluster_0"", ""C"", &call)); + Node* write_op = MakeWrite(root, ""write""); + root.graph()->AddControlEdge(call, write_op); + + std::unique_ptr graph; + TF_ASSERT_OK(BuildXlaOps(root, &graph)); + + Node* write_op_new = FindNodeByName(graph.get(), write_op->name()); + ASSERT_NE(write_op_new, nullptr); + EXPECT_THAT(write_op_new, NodeWith(CtrlDeps(NodeWith(Op(""_XlaRun""))))); +} + +} // namespace +} // namespace tensorflow ",0,train 2248a3488c53f8b858e2a0b8be93d62c3056df36,tensorflow/tensorflow,"[XLA] Don't call Literal::Get in HloEvaluator's convolution loop. This speeds up the implementation of conv because Literal::Get calls Literal::Piece::data, which is relatively slow. Instead, we call Literal::Data() once and cache the result. 
Before: ConvolutionTest/0.StridedFilter (59094 ms) After: ConvolutionTest/0.StridedFilter (41812 ms) Speedup: 59/42 = 1.4x PiperOrigin-RevId: 191830741",hlo_evaluator.cc,"@@ -1003,6 +1003,9 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { DimensionVector rhs_index(rhs_rank); DimensionVector rhs_spatial_index(dnums.kernel_spatial_dimensions_size()); + auto lhs_literal_data = lhs_literal.data(); + auto rhs_literal_data = rhs_literal.data(); + auto func = [&](ArraySlice out_index) { ElementwiseT result_val = static_cast(0); @@ -1062,9 +1065,13 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { : rhs_spatial_index[ki]; } - result_val += - static_cast(lhs_literal.Get(lhs_index)) * - static_cast(rhs_literal.Get(rhs_index)); + auto lhs_elem = static_cast( + lhs_literal_data[IndexUtil::MultidimensionalIndexToLinearIndex( + lhs_shape, lhs_index)]); + auto rhs_elem = static_cast( + rhs_literal_data[IndexUtil::MultidimensionalIndexToLinearIndex( + rhs_shape, rhs_index)]); + result_val += lhs_elem * rhs_elem; } cnt : {} } while (IndexUtil::BumpIndices(window_shape, &rhs_spatial_index)); ",0,test 9707af5cb390eea0dd8f14a270911de8ad819bfd,tensorflow/tensorflow,"Int32 support for Empty op on GPU. PiperOrigin-RevId: 236790821",inplace_ops.cc,"@@ -543,6 +543,7 @@ REGISTER_EMPTY(float, GPU); REGISTER_EMPTY(double, GPU); REGISTER_EMPTY(Eigen::half, GPU); REGISTER_EMPTY(int64, GPU); +REGISTER_EMPTY(int32, GPU); #endif // GOOGLE_CUDA ",0,train f1e0098f2a702c8cbce80ece16f2c0aa23942fd5,tensorflow/tensorflow,"Dropping tests for non-EagerService code path in remote_cluster_test. PiperOrigin-RevId: 302922575 Change-Id: I8b335e1f0ff8ed5a3f47b1fbc77302e500d7de37",remote_cluster_test.py,"@@ -495,28 +495,6 @@ class DynamicClusterTest(test.TestCase, parameterized.TestCase): context.check_alive(""/job:remote_device/replica:0/task:10"") -class DynamicClusterWithoutLazyRemoteInputsCopyTest(DynamicClusterTest): - - @classmethod - def setUpClass(cls): - super(DynamicClusterWithoutLazyRemoteInputsCopyTest, cls).setUpClass() - context._reset_context() - context.context().lazy_remote_inputs_copy = False - - @classmethod - def tearDownClass(cls): - super(DynamicClusterWithoutLazyRemoteInputsCopyTest, cls).tearDownClass() - context._reset_context() - context.context().lazy_remote_inputs_copy = True - - # TODO(haoyuzhang): When lazyh remote inputs copy is disabled, we use the - # WorkerService RunGraph request to execute component functions in distributed - # function execution. We currently do not have access control in WorkerService - # to allow concurrent cluster update and function execution. - def testMultiThreadPendingNodesLockFree(self): - self.skipTest(""Unsupported case"") - - if __name__ == ""__main__"": ops.enable_eager_execution() test.main() ",0,train 3258ebf5e18e898a11f9d2bde25efd3224738e43,tensorflow/tensorflow,"Reuse the rendezvous provided by the OpKernelContext for PartitionedCallOp. This will allow send/recv across different tf.functions. PiperOrigin-RevId: 313267770 Change-Id: I28fb8e43cb7b3374feeca9b0f203a968a338ec9e",partitioned_function_ops.cc,"@@ -245,7 +245,6 @@ void PartitionedCallOp::RunFunction(FunctionLibraryRuntime::Handle handle, run_opts.source_device = lib->device() == nullptr ? 
"""" : lib->device()->name(); run_opts.allow_dead_tensors = true; - run_opts.rendezvous = ctx->rendezvous(); std::vector* rets = new std::vector; const string& func_name = func_->name(); ",0,train e232013764faa5c8926b562c2b1c61594d705ebe,tensorflow/tensorflow,Forgot to mark before setting type,convert_nodes.cc,"@@ -977,12 +977,12 @@ Status Converter::RenameAndMarkOutputTensors( tensor = layer->getOutput(0); } tensor->setName(output.dest_node_name.c_str()); + network()->markOutput(*tensor); // Set type after marking as output. TRT only supports setType for engine // outputs and inputs (type is inferred otherwise). tensor->setType(output.trt_dtype); VLOG(1) << ""Marking output TRT tensor "" << output.source_tensor_name << "", which feeds TF node "" << output.dest_node_name; - network()->markOutput(*tensor); } return Status::OK(); } ",0,train 2229ae89c927b46355a15e8af22365d24afc25bf,tensorflow/tensorflow,"Use group_id as step_id. PiperOrigin-RevId: 317353238 Change-Id: If52b2b4872c92d3f65af8f6ce1651e8c6da7dae7",xplane_to_memory_profile.cc,"@@ -42,6 +42,8 @@ namespace profiler { namespace { +constexpr int64 kInvalidStepId = -1; + // Index of the time-sorted memory_profile_snapshots list, and the // MemoryActivityMetadata proto it contains. using IndexMetaPair = std::pair; @@ -63,7 +65,7 @@ struct ActivityMetadata { int64 allocation_bytes = 0; uint64 address = 0; absl::string_view tf_op_name; - int64 step_id = -1; + int64 step_id = kInvalidStepId; absl::string_view region_type; int64 data_type = 0; absl::string_view tensor_shape; @@ -129,7 +131,6 @@ void UpdateProfileSummary(const AggregationStats& stats, int64 time_offset_ps, MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) { XPlaneVisitor plane = CreateTfXPlaneVisitor(host_trace); MemoryProfile memory_profile; - auto* step_count = memory_profile.mutable_step_count(); // Iterate over all XEvents in the XPlane, and add the XStats to a new // MemoryProfileSnapshot if the EventType is kMemoryAllocation or // kMemoryDeallocation. @@ -181,9 +182,8 @@ MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) { case StatType::kTfOp: metadata.tf_op_name = stat.StrOrRefValue(); break; - case StatType::kStepId: + case StatType::kGroupId: metadata.step_id = stat.IntValue(); - if (metadata.step_id != 0) (*step_count)[metadata.step_id]++; break; case StatType::kRegionType: metadata.region_type = stat.StrOrRefValue(); @@ -214,40 +214,21 @@ MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) { return memory_profile; } -// Sequentialize step ids for the memory profile. -void UpdateStepId(const tensorflow::protobuf::Map< - tensorflow::protobuf_int64 /*orig_step_id*/, - tensorflow::protobuf_int64 /*count*/>& step_count, - PerAllocatorMemoryProfile* memory_profile) { - // Map from original random step id to sequential step id. - absl::flat_hash_map step_map; - constexpr int kUnknownStep = -2; - constexpr double kStepFilterRatio = 0.1; // Magic number for filtering. - tensorflow::protobuf_int64 max_step_count = 0; - for (const auto& step_and_count : step_count) { - max_step_count = std::max(max_step_count, step_and_count.second); - } - // Filter out noisy and incomplete original step ids. - for (const auto& step_and_count : step_count) { - if (static_cast(step_and_count.second) / max_step_count > - kStepFilterRatio) { - step_map[step_and_count.first] = kUnknownStep; - } - } - - // Update the step ids in memory_profile for this allocator. 
- int64 step_id = -1; +// Fix invalid step ids of snapshots at the beginning/end of the profile or at +// the step boundaries. The snapshots with invalid step ids at the beginning get +// 0 for their step ids. Those at the step boundaries or at the end get the +// previous snapshot's step id + 1. +void UpdateStepId(PerAllocatorMemoryProfile* memory_profile) { + int64 last_valid_step_id = -1; + // Snapshots are already sorted in time. for (auto& snapshot : *memory_profile->mutable_memory_profile_snapshots()) { DCHECK(snapshot.has_activity_metadata()); - // Convert the random step id to sequential step id. - int64 orig_step_id = snapshot.activity_metadata().step_id(); - if (step_map.contains(orig_step_id) && - step_map[orig_step_id] == kUnknownStep) { - step_map[orig_step_id] = ++step_id; + if (snapshot.mutable_activity_metadata()->step_id() == kInvalidStepId) { + snapshot.mutable_activity_metadata()->set_step_id(last_valid_step_id + 1); + } else { + last_valid_step_id = snapshot.mutable_activity_metadata()->step_id(); } - snapshot.mutable_activity_metadata()->set_step_id(step_id); } - VLOG(2) << ""Max sequential step id in profile: "" << step_id; } // Update the MemoryActivityMetadata for each deallocation event by copying from @@ -481,14 +462,14 @@ void ProcessMemoryProfileProto(int64 max_num_snapshots, return a.time_offset_ps() < b.time_offset_ps(); }); - UpdateStepId(memory_profile->step_count(), allocator_memory_profile); + UpdateStepId(allocator_memory_profile); UpdateDeallocation(allocator_memory_profile); - int64 peak_bytes_profile = allocator_memory_profile->profile_summary() - .peak_stats() - .peak_bytes_in_use(); int64 peak_step_id = - GetPeakMemoryStep(peak_bytes_profile, allocator_memory_profile); + GetPeakMemoryStep(allocator_memory_profile->profile_summary() + .peak_stats() + .peak_bytes_in_use(), + allocator_memory_profile); ProcessActiveAllocations(peak_step_id, allocator_memory_profile); SampleSnapshots(max_num_snapshots, snapshots); } ",0,train 2229ae89c927b46355a15e8af22365d24afc25bf,tensorflow/tensorflow,"Use group_id as step_id. PiperOrigin-RevId: 317353238 Change-Id: If52b2b4872c92d3f65af8f6ce1651e8c6da7dae7",xplane_to_memory_profile.h,"@@ -25,6 +25,7 @@ namespace profiler { // Process the host threads XPlane and generate MemoryProfile result; at most // max_num_snapshots will be displayed on the UI. +// REQUIRED: host_plane should have been grouped by calling GroupTfEvents(). MemoryProfile ConvertXPlaneToMemoryProfile(const XPlane& host_plane, int64 max_num_snapshots = 1000); ",0,train 2229ae89c927b46355a15e8af22365d24afc25bf,tensorflow/tensorflow,"Use group_id as step_id. PiperOrigin-RevId: 317353238 Change-Id: If52b2b4872c92d3f65af8f6ce1651e8c6da7dae7",xplane_to_memory_profile_test.cc,"@@ -20,6 +20,7 @@ limitations under the License. 
#include ""tensorflow/core/platform/types.h"" #include ""tensorflow/core/profiler/protobuf/memory_profile.pb.h"" #include ""tensorflow/core/profiler/protobuf/xplane.pb.h"" +#include ""tensorflow/core/profiler/utils/group_events.h"" #include ""tensorflow/core/profiler/utils/xplane_builder.h"" #include ""tensorflow/core/profiler/utils/xplane_schema.h"" #include ""tensorflow/core/profiler/utils/xplane_test_utils.h"" @@ -84,11 +85,11 @@ TEST(ConvertXPlaneToMemoryProfile, OneAllocatorMultiActivitiesTest) { {StatType::kRegionType, ""temp""}, {StatType::kTensorShapes, ""[1, 2]""}}); + tensorflow::profiler::GroupTfEvents(&space, nullptr); MemoryProfile memory_profile = ConvertXPlaneToMemoryProfile(*host_plane); EXPECT_EQ(memory_profile.memory_profile_per_allocator().size(), 1); EXPECT_EQ(memory_profile.num_hosts(), 1); EXPECT_EQ(memory_profile.memory_ids_size(), 1); - EXPECT_EQ(memory_profile.step_count().size(), 1); EXPECT_EQ(memory_profile.memory_profile_per_allocator().begin()->first, ""GPU_0_bfc""); const auto& allocator_memory_profile = ",0,train 3b5ada30c14d35d6fbf0aeaaee898c5ff65b008c,tensorflow/tensorflow,"Add ability to disable zero-debiasing in ExponentialMovingAverage, for the purpose of backwards compatibility to support old checkpoints. For now, set this default value to avoid debiasing. Change: 140613316",moving_averages.py,"@@ -288,7 +288,8 @@ class ExponentialMovingAverage(object): @@variables_to_restore """""" - def __init__(self, decay, num_updates=None, name=""ExponentialMovingAverage""): + def __init__(self, decay, num_updates=None, zero_debias=False, + name=""ExponentialMovingAverage""): """"""Creates a new ExponentialMovingAverage object. The `apply()` method has to be called to create shadow variables and add @@ -305,11 +306,14 @@ class ExponentialMovingAverage(object): Args: decay: Float. The decay to use. num_updates: Optional count of number of updates applied to variables. + zero_debias: If `True`, zero debias moving-averages that are initialized + with tensors. name: String. Optional prefix name to use for the name of ops added in `apply()`. """""" self._decay = decay self._num_updates = num_updates + self._zero_debias = zero_debias self._name = name self._averages = {} @@ -373,7 +377,8 @@ class ExponentialMovingAverage(object): var, self._name, colocate_with_primary=(var.op.type == ""Variable"")) - zero_debias_true.add(avg) + if self._zero_debias: + zero_debias_true.add(avg) self._averages[var] = avg with ops.name_scope(self._name) as scope: ",0,train 3b5ada30c14d35d6fbf0aeaaee898c5ff65b008c,tensorflow/tensorflow,"Add ability to disable zero-debiasing in ExponentialMovingAverage, for the purpose of backwards compatibility to support old checkpoints. For now, set this default value to avoid debiasing. 
Change: 140613316",moving_averages_test.py,"@@ -89,6 +89,11 @@ def _Repeat(value, dim): class ExponentialMovingAverageTest(tf.test.TestCase): def _CheckDecay(self, ema, actual_decay, dim): + def _Scale(dk, steps): + if ema._zero_debias: + return 1 - dk ** (steps + 1) + else: + return 1 tens = _Repeat(10.0, dim) thirties = _Repeat(30.0, dim) var0 = tf.Variable(tens, name=""v0"") @@ -133,7 +138,7 @@ class ExponentialMovingAverageTest(tf.test.TestCase): self.assertAllClose(expected, avg0.eval()) expected = _Repeat(30.0 * dk + 30.0 * (1 - dk), dim) self.assertAllClose(expected, avg1.eval()) - expected = _Repeat(0.0 * dk + (10.0 + 30.0) * (1 - dk) / (1 - dk ** 2), dim) + expected = _Repeat(0.0 * dk + (10.0 + 30.0) * (1 - dk) / _Scale(dk, 1), dim) self.assertAllClose(expected, avg2.eval()) # Again, update the averages and check. @@ -145,7 +150,7 @@ class ExponentialMovingAverageTest(tf.test.TestCase): dim) self.assertAllClose(expected, avg1.eval()) expected = _Repeat(((0.0 * dk + (10.0 + 30.0) * (1 - dk)) * dk + - (10.0 + 30.0) * (1 - dk)) / (1 - dk ** 3), + (10.0 + 30.0) * (1 - dk)) / _Scale(dk, 2), dim) self.assertAllClose(expected, avg2.eval()) @@ -154,23 +159,47 @@ class ExponentialMovingAverageTest(tf.test.TestCase): ema = tf.train.ExponentialMovingAverage(0.25) self._CheckDecay(ema, actual_decay=0.25, dim=1) + def testAverageVariablesNoNumUpdates_Scalar_Debias(self): + with self.test_session(): + ema = tf.train.ExponentialMovingAverage(0.25, zero_debias=True) + self._CheckDecay(ema, actual_decay=0.25, dim=1) + def testAverageVariablesNoNumUpdates_Vector(self): with self.test_session(): ema = tf.train.ExponentialMovingAverage(0.25) self._CheckDecay(ema, actual_decay=0.25, dim=5) + def testAverageVariablesNoNumUpdates_Vector_Debias(self): + with self.test_session(): + ema = tf.train.ExponentialMovingAverage(0.25, zero_debias=True) + self._CheckDecay(ema, actual_decay=0.25, dim=5) + def testAverageVariablesNumUpdates_Scalar(self): with self.test_session(): # With num_updates 1, the decay applied is 0.1818 ema = tf.train.ExponentialMovingAverage(0.25, num_updates=1) self._CheckDecay(ema, actual_decay=0.181818, dim=1) + def testAverageVariablesNumUpdates_Scalar_Debias(self): + with self.test_session(): + # With num_updates 1, the decay applied is 0.1818 + ema = tf.train.ExponentialMovingAverage( + 0.25, num_updates=1, zero_debias=True) + self._CheckDecay(ema, actual_decay=0.181818, dim=1) + def testAverageVariablesNumUpdates_Vector(self): with self.test_session(): # With num_updates 1, the decay applied is 0.1818 ema = tf.train.ExponentialMovingAverage(0.25, num_updates=1) self._CheckDecay(ema, actual_decay=0.181818, dim=5) + def testAverageVariablesNumUpdates_Vector_Debias(self): + with self.test_session(): + # With num_updates 1, the decay applied is 0.1818 + ema = tf.train.ExponentialMovingAverage( + 0.25, num_updates=1, zero_debias=True) + self._CheckDecay(ema, actual_decay=0.181818, dim=5) + def testAverageVariablesWithControlDeps(self): with self.test_session() as sess: v0 = tf.Variable(0, name=""v0"") @@ -195,14 +224,15 @@ class ExponentialMovingAverageTest(tf.test.TestCase): self.assertEqual(1, sess.run(v0)) self.assertEqual([17.5], sess.run(v1_avg)) - def testAverageVariablesNames(self): + def averageVariablesNamesHelper(self, zero_debias): with self.test_session(): v0 = tf.Variable(10.0, name=""v0"") v1 = tf.Variable(30.0, name=""v1"") # Add a non-trainable variable. 
v2 = tf.Variable(20.0, name=""v2"", trainable=False) tensor2 = v0 + v1 - ema = tf.train.ExponentialMovingAverage(0.25, name=""foo"") + ema = tf.train.ExponentialMovingAverage( + 0.25, zero_debias=zero_debias, name=""foo"") self.assertEqual(""v0/foo"", ema.average_name(v0)) self.assertEqual(""v1/foo"", ema.average_name(v1)) self.assertEqual(""add/foo"", ema.average_name(tensor2)) @@ -212,21 +242,30 @@ class ExponentialMovingAverageTest(tf.test.TestCase): # {v0/foo : v0, # v1/foo : v1, # add/foo : add/foo, - # add/foo/biased: add/foo/biased, - # add/foo/local_step: add/foo/local_step, # v2 : v2} + expected_names = [ema.average_name(v0), + ema.average_name(v1), + ema.average_name(tensor2), + v2.op.name] + if zero_debias: + # vars_to_restore should also contain the following: + # {add/foo/biased: add/foo/biased, + # add/foo/local_step: add/foo/local_step} + expected_names += [ema.average_name(tensor2) + ""/biased"", + ema.average_name(tensor2) + ""/local_step""] self.assertEqual(sorted(vars_to_restore.keys()), - sorted([ema.average_name(v0), - ema.average_name(v1), - ema.average_name(tensor2), - ema.average_name(tensor2) + ""/biased"", - ema.average_name(tensor2) + ""/local_step"", - v2.op.name])) + sorted(expected_names)) self.assertEqual(ema.average_name(v0), ema.average(v0).op.name) self.assertEqual(ema.average_name(v1), ema.average(v1).op.name) self.assertEqual(ema.average_name(tensor2), ema.average(tensor2).op.name) - def testAverageVariablesNamesRespectScope(self): + def testAverageVariablesNames(self): + self.averageVariablesNamesHelper(zero_debias=True) + + def testAverageVariablesNamesNoDebias(self): + self.averageVariablesNamesHelper(zero_debias=False) + + def averageVariablesNamesRespectScopeHelper(self, zero_debias): # See discussion on #2740. 
with self.test_session(): with tf.variable_scope(""scope1""): @@ -236,7 +275,8 @@ class ExponentialMovingAverageTest(tf.test.TestCase): v2 = tf.Variable(20.0, name=""v2"", trainable=False) tensor2 = v0 + v1 with tf.variable_scope(""scope2""): - ema = tf.train.ExponentialMovingAverage(0.25, name=""foo"") + ema = tf.train.ExponentialMovingAverage( + 0.25, zero_debias=zero_debias, name=""foo"") self.assertEqual(""scope2/scope1/v0/foo"", ema.average_name(v0)) self.assertEqual(""scope2/scope1/v1/foo"", ema.average_name(v1)) self.assertEqual(""scope2/scope1/add/foo"", ema.average_name(tensor2)) @@ -246,22 +286,32 @@ class ExponentialMovingAverageTest(tf.test.TestCase): # {scope2/scope1/v0/foo : v0, # scope2/scope1/v1/foo : v1, # scope2/scope1/add/foo : add/foo, - # scope2/scope2/scope1/add/foo/biased: add/foo/biased, - # scope2/scope2/scope1/add/foo/local_step: add/foo/local_step, # scope1/v2 : v2} - sc = ""scope2/"" + expected_names = [ema.average_name(v0), + ema.average_name(v1), + ema.average_name(tensor2), + v2.op.name] + if zero_debias: + # vars_to_restore should also contain the following: + # {scope2/scope2/scope1/add/foo/biased: add/foo/biased, + # scope2/scope2/scope1/add/foo/local_step: add/foo/local_step} + sc = ""scope2/"" + expected_names += [sc + ema.average_name(tensor2) + ""/biased"", + sc + ema.average_name(tensor2) + ""/local_step""] + self.assertEqual(sorted(vars_to_restore.keys()), - sorted([ema.average_name(v0), - ema.average_name(v1), - ema.average_name(tensor2), - sc + ema.average_name(tensor2) + ""/biased"", - sc + ema.average_name(tensor2) + ""/local_step"", - v2.op.name])) + sorted(expected_names)) self.assertEqual(ema.average_name(v0), ema.average(v0).op.name) self.assertEqual(ema.average_name(v1), ema.average(v1).op.name) self.assertEqual(ema.average_name(tensor2), ema.average(tensor2).op.name) + def testAverageVariablesNamesRespectScope(self): + self.averageVariablesNamesRespectScopeHelper(zero_debias=True) + + def testAverageVariablesNamesRespectScopeNoDebias(self): + self.averageVariablesNamesRespectScopeHelper(zero_debias=False) + def testSubsetAverageVariablesNames(self): with self.test_session(): v0 = tf.Variable(10.0, name=""v0"") ",0,train 5c145b837609062d5ec5f0d2ddbd90c5fadee4ff,tensorflow/tensorflow,Udated usage example following the requested changes,image_ops_impl.py,"@@ -1948,9 +1948,8 @@ def random_hue(image, max_delta, seed=None): Usage Example: ```python - >> import tensorflow as tf - >> x = tf.random.normal(shape=(256, 256, 3)) - >> y = tf.image.random_hue(x, max_delta=0.1) + >> x = tf.constant([[[2.0, 3.0, 2.0]]]) + >> y = tf.image.random_hue(x, max_delta=0.1, seed=1) ``` Args: @@ -2942,8 +2941,7 @@ def rgb_to_yiq(images): Usage Example: ```python - >> import tensorflow as tf - >> x = tf.random.normal(shape=(256, 256, 3)) + >> x = tf.constant([[[1.0, 2.0, 3.0]]]) >> y = tf.image.rgb_to_yiq(x) ``` ",0,train 7a3b953e342dcf35869bece309f5ba3e9be81fd8,tensorflow/tensorflow,supporting quantized pooling op,quantized_pooling_ops.cc,"@@ -137,4 +137,14 @@ REGISTER_KERNEL_BUILDER( Name(""QuantizedMaxPool"").Device(DEVICE_CPU).TypeConstraint(""T""), QuantizedMaxPoolingOp); +#ifdef INTEL_MKL +REGISTER_KERNEL_BUILDER( + Name(""QuantizedAvgPool"").Device(DEVICE_CPU).TypeConstraint(""T""), + QuantizedAvgPoolingOp); + +REGISTER_KERNEL_BUILDER( + Name(""QuantizedMaxPool"").Device(DEVICE_CPU).TypeConstraint(""T""), + QuantizedMaxPoolingOp); +#endif + } // namespace tensorflow ",0,train afb966c4316a60823b584add5cec023d88a88887,tensorflow/tensorflow,"Decouple TFL_Model 
and TFL_Interpreter lifetimes PiperOrigin-RevId: 211988805",c_api.cc,"@@ -14,6 +14,8 @@ limitations under the License. ==============================================================================*/ #include ""tensorflow/contrib/lite/experimental/c/c_api.h"" +#include + #include ""tensorflow/contrib/lite/context.h"" #include ""tensorflow/contrib/lite/experimental/c/c_api_internal.h"" #include ""tensorflow/contrib/lite/interpreter.h"" @@ -29,12 +31,14 @@ extern ""C"" { TFL_Model* TFL_NewModel(const void* model_data, size_t model_size) { auto model = tflite::FlatBufferModel::BuildFromBuffer( static_cast(model_data), model_size); - return model ? new TFL_Model{std::move(model)} : nullptr; + std::shared_ptr shared_model(model.release()); + return shared_model ? new TFL_Model{std::move(shared_model)} : nullptr; } TFL_Model* TFL_NewModelFromFile(const char* model_path) { auto model = tflite::FlatBufferModel::BuildFromFile(model_path); - return model ? new TFL_Model{std::move(model)} : nullptr; + std::shared_ptr shared_model(model.release()); + return shared_model ? new TFL_Model{std::move(shared_model)} : nullptr; } void TFL_DeleteModel(TFL_Model* model) { delete model; } @@ -72,7 +76,7 @@ TFL_Interpreter* TFL_NewInterpreter( } } - return new TFL_Interpreter{std::move(interpreter)}; + return new TFL_Interpreter{model->impl, std::move(interpreter)}; } void TFL_DeleteInterpreter(TFL_Interpreter* interpreter) { delete interpreter; } ",0,train afb966c4316a60823b584add5cec023d88a88887,tensorflow/tensorflow,"Decouple TFL_Model and TFL_Interpreter lifetimes PiperOrigin-RevId: 211988805",c_api.h,"@@ -93,7 +93,8 @@ typedef struct TFL_Interpreter TFL_Interpreter; // failure. // // * `model` must be a valid model instance. The caller retains ownership of the -// object, and can destroy it immediately after creating the interpreter. +// object, and can destroy it immediately after creating the interpreter; the +// interpreter will maintain its own reference to the underlying model data. // * `optional_options` may be null. The caller retains ownership of the object, // and can safely destroy it immediately after creating the interpreter. // ",0,train afb966c4316a60823b584add5cec023d88a88887,tensorflow/tensorflow,"Decouple TFL_Model and TFL_Interpreter lifetimes PiperOrigin-RevId: 211988805",c_api_internal.h,"@@ -24,7 +24,8 @@ limitations under the License. // not be depended on. struct TFL_Model { - std::unique_ptr impl; + // Sharing is safe as FlatBufferModel is const. + std::shared_ptr impl; }; struct TFL_InterpreterOptions { @@ -35,6 +36,9 @@ struct TFL_InterpreterOptions { }; struct TFL_Interpreter { + // Taking a reference to the (const) model data avoids lifetime-related issues + // and complexity with the TFL_Model's existence. + std::shared_ptr model; std::unique_ptr impl; }; ",0,train b1b7d5930ecdc9412e7a3035bdd2be49e9cfc230,tensorflow/tensorflow,"Add a tag constant, gpu, to present graph with GPU support. PiperOrigin-RevId: 161242660",reader_test.py,"@@ -81,16 +81,23 @@ class ReaderTest(test.TestCase): # Graph that updates the single variable. SavedModel is invoked: # - to add the model (weights are not updated). - # - multiple custom tags. + # - multiple predefined tags. with self.test_session(graph=ops.Graph()) as sess: self._init_and_validate_variable(sess, ""v"", 44) + builder.add_meta_graph([tag_constants.SERVING, tag_constants.GPU]) + + # Graph that updates the single variable. SavedModel is invoked: + # - to add the model (weights are not updated). + # - multiple custom tags. 
+ with self.test_session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, ""v"", 45) builder.add_meta_graph([""foo"", ""bar""]) # Save the SavedModel to disk. builder.save() actual_tags = reader.get_saved_model_tag_sets(saved_model_dir) - expected_tags = [[""train""], [""serve""], [""foo"", ""bar""]] + expected_tags = [[""train""], [""serve""], [""serve"", ""gpu""], [""foo"", ""bar""]] self.assertEqual(expected_tags, actual_tags) ",0,train b1b7d5930ecdc9412e7a3035bdd2be49e9cfc230,tensorflow/tensorflow,"Add a tag constant, gpu, to present graph with GPU support. PiperOrigin-RevId: 161242660",saved_model_test.py,"@@ -207,6 +207,13 @@ class SavedModelTest(test.TestCase): self._init_and_validate_variable(sess, ""v"", 43) builder.add_meta_graph([tag_constants.SERVING]) + # Graph that updates the single variable. SavedModel invoked to: + # - simply add the model (weights are not updated). + # - multiple tags (from predefined constants). + with self.test_session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, ""v"", 45) + builder.add_meta_graph([tag_constants.SERVING, tag_constants.GPU]) + # Graph that updates the single variable. SavedModel is invoked: # - to add the model (weights are not updated). # - multiple custom tags. @@ -230,6 +237,13 @@ class SavedModelTest(test.TestCase): self.assertEqual( 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Restore the graph with multiple predefined tags whose variables were not + # saved. + with self.test_session(graph=ops.Graph()) as sess: + loader.load(sess, [tag_constants.SERVING, tag_constants.GPU], export_dir) + self.assertEqual( + 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Restore the graph with multiple tags. Provide duplicate tags to test set # semantics. with self.test_session(graph=ops.Graph()) as sess: ",0,train b1b7d5930ecdc9412e7a3035bdd2be49e9cfc230,tensorflow/tensorflow,"Add a tag constant, gpu, to present graph with GPU support. PiperOrigin-RevId: 161242660",tag_constants.py,"@@ -28,9 +28,12 @@ SERVING = ""serve"" # Tag for the `training` graph. TRAINING = ""train"" +# Tag for the `gpu` graph. +GPU = ""gpu"" _allowed_symbols = [ ""SERVING"", - ""TRAINING"" + ""TRAINING"", + ""GPU"" ] remove_undocumented(__name__, _allowed_symbols) ",0,train 4ecd2a70dd750b20a61033fe08301745685bf288,tensorflow/tensorflow,"Added unit test for max_to_keep being None. 
Change: 115516426",saver_test.py,"@@ -37,6 +37,14 @@ from tensorflow.python.framework import function from tensorflow.python.platform import gfile +def _TestDir(test_name): + test_dir = os.path.join(tf.test.get_temp_dir(), test_name) + if os.path.exists(test_dir): + shutil.rmtree(test_dir) + gfile.MakeDirs(test_dir) + return test_dir + + class SaverTest(tf.test.TestCase): def testBasics(self): @@ -349,12 +357,7 @@ class SaveRestoreShardedTest(tf.test.TestCase): class MaxToKeepTest(tf.test.TestCase): def testNonSharded(self): - save_dir = os.path.join(self.get_temp_dir(), ""max_to_keep_non_sharded"") - try: - gfile.DeleteRecursively(save_dir) - except OSError: - pass # Ignore - gfile.MakeDirs(save_dir) + save_dir = _TestDir(""max_to_keep_non_sharded"") with self.test_session() as sess: v = tf.Variable(10.0, name=""v"") @@ -456,12 +459,7 @@ class MaxToKeepTest(tf.test.TestCase): self.assertTrue(gfile.Exists(save._MetaGraphFilename(s1))) def testSharded(self): - save_dir = os.path.join(self.get_temp_dir(), ""max_to_keep_sharded"") - try: - gfile.DeleteRecursively(save_dir) - except OSError: - pass # Ignore - gfile.MakeDirs(save_dir) + save_dir = _TestDir(""max_to_keep_sharded"") with tf.Session( target="""", @@ -495,17 +493,39 @@ class MaxToKeepTest(tf.test.TestCase): self.assertEqual(2, len(gfile.Glob(s3))) self.assertTrue(gfile.Exists(save._MetaGraphFilename(s3))) + def testNoMaxToKeep(self): + save_dir = _TestDir(""no_max_to_keep"") + save_dir2 = _TestDir(""max_to_keep_0"") + + with self.test_session() as sess: + v = tf.Variable(10.0, name=""v"") + tf.initialize_all_variables().run() + + # Test max_to_keep being None. + save = tf.train.Saver({""v"": v}, max_to_keep=None) + self.assertEqual([], save.last_checkpoints) + s1 = save.save(sess, os.path.join(save_dir, ""s1"")) + self.assertEqual([], save.last_checkpoints) + self.assertTrue(gfile.Exists(s1)) + s2 = save.save(sess, os.path.join(save_dir, ""s2"")) + self.assertEqual([], save.last_checkpoints) + self.assertTrue(gfile.Exists(s2)) + + # Test max_to_keep being 0. 
+ save2 = tf.train.Saver({""v"": v}, max_to_keep=0) + self.assertEqual([], save2.last_checkpoints) + s1 = save2.save(sess, os.path.join(save_dir2, ""s1"")) + self.assertEqual([], save2.last_checkpoints) + self.assertTrue(gfile.Exists(s1)) + s2 = save2.save(sess, os.path.join(save_dir2, ""s2"")) + self.assertEqual([], save2.last_checkpoints) + self.assertTrue(gfile.Exists(s2)) + class KeepCheckpointEveryNHoursTest(tf.test.TestCase): def testNonSharded(self): - save_dir = os.path.join(self.get_temp_dir(), - ""keep_checkpoint_every_n_hours"") - try: - gfile.DeleteRecursively(save_dir) - except OSError: - pass # Ignore - gfile.MakeDirs(save_dir) + save_dir = _TestDir(""keep_checkpoint_every_n_hours"") with self.test_session() as sess: v = tf.Variable([10.0], name=""v"") @@ -685,15 +705,8 @@ class LatestCheckpointWithRelativePaths(tf.test.TestCase): class CheckpointStateTest(tf.test.TestCase): - def _TestDir(self, test_name): - test_dir = os.path.join(self.get_temp_dir(), test_name) - if os.path.exists(test_dir): - shutil.rmtree(test_dir) - gfile.MakeDirs(test_dir) - return test_dir - def testAbsPath(self): - save_dir = self._TestDir(""abs_paths"") + save_dir = _TestDir(""abs_paths"") abs_path = os.path.join(save_dir, ""model-0"") ckpt = tf.train.generate_checkpoint_state_proto(save_dir, abs_path) self.assertEqual(ckpt.model_checkpoint_path, abs_path) @@ -712,7 +725,7 @@ class CheckpointStateTest(tf.test.TestCase): self.assertEqual(ckpt.all_model_checkpoint_paths[-1], new_rel_path) def testAllModelCheckpointPaths(self): - save_dir = self._TestDir(""all_models_test"") + save_dir = _TestDir(""all_models_test"") abs_path = os.path.join(save_dir, ""model-0"") for paths in [None, [], [""model-2""]]: ckpt = tf.train.generate_checkpoint_state_proto( @@ -726,7 +739,7 @@ class CheckpointStateTest(tf.test.TestCase): self.assertEqual(ckpt.all_model_checkpoint_paths[-1], abs_path) def testUpdateCheckpointState(self): - save_dir = self._TestDir(""update_checkpoint_state"") + save_dir = _TestDir(""update_checkpoint_state"") os.chdir(save_dir) # Make a temporary train directory. train_dir = ""train"" @@ -746,15 +759,8 @@ class CheckpointStateTest(tf.test.TestCase): class MetaGraphTest(tf.test.TestCase): - def _TestDir(self, test_name): - test_dir = os.path.join(self.get_temp_dir(), test_name) - if os.path.exists(test_dir): - shutil.rmtree(test_dir) - gfile.MakeDirs(test_dir) - return test_dir - def testAddCollectionDef(self): - test_dir = self._TestDir(""good_collection"") + test_dir = _TestDir(""good_collection"") filename = os.path.join(test_dir, ""metafile"") with self.test_session(): # Creates a graph. @@ -819,7 +825,7 @@ class MetaGraphTest(tf.test.TestCase): self.assertEqual(len(meta_graph_def.collection_def), 0) def _testMultiSaverCollectionSave(self): - test_dir = self._TestDir(""saver_collection"") + test_dir = _TestDir(""saver_collection"") filename = os.path.join(test_dir, ""metafile"") saver0_ckpt = os.path.join(test_dir, ""saver0.ckpt"") saver1_ckpt = os.path.join(test_dir, ""saver1.ckpt"") @@ -894,7 +900,7 @@ class MetaGraphTest(tf.test.TestCase): self._testMultiSaverCollectionRestore() def testBinaryAndTextFormat(self): - test_dir = self._TestDir(""binary_and_text"") + test_dir = _TestDir(""binary_and_text"") filename = os.path.join(test_dir, ""metafile"") with self.test_session(graph=tf.Graph()): # Creates a graph. 
@@ -924,7 +930,7 @@ class MetaGraphTest(tf.test.TestCase): tf.train.import_meta_graph(filename) def testSliceVariable(self): - test_dir = self._TestDir(""slice_saver"") + test_dir = _TestDir(""slice_saver"") filename = os.path.join(test_dir, ""metafile"") with self.test_session(): v1 = tf.Variable([20.0], name=""v1"") @@ -946,7 +952,7 @@ class MetaGraphTest(tf.test.TestCase): self.assertProtoEquals(meta_graph_def, new_meta_graph_def) def _testGraphExtensionSave(self): - test_dir = self._TestDir(""graph_extension"") + test_dir = _TestDir(""graph_extension"") filename = os.path.join(test_dir, ""metafile"") saver0_ckpt = os.path.join(test_dir, ""saver0.ckpt"") with self.test_session(graph=tf.Graph()) as sess: ",0,train 7f06d633e58ba37cbf654c1371135100260f20d8,tensorflow/tensorflow,"effective_sample_size kwarg change (same default behavior). * rename max_lags --> filter_beyond_lag * rename max_lags_threshold --> filter_threshold * Users can use both filters, and they combine in an ""OR"" manner * None ==> turn off a filter. PiperOrigin-RevId: 185666926",mcmc_diagnostics_test.py,"@@ -41,12 +41,14 @@ class _EffectiveSampleSizeTest(object): sess, atol=1e-2, rtol=1e-2, - max_lags_threshold=None, - max_lags=None): + filter_threshold=None, + filter_beyond_lag=None): x = array_ops.placeholder_with_default( input=x_, shape=x_.shape if self.use_static_shape else None) ess = mcmc_diagnostics.effective_sample_size( - x, max_lags_threshold=max_lags_threshold, max_lags=max_lags) + x, + filter_threshold=filter_threshold, + filter_beyond_lag=filter_beyond_lag) if self.use_static_shape: self.assertAllEqual(x.shape[1:], ess.shape) @@ -56,18 +58,19 @@ class _EffectiveSampleSizeTest(object): np.ones_like(ess_) * expected_ess, ess_, atol=atol, rtol=rtol) def testIidRank1NormalHasFullEssMaxLags10(self): - # With a length 5000 iid normal sequence, and max_lags = 10, we should - # have a good estimate of ESS, and it should be close to the full sequence - # length of 5000. - # The choice of max_lags = 10 is a short cutoff, reasonable only since we - # know the correlation length should be zero right away. + # With a length 5000 iid normal sequence, and filter_beyond_lag = 10, we + # should have a good estimate of ESS, and it should be close to the full + # sequence length of 5000. + # The choice of filter_beyond_lag = 10 is a short cutoff, reasonable only + # since we know the correlation length should be zero right away. with self.test_session() as sess: with spectral_ops_test_util.fft_kernel_label_map(): self._check_versus_expected_effective_sample_size( x_=rng.randn(5000).astype(np.float32), expected_ess=5000, sess=sess, - max_lags=10, + filter_beyond_lag=10, + filter_threshold=None, rtol=0.3) def testIidRank2NormalHasFullEssMaxLags10(self): @@ -78,23 +81,25 @@ class _EffectiveSampleSizeTest(object): x_=rng.randn(5000, 2).astype(np.float32), expected_ess=5000, sess=sess, - max_lags=10, + filter_beyond_lag=10, + filter_threshold=None, rtol=0.3) def testIidRank1NormalHasFullEssMaxLagThresholdZero(self): - # With a length 5000 iid normal sequence, and max_lags_threshold = 0, + # With a length 5000 iid normal sequence, and filter_threshold = 0, # we should have a super-duper estimate of ESS, and it should be very close # to the full sequence length of 5000. - # The choice of max_lags_cutoff = 0 means we cutoff as soon as the auto-corr - # is below zero. This should happen very quickly, due to the fact that the - # theoretical auto-corr is [1, 0, 0,...] 
+ # The choice of filter_beyond_lag = 0 means we cutoff as soon as the + # auto-corris below zero. This should happen very quickly, due to the fact + # that the theoretical auto-corr is [1, 0, 0,...] with self.test_session() as sess: with spectral_ops_test_util.fft_kernel_label_map(): self._check_versus_expected_effective_sample_size( x_=rng.randn(5000).astype(np.float32), expected_ess=5000, sess=sess, - max_lags_threshold=0., + filter_beyond_lag=None, + filter_threshold=0., rtol=0.1) def testIidRank2NormalHasFullEssMaxLagThresholdZero(self): @@ -105,7 +110,8 @@ class _EffectiveSampleSizeTest(object): x_=rng.randn(5000, 2).astype(np.float32), expected_ess=5000, sess=sess, - max_lags_threshold=0., + filter_beyond_lag=None, + filter_threshold=0., rtol=0.1) def testLength10CorrelationHasEssOneTenthTotalLengthUsingMaxLags50(self): @@ -121,7 +127,8 @@ class _EffectiveSampleSizeTest(object): x_=x_, expected_ess=50000 // 10, sess=sess, - max_lags=50, + filter_beyond_lag=50, + filter_threshold=None, rtol=0.2) def testLength10CorrelationHasEssOneTenthTotalLengthUsingMaxLagsThresholdZero( @@ -138,7 +145,8 @@ class _EffectiveSampleSizeTest(object): x_=x_, expected_ess=50000 // 10, sess=sess, - max_lags_threshold=0., + filter_beyond_lag=None, + filter_threshold=0., rtol=0.1) def testListArgs(self): @@ -148,16 +156,16 @@ class _EffectiveSampleSizeTest(object): x_ = (iid_x_ * np.ones((5000, 10)).astype(np.float32)).reshape((50000,)) y_ = rng.randn(50000).astype(np.float32) states = [x_, x_, y_, y_] - max_lags_threshold = [0., None, 0., None] - max_lags = [None, 5, None, 5] + filter_threshold = [0., None, 0., None] + filter_beyond_lag = [None, 5, None, 5] # See other tests for reasoning on tolerance. with self.test_session() as sess: with spectral_ops_test_util.fft_kernel_label_map(): ess = mcmc_diagnostics.effective_sample_size( states, - max_lags_threshold=max_lags_threshold, - max_lags=max_lags) + filter_threshold=filter_threshold, + filter_beyond_lag=filter_beyond_lag) ess_ = sess.run(ess) self.assertAllEqual(4, len(ess_)) @@ -166,6 +174,59 @@ class _EffectiveSampleSizeTest(object): self.assertAllClose(50000, ess_[2], rtol=0.1) self.assertAllClose(50000, ess_[3], rtol=0.1) + def testMaxLagsThresholdLessThanNeg1SameAsNone(self): + # Setting both means we filter out items R_k from the auto-correlation + # sequence if k > filter_beyond_lag OR k >= j where R_j < filter_threshold. + + # x_ has correlation length 10. + iid_x_ = rng.randn(500, 1).astype(np.float32) + x_ = (iid_x_ * np.ones((500, 10)).astype(np.float32)).reshape((5000,)) + with self.test_session() as sess: + with spectral_ops_test_util.fft_kernel_label_map(): + x = array_ops.placeholder_with_default( + input=x_, shape=x_.shape if self.use_static_shape else None) + + ess_none_none = mcmc_diagnostics.effective_sample_size( + x, filter_threshold=None, filter_beyond_lag=None) + ess_none_200 = mcmc_diagnostics.effective_sample_size( + x, filter_threshold=None, filter_beyond_lag=200) + ess_neg2_200 = mcmc_diagnostics.effective_sample_size( + x, filter_threshold=-2., filter_beyond_lag=200) + ess_neg2_none = mcmc_diagnostics.effective_sample_size( + x, filter_threshold=-2., filter_beyond_lag=None) + ess_none_none_, ess_none_200_, ess_neg2_200_, ess_neg2_none_ = sess.run( + [ess_none_none, ess_none_200, ess_neg2_200, ess_neg2_none]) + + # filter_threshold=-2 <==> filter_threshold=None. 
+ self.assertAllClose(ess_none_none_, ess_neg2_none_) + self.assertAllClose(ess_none_200_, ess_neg2_200_) + + def testMaxLagsArgsAddInAnOrManner(self): + # Setting both means we filter out items R_k from the auto-correlation + # sequence if k > filter_beyond_lag OR k >= j where R_j < filter_threshold. + + # x_ has correlation length 10. + iid_x_ = rng.randn(500, 1).astype(np.float32) + x_ = (iid_x_ * np.ones((500, 10)).astype(np.float32)).reshape((5000,)) + with self.test_session() as sess: + with spectral_ops_test_util.fft_kernel_label_map(): + x = array_ops.placeholder_with_default( + input=x_, shape=x_.shape if self.use_static_shape else None) + + ess_1_9 = mcmc_diagnostics.effective_sample_size( + x, filter_threshold=1., filter_beyond_lag=9) + ess_1_none = mcmc_diagnostics.effective_sample_size( + x, filter_threshold=1., filter_beyond_lag=None) + ess_none_9 = mcmc_diagnostics.effective_sample_size( + x, filter_threshold=1., filter_beyond_lag=9) + ess_1_9_, ess_1_none_, ess_none_9_ = sess.run( + [ess_1_9, ess_1_none, ess_none_9]) + + # Since R_k = 1 for k < 10, and R_k < 1 for k >= 10, + # filter_threshold = 1 <==> filter_beyond_lag = 9. + self.assertAllClose(ess_1_9_, ess_1_none_) + self.assertAllClose(ess_1_9_, ess_none_9_) + class EffectiveSampleSizeStaticTest(test.TestCase, _EffectiveSampleSizeTest): ",0,test 7f06d633e58ba37cbf654c1371135100260f20d8,tensorflow/tensorflow,"effective_sample_size kwarg change (same default behavior). * rename max_lags --> filter_beyond_lag * rename max_lags_threshold --> filter_threshold * Users can use both filters, and they combine in an ""OR"" manner * None ==> turn off a filter. PiperOrigin-RevId: 185666926",mcmc_diagnostics_impl.py,"@@ -36,13 +36,13 @@ __all__ = [ def effective_sample_size(states, - max_lags_threshold=None, - max_lags=None, + filter_threshold=0., + filter_beyond_lag=None, name=None): """"""Estimate a lower bound on effective sample size for each independent chain. - Roughly speaking, the ""effective sample size"" (ESS) is the size of an iid - sample with the same variance as `state`. + Roughly speaking, ""effective sample size"" (ESS) is the size of an iid sample + with the same variance as `state`. More precisely, given a stationary sequence of possibly correlated random variables `X_1, X_2,...,X_N`, each identically distributed ESS is the number @@ -87,21 +87,28 @@ def effective_sample_size(states, This function estimates the above by first estimating the auto-correlation. Since `R_k` must be estimated using only `N - k` samples, it becomes progressively noisier for larger `k`. For this reason, the summation over - `R_k` should be truncated at some number `max_lags < N`. Since many MCMC - methods generate chains where `R_k > 0`, a reasonable critera is to truncate - at the first index where the estimated auto-correlation becomes negative. + `R_k` should be truncated at some number `filter_beyond_lag < N`. Since many + MCMC methods generate chains where `R_k > 0`, a reasonable critera is to + truncate at the first index where the estimated auto-correlation becomes + negative. + + The arguments `filter_beyond_lag`, `filter_threshold` are filters intended to + remove noisy tail terms from `R_k`. They combine in an ""OR"" manner meaning + terms are removed if they were to be filtered under the `filter_beyond_lag` OR + `filter_threshold` criteria. Args: states: `Tensor` or list of `Tensor` objects. Dimension zero should index identically distributed states. - max_lags_threshold: `Tensor` or list of `Tensor` objects. 
+ filter_threshold: `Tensor` or list of `Tensor` objects. Must broadcast with `state`. The auto-correlation sequence is truncated - after the first appearance of a term less than `max_lags_threshold`. If - both `max_lags` and `max_lags_threshold` are `None`, - `max_lags_threshold` defaults to `0`. - max_lags: `Tensor` or list of `Tensor` objects. Must be `int`-like and - scalar valued. The auto-correlation sequence is truncated to this length. - May be provided only if `max_lags_threshold` is not. + after the first appearance of a term less than `filter_threshold`. + Setting to `None` means we use no threshold filter. Since `|R_k| <= 1`, + setting to any number less than `-1` has the same effect. + filter_beyond_lag: `Tensor` or list of `Tensor` objects. Must be + `int`-like and scalar valued. The auto-correlation sequence is truncated + to this length. Setting to `None` means we do not filter based on number + of lags. name: `String` name to prepend to created ops. Returns: @@ -109,8 +116,8 @@ def effective_sample_size(states, each component of `states`. Shape will be `states.shape[1:]`. Raises: - ValueError: If `states` and `max_lags_threshold` or `states` and `max_lags` - are both lists with different lengths. + ValueError: If `states` and `filter_threshold` or `states` and + `filter_beyond_lag` are both lists with different lengths. """""" states_was_list = _is_list_like(states) @@ -118,15 +125,16 @@ def effective_sample_size(states, if not states_was_list: states = [states] - max_lags = _broadcast_maybelist_arg(states, max_lags, ""max_lags"") - max_lags_threshold = _broadcast_maybelist_arg(states, max_lags_threshold, - ""max_lags_threshold"") + filter_beyond_lag = _broadcast_maybelist_arg(states, filter_beyond_lag, + ""filter_beyond_lag"") + filter_threshold = _broadcast_maybelist_arg(states, filter_threshold, + ""filter_threshold"") # Process items, one at a time. with ops.name_scope(name, ""effective_sample_size""): ess_list = [ _effective_sample_size_single_state(s, ml, mlt) - for (s, ml, mlt) in zip(states, max_lags, max_lags_threshold) + for (s, ml, mlt) in zip(states, filter_beyond_lag, filter_threshold) ] if states_was_list: @@ -134,38 +142,31 @@ def effective_sample_size(states, return ess_list[0] -def _effective_sample_size_single_state(states, max_lags, max_lags_threshold): +def _effective_sample_size_single_state(states, filter_beyond_lag, + filter_threshold): """"""ESS computation for one single Tensor argument."""""" - if max_lags is not None and max_lags_threshold is not None: - raise ValueError( - ""Expected at most one of max_lags, max_lags_threshold to be provided. "" - ""Found: {}, {}"".format(max_lags, max_lags_threshold)) - - if max_lags_threshold is None: - max_lags_threshold = 0. with ops.name_scope( ""effective_sample_size_single_state"", - values=[states, max_lags, max_lags_threshold]): + values=[states, filter_beyond_lag, filter_threshold]): states = ops.convert_to_tensor(states, name=""states"") dt = states.dtype - if max_lags is not None: - auto_corr = sample_stats.auto_correlation( - states, axis=0, max_lags=max_lags) - elif max_lags_threshold is not None: - max_lags_threshold = ops.convert_to_tensor( - max_lags_threshold, dtype=dt, name=""max_lags_threshold"") - auto_corr = sample_stats.auto_correlation(states, axis=0) + # filter_beyond_lag == None ==> auto_corr is the full sequence. 
+ auto_corr = sample_stats.auto_correlation( + states, axis=0, max_lags=filter_beyond_lag) + if filter_threshold is not None: + filter_threshold = ops.convert_to_tensor( + filter_threshold, dtype=dt, name=""filter_threshold"") # Get a binary mask to zero out values of auto_corr below the threshold. # mask[i, ...] = 1 if auto_corr[j, ...] > threshold for all j <= i, # mask[i, ...] = 0, otherwise. # So, along dimension zero, the mask will look like [1, 1, ..., 0, 0,...] # Building step by step, - # Assume auto_corr = [1, 0.5, 0.0, 0.3], and max_lags_threshold = 0.2. + # Assume auto_corr = [1, 0.5, 0.0, 0.3], and filter_threshold = 0.2. # Step 1: mask = [False, False, True, False] - mask = auto_corr < max_lags_threshold + mask = auto_corr < filter_threshold # Step 2: mask = [0, 0, 1, 1] mask = math_ops.cast(mask, dtype=dt) # Step 3: mask = [0, 0, 1, 2] @@ -173,14 +174,12 @@ def _effective_sample_size_single_state(states, max_lags, max_lags_threshold): # Step 4: mask = [1, 1, 0, 0] mask = math_ops.maximum(1. - mask, 0.) auto_corr *= mask - else: - auto_corr = sample_stats.auto_correlation(states, axis=0) # With R[k] := auto_corr[k, ...], # ESS = N / {1 + 2 * Sum_{k=1}^N (N - k) / N * R[k]} # = N / {-1 + 2 * Sum_{k=0}^N (N - k) / N * R[k]} (since R[0] = 1) # approx N / {-1 + 2 * Sum_{k=0}^M (N - k) / N * R[k]} - #, where M is the max_lags truncation point chosen above. + # where M is the filter_beyond_lag truncation point chosen above. # Get the factor (N - k) / N, and give it shape [M, 1,...,1], having total # ndims the same as auto_corr ",0,test a1d6d4524a47d11aced9156865946592f425701a,tensorflow/tensorflow,"[tf:tfrt] Temporary disable clusters with i1 inputs PiperOrigin-RevId: 390134209 Change-Id: I4454b2355add463262958c64881e9d5818560007",tf_cpurt_clustering.cc,"@@ -721,6 +721,20 @@ mlir::LogicalResult VerifyCluster(const Cluster& cluster) { (void)inserted; } + // TODO(b/196192286): This is a temporary workaround to disable excessive + // recompilation for dynamic shapes in one particular model. Remove this once + // specialization will be done based on shape constraints. + for (Operation* op : ops) { + for (Value value : op->getOperands()) { + Operation* defining_op = value.getDefiningOp(); + if (!defining_op) continue; + + if (!ops.contains(defining_op) && + mlir::getElementTypeOrSelf(value.getType()).isInteger(1)) + return failure(); + } + } + for (auto& pair : cluster.constraints) { Value value = pair.getFirst(); ValueConstraint constraint = pair.getSecond(); ",0,train a0ed0cbc9251e59d7bbd6d0ea6f20f6c28b9625d,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-07-08 PiperOrigin-RevId: 383585806 Change-Id: Id24ee27b5f7b68a31501009c0ddb7d436e6c485d",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 7, 7) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 7, 8) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train e45c9f22722df4d967bf81467f1691cc6b8e864b,tensorflow/tensorflow,"Fix crash when unfused layer normalization used with mixed precision. 
For numeric safety, I do the unfused layer normalization in fp32, as parts of the fused version are internally done in fp32. I'm not sure if doing the layer normalization in fp32 over fp16 makes a difference in practice. PiperOrigin-RevId: 293717765 Change-Id: Ie91ed55d73b02b93530a72f917243c39a37e7430",normalization.py,"@@ -1078,6 +1078,12 @@ class LayerNormalization(Layer): return v if not self._fused: + input_dtype = inputs.dtype + if input_dtype in ('float16', 'bfloat16') and self.dtype == 'float32': + # If mixed precision is used, cast inputs to float32 so that this is at + # least as numerically stable as the fused version. + inputs = math_ops.cast(inputs, 'float32') + # Calculate the moments on the last axis (layer activations). mean, variance = nn.moments(inputs, self.axis, keep_dims=True) @@ -1091,6 +1097,7 @@ class LayerNormalization(Layer): offset=offset, scale=scale, variance_epsilon=self.epsilon) + outputs = math_ops.cast(outputs, input_dtype) else: # Collapse dims before self.axis, and dims in self.axis pre_dim, in_dim = (1, 1) ",0,train e45c9f22722df4d967bf81467f1691cc6b8e864b,tensorflow/tensorflow,"Fix crash when unfused layer normalization used with mixed precision. For numeric safety, I do the unfused layer normalization in fp32, as parts of the fused version are internally done in fp32. I'm not sure if doing the layer normalization in fp32 over fp16 makes a difference in practice. PiperOrigin-RevId: 293717765 Change-Id: Ie91ed55d73b02b93530a72f917243c39a37e7430",layer_correctness_test.py,"@@ -123,6 +123,8 @@ class LayerCorrectnessTest(keras_parameterized.TestCase): ('BatchNormalization', normalization_v2.BatchNormalization, (2, 2), 1e-2, 1e-2), ('LayerNormalization', normalization.LayerNormalization, (2, 2)), + ('LayerNormalizationUnfused', + lambda: normalization.LayerNormalization(axis=1), (2, 2, 2)), ('MaxPooling2D', pooling.MaxPooling2D, (2, 2, 2, 1)), ('AveragePooling2D', pooling.AveragePooling2D, (2, 2, 2, 1)), ('GlobalMaxPooling2D', pooling.GlobalMaxPooling2D, (2, 2, 2, 1)), ",0,train 2ccfe8e764632cd05422bda12abe0f7a24abf000,tensorflow/tensorflow,"Added a new method to extract the graph properties from a cost graph without having to run the model. This will simplify the process of creating regression tests PiperOrigin-RevId: 158050327",graph_properties.cc,"@@ -218,9 +218,13 @@ Status GraphProperties::InferDynamically(Cluster* cluster) { TF_RETURN_IF_ERROR( cluster->Run(item_.graph, item_.feed, item_.fetch, &metadata)); + return InferFromCostGraph(metadata.cost_graph()); +} + +Status GraphProperties::InferFromCostGraph(const CostGraphDef& cost_graph) { std::unordered_map name_to_cost; std::unordered_map name_to_node; // Empty - for (auto& node : metadata.cost_graph().node()) { + for (auto& node : cost_graph.node()) { name_to_cost[node.name()] = &node; std::vector output_properties; ",0,train 2ccfe8e764632cd05422bda12abe0f7a24abf000,tensorflow/tensorflow,"Added a new method to extract the graph properties from a cost graph without having to run the model. This will simplify the process of creating regression tests PiperOrigin-RevId: 158050327",graph_properties.h,"@@ -36,6 +36,7 @@ class GraphProperties { Status InferStatically(); Status InferDynamically(Cluster* cluster); + Status InferFromCostGraph(const CostGraphDef& cost_graph); bool HasOutputProperties(const string& name) const; std::vector GetInputProperties( ",0,train d5c075b02191f74b0b4c37713648c59ff7b06962,tensorflow/tensorflow,"Add test for 64-bit clz and sign. 
PiperOrigin-RevId: 196894702",hlo_evaluator_typed_visitor.h,"@@ -1738,14 +1738,16 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { return Status::OK(); } - // Enable CLZ only for int32 and uint32. + // Enable CLZ only for int32, uint32, int64 and uint64. template < typename NativeT, typename std::enable_if< (std::is_floating_point::value || std::is_integral::value || is_complex_t::value) && !(std::is_same::value || - std::is_same::value)>::type* = nullptr> + std::is_same::value || + std::is_same::value || + std::is_same::value)>::type* = nullptr> Status HandleClz(HloInstruction* clz) { return InvalidArgument(""Unsupported type for Clz""); } @@ -1762,6 +1764,18 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { return Status::OK(); } + template ::value || + std::is_same::value>::type* = nullptr> + Status HandleClz(HloInstruction* clz) { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[clz], + ElementWiseUnaryOp(clz, [](ElementwiseT elem_operand) { + return 63 - tensorflow::Log2Floor64(elem_operand); + })); + return Status::OK(); + } + Status HandleClz(HloInstruction* clz) override { return HandleClz(clz); } ",0,test d5c075b02191f74b0b4c37713648c59ff7b06962,tensorflow/tensorflow,"Add test for 64-bit clz and sign. PiperOrigin-RevId: 196894702",array_elementwise_ops_test.cc,"@@ -2225,6 +2225,15 @@ XLA_TEST_F(ArrayElementwiseOpTest, ClzU32s) { ComputeAndCompareR1(&builder, {32, 31, 27, 15, 9, 3, 0}, {}); } +XLA_TEST_F(ArrayElementwiseOpTest, ClzS64s) { + XlaBuilder builder(TestName()); + auto a = + builder.ConstantR1({0, 1, 0x80000000, 0x7FFFFFFFF2345678ul, -1}); + builder.Clz(a); + + ComputeAndCompareR1(&builder, {64, 63, 32, 1, 0}, {}); +} + XLA_TEST_F(ArrayElementwiseOpTest, AddChainFoldLeft) { // a ------ (add) --------- (add) // / / ",0,test d5c075b02191f74b0b4c37713648c59ff7b06962,tensorflow/tensorflow,"Add test for 64-bit clz and sign. PiperOrigin-RevId: 196894702",unary_op_test.cc,"@@ -84,6 +84,11 @@ int UnaryOpTest::inf() { return 2147483647; } +template <> +int64 UnaryOpTest::inf() { + return 0x7FFFFFFFFFFFFFFFl; +} + template <> void UnaryOpTest::AbsTestHelper() { XlaBuilder builder(TestName()); @@ -176,6 +181,7 @@ XLA_TEST_F(UnaryOpTest, SignTestR0) { XLA_TEST_F(UnaryOpTest, SignTestR1) { SignTestHelper(); + SignTestHelper(); SignTestHelper(); SignTestHelper(); } ",0,test 8c44d4d7258b35da0275852a6a0c8afa28d16ea7,tensorflow/tensorflow,"Fixing bug in wait_for_session in which we were waiting for N ms instead of N secs. Change: 119119175",session_manager.py,"@@ -258,7 +258,7 @@ class SessionManager(object): if max_wait_secs is None: max_wait_secs = float(""Inf"") - timer = _CountDownTimer(max_wait_secs) + timer = _CountDownTimer(1000 * max_wait_secs) while True: sess = session.Session(target, graph=self._graph, config=config) ",0,train 8c44d4d7258b35da0275852a6a0c8afa28d16ea7,tensorflow/tensorflow,"Fixing bug in wait_for_session in which we were waiting for N ms instead of N secs. Change: 119119175",session_manager_test.py,"@@ -124,7 +124,7 @@ class SessionManagerTest(tf.test.TestCase): # Set max_wait_secs to allow us to try a few times. 
with self.assertRaises(errors.DeadlineExceededError): - sm.wait_for_session(master="""", max_wait_secs=3000) + sm.wait_for_session(master="""", max_wait_secs=3) if __name__ == ""__main__"": tf.test.main() ",0,train 8cb0d7f4b9fdb12b3d74bcc4cf50684a718ec46c,tensorflow/tensorflow,const removed,gradients.cc,"@@ -168,7 +168,7 @@ std::vector SymbolicGradientBuilder::GetReachableNodes() { std::vector reachable_nodes(scope_.graph()->num_node_ids(), false); std::deque queue; for (const Output& out : outputs_) { - const Node* const out_node = out.node(); + Node* const out_node = out.node(); const int out_node_id = out_node->id(); if (!reachable_nodes[out_node_id]) { queue.push_back(out_node); @@ -181,7 +181,7 @@ std::vector SymbolicGradientBuilder::GetReachableNodes() { queue.pop_front(); for (const Edge* e : n->in_edges()) { if (e->IsControlEdge()) continue; - const Node* const src_node = e->src(); + Node* const src_node = e->src(); const int src_node_id = src_node->id(); if (!reachable_nodes[src_node_id]) { queue.push_back(src_node); ",0,train 4a6aab8549606f44bc1384cfa2bbdd68764a4ebb,tensorflow/tensorflow,"A fix to RequantizationPerChannel Op - Added registration for output type: quint8",mkl_requantize_per_channel_op.cc,"@@ -20,7 +20,6 @@ limitations under the License. #include #include ""mkldnn.hpp"" -#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" #include ""tensorflow/core/framework/op.h"" #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/type_traits.h"" @@ -29,6 +28,7 @@ limitations under the License. #include ""tensorflow/core/kernels/no_op.h"" #include ""tensorflow/core/lib/core/errors.h"" #include ""tensorflow/core/util/mkl_util.h"" +#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" namespace tensorflow { @@ -141,8 +141,8 @@ class MklRequantizePerChannelOp : public OpKernel { output_min->flat()(0) = input_requested_min_float; output_max->flat()(0) = input_requested_max_float; } catch (mkldnn::error& e) { - string error_msg = ""Status: "" + std::to_string(e.status) + - "", message: "" + std::string(e.message) + "", in file "" + + string error_msg = ""Status: "" + std::to_string(e.status) + "", message: "" + + std::string(e.message) + "", in file "" + std::string(__FILE__) + "":"" + std::to_string(__LINE__); OP_REQUIRES_OK( ctx, errors::Aborted(""Operation received an exception:"", error_msg)); @@ -162,11 +162,18 @@ class MklRequantizePerChannelOp : public OpKernel { engine cpu_engine_ = engine(engine::cpu, 0); }; +// Registration for out_type: qint8 REGISTER_KERNEL_BUILDER(Name(""RequantizePerChannel"") .Device(DEVICE_CPU) .TypeConstraint(""T"") .TypeConstraint(""out_type""), MklRequantizePerChannelOp); +// Registration for out_type: quint8 +REGISTER_KERNEL_BUILDER(Name(""RequantizePerChannel"") + .Device(DEVICE_CPU) + .TypeConstraint(""T"") + .TypeConstraint(""out_type""), + MklRequantizePerChannelOp); } // namespace tensorflow #endif // INTEL_MKL ",0,train 73a45ea89bf9eb3045a07f7c1aeabd1b18113b22,tensorflow/tensorflow,"Minor cleanup - no longer returns Status on Device/OpDevice reads on tensorhandles PiperOrigin-RevId: 221151286",c_api.cc,"@@ -404,8 +404,7 @@ const char* TFE_TensorHandleDeviceName(TFE_TensorHandle* h, TF_Status* status) { ""The passed in handle is a nullptr""); return nullptr; } - tensorflow::Device* d = nullptr; - status->status = h->handle->OpDevice(&d); + tensorflow::Device* d = h->handle->op_device(); return (d == nullptr) ? 
""/job:localhost/replica:0/task:0/device:CPU:0"" : d->name().c_str(); } ",0,train 73a45ea89bf9eb3045a07f7c1aeabd1b18113b22,tensorflow/tensorflow,"Minor cleanup - no longer returns Status on Device/OpDevice reads on tensorhandles PiperOrigin-RevId: 221151286",c_api_debug.cc,"@@ -57,13 +57,9 @@ TF_CAPI_EXPORT extern TFE_TensorDebugInfo* TFE_TensorHandleTensorDebugInfo( return nullptr; } - tensorflow::Device* device; - status->status = handle->handle->Device(&device); - if (!status->status.ok()) { - return nullptr; - } - #ifdef TENSORFLOW_EAGER_USE_XLA + tensorflow::Device* device = handle->handle->device(); + // If tensor resides on an XLA device, use XLA device's PaddedShapeFn. tensorflow::XlaDevice* xla_device = dynamic_cast(device); ",0,train 73a45ea89bf9eb3045a07f7c1aeabd1b18113b22,tensorflow/tensorflow,"Minor cleanup - no longer returns Status on Device/OpDevice reads on tensorhandles PiperOrigin-RevId: 221151286",execute.cc,"@@ -85,8 +85,7 @@ Status MaybeCopyInputToExpectedDevice(EagerOperation* op, int i, RunMetadata* run_metadata, TensorHandle** handle) { EagerContext* ctx = op->EagerContext(); - Device* handle_device = nullptr; - TF_RETURN_IF_ERROR((*handle)->Device(&handle_device)); + Device* handle_device = (*handle)->device(); const Device* actual_device = handle_device == nullptr ? ctx->HostCPU() : handle_device; const Device* op_device = @@ -419,8 +418,7 @@ Status EagerRemoteSendTensor(EagerContext* ctx, TensorHandle* h, request.set_op_id(ctx->NextId()); request.set_device_name(recv_device->name()); - Device* tensor_handle_device; - TF_RETURN_IF_ERROR(h->Device(&tensor_handle_device)); + Device* tensor_handle_device = h->device(); // AsProtoTensorContent doesn't work when the tensor is on the GPU, hence copy // it to the CPU before copying it out. @@ -487,8 +485,7 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, auto* remote_op = request->add_queue()->mutable_operation(); for (int i = 0; i < op->Inputs().size(); i++) { - tensorflow::Device* input_device; - TF_RETURN_IF_ERROR(op->Inputs()[i]->Device(&input_device)); + tensorflow::Device* input_device = op->Inputs()[i]->device(); if (op->Device() != input_device && // If the expected and actual devices are on the same task, don't // explicitly copy, and instead depend on the copy to happen locally @@ -624,8 +621,7 @@ Status MaybeUpdateOpDevice(EagerOperation* op) { ctx->PinSmallOpsToCPU() && IsPinnableOp(op->Name()); for (int i = 0; i < op->Inputs().size(); ++i) { - Device* input_op_device = nullptr; - TF_RETURN_IF_ERROR(op->Inputs()[i]->OpDevice(&input_op_device)); + Device* input_op_device = op->Inputs()[i]->op_device(); VLOG(2) << ""for op "" << op->Name() << "" input "" << i << "" "" << DataTypeString(op->Inputs()[i]->dtype) << "" "" << (input_op_device == nullptr ? ""cpu"" : input_op_device->name()) @@ -778,6 +774,9 @@ Status EagerExecute(EagerContext* ctx, Device* device, // In the async case, the retval is not a nullptr, and its device is // already set since all TensorHandles always have their device set during // construction. 
+ DCHECK_EQ(device, retvals[i]->op_device()); + DCHECK_EQ(kernel->OutputDevice(i), retvals[i]->device()); + retvals[i]->SetTensor(outputs[i]); } } @@ -893,8 +892,7 @@ string GetUniqueWireID() { Status EagerCopyToDevice(TensorHandle* h, EagerContext* ctx, const char* device_name, TensorHandle** result) { - tensorflow::Device* send_device; - TF_RETURN_IF_ERROR(h->Device(&send_device)); + tensorflow::Device* send_device = h->device(); if (send_device == nullptr) { send_device = ctx->HostCPU(); ",0,train 73a45ea89bf9eb3045a07f7c1aeabd1b18113b22,tensorflow/tensorflow,"Minor cleanup - no longer returns Status on Device/OpDevice reads on tensorhandles PiperOrigin-RevId: 221151286",tensor_handle.cc,"@@ -79,16 +79,6 @@ Status TensorHandle::Tensor(const tensorflow::Tensor** t) { return Status::OK(); } -Status TensorHandle::Device(tensorflow::Device** d) { - *d = device_; - return Status::OK(); -} - -Status TensorHandle::OpDevice(tensorflow::Device** d) { - *d = op_device_; - return Status::OK(); -} - Status TensorHandle::TensorAndDevice(const tensorflow::Tensor** tensor, tensorflow::Device** device, tensorflow::Device** op_device) { ",0,train 73a45ea89bf9eb3045a07f7c1aeabd1b18113b22,tensorflow/tensorflow,"Minor cleanup - no longer returns Status on Device/OpDevice reads on tensorhandles PiperOrigin-RevId: 221151286",tensor_handle.h,"@@ -102,9 +102,9 @@ class TensorHandle : public core::RefCounted { Status Tensor(const tensorflow::Tensor** t); - Status Device(tensorflow::Device** d); + tensorflow::Device* device() const { return device_; } - Status OpDevice(tensorflow::Device** d); + tensorflow::Device* op_device() const { return op_device_; } Status TensorAndDevice(const tensorflow::Tensor** tensor, tensorflow::Device** device, @@ -171,11 +171,11 @@ class TensorHandle : public core::RefCounted { // // TODO(ashankar): Reference count TFE_Context to ensure that 'device_' of a // TFE_TensorHandle does not outlive the TFE_Context from which it came? - tensorflow::Device* device_; + tensorflow::Device* const device_; // Device in which the op producing this tensor was executed. Equals to // device_ for constant tensors. - tensorflow::Device* op_device_; + tensorflow::Device* const op_device_; // IDs required when this class is representing a remote tensor handle. 
const int64 remote_op_id_; ",0,train 73a45ea89bf9eb3045a07f7c1aeabd1b18113b22,tensorflow/tensorflow,"Minor cleanup - no longer returns Status on Device/OpDevice reads on tensorhandles PiperOrigin-RevId: 221151286",eager_service_impl_test.cc,"@@ -345,8 +345,7 @@ TEST_F(EagerServiceImplTest, SendTensorTest) { response.context_id(), RemoteTensorHandleInternal(2, 0), &tensor_handle)); TF_ASSERT_OK(tensor_handle->Tensor(&t)); - Device* device = nullptr; - TF_ASSERT_OK(tensor_handle->Device(&device)); + Device* device = tensor_handle->device(); EXPECT_NE(device, nullptr); EXPECT_EQ(device->name(), ""/job:localhost/replica:0/task:0/device:CPU:0""); ",0,train 73a45ea89bf9eb3045a07f7c1aeabd1b18113b22,tensorflow/tensorflow,"Minor cleanup - no longer returns Status on Device/OpDevice reads on tensorhandles PiperOrigin-RevId: 221151286",py_func.cc,"@@ -177,8 +177,7 @@ tensorflow::Status ExtractTensorFromEagerTensor(const PyObject* eager_tensor, const Device* expected_device, const Tensor** output_tensor) { auto handle = EagerTensor_Handle(eager_tensor)->handle; - Device* actual_device = nullptr; - TF_RETURN_IF_ERROR(handle->Device(&actual_device)); + Device* actual_device = handle->device(); TF_RETURN_IF_ERROR(handle->Tensor(output_tensor)); // actual_device may be nullptr, which implies local CPU. if (expected_device == actual_device) return Status::OK(); ",0,train 3a179b7ee8d2b2010e019067f8514b66b899f01d,tensorflow/tensorflow,"Move TPU error counters to compilation which fills up the cache. PiperOrigin-RevId: 424228383 Change-Id: I7cef3c66e60ca7b6d5c9d366387d149df0dbb856",tpu_compile_op_common.cc,"@@ -137,8 +137,6 @@ void TpuCompileOpKernelCommon::Compute(OpKernelContext* ctx) { proto.set_status_error_message(compile_status.error_message()); status_payload = proto.SerializeAsString(); } - metrics::UpdateTpuErrorCounter(""TpuCompileOp"", - error_name(compile_status.code())); OP_REQUIRES_OK_OR_SET_PAYLOAD(ctx, TpuCompileInterface::kTpuCompileErrorPayloadKey, status_payload, compile_status); @@ -180,6 +178,8 @@ Status TpuCompileOpKernelCommon::CompileLocallyAndFillHostCache( << session_name << "" took "" << duration << "" and "" << (compile_status.ok() ? ""succeeded"" : ""failed""); tpu_program_group->LogProgramMemorySummary(); + metrics::UpdateTpuErrorCounter(""TpuCompileOp"", + error_name(compile_status.code())); metrics::UpdateXlaCompilationTime(absl::ToInt64Microseconds(duration)); TpuCompilationMetrics::IncrementCompilationCount(session_name); ",0,test c381794b2fc3227bfee9cf085e26bafb33da8f4b,tensorflow/tensorflow,"Support different threading modes in GPU device. All modes are experimental for now. The goal is to find the best setting, and change the default to pick that. PiperOrigin-RevId: 178662212",direct_session.cc,"@@ -521,9 +521,7 @@ Status DirectSession::Run(const RunOptions& run_options, args.rendezvous = run_state.rendez; args.cancellation_manager = &step_cancellation_manager; - args.runner = [this, pool](Executor::Args::Closure c) { - SchedClosure(pool, std::move(c)); - }; + args.session_state = &session_state_; args.tensor_store = &run_state.tensor_store; args.step_container = &run_state.step_container; @@ -584,7 +582,24 @@ Status DirectSession::Run(const RunOptions& run_options, return errors::Cancelled(""Run call was cancelled""); } + Executor::Args::Runner default_runner = [this, + pool](Executor::Args::Closure c) { + SchedClosure(pool, std::move(c)); + }; for (const auto& item : executors_and_keys->items) { + // TODO(zhengxq): support partial run. 
+ // TODO(zhengxq): support other session types. + // TODO(zhengxq): if the device picks its own threadpool, we need to assign + // less threads to the main compute pool by default. + thread::ThreadPool* device_thread_pool = + item.device->tensorflow_device_thread_pool(); + if (!device_thread_pool) { + args.runner = default_runner; + } else { + args.runner = [this, device_thread_pool](Executor::Args::Closure c) { + SchedClosure(device_thread_pool, std::move(c)); + }; + } item.executor->RunAsync(args, barrier->Get()); } @@ -1222,6 +1237,7 @@ Status DirectSession::GetOrCreateExecutors( // NewLocalExecutor takes ownership of partition_graph. item->graph = partition_graph.get(); item->executor = nullptr; + item->device = device; Executor* executor; TF_RETURN_IF_ERROR( NewLocalExecutor(params, partition_graph.release(), &executor)); ",0,train c381794b2fc3227bfee9cf085e26bafb33da8f4b,tensorflow/tensorflow,"Support different threading modes in GPU device. All modes are experimental for now. The goal is to find the best setting, and change the default to pick that. PiperOrigin-RevId: 178662212",direct_session.h,"@@ -112,6 +112,7 @@ class DirectSession : public Session { // every partition. struct PerPartitionExecutorsAndLib { Graph* graph = nullptr; // not owned. + Device* device = nullptr; // not owned. FunctionLibraryRuntime* flib = nullptr; // not owned. std::unique_ptr executor; }; ",0,train c381794b2fc3227bfee9cf085e26bafb33da8f4b,tensorflow/tensorflow,"Support different threading modes in GPU device. All modes are experimental for now. The goal is to find the best setting, and change the default to pick that. PiperOrigin-RevId: 178662212",gpu_device.cc,"@@ -60,6 +60,7 @@ limitations under the License. #include ""tensorflow/core/platform/types.h"" #include ""tensorflow/core/public/session_options.h"" #include ""tensorflow/core/util/device_name_utils.h"" +#include ""tensorflow/core/util/env_var.h"" #include ""tensorflow/core/util/stream_executor_util.h"" namespace tensorflow { @@ -305,6 +306,46 @@ Status BaseGPUDevice::Init(const SessionOptions& options) { gpu_device_info_->gpu_id = gpu_id_; set_tensorflow_gpu_device_info(gpu_device_info_); + // Whether and how the GPU device uses its own threadpool. + // This option is experimental. Once we confirm the best setting, we + // may change the default behavior and completely remove this flag. + // Default values might change in future releases. + // Possible values: + // * global: GPU uses threads shared with CPU in the main compute + // thread-pool. This is currently the default. + // * gpu_private: GPU uses threads dedicated to this device. + // * gpu_shared: All GPUs share a dedicated thread pool. + string gpu_thread_mode; + TF_RETURN_IF_ERROR( + ReadStringFromEnvVar(""TF_GPU_THREAD_MODE"", ""global"", &gpu_thread_mode)); + gpu_thread_mode = str_util::Lowercase(gpu_thread_mode); + if (gpu_thread_mode != ""global"") { + int64 gpu_thread_count = -1; + // Default to two threads. One for device compute and another for memory + // copies. + TF_RETURN_IF_ERROR( + ReadInt64FromEnvVar(""TF_GPU_THREAD_COUNT"", 2, &gpu_thread_count)); + if (gpu_thread_mode == ""gpu_private"") { + // TODO(zhengxq): since these threads only serve a single GPU device, + // we should set the device context once for each thread, and avoid + // setting them for each kernel. + // TODO(zhengxq): pin the thread to the same socket of the target GPU. 
+ thread_pool_.reset(new thread::ThreadPool( + options.env, strings::StrCat(""gpu_private_"", gpu_id_), + static_cast(gpu_thread_count))); + set_tensorflow_device_thread_pool(thread_pool_.get()); + } else if (gpu_thread_mode == ""gpu_shared"") { + static thread::ThreadPool* thread_pool = new thread::ThreadPool( + options.env, ""gpu_shared"", static_cast(gpu_thread_count)); + set_tensorflow_device_thread_pool(thread_pool); + } else { + string error_message = + strings::StrCat(""Invalid gpu_thread_mode: "", gpu_thread_mode); + LOG(WARNING) << error_message; + return errors::InvalidArgument(error_message); + } + } + return Status::OK(); } ",0,train c381794b2fc3227bfee9cf085e26bafb33da8f4b,tensorflow/tensorflow,"Support different threading modes in GPU device. All modes are experimental for now. The goal is to find the best setting, and change the default to pick that. PiperOrigin-RevId: 178662212",gpu_device.h,"@@ -116,6 +116,7 @@ class BaseGPUDevice : public LocalDevice { const bool sync_every_op_ = false; const int32 max_streams_; std::unique_ptr em_; + std::unique_ptr thread_pool_; void ReinitializeDevice(OpKernelContext* context, PerOpGpuDevice* device, int stream_id, Allocator* allocator); ",0,train c381794b2fc3227bfee9cf085e26bafb33da8f4b,tensorflow/tensorflow,"Support different threading modes in GPU device. All modes are experimental for now. The goal is to find the best setting, and change the default to pick that. PiperOrigin-RevId: 178662212",device_base.h,"@@ -145,6 +145,12 @@ class DeviceBase { return gpu_device_info_; } + // The preferred thread pool for this device. If it is nullptr, the system + // automatically assigns a thread pool for execution. + virtual thread::ThreadPool* tensorflow_device_thread_pool() { + return device_thread_pool_; + } + // Does not take ownership. void set_eigen_cpu_device(Eigen::ThreadPoolDevice* d) { eigen_cpu_device_ = d; @@ -215,10 +221,17 @@ class DeviceBase { return errors::Internal(""Device does not implement MakeTensorFromProto()""); } + protected: + // Does not take ownership. + void set_tensorflow_device_thread_pool(thread::ThreadPool* thread_pool) { + device_thread_pool_ = thread_pool; + } + private: Env* const env_; CpuWorkerThreads* cpu_worker_threads_ = nullptr; GpuDeviceInfo* gpu_device_info_ = nullptr; + thread::ThreadPool* device_thread_pool_ = nullptr; Eigen::ThreadPoolDevice* eigen_cpu_device_ = nullptr; #ifdef TENSORFLOW_USE_SYCL Eigen::SyclDevice* eigen_sycl_device_ = nullptr; ",0,train 964a956abafab5bb1bd07256e75c89c57693b997,tensorflow/tensorflow,Fix typo in cuda_fft.cc.,cuda_fft.cc,"@@ -57,7 +57,7 @@ namespace dynload { static void *f; \ port::Status s = \ port::Env::Default->GetSymbolFromLibrary(GetDsoHandle(), kName, &f); \ - CHECK(f.ok()) << ""could not find "" << kName \ + CHECK(s.ok()) << ""could not find "" << kName \ << "" in cuFFT DSO; dlerror: "" << s.error_message(); \ return reinterpret_cast(f); \ } \ ",0,train c061d6c6b7b2004f5c271f4000fe6e1f9129e0ed,tensorflow/tensorflow,"Re-enable kmeans_test Change: 148175182",kmeans_test.py,"@@ -22,14 +22,15 @@ import math import sys import time +import numpy as np +from sklearn.cluster import KMeans as SklearnKMeans + +# pylint: disable=g-import-not-at-top # TODO: #6568 Remove this hack that makes dlopen() not crash. 
if hasattr(sys, 'getdlopenflags') and hasattr(sys, 'setdlopenflags'): import ctypes sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL) -import numpy as np -from sklearn.cluster import KMeans as SklearnKMeans - from tensorflow.contrib import factorization from tensorflow.contrib.learn.python import learn from tensorflow.contrib.learn.python.learn.estimators import kmeans as kmeans_lib @@ -492,10 +493,10 @@ class TensorflowKMeansBenchmark(KMeansBenchmark): initial_clusters=factorization.KMEANS_PLUS_PLUS_INIT, kmeans_plus_plus_num_retries=int(math.log(self.num_clusters) + 2), random_seed=i * 42, + relative_tolerance=1e-6, config=run_config.RunConfig(tf_random_seed=3)) tf_kmeans.fit(input_fn=lambda: (constant_op.constant(self.points), None), - steps=50, - relative_tolerance=1e-6) + steps=50) _ = tf_kmeans.clusters() scores.append( tf_kmeans.score( ",0,test 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",c_api.cc,"@@ -752,8 +752,7 @@ TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) { static_cast( opts->device_placement_policy), opts->async, opts->lazy_remote_inputs_copy, device_mgr.release(), - /*device_mgr_owned*/ true, r, - tensorflow::GetDefaultCustomKernelCreator())); + /*device_mgr_owned*/ true, r)); } void TFE_DeleteContext(TFE_Context* ctx) { ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",test_utils.cc,"@@ -48,7 +48,6 @@ EagerContextPtr CreateTestingEagerContext(DeviceMgr* device_mgr) { /* async= */ false, /* lazy_copy_function_remote_inputs= */ false, device_mgr, /* device_mgr_owned= */ false, /* rendezvous= */ nullptr, - /* custom_kernel_creator= */ nullptr, /* cluster_flr= */ nullptr)); } ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. 
PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",xla_kernel_creator.cc,"@@ -72,7 +72,8 @@ static bool IsCompilable(FunctionLibraryRuntime* flr, const NodeDef& ndef, bool XlaKernelCreator::CanCreateKernel( const FunctionLibraryRuntime& flr, const std::shared_ptr& props) const { - return CanCreateXlaKernel(props->node_def); + return CanCreateXlaKernel(props->node_def) && + !XlaOpRegistry::IsCompilationDevice(flr.device()->device_type()); } static Status CreateXlaKernel(FunctionLibraryRuntime* flr, ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",xla_op_registry.cc,"@@ -134,6 +134,13 @@ XlaOpRegistry::~XlaOpRegistry() = default; result.first->second.op_filter = op_filter; } +/* static */ bool XlaOpRegistry::IsCompilationDevice( + const string& device_name) { + XlaOpRegistry& registry = Instance(); + mutex_lock lock(registry.mutex_); + return registry.backends_.find(device_name) != registry.backends_.end(); +} + /* static */ bool XlaOpRegistry::GetCompilationDevice( const string& device_name, const DeviceRegistration** registration) { XlaOpRegistry& registry = Instance(); ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",xla_op_registry.h,"@@ -153,6 +153,10 @@ class XlaOpRegistry { static void RegisterCompilationDevice(const string& device_name, const DeviceRegistration& registration); + // Returns whether the device name is for the JIT device used exclusively for + // TF2XLA conversion. + static bool IsCompilationDevice(const string& device_name); + // Returns the JIT device name associated with 'device_name', setting // 'jit_device_name', 'requires_jit', and 'enabled_jit_by_default', if they // are not null. Returns false and leaves the outputs unchanged if no matching ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",direct_session.cc,"@@ -1342,12 +1342,10 @@ Status DirectSession::CreateExecutors( options_.config.experimental().has_session_metadata() ? 
&options_.config.experimental().session_metadata() : nullptr; - const CustomKernelCreator* custom_kernel_creator = - GetDefaultCustomKernelCreator(); func_info->proc_flr.reset(new ProcessFunctionLibraryRuntime( device_mgr_.get(), options_.env, &options_.config, graph_def_version, func_info->flib_def.get(), optimizer_opts, thread_pools_[0].first, - /*parent=*/nullptr, custom_kernel_creator, session_metadata, + /*parent=*/nullptr, session_metadata, Rendezvous::Factory{ [](const int64, const DeviceMgr* device_mgr, Rendezvous** r) { *r = new IntraProcessRendezvous(device_mgr); ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",context.cc,"@@ -76,7 +76,6 @@ EagerContext::EagerContext( ContextDevicePlacementPolicy default_device_placement_policy, bool async, const bool lazy_copy_function_remote_inputs, const DeviceMgr* device_mgr, bool device_mgr_owned, Rendezvous* rendezvous, - const CustomKernelCreator* custom_kernel_creator, DistributedFunctionLibraryRuntime* cluster_flr) : ImmediateExecutionContext(kEager), opts_(opts), @@ -85,7 +84,6 @@ EagerContext::EagerContext( host_cpu_device_(device_mgr->HostCPU()), rendezvous_(rendezvous), thread_pool_(NewThreadPoolFromSessionOptions(opts)), - custom_kernel_creator_(custom_kernel_creator), cluster_flr_(cluster_flr), log_device_placement_(opts.config.log_device_placement()), allow_soft_placement_(opts.config.allow_soft_placement()), @@ -99,7 +97,7 @@ EagerContext::EagerContext( ""TF_EAGER_ENABLE_SMALL_TENSOR_CPU_PINNING"", false)) { ResetPFLR(device_mgr, opts.env, &opts.config, TF_GRAPH_DEF_VERSION, &func_lib_def_, opts.config.graph_options().optimizer_options(), - thread_pool_.get(), cluster_flr, custom_kernel_creator_); + thread_pool_.get(), cluster_flr); // Starts exporting metrics through a platform-specific monitoring API (if // provided). For builds using ""tensorflow/core/platform/default"", this is // currently a no-op. 
@@ -185,8 +183,7 @@ void EagerContext::ResetPFLR(const DeviceMgr* device_mgr, Env* env, const FunctionLibraryDefinition* lib_def, const OptimizerOptions& optimizer_options, thread::ThreadPool* thread_pool, - DistributedFunctionLibraryRuntime* cluster_flr, - const CustomKernelCreator* custom_kernel_creator) { + DistributedFunctionLibraryRuntime* cluster_flr) { Rendezvous::Factory rendezvous_factory{ [this](const int64 step_id, const DeviceMgr*, Rendezvous** r) { *r = CreateRendezvous(step_id); @@ -194,7 +191,7 @@ void EagerContext::ResetPFLR(const DeviceMgr* device_mgr, Env* env, }}; pflr_.reset(new ProcessFunctionLibraryRuntime( device_mgr, env, config, graph_def_version, lib_def, optimizer_options, - thread_pool, cluster_flr, custom_kernel_creator, + thread_pool, cluster_flr, /*session_metadata=*/nullptr, std::move(rendezvous_factory))); } @@ -1328,7 +1325,7 @@ Status EagerContext::SetMasterContextState( const auto* config = pflr_->config(); ResetPFLR(local_device_manager_.Get(), env_, config, TF_GRAPH_DEF_VERSION, &func_lib_def_, config->graph_options().optimizer_options(), - thread_pool_.get(), cluster_flr_.Get(), custom_kernel_creator_); + thread_pool_.get(), cluster_flr_.Get()); keep_alive_secs_ = keep_alive_secs; sleep_for_secs_ = std::max(1, keep_alive_secs_ / 2); @@ -1430,7 +1427,7 @@ Status EagerContext::InitializeRemoteWorker( const auto* config = pflr_->config(); ResetPFLR(local_device_manager_.Get(), env_, config, TF_GRAPH_DEF_VERSION, &func_lib_def_, config->graph_options().optimizer_options(), - thread_pool_.get(), cluster_flr_.Get(), custom_kernel_creator_); + thread_pool_.get(), cluster_flr_.Get()); InitPrioritizedDeviceTypeList(); ClearCachesAndThreadExecutors(); ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",context.h,"@@ -140,7 +140,6 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { bool async, const bool lazy_copy_function_remote_inputs, const DeviceMgr* device_mgr, bool device_mgr_owned, Rendezvous* rendezvous, - const CustomKernelCreator* custom_kernel_creator, DistributedFunctionLibraryRuntime* cluster_flr = nullptr); void Release() override { Unref(); } @@ -495,8 +494,7 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { const FunctionLibraryDefinition* lib_def, const OptimizerOptions& optimizer_options, thread::ThreadPool* thread_pool = nullptr, - DistributedFunctionLibraryRuntime* cluster_flr = nullptr, - const CustomKernelCreator* custom_kernel_creator = nullptr); + DistributedFunctionLibraryRuntime* cluster_flr = nullptr); void ResetClusterFLR(DistributedFunctionLibraryRuntime* cluster_flr); @@ -570,8 +568,6 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { std::unique_ptr thread_pool_; - const CustomKernelCreator* const custom_kernel_creator_; - // EagerContext owns the DistributedFunctionLibraryRuntime( // EagerClusterFunctionLibraryRuntime) if using EagerService for remote // function execution (lazy_copy_function_remote_inputs_=true). 
",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",context_test.cc,"@@ -59,7 +59,6 @@ class EagerContextTest : public ::testing::Test { /* async */ false, /* lazy_copy_function_remote_inputs */ false, device_manager_, /* device_mgr_owned */ false, /* rendezvous */ nullptr, - /* custom_kernel_creator */ nullptr, /* cluster_flr */ nullptr); } ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",eager_operation_test.cc,"@@ -28,7 +28,7 @@ TEST(EagerOperationTest, DeviceName) { auto ctx = new EagerContext( SessionOptions(), tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, false, - false, &device_mgr, false, nullptr, nullptr, nullptr); + false, &device_mgr, false, nullptr, nullptr); auto op = new EagerOperation(ctx); ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",execute_node_test.cc,"@@ -68,7 +68,7 @@ TEST(ExecuteNodeTest, ExecuteNodeArgs) { auto ctx = new EagerContext( SessionOptions(), tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, false, - false, &device_mgr, false, nullptr, nullptr, nullptr); + false, &device_mgr, false, nullptr, nullptr); // Set a RemoteMgr to the EagerContext. auto remote_mgr = absl::make_unique( ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. 
PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",mkl_eager_op_rewrite_test.cc,"@@ -40,8 +40,7 @@ class EagerOpRewriteTest : public ::testing::Test { tensorflow::EagerContext* eager_ctx = new tensorflow::EagerContext( SessionOptions(), tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, - async, lazy_remote_tensor_copy, device_mgr.get(), false, rendezvous, - GetDefaultCustomKernelCreator()); + async, lazy_remote_tensor_copy, device_mgr.get(), false, rendezvous); EagerExecutor executor_(false); std::unique_ptr op( ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",placement_test.cc,"@@ -88,7 +88,6 @@ class PlacementTest : public ::testing::Test { /* async */ false, /* lazy_copy_function_remote_inputs */ false, device_manager_, /* device_mgr_owned */ false, /* rendezvous */ nullptr, - /* custom_kernel_creator */ nullptr, /* cluster_flr */ nullptr); } ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",tensor_handle_test.cc,"@@ -39,7 +39,7 @@ TEST(TensorHandle_ShapeTest, AsyncShape) { auto ctx = new EagerContext( SessionOptions(), tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, false, - false, &device_mgr, false, nullptr, nullptr, nullptr); + false, &device_mgr, false, nullptr, nullptr); TensorHandle* sync_th = TensorHandle::CreateLocalHandle(std::move(t), nullptr, nullptr, ctx); TensorHandle* async_th = TensorHandle::CreateEmptyLocalHandle( @@ -108,7 +108,6 @@ class PackedTensorHandleTest : public ::testing::Test { /* async= */ false, /* lazy_copy_function_remote_inputs= */ false, device_mgr_, /* device_mgr_owned= */ false, /* rendezvous= */ nullptr, - /* custom_kernel_creator= */ nullptr, /* cluster_flr= */ nullptr); } @@ -257,7 +256,7 @@ TEST(TensorHandle_ResourceDeviceTest, OnLocalDevice) { auto ctx = new EagerContext( SessionOptions(), tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, false, - false, &local_device_mgr, false, nullptr, nullptr, nullptr); + false, &local_device_mgr, false, nullptr, nullptr); tensorflow::DataType dtype = DT_RESOURCE; TensorShape shape = {2}; @@ -289,7 +288,7 @@ TEST(TensorHandle_ResourceDeviceTest, OnRemoteDevice) { auto ctx = new EagerContext( SessionOptions(), tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, false, - false, &local_device_mgr, false, nullptr, nullptr, nullptr); + false, &local_device_mgr, false, nullptr, nullptr); std::unique_ptr d0( CreateDevice(""CPU"", ""/job:worker/task:0/device:CPU:0"", false)); @@ -346,7 +345,6 @@ class RemoteTensorHandleTest : public ::testing::Test { /* async= */ false, /* lazy_copy_function_remote_inputs= */ false, device_mgr_, /* device_mgr_owned= */ 
false, /* rendezvous= */ nullptr, - /* custom_kernel_creator= */ nullptr, /* cluster_flr= */ nullptr); } @@ -387,7 +385,6 @@ TEST_F(RemoteTensorHandleTest, UnknownRemoteDevice) { /* async= */ false, /* lazy_copy_function_remote_inputs= */ false, &device_mgr, /* device_mgr_owned= */ false, /* rendezvous= */ nullptr, - /* custom_kernel_creator= */ nullptr, /* cluster_flr= */ nullptr); tensorflow::DataType dtype = DT_FLOAT; ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",function.cc,"@@ -326,7 +326,6 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime { const FunctionLibraryDefinition* lib_def, thread::ThreadPool* default_thread_pool, const OptimizerOptions& optimizer_options, - const CustomKernelCreator* custom_kernel_creator, const SessionMetadata* session_metadata, ProcessFunctionLibraryRuntime* parent); @@ -390,7 +389,6 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime { const int graph_def_version_; const FunctionLibraryDefinition* const base_lib_def_; GraphOptimizer optimizer_; - const CustomKernelCreator* custom_kernel_creator_; const SessionMetadata* const session_metadata_; Executor::Args::Runner default_runner_; const string device_name_; @@ -462,7 +460,6 @@ FunctionLibraryRuntimeImpl::FunctionLibraryRuntimeImpl( int graph_def_version, const FunctionLibraryDefinition* lib_def, thread::ThreadPool* default_thread_pool, const OptimizerOptions& optimizer_options, - const CustomKernelCreator* custom_kernel_creator, const SessionMetadata* session_metadata, ProcessFunctionLibraryRuntime* parent) : device_mgr_(dmgr), @@ -472,7 +469,6 @@ FunctionLibraryRuntimeImpl::FunctionLibraryRuntimeImpl( graph_def_version_(graph_def_version), base_lib_def_(lib_def), optimizer_(optimizer_options), - custom_kernel_creator_(custom_kernel_creator), session_metadata_(session_metadata), default_runner_(nullptr), device_name_(device_ == nullptr @@ -609,10 +605,12 @@ Status FunctionLibraryRuntimeImpl::CreateKernel( FunctionLibraryRuntime* flr, OpKernel** kernel) { // If a custom kernel creator is given, try that. 
Status s; - if (custom_kernel_creator_ != nullptr && - custom_kernel_creator_->CanCreateKernel(*this, props)) { + const CustomKernelCreator* custom_kernel_creator = + GetDefaultCustomKernelCreator(); + if (custom_kernel_creator && + custom_kernel_creator->CanCreateKernel(*this, props)) { std::unique_ptr ret; - s = custom_kernel_creator_->CreateKernel(this, props, &ret); + s = custom_kernel_creator->CreateKernel(this, props, &ret); if (s.ok()) { *kernel = ret.release(); } else { @@ -1328,9 +1326,9 @@ Status FunctionLibraryRuntimeImpl::Clone( std::unique_ptr* out_lib_def, std::unique_ptr* out_pflr, FunctionLibraryRuntime** out_flr, bool skip_flib_def) { - TF_RETURN_IF_ERROR(parent_->Clone( - env_, graph_def_version_, optimizer_.options(), custom_kernel_creator_, - out_lib_def, out_pflr, skip_flib_def)); + TF_RETURN_IF_ERROR(parent_->Clone(env_, graph_def_version_, + optimizer_.options(), out_lib_def, out_pflr, + skip_flib_def)); *out_flr = (*out_pflr)->GetFLR(device_->name()); if (*out_flr != nullptr) { return Status::OK(); @@ -1376,12 +1374,11 @@ std::unique_ptr NewFunctionLibraryRuntime( Device* device, int graph_def_version, const FunctionLibraryDefinition* lib_def, thread::ThreadPool* thread_pool, const OptimizerOptions& optimizer_options, - const CustomKernelCreator* custom_kernel_creator, const SessionMetadata* session_metadata, ProcessFunctionLibraryRuntime* parent) { return std::unique_ptr(new FunctionLibraryRuntimeImpl( device_mgr, env, config, device, graph_def_version, lib_def, thread_pool, - optimizer_options, custom_kernel_creator, session_metadata, parent)); + optimizer_options, session_metadata, parent)); } class SymbolicGradientHelper { ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",function.h,"@@ -43,15 +43,11 @@ const CustomKernelCreator* GetDefaultCustomKernelCreator(); // interpreter op kernel to execute a function. Else c->CreateKernel() can be // used to create a kernel that will compile the function with XLA and run the // resulting program. -// -// TODO(zhifengc/phawkins): b/32379046 void RegisterDefaultCustomKernelCreator(CustomKernelCreator* c); // Creates a FunctionLibraryRuntime, which instantiates functions // defined in ""lib_def"" and executes functions on the ""device"". -// ""device_mgr"" must contain the ""device"". If not nullptr, -// ""custom_kernel_creator"" is consulted by the returned runtime to -// create kernels. +// ""device_mgr"" must contain the ""device"". // // The returned object does not take ownerships of ""device"" or // ""lib_def"". 
The caller must ensure ""device"" and ""lib_def"" outlives @@ -65,7 +61,6 @@ std::unique_ptr NewFunctionLibraryRuntime( Device* device, int graph_def_version, const FunctionLibraryDefinition* lib_def, thread::ThreadPool* thread_pool, const OptimizerOptions& optimizer_options, - const CustomKernelCreator* custom_kernel_creator, const SessionMetadata* session_metadata, ProcessFunctionLibraryRuntime* parent); ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",function_test.cc,"@@ -162,8 +162,7 @@ class FunctionLibraryRuntimeTest : public ::testing::Test { pflr_.reset(new ProcessFunctionLibraryRuntime( device_mgr_.get(), Env::Default(), &options.config, TF_GRAPH_DEF_VERSION, lib_def_.get(), opts, /*thread_pool=*/nullptr, - /*parent=*/nullptr, /*custom_kernel_creator=*/nullptr, - /*session_metadata=*/nullptr, + /*parent=*/nullptr, /*session_metadata=*/nullptr, Rendezvous::Factory{ [](const int64, const DeviceMgr* device_mgr, Rendezvous** r) { *r = new IntraProcessRendezvous(device_mgr); ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",function_threadpool_test.cc,"@@ -65,8 +65,7 @@ class FunctionLibraryRuntimeTest : public ::testing::Test { pflr_.reset(new ProcessFunctionLibraryRuntime( device_mgr_.get(), Env::Default(), /*config=*/nullptr, TF_GRAPH_DEF_VERSION, lib_def_.get(), opts, default_thread_pool, - /*parent=*/nullptr, /*custom_kernel_creator=*/nullptr, - /*session_metadata=*/nullptr, + /*parent=*/nullptr, /*session_metadata=*/nullptr, Rendezvous::Factory{ [](const int64, const DeviceMgr* device_mgr, Rendezvous** r) { *r = new IntraProcessRendezvous(device_mgr); ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. 
PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",kernel_benchmark_testlib.cc,"@@ -90,7 +90,7 @@ Benchmark::Benchmark(const string& device, Graph* g, pflr_ = std::unique_ptr( new ProcessFunctionLibraryRuntime( device_mgr_.get(), Env::Default(), nullptr, graph_def_version, - flib_def_.get(), OptimizerOptions(), pool_, nullptr, nullptr, nullptr, + flib_def_.get(), OptimizerOptions(), pool_, nullptr, nullptr, Rendezvous::Factory())); flr_ = pflr_->GetFLR(device_->name()); ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",process_function_library_runtime.cc,"@@ -88,7 +88,6 @@ ProcessFunctionLibraryRuntime::ProcessFunctionLibraryRuntime( const OptimizerOptions& optimizer_options, thread::ThreadPool* default_thread_pool, DistributedFunctionLibraryRuntime* parent, - const CustomKernelCreator* custom_kernel_creator, const SessionMetadata* session_metadata, Rendezvous::Factory rendezvous_factory) : parent_(parent), @@ -106,14 +105,14 @@ ProcessFunctionLibraryRuntime::ProcessFunctionLibraryRuntime( (*flr_map_)[nullptr] = NewFunctionLibraryRuntime( nullptr, env, config_ ? &(*config_) : nullptr, nullptr, graph_def_version, lib_def_, default_thread_pool, optimizer_options, - custom_kernel_creator, session_metadata_, this); + session_metadata_, this); return; } for (Device* d : device_mgr->ListDevices()) { (*flr_map_)[d] = NewFunctionLibraryRuntime( device_mgr, env, config_ ? &(*config_) : nullptr, d, graph_def_version, - lib_def_, default_thread_pool, optimizer_options, custom_kernel_creator, - session_metadata_, this); + lib_def_, default_thread_pool, optimizer_options, session_metadata_, + this); } InitializeDeviceSet(); @@ -1715,7 +1714,6 @@ void ProcessFunctionLibraryRuntime::CleanUp( Status ProcessFunctionLibraryRuntime::Clone( Env* env, int graph_def_version, const OptimizerOptions& optimizer_options, - const CustomKernelCreator* custom_kernel_creator, std::unique_ptr* out_lib_def, std::unique_ptr* out_pflr, bool skip_flib_def) const { @@ -1728,7 +1726,7 @@ Status ProcessFunctionLibraryRuntime::Clone( *out_pflr = absl::make_unique( device_mgr_, env, config_ ? &(*config_) : nullptr, graph_def_version, out_lib_def->get(), optimizer_options, default_thread_pool_, parent_, - custom_kernel_creator, session_metadata_, rendezvous_factory_); + session_metadata_, rendezvous_factory_); { tf_shared_lock l(mu_); for (auto* d : composite_devices_) (*out_pflr)->AddCompositeDevice(d); ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. 
PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",process_function_library_runtime.h,"@@ -69,7 +69,6 @@ class ProcessFunctionLibraryRuntime { const OptimizerOptions& optimizer_options, thread::ThreadPool* thread_pool = nullptr, DistributedFunctionLibraryRuntime* parent = nullptr, - const CustomKernelCreator* custom_kernel_creator = nullptr, const SessionMetadata* session_metadata = nullptr, Rendezvous::Factory rendezvous_factory = Rendezvous::Factory()); @@ -357,7 +356,6 @@ class ProcessFunctionLibraryRuntime { // runtime w.r.t. to number of functions in the current function library. Status Clone(Env* env, int graph_def_version, const OptimizerOptions& optimizer_options, - const CustomKernelCreator* custom_kernel_creator, std::unique_ptr* out_lib_def, std::unique_ptr* out_pflr, bool skip_flib_def = false) const; ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",process_function_library_runtime_test.cc,"@@ -139,8 +139,7 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test { proc_flr_.reset(new ProcessFunctionLibraryRuntime( device_mgr_.get(), Env::Default(), /*config=*/nullptr, TF_GRAPH_DEF_VERSION, lib_def_.get(), opts, - /*thread_pool=*/nullptr, cluster_flr_.get(), - /*custom_kernel_creator=*/nullptr, session_metadata, + /*thread_pool=*/nullptr, cluster_flr_.get(), session_metadata, Rendezvous::Factory{ [this](const int64 step_id, const DeviceMgr* device_mgr, Rendezvous** r) { ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",standalone.cc,"@@ -59,7 +59,6 @@ Status Dataset::FromGraph(Params params, const GraphDef& graph_def, device_mgr.get(), Env::Default(), /*config=*/nullptr, TF_GRAPH_DEF_VERSION, flib_def.get(), OptimizerOptions{}, /*thread_pool=*/nullptr, /*parent=*/nullptr, - /*custom_kernel_creator=*/nullptr, /*session_metadata=*/nullptr, Rendezvous::Factory{ [](const int64, const DeviceMgr* device_mgr, Rendezvous** r) { ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. 
PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",eager_service_impl.cc,"@@ -275,7 +275,7 @@ Status EagerServiceImpl::CreateContext(const CreateContextRequest* request, tensorflow::EagerContext* ctx = new tensorflow::EagerContext( opts, tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, request->async(), request->lazy_copy_remote_function_inputs(), device_mgr, - false, r, GetDefaultCustomKernelCreator(), worker_session->cluster_flr()); + false, r, worker_session->cluster_flr()); // Ownership will be transferred to the ServerContext, or else in an error // case ctx will be deleted by this unref. core::ScopedUnref unref_ctx(ctx); ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",eager_service_impl_test.cc,"@@ -780,7 +780,7 @@ class FunctionWithRemoteInputsTest : public EagerServiceImplTest { remote_device_mgr_.get(), Env::Default(), /*config=*/ nullptr, TF_GRAPH_DEF_VERSION, &func_lib_def_, OptimizerOptions(), /*thread_pool=*/nullptr, eager_cluster_flr_.get(), - /*custom_kernel_creator=*/nullptr, /*session_metadata=*/nullptr, + /*session_metadata=*/nullptr, Rendezvous::Factory{[this](const int64 step_id, const DeviceMgr* device_mgr, Rendezvous** r) { @@ -1220,7 +1220,7 @@ TEST_F(EagerServiceImplTest, RequestsToMasterTest) { tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, /*async=*/false, /*lazy_copy_function_remote_inputs=*/false, device_mgr_.get(), false, - rendezvous, GetDefaultCustomKernelCreator()); + rendezvous); const uint64 context_id = random::New64(); // Set RemoteMgr to ctx. ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",remote_mgr_test.cc,"@@ -56,7 +56,7 @@ class RemoteMgrTest : public ::testing::Test { tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, /*async=*/false, /*lazy_copy_function_remote_inputs=*/false, device_mgr.release(), true, - rendezvous, GetDefaultCustomKernelCreator(), nullptr); + rendezvous, nullptr); } ~RemoteMgrTest() override { ctx_->Unref(); } ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",graph_mgr.cc,"@@ -136,12 +136,11 @@ Status GraphMgr::InitItem( // We don't explicitly Validate the graph def because ConvertGraphDefToGraph // does that below. 
- item->proc_flr.reset(new ProcessFunctionLibraryRuntime( device_mgr_, worker_env_->env, /*config=*/&config_proto, gdef.versions().producer(), item->lib_def.get(), graph_options.optimizer_options(), worker_env_->compute_pool, cluster_flr, - /*custom_kernel_creator=*/nullptr, /*session_metadata=*/nullptr, + /*session_metadata=*/nullptr, Rendezvous::Factory{ [this, session](const int64 step_id, const DeviceMgr*, Rendezvous** r) -> Status { ",0,train 0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones Serialization adds a new surface area for bugs, as not all the callers propagate the CustomKernelCreator correctly. Moreover, the mechanism is quite hacky and in the future we plan to potentially switch to a different one. PiperOrigin-RevId: 333111910 Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",dataset_test_base.cc,"@@ -402,7 +402,7 @@ Status DatasetOpsTestBase::InitFunctionLibraryRuntime( pflr_ = absl::make_unique( device_mgr_.get(), Env::Default(), /*config=*/nullptr, TF_GRAPH_DEF_VERSION, lib_def_.get(), opts, thread_pool_.get(), - /*parent=*/nullptr, /*custom_kernel_creator=*/nullptr, + /*parent=*/nullptr, /*session_metadata=*/nullptr, Rendezvous::Factory{ [](const int64, const DeviceMgr* device_mgr, Rendezvous** r) { ",0,train 8a05bdf333f34603b33c0f3a029e023deb27ae04,tensorflow/tensorflow,"Expose the RegAdagradOptimizer (which allows the user to specify whether a loss should update the accumulator) through tf.contrib.opt. PiperOrigin-RevId: 210253451",__init__.py,"@@ -31,6 +31,7 @@ from tensorflow.contrib.opt.python.training.model_average_optimizer import * from tensorflow.contrib.opt.python.training.moving_average_optimizer import * from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import * from tensorflow.contrib.opt.python.training.nadam_optimizer import * +from tensorflow.contrib.opt.python.training.reg_adagrad_optimizer import * from tensorflow.contrib.opt.python.training.shampoo import * from tensorflow.contrib.opt.python.training.weight_decay_optimizers import * from tensorflow.contrib.opt.python.training.powersign import * @@ -65,6 +66,7 @@ _allowed_symbols = [ 'ModelAverageCustomGetter', 'GGTOptimizer', 'ShampooOptimizer', + 'RegAdagradOptimizer', ] remove_undocumented(__name__, _allowed_symbols) ",0,train 26664f30f4f3d98b500c8dfd4e7852661280cfab,tensorflow/tensorflow,Addressing PR feedback,device_tracer_test.cc,"@@ -243,6 +243,7 @@ TEST_F(DeviceTracerTest, RunWithTraceOption) { EXPECT_GE(run_metadata.step_stats().dev_stats_size(), 1); } +#if TENSORFLOW_USE_ROCM TEST_F(DeviceTracerTest, TraceToXSpace) { auto tracer = CreateGpuTracer(); if (!tracer) return; @@ -266,13 +267,8 @@ TEST_F(DeviceTracerTest, TraceToXSpace) { XSpace space; TF_ASSERT_OK(tracer->CollectData(&space)); // At least one gpu plane and one host plane for launching events. -#if GOOGLE_CUDA - const XPlane* host_plane = FindPlaneWithName(space, kCuptiDriverApiPlaneName); - ASSERT_NE(host_plane, nullptr); -#elif TENSORFLOW_USE_ROCM const XPlane* host_plane = FindPlaneWithName(space, kRoctracerApiPlaneName); ASSERT_NE(host_plane, nullptr); -#endif const XPlane* device_plane = FindPlaneWithName(space, strings::StrCat(kGpuPlanePrefix, 0)); @@ -283,14 +279,59 @@ TEST_F(DeviceTracerTest, TraceToXSpace) { EXPECT_GE(device_plane->event_metadata_size(), 5); // Check if device capacity is serialized. 
XPlaneVisitor plane = CreateTfXPlaneVisitor(device_plane); -#if GOOGLE_CUDA + + // Check if the device events timestamps are set. + int total_events = 0; + plane.ForEachLine([&](const tensorflow::profiler::XLineVisitor& line) { + line.ForEachEvent([&](const tensorflow::profiler::XEventVisitor& event) { + EXPECT_GT(event.TimestampNs(), 0); + EXPECT_GT(event.DurationNs(), 0); + ++total_events; + }); + }); + EXPECT_GE(total_events, 5); +} +#else // TENSORFLOW_USE_ROCM +TEST_F(DeviceTracerTest, TraceToXSpace) { + auto tracer = CreateGpuTracer(); + if (!tracer) return; + + Initialize({3, 2, -1, 0}); + auto session = CreateSession(); + ASSERT_TRUE(session != nullptr); + TF_ASSERT_OK(session->Create(def_)); + std::vector> inputs; + + // Request two targets: one fetch output and one non-fetched output. + std::vector output_names = {y_ + "":0""}; + std::vector target_nodes = {y_neg_}; + std::vector outputs; + + TF_ASSERT_OK(tracer->Start()); + Status s = session->Run(inputs, output_names, target_nodes, &outputs); + TF_ASSERT_OK(s); + + TF_ASSERT_OK(tracer->Stop()); + XSpace space; + TF_ASSERT_OK(tracer->CollectData(&space)); + // At least one gpu plane and one host plane for launching events. + const XPlane* host_plane = FindPlaneWithName(space, kCuptiDriverApiPlaneName); + ASSERT_NE(host_plane, nullptr); + + const XPlane* device_plane = + FindPlaneWithName(space, strings::StrCat(kGpuPlanePrefix, 0)); + ASSERT_NE(device_plane, nullptr); // Check if device plane is serialized. + // one for MemcpyH2D, one for MemcpyD2H, two for Matmul (one from Eigen, one + // from cudnn), one for memset. + EXPECT_EQ(device_plane->event_metadata_size(), 5); + // Check if device capacity is serialized. + XPlaneVisitor plane = CreateTfXPlaneVisitor(device_plane); EXPECT_TRUE(plane.GetStat(kDevCapClockRateKHz).has_value()); EXPECT_TRUE(plane.GetStat(kDevCapCoreCount).has_value()); EXPECT_TRUE(plane.GetStat(kDevCapMemoryBandwidth).has_value()); EXPECT_TRUE(plane.GetStat(kDevCapMemorySize).has_value()); EXPECT_TRUE(plane.GetStat(kDevCapComputeCapMajor).has_value()); EXPECT_TRUE(plane.GetStat(kDevCapComputeCapMinor).has_value()); -#endif // Check if the device events timestamps are set. int total_events = 0; @@ -303,6 +344,7 @@ TEST_F(DeviceTracerTest, TraceToXSpace) { }); EXPECT_GE(total_events, 5); } +#endif // TENSORFLOW_USE_ROCM #if GOOGLE_CUDA TEST_F(DeviceTracerTest, CudaRuntimeResource) { ",0,test 4ca1258da5f15f686e7caae3850f4aff30e9a9c0,tensorflow/tensorflow,[tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc] Add calls to `reserve()` before populating vectors,trt_optimization_pass.cc,"@@ -374,7 +374,9 @@ Status TRTOptimizationPass::Optimize(grappler::Cluster* cluster, } std::vector nodes_to_preserve; - for (const auto& n : item.NodesToPreserve()) { + auto _nodes_to_preserve = item.NodesToPreserve(); + nodes_to_preserve.reserve(_nodes_to_preserve.size()); + for (const auto& n : _nodes_to_preserve) { auto tokens = str_util::Split(n, "":""); string s = tokens.at(0); for (int i = 1; i < tokens.size() - 1; ++i) { ",0,train 1e2c750e62e80a4c26385536791e53806d4bffe4,tensorflow/tensorflow,"Remove trailing whitespaces in TraceMe names. 
PiperOrigin-RevId: 267487085",direct_session.cc,"@@ -500,11 +500,11 @@ Status DirectSession::RunInternal( if (options_.config.experimental().has_session_metadata()) { const auto& model_metadata = options_.config.experimental().session_metadata(); - return strings::StrCat(""SessionRun #id="", step_id, + return strings::StrCat(""SessionRun#id="", step_id, "",model_id="", model_metadata.name(), "":"", model_metadata.version(), ""#""); } else { - return strings::StrCat(""SessionRun #id="", step_id, ""#""); + return strings::StrCat(""SessionRun#id="", step_id, ""#""); } }, profiler::TraceMeLevel::kInfo); ",0,train 1e2c750e62e80a4c26385536791e53806d4bffe4,tensorflow/tensorflow,"Remove trailing whitespaces in TraceMe names. PiperOrigin-RevId: 267487085",graph_mgr.cc,"@@ -418,7 +418,7 @@ void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id, CancellationManager* cancellation_manager, const NamedTensors& in, StatusCallback done) { const uint64 start_time_usecs = Env::Default()->NowMicros(); - string session_id_meta = strings::StrCat(""RunGraph #id="", step_id, ""#""); + string session_id_meta = strings::StrCat(""RunGraph#id="", step_id, ""#""); auto* activity = new profiler::TraceMe(absl::string_view(session_id_meta), profiler::TraceMeLevel::kInfo); // Lookup an item. Holds one ref while executing. ",0,train c486a9177192f652320d37a5fdf33ab9a3a789f2,tensorflow/tensorflow,"Fix bugs in neutral element code and add more unit tests to cover matmul with input shape != output shape. PiperOrigin-RevId: 177920882",constant_folding.cc,"@@ -1254,8 +1254,11 @@ void ConstantFolding::ReplaceAddOrMulWithIdentity(int input_to_forward, Status ConstantFolding::ReplaceAddOrMulWithConstant( double value, const TensorShapeProto& shape, NodeDef* node) { AttrValue tensor_attr; - TF_RETURN_IF_ERROR(CreateConstantTensorAttrValue(node->attr().at(""T"").type(), - value, shape, &tensor_attr)); + AttrValue dtype_attr = node->attr().at(""T""); + TF_RETURN_IF_ERROR(CreateConstantTensorAttrValue(dtype_attr.type(), value, + shape, &tensor_attr)); + node->clear_attr(); + node->mutable_attr()->insert({""dtype"", dtype_attr}); node->mutable_attr()->insert({""value"", tensor_attr}); node->set_op(""Const""); // Convert all inputs to control dependencies. @@ -1333,55 +1336,44 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, properties.GetOutputProperties(node.name())[0].shape(); const TensorShapeProto& x_shape = properties.GetInputProperties(node.name())[0].shape(); - - // Simplify multiplication by or addition of zeros. - const bool x_is_zero = IsZeros(*x); - const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape); - if (x_is_zero) { - if ((is_mul && x_matches_output_shape) || is_matmul) { - // 0 * y = 0 - ReplaceAddOrMulWithIdentity(0, &node); - } else { - // 0 + y = y. - ReplaceAddOrMulWithIdentity(1, &node); - } - continue; - } const TensorShapeProto& y_shape = properties.GetInputProperties(node.name())[1].shape(); + const bool x_is_zero = IsZeros(*x); + const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape); const bool y_is_zero = IsZeros(*y); const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape); - if (y_is_zero) { - if ((is_mul && y_matches_output_shape) || is_matmul) { - // x * 0 = 0 + + // Simplify addition of zeros. + if (is_add) { + if (x_is_zero && y_matches_output_shape) { + // 0 + y = y. ReplaceAddOrMulWithIdentity(1, &node); - } else { + continue; + } else if (y_is_zero && x_matches_output_shape) { // x + 0 = y. 
ReplaceAddOrMulWithIdentity(0, &node); + continue; } - continue; } + // Simplify element-wise multiplication by ones. if (is_mul) { - // Simplify scalar multiplication by zeros where, due to broadcasting, - // the output shape does not match the shape of the zero input. - if (x_is_zero || y_is_zero) { - TF_RETURN_IF_ERROR( - ReplaceAddOrMulWithConstant(0, output_shape, &node)); - continue; - } - - // Simplify multiplication by ones. if (IsOnes(*x) && y_matches_output_shape) { // 1 * y = y. ReplaceAddOrMulWithIdentity(1, &node); continue; - } else if (IsOnes(*y) && x_matches_output_shape) { + } + if (IsOnes(*y) && x_matches_output_shape) { // x * 1 = x. ReplaceAddOrMulWithIdentity(0, &node); continue; } } + + // Simplify multiplication and matmul by zeros. + if (x_is_zero || y_is_zero) { + TF_RETURN_IF_ERROR(ReplaceAddOrMulWithConstant(0, output_shape, &node)); + } } } return Status::OK(); ",0,train c486a9177192f652320d37a5fdf33ab9a3a789f2,tensorflow/tensorflow,"Fix bugs in neutral element code and add more unit tests to cover matmul with input shape != output shape. PiperOrigin-RevId: 177920882",constant_folding_test.cc,"@@ -84,6 +84,10 @@ TEST_F(ConstantFoldingTest, NeutralElement) { ops::Placeholder::Shape(TensorShape({2, 2}))); Output y = ops::Placeholder(s.WithOpName(""y""), DT_FLOAT, ops::Placeholder::Shape(TensorShape({2, 2}))); + Output a = ops::Placeholder(s.WithOpName(""a""), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({3, 2}))); + Output b = ops::Placeholder(s.WithOpName(""b""), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({2, 3}))); Output zeros = !use_const ? ops::ZerosLike(s.WithOpName(""zeros""), x) : ops::Const(s.WithOpName(""zeros""), 0.0f, {2, 2}); Output zeros_broadcast = @@ -94,16 +98,20 @@ TEST_F(ConstantFoldingTest, NeutralElement) { Output mul2 = ops::Mul(s.WithOpName(""mul2""), zeros, y); Output mul3 = ops::Mul(s.WithOpName(""mul3""), x, ones); Output mul4 = ops::Mul(s.WithOpName(""mul4""), ones, y); - Output mul5 = ops::Mul(s.WithOpName(""mul1""), x, zeros_broadcast); - Output mul6 = ops::Mul(s.WithOpName(""mul2""), zeros_broadcast, y); + Output mul5 = ops::Mul(s.WithOpName(""mul5""), x, zeros_broadcast); + Output mul6 = ops::Mul(s.WithOpName(""mul6""), zeros_broadcast, y); Output matmul1 = ops::MatMul(s.WithOpName(""matmul1""), x, zeros); Output matmul2 = ops::MatMul(s.WithOpName(""matmul2""), zeros, y); + Output matmul3 = ops::MatMul(s.WithOpName(""matmul3""), a, zeros); + Output matmul4 = ops::MatMul(s.WithOpName(""matmul4""), zeros, b); Output add1 = ops::Add(s.WithOpName(""add1""), x, zeros); Output add2 = ops::Add(s.WithOpName(""add2""), zeros, y); - Output addn = - ops::AddN(s, {mul1, mul2, mul3, mul4, matmul1, matmul2, add1, add2}); + Output addn = ops::AddN( + s.WithOpName(""addn""), + {mul1, mul2, mul3, mul4, mul5, mul6, matmul1, matmul2, add1, add2}); GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); + item.fetch = {""addn"", ""matmul3"", ""matmul4""}; ConstantFolding optimizer(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */); @@ -111,35 +119,17 @@ TEST_F(ConstantFoldingTest, NeutralElement) { Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(16, output.node_size()); + EXPECT_EQ(20, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { const NodeDef& node = output.node(i); const string& name = node.name(); if (name == ""mul1"") { - if (use_const) { - EXPECT_EQ(""Const"", node.op()); - EXPECT_EQ(""^x"", node.input(0)); - } else { - EXPECT_EQ(""Identity"", node.op()); - 
EXPECT_EQ(""zeros"", node.input(0)); - EXPECT_EQ(""^x"", node.input(1)); - } + EXPECT_EQ(""Const"", node.op()); + EXPECT_EQ(""^x"", node.input(0)); + EXPECT_EQ(""^zeros"", node.input(1)); } else if (name == ""mul2"") { - if (use_const) { - EXPECT_EQ(""Const"", node.op()); - EXPECT_EQ(""^y"", node.input(0)); - } else { - EXPECT_EQ(""Identity"", node.op()); - EXPECT_EQ(""zeros"", node.input(0)); - EXPECT_EQ(""^y"", node.input(1)); - } - } else if (name == ""matmul1"") { - EXPECT_EQ(""Identity"", node.op()); - EXPECT_EQ(""zeros"", node.input(0)); - EXPECT_EQ(""^x"", node.input(1)); - } else if (name == ""matmul2"") { - EXPECT_EQ(""Identity"", node.op()); - EXPECT_EQ(""zeros"", node.input(0)); + EXPECT_EQ(""Const"", node.op()); + EXPECT_EQ(""^zeros"", node.input(0)); EXPECT_EQ(""^y"", node.input(1)); } else if (name == ""mul3"") { EXPECT_EQ(""Identity"", node.op()); @@ -152,23 +142,39 @@ TEST_F(ConstantFoldingTest, NeutralElement) { } else if (name == ""mul5"") { EXPECT_EQ(""Const"", node.op()); EXPECT_EQ(""^x"", node.input(0)); - EXPECT_EQ(""^ones"", node.input(1)); + EXPECT_EQ(""^zeros_broadcast"", node.input(1)); + } else if (name == ""mul6"") { + EXPECT_EQ(""Const"", node.op()); + EXPECT_EQ(""^zeros_broadcast"", node.input(0)); + EXPECT_EQ(""^y"", node.input(1)); + } else if (name == ""matmul1"") { + EXPECT_EQ(""Const"", node.op()); + EXPECT_EQ(""^x"", node.input(0)); + EXPECT_EQ(""^zeros"", node.input(1)); + } else if (name == ""matmul2"") { + EXPECT_EQ(""Const"", node.op()); + EXPECT_EQ(""^zeros"", node.input(0)); + EXPECT_EQ(""^y"", node.input(1)); + } else if (name == ""matmul3"") { + EXPECT_EQ(""Const"", node.op()); + EXPECT_EQ(""^a"", node.input(0)); + EXPECT_EQ(""^zeros"", node.input(1)); TensorProto t = node.attr().at(""value"").tensor(); EXPECT_EQ(1, t.float_val_size()); EXPECT_EQ(0, t.float_val(0)); EXPECT_EQ(2, t.tensor_shape().dim_size()); - EXPECT_EQ(1, t.tensor_shape().dim(0).size()); + EXPECT_EQ(3, t.tensor_shape().dim(0).size()); EXPECT_EQ(2, t.tensor_shape().dim(1).size()); - } else if (name == ""mul6"") { + } else if (name == ""matmul4"") { EXPECT_EQ(""Const"", node.op()); - EXPECT_EQ(""^y"", node.input(0)); - EXPECT_EQ(""^ones"", node.input(1)); + EXPECT_EQ(""^zeros"", node.input(0)); + EXPECT_EQ(""^b"", node.input(1)); TensorProto t = node.attr().at(""value"").tensor(); EXPECT_EQ(1, t.float_val_size()); EXPECT_EQ(0, t.float_val(0)); EXPECT_EQ(2, t.tensor_shape().dim_size()); - EXPECT_EQ(1, t.tensor_shape().dim(0).size()); - EXPECT_EQ(2, t.tensor_shape().dim(1).size()); + EXPECT_EQ(2, t.tensor_shape().dim(0).size()); + EXPECT_EQ(3, t.tensor_shape().dim(1).size()); } else if (name == ""add1"") { EXPECT_EQ(""Identity"", node.op()); EXPECT_EQ(""x"", node.input(0)); @@ -178,6 +184,16 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ(""y"", node.input(0)); EXPECT_EQ(""^zeros"", node.input(1)); } + const std::set square_zero_const{""mul1"", ""mul2"", ""mul5"", + ""mul6"", ""matmul1"", ""matmul2""}; + if (square_zero_const.count(name) > 0) { + TensorProto t = node.attr().at(""value"").tensor(); + EXPECT_EQ(1, t.float_val_size()); + EXPECT_EQ(0, t.float_val(0)); + EXPECT_EQ(2, t.tensor_shape().dim_size()); + EXPECT_EQ(2, t.tensor_shape().dim(0).size()); + EXPECT_EQ(2, t.tensor_shape().dim(1).size()); + } } } } ",0,train 265e1be02583e2d62fbb797237f701a9c9bc2668,tensorflow/tensorflow,"Minimize calls to tesor_util.constant_value in array_grad._StridedSliceGrad. 
PiperOrigin-RevId: 293716338 Change-Id: Id05c9afa21f80543ef783d0cfbc33027caecdf05",array_grad.py,"@@ -273,14 +273,14 @@ def _StridedSliceGrad(op, grad): # be the same. x = array_ops.shape(op.inputs[0], out_type=begin.dtype) - if tensor_util.constant_value(x) is not None: - x = tensor_util.constant_value(x) - if tensor_util.constant_value(begin) is not None: - begin = tensor_util.constant_value(begin) - if tensor_util.constant_value(end) is not None: - end = tensor_util.constant_value(end) - if tensor_util.constant_value(strides) is not None: - strides = tensor_util.constant_value(strides) + x_static = tensor_util.constant_value(x) + x = x_static if x_static is not None else x + begin_static = tensor_util.constant_value(begin) + begin = begin_static if begin_static is not None else begin + end_static = tensor_util.constant_value(end) + end = end_static if end_static is not None else end + strides_static = tensor_util.constant_value(strides) + strides = strides_static if strides_static is not None else strides return array_ops.strided_slice_grad( x, ",0,test 185c0233e5533788fd5c4679acd0a4b64484dc03,tensorflow/tensorflow,"Fix an error in neon tensor util. vget_high_s8 gets the high bits. PiperOrigin-RevId: 227574080",neon_tensor_utils.cc,"@@ -144,7 +144,7 @@ void NeonMatrixBatchVectorMultiplyAccumulate( // registers). int16x8_t prod_16x8 = vmull_s8(vget_low_s8(s1_8x16), vget_low_s8(s2_8x16)); - // Multiply the high bits (i.e. the lower 8 8bit numbers in the + // Multiply the high bits (i.e. the higher 8 8bit numbers in the // registers), and accumulate with the result of the low bits product. // The assumption here is that overflow will not happen as we quantize // our values to be in the range [-127, 127]. As such the sum of the 2 ",0,train 289be76f8ed6d40752f6ee5c64632f4624fa7cc2,tensorflow/tensorflow,"Simplify GPU copy insertion. Previously, there was almost identical code for inserting copies. This CL combines the two code paths. PiperOrigin-RevId: 201655259",gpu_copy_insertion.cc,"@@ -52,60 +52,20 @@ StatusOr GpuCopyInsertion::Run(HloModule* module) { HloDataflowAnalysis::Run(*module)); // Make sure all operands of a library call are in memory instead of constants - // in IR. - for (HloInstruction* hlo : - module->entry_computation()->MakeInstructionPostOrder()) { - // Inserts a copy of hlo->operand(n) if it's a constant. - auto copy_operand_if_constant = [&](int64 n) -> Status { - HloInstruction* operand = hlo->mutable_operand(n); - TF_RET_CHECK(ShapeUtil::IsArray(operand->shape())); - const auto& values = dataflow->GetValueSet(operand).values(); - if (std::any_of(values.begin(), values.end(), [](const HloValue* value) { - return value->defining_instruction()->opcode() == - HloOpcode::kConstant; - })) { - TF_ASSIGN_OR_RETURN(HloInstruction * copy, FindOrInsertCopy(operand)); - TF_RETURN_IF_ERROR(hlo->ReplaceOperandWith(n, copy)); - changed = true; - } - return Status::OK(); - }; - - if (IsCustomCallToDnnBatchNorm(*hlo)) { - // The epsilon and feature_index operands to a CUDNN batchnorm op don't - // need to be materialized in memory -- in fact, they must be constants. - // These are the last two operands of all three batchnorm ops. - for (int64 i = 0; i < hlo->operand_count() - 2; ++i) { - TF_RETURN_IF_ERROR(copy_operand_if_constant(i)); - } - } else if (ImplementedAsLibraryCall(*hlo) || - hlo->opcode() == HloOpcode::kCrossReplicaSum) { - // For all other library calls and cross-replica-sum, materialize all the - // operands into memory. 
(Cross-replica-sum gets its constant args - // materialized even if it's not implemented as a libcall to simplify the - // implementation. It's slower, but we can constant fold away constant - // args *anyway*, so we just need to make it work.) - for (int64 i = 0; i < hlo->operand_count(); ++i) { - TF_RETURN_IF_ERROR(copy_operand_if_constant(i)); - } - } - } - - // Init values of while and conditional nodes cannot be constants. Insert - // copies for any constants found at the operands of these nodes. + // in IR. Also, init values of while and conditional nodes cannot be + // constants. Insert copies for any constants found at the operands of these + // nodes. tensorflow::gtl::FlatSet inserted_copies; for (HloComputation* computation : module->computations()) { - for (HloInstruction* instruction : computation->instructions()) { - if (instruction->opcode() != HloOpcode::kWhile && - instruction->opcode() != HloOpcode::kConditional) { - continue; - } - for (auto operand : instruction->operands()) { + for (HloInstruction* hlo : computation->instructions()) { + // Inserts a copy of hlo->operand(n) if it's a constant. + auto copy_operand_if_constant = [&](int64 n) -> Status { + HloInstruction* operand = hlo->mutable_operand(n); // Skip the operands that have already been replaced with a copy in a // previous iteration (which is possible when a constant is used as an // operand in multiple places). if (ContainsKey(inserted_copies, operand)) { - continue; + return Status::OK(); } for (auto& pair : dataflow->GetInstructionValueSet(operand)) { const HloValueSet& value_set = pair.second; @@ -121,6 +81,47 @@ StatusOr GpuCopyInsertion::Run(HloModule* module) { } } } + return Status::OK(); + }; + + if (IsCustomCallToDnnBatchNorm(*hlo)) { + // The epsilon and feature_index operands to a CUDNN batchnorm op don't + // need to be materialized in memory -- in fact, they must be constants. + // These are the last two operands of all three batchnorm ops. + for (int64 i = 0; i < hlo->operand_count() - 2; ++i) { + TF_RETURN_IF_ERROR(copy_operand_if_constant(i)); + } + } else if (ImplementedAsLibraryCall(*hlo) || + hlo->opcode() == HloOpcode::kCrossReplicaSum || + hlo->opcode() == HloOpcode::kWhile || + hlo->opcode() == HloOpcode::kConditional) { + // For all other library calls, cross-replica-sum, while and conditional + // ops materialize all the operands into memory. (Cross-replica-sum + // gets its constant args materialized even if it's not implemented as a + // libcall to simplify the implementation. It's slower, but we can + // constant fold away constant args *anyway*, so we just need to make it + // work.) + for (int64 i = 0; i < hlo->operand_count(); ++i) { + TF_RETURN_IF_ERROR(copy_operand_if_constant(i)); + } + } + } + } + + if (changed) { + // Check the assumption that the epsilon and feature_index constants of the + // CUDNN batchnorm op are not shared with other ops where we would replace + // them with a copy. These custom op calls are generated with the + // CudnnBatchNormRewriter, so this would only happen if HloCSE merges them. + for (HloComputation* computation : module->computations()) { + for (HloInstruction* hlo : computation->instructions()) { + if (!IsCustomCallToDnnBatchNorm(*hlo)) { + continue; + } + for (int64 i = hlo->operand_count() - 2; i < hlo->operand_count(); + ++i) { + CHECK_EQ(hlo->operand(i)->opcode(), HloOpcode::kConstant); + } } } } ",0,train 694cc06ac9290168e5a700ebb5bc2f117b04af10,tensorflow/tensorflow,"Remove an obsolete TODO for _ReductionDims(sparse_tensor, ..). 
tf.rank() is recently patched to work on SparseTensor. Change: 125127586",math_ops.py,"@@ -936,9 +936,6 @@ def _ReductionDims(x, reduction_indices): return constant_op.constant(np.arange(rank), dtype=dtypes.int32) # Otherwise, we rely on Range and Rank to do the right thing at run-time. - # TODO(zongheng): remove this once rank() supports SparseTensor. - if isinstance(x, ops.SparseTensor): - return range(0, array_ops.size(x.shape)) return range(0, array_ops.rank(x)) ",0,train 6c85a66a16f07bab9b5dc3df33bc6b8111b76615,tensorflow/tensorflow,"Add export calls for protos. PiperOrigin-RevId: 185166764",__init__.py,"@@ -116,6 +116,7 @@ from tensorflow.python.platform import test from tensorflow.python.util.all_util import remove_undocumented from tensorflow.python.util.all_util import make_all +from tensorflow.python.util.tf_export import tf_export # Import modules whose docstrings contribute, for use by remove_undocumented # below. @@ -167,6 +168,31 @@ _allowed_symbols = [ 'TensorInfo', # Used for tf.saved_model functionality. ] +# Export protos +# pylint: disable=undefined-variable +tf_export('AttrValue')(AttrValue) +tf_export('ConfigProto')(ConfigProto) +tf_export('Event', 'summary.Event')(Event) +tf_export('GPUOptions')(GPUOptions) +tf_export('GraphDef')(GraphDef) +tf_export('GraphOptions')(GraphOptions) +tf_export('HistogramProto')(HistogramProto) +tf_export('LogMessage')(LogMessage) +tf_export('MetaGraphDef')(MetaGraphDef) +tf_export('NameAttrList')(NameAttrList) +tf_export('NodeDef')(NodeDef) +tf_export('OptimizerOptions')(OptimizerOptions) +tf_export('RunMetadata')(RunMetadata) +tf_export('RunOptions')(RunOptions) +tf_export('SessionLog', 'summary.SessionLog')(SessionLog) +tf_export('Summary', 'summary.Summary')(Summary) +tf_export('summary.SummaryDescription')(SummaryDescription) +tf_export('SummaryMetadata')(SummaryMetadata) +tf_export('summary.TaggedRunMetadata')(TaggedRunMetadata) +tf_export('TensorInfo')(TensorInfo) +# pylint: enable=undefined-variable + + # The following symbols are kept for compatibility. It is our plan # to remove them in the future. _allowed_symbols.extend([ ",0,train 6c85a66a16f07bab9b5dc3df33bc6b8111b76615,tensorflow/tensorflow,"Add export calls for protos. PiperOrigin-RevId: 185166764",profiler.py,"@@ -31,6 +31,7 @@ from tensorflow.python.profiler.option_builder import ProfileOptionBuilder from tensorflow.python.profiler.tfprof_logger import write_op_log from tensorflow.python.util.all_util import remove_undocumented +from tensorflow.python.util.tf_export import tf_export _allowed_symbols = [ @@ -48,6 +49,12 @@ _allowed_symbols.extend([ 'OpLogProto', ]) +# Export protos +tf_export('profiler.GraphNodeProto')(GraphNodeProto) +tf_export('profiler.MultiGraphNodeProto')(MultiGraphNodeProto) +tf_export('profiler.AdviceProto')(AdviceProto) +tf_export('profiler.OpLogProto')(OpLogProto) + remove_undocumented(__name__, _allowed_symbols, [ Profiler, profile, ",0,train 6c85a66a16f07bab9b5dc3df33bc6b8111b76615,tensorflow/tensorflow,"Add export calls for protos. PiperOrigin-RevId: 185166764",training.py,"@@ -189,6 +189,7 @@ from tensorflow.python.training.training_util import create_global_step from tensorflow.python.training.training_util import get_or_create_global_step from tensorflow.python.pywrap_tensorflow import do_quantize_training_on_graphdef from tensorflow.python.pywrap_tensorflow import NewCheckpointReader +from tensorflow.python.util.tf_export import tf_export # pylint: disable=wildcard-import # Training data protos. 
@@ -239,6 +240,23 @@ _allowed_symbols = [ ""SequenceExample"", # from example_pb2. ""ServerDef"", ] + +# pylint: disable=undefined-variable +tf_export(""train.BytesList"")(BytesList) +tf_export(""train.ClusterDef"")(ClusterDef) +tf_export(""train.Example"")(Example) +tf_export(""train.Feature"")(Feature) +tf_export(""train.Features"")(Features) +tf_export(""train.FeatureList"")(FeatureList) +tf_export(""train.FeatureLists"")(FeatureLists) +tf_export(""train.FloatList"")(FloatList) +tf_export(""train.Int64List"")(Int64List) +tf_export(""train.JobDef"")(JobDef) +tf_export(""train.SaverDef"")(SaverDef) +tf_export(""train.SequenceExample"")(SequenceExample) +tf_export(""train.ServerDef"")(ServerDef) +# pylint: enable=undefined-variable + # Include extra modules for docstrings because: # * Input methods in tf.train are documented in io_ops. # * Saver methods in tf.train are documented in state_ops. ",0,train c88380396e1d1003fa770794217e4cb919c511a0,tensorflow/tensorflow,"Make the ops_compatibility framework usable outside tensorflow/core. Change: 120008243",backwards_compatibility_test.cc,"@@ -19,12 +19,15 @@ limitations under the License. #include ""tensorflow/core/platform/env.h"" #include ""tensorflow/core/platform/protobuf.h"" #include ""tensorflow/core/platform/test.h"" +#include ""tensorflow/core/public/version.h"" namespace tensorflow { namespace { TEST(BackwardsCompatibilityTest, IsCompatible) { - OpCompatibilityLib compatibility(""tensorflow/core/ops""); + OpCompatibilityLib compatibility(""tensorflow/core/ops"", + strings::StrCat(""v"", TF_MAJOR_VERSION), + nullptr); Env* env = Env::Default(); int changed_ops = 0; ",0,train c88380396e1d1003fa770794217e4cb919c511a0,tensorflow/tensorflow,"Make the ops_compatibility framework usable outside tensorflow/core. Change: 120008243",op_compatibility_lib.cc,"@@ -23,17 +23,21 @@ limitations under the License. #include ""tensorflow/core/lib/io/path.h"" #include ""tensorflow/core/lib/strings/strcat.h"" #include ""tensorflow/core/platform/protobuf.h"" -#include ""tensorflow/core/public/version.h"" namespace tensorflow { -static string OpsHistoryFile() { - return strings::StrCat(""compat/ops_history.v"", TF_MAJOR_VERSION, "".pbtxt""); +static string OpsHistoryFile(const string& ops_prefix, + const string& history_version) { + return io::JoinPath(ops_prefix, strings::StrCat(""compat/ops_history."", + history_version, "".pbtxt"")); } -OpCompatibilityLib::OpCompatibilityLib(const string& ops_prefix) +OpCompatibilityLib::OpCompatibilityLib(const string& ops_prefix, + const string& history_version, + const std::set* stable_ops) : ops_file_(io::JoinPath(ops_prefix, ""ops.pbtxt"")), - op_history_file_(io::JoinPath(ops_prefix, OpsHistoryFile())) { + op_history_file_(OpsHistoryFile(ops_prefix, history_version)), + stable_ops_(stable_ops) { // Get the sorted list of all registered OpDefs. printf(""Getting all registered ops...\n""); OpRegistry::Global()->Export(false, &op_list_); @@ -48,6 +52,24 @@ Status OpCompatibilityLib::ValidateCompatible(Env* env, int* changed_ops, // Strip docs out of op_list_. RemoveDescriptionsFromOpList(&op_list_); + if (stable_ops_ != nullptr) { + printf(""Verifying no stable ops have been removed...\n""); + // We rely on stable_ops_ and op_list_ being in sorted order. 
+ auto iter = stable_ops_->begin(); + for (int cur = 0; iter != stable_ops_->end() && cur < op_list_.op_size(); + ++cur) { + const string& op_name = op_list_.op(cur).name(); + if (op_name > *iter) { + return errors::InvalidArgument(""Error, stable op removed: "", *iter); + } else if (op_name == *iter) { + ++iter; + } + } + if (iter != stable_ops_->end()) { + return errors::InvalidArgument(""Error, stable op removed: "", *iter); + } + } + OpList in_op_history; { // Read op history. printf(""Reading op history from %s...\n"", op_history_file_.c_str()); @@ -61,17 +83,22 @@ Status OpCompatibilityLib::ValidateCompatible(Env* env, int* changed_ops, int cur = 0; int start = 0; + printf(""Verifying updates are compatible...\n""); // Note: Op history is in (alphabetical, oldest-first) order. while (cur < op_list_.op_size() && start < in_op_history.op_size()) { - if (op_list_.op(cur).name() < in_op_history.op(start).name()) { + const string& op_name = op_list_.op(cur).name(); + if (stable_ops_ != nullptr && stable_ops_->count(op_name) == 0) { + // Ignore unstable op. + } + if (op_name < in_op_history.op(start).name()) { // New op: add it. if (out_op_history != nullptr) { *out_op_history->add_op() = op_list_.op(cur); } ++*added_ops; ++cur; - } else if (op_list_.op(cur).name() > in_op_history.op(start).name()) { + } else if (op_name > in_op_history.op(start).name()) { // Op removed: error. return errors::InvalidArgument(""Error, removed op: "", SummarizeOpDef(in_op_history.op(start))); @@ -79,7 +106,6 @@ Status OpCompatibilityLib::ValidateCompatible(Env* env, int* changed_ops, // Op match. // Find all historical version of this op. - const string& op_name = op_list_.op(cur).name(); int end = start + 1; for (; end < in_op_history.op_size(); ++end) { if (in_op_history.op(end).name() != op_name) break; @@ -127,17 +153,22 @@ Status OpCompatibilityLib::ValidateCompatible(Env* env, int* changed_ops, } // Error if missing ops. - if (start < in_op_history.op_size()) { + if (stable_ops_ == nullptr && start < in_op_history.op_size()) { return errors::InvalidArgument(""Error, removed op: "", SummarizeOpDef(in_op_history.op(start))); } // Add remaining new ops. for (; cur < op_list_.op_size(); ++cur) { - if (out_op_history) { - *out_op_history->add_op() = op_list_.op(cur); + const string& op_name = op_list_.op(cur).name(); + if (stable_ops_ != nullptr && stable_ops_->count(op_name) == 0) { + // Ignore unstable op. + } else { + if (out_op_history) { + *out_op_history->add_op() = op_list_.op(cur); + } + ++*added_ops; } - ++*added_ops; } return Status::OK(); ",0,train c88380396e1d1003fa770794217e4cb919c511a0,tensorflow/tensorflow,"Make the ops_compatibility framework usable outside tensorflow/core. Change: 120008243",op_compatibility_lib.h,"@@ -16,15 +16,25 @@ limitations under the License. #ifndef TENSORFLOW_CORE_OPS_COMPAT_OP_COMPATIBILITY_LIB_H_ #define TENSORFLOW_CORE_OPS_COMPAT_OP_COMPATIBILITY_LIB_H_ -#include +#include + #include ""tensorflow/core/framework/op_def.pb.h"" #include ""tensorflow/core/platform/env.h"" +#include ""tensorflow/core/platform/types.h"" namespace tensorflow { class OpCompatibilityLib { public: - explicit OpCompatibilityLib(const string& ops_prefix); + // `ops_prefix` is a filename prefix indicating where to find the + // ops files. + // `history_version` is used to construct the ops history file name. + // `*stable_ops` has an optional list of ops that we care about. + // If stable_ops == nullptr, we use all registered ops. 
+ // Otherwise we ignore ops not in *stable_ops and require all ops + // in *stable_ops to exist. + OpCompatibilityLib(const string& ops_prefix, const string& history_version, + const std::set* stable_ops); // Name of the file that contains the checked-in versions of ops, with docs. const string& ops_file() const { return ops_file_; } @@ -45,8 +55,9 @@ class OpCompatibilityLib { OpList* out_op_history); private: - string ops_file_; - string op_history_file_; + const string ops_file_; + const string op_history_file_; + const std::set* stable_ops_; OpList op_list_; }; ",0,train c88380396e1d1003fa770794217e4cb919c511a0,tensorflow/tensorflow,"Make the ops_compatibility framework usable outside tensorflow/core. Change: 120008243",update_ops.cc,"@@ -27,7 +27,8 @@ namespace tensorflow { namespace { void WriteUpdateTo(const string& directory) { - OpCompatibilityLib compatibility(directory); + OpCompatibilityLib compatibility( + directory, strings::StrCat(""v"", TF_MAJOR_VERSION), nullptr); // Write full copy of all ops to ops.pbtxt. Env* env = Env::Default(); ",0,train 79d7ea98b59e7e0841186a6f31b85e8c4bbe5d62,tensorflow/tensorflow,"Fix pydoc for _safe_scalar_div. Change: 134115246",metric_ops.py,"@@ -93,7 +93,7 @@ def _safe_div(numerator, denominator, name): def _safe_scalar_div(numerator, denominator, name): - """"""Divides two values, returning 0 if the denominator is != 0. + """"""Divides two values, returning 0 if the denominator is 0. Args: numerator: A scalar `float64` `Tensor`. ",0,train 06c2ab5c681db8b81024dee83b620ecc49e62ae8,tensorflow/tensorflow,"test: remove defaults, use self.asserts",script_ops_test.py,"@@ -59,10 +59,10 @@ class NumpyFunctionTest(test.TestCase): # different argument tensor_double_plus_stateless( - constant_op.constant(1, dtype=dtypes.int32), - constant_op.constant(2, dtype=dtypes.int32), + constant_op.constant(1), + constant_op.constant(2), ) - assert call_count == 1 # +1 as only the first one was executed + self.assertEqual(call_count, 1) # +1 as only the first one was executed @def_function.function(autograph=False) def tensor_double_plus_stateful(a, b): @@ -71,11 +71,10 @@ class NumpyFunctionTest(test.TestCase): return sum1 + sum2 tensor_double_plus_stateful( - constant_op.constant(3, dtype=dtypes.int32), - constant_op.constant(4, dtype=dtypes.int32), + constant_op.constant(3), + constant_op.constant(4), ) - assert call_count == 3 # +2 as it is stateful, both were executed - + self.assertEqual(call_count, 3) # +2 as it is stateful, both were executed if __name__ == ""__main__"": ",0,train a968485f8adb4ee4c943dac8b3a2d480e9422284,tensorflow/tensorflow,"[MLIR][KernelGen] Simplify baseline implementations for floor_div PiperOrigin-RevId: 407310823 Change-Id: I7cd06a130ef8272576341f09dd1c68d31a52c40e",gpu_binary_ops_test.cc,"@@ -489,53 +489,36 @@ TEST_F(BinaryOpsTest, EqualUint8_tSpecialCases) { /// Test `tf.FloorDiv`. 
-template +template ::value, + bool> = true> T baseline_floor_div(T lhs, T rhs) { return std::floor(lhs / rhs); } -template <> -Eigen::half baseline_floor_div(Eigen::half lhs, Eigen::half rhs) { - return static_cast(std::floor(static_cast(lhs / rhs))); -} - -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ - defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) -template <> -int8_t baseline_floor_div(int8_t lhs, int8_t rhs) { - int8_t res = lhs / rhs; - if (((lhs < 0 && rhs > 0) || (lhs > 0 && rhs < 0)) && lhs % rhs) { - --res; - } - return res; -} -#endif - -template <> -int16_t baseline_floor_div(int16_t lhs, int16_t rhs) { - int16_t res = lhs / rhs; - if (((lhs < 0 && rhs > 0) || (lhs > 0 && rhs < 0)) && lhs % rhs) { - --res; - } - return res; +template ::value, bool> = true> +T baseline_floor_div(T lhs, T rhs) { + return static_cast(std::floor(static_cast(lhs / rhs))); } -template <> -int64_t baseline_floor_div(int64_t lhs, int64_t rhs) { - int64_t res = lhs / rhs; +template ::value, + bool> = true> +T baseline_floor_div(T lhs, T rhs) { + T res = lhs / rhs; if (((lhs < 0 && rhs > 0) || (lhs > 0 && rhs < 0)) && lhs % rhs) { --res; } return res; } -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ - defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) -template <> -uint64_t baseline_floor_div(uint64_t lhs, uint64_t rhs) { +template ::value, + bool> = true> +T baseline_floor_div(T lhs, T rhs) { return lhs / rhs; } -#endif GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES( FloorDiv, ",0,train bdc5846c58ca88bf591f0ca0a3971a2daf03a180,tensorflow/tensorflow,support max_profiling_buffer_entries,label_image.cc,"@@ -184,7 +184,8 @@ void RunInference(Settings* s) { exit(-1); } - profiling::Profiler* profiler = new profiling::Profiler(); + profiling::Profiler* profiler = + new profiling::Profiler(s->max_profiling_buffer_entries); interpreter->SetProfiler(profiler); if (s->profiling) profiler->StartProfiling(); @@ -287,12 +288,13 @@ int Main(int argc, char** argv) { {""input_mean"", required_argument, nullptr, 'b'}, {""input_std"", required_argument, nullptr, 's'}, {""num_results"", required_argument, nullptr, 'r'}, + {""max_profiling_buffer_entries"", required_argument, nullptr, 'e'}, {nullptr, 0, nullptr, 0}}; /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, ""a:b:c:f:i:l:m:p:r:s:t:v:"", long_options, + c = getopt_long(argc, argv, ""a:b:c:e:f:i:l:m:p:r:s:t:v:"", long_options, &option_index); /* Detect the end of the options. */ @@ -309,6 +311,10 @@ int Main(int argc, char** argv) { s.loop_count = strtol(optarg, nullptr, 10); // NOLINT(runtime/deprecated_fn) break; + case 'e': + s.max_profiling_buffer_entries = + strtol(optarg, nullptr, 10); // NOLINT(runtime/deprecated_fn) + break; case 'f': s.allow_fp16 = strtol(optarg, nullptr, 10); // NOLINT(runtime/deprecated_fn) ",0,train bdc5846c58ca88bf591f0ca0a3971a2daf03a180,tensorflow/tensorflow,support max_profiling_buffer_entries,label_image.h,"@@ -36,6 +36,7 @@ struct Settings { string input_layer_type = ""uint8_t""; int number_of_threads = 4; int number_of_results = 5; + int max_profiling_buffer_entries = 1024; }; } // namespace label_image ",0,train 983d8931b4ea1a5ee81a63b5a2d393bb82f2fa0b,tensorflow/tensorflow,"Fix typo. Change: 139974629",checkpoint_reader.h,"@@ -43,7 +43,7 @@ class CheckpointReader { bool HasTensor(const string& name) const; const string DebugString() const; - // Returns a map from variable namaes to its shape. 
Slices of a partitioned + // Returns a map from variable names to its shape. Slices of a partitioned // tensor are combined into a single entry. const TensorSliceReader::VarToShapeMap& GetVariableToShapeMap() const; ",0,train cb401f09be5b816e704a70babc0facad63e84636,tensorflow/tensorflow,"tf.tile gradient supports IndexedSlice (#17083) * TST: add test case * ENH: tf.tile gradient supports IndexedSlices * Revert ""TST: add test case"" This reverts commit b4958112a5b110dc015e48ec547eb98996a84038. * TST: move test case * CLN: fix lint error * TST: add test case, input with rank 1",shape_ops_test.py,"@@ -642,6 +642,29 @@ class TileTest(test.TestCase): err = gradient_checker.compute_gradient_error(a, [4, 2], tiled, [4, 4]) self.assertLess(err, 1e-3) + def testGradientWithSparseGradWithRank1(self): + inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], + dtype=dtypes.float32) + outputs = array_ops.gather(array_ops.tile(inputs, [3]), + [1, 5, 9, 3, 7, 2, 2, 2]) + with self.test_session(): + error = gradient_checker.compute_gradient_error( + inputs, inputs.get_shape().as_list(), + outputs, outputs.get_shape().as_list()) + self.assertLess(error, 1e-4) + + def testGradientWithSparseGradWithRank3(self): + inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], + dtype=dtypes.float32) + inputs = array_ops.reshape(inputs, [-1, 1, 1]) + outputs = array_ops.gather(array_ops.tile(inputs, [3, 4, 2]), + [1, 5, 9, 3, 7, 2, 2, 2]) + with self.test_session(): + error = gradient_checker.compute_gradient_error( + inputs, inputs.get_shape().as_list(), + outputs, outputs.get_shape().as_list()) + self.assertLess(error, 1e-4) + def testShapeFunctionEdgeCases(self): # Unknown multiples shape. inp = constant_op.constant(0.0, shape=[4, 4, 4, 4]) ",0,train cb401f09be5b816e704a70babc0facad63e84636,tensorflow/tensorflow,"tf.tile gradient supports IndexedSlice (#17083) * TST: add test case * ENH: tf.tile gradient supports IndexedSlices * Revert ""TST: add test case"" This reverts commit b4958112a5b110dc015e48ec547eb98996a84038. 
* TST: move test case * CLN: fix lint error * TST: add test case, input with rank 1",array_grad.py,"@@ -568,7 +568,6 @@ ops.NotDifferentiable(""Size"") @ops.RegisterGradient(""Tile"") def _TileGrad(op, grad): """"""Sum reduces grad along the tiled dimensions."""""" - assert isinstance(grad, ops.Tensor) input_shape = array_ops.shape(op.inputs[0]) # We interleave multiples and input_shape to get split_shape, # reshape grad to split_shape, and reduce along all even @@ -581,6 +580,13 @@ def _TileGrad(op, grad): split_shape = array_ops.reshape( array_ops.transpose(array_ops.stack([op.inputs[1], input_shape])), [-1]) axes = math_ops.range(0, array_ops.size(split_shape), 2) + # Sum reduces grad along the first dimension for IndexedSlices + if isinstance(grad, ops.IndexedSlices): + grad = math_ops.unsorted_segment_sum( + grad.values, + math_ops.mod(grad.indices, input_shape[0]), + input_shape[0]) + split_shape = array_ops.concat([[1], split_shape[1:]], axis=0) input_grad = math_ops.reduce_sum(array_ops.reshape(grad, split_shape), axes) # Fix shape inference if not context.executing_eagerly(): ",0,train 38491a84a9e38357e400457dbbe408b66e786672,tensorflow/tensorflow,TFTRT: Expand lambda and inline ifs to functions with if and else,trt_convert.py,"@@ -45,15 +45,33 @@ from tensorflow.python.saved_model import loader_impl from tensorflow.python.saved_model import tag_constants from tensorflow.python.training import saver -if _six.PY2: - _to_bytes = lambda s: s.encode(""utf-8"", errors=""surrogateescape"") \ - if isinstance(s, unicode) else s - _to_string = lambda s: s.decode(""utf-8"") if isinstance(s, str) else s -else: - _to_bytes = lambda s: s.encode(""utf-8"", errors=""surrogateescape"") \ - if isinstance(s, str) else s - _to_string = lambda s: s.decode(""utf-8"") if isinstance(s, bytes) else s - +def _to_bytes(s): + """"""Returns encoded of s if s is a sequence of chars otherwise returns s. + """""" + if _six.PY2: + if isinstance(s, unicode): + return s.encode(""utf-8"", errors=""surrogateescape"") + else: + return s + else: + if isinstance(s, str): + return s.encode(""utf-8"", errors=""surrogateescape"") + else: + return s + +def _to_string(s): + """"""Returns decoded of s if s is a sequence of bytes otherwise returns s. + """""" + if _six.PY2: + if isinstance(s, str): + return s.decode(""utf-8"") + else: + return s + else: + if isinstance(s, bytes): + return s.decode(""utf-8"") + else: + return s class TrtPrecisionMode(object): FP32 = ""FP32"" ",0,train a346aa260d32eb83621bb7ed501a2b07ba186480,tensorflow/tensorflow,"Automated rollback of commit 624ff13fdf4e54e255d23971ef2beec3c48c3bb2. Revert #21826. PiperOrigin-RevId: 212487142",ctc_ops.py,"@@ -242,11 +242,11 @@ def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100, If `merge_repeated` is `True`, merge repeated classes in the output beams. This means that if consecutive entries in a beam are the same, - only the first of these is emitted. That is, when the sequence is - `A B B * B * B` (where '*' is the blank label), the return value is: + only the first of these is emitted. That is, when the top path + is `A B B B B`, the return value is: * `A B` if `merge_repeated = True`. - * `A B B B` if `merge_repeated = False`. + * `A B B B B` if `merge_repeated = False`. 
Args: inputs: 3-D `float` `Tensor`, size ",0,train 81fefe40e1c3ad9a14d9d7d665b25d7e93fb2dfc,tensorflow/tensorflow,"Add test case for int16 support of tf.stack/Pack on gpu Signed-off-by: Yong Tang ",stack_op_test.py,"@@ -76,7 +76,7 @@ class StackOpTest(test.TestCase): np.random.seed(7) with self.test_session(use_gpu=True): for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2): - for dtype in [np.bool, np.float32, np.int32, np.int64]: + for dtype in [np.bool, np.float32, np.int16, np.int32, np.int64]: data = np.random.randn(*shape).astype(dtype) # Stack back into a single tensorflow tensor directly using np array c = array_ops.stack(data) ",0,train 6294c8cefa60a5a240b56e4cea5aa487a43cf245,tensorflow/tensorflow,"Remove explicit assert Signed-off-by: Yong Tang ",string_ops.py,"@@ -125,8 +125,8 @@ def string_format(template, inputs, placeholder=""{}"", summarize=3, name=None): ```python >>> tensor = tf.range(10) >>> formatted = tf.strings.format(""tensor: {}, suffix"", tensor) - >>> expected = ""tensor: [0 1 2 ... 7 8 9], suffix"" - >>> assert(formatted == expected) + >>> print(formatted) + tf.Tensor(b'tensor: [0 1 2 ... 7 8 9], suffix', shape=(), dtype=string) ``` Formatting a multi-tensor template: @@ -135,14 +135,8 @@ def string_format(template, inputs, placeholder=""{}"", summarize=3, name=None): >>> tensor_two = tf.range(10) >>> formatted = tf.strings.format(""first: {}, second: {}, suffix"", ... (tensor_one, tensor_two)) - >>> expected = (""first: [[0 1 2 ... 7 8 9]\n"" - ... "" [10 11 12 ... 17 18 19]\n"" - ... "" [20 21 22 ... 27 28 29]\n"" - ... "" ...\n"" - ... "" [70 71 72 ... 77 78 79]\n"" - ... "" [80 81 82 ... 87 88 89]\n"" - ... "" [90 91 92 ... 97 98 99]], second: [0 1 2 ... 7 8 9], suffix"") - >>> assert(formatted == expected) + >>> print(formatted) + tf.Tensor(b'first: [[0 1 2 ... 7 8 9]\n [10 11 12 ... 17 18 19]\n [20 21 22 ... 27 28 29]\n ...\n [70 71 72 ... 77 78 79]\n [80 81 82 ... 87 88 89]\n [90 91 92 ... 97 98 99]], second: [0 1 2 ... 7 8 9], suffix', shape=(), dtype=string) ``` Args: ",0,train 3a0643dcb563f9bb34879e9da98b65d359f24ed2,tensorflow/tensorflow,Change error msg,sparse_to_dense_op.cc,"@@ -241,8 +241,8 @@ class SparseToDenseGPU : public AsyncOpKernel { output_shape_vec.data(), num_dims * sizeof(Index)).ok(), errors::InvalidArgument( - ""failed to copy memory from host to device in "" - ""SparseToDense""), done); + ""failed to copy output_shape vector from host to "" + ""device in SparseToDenseOp""), done); functor::LaunchSparseToDense()( c, done, this, validate_indices_, indices.flat().data(), ",0,train 4675bebffe1eb1b94e26159a42bd7a2031837985,tensorflow/tensorflow,"Add a convenient `clone()` method on the `Op` class that forward to the underlying `Operation` (NFC) PiperOrigin-RevId: 266685852",OpDefinition.h,"@@ -906,6 +906,16 @@ public: /// Return the operation that this refers to. Operation *getOperation() { return OpState::getOperation(); } + /// Create a deep copy of this operation. + ConcreteType clone() { return cast(getOperation()->clone()); } + + /// Create a partial copy of this operation without traversing into attached + /// regions. The new operation will have the same number of regions as the + /// original one, but they will be left empty. + ConcreteType cloneWithoutRegions() { + return cast(getOperation()->cloneWithoutRegions()); + } + /// Return the dialect that this refers to. 
Dialect *getDialect() { return getOperation()->getDialect(); } ",0,train 4675bebffe1eb1b94e26159a42bd7a2031837985,tensorflow/tensorflow,"Add a convenient `clone()` method on the `Op` class that forward to the underlying `Operation` (NFC) PiperOrigin-RevId: 266685852",Operation.h,"@@ -94,10 +94,16 @@ public: Operation *clone(BlockAndValueMapping &mapper); Operation *clone(); - /// Create a deep copy of this operation but keep the operation regions empty. + /// Create a partial copy of this operation without traversing into attached + /// regions. The new operation will have the same number of regions as the + /// original one, but they will be left empty. /// Operands are remapped using `mapper` (if present), and `mapper` is updated /// to contain the results. Operation *cloneWithoutRegions(BlockAndValueMapping &mapper); + + /// Create a partial copy of this operation without traversing into attached + /// regions. The new operation will have the same number of regions as the + /// original one, but they will be left empty. Operation *cloneWithoutRegions(); /// Returns the operation block that contains this operation. ",0,train 37ba47810867abe769199cc46d5b8e3b6fe11069,tensorflow/tensorflow,Fixing example strided_slice (#7347),array_ops.py,"@@ -634,8 +634,8 @@ def strided_slice(input_, tf.strided_slice(input, [1, 0, 0], [2, 1, 3], [1, 1, 1]) ==> [[[3, 3, 3]]] tf.strided_slice(input, [1, 0, 0], [2, 2, 3], [1, 1, 1]) ==> [[[3, 3, 3], [4, 4, 4]]] - tf.strided_slice(input, [1, 1, 0], [2, -1, 3], [1, -1, 1]) ==>[[[4, 4, 4], - [3, 3, 3]]] + tf.strided_slice(input, [1, -1, 0], [2, -3, 3], [1, -1, 1]) ==>[[[4, 4, 4], + [3, 3, 3]]] ``` Args: ",0,train 71f7b620fc3a6bdf30facd7b6e63c789e90567e0,tensorflow/tensorflow,Fixed off-by-one error in L115-116. (#7437),word2vec_basic.py,"@@ -112,6 +112,8 @@ def generate_batch(batch_size, num_skips, skip_window): labels[i * num_skips + j, 0] = buffer[target] buffer.append(data[data_index]) data_index = (data_index + 1) % len(data) + # Backtrack a little bit to avoid skipping words in the end of a batch + data_index = (data_index + len(data) - span) % len(data) return batch, labels batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1) ",0,train 092485d20cd7dca8a9aa0268a34081cad7918549,tensorflow/tensorflow,"Raise an unimplemented error when receiving aliased reference args. The XLA runtime does not support updating reference args that alias. Currently, it fails with in internal error. However, it should really be an unimplemented error, as it is a condition that is not unexpected and might be recovered from by, e.g., falling back to TF classic. PiperOrigin-RevId: 241491318",xla_launch_util.cc,"@@ -34,6 +34,7 @@ limitations under the License. #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/types.h"" +#include ""tensorflow/core/lib/core/errors.h"" #include ""tensorflow/core/util/stream_executor_util.h"" namespace tensorflow { @@ -132,7 +133,7 @@ Status LockVariables(absl::Span variables) { // cluster because we would not handle variable updates correctly. Any // locks we have already acquired will be released when the VariableInfo // objects are destroyed. 
- return errors::Internal(""Duplicate variable passed to XLA cluster""); + return errors::Unimplemented(""Duplicate variable passed to XLA cluster""); } VLOG(4) << ""Acquiring lock for variable "" << reinterpret_cast(variable); ",0,train 092485d20cd7dca8a9aa0268a34081cad7918549,tensorflow/tensorflow,"Raise an unimplemented error when receiving aliased reference args. The XLA runtime does not support updating reference args that alias. Currently, it fails with in internal error. However, it should really be an unimplemented error, as it is a condition that is not unexpected and might be recovered from by, e.g., falling back to TF classic. PiperOrigin-RevId: 241491318",critical_section_test.py,"@@ -56,6 +56,7 @@ class CriticalSectionTest(test.TestCase): sorted(r_value)) @test_util.run_in_graph_and_eager_modes + @test_util.xla_allow_fallback(""b/128495870"") def testCriticalSectionWithControlFlow(self): for outer_cond in [False, True]: for inner_cond in [False, True]: ",0,train f0a7939bf73c7b08daee6a46292159fd7651a785,tensorflow/tensorflow,"Remove internal caching fields from the tracking list for tf.Moduel. It was breaking tf probability when tf.Module traverse all the fields. PiperOrigin-RevId: 296482313 Change-Id: I1afa34681df1e03e2f126b0070a597a03e4b1862",network.py,"@@ -158,7 +158,8 @@ class Network(base_layer.Layer): # The key of _layer_call_argspecs is a layer. tf.Module._flatten will fail to # flatten the key since it is trying to convert Trackable/Layer to a string. _TF_MODULE_IGNORED_PROPERTIES = frozenset(itertools.chain( - ('_layer_call_argspecs', '_compiled_trainable_state'), + ('_layer_call_argspecs', '_compiled_trainable_state', + '_output_mask_cache', '_output_tensor_cache', '_output_shape_cache'), base_layer.Layer._TF_MODULE_IGNORED_PROPERTIES )) ",0,train baea13831c2d1ffa08c4fcc8944a3870d19826cb,tensorflow/tensorflow,"Introduce a new C API entrypoint to set a 'func' attribute on an op description. PiperOrigin-RevId: 182146876",c_api.cc,"@@ -1201,6 +1201,13 @@ void TF_SetAttrTypeList(TF_OperationDescription* desc, const char* attr_name, reinterpret_cast(values), num_values)); } +void TF_SetAttrFunc(TF_OperationDescription* desc, const char* attr_name, + const char* value, size_t length) { + tensorflow::NameAttrList func_name; + func_name.set_name(std::string(value, value + length)); + desc->node_builder.Attr(attr_name, func_name); +} + void TF_SetAttrShape(TF_OperationDescription* desc, const char* attr_name, const int64_t* dims, int num_dims) { PartialTensorShape shape; ",0,train baea13831c2d1ffa08c4fcc8944a3870d19826cb,tensorflow/tensorflow,"Introduce a new C API entrypoint to set a 'func' attribute on an op description. PiperOrigin-RevId: 182146876",c_api.h,"@@ -511,6 +511,11 @@ TF_CAPI_EXPORT extern void TF_SetAttrTypeList(TF_OperationDescription* desc, const char* attr_name, const TF_DataType* values, int num_values); +// Set a 'func' attribute to the specified name. +// `value` must point to a string of length `length` bytes. +TF_CAPI_EXPORT extern void TF_SetAttrFunc(TF_OperationDescription* desc, + const char* attr_name, + const char* value, size_t length); // Set `num_dims` to -1 to represent ""unknown rank"". Otherwise, // `dims` points to an array of length `num_dims`. `dims[i]` must be ",0,train 9d17a0b425db338ae86465f5f3204335986fbae6,tensorflow/tensorflow,"Set namespace to TFDevice for MarkOpsForOutsideCompilation pass. This pass is a generic TF device pass so this is a better namespace. 
PiperOrigin-RevId: 322895228 Change-Id: Id848bd88af6a7d60f428a0b6531e3fb4a507976d",mark_ops_for_outside_compilation.cc,"@@ -23,7 +23,7 @@ limitations under the License. #include ""tensorflow/compiler/mlir/tensorflow/transforms/passes.h"" namespace mlir { -namespace TF { +namespace TFDevice { namespace { @@ -54,5 +54,5 @@ static PassRegistration pass( ""tf-mark-ops-for-outside-compilation"", ""Marks unsupported ops a device cluster for outside compilation.""); -} // namespace TF +} // namespace TFDevice } // namespace mlir ",0,train 9d17a0b425db338ae86465f5f3204335986fbae6,tensorflow/tensorflow,"Set namespace to TFDevice for MarkOpsForOutsideCompilation pass. This pass is a generic TF device pass so this is a better namespace. PiperOrigin-RevId: 322895228 Change-Id: Id848bd88af6a7d60f428a0b6531e3fb4a507976d",passes.h,"@@ -247,6 +247,11 @@ std::unique_ptr> CreateParallelExecuteToIslandsPass(); std::unique_ptr> CreateAnnotateParameterReplicationPass(); +// Creates a pass that marks unsupported ops in device cluster for outside +// compilation. +std::unique_ptr> +CreateMarkOpsForOutsideCompilationPass(); + // Creates a pass that hoists a `tf_device.launch` body and assigns a `device` // attribute to each TensorFlow dialect op in the body based on the `device` // attribute on the `tf_device.launch`. @@ -302,11 +307,6 @@ std::unique_ptr> CreateTPUHostComputationExpansionPass(); std::unique_ptr> CreateTPUUpdateEmbeddingEnqueueOpInputsPass(); -// Creates a pass that marks unsupported ops in device cluster for outside -// compilation. -std::unique_ptr> -CreateMarkOpsForOutsideCompilationPass(); - // Creates a pass that extract outside compilation (CPU ops inside TPU cluster) // ops to a separate parallel_execute region to run on CPU. std::unique_ptr> ",0,train dc18758c270de25d5b37a55d4b41af1157dbe625,tensorflow/tensorflow,"Roll forward ""Add a show_fusion_subcomputations command to interactive_graphviz"" with fix PiperOrigin-RevId: 313426932 Change-Id: Ia2366ee899d7bd0d69448144d1c18164d5801753",hlo_graph_dumper.cc,"@@ -312,12 +312,13 @@ optional MatchTrivialComputation(const HloComputation* computation) { class HloDotDumper { public: HloDotDumper(const HloComputation* computation, absl::string_view label, - const DebugOptions& debug_options, bool show_backend_config, + const DebugOptions& debug_options, + HloRenderOptions hlo_render_options, const HloExecutionProfile* profile, NodeFilter filter) : computation_(computation), label_(label), debug_options_(debug_options), - show_backend_config_(show_backend_config), + hlo_render_options_(hlo_render_options), profile_(profile), filter_(std::move(filter)) {} @@ -384,7 +385,7 @@ class HloDotDumper { const HloComputation* computation_; // never null const string label_; // overall name for the graph const DebugOptions& debug_options_; - const bool show_backend_config_; + const HloRenderOptions hlo_render_options_; const HloExecutionProfile* profile_; // may be null const NodeFilter filter_; @@ -565,7 +566,8 @@ bool HloDotDumper::ShouldShowFusionSubcomputation(const HloInstruction* instr) { bool HloDotDumper::ShouldShowSubcomputation(const HloComputation* subcomp) { if (subcomp->IsFusionComputation()) { const HloInstruction* fusion = subcomp->FusionInstruction(); - if (!filter_.Show(fusion) || filter_.SomeOrAllOperandsOmitted(fusion)) { + if (!filter_.Show(fusion) || filter_.SomeOrAllOperandsOmitted(fusion) || + !hlo_render_options_.show_fusion_subcomputations) { return false; } } @@ -1133,7 +1135,8 @@ string HloDotDumper::GetInstructionNodeMetadata(const 
HloInstruction* instr) { string HloDotDumper::GetInstructionNodeBackendConfig( const HloInstruction* instr) { - if (!show_backend_config_ || instr->raw_backend_config_string().empty()) { + if (!hlo_render_options_.show_backend_config || + instr->raw_backend_config_string().empty()) { return """"; } @@ -1604,14 +1607,14 @@ StatusOr RenderGraph(const HloComputation& computation, const DebugOptions& debug_options, RenderedGraphFormat format, const HloExecutionProfile* hlo_execution_profile, - bool show_backend_config) { + HloRenderOptions hlo_render_options) { tensorflow::mutex_lock lock(url_renderer_mu); if (format == RenderedGraphFormat::kUrl && url_renderer == nullptr) { return Unavailable(""Can't render as URL; no URL renderer was registered.""); } string rendered_dot = - HloDotDumper(&computation, label, debug_options, show_backend_config, + HloDotDumper(&computation, label, debug_options, hlo_render_options, hlo_execution_profile, NodeFilter()) .Dump(); return WrapDotInFormat(rendered_dot, format); @@ -1619,7 +1622,7 @@ StatusOr RenderGraph(const HloComputation& computation, StatusOr RenderNeighborhoodAround( const HloInstruction& node, int radius, RenderedGraphFormat format, - bool show_backend_config, + HloRenderOptions hlo_render_options, const absl::flat_hash_set& boundary) { tensorflow::mutex_lock lock(url_renderer_mu); if (format == RenderedGraphFormat::kUrl && url_renderer == nullptr) { @@ -1632,7 +1635,7 @@ StatusOr RenderNeighborhoodAround( string rendered_dot = HloDotDumper(node.parent(), label, node.GetModule()->config().debug_options(), - show_backend_config, /*profile=*/nullptr, + hlo_render_options, /*profile=*/nullptr, MakeNodeRadiusAroundFilter(&node, radius, boundary)) .Dump(); return WrapDotInFormat(rendered_dot, format); @@ -1641,7 +1644,7 @@ StatusOr RenderNeighborhoodAround( StatusOr RenderAllPathsFromTo(const HloInstruction& from, const HloInstruction& to, int64 max_nodes, RenderedGraphFormat format, - bool show_backend_config) { + HloRenderOptions hlo_render_options) { tensorflow::mutex_lock lock(url_renderer_mu); if (format == RenderedGraphFormat::kUrl && url_renderer == nullptr) { return FailedPrecondition( @@ -1663,7 +1666,7 @@ StatusOr RenderAllPathsFromTo(const HloInstruction& from, ""NODES***

""); } string rendered_dot = - HloDotDumper(from.parent(), label, debug_options, show_backend_config, + HloDotDumper(from.parent(), label, debug_options, hlo_render_options, /*profile=*/nullptr, filter) .Dump(); return WrapDotInFormat(rendered_dot, format); ",0,train dc18758c270de25d5b37a55d4b41af1157dbe625,tensorflow/tensorflow,"Roll forward ""Add a show_fusion_subcomputations command to interactive_graphviz"" with fix PiperOrigin-RevId: 313426932 Change-Id: Ia2366ee899d7bd0d69448144d1c18164d5801753",hlo_graph_dumper.h,"@@ -50,6 +50,14 @@ enum class RenderedGraphFormat { kUrl, }; +struct HloRenderOptions { + // Include the backend config string in the rendered graph. + bool show_backend_config = false; + + // Include the fusion subcomputations in the rendered graph. + bool show_fusion_subcomputations = true; +}; + // Renders an HLO module as a human-readable visual graph. // // Note that this only works well for relatively small graphs (no more than a @@ -61,7 +69,7 @@ StatusOr RenderGraph( const HloComputation& computation, absl::string_view label, const DebugOptions& debug_options, RenderedGraphFormat format, const HloExecutionProfile* hlo_execution_profile = nullptr, - bool show_backend_config = false); + HloRenderOptions hlo_render_options = {}); // Like RenderGraph, but renders only nodes ""near"" the given node in the graph. // @@ -73,7 +81,7 @@ StatusOr RenderGraph( // will be omitted even if they are within the radius. StatusOr RenderNeighborhoodAround( const HloInstruction& node, int radius, RenderedGraphFormat format, - bool show_backend_config = false, + HloRenderOptions hlo_render_options = {}, const absl::flat_hash_set& boundary = {}); // Renders nodes on any of the paths from `from` to `to`. If there are more @@ -82,7 +90,7 @@ StatusOr RenderNeighborhoodAround( StatusOr RenderAllPathsFromTo(const HloInstruction& from, const HloInstruction& to, int64 max_nodes, RenderedGraphFormat format, - bool show_backend_config = false); + HloRenderOptions hlo_render_options = {}); // Registers a function which implements RenderedGraphFormat::kUrl. // ",0,train dc18758c270de25d5b37a55d4b41af1157dbe625,tensorflow/tensorflow,"Roll forward ""Add a show_fusion_subcomputations command to interactive_graphviz"" with fix PiperOrigin-RevId: 313426932 Change-Id: Ia2366ee899d7bd0d69448144d1c18164d5801753",interactive_graphviz.cc,"@@ -112,8 +112,7 @@ constexpr int64 kDefaultMaxNumNodesInAllPaths = 100; using absl::EqualsIgnoreCase; -// A global control for whether backend configuration display is enabled. -bool show_backend_config = true; +HloRenderOptions hlo_render_options; HloInstruction* FindInstruction(const HloModule& module, string node_name) { if (absl::StartsWith(node_name, ""%"")) { @@ -160,6 +159,8 @@ void DoHelpCommand() { Renders all nodes in . backend_config [on|off] Controls whether backend operation configuration information is printed. + show_fusion_subcomputations [on|off] + Controls whether fusion subcomputations are shown. list [name|op_name|op_type] Lists all instructions whose name, metadata op_name, or metadata op_type contains as a substring. @@ -182,15 +183,32 @@ void DoHelpCommand() { // Turn metadata-printing on or off. 
void DoBackendConfigCommand(const std::vector& tokens) { if (tokens.size() == 2 && tokens[1] == ""on"") { - show_backend_config = true; + hlo_render_options.show_backend_config = true; } else if (tokens.size() == 2 && tokens[1] == ""off"") { - show_backend_config = false; + hlo_render_options.show_backend_config = false; } else if (tokens.size() != 1) { std::cerr << ""(Illegal backend_config value. Use either 'on' or 'off'.)"" << std::endl; } std::cout << ""Backend configuration display "" - << (show_backend_config ? ""ON"" : ""OFF"") << std::endl; + << (hlo_render_options.show_backend_config ? ""ON"" : ""OFF"") + << std::endl; +} + +// Turn fusion computation display on or off. +void DoShowFusionSubcomputationsCommand(const std::vector& tokens) { + if (tokens.size() == 2 && tokens[1] == ""on"") { + hlo_render_options.show_fusion_subcomputations = true; + } else if (tokens.size() == 2 && tokens[1] == ""off"") { + hlo_render_options.show_fusion_subcomputations = false; + } else if (tokens.size() != 1) { + std::cerr << ""(Illegal show_fusion_subcomputations value. Use either "" + ""'on' or 'off'.)"" + << std::endl; + } + std::cout << ""Fusion subcomputations display "" + << (hlo_render_options.show_fusion_subcomputations ? ""ON"" : ""OFF"") + << std::endl; } // List all computations in the module. @@ -373,7 +391,7 @@ void DoExtractCommand(const HloModule& module, auto extracted_module = ExtractModule(instr, height); std::cout << extracted_module->ToString( HloPrintOptions::ShortParsable().set_print_backend_config( - show_backend_config)) + hlo_render_options.show_backend_config)) << std::endl; } @@ -517,7 +535,7 @@ void DoAllPathsCommand(const Options& opts, const HloModule& module, } RenderAndDisplayGraph(opts, [&](RenderedGraphFormat format) { return RenderAllPathsFromTo(*from, *to, max_nodes, format, - /*show_backend_config=*/show_backend_config); + hlo_render_options); }); } @@ -582,15 +600,13 @@ void DoPlotCommand(const Options& opts, const HloModule& module, RenderAndDisplayGraph(opts, [&](RenderedGraphFormat format) { return RenderGraph(*comp, /*label=*/"""", comp->parent()->config().debug_options(), format, - /*hlo_execution_profile=*/nullptr, - /*show_backend_config=*/show_backend_config); + /*hlo_execution_profile=*/nullptr, hlo_render_options); }); } else { RenderAndDisplayGraph(opts, [&](RenderedGraphFormat format) { - return RenderNeighborhoodAround( - *instr, graph_width, format, - /*show_backend_config=*/show_backend_config, - /*boundary=*/boundary); + return RenderNeighborhoodAround(*instr, graph_width, format, + hlo_render_options, + /*boundary=*/boundary); }); } } @@ -617,6 +633,8 @@ void InteractiveDumpGraphs(const Options& opts, const HloModule& module) { DoHelpCommand(); } else if (tokens[0] == ""backend_config"") { DoBackendConfigCommand(tokens); + } else if (tokens[0] == ""show_fusion_subcomputations"") { + DoShowFusionSubcomputationsCommand(tokens); } else if (tokens[0] == ""list"") { if (tokens.size() > 1 && tokens[1] == ""computations"") { DoListComputationsCommand(module, tokens); ",0,train fce49887e827abc2627fd2a7bc135800baaafc4f,tensorflow/tensorflow,"Performing the finalization of the LayerCollection outside of FisherEstimator's constructor. This allows layers and losses to be registered after the FisherEstimator (or KFACOptimizer) has been constructed. 
PiperOrigin-RevId: 188889252",estimator_test.py,"@@ -96,49 +96,57 @@ class EstimatorTest(test.TestCase): # Check that we throw an error if we try to build an estimator for vars # that were not manually registered. with self.assertRaises(ValueError): - estimator.FisherEstimator([self.weights, self.bias], 0.1, 0.2, - self.layer_collection) + est = estimator.FisherEstimator([self.weights, self.bias], 0.1, 0.2, + self.layer_collection) + est.make_ops_and_vars() # Check that we throw an error if we don't include registered variables, # i.e. self.weights with self.assertRaises(ValueError): - estimator.FisherEstimator([], 0.1, 0.2, self.layer_collection) + est = estimator.FisherEstimator([], 0.1, 0.2, self.layer_collection) + est.make_ops_and_vars() @test.mock.patch.object(utils.SubGraph, ""variable_uses"", return_value=42) def testVariableWrongNumberOfUses(self, mock_uses): with self.assertRaises(ValueError): - estimator.FisherEstimator([self.weights], 0.1, 0.2, - self.layer_collection) + est = estimator.FisherEstimator([self.weights], 0.1, 0.2, + self.layer_collection) + est.make_ops_and_vars() def testInvalidEstimationMode(self): with self.assertRaises(ValueError): - estimator.FisherEstimator([self.weights], 0.1, 0.2, - self.layer_collection, - estimation_mode=""not_a_real_mode"") + est = estimator.FisherEstimator([self.weights], 0.1, 0.2, + self.layer_collection, + estimation_mode=""not_a_real_mode"") + est.make_ops_and_vars() def testGradientsModeBuild(self): with self._graph.as_default(): - estimator.FisherEstimator([self.weights], 0.1, 0.2, - self.layer_collection, - estimation_mode=""gradients"") + est = estimator.FisherEstimator([self.weights], 0.1, 0.2, + self.layer_collection, + estimation_mode=""gradients"") + est.make_ops_and_vars() def testEmpiricalModeBuild(self): with self._graph.as_default(): - estimator.FisherEstimator([self.weights], 0.1, 0.2, - self.layer_collection, - estimation_mode=""empirical"") + est = estimator.FisherEstimator([self.weights], 0.1, 0.2, + self.layer_collection, + estimation_mode=""empirical"") + est.make_ops_and_vars() def testCurvaturePropModeBuild(self): with self._graph.as_default(): - estimator.FisherEstimator([self.weights], 0.1, 0.2, - self.layer_collection, - estimation_mode=""curvature_prop"") + est = estimator.FisherEstimator([self.weights], 0.1, 0.2, + self.layer_collection, + estimation_mode=""curvature_prop"") + est.make_ops_and_vars() def testExactModeBuild(self): with self._graph.as_default(): - estimator.FisherEstimator([self.weights], 0.1, 0.2, - self.layer_collection, - estimation_mode=""exact"") + est = estimator.FisherEstimator([self.weights], 0.1, 0.2, + self.layer_collection, + estimation_mode=""exact"") + est.make_ops_and_vars() def test_cov_update_thunks(self): """"""Ensures covariance update ops run once per global_step."""""" ",0,train fce49887e827abc2627fd2a7bc135800baaafc4f,tensorflow/tensorflow,"Performing the finalization of the LayerCollection outside of FisherEstimator's constructor. This allows layers and losses to be registered after the FisherEstimator (or KFACOptimizer) has been constructed. 
PiperOrigin-RevId: 188889252",estimator.py,"@@ -149,8 +149,6 @@ class FisherEstimator(object): self._damping = damping self._estimation_mode = estimation_mode self._layers = layer_collection - self._layers.create_subgraph() - self._layers.check_registration(variables) self._gradient_fns = { ""gradients"": self._get_grads_lists_gradients, ""empirical"": self._get_grads_lists_empirical, @@ -164,9 +162,6 @@ class FisherEstimator(object): self._name = name - self._instantiate_factors() - self._register_matrix_functions() - @property def variables(self): return self._variables @@ -285,6 +280,12 @@ class FisherEstimator(object): for block in self.blocks: block.register_matpower(exp) + def _finalize_layer_collection(self): + self._layers.create_subgraph() + self._layers.check_registration(self.variables) + self._instantiate_factors() + self._register_matrix_functions() + def make_ops_and_vars(self, scope=None): """"""Make ops and vars with no specific device placement. @@ -467,6 +468,8 @@ class FisherEstimator(object): """""" self._check_vars_unmade_and_set_made_flag() + self._finalize_layer_collection() + scope = self.name if scope is None else scope cov_variable_thunks = [ ",0,train 5fb8e65180a86fac58709d248201c600f4817f5f,tensorflow/tensorflow,Fix clang formatting errors for micro op EXPAND_DIMS,expand_dims.cc,"@@ -29,7 +29,7 @@ constexpr int kOutputTensor = 0; TfLiteStatus ExpandTensorDim(TfLiteContext* context, const TfLiteEvalTensor* input, int axis, TfLiteEvalTensor* output) { - const TfLiteIntArray *input_dims = input->dims; + const TfLiteIntArray* input_dims = input->dims; TfLiteIntArray* output_dims = output->dims; if (axis < 0) { axis = output_dims->size + axis; @@ -59,11 +59,12 @@ TfLiteStatus GetAxisValueFromTensor(TfLiteContext* context, } if (kTfLiteInt32 == (axis->type)) { - const int32_t *axis_ptr = tflite::micro::GetTensorData(axis); + const int32_t* axis_ptr = tflite::micro::GetTensorData(axis); *axis_value = axis_ptr[0]; return kTfLiteOk; } else { - TF_LITE_KERNEL_LOG(context, ""Axis type %s (%d) not supported by Expand_Dims."", + TF_LITE_KERNEL_LOG(context, + ""Axis type %s (%d) not supported by Expand_Dims."", TfLiteTypeGetName(axis->type), axis->type); return kTfLiteError; } @@ -77,7 +78,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* axis; TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kAxisTensor, &axis)); TfLiteTensor* output; - TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, kOutputTensor, &output)); + TF_LITE_ENSURE_OK(context, + GetOutputSafe(context, node, kOutputTensor, &output)); output->type = input->type; if (IsDynamicTensor(axis)) { TF_LITE_KERNEL_LOG(context, @@ -107,24 +109,28 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { int axis_value; TF_LITE_ENSURE_OK(context, GetAxisValueFromTensor(context, axis, &axis_value)); - if ((axis_value > static_cast(input_dims)) || (axis_value < static_cast(-(input_dims + 1)))) { - TF_LITE_KERNEL_LOG(context, - ""Invalid Expand_Dims axis value (%d)."", axis_value); + if ((axis_value > static_cast(input_dims)) || + (axis_value < static_cast(-(input_dims + 1)))) { + TF_LITE_KERNEL_LOG(context, ""Invalid Expand_Dims axis value (%d)."", + axis_value); return kTfLiteError; } ExpandTensorDim(context, input, axis_value, output); switch (input->type) { case kTfLiteFloat32: { - memCopyN(tflite::micro::GetTensorData(output), tflite::micro::GetTensorData(input), flat_size); + memCopyN(tflite::micro::GetTensorData(output), + tflite::micro::GetTensorData(input), 
flat_size); } break; case kTfLiteInt8: { - memCopyN(tflite::micro::GetTensorData(output), tflite::micro::GetTensorData(input), flat_size); + memCopyN(tflite::micro::GetTensorData(output), + tflite::micro::GetTensorData(input), flat_size); } break; default: - TF_LITE_KERNEL_LOG(context, - ""Expand_Dims only currently supports int8 and float32, got %d."", - input->type); + TF_LITE_KERNEL_LOG( + context, + ""Expand_Dims only currently supports int8 and float32, got %d."", + input->type); return kTfLiteError; } return kTfLiteOk; ",0,train 5fb8e65180a86fac58709d248201c600f4817f5f,tensorflow/tensorflow,Fix clang formatting errors for micro op EXPAND_DIMS,expand_dims_test.cc,"@@ -24,7 +24,7 @@ namespace tflite { namespace testing { namespace { -//Hard coded dimension limit. Is there a predefined constant? +// Hard coded dimension limit. Is there a predefined constant? constexpr int MaxDims = 254; template @@ -93,9 +93,9 @@ TF_LITE_MICRO_TEST(ExpandDimsPositiveAxisTest0) { const int axis_dims[] = {1, 1}; const int axis_data[] = {0}; const int golden_dims[] = {3, 1, 2, 2}; - tflite::testing::TestExpandDims( - input_dims, input_data, axis_dims, axis_data, - golden_dims, golden_data, output_data); + tflite::testing::TestExpandDims(input_dims, input_data, axis_dims, + axis_data, golden_dims, golden_data, + output_data); } TF_LITE_MICRO_TEST(ExpandDimsPositiveAxisTest1) { @@ -106,9 +106,9 @@ TF_LITE_MICRO_TEST(ExpandDimsPositiveAxisTest1) { const int axis_dims[] = {1, 1}; const int axis_data[] = {1}; const int golden_dims[] = {3, 2, 1, 2}; - tflite::testing::TestExpandDims( - input_dims, input_data, axis_dims, axis_data, - golden_dims, golden_data, output_data); + tflite::testing::TestExpandDims(input_dims, input_data, axis_dims, + axis_data, golden_dims, golden_data, + output_data); } TF_LITE_MICRO_TEST(ExpandDimsPositiveAxisTest2) { @@ -119,9 +119,9 @@ TF_LITE_MICRO_TEST(ExpandDimsPositiveAxisTest2) { const int axis_dims[] = {1, 1}; const int axis_data[] = {2}; const int golden_dims[] = {3, 2, 2, 1}; - tflite::testing::TestExpandDims( - input_dims, input_data, axis_dims, axis_data, - golden_dims, golden_data, output_data); + tflite::testing::TestExpandDims(input_dims, input_data, axis_dims, + axis_data, golden_dims, golden_data, + output_data); } TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest4) { @@ -132,9 +132,9 @@ TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest4) { const int axis_dims[] = {1, 1}; const int axis_data[] = {-4}; const int golden_dims[] = {4, 1, 3, 1, 2}; - tflite::testing::TestExpandDims( - input_dims, input_data, axis_dims, axis_data, - golden_dims, golden_data, output_data); + tflite::testing::TestExpandDims(input_dims, input_data, axis_dims, + axis_data, golden_dims, golden_data, + output_data); } TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest3) { @@ -145,9 +145,9 @@ TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest3) { const int axis_dims[] = {1, 1}; const int axis_data[] = {-3}; const int golden_dims[] = {4, 3, 1, 1, 2}; - tflite::testing::TestExpandDims( - input_dims, input_data, axis_dims, axis_data, - golden_dims, golden_data, output_data); + tflite::testing::TestExpandDims(input_dims, input_data, axis_dims, + axis_data, golden_dims, golden_data, + output_data); } TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest2) { @@ -158,9 +158,9 @@ TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest2) { const int axis_dims[] = {1, 1}; const int axis_data[] = {-2}; const int golden_dims[] = {4, 1, 2, 1, 3}; - tflite::testing::TestExpandDims( - input_dims, input_data, axis_dims, axis_data, - golden_dims, 
golden_data, output_data); + tflite::testing::TestExpandDims(input_dims, input_data, axis_dims, + axis_data, golden_dims, golden_data, + output_data); } TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest1) { @@ -171,9 +171,9 @@ TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest1) { const int axis_dims[] = {1, 1}; const int axis_data[] = {-1}; const int golden_dims[] = {4, 1, 3, 2, 1}; - tflite::testing::TestExpandDims( - input_dims, input_data, axis_dims, axis_data, - golden_dims, golden_data, output_data); + tflite::testing::TestExpandDims(input_dims, input_data, axis_dims, + axis_data, golden_dims, golden_data, + output_data); } TF_LITE_MICRO_TESTS_END ",0,train 5fb8e65180a86fac58709d248201c600f4817f5f,tensorflow/tensorflow,Fix clang formatting errors for micro op EXPAND_DIMS,micro_mutable_op_resolver.h,"@@ -206,7 +206,8 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddExpandDims() { - return AddBuiltin(BuiltinOperator_EXPAND_DIMS, Register_EXP(), ParseExpandDims); + return AddBuiltin(BuiltinOperator_EXPAND_DIMS, Register_EXP(), + ParseExpandDims); } TfLiteStatus AddFloor() { ",0,train 890126848c2218c08abef80b44a6f2cb958d642b,tensorflow/tensorflow,"Add instruction count method to HloModule. PiperOrigin-RevId: 182227249",hlo_module.cc,"@@ -457,6 +457,14 @@ HloInstruction* HloModule::OutlineExpressionFromComputation( return call; } +int64 HloModule::instruction_count() const { + int64 n = 0; + for (const auto& computation : computations_) { + n += computation->instruction_count(); + } + return n; +} + std::list HloModule::MakeComputationPostOrder() const { // First determine all root computations by building a set of nonroot // computations (computations which are called by an instruction in the ",0,train 890126848c2218c08abef80b44a6f2cb958d642b,tensorflow/tensorflow,"Add instruction count method to HloModule. PiperOrigin-RevId: 182227249",hlo_module.h,"@@ -129,6 +129,9 @@ class HloModule { // Gets the number of computations in this module. int64 computation_count() const { return computations_.size(); } + // Gets the number of instructions in this module. + int64 instruction_count() const; + // Compute and return a post order of all computations in the module. The sort // is defined like so: if computation A has an instruction which calls // computation B, then A will appear after B in the sort. ",0,train aa59c42debb5146da4f9192321c92fe06eaec35d,tensorflow/tensorflow,"Solve IndexError: list index out of range #43561 Signed-off-by: Hollow Man ",functional.py,"@@ -1089,10 +1089,13 @@ def _should_skip_first_node(layer): # Networks that are constructed with an Input layer/shape start with a # pre-existing node linking their input to output. This node is excluded from # the network config. - return (isinstance(layer, Functional) and - # Filter out Sequential models without an input shape. - isinstance(layer._self_tracked_trackables[0], - input_layer_module.InputLayer)) + if layer._self_tracked_trackables: + return (isinstance(layer, Functional) and + # Filter out Sequential models without an input shape. + isinstance(layer._self_tracked_trackables[0], + input_layer_module.InputLayer)) + else: + return isinstance(layer, Functional) def connect_ancillary_layers(model, created_layers): ",0,train d6e2513d60999bf0cf315c42a14c0e45eb49cda2,tensorflow/tensorflow,"support profiling multiple tpu through one grpc and one session. data are saved with host prefix. PiperOrigin-RevId: 192523668",capture_tpu_profile.cc,"@@ -26,6 +26,7 @@ limitations under the License. 
#include ""tensorflow/contrib/tpu/profiler/dump_tpu_profile.h"" #include ""tensorflow/contrib/tpu/profiler/tpu_profiler.grpc.pb.h"" +#include ""tensorflow/contrib/tpu/profiler/tpu_profiler_analysis.grpc.pb.h"" #include ""tensorflow/contrib/tpu/profiler/version.h"" #include ""tensorflow/core/distributed_runtime/rpc/grpc_util.h"" #include ""tensorflow/core/lib/core/errors.h"" @@ -40,6 +41,7 @@ namespace tensorflow { namespace tpu { namespace { +using ::tensorflow::grpc::TPUProfileAnalysis; using ::tensorflow::TPUProfiler; constexpr uint64 kMaxEvents = 1000000; @@ -64,11 +66,10 @@ Status ValidateHostPortPair(const string& host_port) { return Status::OK(); } -// Returns whether the returned trace is empty. -// Failure are handled by CHECK, i.e. abort() -bool Profile(const string& service_addr, const string& logdir, int duration_ms, - const string& repository_root, const string& session_id, - const ProfileOptions& opts) { +ProfileRequest PopulateProfileRequest(int duration_ms, + const string& repository_root, + const string& session_id, + const ProfileOptions& opts) { ProfileRequest request; request.set_duration_ms(duration_ms); request.set_max_events(kMaxEvents); @@ -83,6 +84,17 @@ bool Profile(const string& service_addr, const string& logdir, int duration_ms, *request.mutable_opts() = opts; std::cout << ""Limiting the number of trace events to "" << kMaxEvents << std::endl; + return request; +} + +// Returns whether the returned trace is empty. +// Failure are handled by CHECK, i.e. abort() +bool Profile(const string& service_addr, const string& logdir, int duration_ms, + const string& repository_root, const string& session_id, + const ProfileOptions& opts) { + ProfileRequest request = + PopulateProfileRequest(duration_ms, repository_root, session_id, opts); + ::grpc::ClientContext context; ::grpc::ChannelArguments channel_args; // TODO(ioeric): use `SetMaxReceiveMessageSize` instead once it's available. @@ -120,7 +132,36 @@ bool NewSession(const string& service_addr, const std::vector& hostnames, int duration_ms, const string& repository_root, const string& session_id, const ProfileOptions& opts) { - return true; + NewProfileSessionRequest new_session_request; + *new_session_request.mutable_request() = + PopulateProfileRequest(duration_ms, repository_root, session_id, opts); + new_session_request.set_repository_root(repository_root); + new_session_request.set_session_id(session_id); + std::copy( + hostnames.begin(), hostnames.end(), + proto2::RepeatedFieldBackInserter(new_session_request.mutable_hosts())); + + ::grpc::ClientContext context; + ::grpc::ChannelArguments channel_args; + // TODO(qiuminxu): use `NewHostPortGrpcChannel` instead once their + // `ValidateHostPortPair` checks for empty host string case. + channel_args.SetMaxReceiveMessageSize(std::numeric_limits::max()); + // TODO(jiesun): GRPC support following relevant naming scheme: + // 1. dns:///host:port + // 2. ipv4:host:port or ipv6:[host]:port + // We might need to change the prefix which depends on what TPU name resolver + // will give us. 
+ std::unique_ptr stub = + TPUProfileAnalysis::NewStub(::grpc::CreateCustomChannel( + ""dns:///"" + service_addr, ::grpc::InsecureChannelCredentials(), + channel_args)); + NewProfileSessionResponse new_session_response; + TF_QCHECK_OK(FromGrpcStatus( + stub->NewSession(&context, new_session_request, &new_session_response))); + + std::cout << ""Profile session succeed for hosts:"" + << str_util::Join(hostnames, "",""); + return new_session_response.empty_trace(); } } // namespace ",0,test d6e2513d60999bf0cf315c42a14c0e45eb49cda2,tensorflow/tensorflow,"support profiling multiple tpu through one grpc and one session. data are saved with host prefix. PiperOrigin-RevId: 192523668",dump_tpu_profile.cc,"@@ -64,7 +64,8 @@ Status WriteGzippedDataToFile(const string& filename, const string& data) { Status DumpTraceToLogDirectory(StringPiece run_dir, const string& host_prefix, const string& encoded_trace, std::ostream* os) { - string proto_path = JoinPath(run_dir, kProtoTraceFileName); + string proto_path = + JoinPath(run_dir, StrCat(host_prefix, kProtoTraceFileName)); TF_RETURN_IF_ERROR( WriteStringToFile(Env::Default(), proto_path, encoded_trace)); LOG(INFO) << ""Dumped raw-proto trace data to "" << proto_path; ",0,test 13dd442b4c248a5d0cbc5ed7b407e2fd98712cc0,tensorflow/tensorflow,"Remove unused transcription array. Change: 151959018",ctc_decoder.h,"@@ -89,7 +89,6 @@ class CTCGreedyDecoder : public CTCDecoder { std::vector& output_b = (*output)[0][b]; int prev_class_ix = -1; - std::vector transcription; (*scores)(b, 0) = 0; for (int t = 0; t < seq_len_b; ++t) { auto row = input[t].row(b); @@ -98,7 +97,6 @@ class CTCGreedyDecoder : public CTCDecoder { if (max_class_ix != blank_index_ && !(merge_repeated_ && max_class_ix == prev_class_ix)) { output_b.push_back(max_class_ix); - transcription.push_back(max_class_ix); } prev_class_ix = max_class_ix; } ",0,test 5a8679283766231c98e7b3074bad646111f96f2f,tensorflow/tensorflow,Try to pacify pylint.,def_function_test.py,"@@ -729,8 +729,8 @@ class DefFunctionTest(test.TestCase, parameterized.TestCase): (None, 'foo.bar'), # implements (None, True, False), # relax_shapes )) - def test_pickle(self, input_signature, autograph, autograph_options, implements, - relax_shapes): + def test_pickle(self, input_signature, autograph, autograph_options, + implements, relax_shapes): """"""@function objects can be pickled and unpickled."""""" # Can't pickle functions in __main__: from tensorflow.python.eager.def_function_test import undecorated_function ",0,train 9fd71390a9839c7912d83fffd4f762ea4970e3f1,tensorflow/tensorflow,"Checkpoints default values for RunConfig (#8488) * Default value of save_checkpoints_secs To not use the default value of save_checkpoints_secs if save_checkpoints_steps is specified in the RunConfig call * Update run_config.py * Addressing None, None caller input As mentioned in the review comments, the caller many not want any checkpoints and can pass None, None for these parameters. This is to address that case. * Fix indentation * Update run_config.py",run_config.py,"@@ -198,6 +198,7 @@ class RunConfig(ClusterConfig): parameter servers), you probably want to use `learn_runner.EstimatorConfig` instead. 
"""""" + _USE_DEFAULT = 0 def __init__(self, master=None, @@ -206,7 +207,7 @@ class RunConfig(ClusterConfig): gpu_memory_fraction=1, tf_random_seed=None, save_summary_steps=100, - save_checkpoints_secs=600, + save_checkpoints_secs=_USE_DEFAULT, save_checkpoints_steps=None, keep_checkpoint_max=5, keep_checkpoint_every_n_hours=10000, @@ -258,6 +259,11 @@ class RunConfig(ClusterConfig): self._tf_random_seed = tf_random_seed self._save_summary_steps = save_summary_steps self._save_checkpoints_secs = save_checkpoints_secs + if save_checkpoints_secs == RunConfig._USE_DEFAULT: + if save_checkpoints_steps is None: + self._save_checkpoints_secs = 600 + else: + self._save_checkpoints_secs = None self._save_checkpoints_steps = save_checkpoints_steps # TODO(weiho): Remove these after ModelFn refactoring, when users can ",0,train 8b7aea89ae82ccc5da20e5ab029d069ddeff3f19,tensorflow/tensorflow,"Check for correct linkable output tensor descriptor. PiperOrigin-RevId: 278413334 Change-Id: Ie6ec0d82b7972f80c8760663a96c79df9b68840d",inference_context.cc,"@@ -390,6 +390,13 @@ void InferenceContext::Merge() { !IsReady(ready_tensors, linkable_node)) { continue; } + const auto& original_dst_def = + node.operations[0]->GetDefinition().dst_tensors[0]; + const auto& link_dst_def = + linkable_node.operations[0]->GetDefinition().dst_tensors[0]; + if (original_dst_def != link_dst_def) { + continue; + } MergeCLNodes(&linkable_node, &node); nodes_.erase(nodes_.begin() + next_nodes[0]); i -= 1; ",0,train 8b7aea89ae82ccc5da20e5ab029d069ddeff3f19,tensorflow/tensorflow,"Check for correct linkable output tensor descriptor. PiperOrigin-RevId: 278413334 Change-Id: Ie6ec0d82b7972f80c8760663a96c79df9b68840d",tensor_type.h,"@@ -41,6 +41,8 @@ struct TensorDescriptor { bool operator==(const TensorDescriptor& d) const { return data_type == d.data_type && storage_type == d.storage_type; } + + bool operator!=(const TensorDescriptor& d) const { return !(*this == d); } }; std::string ToString(TensorStorageType type); ",0,train 47b674c938a38c6d88f27244a12ce3944c2f0464,tensorflow/tensorflow,"[XLA] Remove a source of nondeterminism in HLO clustering. Record the HLO clusters with std::set instead of std::unordered_set to ensure that the algorithm to assign each cluster a sequence number during a set traversal is deterministic. PiperOrigin-RevId: 178830794",mark_for_compilation_pass.cc,"@@ -172,10 +172,15 @@ bool HasResourceInputOrOutput(const Node& node) { DT_RESOURCE) != node.output_types().end(); } +struct NodeCompare { + bool operator()(const Node* a, const Node* b) { return a->id() < b->id(); } +}; +using OrderedNodeSet = std::set; + Status FindCompilationCandidates( const Graph& graph, FunctionLibraryDefinition* flib_def, Env* env, const std::function& is_compilable_fn, - std::unordered_set* candidates) { + OrderedNodeSet* candidates) { OptimizerOptions opts; std::unique_ptr pflr( new ProcessFunctionLibraryRuntime(nullptr, env, TF_GRAPH_DEF_VERSION, @@ -354,7 +359,7 @@ Status MarkForCompilationPass::RunImpl( Graph* graph = options.graph->get(); - std::unordered_set compilation_candidates; + OrderedNodeSet compilation_candidates; TF_RETURN_IF_ERROR(FindCompilationCandidates( *graph, options.flib_def, (options.session_options != nullptr) ? 
options.session_options->env ",0,train dd1cfe2f2092517d8a57bad04b2cb269a19b37ee,tensorflow/tensorflow,"Convert InputBuffer to BuffereedInputStream for FixedLengthRecordDatasetOp This fix converts InputBuffer to BuffereedInputStream for FixedLengthRecordDatasetOp, so that it is possible to add compression layer on top for FixedLengthRecordDatasetOp. Signed-off-by: Yong Tang ",reader_dataset_ops.cc,"@@ -383,13 +383,13 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel { mutex_lock l(mu_); do { // We are currently processing a file, so try to read the next record. - if (input_buffer_) { - const int64 current_pos = input_buffer_->Tell(); + if (buffered_input_stream_) { + const int64 current_pos = buffered_input_stream_->Tell(); DCHECK_GE(file_pos_limit_, 0); if (current_pos < file_pos_limit_) { string record; - TF_RETURN_IF_ERROR( - input_buffer_->ReadNBytes(dataset()->record_bytes_, &record)); + TF_RETURN_IF_ERROR(buffered_input_stream_->ReadNBytes( + dataset()->record_bytes_, &record)); // Produce the record as output. out_tensors->emplace_back(ctx->allocator({}), DT_STRING, TensorShape({})); @@ -400,7 +400,7 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel { // We have reached the end of the current file, so maybe // move on to next file. - input_buffer_.reset(); + buffered_input_stream_.reset(); file_.reset(); ++current_file_index_; } @@ -432,10 +432,10 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel { } TF_RETURN_IF_ERROR(ctx->env()->NewRandomAccessFile( dataset()->filenames_[current_file_index_], &file_)); - input_buffer_.reset( - new io::InputBuffer(file_.get(), dataset()->buffer_size_)); + buffered_input_stream_.reset(new io::BufferedInputStream( + file_.get(), dataset()->buffer_size_)); TF_RETURN_IF_ERROR( - input_buffer_->SkipNBytes(dataset()->header_bytes_)); + buffered_input_stream_->SkipNBytes(dataset()->header_bytes_)); } while (true); } @@ -450,10 +450,11 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel { TF_RETURN_IF_ERROR(writer->WriteScalar(full_name(""current_file_index""), current_file_index_)); - // `input_buffer_` is empty if + // `buffered_input_stream_` is empty if // 1. GetNext has not been called even once. // 2. All files have been read and iterator has been exhausted. - int64 current_pos = input_buffer_ ? input_buffer_->Tell() : -1; + int64 current_pos = + buffered_input_stream_ ? buffered_input_stream_->Tell() : -1; TF_RETURN_IF_ERROR( writer->WriteScalar(full_name(""current_pos""), current_pos)); return Status::OK(); @@ -471,18 +472,18 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel { reader->ReadScalar(full_name(""current_pos""), ¤t_pos)); // Seek to current_pos. - input_buffer_.reset(); + buffered_input_stream_.reset(); file_.reset(); - if (current_pos >= 0) { // There was an active input_buffer_. + if (current_pos >= 0) { // There was an active buffered_input_stream_. 
uint64 file_size; TF_RETURN_IF_ERROR(ctx->env()->GetFileSize( dataset()->filenames_[current_file_index_], &file_size)); file_pos_limit_ = file_size - dataset()->footer_bytes_; TF_RETURN_IF_ERROR(ctx->env()->NewRandomAccessFile( dataset()->filenames_[current_file_index_], &file_)); - input_buffer_.reset( - new io::InputBuffer(file_.get(), dataset()->buffer_size_)); - TF_RETURN_IF_ERROR(input_buffer_->Seek(current_pos)); + buffered_input_stream_.reset(new io::BufferedInputStream( + file_.get(), dataset()->buffer_size_)); + TF_RETURN_IF_ERROR(buffered_input_stream_->SkipNBytes(current_pos)); } return Status::OK(); @@ -492,8 +493,9 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel { mutex mu_; size_t current_file_index_ GUARDED_BY(mu_) = 0; std::unique_ptr file_ - GUARDED_BY(mu_); // must outlive input_buffer_ - std::unique_ptr input_buffer_ GUARDED_BY(mu_); + GUARDED_BY(mu_); // must outlive buffered_input_stream_ + std::unique_ptr buffered_input_stream_ + GUARDED_BY(mu_); int64 file_pos_limit_ GUARDED_BY(mu_) = -1; }; ",0,train 78c34b2950846690673ccdd43ff14ba109fbddd6,tensorflow/tensorflow,"Improves error behavior in rewrite pass. PiperOrigin-RevId: 386375090 Change-Id: Ie4e44c744a5a25ee1dcd3502a5cb96c11e977e5a",distributed_tpu_rewrite_pass.cc,"@@ -458,6 +458,10 @@ class TensorDevicePlacer { // Reports that the argument/return-value at index has been assigned // by the user to a given device. void ReportDeviceAssigned(int64_t device, int64_t index) { + if (device >= index_nodes_.size()) { + LOG(DFATAL) << ""Sharding assignment is out of bounds. Check that the "" + ""number of nodes is properly set.""; + } DeviceNode* node = &index_nodes_.at(device); node->size += sizes_.at(index); heap_.Adjust(node); ",0,train 99ef7181786b4bc471b10582fdab21993bda152f,tensorflow/tensorflow,"Adjust TPUEstimator timeout for worker shutdown to 60 seconds. PiperOrigin-RevId: 198477309",tpu_estimator.py,"@@ -2228,11 +2228,11 @@ class TPUEstimator(estimator_lib.Estimator): if shutdown_mode: if shutdown_mode == 'shutdown_worker': finalizer_hooks = [ - session_support.ShutdownLameWorkers(timeout_ms=1000), + session_support.ShutdownLameWorkers(timeout_ms=60*1000), ] elif shutdown_mode == 'shutdown_computation': finalizer_hooks = [ - session_support.RestartComputation(timeout_ms=1000), + session_support.RestartComputation(timeout_ms=60*1000), ] else: raise ValueError('Unknown TF_TPU_GRACEFUL_SHUTDOWN_MODE ""%s""' % ",0,train 54b5a2163bc2c5a13db8de39fc99ae558fc854a4,tensorflow/tensorflow,"batch_matmul_op_test.py: Updated to pass in TF2, by using gradient_checker_v2 and removing placeholder nodes. 
PiperOrigin-RevId: 223863787",batch_matmul_op_test.py,"@@ -20,9 +20,8 @@ from __future__ import print_function import numpy as np -from tensorflow.python.framework import constant_op -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gradient_checker +from tensorflow.python.framework import test_util +from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -76,25 +75,18 @@ class BatchMatmulOpTest(test.TestCase): # Compares _tfpBatchMatmul(x, y, alpha, adj) and _npBatchMatMul(x, y, alpha, # adj) - def _compare(self, x_in, y_in, adjoint_a, adjoint_b, static_shape=True): + def _compare(self, x_in, y_in, adjoint_a, adjoint_b): x_t_shape = x_in.shape[:-2] + (x_in.shape[-1], x_in.shape[-2]) y_t_shape = y_in.shape[:-2] + (y_in.shape[-1], y_in.shape[-2]) x = x_in if not adjoint_a else x_in.reshape(x_t_shape) y = y_in if not adjoint_b else y_in.reshape(y_t_shape) is_floating = x.dtype != np.int32 tol = 100 * np.finfo(x.dtype).eps if is_floating else 0 - with self.cached_session(use_gpu=is_floating) as sess: - if static_shape: - z0 = math_ops.matmul(x, y, adjoint_a=adjoint_a, adjoint_b=adjoint_b) - z0_val = self.evaluate(z0) - else: - x_ph = array_ops.placeholder(x.dtype) - y_ph = array_ops.placeholder(y.dtype) - z0 = math_ops.matmul( - x_ph, y_ph, adjoint_a=adjoint_a, adjoint_b=adjoint_b) - z0_val = sess.run(z0, feed_dict={x_ph: x, y_ph: y}) + with test_util.device(use_gpu=is_floating): + z0 = math_ops.matmul( + x, y, adjoint_a=adjoint_a, adjoint_b=adjoint_b) z1 = self._npBatchMatmul(x, y, adjoint_a, adjoint_b) - self.assertAllClose(z0_val, z1, rtol=tol, atol=tol) + self.assertAllClose(z0, z1, rtol=tol, atol=tol) def _rand(self, shape, dtype): vals = np.array(np.random.normal(-10, 10, np.prod(shape)), dtype=dtype) @@ -103,42 +95,41 @@ class BatchMatmulOpTest(test.TestCase): vals += 1j * imag return vals.reshape(shape) - def _testNonEmpty(self, dtype, adjoint_a, adjoint_b, use_static_shape): + def _testNonEmpty(self, dtype, adjoint_a, adjoint_b): - def compareNonEmpty(self, a_shape, b_shape): + def CompareNonEmpty(self, a_shape, b_shape): self._compare( self._rand(a_shape, dtype), - self._rand(b_shape, dtype), adjoint_a, adjoint_b, use_static_shape) + self._rand(b_shape, dtype), adjoint_a, adjoint_b) - compareNonEmpty(self, [1, 2, 3], [1, 3, 5]) - compareNonEmpty(self, [1, 2, 3], [1, 3, 1]) - compareNonEmpty(self, [1, 1, 3], [1, 3, 5]) - compareNonEmpty(self, [1, 2, 3], [1, 3, 5]) - compareNonEmpty(self, [7, 1, 3], [7, 3, 5]) - compareNonEmpty(self, [7, 2, 3], [7, 3, 1]) - compareNonEmpty(self, [7, 2, 3], [7, 3, 5]) - compareNonEmpty(self, [10, 64, 75], [10, 75, 30]) - compareNonEmpty(self, [5, 7, 2, 3], [5, 7, 3, 5]) + CompareNonEmpty(self, [1, 2, 3], [1, 3, 5]) + CompareNonEmpty(self, [1, 2, 3], [1, 3, 1]) + CompareNonEmpty(self, [1, 1, 3], [1, 3, 5]) + CompareNonEmpty(self, [1, 2, 3], [1, 3, 5]) + CompareNonEmpty(self, [7, 1, 3], [7, 3, 5]) + CompareNonEmpty(self, [7, 2, 3], [7, 3, 1]) + CompareNonEmpty(self, [7, 2, 3], [7, 3, 5]) + CompareNonEmpty(self, [10, 64, 75], [10, 75, 30]) + CompareNonEmpty(self, [5, 7, 2, 3], [5, 7, 3, 5]) - def _testEmpty(self, dtype, adjoint_a, adjoint_b, use_static_shape): + def _testEmpty(self, dtype, adjoint_a, adjoint_b): - def compareEmpty(self, a_shape, b_shape): + def CompareEmpty(self, a_shape, b_shape): self._compare( np.zeros(a_shape).astype(dtype), - np.zeros(b_shape).astype(dtype), adjoint_a, adjoint_b, - use_static_shape) + 
np.zeros(b_shape).astype(dtype), adjoint_a, adjoint_b) - compareEmpty(self, [0, 3, 2], [0, 2, 4]) - compareEmpty(self, [3, 0, 2], [3, 2, 5]) - compareEmpty(self, [3, 3, 2], [3, 2, 0]) + CompareEmpty(self, [0, 3, 2], [0, 2, 4]) + CompareEmpty(self, [3, 0, 2], [3, 2, 5]) + CompareEmpty(self, [3, 3, 2], [3, 2, 0]) -def _GetBatchMatmulOpTest(dtype, adjoint_a, adjoint_b, use_static_shape): +def _GetBatchMatmulOpTest(dtype, adjoint_a, adjoint_b): def Test(self): np.random.seed(42) - self._testNonEmpty(dtype, adjoint_a, adjoint_b, use_static_shape) - self._testEmpty(dtype, adjoint_a, adjoint_b, use_static_shape) + self._testNonEmpty(dtype, adjoint_a, adjoint_b) + self._testEmpty(dtype, adjoint_a, adjoint_b) return Test @@ -154,17 +145,13 @@ class BatchMatmulGradientTest(test.TestCase): y = y_in if not adjoint_b else y_in.reshape(y_t_shape) epsilon = np.finfo(x.dtype).eps delta = epsilon**(1.0 / 3.0) - with self.cached_session(use_gpu=True): - inx = constant_op.constant(x) - iny = constant_op.constant(y) - z = math_ops.matmul(inx, iny, adjoint_a, adjoint_b) - loss = math_ops.reduce_sum(z) - ((x_jacob_t, x_jacob_n), - (y_jacob_t, y_jacob_n)) = gradient_checker.compute_gradient( - [inx, iny], [x.shape, y.shape], - loss, [1], - x_init_value=[x, y], - delta=delta) + def Loss(x, y): + z = math_ops.matmul(x, y, adjoint_a, adjoint_b) + return math_ops.reduce_sum(z) + with self.session(use_gpu=True): + ((x_jacob_t, y_jacob_t), + (x_jacob_n, y_jacob_n)) = gradient_checker_v2.compute_gradient( + Loss, [x, y], delta=delta) tol = 20 * delta self.assertAllClose(x_jacob_t, x_jacob_n, rtol=tol, atol=tol) self.assertAllClose(y_jacob_t, y_jacob_n, rtol=tol, atol=tol) @@ -202,11 +189,9 @@ if __name__ == ""__main__"": for adjoint_a_ in False, True: for adjoint_b_ in False, True: name = ""%s_%s_%s"" % (dtype_.__name__, adjoint_a_, adjoint_b_) - for use_static_shape in True, False: - setattr(BatchMatmulOpTest, - ""testBatchMatmulOp_"" + name + (""_%s"" % use_static_shape), - _GetBatchMatmulOpTest(dtype_, adjoint_a_, adjoint_b_, - use_static_shape)) + setattr(BatchMatmulOpTest, + ""testBatchMatmulOp_"" + name, + _GetBatchMatmulOpTest(dtype_, adjoint_a_, adjoint_b_)) if dtype_ is not np.int32: setattr(BatchMatmulGradientTest, ""testBatchMatmulGradient_"" + name, _GetBatchMatmulGradientTest(dtype_, adjoint_a_, adjoint_b_)) ",0,test 4d6c4c72b4ffd2c558d7908a1f3ec32f2f92379e,tensorflow/tensorflow,"Reduce precision in one conv op test. This test was flakily failing, presumably due to nondeterministic cudnn convolution algorithm choices. 
PiperOrigin-RevId: 248122343",conv_ops_test.py,"@@ -2192,7 +2192,8 @@ class Conv2DTest(test.TestCase): padding=[[0, 0], [2, 2], [2, 2], [0, 0]], test_input=True, data_format=data_format, - use_gpu=use_gpu) + use_gpu=use_gpu, + max_err=0.003) @test_util.deprecated_graph_mode_only def testFilterGradient2x2PaddingStrideOne(self): ",0,train 4588361b6a5b48aad1ead88755d2afef38605af5,tensorflow/tensorflow,"tfdbg: adjust the scope of mutex for keeping track of disk usage PiperOrigin-RevId: 211966207",debug_io_utils.cc,"@@ -693,6 +693,7 @@ uint64 DebugFileIO::diskBytesUsed = 0; mutex DebugFileIO::bytes_mu(LINKER_INITIALIZED); bool DebugFileIO::requestDiskByteUsage(uint64 bytes) { + mutex_lock l(bytes_mu); if (globalDiskBytesLimit == 0) { const char* env_tfdbg_disk_bytes_limit = getenv(""TFDBG_DISK_BYTES_LIMIT""); if (env_tfdbg_disk_bytes_limit == nullptr || @@ -707,7 +708,6 @@ bool DebugFileIO::requestDiskByteUsage(uint64 bytes) { if (bytes == 0) { return true; } - mutex_lock l(bytes_mu); if (diskBytesUsed + bytes < globalDiskBytesLimit) { diskBytesUsed += bytes; return true; ",0,train b1f5d9e26125b4ad62c4566e4c2ddd784ce625bc,tensorflow/tensorflow,Add tests to validate only parameters typed with ops.Tensor are converted to Tensors,function_test.py,"@@ -3932,7 +3932,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): gradients(constant_op.constant([[[1.0], [2.0]]])) # No error is raised - def testTraceWithAnnotationsBasic(self): + def testFollowTypeHintsTraceBasic(self): trace_count = [0] def func(x: ops.Tensor): trace_count[0] += 1 @@ -3952,7 +3952,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): disabled(3) # Retrace self.assertEqual(trace_count[0], 3) - def testTraceWithAnnotationsWithArgs(self): + def testFollowTypeHintsTraceWithArgs(self): trace_count = [0] def func(*args: ops.Tensor): trace_count[0] += 1 @@ -3973,7 +3973,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): disabled(args2) # Retrace self.assertEqual(trace_count[0], 2) - def testTraceWithAnnotationsWithKwargs(self): + def testFollowTypeHintsTraceWithKwargs(self): trace_count = [0] def func(t: ops.Tensor, **kwargs: ops.Tensor): trace_count[0] += 1 @@ -3991,7 +3991,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): disabled(2, x=2, y=2.0, z=""two"") # Retrace self.assertEqual(trace_count[0], 2) - def testTraceWithAnnotationsWithMultipleInputTypes(self): + def testFollowTypeHintsTraceWithMultipleInputTypes(self): trace_count = [0] def func(t: ops.Tensor, *args: ops.Tensor, **kwargs: ops.Tensor): @@ -4010,6 +4010,62 @@ class FunctionTest(test.TestCase, parameterized.TestCase): disabled(2, constant_op.constant(2), ""str2"", x=5.0) # Retrace self.assertEqual(trace_count[0], 2) + def testFollowTypeHintsTraceWithOnlyArgNamed(self): + trace_count = [0] + def func(t: ops.Tensor, i: int = 1, **kwargs): + trace_count[0] += 1 + return t + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + trace_count = [0] + enabled(1, 3, x=4.0, y=""str"") + enabled(2, 4, x=4.0, y=""str"") # Retrace + self.assertEqual(trace_count[0], 2) + + def testFollowTypeHintsTraceWithNotAllNamed(self): + trace_count = [0] + def func(x, y: ops.Tensor, z: int): + trace_count[0] += 1 + return x + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + enabled(1, 2, 3) + enabled(1, 20, 3) # No retrace - change in ops.Tensor typed arg + enabled(2, 2, 3) # Retrace - change in untyped arg + enabled(2, 2, 4) # Retrace - change in typed arg + self.assertEqual(trace_count[0], 
3) + + def testFollowTypeHintsTraceWithOnlyArgsNamed(self): + trace_count = [0] + def func(x, y, *args: ops.Tensor): + trace_count[0] += 1 + return x + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + trace_count = [0] + enabled(1, 20, 3, 4, 5, 6) + enabled(1, 20, 3, 4, 5, 60) # No retrace - change in *args + enabled(1, 30, 7, 8, 9, 10) # Retrace - change in args + self.assertEqual(trace_count[0], 2) + + def testFollowTypeHintsTraceWithOnlyKwargsNamed(self): + trace_count = [0] + def func(x, y, *args, **kwargs: ops.Tensor): + trace_count[0] += 1 + return x + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + trace_count = [0] + enabled(1, 2, 3, 4, 5, 6, a=1.0, b=2.0, c=3.0) + enabled(1, 2, 3, 4, 5, 6, a=1.5, b=2.5, c=3.5) # No retrace - change in **kwargs + enabled(100, 2, 3, 4, 5, 6, a=1.0, b=2.0, c=3.0) # Retrace - change in args + enabled(1, 2, 3, 4, 5, 100, a=1.0, b=2.0, c=3.0) # Retrace - change in *args + self.assertEqual(trace_count[0], 3) + class MultiDeviceTest(test.TestCase, parameterized.TestCase): @test_util.run_gpu_only ",0,train 36b42528ec7eb02f76cb5b802c43306b871b6229,tensorflow/tensorflow,"Removed Warning from the file. Fixed the warning in the file.",quantization_utils.cc,"@@ -117,7 +117,7 @@ void SymmetricPerChannelQuantization(const float* const input, // Calculate scales per channel std::vector scale_invs(channel_dim_size); const float half_scale = kMaxQuantizedValue; - for (size_t channel_idx = 0; channel_idx < channel_dim_size; channel_idx++) { + for (int channel_idx = 0; channel_idx < channel_dim_size; channel_idx++) { const float half_range = std::max(std::abs(min_vals[channel_idx]), std::abs(max_vals[channel_idx])); output_scales->at(channel_idx) = half_range / half_scale; ",0,train 6adf6a06e1975adadec5cb0a7b9778363e51f61c,tensorflow/tensorflow,"Update GraphDef version to 748. PiperOrigin-RevId: 370412154 Change-Id: Idc87751b9c95a5fa4268d92ad934fbe0b63cddc9",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 747 // Updated: 2021/4/25 +#define TF_GRAPH_DEF_VERSION 748 // Updated: 2021/4/26 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // ",0,train 86a83cf73c93909a0e2f54d4bb4d0879a011b899,tensorflow/tensorflow,"Make more functions work as metric functions for MetricSpec. Change: 147382300",metric_spec.py,"@@ -53,7 +53,8 @@ def _args(fn): _CANONICAL_LABELS_ARG = 'labels' _LABELS_ARGS = set((_CANONICAL_LABELS_ARG, 'label', 'targets', 'target')) _CANONICAL_PREDICTIONS_ARG = 'predictions' -_PREDICTIONS_ARGS = set((_CANONICAL_PREDICTIONS_ARG, 'prediction')) +_PREDICTIONS_ARGS = set((_CANONICAL_PREDICTIONS_ARG, 'prediction', + 'logits', 'logit')) _CANONICAL_WEIGHTS_ARG = 'weights' _WEIGHTS_ARGS = set((_CANONICAL_WEIGHTS_ARG, 'weight')) ",0,train f9c5e921dd7058ea517a3d984b2e161d8dd19cee,tensorflow/tensorflow,"[TF:XLA] Implement SqrtGrad. 
PiperOrigin-RevId: 167000454",binary_ops_test.py,"@@ -94,6 +94,12 @@ class BinaryOpsTest(XLATestCase): np.array([5, 6, 7, 8], dtype=dtype), expected=np.array([-160, -81, -28, -4], dtype=dtype)) + self._testBinary( + gen_math_ops._sqrt_grad, + np.array([4, 3, 2, 1], dtype=dtype), + np.array([5, 6, 7, 8], dtype=dtype), + expected=np.array([0.625, 1, 1.75, 4], dtype=dtype)) + self._testBinary( gen_nn_ops._softplus_grad, np.array([4, 3, 2, 1], dtype=dtype), ",0,test f9c5e921dd7058ea517a3d984b2e161d8dd19cee,tensorflow/tensorflow,"[TF:XLA] Implement SqrtGrad. PiperOrigin-RevId: 167000454",randomized_tests.cc,"@@ -2496,6 +2496,16 @@ TEST_F(OpTest, Sqrt) { }); } +TEST_F(OpTest, SqrtGrad) { + Repeatedly([this]() { + auto dims = RandomDims(); + return ExpectTfAndXlaOutputsAreClose(OpTestBuilder(""SqrtGrad"") + .RandomInput(DT_FLOAT, dims) + .RandomInput(DT_FLOAT, dims) + .Attr(""T"", DT_FLOAT)); + }); +} + TEST_F(OpTest, SquaredDifference) { Repeatedly([this]() { auto dims = BroadcastableDims(); ",0,test f9c5e921dd7058ea517a3d984b2e161d8dd19cee,tensorflow/tensorflow,"[TF:XLA] Implement SqrtGrad. PiperOrigin-RevId: 167000454",binary_ops.cc,"@@ -107,6 +107,10 @@ XLA_MAKE_BINARY( b->Mul(b->Pow(lhs, XlaHelpers::IntegerLiteral(b, input_type(0), 3)), b->Div(rhs, XlaHelpers::IntegerLiteral(b, input_type(0), -2)), extend_dimensions)); +XLA_MAKE_BINARY(SqrtGrad, + b->Div(b->Mul(rhs, + XlaHelpers::FloatLiteral(b, input_type(0), 0.5)), + lhs, extend_dimensions)); static xla::ComputationDataHandle Square(xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x) { ",0,test 4c0a09fc302e193df54f127ca59f465e4966b8db,tensorflow/tensorflow,"fixit for resource_scatter_update. PiperOrigin-RevId: 322286887 Change-Id: I9c2293d00c371b9cab279366bc893e509e1ded3b",optimizer_v2.py,"@@ -46,7 +46,6 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.ops import gradients from tensorflow.python.ops import math_ops -from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables as tf_variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import revived_types @@ -1159,7 +1158,8 @@ class OptimizerV2(trackable.Trackable): def _resource_scatter_update(self, x, i, v): with ops.control_dependencies( - [resource_variable_ops.resource_scatter_update(x.handle, i, v)]): + [gen_resource_variable_ops.ResourceScatterUpdate( + resource=x.handle, indices=i, updates=v)]): return x.value() @property ",0,train 3494c78bc9eef521af3986eddfe4bf00cf9f0fe4,tensorflow/tensorflow,"[xla::gpu] skip autotuning on Ampere and above. PiperOrigin-RevId: 441499202",nvptx_compiler.cc,"@@ -147,8 +147,12 @@ Status NVPTXCompiler::OptimizeHloPostLayoutAssignment( HloPassPipeline post_pipeline(""nvptx post-layout_assignment part 2""); - // Find the fastest algorithm for GEMMs. - post_pipeline.AddPass(stream_exec, device_allocator); + // Find the fastest algorithm for GEMMs. Skip on Ampere and later as the + // algorithm goes unused. 
+ if (!stream_exec->GetDeviceDescription().cuda_compute_capability().IsAtLeast( + se::CudaComputeCapability::AMPERE)) { + post_pipeline.AddPass(stream_exec, device_allocator); + } if (!IsBefEnabled(hlo_module->config())) { // Transform TriangularSolve ops into custom-calls, so we can add temp ",0,train 00e2cbf2e84524dd9e8320b58cdccf2c8b3f33b3,tensorflow/tensorflow,Changes based on review,mkl_fused_batch_norm_op.cc,"@@ -34,12 +34,6 @@ using BatchNormBwdPd = mkldnn::batch_normalization_backward::primitive_desc; namespace tensorflow { using CPUDevice = Eigen::ThreadPoolDevice; -#ifdef ENABLE_MKLDNN_V1 -#define BN_FLAGS mkldnn::batch_normalization_flags -#else -#define BN_FLAGS mkldnn -#endif - struct MklBatchNormFwdParams { memory::dims src_dims; int depth; @@ -61,7 +55,7 @@ struct MklBatchNormFwdParams { bool training) : src_dims(src_dims), depth(depth), eps(eps), training(training) {} -#endif +#endif // !ENABLE_MKLDNN_V1 }; template @@ -87,22 +81,18 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { static_cast(const_cast(src_data))); context_.dst_mem->set_data_handle(static_cast(dst_data)); - if (context_.flags & (int)BN_FLAGS::use_scale_shift) + if (context_.flags & static_cast(BN_FLAGS::use_scale_shift)) context_.weights_mem->set_data_handle( static_cast(const_cast(weights_data))); if ((context_.pkind == prop_kind::forward_training) || - (context_.flags & (int)BN_FLAGS::use_global_stats)) { + (context_.flags & static_cast(BN_FLAGS::use_global_stats))) { context_.mean_mem->set_data_handle(static_cast(mean_data)); context_.variance_mem->set_data_handle(static_cast(variance_data)); } #ifdef ENABLE_MKLDNN_V1 // Execute batch-normalization forward primitives. - DCHECK_EQ(context_.fwd_primitives.size(), context_.net_args.size()); - for (size_t i = 0; i < context_.fwd_primitives.size(); ++i) { - context_.fwd_primitives.at(i).execute(*context_.fwd_stream, - context_.net_args.at(i)); - } + execute_primitives(context_.fwd_primitives, context_.fwd_stream, context_.net_args); #else context_.fwd_stream->submit(context_.fwd_primitives); #endif // ENABLE_MKLDNN_V1 @@ -141,7 +131,7 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { private: // Primitive reuse context for BatchNorm forward op. struct BatchNormFwdContext { - // Flags indicts if it is training or inference mode. + // Flags indicating if it is training or inference mode. int64 flags; // Algorithm kind. @@ -556,12 +546,12 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive { auto diff_weights_desc = weights_desc; // Forward batch-normalization descriptor and primitive descriptor. - auto bn_flags = - bwdParams.training - ? BN_FLAGS::use_scale_shift - : (BN_FLAGS::use_scale_shift | BN_FLAGS::use_global_stats); + //auto bn_flags = + // bwdParams.training + // ? BN_FLAGS::use_scale_shift + // : (BN_FLAGS::use_scale_shift | BN_FLAGS::use_global_stats); auto fwd_desc = batch_normalization_forward::desc( - prop_kind::forward_training, src_md, bwdParams.eps, bn_flags); + prop_kind::forward_training, src_md, bwdParams.eps, context_.flags); auto fwd_pd = BatchNormFwdPd(fwd_desc, cpu_engine_); // Backward batch-normalization primitive. @@ -570,7 +560,7 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive { // 2. on bwd propagation, mean and variance are considered as constants. // Thus, reduce the amount of MKL computation. 
auto bwd_desc = batch_normalization_backward::desc( - prop_kind::backward, diff_dst_md, src_md, bwdParams.eps, bn_flags); + prop_kind::backward, diff_dst_md, src_md, bwdParams.eps, context_.flags); context_.bwd_pd.reset(new BatchNormBwdPd(bwd_desc, cpu_engine_, fwd_pd)); // Create memory primitives. @@ -979,7 +969,7 @@ class MklFusedBatchNormOp : public OpKernel { // Set NAN mean value in case of empty input tensor auto saved_mean_data = (*saved_mean_tensor)->flat().data(); - std::fill_n(saved_mean_data, num_elements, static_cast(NAN)); + std::fill_n(saved_mean_data, num_elements, static_cast(0)); MklDnnShape mkl_shape_saved_variance; mkl_shape_saved_variance.SetMklTensor(false); @@ -990,12 +980,12 @@ class MklFusedBatchNormOp : public OpKernel { // Set NAN variance value in case of empty input tensor auto saved_variance_data = (*saved_variance_tensor)->flat().data(); - std::fill_n(saved_variance_data, num_elements, static_cast(NAN)); + std::fill_n(saved_variance_data, num_elements, static_cast(0)); // Changes to support reserved_space_3 parameter in FusedBatchNormV3. // TODO: This parameter functionality is not implemented on CPU. // It is used to hold intermediate results. So the allocated - // memory is filled with NANs. + // memory is filled with 0s. if (reserved_space) { DCHECK(reserved_space_tensor != nullptr); @@ -1171,7 +1161,7 @@ class MklFusedBatchNormGradOp : public OpKernel { src_data = static_cast(const_cast(src_tensor.flat().data())); const T* diff_dst_data = nullptr; -#ifdef ENABLE_MKL_DNN_V1 +#ifdef ENABLE_MKLDNN_V1 if (IS_DIFF_DST_REORDER_NEEDED(diff_dst_md, bn_bwd_pd, bn_bwd)) { diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); diff_dst.CheckReorderToOpMem(MEMORY_PD_WITHOUT_DATA( @@ -1184,7 +1174,7 @@ class MklFusedBatchNormGradOp : public OpKernel { #else diff_dst_data = static_cast(const_cast(diff_dst_tensor.flat().data())); -#endif +#endif // ENABLE_MKLDNN_V1 // Indices of output tensors const size_t kDiffSrcIndex = 0; ",0,test 00e2cbf2e84524dd9e8320b58cdccf2c8b3f33b3,tensorflow/tensorflow,Changes based on review,mkl_types.h,"@@ -110,6 +110,7 @@ namespace tensorflow { #define TENSOR_FORMAT MKL_TENSOR_FORMAT #define TENSOR_FORMAT_NHWC MKL_TENSOR_FORMAT_NHWC #define TENSOR_MAX_DIMS MKLDNN_MAX_NDIMS +#define BN_FLAGS mkldnn::batch_normalization_flags #else @@ -205,6 +206,7 @@ namespace tensorflow { #define SUMMAND_MD summand_pd #define TENSOR_FORMAT TensorFormat #define TENSOR_FORMAT_NHWC FORMAT_NHWC +#define BN_FLAGS mkldnn #endif // ENABLE_MKLDNN_V1 } // namespace tensorflow ",0,test 7cb0b5767c549df17a52173ef33ec7d2487d25e2,tensorflow/tensorflow,"Tolerate differences equal to `tolerated` threshold in MinMaxApproximatelyEqual. PiperOrigin-RevId: 310872715 Change-Id: I5b56efad6c31efa144a72f3a30843a98fec0a6f1",hardcode_min_max.cc,"@@ -271,8 +271,8 @@ bool MinMaxApproximatelyEqual(const MinMax& minmax1, const MinMax& minmax2) { const double magnitude = std::min(minmax1.max - minmax1.min, minmax2.max - minmax2.min); const double tolerated = 1e-6 * magnitude; - return std::abs(minmax1.min - minmax2.min) < tolerated && - std::abs(minmax1.max - minmax2.max) < tolerated; + return std::abs(minmax1.min - minmax2.min) <= tolerated && + std::abs(minmax1.max - minmax2.max) <= tolerated; } // Propagates MinMax from any of the listed arrays, to all others. 
",0,train 9d86b3cbb39009b6484b3ba3b8ebc3d82949fae0,tensorflow/tensorflow,"Adds summary for loss so it's easier to follow training progress Change: 137064814",kmeans.py,"@@ -243,6 +243,7 @@ class KMeansClustering(estimator.Estimator, ).training_graph() incr_step = tf.assign_add(tf.contrib.framework.get_global_step(), 1) self._loss = tf.reduce_sum(losses) + tf.scalar_summary('loss/raw', self._loss) training_op = with_dependencies([training_op, incr_step], self._loss) return training_op, self._loss ",0,train 5cb5f52aa16f85f83d818e6f219e7f483b6ead71,tensorflow/tensorflow,"Uppercase platform name when looking up custom call target. TFRT uses 'ROCm', but TF expects 'ROCM'. PiperOrigin-RevId: 428809793 Change-Id: I842887ee056cb8272cbabf439e4a537971d09e24",xlir_kernels.cc,"@@ -18,6 +18,7 @@ #include #include +#include ""absl/strings/ascii.h"" #include ""llvm/Support/Error.h"" #include ""tensorflow/compiler/xla/service/custom_call_status_internal.h"" #include ""tensorflow/compiler/xla/service/custom_call_target_registry.h"" @@ -321,11 +322,11 @@ static llvm::Error CustomCall( tfrt::StringAttribute symbol) { // Lookup custom call target from registry. auto platform = stream->platform(); - auto* target = CustomCallTargetRegistry::Global()->Lookup( - symbol.str(), tfrt::StrCat(platform)); + auto key = absl::AsciiStrToUpper(tfrt::StrCat(platform)); // 'ROCm' -> 'ROCM' + auto* target = CustomCallTargetRegistry::Global()->Lookup(symbol.str(), key); if (!target) { return tfrt::MakeStringError(""Custom call target '"", symbol.str(), - ""' not registered for platform "", platform); + ""' not registered for platform "", key); } auto current = tfrt::gpu::wrapper::CtxSetCurrent(stream.context()->get()); ",0,train a901eb7c6e30d7ece53adbe50549774e0a8e0715,tensorflow/tensorflow,"\nInternal refactor\n PiperOrigin-RevId: 274224520",grpc_tensor_coding.cc,"@@ -14,8 +14,10 @@ limitations under the License. ==============================================================================*/ #include ""tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.h"" + #include ""grpcpp/support/byte_buffer.h"" #include ""grpcpp/support/slice.h"" +#include ""absl/flags/flag.h"" #include ""tensorflow/core/common_runtime/dma_helper.h"" #include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/tensor.pb.h"" @@ -26,7 +28,7 @@ limitations under the License. #include ""tensorflow/core/platform/env.h"" #include ""tensorflow/core/protobuf/worker.pb.h"" -// (Omitted internal-only flag) +ABSL_FLAG(bool, grpc_deepcopy_tensor_response, false, ""Disables mem sharing""); namespace tensorflow { namespace grpc { @@ -183,7 +185,9 @@ void EncodeTensorToByteBuffer(bool is_dead, const Tensor& val, bool require_ack, // We enable this behavior if the tensor is large. bool share_tensor_slice_memory = (tdata.size() > kLargeTensorBytes); - // (Omitted internal-only conditional) + if (absl::GetFlag(FLAGS_grpc_deepcopy_tensor_response)) { + share_tensor_slice_memory = false; + } size_t encoder_size = expected_size - tdata.size(); ",0,train c8f1aec8046df28b4ed5b5181f3ef5509f98f97e,tensorflow/tensorflow,"Use i8 as type for the ConstantDataArray. It turns out this is faster to compile, because LLVM handles it specially. PiperOrigin-RevId: 201911349",cpu_external_constants_test.cc,"@@ -65,7 +65,7 @@ TEST_F(CpuExternalConstantsTest, BasicNegative) { // to externalize it. 
TestWithArray(/*rows=*/4, /*cols=*/4, R""( CHECK-NOT: @constant_global_0 = external constant [16 x float], align 8 -CHECK: @0 = private constant [16 x float] {{.*}}, align 8 +CHECK: @0 = private constant [64 x i8] {{.*}}, align 8 )""); } } // namespace ",0,test c8f1aec8046df28b4ed5b5181f3ef5509f98f97e,tensorflow/tensorflow,"Use i8 as type for the ConstantDataArray. It turns out this is faster to compile, because LLVM handles it specially. PiperOrigin-RevId: 201911349",cpu_literal_caching_test.cc,"@@ -55,8 +55,8 @@ ENTRY main { )""; string filecheck_pattern = R""( -CHECK: private constant [12 x float] -CHECK-NOT: private constant [12 x float] +CHECK: private constant [48 x i8] +CHECK-NOT: private constant [48 x i8] )""; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, @@ -98,10 +98,10 @@ ENTRY main { )""; string filecheck_pattern = R""( -CHECK: private constant [1 x float] -CHECK: private constant [2 x float] -CHECK-NOT: private constant [1 x float] -CHECK-NOT: private constant [2 x float] +CHECK: private constant [4 x i8] +CHECK: private constant [8 x i8] +CHECK-NOT: private constant [4 x i8] +CHECK-NOT: private constant [8 x i8] )""; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ",0,test c8f1aec8046df28b4ed5b5181f3ef5509f98f97e,tensorflow/tensorflow,"Use i8 as type for the ConstantDataArray. It turns out this is faster to compile, because LLVM handles it specially. PiperOrigin-RevId: 201911349",cpu_outfeed_test.cc,"@@ -37,7 +37,7 @@ ENTRY main { )""; string filecheck_pattern = R""( -CHECK: private constant [12 x float] +CHECK: private constant [48 x i8] )""; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ",0,test c8f1aec8046df28b4ed5b5181f3ef5509f98f97e,tensorflow/tensorflow,"Use i8 as type for the ConstantDataArray. It turns out this is faster to compile, because LLVM handles it specially. PiperOrigin-RevId: 201911349",llvm_util.cc,"@@ -36,6 +36,7 @@ limitations under the License. #include ""tensorflow/core/lib/io/path.h"" #include ""tensorflow/core/lib/strings/str_util.h"" #include ""tensorflow/core/lib/strings/strcat.h"" +#include ""tensorflow/core/platform/byte_order.h"" #include ""tensorflow/core/platform/env.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/types.h"" @@ -251,14 +252,12 @@ StatusOr DecodeSelfDescribingShapeConstant(const void* shape_ptr, llvm::Constant* ConvertLiteralToIrConstant(const Literal& literal, llvm::Module* module) { - const Shape& shape = literal.shape(); - llvm::Type* type = shape.element_type() == C64 - ? llvm::Type::getFloatTy(module->getContext()) - : PrimitiveTypeToIrType(shape.element_type(), module); const char* data = static_cast(literal.untyped_data()); - uint64 num_elements = literal.size_bytes() * 8 / GetSizeInBits(type); - return llvm::ConstantDataArray::getRaw( - llvm::StringRef(data, literal.size_bytes()), num_elements, type); + CHECK_EQ(module->getDataLayout().isLittleEndian(), + tensorflow::port::kLittleEndian); + return llvm::ConstantDataArray::getString( + module->getContext(), llvm::StringRef(data, literal.size_bytes()), + /*AddNull=*/false); } llvm::AllocaInst* EmitAllocaAtFunctionEntry(llvm::Type* type, ",0,test f1d8f8d1501af92b0fe1f1d37398b4dee6a4e7e2,tensorflow/tensorflow,"Collect slot variable restorations in a queue and restore them as a batch. 
PiperOrigin-RevId: 399236012 Change-Id: Ia89a1037fdaae45fc48cb17a56e99cd4125e9054",base.py,"@@ -275,12 +275,10 @@ class CheckpointPosition(object): checkpoint.object_by_proto_id[self._proto_id] = trackable for deferred_slot_restoration in ( checkpoint.deferred_slot_restorations.pop(self._proto_id, ())): - trackable._create_or_restore_slot_variable( # pylint: disable=protected-access - slot_variable_position=CheckpointPosition( - checkpoint=checkpoint, - proto_id=deferred_slot_restoration.slot_variable_id), - variable=deferred_slot_restoration.original_variable, - slot_name=deferred_slot_restoration.slot_name) + self._queue_slot_variable_for_restoration( + trackable, deferred_slot_restoration.original_variable, + deferred_slot_restoration.slot_variable_id, + deferred_slot_restoration.slot_name) for slot_restoration in checkpoint.slot_restorations.pop( self._proto_id, ()): optimizer_object = checkpoint.object_by_proto_id.get( @@ -300,12 +298,9 @@ class CheckpointPosition(object): # it would not have the optimizer's `_create_or_restore_slot_variable` # method. elif hasattr(optimizer_object, ""_create_or_restore_slot_variable""): - optimizer_object._create_or_restore_slot_variable( # pylint: disable=protected-access - slot_variable_position=CheckpointPosition( - checkpoint=checkpoint, - proto_id=slot_restoration.slot_variable_id), - variable=trackable, - slot_name=slot_restoration.slot_name) + self._queue_slot_variable_for_restoration( + optimizer_object, trackable, slot_restoration.slot_variable_id, + slot_restoration.slot_name) return True # New assignment else: # The object was already mapped for this checkpoint load, which means @@ -486,6 +481,44 @@ class CheckpointPosition(object): return self._checkpoint.shape_map[serialized_tensor.checkpoint_key] return None + def _queue_slot_variable_for_restoration(self, optimizer_object, variable, + slot_variable_id, slot_name): + """"""Adds a slot variable onto the restoration queue. + + See comment on slot_restoration_tensor_saveables in + _CheckpointRestoreCoordinator.__init__ for more information. + + Args: + optimizer_object: Optimizer that owns the slot variable. + variable: Variable associated with the slot variable. + slot_variable_id: ID of the slot variable. + slot_name: Name of the slot variable. + """""" + slot_variable_position = CheckpointPosition( + checkpoint=self.checkpoint, proto_id=slot_variable_id) + # pylint: disable=protected-access + slot_variable = optimizer_object._create_or_restore_slot_variable( + slot_variable_position=slot_variable_position, + variable=variable, + slot_name=slot_name) + # pylint: enable=protected-access + if slot_variable is None: + # The optimizer returns None if the restore should not be done (yet). + return + slot_variable_position.checkpoint.object_by_proto_id[ + slot_variable_id] = slot_variable + # pylint: disable=protected-access + slot_variable._maybe_initialize_trackable() + slot_variable._self_update_uid = self.checkpoint.restore_uid + # pylint: enable=protected-access + # Since this is a slot variable, there will be no new python_saveables, so + # ignore that return value. 
+ new_restore_ops, new_tensor_saveables, _ = ( + slot_variable_position.gather_ops_or_named_saveables()) + self.checkpoint.new_restore_ops(new_restore_ops) + self.checkpoint.slot_restoration_tensor_saveables.update( + new_tensor_saveables) + _DeferredSlotVariableRestoration = collections.namedtuple( ""_DeferredSlotVariableRestoration"", [ @@ -983,6 +1016,20 @@ class Trackable(object): restore_ops.extend( current_position.checkpoint.restore_saveables( tensor_saveables, python_saveables)) + # It is faster to restore slot variables separately because the file reader + # (BundleReader) assumes that variables are stored on disk in alphabetical + # order. However, slot variables are stored in their own groups after other + # variables, and while each group is alphabetically sorted, merging them + # into 1 read would cause lots of back and forth seeking, e.g. + # variable/1 @ offset 0, + # variable/1/slot/1 @ offset 100, + # variable/1/slot/2 @ offset 200, + # variable/2 @ offset 1, + # variable/2/slot/1 @ offset 101, ... + restore_ops.extend( + current_position.checkpoint.restore_saveables( + current_position.checkpoint.slot_restoration_tensor_saveables, [])) + current_position.checkpoint.slot_restoration_tensor_saveables.clear() return restore_ops def _single_restoration_from_checkpoint_position(self, checkpoint_position, ",0,train f1d8f8d1501af92b0fe1f1d37398b4dee6a4e7e2,tensorflow/tensorflow,"Collect slot variable restorations in a queue and restore them as a batch. PiperOrigin-RevId: 399236012 Change-Id: Ia89a1037fdaae45fc48cb17a56e99cd4125e9054",util.py,"@@ -272,6 +272,15 @@ class _CheckpointRestoreCoordinator(object): optimizer_id=node_index, slot_variable_id=slot_reference.slot_variable_node_id, slot_name=slot_reference.slot_name)) + # Dictionary of tensor_saveables for slot_restorations that were not shifted + # over to deferred_slot_restorations when the variable is created/tracked. + # + # These saveables are restored, along with other (non-slot) variables, in a + # batch after collecting all child CheckpointPositions. Doing slot variable + # restorations in a batch results in more efficient (fewer) file operations. + # This efficiency is particularly significant when restoring from + # network-based file systems. + self.slot_restoration_tensor_saveables = {} self._deleter = _CheckpointRestoreCoordinatorDeleter( self.expect_partial_attr, ",0,train 7aedf28c704d3fdfe22b8563ede09677f8c92585,tensorflow/tensorflow,"Prevent overwriting from SavedModel builder, if the export-directory already exists. Change: 137312424",builder.py,"@@ -86,8 +86,12 @@ class SavedModelBuilder(object): constants.SAVED_MODEL_SCHEMA_VERSION) self._export_dir = export_dir - if not file_io.file_exists(export_dir): - file_io.recursive_create_dir(self._export_dir) + if file_io.file_exists(export_dir): + raise AssertionError( + ""Export directory already exists. Please specify a different export "" + ""directory."") + + file_io.recursive_create_dir(self._export_dir) # Boolean to track whether variables and assets corresponding to the # SavedModel have been saved. Specifically, the first meta graph to be added ",0,test 7aedf28c704d3fdfe22b8563ede09677f8c92585,tensorflow/tensorflow,"Prevent overwriting from SavedModel builder, if the export-directory already exists. 
Change: 137312424",saved_model_test.py,"@@ -198,6 +198,29 @@ class SavedModelTest(tf.test.TestCase): self.assertRaises(errors.NotFoundError, loader.load, sess, [""baz""], export_dir) + def testNoOverwrite(self): + export_dir = os.path.join(tf.test.get_temp_dir(), ""test_no_overwrite"") + builder = saved_model_builder.SavedModelBuilder(export_dir) + + # Graph with a single variable. SavedModel invoked to: + # - add with weights. + with self.test_session(graph=tf.Graph()) as sess: + self._init_and_validate_variable(sess, ""v"", 42) + builder.add_meta_graph_and_variables(sess, [""foo""]) + + # Save the SavedModel to disk in text format. + builder.save(as_text=True) + + # Restore the graph with tag ""foo"", whose variables were saved. + with self.test_session(graph=tf.Graph()) as sess: + loader.load(sess, [""foo""], export_dir) + self.assertEqual(42, tf.get_collection(tf.GraphKeys.VARIABLES)[0].eval()) + + # An attempt to create another builder with the same export directory should + # result in an assertion error. + self.assertRaises(AssertionError, saved_model_builder.SavedModelBuilder, + export_dir) + def testSaveAsText(self): export_dir = os.path.join(tf.test.get_temp_dir(), ""test_astext"") builder = saved_model_builder.SavedModelBuilder(export_dir) ",0,test 1fe68ce0f4f7ef020cc52d1cc9963dd344fccba0,tensorflow/tensorflow,"internal change PiperOrigin-RevId: 190789794",resource_mgr.h,"@@ -319,14 +319,13 @@ class IsResourceInitialized : public OpKernel { // specified type. The type will be a part of the generated op name. // TODO(apassos): figure out how to get non-cpu-allocated tensors to work // through constant folding so this doesn't have to be marked as stateful. -#define REGISTER_RESOURCE_HANDLE_OP(Type) \ - REGISTER_OP(#Type ""HandleOp"") \ - .Attr(""container: string = ''"") \ - .Attr(""shared_name: string = ''"") \ - .Output(""resource: resource"") \ - .SetIsStateful() \ - .SetShapeFn(tensorflow::shape_inference::ScalarShape) \ - .Doc(""Creates a handle to a "" #Type) +#define REGISTER_RESOURCE_HANDLE_OP(Type) \ + REGISTER_OP(#Type ""HandleOp"") \ + .Attr(""container: string = ''"") \ + .Attr(""shared_name: string = ''"") \ + .Output(""resource: resource"") \ + .SetIsStateful() \ + .SetShapeFn(tensorflow::shape_inference::ScalarShape) // Utility op kernel to produce a handle to a resource of type T. template ",0,train a6f9fd60cc1a16c56b78a8497530ba1351143c79,tensorflow/tensorflow,"Introduce TraceType for Iterator PiperOrigin-RevId: 404289211 Change-Id: I2996145d1af984ab6d28079e757a25654f5d06b5",iterator_ops.py,"@@ -34,7 +34,6 @@ from tensorflow.python.framework import type_spec from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.training.saver import BaseSaverBuilder from tensorflow.python.training.tracking import base as trackable -from tensorflow.python.types import trace from tensorflow.python.util import _pywrap_utils from tensorflow.python.util import deprecation from tensorflow.python.util import lazy_loader @@ -673,30 +672,7 @@ class IteratorBase(collections_abc.Iterator, trackable.Trackable, raise NotImplementedError(""Iterator.get_next_as_optional()"") -# TODO(b/202447704): Merge into IteratorSpec. -class IteratorType(trace.TraceType): - """"""Represents Iterators (and specs) for function tracing purposes."""""" - - def __init__(self, spec, local_id): - self._components = (spec, local_id) - - def is_subtype_of(self, other): - # TODO(b/202429845): Implement for subtyping. 
- return self == other - - def most_specific_common_supertype(self, others): - # TODO(b/202430155) Implement for shape relaxation. - return None - - def __hash__(self) -> int: - return hash(self._components) - - def __eq__(self, other) -> bool: - return isinstance( - other, IteratorType) and self._components == other._components - - -class OwnedIterator(IteratorBase, trace.SupportsTracingType): +class OwnedIterator(IteratorBase): """"""An iterator producing tf.Tensor objects from a tf.data.Dataset. The iterator resource created through `OwnedIterator` is owned by the Python @@ -900,14 +876,9 @@ class OwnedIterator(IteratorBase, trace.SupportsTracingType): return {""ITERATOR"": _saveable_factory} - def __tf_tracing_type__(self, tracing_context): - return IteratorType( - self._type_spec, - tracing_context.get_local_id(self._iterator_resource._id)) # pylint:disable=protected-access - @tf_export(""data.IteratorSpec"", v1=[]) -class IteratorSpec(type_spec.TypeSpec, trace.SupportsTracingType): +class IteratorSpec(type_spec.TypeSpec): """"""Type specification for `tf.data.Iterator`. For instance, `tf.data.IteratorSpec` can be used to define a tf.function that @@ -960,11 +931,6 @@ class IteratorSpec(type_spec.TypeSpec, trace.SupportsTracingType): def from_value(value): return IteratorSpec(value.element_spec) # pylint: disable=protected-access - def __tf_tracing_type__(self, tracing_context): - # TODO(b/202772221): Validate and enforce this assumption of uniqueness per - # spec instance. - return IteratorType(self, tracing_context.get_local_id(id(self))) - # TODO(b/71645805): Expose trackable stateful objects from dataset. class _IteratorSaveable(BaseSaverBuilder.SaveableObject): ",0,train a6f9fd60cc1a16c56b78a8497530ba1351143c79,tensorflow/tensorflow,"Introduce TraceType for Iterator PiperOrigin-RevId: 404289211 Change-Id: I2996145d1af984ab6d28079e757a25654f5d06b5",function_trace_type_test.py,"@@ -15,14 +15,10 @@ """"""Tests for function_trace_type."""""" import timeit -from absl.testing import parameterized - from tensorflow.python import keras -from tensorflow.python.data.ops import dataset_ops from tensorflow.python.eager import function from tensorflow.python.eager import function_trace_type -from tensorflow.python.framework import combinations from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_spec from tensorflow.python.ops import array_ops @@ -33,24 +29,8 @@ from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.platform import test -class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase): - - @combinations.generate(combinations.combine(mode=['eager'])) - def testIteratorAliasing(self): - it1 = iter(dataset_ops.DatasetV2.from_tensor_slices([1, 2, 3])) - it2 = iter(dataset_ops.DatasetV2.from_tensor_slices([1, 2, 3])) - - self.assertEqual( - function_trace_type.get_arg_spec((it1, it1), False, False, True), - function_trace_type.get_arg_spec((it2, it2), False, False, True)) - self.assertEqual( - function_trace_type.get_arg_spec((it1, it2), False, False, True), - function_trace_type.get_arg_spec((it2, it1), False, False, True)) - self.assertNotEqual( - function_trace_type.get_arg_spec((it1, it1), False, False, True), - function_trace_type.get_arg_spec((it1, it2), False, False, True)) +class CacheKeyGenerationTest(test.TestCase): - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testCompositeAndSpec(self): composite_tensor = ragged_tensor.RaggedTensor.from_row_splits( values=[1, 2, 3], 
row_splits=[0, 2, 3]) @@ -60,7 +40,6 @@ class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase): function_trace_type.get_arg_spec(composite_tensor, False, False, True), function_trace_type.get_arg_spec(spec, False, False, True)) - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testVariableAliasing(self): v1 = resource_variable_ops.ResourceVariable([1]) v2 = resource_variable_ops.ResourceVariable([1]) @@ -80,7 +59,6 @@ class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase): self.assertEqual(all_unique, all_unique_again) self.assertEqual(all_same, all_same_again) - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testTensorEquality(self): context = function_trace_type.SignatureContext() tensor_a = array_ops.zeros([11, 3, 5], @@ -97,7 +75,6 @@ class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase): self.assertNotEqual(tensor_b, tensor_c) self.assertEqual(tensor_a, tensor_d) - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testTensorAndSpecEquality(self): context = function_trace_type.SignatureContext() tensor = array_ops.zeros([11, 3, 5], @@ -110,7 +87,6 @@ class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase): self.assertEqual(tensor, spec) self.assertNotEqual(tensor, spec_with_name) - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testTupleEquality(self): trace_a = function_trace_type.get_arg_spec((1, 2, 3, 4), False, False, True) trace_b = function_trace_type.get_arg_spec((1, 2, 2, 4), False, False, True) @@ -122,7 +98,6 @@ class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase): self.assertNotEqual(trace_b, trace_c) self.assertEqual(trace_a, trace_d) - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testListEquality(self): trace_a = function_trace_type.get_arg_spec([1, 2, 3, 4], False, False, True) trace_b = function_trace_type.get_arg_spec([1, 2, 2, 4], False, False, True) @@ -134,7 +109,6 @@ class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase): self.assertNotEqual(trace_b, trace_c) self.assertEqual(trace_a, trace_d) - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testDictEquality(self): trace_a = function_trace_type.get_arg_spec({1: 2, 3: 4}, False, False, True) trace_b = function_trace_type.get_arg_spec({1: 2, 3: 2}, False, False, True) @@ -146,7 +120,6 @@ class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase): self.assertNotEqual(trace_b, trace_c) self.assertEqual(trace_a, trace_d) - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testComplexStruct(self): struct = {(1, 2, 3): {(1, 2): {12: 2}}, (3, 2, 3): (2, {2: 3})} trace_a = function_trace_type.get_arg_spec(struct, False, False, True) ",0,train a6f9fd60cc1a16c56b78a8497530ba1351143c79,tensorflow/tensorflow,"Introduce TraceType for Iterator PiperOrigin-RevId: 404289211 Change-Id: I2996145d1af984ab6d28079e757a25654f5d06b5",trace.py,"@@ -77,7 +77,6 @@ class SupportsTracingType(Protocol): classes according to the behaviour specified by their TraceType. """""" + @abc.abstractmethod def __tf_tracing_type__(self, context: TracingContext) -> TraceType: - raise NotImplementedError( - ""Class inheriting SupportsTracingType must implement __tf_tracing_type__"" - ) + pass ",0,train 14f5e78832d78b5bab6803b01156a17c1e9482b9,tensorflow/tensorflow,"Make the no-cloning distribution codepath a default except for graph mode or TPU. 
At this point we have verified performance and accuracy on a couple of models in addition to all unit-tests passing. PiperOrigin-RevId: 247611564",distribute_strategy_test.py,"@@ -1630,12 +1630,12 @@ class TestDistributionStrategyWithKerasModels(test.TestCase, x = np.ones((64, 10)).astype('float32') model = _make_model_with_add_loss() - model.compile('sgd', cloning=cloning) + model.compile('sgd') history = model.fit(x, steps_per_epoch=2, epochs=1) with distribution.scope(): ds_model = _make_model_with_add_loss() - ds_model.compile('sgd') + ds_model.compile('sgd', cloning=cloning) ds_history = ds_model.fit(x, steps_per_epoch=2, epochs=1) self.assertAllClose(history.history, ds_history.history) ",0,test 14f5e78832d78b5bab6803b01156a17c1e9482b9,tensorflow/tensorflow,"Make the no-cloning distribution codepath a default except for graph mode or TPU. At this point we have verified performance and accuracy on a couple of models in addition to all unit-tests passing. PiperOrigin-RevId: 247611564",distributed_training_utils.py,"@@ -618,7 +618,8 @@ def is_distributing_by_cloning(model): True if the `model` is going to be distributed using cloning and False otherwise. """""" - return (model._cloning or not context.executing_eagerly() or + return (model._cloning or model._compile_distribution or + not context.executing_eagerly() or K.is_tpu_strategy(model._distribution_strategy)) ",0,test 14f5e78832d78b5bab6803b01156a17c1e9482b9,tensorflow/tensorflow,"Make the no-cloning distribution codepath a default except for graph mode or TPU. At this point we have verified performance and accuracy on a couple of models in addition to all unit-tests passing. PiperOrigin-RevId: 247611564",training.py,"@@ -249,7 +249,7 @@ class Model(network.Network): # cloning is requested. # TODO(b/124517980, b/124377929): Remove this temporary undocumented way # of enabling the feature and graduate it to the main distributed code path. - self._cloning = kwargs.pop('cloning', True) + self._cloning = kwargs.pop('cloning', False) self._validate_compile_param_for_distribution_strategy(self.run_eagerly, sample_weight_mode, ",0,test 8802516b56e190cba5846f7b7dfca7a0902bcf03,tensorflow/tensorflow,"Integrate LLVM at https://github.com/llvm/llvm-project/commit/2501e86acda2 PiperOrigin-RevId: 319196952 Change-Id: I078a64a0b84eb9cd8f3c5d277ad30c943b58fd1c",lhlo_legalize_to_llvm.cc,"@@ -128,8 +128,10 @@ struct DynamicMemRefCastOpConverter void PopulateLhloToLLVMConversionPatterns(LLVMTypeConverter *converter, OwningRewritePatternList *patterns) { - patterns->insert( - *converter); + // TODO(b/160227541): Re-enable LHLO->LLVM lowering. + // patterns->insert( + // *converter); } } // namespace xla_lhlo ",0,train 71061b5dccc00b13f2d67144117fdd254797af38,tensorflow/tensorflow,Fixed typo,crop_and_resize_op_gpu.cu.cc,"@@ -411,7 +411,7 @@ struct CropAndResizeBackpropImage { d.stream(), config.virtual_thread_count, grads_image.data())); } - // Configurate interpolation method. + // Configure interpolation method. InterpolationMethod method = BILINEAR; if (method_name == ""nearest"") { method = NEAREST; ",0,test 3265790fde0b78769b5906909db235464377f6f8,tensorflow/tensorflow,"Support CompositeTensor in functional If/While We need to repack the return of the if/else branches to ensure any CompositeTensors are returned with the correct type. For functional while we need to be sure to unpack and repack when handling the body. 
PiperOrigin-RevId: 333190256 Change-Id: If9a9449a10616afa3dd79e39b1e66ee0ee571a9e",functional_ops.py,"@@ -838,28 +838,14 @@ def If(cond, inputs, then_branch, else_branch, name=None): or else_branch(inputs). """""" # pylint: disable=protected-access - # Handle the Defun case until users have transitioned to tf.function. Note - # that composites may need to be re-packed by the caller. if isinstance(then_branch, function._DefinedFunction): tlist = [_.type for _ in then_branch.definition.signature.output_arg] - return gen_functional_ops._if( - cond, inputs, tlist, then_branch, else_branch, name=name) - - # We assume that `then_branch` is a ConcreteFunction here. - then_out = then_branch.structured_outputs - else_out = else_branch.structured_outputs - - # Ensure then/else are the same type of composites to avoid an invalid call - # to pack_sequence_as later on. - nest.assert_same_structure(then_out, else_out, expand_composites=True) - - tlist = nest.flatten(then_branch.output_dtypes) - ret = gen_functional_ops._if( + else: + # We assume that `then_branch` is a ConcreteFunction here. + tlist = nest.flatten(then_branch.output_dtypes) + return gen_functional_ops._if( cond, inputs, tlist, then_branch, else_branch, name=name) - # Re-pack the outputs to restore any CompositeTensors - return nest.pack_sequence_as(then_out, ret, expand_composites=True) - def Gradient(inputs, f, name=None): r""""""Computes the gradient function for function f via backpropagation. @@ -978,8 +964,7 @@ def While(input_, cond, body, name=None, hostmem=None): # Slice off the loop-carried captured inputs. ret = ret[:-len(body.captured_inputs)] else: - ret = gen_functional_ops._while( - nest.flatten(input_, expand_composites=True), cond, body, name=name) + ret = gen_functional_ops._while(input_, cond, body, name=name) if hostmem: input_attr = attr_value_pb2.AttrValue() input_attr.list.i.extend(hostmem) @@ -988,14 +973,7 @@ def While(input_, cond, body, name=None, hostmem=None): output_attr = attr_value_pb2.AttrValue() output_attr.list.i.extend(hostmem) ret[0].op._set_attr(""_output_hostmem"", output_attr) # pylint: disable=protected-access - - # Handle the Defun case until users have transitioned to tf.function. Note - # that composites may need to be re-packed by the caller. - if isinstance(body, function._DefinedFunction): - return ret - - return nest.pack_sequence_as( - body.structured_outputs, ret, expand_composites=True) + return ret # b/36459430 ",0,train 3265790fde0b78769b5906909db235464377f6f8,tensorflow/tensorflow,"Support CompositeTensor in functional If/While We need to repack the return of the if/else branches to ensure any CompositeTensors are returned with the correct type. For functional while we need to be sure to unpack and repack when handling the body. 
PiperOrigin-RevId: 333190256 Change-Id: If9a9449a10616afa3dd79e39b1e66ee0ee571a9e",functional_ops_test.py,"@@ -19,30 +19,28 @@ from __future__ import division from __future__ import print_function from tensorflow.python.eager import def_function -from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import function -from tensorflow.python.framework import ops -from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_spec +from tensorflow.python.framework import test_util from tensorflow.python.ops import functional_ops -from tensorflow.python.ops import math_ops from tensorflow.python.platform import test class FunctionalOpsTest(test.TestCase): + @test_util.deprecated_graph_mode_only def testIfWithDefun(self): - # Defun should only be used in graph mode - with ops.Graph().as_default(): - @function.Defun(dtypes.float32) - def Then(x): - return x + 1 - @function.Defun(dtypes.float32) - def Else(x): - return x - 1 + @function.Defun(dtypes.float32) + def Then(x): + return x + 1 + + @function.Defun(dtypes.float32) + def Else(x): + return x - 1 + with self.cached_session(): inputs = [10.] result = self.evaluate(functional_ops.If(False, inputs, Then, Else)) self.assertEqual([9.0], result) @@ -59,78 +57,12 @@ class FunctionalOpsTest(test.TestCase): def Else(x): return x - 1 - inputs = [10.] - then_cf = Then.get_concrete_function() - else_cf = Else.get_concrete_function() - result = self.evaluate(functional_ops.If(False, inputs, then_cf, else_cf)) - self.assertEqual([9.0], result) - - def testIfWithFunctionComposite(self): - - signature = [tensor_spec.TensorSpec([], dtypes.float32)] - @def_function.function(input_signature=signature) - def Then(x): - return sparse_tensor.SparseTensor([[0]], [x + 1], [1]) - - @def_function.function(input_signature=signature) - def Else(x): - return sparse_tensor.SparseTensor([[0]], [x - 1], [1]) - - inputs = [10.] 
- then_cf = Then.get_concrete_function() - else_cf = Else.get_concrete_function() - result = functional_ops.If(False, inputs, then_cf, else_cf) - self.assertIsInstance(result, sparse_tensor.SparseTensor) - self.assertAllEqual([9.0], result.values) - - def testWhileWithDefun(self): - # Defun should only be used in graph mode - with ops.Graph().as_default(): - @function.Defun(dtypes.int32) - def Body(n): - return n - 1 - - @function.Defun(dtypes.int32) - def Cond(n): - return math_ops.reduce_min(n) > 0 - - n = constant_op.constant([2]) - result = self.evaluate(functional_ops.While([n], Cond, Body)) - self.assertAllEqual([[0]], result) - - def testWhileWithFunction(self): - - @def_function.function - def Body(n): - return n - 1 - - @def_function.function - def Cond(n): - return math_ops.reduce_min(n) > 0 - - n = constant_op.constant([2]) - cond_cf = Cond.get_concrete_function(n) - body_cf = Body.get_concrete_function(n) - result = functional_ops.While([n], cond_cf, body_cf) - self.assertAllEqual([0], result) - - def testWhileWithFunctionComposite(self): - - @def_function.function - def Body(n): - return sparse_tensor.SparseTensor([[0]], n.values - 1, [1]) - - @def_function.function - def Cond(n): - return math_ops.reduce_min(n.values) > 0 - - n = constant_op.constant([2]) - n = sparse_tensor.SparseTensor([[0]], [2], [1]) - cond_cf = Cond.get_concrete_function(n) - body_cf = Body.get_concrete_function(n) - result = functional_ops.While([n], cond_cf, body_cf) - self.assertIsInstance(result, sparse_tensor.SparseTensor) - self.assertAllEqual([0], result.values) + with self.cached_session(): + inputs = [10.] + result = self.evaluate( + functional_ops.If(False, inputs, Then.get_concrete_function(), + Else.get_concrete_function())) + self.assertEqual([9.0], result) if __name__ == '__main__': ",0,train 3e53570d3bf518ec2b6cfeed4b5fd57d11370289,tensorflow/tensorflow,fix #14542 a bug of model_to_dot() (#14553),vis_utils.py,"@@ -120,7 +120,7 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True, rankdir='TB'): layer_id = str(id(layer)) for i, node in enumerate(layer._inbound_nodes): # pylint: disable=protected-access node_key = layer.name + '_ib-' + str(i) - if node_key in model.container_nodes: + if node_key in model._network_nodes: # pylint: disable=protected-access for inbound_layer in node.inbound_layers: inbound_layer_id = str(id(inbound_layer)) layer_id = str(id(layer)) ",0,train dac3cf87e6fd2fd80ebc05c1d21bec9ca992041d,tensorflow/tensorflow,"Remove unnecessary control dependencies. 
Change: 144392019",metrics_impl.py,"@@ -296,12 +296,11 @@ def mean(values, weights=None, metrics_collections=None, values = math_ops.multiply(values, weights) num_values = math_ops.reduce_sum(weights) - total_compute_op = state_ops.assign_add(total, math_ops.reduce_sum(values)) - count_compute_op = state_ops.assign_add(count, num_values) + update_total_op = state_ops.assign_add(total, math_ops.reduce_sum(values)) + update_count_op = state_ops.assign_add(count, num_values) mean_t = _safe_div(total, count, 'value') - with ops.control_dependencies([total_compute_op, count_compute_op]): - update_op = _safe_div(total, count, 'update_op') + update_op = _safe_div(update_total_op, update_count_op, 'update_op') if metrics_collections: ops.add_to_collections(metrics_collections, mean_t) @@ -1007,8 +1006,8 @@ def mean_tensor(values, weights=None, metrics_collections=None, values = math_ops.multiply(values, weights) num_values = math_ops.multiply(num_values, weights) - total_compute_op = state_ops.assign_add(total, values) - count_compute_op = state_ops.assign_add(count, num_values) + update_total_op = state_ops.assign_add(total, values) + update_count_op = state_ops.assign_add(count, num_values) def compute_mean(total, count, name): non_zero_count = math_ops.maximum(count, @@ -1017,8 +1016,7 @@ def mean_tensor(values, weights=None, metrics_collections=None, return math_ops.truediv(total, non_zero_count, name=name) mean_t = compute_mean(total, count, 'value') - with ops.control_dependencies([total_compute_op, count_compute_op]): - update_op = compute_mean(total, count, 'update_op') + update_op = compute_mean(update_total_op, update_count_op, 'update_op') if metrics_collections: ops.add_to_collections(metrics_collections, mean_t) @@ -1271,17 +1269,16 @@ def precision(labels, predictions, weights=None, labels, predictions, weights, metrics_collections=None, updates_collections=None, name=None) - def compute_precision(name): + def compute_precision(tp, fp, name): return array_ops.where( - math_ops.greater(true_p + false_p, 0), - math_ops.div(true_p, true_p + false_p), + math_ops.greater(tp + fp, 0), + math_ops.div(tp, tp + fp), 0, name) - p = compute_precision('value') - with ops.control_dependencies([true_positives_update_op, - false_positives_update_op]): - update_op = compute_precision('update_op') + p = compute_precision(true_p, false_p, 'value') + update_op = compute_precision( + true_positives_update_op, false_positives_update_op, 'update_op') if metrics_collections: ops.add_to_collections(metrics_collections, p) @@ -1342,17 +1339,15 @@ def precision_at_thresholds(labels, predictions, thresholds, (predictions, labels, weights)): values, update_ops = _confusion_matrix_at_thresholds( labels, predictions, thresholds, weights, includes=('tp', 'fp')) - tp = values['tp'] - fp = values['fp'] # Avoid division by zero. 
epsilon = 1e-7 - def compute_precision(name): + def compute_precision(tp, fp, name): return math_ops.div(tp, epsilon + tp + fp, name='precision_' + name) - prec = compute_precision('value') - with ops.control_dependencies(update_ops.values()): - update_op = compute_precision('update_op') + prec = compute_precision(values['tp'], values['fp'], 'value') + update_op = compute_precision( + update_ops['tp'], update_ops['fp'], 'update_op') if metrics_collections: ops.add_to_collections(metrics_collections, prec) @@ -1469,9 +1464,8 @@ def recall(labels, predictions, weights=None, name) rec = compute_recall(true_p, false_n, 'value') - with ops.control_dependencies([true_positives_update_op, - false_negatives_update_op]): - update_op = compute_recall(true_p, false_n, 'update_op') + update_op = compute_recall( + true_positives_update_op, false_negatives_update_op, 'update_op') if metrics_collections: ops.add_to_collections(metrics_collections, rec) @@ -1881,17 +1875,14 @@ def recall_at_thresholds(labels, predictions, thresholds, (predictions, labels, weights)): values, update_ops = _confusion_matrix_at_thresholds( labels, predictions, thresholds, weights, includes=('tp', 'fn')) - tp = values['tp'] - fn = values['fn'] # Avoid division by zero. epsilon = 1e-7 - def compute_recall(name): + def compute_recall(tp, fn, name): return math_ops.div(tp, epsilon + tp + fn, name='recall_' + name) - rec = compute_recall('value') - with ops.control_dependencies(update_ops.values()): - update_op = compute_recall('update_op') + rec = compute_recall(values['tp'], values['fn'], 'value') + update_op = compute_recall(update_ops['tp'], update_ops['fn'], 'update_op') if metrics_collections: ops.add_to_collections(metrics_collections, rec) @@ -1951,21 +1942,20 @@ def root_mean_squared_error(labels, predictions, weights=None, labels, predictions, weights = _remove_squeezable_dimensions( labels, predictions, weights) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - value_tensor, update_op = mean_squared_error( + mse, update_mse_op = mean_squared_error( labels, predictions, weights, None, None, name or 'root_mean_squared_error') - rmse = math_ops.sqrt(value_tensor) - with ops.control_dependencies([update_op]): - update_op = math_ops.sqrt(update_op) + rmse = math_ops.sqrt(mse) + update_rmse_op = math_ops.sqrt(update_mse_op) if metrics_collections: ops.add_to_collections(metrics_collections, rmse) if updates_collections: - ops.add_to_collections(updates_collections, update_op) + ops.add_to_collections(updates_collections, update_rmse_op) - return rmse, update_op + return rmse, update_rmse_op def sensitivity_at_specificity( @@ -2031,12 +2021,8 @@ def sensitivity_at_specificity( values, update_ops = _confusion_matrix_at_thresholds( labels, predictions, thresholds, weights) - tp = values['tp'] - fn = values['fn'] - tn = values['tn'] - fp = values['fp'] - def compute_sensitivity_at_specificity(name): + def compute_sensitivity_at_specificity(tp, tn, fp, fn, name): specificities = math_ops.div(tn, tn + fp + kepsilon) tf_index = math_ops.argmin(math_ops.abs(specificities - specificity), 0) tf_index = math_ops.cast(tf_index, dtypes.int32) @@ -2046,9 +2032,11 @@ def sensitivity_at_specificity( tp[tf_index] + fn[tf_index] + kepsilon, name) - sensitivity = compute_sensitivity_at_specificity('value') - with ops.control_dependencies(update_ops.values()): - update_op = compute_sensitivity_at_specificity('update_op') + sensitivity = compute_sensitivity_at_specificity( + values['tp'], values['tn'], values['fp'], 
values['fn'], 'value') + update_op = compute_sensitivity_at_specificity( + update_ops['tp'], update_ops['tn'], update_ops['fp'], update_ops['fn'], + 'update_op') if metrics_collections: ops.add_to_collections(metrics_collections, sensitivity) @@ -2595,15 +2583,15 @@ def specificity_at_sensitivity( values, update_ops = _confusion_matrix_at_thresholds( labels, predictions, thresholds, weights) - tp = values['tp'] - fn = values['fn'] - tn = values['tn'] - fp = values['fp'] - def compute_specificity_at_sensitivity(name): + def compute_specificity_at_sensitivity(tp, tn, fp, fn, name): """"""Computes the specificity at the given sensitivity. Args: + tp: True positives. + tn: True negatives. + fp: False positives. + fn: False negatives. name: The name of the operation. Returns: @@ -2626,9 +2614,11 @@ def specificity_at_sensitivity( tn[tf_index] + fp[tf_index] + kepsilon, name) - specificity = compute_specificity_at_sensitivity('value') - with ops.control_dependencies(update_ops.values()): - update_op = compute_specificity_at_sensitivity('update_op') + specificity = compute_specificity_at_sensitivity( + values['tp'], values['tn'], values['fp'], values['fn'], 'value') + update_op = compute_specificity_at_sensitivity( + update_ops['tp'], update_ops['tn'], update_ops['fp'], update_ops['fn'], + 'update_op') if metrics_collections: ops.add_to_collections(metrics_collections, specificity) ",0,train 0a935589b9395b8275fa2ecd9fec2408a57d1b24,tensorflow/tensorflow,"Use optimized ArgMax to replace partial ArgSort when the number of category is one. PiperOrigin-RevId: 381393443 Change-Id: I2f0a47184d374543124b137b81741857e783bdeb",detection_postprocess.cc,"@@ -364,10 +364,14 @@ TfLiteStatus DecodeCenterSizeBoxes(TfLiteContext* context, TfLiteNode* node, void DecreasingPartialArgSort(const float* values, int num_values, int num_to_sort, int* indices) { - std::iota(indices, indices + num_values, 0); - std::partial_sort( - indices, indices + num_to_sort, indices + num_values, - [&values](const int i, const int j) { return values[i] > values[j]; }); + if (num_to_sort == 1) { + indices[0] = optimized_ops::ArgMaxVector(values, num_values); + } else { + std::iota(indices, indices + num_values, 0); + std::partial_sort( + indices, indices + num_to_sort, indices + num_values, + [&values](const int i, const int j) { return values[i] > values[j]; }); + } } void DecreasingArgSort(const float* values, int num_values, int* indices) { ",0,train 379d9a71d36be8728bf906c0af8d5519eeaa23cb,tensorflow/tensorflow,updates test function for new shuffle error type and message,numpy_io_test.py,"@@ -286,8 +286,9 @@ class NumpyIoTest(test.TestCase): x = np.arange(32, 36) y = np.arange(4) with self.test_session(): - with self.assertRaisesRegexp(TypeError, - 'shuffle must be explicitly set as boolean'): + with self.assertRaisesRegexp(ValueError, + 'shuffle must be provided and explicitly ' + 'set as boolean'): # Default shuffle is None. 
numpy_io.numpy_input_fn(x, y) ",0,test 379d9a71d36be8728bf906c0af8d5519eeaa23cb,tensorflow/tensorflow,updates test function for new shuffle error type and message,pandas_io_test.py,"@@ -70,8 +70,9 @@ class PandasIoTest(test.TestCase): return x, _ = self.makeTestDataFrame() y_noindex = pd.Series(np.arange(-32, -28)) - with self.assertRaisesRegexp(TypeError, - 'shuffle must be explicitly set as boolean'): + with self.assertRaisesRegexp(ValueError, + 'shuffle must be provided and explicitly ' + 'set as boolean'): # Default shuffle is None pandas_io.pandas_input_fn(x, y_noindex) ",0,test 14993b909aaa53f2713e234b3ad3a35aff4739e8,tensorflow/tensorflow,"rm duplicated implements for GrpcRemoteMaster (#12313) * rm duplicated implements for GrpcRemoteMaster * remove all duplicated implements * format for code style * typedef for MasterServiceStub",grpc_remote_master.cc,"@@ -32,6 +32,8 @@ namespace tensorflow { // GrpcRemoteMaster is an implementation of the MasterInterface // that uses gRPC to talk to the Master service. class GrpcRemoteMaster : public MasterInterface { + using MasterServiceStub = grpc::MasterService::Stub; + public: explicit GrpcRemoteMaster(const SharedGrpcChannelPtr& client_channel) : stub_(grpc::MasterService::NewStub(client_channel)) {} @@ -42,63 +44,56 @@ class GrpcRemoteMaster : public MasterInterface { const CreateSessionRequest* request, CreateSessionResponse* response) override { ::grpc::ClientContext ctx; - ctx.set_fail_fast(false); - SetDeadline(&ctx, call_options->GetTimeout()); - return FromGrpcStatus(stub_->CreateSession(&ctx, *request, response)); + return Call(&ctx, call_options, request, response, + &MasterServiceStub::CreateSession); } Status ExtendSession(CallOptions* call_options, const ExtendSessionRequest* request, ExtendSessionResponse* response) override { ::grpc::ClientContext ctx; - ctx.set_fail_fast(false); - SetDeadline(&ctx, call_options->GetTimeout()); - return FromGrpcStatus(stub_->ExtendSession(&ctx, *request, response)); + return Call(&ctx, call_options, request, response, + &MasterServiceStub::ExtendSession); } Status PartialRunSetup(CallOptions* call_options, const PartialRunSetupRequest* request, PartialRunSetupResponse* response) override { ::grpc::ClientContext ctx; - ctx.set_fail_fast(false); - SetDeadline(&ctx, call_options->GetTimeout()); - return FromGrpcStatus(stub_->PartialRunSetup(&ctx, *request, response)); + return Call(&ctx, call_options, request, response, + &MasterServiceStub::PartialRunSetup); } Status RunStep(CallOptions* call_options, RunStepRequestWrapper* request, MutableRunStepResponseWrapper* response) override { ::grpc::ClientContext ctx; auto trace = TraceRpc(""RunStep/Client"", &ctx); - ctx.set_fail_fast(false); - SetDeadline(&ctx, call_options->GetTimeout()); - return FromGrpcStatus(stub_->RunStep(&ctx, request->ToProto(), - get_proto_from_wrapper(response))); + return Call(&ctx, call_options, &request->ToProto(), + get_proto_from_wrapper(response), + &MasterServiceStub::RunStep); } Status CloseSession(CallOptions* call_options, const CloseSessionRequest* request, CloseSessionResponse* response) override { ::grpc::ClientContext ctx; - ctx.set_fail_fast(false); - SetDeadline(&ctx, call_options->GetTimeout()); - return FromGrpcStatus(stub_->CloseSession(&ctx, *request, response)); + return Call(&ctx, call_options, request, response, + &MasterServiceStub::CloseSession); } Status ListDevices(CallOptions* call_options, const ListDevicesRequest* request, ListDevicesResponse* response) override { ::grpc::ClientContext ctx; - 
ctx.set_fail_fast(false); - SetDeadline(&ctx, call_options->GetTimeout()); - return FromGrpcStatus(stub_->ListDevices(&ctx, *request, response)); + return Call(&ctx, call_options, request, response, + &MasterServiceStub::ListDevices); } Status Reset(CallOptions* call_options, const ResetRequest* request, ResetResponse* response) override { ::grpc::ClientContext ctx; - ctx.set_fail_fast(false); - SetDeadline(&ctx, call_options->GetTimeout()); - return FromGrpcStatus(stub_->Reset(&ctx, *request, response)); + return Call(&ctx, call_options, request, response, + &MasterServiceStub::Reset); } private: @@ -110,13 +105,23 @@ class GrpcRemoteMaster : public MasterInterface { return port::Tracing::TraceMe(name, trace_id); } - std::unique_ptr stub_; - void SetDeadline(::grpc::ClientContext* ctx, int64 time_in_ms) { if (time_in_ms > 0) { ctx->set_deadline(gpr_time_from_millis(time_in_ms, GPR_TIMESPAN)); } } + + template + Status Call(::grpc::ClientContext* ctx, CallOptions* call_options, + const Request* request, Response* response, + ::grpc::Status (MasterServiceStub::*pfunc)( + ::grpc::ClientContext*, const Request&, Response*)) { + ctx->set_fail_fast(false); + SetDeadline(ctx, call_options->GetTimeout()); + return FromGrpcStatus((stub_.get()->*pfunc)(ctx, *request, response)); + } + + std::unique_ptr stub_; }; MasterInterface* NewGrpcMaster(const SharedGrpcChannelPtr& channel) { ",0,test 4f933f5b9ba6469fce4d5142246ce607edbbcb00,tensorflow/tensorflow,pooling ops build fix.,mkl_pooling_ops_common.cc,"@@ -180,7 +180,7 @@ void MklPoolingBwdPrimitive::Setup(const MklPoolingParams& bwdParams) { context_.alg_kind = bwdParams.alg_kind; // Create memory descriptor. - context_.diff_src_md.reset(new memory::desc( + context_.src_md.reset(new memory::desc( {bwdParams.src_dims}, MklDnnType(), MEMORY_FORMAT::any)); #ifndef ENABLE_MKLDNN_V1 context_.diff_dst_md.reset(new memory::desc( ",0,train e91d2e843f8ef2b35179706f8b1fe964a663a988,tensorflow/tensorflow,"Update GraphDef version to 823. PiperOrigin-RevId: 383993661 Change-Id: I5acb57ddde52ce83a167f09ce46cf153dca6565a",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 822 // Updated: 2021/7/9 +#define TF_GRAPH_DEF_VERSION 823 // Updated: 2021/7/10 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). 
// ",0,train ce9b1295b5689129fe1a35ae75faf069b0fe24ca,tensorflow/tensorflow,fix,dlpack.cc,"@@ -323,7 +323,7 @@ TFE_TensorHandle* TFE_HandleFromDLPack(void* dlm, TF_Status* status, TFE_TensorHandle* handle = TFE_NewTensorHandleFromDeviceMemory( ctx, device_name.value().c_str(), dtype, dims, num_dims, data, - total_bytes, &DeallocatorWrapperFunc, &dlmt, status); + total_bytes, &DeallocatorWrapperFunc, dlmt, status); return handle; } ",0,train ce9b1295b5689129fe1a35ae75faf069b0fe24ca,tensorflow/tensorflow,fix,tfe_wrapper.cc,"@@ -1169,7 +1169,8 @@ PYBIND11_MODULE(_pywrap_tfe, m) { PyCapsule_SetName(pycapsule.ptr(), ""used_dltensor""); PyCapsule_SetDestructor(pycapsule.ptr(), nullptr); - PyObject* pyhandle = EagerTensorFromHandle(thandle, true); + + PyObject* pyhandle = EagerTensorFromHandle(thandle); return tensorflow::PyoOrThrow(pyhandle); }); ",0,train 21d699a745bd8225f38508658415bd2739bf500f,tensorflow/tensorflow,"Stop using static initializer in cpu utils, just removed initializing method Change: 133406514",android_armv7a_cpu_utils_helper.cc,"@@ -31,26 +31,6 @@ namespace profile_utils { /* static */ constexpr int AndroidArmV7ACpuUtilsHelper::INVALID_FD; -void AndroidArmV7ACpuUtilsHelper::Initialize() { - struct perf_event_attr pe; - - memset(&pe, 0, sizeof(struct perf_event_attr)); - pe.type = PERF_TYPE_HARDWARE; - pe.size = sizeof(struct perf_event_attr); - pe.config = PERF_COUNT_HW_CPU_CYCLES; - pe.disabled = 1; - pe.exclude_kernel = 1; - pe.exclude_hv = 1; - - fd_ = OpenPerfEvent(&pe, 0, -1, -1, 0); - if (fd_ == INVALID_FD) { - LOG(WARNING) << ""Error opening perf event""; - is_initialized_ = false; - } else { - is_initialized_ = true; - } -} - void AndroidArmV7ACpuUtilsHelper::ResetClockCycle() { if (!is_initialized_) { return; @@ -98,7 +78,6 @@ int AndroidArmV7ACpuUtilsHelper::OpenPerfEvent( namespace tensorflow { namespace profile_utils { -void AndroidArmV7ACpuUtilsHelper::Initialize() {} void AndroidArmV7ACpuUtilsHelper::ResetClockCycle() {} uint64 AndroidArmV7ACpuUtilsHelper::GetCurrentClockCycle() { return 1; } void AndroidArmV7ACpuUtilsHelper::EnableClockCycleProfiling(bool) {} ",0,train 21d699a745bd8225f38508658415bd2739bf500f,tensorflow/tensorflow,"Stop using static initializer in cpu utils, just removed initializing method Change: 133406514",android_armv7a_cpu_utils_helper.h,"@@ -27,7 +27,6 @@ namespace profile_utils { class AndroidArmV7ACpuUtilsHelper : public ICpuUtilsHelper { public: AndroidArmV7ACpuUtilsHelper() = default; - void Initialize() final; void ResetClockCycle() final; uint64 GetCurrentClockCycle() final; void EnableClockCycleProfiling(bool enable) final; ",0,train 21d699a745bd8225f38508658415bd2739bf500f,tensorflow/tensorflow,"Stop using static initializer in cpu utils, just removed initializing method Change: 133406514",cpu_utils.cc,"@@ -14,64 +14,53 @@ limitations under the License. 
==============================================================================*/ #include ""tensorflow/core/platform/profile_utils/cpu_utils.h"" + +#include +#include + #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.h"" namespace tensorflow { namespace profile_utils { -namespace { - -const class StaticVariableInitializer { - public: - StaticVariableInitializer() { CpuUtils::Initialize(); } -} STATIC_VARIABLE_INITIALIZER; - -} // anonymous namespace for initializer - /* static */ constexpr int64 CpuUtils::INVALID_FREQUENCY; -/* static */ int64 CpuUtils::GetCpuFrequency() { - static const int64 cpu_frequency = GetCpuFrequencyImpl(); - return cpu_frequency; -} +static ICpuUtilsHelper* cpu_utils_helper_instance_ = nullptr; -/* static */ int CpuUtils::GetClockPerMicroSec() { - static const int clock_per_micro_sec = - static_cast(GetCpuFrequency() / (1000LL * 1000LL)); - return clock_per_micro_sec; +/* static */ int64 CpuUtils::GetCycleCounterFrequency() { + static const int64 cpu_frequency = GetCycleCounterFrequencyImpl(); + return cpu_frequency; } /* static */ double CpuUtils::GetMicroSecPerClock() { static const double micro_sec_per_clock = - (1000.0 * 1000.0) / static_cast(GetCpuFrequency()); + (1000.0 * 1000.0) / static_cast(GetCycleCounterFrequency()); return micro_sec_per_clock; } -/* static */ void CpuUtils::Initialize() { - CpuUtils::GetCpuFrequency(); - CpuUtils::GetClockPerMicroSec(); - CpuUtils::GetMicroSecPerClock(); - GetCpuUtilsHelper().Initialize(); -} - /* static */ void CpuUtils::ResetClockCycle() { - GetCpuUtilsHelper().ResetClockCycle(); + GetCpuUtilsHelperSingletonInstance().ResetClockCycle(); } /* static */ void CpuUtils::EnableClockCycleProfiling(const bool enable) { - GetCpuUtilsHelper().EnableClockCycleProfiling(enable); + GetCpuUtilsHelperSingletonInstance().EnableClockCycleProfiling(enable); } -/* static */ int64 CpuUtils::GetCpuFrequencyImpl() { -// TODO(satok): do not switch by macro here +/* static */ int64 CpuUtils::GetCycleCounterFrequencyImpl() { #if defined(__ANDROID__) - // TODO:(satok): Support Android + // TODO(satok): Support android return INVALID_FREQUENCY; #elif defined(__linux__) double bogomips; FILE* fp = popen(""grep '^bogomips' /proc/cpuinfo | head -1"", ""r""); + if (fp == nullptr) { + return INVALID_FREQUENCY; + } const int retval_of_bogomips = fscanf(fp, ""bogomips : %lf"", &bogomips); + if (retval_of_bogomips <= 0) { + return INVALID_FREQUENCY; + } pclose(fp); const double freq_ghz = bogomips / 1000.0 / 2.0; if (retval_of_bogomips != 1 || freq_ghz < 0.01) { @@ -83,7 +72,12 @@ const class StaticVariableInitializer { int64 freq_hz; FILE* fp = popen(""sysctl hw | grep hw.cpufrequency_max: | cut -d' ' -f 2"", ""r""); - fscanf(fp, ""%lld"", &freq_hz); + if (fp == nullptr) { + return INVALID_FREQUENCY; + } + if (fscanf(fp, ""%lld"", &freq_hz) != 1) { + return INVALID_FREQUENCY; + } pclose(fp); if (freq_hz < 1e6) { LOG(WARNING) << ""Failed to get CPU frequency: "" << freq_hz << "" Hz""; @@ -97,14 +91,19 @@ const class StaticVariableInitializer { #endif } -/* static */ ICpuUtilsHelper& CpuUtils::GetCpuUtilsHelper() { +/* static */ ICpuUtilsHelper& CpuUtils::GetCpuUtilsHelperSingletonInstance() { + static std::once_flag flag; + std::call_once(flag, []() { + if (cpu_utils_helper_instance_ != nullptr) { + LOG(FATAL) << ""cpu_utils_helper_instance_ is already instantiated.""; + } #if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21) - static 
AndroidArmV7ACpuUtilsHelper cpu_utils_helper; + cpu_utils_helper_instance_ = new AndroidArmV7ACpuUtilsHelper(); #else - // TODO(satok): Change CpuUtilsHelper by cpu architecture - static DefaultCpuUtilsHelper cpu_utils_helper; + cpu_utils_helper_instance_ = new DefaultCpuUtilsHelper(); #endif - return cpu_utils_helper; + }); + return *cpu_utils_helper_instance_; } } // namespace profile_utils ",0,train 21d699a745bd8225f38508658415bd2739bf500f,tensorflow/tensorflow,"Stop using static initializer in cpu utils, just removed initializing method Change: 133406514",cpu_utils.h,"@@ -17,6 +17,8 @@ limitations under the License. #ifndef TENSORFLOW_PLATFORM_PROFILEUTILS_CPU_UTILS_H__ #define TENSORFLOW_PLATFORM_PROFILEUTILS_CPU_UTILS_H__ +#include + #include ""tensorflow/core/platform/macros.h"" #include ""tensorflow/core/platform/profile_utils/i_cpu_utils_helper.h"" #include ""tensorflow/core/platform/types.h"" @@ -29,6 +31,13 @@ namespace tensorflow { namespace profile_utils { +// CpuUtils is a profiling tool with static functions +// designed to be called from multiple classes. +// A dedicated class which inherits ICpuUtilsHelper is +// stored as a function-local static variable which inherits +// GetCpuUtilsHelperSingletonInstance that caches CPU information, +// because loading CPU information may take a long time. +// Users must call EnableClockCycleProfiling before using CpuUtils. class CpuUtils { public: // Constant for invalid frequency. @@ -44,7 +53,7 @@ class CpuUtils { static inline uint64 GetCurrentClockCycle() { #if defined(__ANDROID__) #if defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21) - return GetCpuUtilsHelper().GetCurrentClockCycle(); + return GetCpuUtilsHelperSingletonInstance().GetCurrentClockCycle(); #else // defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21) return DUMMY_CYCLE_CLOCK; #endif // defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21) @@ -88,25 +97,16 @@ class CpuUtils { #endif } - // Return cpu frequency. As this method caches the cpu frequency internally, - // there is no overhead except function call to call this method. - static int64 GetCpuFrequency(); - - // Return cached cpu count per each micro second. + // Return cycle counter frequency. // As this method caches the cpu frequency internally, - // there is no overhead except function call to call this method. - static int GetClockPerMicroSec(); + // the first call will incur overhead, but not subsequent calls. + static int64 GetCycleCounterFrequency(); // Return micro secound per each clock // As this method caches the cpu frequency internally, - // there is no overhead except function call to call this method. + // the first call will incur overhead, but not subsequent calls. static double GetMicroSecPerClock(); - // Initialize CpuUtils - // This method is called from the static initializer declared in cpu_utils.cc - // This initializes state and cached static variables declared in functions. - static void Initialize(); - // Reset clock cycle // Resetting clock cycle is recommended to prevent // clock cycle counters from overflowing on some platforms. @@ -120,7 +120,6 @@ class CpuUtils { class DefaultCpuUtilsHelper : public ICpuUtilsHelper { public: DefaultCpuUtilsHelper() = default; - void Initialize() final {} void ResetClockCycle() final {} uint64 GetCurrentClockCycle() final { return DUMMY_CYCLE_CLOCK; } void EnableClockCycleProfiling(bool /* enable */) final {} @@ -133,9 +132,15 @@ class CpuUtils { // CAVEAT: as this method calls system call and parse the mssage, // this call may be slow. 
This is why this class caches the value by // StaticVariableInitializer. - static int64 GetCpuFrequencyImpl(); - - static ICpuUtilsHelper& GetCpuUtilsHelper(); + static int64 GetCycleCounterFrequencyImpl(); + + // Return a singleton of ICpuUtilsHelper + // ICpuUtilsHelper is declared as a function-local static variable + // for the following two reasons: + // 1. Avoid passing instances to all classes which want + // to use profiling tools in CpuUtils + // 2. Minimize the overhead of acquiring ICpuUtilsHelper + static ICpuUtilsHelper& GetCpuUtilsHelperSingletonInstance(); TF_DISALLOW_COPY_AND_ASSIGN(CpuUtils); }; ",0,train 21d699a745bd8225f38508658415bd2739bf500f,tensorflow/tensorflow,"Stop using static initializer in cpu utils, just removed initializing method Change: 133406514",cpu_utils_test.cc,"@@ -23,7 +23,16 @@ namespace profile_utils { static constexpr bool DBG = false; -TEST(CpuUtils, CheckGetCurrentClockCycle) { +class CpuUtilsTest : public ::testing::Test { + protected: + void SetUp() { CpuUtils::EnableClockCycleProfiling(true); } +}; + +TEST_F(CpuUtilsTest, SetUpTestCase) {} + +TEST_F(CpuUtilsTest, TearDownTestCase) {} + +TEST_F(CpuUtilsTest, CheckGetCurrentClockCycle) { static constexpr int LOOP_COUNT = 10; const uint64 start_clock_count = CpuUtils::GetCurrentClockCycle(); CHECK_GT(start_clock_count, 0); @@ -42,8 +51,8 @@ TEST(CpuUtils, CheckGetCurrentClockCycle) { } } -TEST(CpuUtils, CheckCpuFrequency) { - const int64 cpu_frequency = CpuUtils::GetCpuFrequency(); +TEST_F(CpuUtilsTest, CheckCycleCounterFrequency) { + const int64 cpu_frequency = CpuUtils::GetCycleCounterFrequency(); CHECK_GT(cpu_frequency, 0); CHECK_NE(cpu_frequency, CpuUtils::INVALID_FREQUENCY); if (DBG) { @@ -51,15 +60,7 @@ TEST(CpuUtils, CheckCpuFrequency) { } } -TEST(CpuUtils, CheckClockPerMicroSec) { - const int clock_per_micro_sec = CpuUtils::GetClockPerMicroSec(); - CHECK_GT(clock_per_micro_sec, 0); - if (DBG) { - LOG(INFO) << ""Clock per micro sec = "" << clock_per_micro_sec; - } -} - -TEST(CpuUtils, CheckMicroSecPerClock) { +TEST_F(CpuUtilsTest, CheckMicroSecPerClock) { const double micro_sec_per_clock = CpuUtils::GetMicroSecPerClock(); CHECK_GT(micro_sec_per_clock, 0.0); if (DBG) { ",0,train 21d699a745bd8225f38508658415bd2739bf500f,tensorflow/tensorflow,"Stop using static initializer in cpu utils, just removed initializing method Change: 133406514",i_cpu_utils_helper.h,"@@ -24,13 +24,11 @@ namespace profile_utils { // ICpuUtilsHelper is an interface class for cpu_utils which proxies // the difference of profiling functions of different platforms. +// Overridden functions must be thread safe. class ICpuUtilsHelper { public: ICpuUtilsHelper() = default; virtual ~ICpuUtilsHelper() = default; - // Initialize CpuUtilsHelper. - // This method is called only once when CpuUtils is loaded. - virtual void Initialize() = 0; // Reset clock cycle. // Resetting clock cycle is recommended to prevent // clock cycle counters from overflowing on some platforms. ",0,train d80fa4ebdccffde26334d04ecfc7935887c603e2,tensorflow/tensorflow,"Bidirectional rnn now returns forward and backward output states, updated tests, tests pass Updated docstring for bidirectional_rnn Updated bidirectional test Using self.assertAllClose instead of explicit iterators. 
Fixed some alignments in rnn_test",rnn_test.py,"@@ -771,28 +771,30 @@ class BidirectionalRNNTest(tf.test.TestCase): tf.placeholder(tf.float32, shape=(batch_size, input_size) if use_shape else None) ] - outputs = tf.nn.bidirectional_rnn(cell_fw, - cell_bw, - inputs, - dtype=tf.float32, - sequence_length=sequence_length) + outputs, state_fw, state_bw = tf.nn.bidirectional_rnn(cell_fw, + cell_bw, + inputs, + dtype=tf.float32, + sequence_length=sequence_length) self.assertEqual(len(outputs), len(inputs)) for out in outputs: self.assertEqual(out.get_shape().as_list(), [batch_size if use_shape else None, 2 * num_units]) input_value = np.random.randn(batch_size, input_size) + outputs = tf.pack(outputs) - return input_value, inputs, outputs, sequence_length + return input_value, inputs, outputs, state_fw, state_bw, sequence_length def _testBidirectionalRNN(self, use_gpu, use_shape): with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess: - input_value, inputs, outputs, sequence_length = ( + input_value, inputs, outputs, state_fw, state_bw, sequence_length = ( self._createBidirectionalRNN(use_gpu, use_shape, True)) tf.initialize_all_variables().run() # Run with pre-specified sequence length of 2, 3 - out = sess.run(outputs, feed_dict={inputs[0]: input_value, - sequence_length: [2, 3]}) + out, s_fw, s_bw = sess.run([outputs, state_fw, state_bw], + feed_dict={inputs[0]: input_value, + sequence_length: [2, 3]}) # Since the forward and backward LSTM cells were initialized with the # same parameters, the forward and backward output has to be the same, @@ -824,13 +826,17 @@ class BidirectionalRNNTest(tf.test.TestCase): self.assertEqual(out[2][1][0], out[0][1][3]) self.assertEqual(out[2][1][1], out[0][1][4]) self.assertEqual(out[2][1][2], out[0][1][5]) + # Via the reasoning above, the forward and backward final state should be + # exactly the same + self.assertAllClose(s_fw, s_bw) def _testBidirectionalRNNWithoutSequenceLength(self, use_gpu, use_shape): with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess: - input_value, inputs, outputs, _ = self._createBidirectionalRNN( - use_gpu, use_shape, False) + input_value, inputs, outputs, state_fw, state_bw, _ = self._createBidirectionalRNN( + use_gpu, use_shape, False) tf.initialize_all_variables().run() - out = sess.run(outputs, feed_dict={inputs[0]: input_value}) + out, s_fw, s_bw = sess.run([outputs, state_fw, state_bw], + feed_dict={inputs[0]: input_value}) # Since the forward and backward LSTM cells were initialized with the # same parameters, the forward and backward output has to be the same, @@ -849,6 +855,9 @@ class BidirectionalRNNTest(tf.test.TestCase): self.assertEqual(out[i][1][0], out[8 - 1 - i][1][3]) self.assertEqual(out[i][1][1], out[8 - 1 - i][1][4]) self.assertEqual(out[i][1][2], out[8 - 1 - i][1][5]) + # Via the reasoning above, the forward and backward final state should be + # exactly the same + self.assertAllClose(s_fw, s_bw) def testBidirectionalRNN(self): self._testBidirectionalRNN(use_gpu=False, use_shape=False) ",0,train d80fa4ebdccffde26334d04ecfc7935887c603e2,tensorflow/tensorflow,"Bidirectional rnn now returns forward and backward output states, updated tests, tests pass Updated docstring for bidirectional_rnn Updated bidirectional test Using self.assertAllClose instead of explicit iterators. 
Fixed some alignments in rnn_test",rnn.py,"@@ -293,9 +293,11 @@ def bidirectional_rnn(cell_fw, cell_bw, inputs, scope: VariableScope for the created subgraph; defaults to ""BiRNN"" Returns: - A set of output `Tensors` where: + A tuple (outputs, output_state_fw, output_state_bw) where: outputs is a length T list of outputs (one for each input), which are depth-concatenated forward and backward outputs + output_state_fw is the final state of the forward rnn + output_state_bw is the final state of the backward rnn Raises: TypeError: If ""cell_fw"" or ""cell_bw"" is not an instance of RNNCell. @@ -314,19 +316,19 @@ def bidirectional_rnn(cell_fw, cell_bw, inputs, name = scope or ""BiRNN"" # Forward direction with vs.variable_scope(name + ""_FW"") as fw_scope: - output_fw, _ = rnn(cell_fw, inputs, initial_state_fw, dtype, + output_fw, output_state_fw = rnn(cell_fw, inputs, initial_state_fw, dtype, sequence_length, scope=fw_scope) # Backward direction with vs.variable_scope(name + ""_BW"") as bw_scope: - tmp, _ = rnn(cell_bw, _reverse_seq(inputs, sequence_length), + tmp, output_state_bw = rnn(cell_bw, _reverse_seq(inputs, sequence_length), initial_state_bw, dtype, sequence_length, scope=bw_scope) output_bw = _reverse_seq(tmp, sequence_length) # Concat each of the forward/backward outputs outputs = [array_ops.concat(1, [fw, bw]) for fw, bw in zip(output_fw, output_bw)] - return outputs + return (outputs, output_state_fw, output_state_bw) def dynamic_rnn(cell, inputs, sequence_length, initial_state=None, dtype=None, ",0,train 4d120b703ee1b28bc5dcc719d04150688ce32361,tensorflow/tensorflow,"Use std::move for functions in gpu EventMgr. Change: 143193447",gpu_event_mgr.h,"@@ -83,7 +83,7 @@ class EventMgr { ToFreeVector to_free; { mutex_lock l(mu_); - QueueFunc(stream, func); + QueueFunc(stream, std::move(func)); PollEvents(false, &to_free); } FreeMemory(to_free); @@ -147,7 +147,7 @@ class EventMgr { void QueueFunc(perftools::gputools::Stream* stream, std::function func) EXCLUSIVE_LOCKS_REQUIRED(mu_) { - QueueInUse(stream, {nullptr, nullptr, BufRec(), func}); + QueueInUse(stream, {nullptr, nullptr, BufRec(), std::move(func)}); } // This function should be called at roughly the same tempo as ",0,train 8e9f3196fd8841de83bd6a622df696ea191d1d78,tensorflow/tensorflow,"Added a bunch of unary ops to the estimator. 
PiperOrigin-RevId: 324607213 Change-Id: I24369f36cc29f68caac412a5d3076f5ef43859fe",op_level_cost_estimator.cc,"@@ -522,6 +522,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() { // Unary ops alphabetically sorted elementwise_ops_.emplace(""Acos"", EIGEN_COST(scalar_acos_op)); + elementwise_ops_.emplace(""All"", EIGEN_COST(scalar_boolean_and_op)); + elementwise_ops_.emplace(""ArgMax"", EIGEN_COST(scalar_max_op)); elementwise_ops_.emplace(""Asin"", EIGEN_COST(scalar_asin_op)); elementwise_ops_.emplace(""Atan"", EIGEN_COST(scalar_atan_op)); elementwise_ops_.emplace(""Atan2"", EIGEN_COST(scalar_quotient_op) + @@ -546,7 +548,10 @@ OpLevelCostEstimator::OpLevelCostEstimator() { elementwise_ops_.emplace(""Lgamma"", 1); elementwise_ops_.emplace(""Log"", EIGEN_COST(scalar_log_op)); elementwise_ops_.emplace(""Log1p"", EIGEN_COST(scalar_log1p_op)); + elementwise_ops_.emplace(""Max"", EIGEN_COST(scalar_max_op)); + elementwise_ops_.emplace(""Min"", EIGEN_COST(scalar_min_op)); elementwise_ops_.emplace(""Neg"", EIGEN_COST(scalar_opposite_op)); + elementwise_ops_.emplace(""Prod"", EIGEN_COST(scalar_product_op)); elementwise_ops_.emplace(""QuantizeAndDequantizeV2"", quantize_and_dequantize_v2_cost); elementwise_ops_.emplace(""QuantizedSigmoid"", @@ -554,6 +559,7 @@ OpLevelCostEstimator::OpLevelCostEstimator() { elementwise_ops_.emplace(""QuantizeV2"", quantize_v2_cost); elementwise_ops_.emplace(""Reciprocal"", EIGEN_COST(scalar_inverse_op)); elementwise_ops_.emplace(""Relu"", EIGEN_COST(scalar_max_op)); + elementwise_ops_.emplace(""Relu6"", EIGEN_COST(scalar_max_op)); elementwise_ops_.emplace(""Rint"", 1); elementwise_ops_.emplace(""Round"", EIGEN_COST(scalar_round_op)); elementwise_ops_.emplace(""Rsqrt"", EIGEN_COST(scalar_rsqrt_op)); @@ -562,8 +568,10 @@ OpLevelCostEstimator::OpLevelCostEstimator() { elementwise_ops_.emplace(""Sin"", EIGEN_COST(scalar_sin_op)); elementwise_ops_.emplace(""Sqrt"", EIGEN_COST(scalar_sqrt_op)); elementwise_ops_.emplace(""Square"", EIGEN_COST(scalar_square_op)); + elementwise_ops_.emplace(""Sum"", EIGEN_COST(scalar_sum_op)); elementwise_ops_.emplace(""Tan"", EIGEN_COST(scalar_tan_op)); elementwise_ops_.emplace(""Tanh"", EIGEN_COST(scalar_tanh_op)); + elementwise_ops_.emplace(""TopKV2"", EIGEN_COST(scalar_max_op)); // Binary ops alphabetically sorted elementwise_ops_.emplace(""Add"", EIGEN_COST(scalar_sum_op)); elementwise_ops_.emplace(""AddV2"", EIGEN_COST(scalar_sum_op)); ",0,train 8e9f3196fd8841de83bd6a622df696ea191d1d78,tensorflow/tensorflow,"Added a bunch of unary ops to the estimator. 
PiperOrigin-RevId: 324607213 Change-Id: I24369f36cc29f68caac412a5d3076f5ef43859fe",op_level_cost_estimator_test.cc,"@@ -939,24 +939,29 @@ TEST_F(OpLevelCostEstimatorTest, SquaredDifferenceExecutionTime) { EXPECT_EQ(cost.num_ops_with_unknown_shapes, 0); } -TEST_F(OpLevelCostEstimatorTest, ReluExecutionTime) { - auto cost = PredictCosts(DescribeUnaryOp(""Relu"", 1000)); - EXPECT_EQ(Costs::Duration(800), cost.memory_time); - EXPECT_EQ(Costs::Duration(100), cost.compute_time); - EXPECT_EQ(Costs::Duration(900), cost.execution_time); - EXPECT_EQ(1, cost.num_ops_total); - EXPECT_FALSE(cost.inaccurate); - EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); -} +TEST_F(OpLevelCostEstimatorTest, UnaryOpExecutionTime) { + std::vector> unary_ops = { + {""All"", 1}, {""ArgMax"", 1}, {""Cast"", 1}, {""Max"", 1}, {""Min"", 1}, + {""Prod"", 1}, {""Relu"", 1}, {""Relu6"", 1}, {""Sum"", 1}, {""TopKV2"", 1}}; -TEST_F(OpLevelCostEstimatorTest, CastExecutionTime) { - auto cost = PredictCosts(DescribeUnaryOp(""Cast"", 1000)); - EXPECT_EQ(Costs::Duration(800), cost.memory_time); - EXPECT_EQ(Costs::Duration(100), cost.compute_time); - EXPECT_EQ(Costs::Duration(900), cost.execution_time); - EXPECT_EQ(1, cost.num_ops_total); - EXPECT_FALSE(cost.inaccurate); - EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); + const int kTensorSize = 1000; + for (auto unary_op : unary_ops) { + OpContext op_context = DescribeUnaryOp(unary_op.first, kTensorSize); + + const int kExpectedMemoryTime = 800; + int expected_compute_time = std::ceil( + unary_op.second * kTensorSize / + estimator_.GetDeviceInfo(op_context.op_info.device()).gigaops); + + auto cost = PredictCosts(op_context); + EXPECT_EQ(cost.memory_time, Costs::Duration(kExpectedMemoryTime)); + EXPECT_EQ(cost.compute_time, Costs::Duration(expected_compute_time)); + EXPECT_EQ(cost.execution_time, + Costs::Duration(expected_compute_time + kExpectedMemoryTime)); + EXPECT_EQ(cost.num_ops_total, 1); + EXPECT_EQ(cost.num_ops_with_unknown_shapes, 0); + EXPECT_FALSE(cost.inaccurate); + } } TEST_F(OpLevelCostEstimatorTest, BroadcastAddExecutionTime) { ",0,train 69613d25c3f82652c636c5a1c1b42029dc427979,tensorflow/tensorflow,"More handle_data fixing. I'm not sure why our existing tests didn't catch this... PiperOrigin-RevId: 199206183",function.py,"@@ -720,6 +720,8 @@ class _FuncGraph(ops.Graph): if ops._USE_C_SHAPES: if isinstance(tensor, ops.EagerTensor): handle_data = tensor._handle_data + if handle_data: + handle_data = handle_data.SerializeToString() else: handle_data = c_api.GetResourceHandleShapeAndType( tensor.graph._c_graph, tensor._as_tf_output()) ",0,train 14101a8ba173b44179b2a9317781f140eb61b0a1,tensorflow/tensorflow,"Fix TensorForest for 32-bit platforms. PiperOrigin-RevId: 161077247",stats_ops.cc,"@@ -159,20 +159,17 @@ void TraverseTree(const DecisionTreeResource* tree_resource, // until they're gone. void UpdateStats(FertileStatsResource* fertile_stats_resource, const std::unique_ptr& data, - const Tensor& input_labels, const Tensor& input_weights, - int num_targets, const std::vector& leaf_ids, + const TensorInputTarget& target, int num_targets, + const std::vector& leaf_ids, const std::vector& leaf_depths, std::unordered_map>* locks, mutex* set_lock, int32 start, int32 end, std::unordered_set* ready_to_split) { - const auto labels = input_labels.unaligned_flat(); - const auto weights = input_weights.unaligned_flat(); // Stores leaf_id, leaf_depth, example_id for examples that are waiting // on another to finish. 
std::queue> waiting; int32 i = start; - TensorInputTarget target(&labels, &weights, input_labels, num_targets); while (i < end || !waiting.empty()) { int32 leaf_id; int32 leaf_depth; @@ -214,15 +211,11 @@ void UpdateStats(FertileStatsResource* fertile_stats_resource, void UpdateStatsCollated( FertileStatsResource* fertile_stats_resource, DecisionTreeResource* tree_resource, - const std::unique_ptr& data, const Tensor& input_labels, - const Tensor& input_weights, int num_targets, + const std::unique_ptr& data, const TensorInputTarget& target, + int num_targets, const std::unordered_map>& leaf_examples, const std::vector& leaf_depths, mutex* set_lock, int32 start, int32 end, std::unordered_set* ready_to_split) { - const auto labels = input_labels.unaligned_flat(); - const auto weights = input_weights.unaligned_flat(); - - TensorInputTarget target(&labels, &weights, input_labels, num_targets); auto it = leaf_examples.begin(); std::advance(it, start); auto end_it = leaf_examples.begin(); @@ -335,32 +328,33 @@ class ProcessInputOp : public OpKernel { std::unordered_set ready_to_split; mutex set_lock; + TensorInputTarget target(input_labels, input_weights, num_targets); + // TODO(gilberth): This is a rough approximation based on measurements // from a digits run on local desktop. Heuristics might be necessary // if it really matters that much. const int64 costPerUpdate = 1000; - auto update = [this, &input_labels, &input_weights, &leaf_ids, &leaf_depths, - &num_targets, fertile_stats_resource, &locks, &set_lock, - &ready_to_split, num_data](int64 start, int64 end) { + auto update = [this, &target, &leaf_ids, &leaf_depths, &num_targets, + fertile_stats_resource, &locks, &set_lock, &ready_to_split, + num_data](int64 start, int64 end) { CHECK(start <= end); CHECK(end <= num_data); - UpdateStats(fertile_stats_resource, data_set_, input_labels, - input_weights, num_targets, leaf_ids, leaf_depths, &locks, - &set_lock, static_cast(start), static_cast(end), + UpdateStats(fertile_stats_resource, data_set_, target, num_targets, + leaf_ids, leaf_depths, &locks, &set_lock, + static_cast(start), static_cast(end), &ready_to_split); }; - auto update_collated = [this, &input_labels, &input_weights, &leaf_ids, - &num_targets, &leaf_depths, fertile_stats_resource, - tree_resource, &leaf_examples, &set_lock, - &ready_to_split, + auto update_collated = [this, &target, &leaf_ids, &num_targets, + &leaf_depths, fertile_stats_resource, tree_resource, + &leaf_examples, &set_lock, &ready_to_split, num_leaves](int64 start, int64 end) { CHECK(start <= end); CHECK(end <= num_leaves); - UpdateStatsCollated( - fertile_stats_resource, tree_resource, data_set_, input_labels, - input_weights, num_targets, leaf_examples, leaf_depths, &set_lock, - static_cast(start), static_cast(end), &ready_to_split); + UpdateStatsCollated(fertile_stats_resource, tree_resource, data_set_, + target, num_targets, leaf_examples, leaf_depths, + &set_lock, static_cast(start), + static_cast(end), &ready_to_split); }; if (param_proto_.collate_examples()) { ",0,train 14101a8ba173b44179b2a9317781f140eb61b0a1,tensorflow/tensorflow,"Fix TensorForest for 32-bit platforms. 
PiperOrigin-RevId: 161077247",grow_stats_test.cc,"@@ -76,7 +76,7 @@ TEST(GrowStatsDenseClassificationTest, Basic) { std::vector labels = {1, 0, 1}; std::vector weights = {2.3, 20.3, 1.1}; std::unique_ptr target( - new TestableInputTarget(&labels, &weights, 1)); + new TestableInputTarget(labels, weights, 1)); RunBatch(stat.get(), target.get()); CHECK(stat->IsFinished()); @@ -127,7 +127,7 @@ TEST(GrowStatsDenseClassificationTest, BasicRunningStats) { std::vector labels = {1, 0, 1}; std::vector weights = {2.3, 20.3, 1.1}; std::unique_ptr target( - new TestableInputTarget(&labels, &weights, 1)); + new TestableInputTarget(labels, weights, 1)); RunBatch(stat.get(), target.get()); CHECK(stat->IsFinished()); @@ -185,7 +185,7 @@ TEST(GrowStatsDenseClassificationTest, TestFinishEarly) { std::vector labels = {1, 0, 1}; std::vector weights = {1, 1, 1}; std::unique_ptr target( - new TestableInputTarget(&labels, &weights, 1)); + new TestableInputTarget(labels, weights, 1)); std::unique_ptr dataset( new tensorflow::tensorforest::TestableDataSet( {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, 2)); @@ -235,7 +235,7 @@ TEST(GrowStatsDenseClassificationTest, TestCheckPruneHoeffding) { // sends them both to the left. std::vector labels = {0, 1}; std::vector weights = {1, 1}; - TestableInputTarget target(&labels, &weights, 1); + TestableInputTarget target(labels, weights, 1); std::unique_ptr dataset( new tensorflow::tensorforest::TestableDataSet( {-1.0, -1.0, 1.0, -1.0}, 2)); @@ -306,7 +306,7 @@ TEST(GrowStatsLeastSquaresRegressionTest, Basic) { std::vector labels = {2.3, 5.6, 1.1}; std::unique_ptr target( - new TestableInputTarget(&labels, {}, 1)); + new TestableInputTarget(labels, {}, 1)); std::vector branches = {1, 0, 1, 1, 0, 0}; RunBatch(stat.get(), target.get()); @@ -340,7 +340,7 @@ TEST(GrowStatsSparseClassificationTest, Basic) { std::vector labels = {100, 1000, 1}; std::vector weights = {2.3, 20.3, 1.1}; std::unique_ptr target( - new TestableInputTarget(&labels, &weights, 1)); + new TestableInputTarget(labels, weights, 1)); std::vector branches = {1, 0, 1, 1, 0, 0}; RunBatch(stat.get(), target.get()); ",0,train 14101a8ba173b44179b2a9317781f140eb61b0a1,tensorflow/tensorflow,"Fix TensorForest for 32-bit platforms. PiperOrigin-RevId: 161077247",input_target.h,"@@ -20,9 +20,7 @@ namespace tensorflow { namespace tensorforest { -typedef Eigen::TensorMap< - Eigen::Tensor, 0> // NOLINT(runtime/int) - SingleDimStorageType; +typedef TTypes::ConstTensor SingleDimStorageType; // Base class for classes that hold labels and weights. Mostly for testing // purposes, because it's inconvenient to construct nasty Eigen::things. @@ -41,11 +39,12 @@ class InputTarget { template class StoredInputTarget : public InputTarget { protected: + // Takes ownership of t and w with a std::unique_ptr. StoredInputTarget(const T* t, const T* w, int num_targets) : target_(t), weight_(w), num_targets_(num_targets) {} - const T* target_; - const T* weight_; + const std::unique_ptr target_; + const std::unique_ptr weight_; int num_targets_; }; @@ -54,10 +53,11 @@ class StoredInputTarget : public InputTarget { // outputs will correctly index the flattened data. 
class TensorInputTarget : public StoredInputTarget { public: - TensorInputTarget(const SingleDimStorageType* t, - const SingleDimStorageType* w, const Tensor& tensor, - int num_targets) - : StoredInputTarget(t, w, num_targets), original_tensor_(tensor) {} + TensorInputTarget(const Tensor& target, const Tensor& weight, int num_targets) + : StoredInputTarget(new SingleDimStorageType(target.tensor()), + new SingleDimStorageType(weight.tensor()), + num_targets), + original_tensor_(target) {} int32 GetTargetAsClassIndex(int example_index, int target_index) const override { ",0,train 14101a8ba173b44179b2a9317781f140eb61b0a1,tensorflow/tensorflow,"Fix TensorForest for 32-bit platforms. PiperOrigin-RevId: 161077247",leaf_model_operators_test.cc,"@@ -77,7 +77,7 @@ void TestClassificationNormalUse(const std::unique_ptr& op) { std::vector labels = {1, 0, 1}; std::vector weights = {2.3, 20.3, 1.1}; std::unique_ptr target( - new TestableInputTarget(&labels, &weights, 1)); + new TestableInputTarget(labels, weights, 1)); // Update and check value. op->UpdateModel(leaf.get(), target.get(), 0); ",0,train 14101a8ba173b44179b2a9317781f140eb61b0a1,tensorflow/tensorflow,"Fix TensorForest for 32-bit platforms. PiperOrigin-RevId: 161077247",test_utils.h,"@@ -22,9 +22,10 @@ namespace tensorforest { class TestableInputTarget : public StoredInputTarget> { public: - TestableInputTarget(const std::vector* t, const std::vector* w, + TestableInputTarget(const std::vector& t, const std::vector& w, int num_t) - : StoredInputTarget(t, w, num_t) {} + : StoredInputTarget(new std::vector(t), new std::vector(w), + num_t) {} int NumItems() const { return target_->size(); ",0,train 7731e8dfbe4a56773be5dc94d631611211156659,tensorflow/tensorflow,"Don't constant-fold DT_RESOURCE constants. PiperOrigin-RevId: 391803952 Change-Id: I0ea3ec31d3e7dfda0f03b4027a237f08d00a3091",constant_folding.cc,"@@ -30,6 +30,7 @@ limitations under the License. #include ""tensorflow/core/framework/log_memory.h"" #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/types.h"" +#include ""tensorflow/core/framework/types.pb.h"" #include ""tensorflow/core/graph/algorithm.h"" #include ""tensorflow/core/graph/node_builder.h"" #include ""tensorflow/core/graph/subgraph.h"" @@ -223,7 +224,8 @@ bool IsConstantFoldable( std::unordered_map>* shape_replacement_map) { if (n->IsConstant()) { - return true; + // Skip constant folding resources as they cannot be deep copied. + return n->output_type(0) != DT_RESOURCE; } if (MaybeReplaceShapeOp(n, shape_map, shape_replacement_map)) { return true; ",0,train 8f936eb15cc3b798dbe535ca1f4f0eff2b6b79bd,tensorflow/tensorflow,"Support dynamic value inference on iota instructions. We consider all iota output values are static. PiperOrigin-RevId: 341944607 Change-Id: Ie4c3b6dea7d168c41a10a0046eb280a5293adc60",xla_builder.cc,"@@ -3401,6 +3401,7 @@ StatusOr XlaBuilder::BuildDynamicInferenceGraph(XlaOp root_op) { break; } case HloOpcode::kConstant: + case HloOpcode::kIota: SetInstructionAsConstant(new_instr, id, new_shape, false); break; case HloOpcode::kCustomCall: ",0,train 8f936eb15cc3b798dbe535ca1f4f0eff2b6b79bd,tensorflow/tensorflow,"Support dynamic value inference on iota instructions. We consider all iota output values are static. 
PiperOrigin-RevId: 341944607 Change-Id: Ie4c3b6dea7d168c41a10a0046eb280a5293adc60",dynamism_inference_test.cc,"@@ -104,6 +104,19 @@ TEST_F(DynamismInferenceTest, ScalarInt32Literal) { } } +TEST_F(DynamismInferenceTest, Iota) { + // The output of iota are consistened static. + for (ClientType client_type : client_types) { + Client* client = ClientOrDie(platform_, client_type); + XlaBuilder b(TestName()); + auto computation = Iota(&b, S32, 2); + // Iota is not dynamic. + EXPECT_FALSE(ComputeDynamismLiteral(client, computation, &b) + .ValueOrDie() + .Get({0})); + } +} + TEST_F(DynamismInferenceTest, TupleSimple) { for (ClientType client_type : client_types) { Client* client = ClientOrDie(platform_, client_type); ",0,train 305712c02e70bc860812e7c151a3842f028cacb1,tensorflow/tensorflow,"More WhereOp/TopK GPU bugfixes: use the direct cuda stream for CUB GPU kernel. Turns out using the StreamInterface objects leads to ""invalid resource handle"" errors, so we have to use the cudaStream_t directly. This change is based on similar code in cuda_solvers.cc. PiperOrigin-RevId: 161261085",concat_lib_gpu_impl.cu.cc,"@@ -88,7 +88,8 @@ __global__ void concat_variable_kernel( // do an initial binary search and then scan linearly from there // works well when there are many small segments and when the // segments are much longer - IntType segment = gpu::upper_bound(col_scan, num_inputs, gidx) - 1; + IntType segment = + cuda_helper::upper_bound(col_scan, num_inputs, gidx) - 1; IntType curr_offset = col_scan[segment]; IntType curr_segment = segment; @@ -142,10 +143,10 @@ void ConcatGPUImpl(const Eigen::GpuDevice& gpu_device, output->dimension(0), gpu_device); if (fixed_size) { - concat_fixed_kernel<<< - config.block_count, config.thread_per_block, 0, gpu_device.stream()>>>( - input_ptrs, split_size, output->dimension(0), output->dimension(1), - output->data()); + concat_fixed_kernel + <<>>(input_ptrs, split_size, output->dimension(0), + output->dimension(1), output->data()); } else { IntType smem_max = gpu_device.sharedMemPerBlock(); IntType smem_usage = output_scan.size * sizeof(IntType); @@ -155,17 +156,17 @@ void ConcatGPUImpl(const Eigen::GpuDevice& gpu_device, // 4096 inputs is a lot, most code will take the smem path const int32 kMaxSmemBytesPerformance = 16384; if (smem_usage < smem_max && smem_usage < kMaxSmemBytesPerformance) - concat_variable_kernel< - T, IntType, true><<>>( - input_ptrs, output_scan, output->dimension(0), output->dimension(1), - output->data()); + concat_variable_kernel + <<>>(input_ptrs, output_scan, + output->dimension(0), output->dimension(1), + output->data()); else - concat_variable_kernel< - T, IntType, false><<>>( - input_ptrs, output_scan, output->dimension(0), output->dimension(1), - output->data()); + concat_variable_kernel + <<>>(input_ptrs, output_scan, + output->dimension(0), output->dimension(1), + output->data()); } } ",0,train 305712c02e70bc860812e7c151a3842f028cacb1,tensorflow/tensorflow,"More WhereOp/TopK GPU bugfixes: use the direct cuda stream for CUB GPU kernel. Turns out using the StreamInterface objects leads to ""invalid resource handle"" errors, so we have to use the cudaStream_t directly. This change is based on similar code in cuda_solvers.cc. 
PiperOrigin-RevId: 161261085",split_lib_gpu.cu.cc,"@@ -138,7 +138,8 @@ __global__ void split_v_kernel(const T* input_ptr, // do an initial binary search and then scan linearly from there // works well when there are many small segments and when the // segments are much longer - IntType segment = gpu::upper_bound(col_scan, num_outputs, gidx) - 1; + IntType segment = + cuda_helper::upper_bound(col_scan, num_outputs, gidx) - 1; IntType curr_offset = col_scan[segment]; IntType curr_segment = segment; @@ -195,10 +196,10 @@ struct SplitOpGPULaunch { CudaLaunchConfig config = GetCudaLaunchConfig( prefix_dim_size * split_dim_size * suffix_dim_size, d); - SplitOpKernel< - T><<>>( - input, prefix_dim_size, split_dim_size, suffix_dim_size, - output_ptr_data); + SplitOpKernel + <<>>( + input, prefix_dim_size, split_dim_size, suffix_dim_size, + output_ptr_data); } }; @@ -224,15 +225,15 @@ struct SplitVOpGPULaunch { // 4096 inputs is a lot, most code will take the smem path const int32 kMaxSmemBytesPerformance = 16384; if (smem_usage < smem_max && smem_usage < kMaxSmemBytesPerformance) - split_v_kernel<<>>( - input_ptr, output_scan, total_rows, total_cols, output_ptr_data); + split_v_kernel + <<>>(input_ptr, output_scan, total_rows, + total_cols, output_ptr_data); else - split_v_kernel<<>>( - input_ptr, output_scan, total_rows, total_cols, output_ptr_data); + split_v_kernel + <<>>(input_ptr, output_scan, total_rows, + total_cols, output_ptr_data); } } }; ",0,train 305712c02e70bc860812e7c151a3842f028cacb1,tensorflow/tensorflow,"More WhereOp/TopK GPU bugfixes: use the direct cuda stream for CUB GPU kernel. Turns out using the StreamInterface objects leads to ""invalid resource handle"" errors, so we have to use the cudaStream_t directly. This change is based on similar code in cuda_solvers.cc. PiperOrigin-RevId: 161261085",topk_op_gpu.cu.cc,"@@ -30,6 +30,7 @@ limitations under the License. #include ""tensorflow/core/lib/gtl/top_n.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/types.h"" +#include ""tensorflow/core/util/cuda_kernel_helper.h"" // Required for sorting Eigen::half namespace cub { @@ -365,9 +366,9 @@ __global__ void TopKKernel(const T* input, int length, int k, bool sorted, } template -cudaError LaunchTopKKernel(cudaStream_t stream, int num_shards, const T* input, - int batch_size, int length, int k, bool sorted, - T* output, int* indices) { +cudaError LaunchTopKKernel(const cudaStream_t& stream, int num_shards, + const T* input, int batch_size, int length, int k, + bool sorted, T* output, int* indices) { // This code assumes that k is small enough that the computation // fits inside shared memory (hard coded to 48KB). In practice this // means k <= 3072 for T=float/int32 and k <= 2048 for T=double/int64. 
@@ -428,7 +429,7 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows, typename TTypes::Tensor values, TTypes::Tensor indices) { const GPUDevice& d = ctx->eigen_device(); - auto stream = ctx->eigen_gpu_device().stream(); + const cudaStream_t& cu_stream = GetCudaStream(ctx); size_t temp_storage_bytes = -1; // TODO(ebrevdo): Once cub supports iterators for the ValueT and @@ -480,7 +481,7 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows, /* d_end_offsets */ segment_offsets_t.data() + 1, /* begin_bit */ 0, /* end_bit */ sizeof(T) * 8, - /* stream */ stream); + /* stream */ cu_stream); if (err != cudaSuccess) { return errors::Internal( ""TopKOp: Could not launch "" @@ -505,7 +506,7 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows, /* d_end_offsets */ segment_offsets_t.data() + 1, /* begin_bit */ 0, /* end_bit */ sizeof(T) * 8, - /* stream */ stream); + /* stream */ cu_stream); if (err != cudaSuccess) { return errors::Internal( ""TopKOp: Could not launch "" @@ -545,8 +546,8 @@ struct TopKFunctor { return impl::LaunchSortKernel(context, input.data(), num_rows, num_cols, k, values, indices); } else { - auto stream = context->eigen_gpu_device().stream(); - auto err = impl::LaunchTopKKernel(stream, /* num_shards */ 0, + const cudaStream_t& cu_stream = GetCudaStream(context); + auto err = impl::LaunchTopKKernel(cu_stream, /* num_shards */ 0, input.data(), num_rows, num_cols, k, sorted, values.data(), indices.data()); if (err != cudaSuccess) { ",0,train 305712c02e70bc860812e7c151a3842f028cacb1,tensorflow/tensorflow,"More WhereOp/TopK GPU bugfixes: use the direct cuda stream for CUB GPU kernel. Turns out using the StreamInterface objects leads to ""invalid resource handle"" errors, so we have to use the cudaStream_t directly. This change is based on similar code in cuda_solvers.cc. 
PiperOrigin-RevId: 161261085",where_op_gpu.cu.cc,"@@ -56,8 +56,9 @@ struct NumTrue { EIGEN_ALWAYS_INLINE static Status Compute( OpKernelContext* ctx, const GPUDevice& d, TTypes::ConstFlat input, typename TTypes::Scalar num_true) { - std::size_t temp_storage_bytes = 0; + const cudaStream_t& cu_stream = GetCudaStream(ctx); + std::size_t temp_storage_bytes = 0; const bool* input_data = input.data(); TIndex* num_true_data = num_true.data(); @@ -66,7 +67,7 @@ struct NumTrue { /*d_in*/ input_data, /*d_out*/ num_true_data, /*num_items*/ input.size(), - /*stream*/ d.stream()); + /*stream*/ cu_stream); if (first_success != cudaSuccess) { return errors::Internal( @@ -85,7 +86,7 @@ struct NumTrue { /*d_in*/ input_data, /*d_out*/ num_true_data, /*num_items*/ input.size(), - /*stream*/ d.stream()); + /*stream*/ cu_stream); if (second_success != cudaSuccess) { return errors::Internal( @@ -168,6 +169,8 @@ struct Where { return Status::OK(); } + const cudaStream_t& cu_stream = GetCudaStream(ctx); + std::size_t temp_storage_bytes = 0; cub::CountingInputIterator select_counter(0); @@ -188,7 +191,7 @@ struct Where { /*d_out*/ output_iterator, /*d_num_selected_out*/ found_true_device, /*num_items*/ input.size(), - /*stream*/ d.stream()); + /*stream*/ cu_stream); if (first_success != cudaSuccess) { return errors::Internal( ""WhereOp: Could not launch cub::DeviceSelect::Flagged to calculate "" @@ -208,7 +211,7 @@ struct Where { /*d_out*/ output_iterator, /*d_num_selected_out*/ found_true_device, /*num_items*/ input.size(), - /*stream*/ d.stream()); + /*stream*/ cu_stream); if (second_success != cudaSuccess) { return errors::Internal( ",0,train 305712c02e70bc860812e7c151a3842f028cacb1,tensorflow/tensorflow,"More WhereOp/TopK GPU bugfixes: use the direct cuda stream for CUB GPU kernel. Turns out using the StreamInterface objects leads to ""invalid resource handle"" errors, so we have to use the cudaStream_t directly. This change is based on similar code in cuda_solvers.cc. PiperOrigin-RevId: 161261085",cuda_kernel_helper.h,"@@ -20,9 +20,11 @@ limitations under the License. #include +#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" +#include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/platform/logging.h"" +#include ""tensorflow/core/platform/stream_executor.h"" #include ""tensorflow/core/platform/types.h"" -#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" // Usage of GetCudaLaunchConfig, GetCuda2DLaunchConfig, and // GetCuda3DLaunchConfig: @@ -95,7 +97,8 @@ void MyDriverFunc(const GPUDevice &d) { } // See the test for this for more example: -// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/cuda_kernel_helper_test.cu.cc +// +https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/cuda_kernel_helper_test.cu.cc */ @@ -107,7 +110,7 @@ void MyDriverFunc(const GPUDevice &d) { for (int i = blockIdx.axis * blockDim.axis + threadIdx.axis; i < n.axis; \ i += blockDim.axis * gridDim.axis) -#define DIV_UP(a, b) (((a) + (b) - 1) / (b)) +#define DIV_UP(a, b) (((a) + (b)-1) / (b)) namespace tensorflow { @@ -277,7 +280,19 @@ inline Cuda2DLaunchConfig GetCuda2DLaunchConfig( dynamic_shared_memory_size, block_size_limit); } -namespace gpu { +// Returns a raw reference to the current cuda stream. Required by a +// number of kernel calls (for which StreamInterface* does not work), i.e. +// CUB and certain cublas primitives. 
+inline const cudaStream_t& GetCudaStream(OpKernelContext* context) { + const cudaStream_t* ptr = CHECK_NOTNULL( + reinterpret_cast(context->op_device_context() + ->stream() + ->implementation() + ->CudaStreamMemberHack())); + return *ptr; +} + +namespace cuda_helper { template __device__ IntType upper_bound(IntType* first, IntType count, IntType val) { @@ -299,7 +314,7 @@ __device__ IntType upper_bound(IntType* first, IntType count, IntType val) { return first - orig; } -} // namespace gpu +} // namespace cuda_helper template __device__ __host__ inline T ldg(const T* address) { ",0,train 8476ba0486bf03a4a622410fdefa62c159fd6235,tensorflow/tensorflow,"[tf.data] Make sure rendezvous is created when running multi-device function. PiperOrigin-RevId: 254898730",captured_function.cc,"@@ -552,10 +552,7 @@ Status InstantiatedCapturedFunction::Run(IteratorContext* ctx, }); f_opts.step_container = &step_container; f_opts.runner = ctx->runner(); - if (lib_->device()->device_type() != DEVICE_CPU || - captured_func_->is_multi_device_function()) { - f_opts.create_rendezvous = true; - } + f_opts.create_rendezvous = ShouldCreateRendezvous(); // TODO(mrry): Add cancellation manager support to IteratorContext // so that we can cancel running map functions. The local // cancellation manager here is created so that we can run kernels @@ -593,9 +590,7 @@ Status InstantiatedCapturedFunction::RunWithBorrowedArgs( }); f_opts.step_container = &step_container; f_opts.runner = ctx->runner(); - if (lib_->device()->device_type() != DEVICE_CPU) { - f_opts.create_rendezvous = true; - } + f_opts.create_rendezvous = ShouldCreateRendezvous(); // TODO(mrry): Add cancellation manager support to IteratorContext // so that we can cancel running map functions. The local // cancellation manager here is created so that we can run kernels @@ -633,9 +628,7 @@ Status InstantiatedCapturedFunction::RunInstantiated( }); f_opts.step_container = &step_container; f_opts.runner = &captured_runner_; - if (lib_->device()->device_type() != DEVICE_CPU) { - f_opts.create_rendezvous = true; - } + f_opts.create_rendezvous = ShouldCreateRendezvous(); // TODO(mrry): Add cancellation manager support to IteratorContext // so that we can cancel running map functions. The local // cancellation manager here is created so that we can run kernels @@ -688,9 +681,7 @@ void InstantiatedCapturedFunction::RunAsync( }); f_opts.step_container = step_container; f_opts.runner = ctx->runner(); - if (lib_->device()->device_type() != DEVICE_CPU) { - f_opts.create_rendezvous = true; - } + f_opts.create_rendezvous = ShouldCreateRendezvous(); // TODO(mrry): Add cancellation manager support to IteratorContext // so that we can cancel running map functions. The local // cancellation manager here is created so that we can run kernels @@ -749,6 +740,11 @@ void InstantiatedCapturedFunction::RunAsync( lib_->Run(f_opts, f_handle_, frame, std::move(callback)); } +bool InstantiatedCapturedFunction::ShouldCreateRendezvous() const { + return lib_->device()->device_type() != DEVICE_CPU || + captured_func_->is_multi_device_function(); +} + CapturedFunction::CapturedFunction( const std::shared_ptr metadata, std::vector captured_inputs) ",0,train 8476ba0486bf03a4a622410fdefa62c159fd6235,tensorflow/tensorflow,"[tf.data] Make sure rendezvous is created when running multi-device function. 
PiperOrigin-RevId: 254898730",captured_function.h,"@@ -95,6 +95,10 @@ class InstantiatedCapturedFunction { std::function)> runner, CapturedFunction* captured_func); + // Determines whether a rendezvous object should be created when running the + // instantiated function. + bool ShouldCreateRendezvous() const; + friend class CapturedFunction; FunctionLibraryRuntime* const lib_; ",0,train 97d7281354af43ed5fd53ebf729cea76de84acdb,tensorflow/tensorflow,"eager: Graceful failure on invalid inputs. Tests added to pywrap_tfe_test.py would fail (segmentation fault / infinite loop) without corresponding fixes to pywrap_tfe.i and pywrap_tfe_src.cc Other statements that would fail ungracefully without this fix (and with eager execution enabled) include: tf.split(value=0, num_or_size_splits=-1) tf.dynamic_partition(data=0, partitions=0, num_partitions=-1) tf.split(value=0, num_or_size_splits=1.23, num=-1) tf.unstack(value=0, num=-1) PiperOrigin-RevId: 212731927",pywrap_tfe_src.cc,"@@ -2563,13 +2563,18 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { int num_retvals = 0; for (int i = 0; i < op_def->output_arg_size(); i++) { const auto& output_arg = op_def->output_arg(i); + int delta = 1; if (!output_arg.number_attr().empty()) { - num_retvals += attr_list_sizes[output_arg.number_attr()]; + delta = attr_list_sizes[output_arg.number_attr()]; } else if (!output_arg.type_list_attr().empty()) { - num_retvals += attr_list_sizes[output_arg.type_list_attr()]; - } else { - num_retvals++; + delta = attr_list_sizes[output_arg.type_list_attr()]; + } + if (delta < 0) { + RaiseFallbackException( + ""Attributes suggest that the size of an output list is less than 0""); + return nullptr; } + num_retvals += delta; } tensorflow::gtl::InlinedVector retvals(num_retvals); ",0,train 97d7281354af43ed5fd53ebf729cea76de84acdb,tensorflow/tensorflow,"eager: Graceful failure on invalid inputs. 
Tests added to pywrap_tfe_test.py would fail (segmentation fault / infinite loop) without corresponding fixes to pywrap_tfe.i and pywrap_tfe_src.cc Other statements that would fail ungracefully without this fix (and with eager execution enabled) include: tf.split(value=0, num_or_size_splits=-1) tf.dynamic_partition(data=0, partitions=0, num_partitions=-1) tf.split(value=0, num_or_size_splits=1.23, num=-1) tf.unstack(value=0, num=-1) PiperOrigin-RevId: 212731927",pywrap_tfe_test.py,"@@ -21,6 +21,7 @@ from __future__ import print_function from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import backprop from tensorflow.python.eager import context +from tensorflow.python.eager import core from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -123,8 +124,8 @@ class Tests(test.TestCase): def testFastpathExecute_MixedPrecisionVariableTapeWrite(self): ctx = context.context() with backprop.GradientTape(persistent=True) as tape: - a_2_by_2 = constant_op.constant( - [[1.0, 2.0], [3.0, 4.0]], dtype=dtypes.float32) + a_2_by_2 = constant_op.constant([[1.0, 2.0], [3.0, 4.0]], + dtype=dtypes.float32) a_2_by_2_fp16 = math_ops.cast(a_2_by_2, dtype=dtypes.float16) m1 = resource_variable_ops.ResourceVariable(a_2_by_2) m2 = resource_variable_ops._MixedPrecisionVariable( @@ -233,6 +234,26 @@ class Tests(test.TestCase): pywrap_tensorflow.TFE_Py_FastPathExecute(ctx_handle, ctx.device_name, ctx_handle, None, [], a_2_by_2) + @test_util.assert_no_new_tensors + @test_util.assert_no_garbage_created + def testFastPathExecute_InvalidAttributes(self): + split_dim = constant_op.constant(0, dtype=dtypes.int32) + value = constant_op.constant([0, 1, 2, 3], dtype=dtypes.float32) + ctx = context.context() + ctx_handle = ctx._handle + with self.assertRaises(core._FallbackException): + pywrap_tensorflow.TFE_Py_FastPathExecute(ctx_handle, ctx.device_name, + ""Split"", None, None, split_dim, + value, ""num_split"", -1) + + @test_util.assert_no_new_tensors + @test_util.assert_no_garbage_created + def testInvalidNumOutputs(self): + with self.assertRaisesRegexp( + Exception, + ""Value for attr 'num_split' of -1 must be at least minimum 1""): + array_ops.split(value=[1, 2, 3], num_or_size_splits=-1) + if __name__ == ""__main__"": test.main() ",0,train 6a20128c5b9819cc09b3dc948e240d37e06aba4b,tensorflow/tensorflow,"[TF/XLA Bridge] Alias pass-through parameters when the input graph contains no TF reference variables Previously, expensive copies were required for pass-through parameters. Removing those copies is not safe in the presence of TF reference variables in the graph, so we only remove them for cases when the graph does not contain TF reference variables. PiperOrigin-RevId: 271241769",build_xla_ops_pass.cc,"@@ -472,6 +472,11 @@ Status ReplaceNodeWithXlaCompileAndXlaRun( /*resources=*/cluster_info.resource_inputs, /*must_compile=*/requires_compilation, cluster_info.function); + + bool has_ref_attr; + TF_RETURN_IF_ERROR( + GetNodeAttr(n->attrs(), kXlaHasReferenceVarsAttr, &has_ref_attr)); + xla_compile.operation.node()->AddAttr(kXlaHasReferenceVarsAttr, has_ref_attr); TF_RETURN_IF_ERROR( CopyIncomingControlEdges(g, /*from=*/n, /*to=*/xla_compile.key.node())); ",0,test 6a20128c5b9819cc09b3dc948e240d37e06aba4b,tensorflow/tensorflow,"[TF/XLA Bridge] Alias pass-through parameters when the input graph contains no TF reference variables Previously, expensive copies were required for pass-through parameters. 
Removing those copies is not safe in the presence of TF reference variables in the graph, so we only remove them for cases when the graph does not contain TF reference variables. PiperOrigin-RevId: 271241769",build_xla_ops_pass_test.cc,"@@ -149,8 +149,10 @@ TEST_F(BuildXlaOpsTest, ControlDepsPreserved) { TF_ASSERT_OK(root.graph()->AddFunctionLibrary(fdef_lib)); Node* call; TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), ""cluster_0"", ""C"", &call)); + call->AddAttr(kXlaHasReferenceVarsAttr, false); call->set_requested_device(kXlaDeviceName); Node* write_op = MakeWrite(root, ""write""); + write_op->AddAttr(kXlaHasReferenceVarsAttr, false); root.graph()->AddControlEdge(call, write_op); std::unique_ptr graph; @@ -191,8 +193,10 @@ TEST_F(BuildXlaOpsTest, OnNonXlaDevice) { Node* call; TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), ""cluster_0"", ""C"", &call)); TF_ASSERT_OK(root.DoShapeInference(call)); + call->AddAttr(kXlaHasReferenceVarsAttr, false); Node* write_op = MakeWrite(root, Output(call), ""write_result""); + write_op->AddAttr(kXlaHasReferenceVarsAttr, false); auto xla_compile = NodeWith(Op(""_XlaCompile""), Attr(""must_compile"", false)); auto predicated_compilation_key = @@ -226,8 +230,10 @@ TEST_F(BuildXlaOpsTest, OnXlaDevice) { TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), ""cluster_0"", ""C"", &call)); call->set_requested_device(kXlaDeviceName); TF_ASSERT_OK(root.DoShapeInference(call)); + call->AddAttr(kXlaHasReferenceVarsAttr, false); Node* write_op = MakeWrite(root, Output(call), ""write_result""); + write_op->AddAttr(kXlaHasReferenceVarsAttr, false); std::unique_ptr graph; TF_ASSERT_OK(BuildXlaOps(root, fdef_lib, &graph)); @@ -250,6 +256,7 @@ TEST_F(BuildXlaOpsTest, NoExtraMergeForEdgeToSink) { TF_ASSERT_OK(root.graph()->AddFunctionLibrary(fdef_lib)); Node* call; TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), ""cluster_0"", ""C"", &call)); + call->AddAttr(kXlaHasReferenceVarsAttr, false); std::unique_ptr graph; TF_ASSERT_OK(BuildXlaOps(root, fdef_lib, &graph)); @@ -278,6 +285,7 @@ TEST_F(BuildXlaOpsTest, NoDeviceToHostCopiesForClustersWithInt32Inputs) { TF_ASSERT_OK( MakeXlaCompiledKernel(root.graph(), ""cluster_int32"", ""C"", &call)); call->set_requested_device(kXlaDeviceName); + call->AddAttr(kXlaHasReferenceVarsAttr, false); auto var = ops::VarHandleOp(root.WithOpName(""var""), DT_INT32, TensorShape({})); ",0,test 6a20128c5b9819cc09b3dc948e240d37e06aba4b,tensorflow/tensorflow,"[TF/XLA Bridge] Alias pass-through parameters when the input graph contains no TF reference variables Previously, expensive copies were required for pass-through parameters. Removing those copies is not safe in the presence of TF reference variables in the graph, so we only remove them for cases when the graph does not contain TF reference variables. PiperOrigin-RevId: 271241769",encapsulate_subgraphs_pass.cc,"@@ -30,6 +30,7 @@ limitations under the License. 
#include ""tensorflow/compiler/jit/graphcycles/graphcycles.h"" #include ""tensorflow/compiler/jit/mark_for_compilation_pass.h"" #include ""tensorflow/compiler/jit/shape_inference_helpers.h"" +#include ""tensorflow/compiler/jit/xla_cluster_util.h"" #include ""tensorflow/compiler/tf2xla/const_analysis.h"" #include ""tensorflow/compiler/xla/status_macros.h"" #include ""tensorflow/core/common_runtime/device_factory.h"" @@ -61,6 +62,7 @@ const char* const kXlaNumConstantArgsAttr = ""_XlaNumConstantArgs""; const char* const kXlaNumResourceArgsAttr = ""_XlaNumResourceArgs""; const char* const kXlaHostTransferSequencerAttr = ""_xla_host_transfer_sequencer""; +const char* const kXlaHasReferenceVarsAttr = ""_XlaHasReferenceVars""; void SortControlInputs(GraphDef* gdef) { int64 num_nodes = gdef->node_size(); @@ -1311,6 +1313,14 @@ Status EncapsulateSubgraphsPass::Run( } *options.graph = std::move(graph_out); + TF_ASSIGN_OR_RETURN(absl::flat_hash_set ref_related_nodes, + GetNodesRelatedToRefVariables(**options.graph, flr)); + for (Node* node : (*options.graph)->nodes()) { + bool has_ref_vars = ref_related_nodes.contains(node); + node->AddAttr(kXlaHasReferenceVarsAttr, has_ref_vars); + VLOG(3) << ""Has ref vars = "" << has_ref_vars + << "", node: "" << node->def().SerializeAsString(); + } return Status::OK(); } ",0,test 6a20128c5b9819cc09b3dc948e240d37e06aba4b,tensorflow/tensorflow,"[TF/XLA Bridge] Alias pass-through parameters when the input graph contains no TF reference variables Previously, expensive copies were required for pass-through parameters. Removing those copies is not safe in the presence of TF reference variables in the graph, so we only remove them for cases when the graph does not contain TF reference variables. PiperOrigin-RevId: 271241769",encapsulate_subgraphs_pass.h,"@@ -91,6 +91,9 @@ extern const char* const kXlaNumConstantArgsAttr; // Name of the attribute containing the number of resource variable arguments. extern const char* const kXlaNumResourceArgsAttr; +// Name of the attribute defining whether the cluster has reference variables. +extern const char* const kXlaHasReferenceVarsAttr; + // Sorts each node's control inputs by their names. This guarantees that for two // structually equivalent GraphDefs, we get the same traversal ordering on // node's control input fields. ",0,test 6a20128c5b9819cc09b3dc948e240d37e06aba4b,tensorflow/tensorflow,"[TF/XLA Bridge] Alias pass-through parameters when the input graph contains no TF reference variables Previously, expensive copies were required for pass-through parameters. Removing those copies is not safe in the presence of TF reference variables in the graph, so we only remove them for cases when the graph does not contain TF reference variables. 
PiperOrigin-RevId: 271241769",encapsulate_subgraphs_pass_test.cc,"@@ -2581,5 +2581,79 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) { TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library); } +void CreateSubgraphTouchingRefVar(const Scope& s) { + Output variable = + ops::Variable(s.WithOpName(""variable""), PartialTensorShape{}, DT_FLOAT); + Output read = ops::Identity(s.WithOpName(""read_ref_var""), variable); + Output neg = ops::Negate(s.WithOpName(""negate_ref""), read); + Output add = ops::Add(s.WithOpName(""add_ref""), neg, neg); + + Output constant = + ops::Const(s.WithOpName(""constant_ref""), Input::Initializer(0.0)); + s.graph()->AddControlEdge(constant.node(), variable.node()); +} + +TEST(EncapsulateSubgraphsTest, RefVariablesMarked) { + Scope root = Scope::NewRootScope().ExitOnError(); + CreateSubgraphTouchingRefVar(root); + + auto graph = absl::make_unique(OpRegistry::Global()); + TF_ASSERT_OK(root.ToGraph(graph.get())); + + SessionOptions session_options; + session_options.env = Env::Default(); + GraphOptimizationPassOptions options; + options.session_options = &session_options; + FunctionLibraryDefinition library(OpRegistry::Global(), {}); + options.flib_def = &library; + options.graph = &graph; + + EncapsulateSubgraphsPass pass; + TF_ASSERT_OK(pass.Run(options)); + + for (const Node* node : graph->nodes()) { + bool has_ref_var; + TF_ASSERT_OK( + GetNodeAttr(node->attrs(), kXlaHasReferenceVarsAttr, &has_ref_var)); + EXPECT_TRUE(node->IsSink() || node->IsSource() || has_ref_var) + << ""All nodes apart from source and sink can access reference variable""; + } +} + +void CreateSubgraphNotTouchingRefVar(const Scope& s) { + Output constant = + ops::Const(s.WithOpName(""constant_normal""), Input::Initializer(0.0)); + Output neg = ops::Negate(s.WithOpName(""negate_normal""), constant); + Output add = ops::Add(s.WithOpName(""add_normal""), neg, neg); +} + +TEST(EncapsulateSubgraphsTest, NoRefVarsNoAttr) { + Scope root = Scope::NewRootScope().ExitOnError(); + CreateSubgraphNotTouchingRefVar(root); + + auto graph = absl::make_unique(OpRegistry::Global()); + TF_ASSERT_OK(root.ToGraph(graph.get())); + + // TODO(cheshire): reduce boilerplate for creating + // GraphOptimizationPassOptions here and elsewhere, probably using a macro. + SessionOptions session_options; + session_options.env = Env::Default(); + GraphOptimizationPassOptions options; + options.session_options = &session_options; + FunctionLibraryDefinition library(OpRegistry::Global(), {}); + options.flib_def = &library; + options.graph = &graph; + + EncapsulateSubgraphsPass pass; + TF_ASSERT_OK(pass.Run(options)); + + for (const Node* node : graph->nodes()) { + bool has_ref_var; + TF_ASSERT_OK( + GetNodeAttr(node->attrs(), kXlaHasReferenceVarsAttr, &has_ref_var)); + EXPECT_FALSE(has_ref_var) << ""The graph does not have reference variables""; + } +} + } // namespace } // namespace tensorflow ",0,test 6a20128c5b9819cc09b3dc948e240d37e06aba4b,tensorflow/tensorflow,"[TF/XLA Bridge] Alias pass-through parameters when the input graph contains no TF reference variables Previously, expensive copies were required for pass-through parameters. Removing those copies is not safe in the presence of TF reference variables in the graph, so we only remove them for cases when the graph does not contain TF reference variables. PiperOrigin-RevId: 271241769",xla_ops.cc,"@@ -18,8 +18,10 @@ limitations under the License. 
#include ""absl/container/flat_hash_map.h"" #include ""absl/memory/memory.h"" #include ""tensorflow/compiler/jit/defs.h"" +#include ""tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"" #include ""tensorflow/compiler/jit/flags.h"" #include ""tensorflow/compiler/jit/xla_activity_listener.h"" +#include ""tensorflow/compiler/jit/xla_cluster_util.h"" #include ""tensorflow/compiler/tf2xla/shape_util.h"" #include ""tensorflow/compiler/tf2xla/tf2xla_util.h"" #include ""tensorflow/compiler/tf2xla/xla_compiler.h"" @@ -268,7 +270,7 @@ static Status BuildCompilationCache(OpKernelContext* ctx, } static Status CompileToLocalExecutable( - OpKernelContext* ctx, const NameAttrList& function, + OpKernelContext* ctx, const NameAttrList& function, bool has_ref_vars, const XlaPlatformInfo& platform_info, absl::Span resources, absl::Span constants, bool lazy, xla::LocalClient** client, std::map* variables, @@ -313,8 +315,9 @@ static Status CompileToLocalExecutable( options.shape_representation_fn = platform_info.xla_device_metadata()->shape_representation_fn(); } - // TODO(b/138728225): Set options.alias_passthrough_params for clusters - // without ref variables. + // If reference variables are not present in the graph, we can safely alias + // passthrough parameters without performing a copy. + options.alias_passthrough_params = !has_ref_vars; std::map constant_args; for (int i : constants) { @@ -351,8 +354,8 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) { { Status s = CompileToLocalExecutable( - ctx, function_, platform_info_, resources_, constants_, /*lazy=*/false, - &client, &variables, &kernel, &executable); + ctx, function_, /*has_ref_vars=*/true, platform_info_, resources_, + constants_, /*lazy=*/false, &client, &variables, &kernel, &executable); if (!s.ok() && (platform_info_.device_type().type_string() == DEVICE_CPU || platform_info_.device_type().type_string() == DEVICE_GPU)) { // Suggest auto jit if the failure was with GPU or CPU. @@ -451,6 +454,14 @@ bool MustCompileAttr(OpKernelConstruction* ctx) { ctx->GetAttr(""must_compile"", &must_compile)); return must_compile; } + +bool HasRefVars(OpKernelConstruction* ctx) { + bool has_ref_vars; + OP_REQUIRES_OK_RETURN(ctx, false, + ctx->GetAttr(kXlaHasReferenceVarsAttr, &has_ref_vars)); + return has_ref_vars; +} + } // namespace XlaLocalLaunchOp::XlaLocalLaunchOp(OpKernelConstruction* ctx) @@ -467,7 +478,8 @@ XlaCompileOp::XlaCompileOp(OpKernelConstruction* ctx) resources_(ResourcesVector(ctx)), function_(FunctionAttr(ctx)), platform_info_(PlatformInfoFromContext(ctx)), - must_compile_(MustCompileAttr(ctx)) {} + must_compile_(MustCompileAttr(ctx)), + has_ref_vars_(HasRefVars(ctx)) {} void XlaCompileOp::Compute(OpKernelContext* ctx) { VLOG(3) << ""XlaCompileOp "" << def().name() @@ -488,7 +500,7 @@ void XlaCompileOp::Compute(OpKernelContext* ctx) { executable = nullptr; } else { Status status = CompileToLocalExecutable( - ctx, function_, platform_info_, resources_, constants_, + ctx, function_, has_ref_vars_, platform_info_, resources_, constants_, /*lazy=*/!must_compile_, &client, &variables, &kernel, &executable); if (must_compile_ || status.code() != error::UNIMPLEMENTED) { OP_REQUIRES_OK(ctx, status); ",0,test 6a20128c5b9819cc09b3dc948e240d37e06aba4b,tensorflow/tensorflow,"[TF/XLA Bridge] Alias pass-through parameters when the input graph contains no TF reference variables Previously, expensive copies were required for pass-through parameters. 
Removing those copies is not safe in the presence of TF reference variables in the graph, so we only remove them for cases when the graph does not contain TF reference variables. PiperOrigin-RevId: 271241769",xla_ops.h,"@@ -153,6 +153,9 @@ class XlaCompileOp : public OpKernel { const bool must_compile_; + // Whether the graph has TF reference variables. + const bool has_ref_vars_; + // cannot_compile_cluster_ is set to true if XLA returns an Unimplemented // error when compiling the cluster this _XlaCompile is supposed to compile. // If `cannot_compile_cluster_` is true then we avoid compiling this cluster ",0,test eb178237c69f8ce0cea75a42ba181dd0fbbc56a2,tensorflow/tensorflow,"Update GraphDef version to 734. PiperOrigin-RevId: 367955072 Change-Id: Iff095da3b4fd4e73eee8987938a97118cb9cce45",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 733 // Updated: 2021/4/11 +#define TF_GRAPH_DEF_VERSION 734 // Updated: 2021/4/12 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // ",0,train c2f231507c0b8b7abb8097323545f5810a208bda,tensorflow/tensorflow,"Add DT_INT8 and DT_UINT8 as supported TPU type. PiperOrigin-RevId: 322196027 Change-Id: I626d66c587ea0231cf1665b6d3349a13499b57e4",tpu_defs.h,"@@ -51,9 +51,10 @@ extern const char* const kTPUReplicateAttr; extern const char* const kOutsideCompilationAttr; // Supported types for TPUs. -static constexpr std::array kTpuAllTypes = { +static constexpr std::array kTpuAllTypes = { {DT_INT32, DT_UINT32, DT_BFLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL, - DT_COMPLEX64, DT_INT64, DT_UINT64, DT_QINT8, DT_QUINT8}}; + DT_COMPLEX64, DT_INT64, DT_UINT64, DT_QINT8, DT_QUINT8, DT_INT8, + DT_UINT8}}; } // namespace tensorflow ",0,train 7d8316fb85b21546e3df2aef701f1cfa9f92b6ba,tensorflow/tensorflow,"Add additional test cases Signed-off-by: Yong Tang ",optimizers_test.py,"@@ -250,7 +250,7 @@ class OptimizersTest(test.TestCase): self.assertAlmostEqual(var_value, 6.5, 4) self.assertEqual(global_step_value, 1) - def testGradientMultiplyTensor(self): + def testGradientMultiplyInt32Tensor(self): with self.cached_session() as session: x, var, loss, global_step = _setup_model() v = array_ops.placeholder(dtypes.float32, []) @@ -268,6 +268,24 @@ class OptimizersTest(test.TestCase): self.assertAlmostEqual(var_value, 6.5, 4) self.assertEqual(global_step_value, 1) + def testGradientMultiplyInt64Tensor(self): + with self.cached_session() as session: + x, var, loss, global_step = _setup_model() + v = array_ops.placeholder(dtypes.float64, []) + train = optimizers_lib.optimize_loss( + loss, + global_step, + learning_rate=0.1, + optimizer=""SGD"", + gradient_multipliers={var: v}) + variables.global_variables_initializer().run() + session.run(train, feed_dict={x: 5, v: 7.}) + var_value, global_step_value = session.run([var, global_step]) + # var(0) = 10, x = 5, var(0)/dx = 5, + # var(1) = var(0) - learning_rate * gradient_multiplier * var(0)/dx + self.assertAlmostEqual(var_value, 6.5, 4) + self.assertEqual(global_step_value, 1) + def testIgnoreVariablesWithNoGradients(self): _, _, loss, global_step = _setup_model() ",0,test 8412e4920296dd3df0ba1a99e4f3f783f74fcda2,tensorflow/tensorflow,"Add check for correct memory alignment to MemoryAllocation::MemoryAllocation() on 32-bit arm. This will give a reasonable error message at model build time, rather than a SIGBUS later. 
PiperOrigin-RevId: 262385650",allocation.cc,"@@ -87,6 +87,22 @@ bool FileCopyAllocation::valid() const { return copied_buffer_ != nullptr; } MemoryAllocation::MemoryAllocation(const void* ptr, size_t num_bytes, ErrorReporter* error_reporter) : Allocation(error_reporter, Allocation::Type::kMemory) { +#ifdef __arm__ + if ((reinterpret_cast(ptr) % 16) != 0) { + // The flat buffer schema has alignment requirements of up to 16 bytes to + // guarantee that data can be correctly accesses on 32-bit arm. The buffer + // we get must also be 16-byte aligned, otherwise the guarantee will not + // hold (potentially resulting in a SIGBUS).. + // + // Note that 64-bit ARM may also suffer a performance impact, but no crash - + // that case is not checked. + error_reporter->Report(""The supplied buffer is not 16-byte aligned""); + buffer_ = nullptr; + buffer_size_bytes_ = 0; + return; + } +#endif // __arm__ + buffer_ = ptr; buffer_size_bytes_ = num_bytes; } ",0,train 8412e4920296dd3df0ba1a99e4f3f783f74fcda2,tensorflow/tensorflow,"Add check for correct memory alignment to MemoryAllocation::MemoryAllocation() on 32-bit arm. This will give a reasonable error message at model build time, rather than a SIGBUS later. PiperOrigin-RevId: 262385650",model_test.cc,"@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include ""tensorflow/lite/model.h"" + #include #include #include @@ -20,7 +22,8 @@ limitations under the License. #include #include -#include ""tensorflow/lite/model.h"" +#include +#include #include #include ""tensorflow/lite/core/api/error_reporter.h"" @@ -72,6 +75,44 @@ TEST(BasicFlatBufferModel, TestNonExistantFiles) { ASSERT_TRUE(!FlatBufferModel::BuildFromFile(""/tmp/tflite_model_1234"")); } +TEST(BasicFlatBufferModel, TestBufferAlignment) { + // On 32-bit ARM buffers are required to be 16-byte aligned, on other + // platforms there is no alignment requirement. + const uintptr_t kAlignment = 16; + const uintptr_t kAlignmentBits = kAlignment - 1; + + // Use real model data so that we can be sure error is only from the + // alignment requirement and not from bad data. + std::ifstream fp(""tensorflow/lite/testdata/empty_model.bin""); + ASSERT_TRUE(fp.good()); + std::string empty_model_data((std::istreambuf_iterator(fp)), + std::istreambuf_iterator()); + auto free_chars = [](char* p) { free(p); }; + std::unique_ptr buffer( + reinterpret_cast(malloc(empty_model_data.size() + kAlignment)), + free_chars); + + // Check that aligned buffer works (no other errors in the test). + char* aligned = reinterpret_cast( + (reinterpret_cast(buffer.get()) + kAlignment) & + ~kAlignmentBits); + memcpy(aligned, empty_model_data.c_str(), empty_model_data.size()); + EXPECT_TRUE( + FlatBufferModel::BuildFromBuffer(aligned, empty_model_data.size())); + + // Check unaligned buffer handling. + char* unaligned = + reinterpret_cast(reinterpret_cast(buffer.get()) | 0x1); + memcpy(unaligned, empty_model_data.c_str(), empty_model_data.size()); +#ifdef __arm__ + EXPECT_FALSE( + FlatBufferModel::BuildFromBuffer(unaligned, empty_model_data.size())); +#else // !__arm__ + EXPECT_TRUE( + FlatBufferModel::BuildFromBuffer(unaligned, empty_model_data.size())); +#endif // __arm__ +} + // Make sure a model with nothing in it loads properly. 
TEST(BasicFlatBufferModel, TestEmptyModelsAndNullDestination) { auto model = FlatBufferModel::BuildFromFile( @@ -248,15 +289,13 @@ class FakeVerifier : public tflite::TfLiteVerifier { TEST(BasicFlatBufferModel, TestWithTrueVerifier) { FakeVerifier verifier(true); ASSERT_TRUE(FlatBufferModel::VerifyAndBuildFromFile( - ""tensorflow/lite/testdata/test_model.bin"", - &verifier)); + ""tensorflow/lite/testdata/test_model.bin"", &verifier)); } TEST(BasicFlatBufferModel, TestWithFalseVerifier) { FakeVerifier verifier(false); ASSERT_FALSE(FlatBufferModel::VerifyAndBuildFromFile( - ""tensorflow/lite/testdata/test_model.bin"", - &verifier)); + ""tensorflow/lite/testdata/test_model.bin"", &verifier)); } TEST(BasicFlatBufferModel, TestWithNullVerifier) { @@ -269,8 +308,7 @@ TEST(BasicFlatBufferModel, TestWithNullVerifier) { TEST(BasicFlatBufferModel, TestCustomErrorReporter) { TestErrorReporter reporter; auto model = FlatBufferModel::BuildFromFile( - ""tensorflow/lite/testdata/empty_model.bin"", - &reporter); + ""tensorflow/lite/testdata/empty_model.bin"", &reporter); ASSERT_TRUE(model); std::unique_ptr interpreter; ",0,train bf64fc285e88d36bb82f80757c4a1afd722347e0,tensorflow/tensorflow,"Add float16 support for NonMaxSuppressionV{2,3,4} This fix tries to address the issue raised in 20199 where there was no float16 support for NonMaxSuppressionV2. As NonMaxSuppressionV2 is the earlier versions of API and there are newer versions of NonMaxSuppression: NonMaxSuppressionV2, NonMaxSuppressionV3, NonMaxSuppressionV4, This fix exposes the float16 support to all of the above. (Note in the master the default version used is NonMaxSuppressionV3) This fix fixes 20199. Signed-off-by: Yong Tang ",non_max_suppression_op.cc,"@@ -75,28 +75,29 @@ static inline void ParseAndCheckBoxSizes(OpKernelContext* context, } // Return intersection-over-union overlap between boxes i and j -static inline float IOUGreaterThanThreshold( - typename TTypes::ConstTensor boxes, int i, int j, - float iou_threshold) { - const float ymin_i = std::min(boxes(i, 0), boxes(i, 2)); - const float xmin_i = std::min(boxes(i, 1), boxes(i, 3)); - const float ymax_i = std::max(boxes(i, 0), boxes(i, 2)); - const float xmax_i = std::max(boxes(i, 1), boxes(i, 3)); - const float ymin_j = std::min(boxes(j, 0), boxes(j, 2)); - const float xmin_j = std::min(boxes(j, 1), boxes(j, 3)); - const float ymax_j = std::max(boxes(j, 0), boxes(j, 2)); - const float xmax_j = std::max(boxes(j, 1), boxes(j, 3)); - const float area_i = (ymax_i - ymin_i) * (xmax_i - xmin_i); - const float area_j = (ymax_j - ymin_j) * (xmax_j - xmin_j); - if (area_i <= 0 || area_j <= 0) return 0.0; - const float intersection_ymin = std::max(ymin_i, ymin_j); - const float intersection_xmin = std::max(xmin_i, xmin_j); - const float intersection_ymax = std::min(ymax_i, ymax_j); - const float intersection_xmax = std::min(xmax_i, xmax_j); - const float intersection_area = - std::max(intersection_ymax - intersection_ymin, 0.0) * - std::max(intersection_xmax - intersection_xmin, 0.0); - const float iou = intersection_area / (area_i + area_j - intersection_area); +template +static inline bool IOUGreaterThanThreshold( + typename TTypes::ConstTensor boxes, int i, int j, + T iou_threshold) { + const T ymin_i = std::min(boxes(i, 0), boxes(i, 2)); + const T xmin_i = std::min(boxes(i, 1), boxes(i, 3)); + const T ymax_i = std::max(boxes(i, 0), boxes(i, 2)); + const T xmax_i = std::max(boxes(i, 1), boxes(i, 3)); + const T ymin_j = std::min(boxes(j, 0), boxes(j, 2)); + const T xmin_j = std::min(boxes(j, 
1), boxes(j, 3)); + const T ymax_j = std::max(boxes(j, 0), boxes(j, 2)); + const T xmax_j = std::max(boxes(j, 1), boxes(j, 3)); + const T area_i = (ymax_i - ymin_i) * (xmax_i - xmin_i); + const T area_j = (ymax_j - ymin_j) * (xmax_j - xmin_j); + if (area_i <= static_cast(0) || area_j <= static_cast(0)) return 0; + const T intersection_ymin = std::max(ymin_i, ymin_j); + const T intersection_xmin = std::max(xmin_i, xmin_j); + const T intersection_ymax = std::min(ymax_i, ymax_j); + const T intersection_xmax = std::min(xmax_i, xmax_j); + const T intersection_area = + std::max(intersection_ymax - intersection_ymin, static_cast(0.0)) * + std::max(intersection_xmax - intersection_xmin, static_cast(0.0)); + const T iou = intersection_area / (area_i + area_j - intersection_area); return iou > iou_threshold; } @@ -106,11 +107,12 @@ static inline bool OverlapsGreaterThanThreshold( return overlaps(i, j) > overlap_threshold; } +template static inline std::function CreateIOUSuppressCheckFn( const Tensor& boxes, float threshold) { - typename TTypes::ConstTensor boxes_data = boxes.tensor(); - return std::bind(&IOUGreaterThanThreshold, boxes_data, std::placeholders::_1, - std::placeholders::_2, threshold); + typename TTypes::ConstTensor boxes_data = boxes.tensor(); + return std::bind(&IOUGreaterThanThreshold, boxes_data, std::placeholders::_1, + std::placeholders::_2, static_cast(threshold)); } static inline std::function CreateOverlapsSuppressCheckFn( @@ -121,6 +123,7 @@ static inline std::function CreateOverlapsSuppressCheckFn( std::placeholders::_1, std::placeholders::_2, threshold); } +template void DoNonMaxSuppressionOp( OpKernelContext* context, const Tensor& scores, int num_boxes, const Tensor& max_output_size, const float score_threshold, @@ -128,13 +131,13 @@ void DoNonMaxSuppressionOp( bool pad_to_max_output_size = false, int* ptr_num_valid_outputs = nullptr) { const int output_size = max_output_size.scalar()(); - std::vector scores_data(num_boxes); - std::copy_n(scores.flat().data(), num_boxes, scores_data.begin()); + std::vector scores_data(num_boxes); + std::copy_n(scores.flat().data(), num_boxes, scores_data.begin()); // Data structure for selection candidate in NMS. 
struct Candidate { int box_index; - float score; + T score; }; auto cmp = [](const Candidate bs_i, const Candidate bs_j) { @@ -143,13 +146,13 @@ void DoNonMaxSuppressionOp( std::priority_queue, decltype(cmp)> candidate_priority_queue(cmp); for (int i = 0; i < scores_data.size(); ++i) { - if (scores_data[i] > score_threshold) { + if (scores_data[i] > static_cast(score_threshold)) { candidate_priority_queue.emplace(Candidate({i, scores_data[i]})); } } std::vector selected; - std::vector selected_scores; + std::vector selected_scores; Candidate next_candidate; while (selected.size() < output_size && !candidate_priority_queue.empty()) { @@ -176,7 +179,7 @@ void DoNonMaxSuppressionOp( int num_valid_outputs = selected.size(); if (pad_to_max_output_size) { selected.resize(output_size, 0); - selected_scores.resize(output_size, 0); + selected_scores.resize(output_size, static_cast(0)); } if (ptr_num_valid_outputs) { *ptr_num_valid_outputs = num_valid_outputs; @@ -221,10 +224,10 @@ class NonMaxSuppressionOp : public OpKernel { if (!context->status().ok()) { return; } - auto suppress_check_fn = CreateIOUSuppressCheckFn(boxes, iou_threshold_); + auto suppress_check_fn = CreateIOUSuppressCheckFn(boxes, iou_threshold_); const float score_threshold_val = std::numeric_limits::lowest(); - DoNonMaxSuppressionOp(context, scores, num_boxes, max_output_size, + DoNonMaxSuppressionOp(context, scores, num_boxes, max_output_size, score_threshold_val, suppress_check_fn); } @@ -232,7 +235,7 @@ class NonMaxSuppressionOp : public OpKernel { float iou_threshold_; }; -template +template class NonMaxSuppressionV2Op : public OpKernel { public: explicit NonMaxSuppressionV2Op(OpKernelConstruction* context) @@ -264,10 +267,10 @@ class NonMaxSuppressionV2Op : public OpKernel { if (!context->status().ok()) { return; } - auto suppress_check_fn = CreateIOUSuppressCheckFn(boxes, iou_threshold_val); + auto suppress_check_fn = CreateIOUSuppressCheckFn(boxes, iou_threshold_val); const float score_threshold_val = std::numeric_limits::lowest(); - DoNonMaxSuppressionOp(context, scores, num_boxes, max_output_size, + DoNonMaxSuppressionOp(context, scores, num_boxes, max_output_size, score_threshold_val, suppress_check_fn); } }; @@ -325,7 +328,7 @@ class NonMaxSuppressionV3V4Base : public OpKernel { float score_threshold_val_; }; -template +template class NonMaxSuppressionV3Op : public NonMaxSuppressionV3V4Base { public: explicit NonMaxSuppressionV3Op(OpKernelConstruction* context) @@ -334,14 +337,14 @@ class NonMaxSuppressionV3Op : public NonMaxSuppressionV3V4Base { protected: void DoComputeAndPostProcess(OpKernelContext* context) override { auto suppress_check_fn = - CreateIOUSuppressCheckFn(boxes_, iou_threshold_val_); + CreateIOUSuppressCheckFn(boxes_, iou_threshold_val_); - DoNonMaxSuppressionOp(context, scores_, num_boxes_, max_output_size_, + DoNonMaxSuppressionOp(context, scores_, num_boxes_, max_output_size_, score_threshold_val_, suppress_check_fn); } }; -template +template class NonMaxSuppressionV4Op : public NonMaxSuppressionV3V4Base { public: explicit NonMaxSuppressionV4Op(OpKernelConstruction* context) @@ -353,10 +356,10 @@ class NonMaxSuppressionV4Op : public NonMaxSuppressionV3V4Base { protected: void DoComputeAndPostProcess(OpKernelContext* context) override { auto suppress_check_fn = - CreateIOUSuppressCheckFn(boxes_, iou_threshold_val_); + CreateIOUSuppressCheckFn(boxes_, iou_threshold_val_); int num_valid_outputs; - DoNonMaxSuppressionOp(context, scores_, num_boxes_, max_output_size_, + DoNonMaxSuppressionOp(context, 
scores_, num_boxes_, max_output_size_, score_threshold_val_, suppress_check_fn, pad_to_max_output_size_, &num_valid_outputs); @@ -413,7 +416,7 @@ class NonMaxSuppressionWithOverlapsOp : public OpKernel { auto suppress_check_fn = CreateOverlapsSuppressCheckFn(overlaps, overlap_threshold_val); - DoNonMaxSuppressionOp(context, scores, num_boxes, max_output_size, + DoNonMaxSuppressionOp(context, scores, num_boxes, max_output_size, score_threshold_val, suppress_check_fn); } }; @@ -421,14 +424,20 @@ class NonMaxSuppressionWithOverlapsOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppression"").Device(DEVICE_CPU), NonMaxSuppressionOp); -REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV2"").Device(DEVICE_CPU), - NonMaxSuppressionV2Op); +REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV2"").TypeConstraint(""T"").Device(DEVICE_CPU), + NonMaxSuppressionV2Op); +REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV2"").TypeConstraint(""T"").Device(DEVICE_CPU), + NonMaxSuppressionV2Op); -REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV3"").Device(DEVICE_CPU), - NonMaxSuppressionV3Op); +REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV3"").TypeConstraint(""T"").Device(DEVICE_CPU), + NonMaxSuppressionV3Op); +REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV3"").TypeConstraint(""T"").Device(DEVICE_CPU), + NonMaxSuppressionV3Op); -REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV4"").Device(DEVICE_CPU), - NonMaxSuppressionV4Op); +REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV4"").TypeConstraint(""T"").Device(DEVICE_CPU), + NonMaxSuppressionV4Op); +REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV4"").TypeConstraint(""T"").Device(DEVICE_CPU), + NonMaxSuppressionV4Op); REGISTER_KERNEL_BUILDER( Name(""NonMaxSuppressionWithOverlaps"").Device(DEVICE_CPU), ",0,test bf64fc285e88d36bb82f80757c4a1afd722347e0,tensorflow/tensorflow,"Add float16 support for NonMaxSuppressionV{2,3,4} This fix tries to address the issue raised in 20199 where there was no float16 support for NonMaxSuppressionV2. As NonMaxSuppressionV2 is the earlier versions of API and there are newer versions of NonMaxSuppression: NonMaxSuppressionV2, NonMaxSuppressionV3, NonMaxSuppressionV4, This fix exposes the float16 support to all of the above. (Note in the master the default version used is NonMaxSuppressionV3) This fix fixes 20199. Signed-off-by: Yong Tang ",image_ops.cc,"@@ -683,11 +683,12 @@ REGISTER_OP(""NonMaxSuppression"") }); REGISTER_OP(""NonMaxSuppressionV2"") - .Input(""boxes: float"") - .Input(""scores: float"") + .Input(""boxes: T"") + .Input(""scores: T"") .Input(""max_output_size: int32"") .Input(""iou_threshold: float"") .Output(""selected_indices: int32"") + .Attr(""T: {half, float}"") .SetShapeFn([](InferenceContext* c) { // Get inputs and validate ranks. 
ShapeHandle boxes; @@ -711,22 +712,24 @@ REGISTER_OP(""NonMaxSuppressionV2"") }); REGISTER_OP(""NonMaxSuppressionV3"") - .Input(""boxes: float"") - .Input(""scores: float"") + .Input(""boxes: T"") + .Input(""scores: T"") .Input(""max_output_size: int32"") .Input(""iou_threshold: float"") .Input(""score_threshold: float"") .Output(""selected_indices: int32"") + .Attr(""T: {half, float}"") .SetShapeFn(NMSShapeFn); REGISTER_OP(""NonMaxSuppressionV4"") - .Input(""boxes: float"") - .Input(""scores: float"") + .Input(""boxes: T"") + .Input(""scores: T"") .Input(""max_output_size: int32"") .Input(""iou_threshold: float"") .Input(""score_threshold: float"") .Output(""selected_indices: int32"") .Output(""valid_outputs: int32"") + .Attr(""T: {half, float}"") .Attr(""pad_to_max_output_size: bool = false"") .SetShapeFn([](InferenceContext* c) { TF_RETURN_IF_ERROR(NMSShapeFn(c)); ",0,test 631f2b77dc22cf2e1b9a41d0c7518b32fe02e61b,tensorflow/tensorflow,"Add api documentation for `tf.io.read_file`. PiperOrigin-RevId: 363707890 Change-Id: I0adfd60983bf38c312042de1d73eae1ec4c737e2",io_ops.py,"@@ -35,6 +35,7 @@ from tensorflow.python.ops import gen_io_ops from tensorflow.python.ops.gen_io_ops import * # pylint: enable=wildcard-import from tensorflow.python.util import deprecation +from tensorflow.python.util import dispatch as _dispatch from tensorflow.python.util.tf_export import tf_export @@ -96,6 +97,47 @@ def _restore_slice(file_pattern, tensor_name, shape_and_slice, tensor_type, preferred_shard, name=name) +@_dispatch.add_dispatch_list +@tf_export(""io.read_file"", v1=[""io.read_file"", ""read_file""]) +def read_file(filename, name=None): + """"""Reads the contents of file. + + This operation returns a tensor with the entire contents of the input + filename. It does not do any parsing, it just returns the contents as + they are. Usually, this is the first step in the input pipeline. + + Example: + + >>> with open(""/tmp/file.txt"", ""w"") as f: + ... f.write(""asdf"") + ... + 4 + >>> tf.io.read_file(""/tmp/file.txt"") + + + Example of using the op in a function to read an image, decode it and reshape + the tensor containing the pixel data: + + >>> @tf.function + ... def load_image(filename): + ... raw = tf.io.read_file(filename) + ... image = tf.image.decode_png(raw, channels=3) + ... # the `print` executes during tracing. + ... print(""Initial shape: "", image.shape) + ... image.set_shape([28, 28, 3]) + ... print(""Final shape: "", image.shape) + ... return image + + Args: + filename: string. filename to read from. + name: string. Optional name for the op. + + Returns: + A tensor of dtype ""string"", with the file contents. + """""" + return gen_io_ops.read_file(filename, name) + + @tf_export(v1=[""ReaderBase""]) class ReaderBase(object): """"""Base class for different Reader types, that produce a record every step. ",0,train fa4b80602bbeb6f2d9b94f22b537643b9f482a20,tensorflow/tensorflow,"Shuffle passes in kernel generator to do partial bufferization. With this change, we first bufferize only HLO operations, optimize those and then lower and bufferize shape computations. 
PiperOrigin-RevId: 343062899 Change-Id: Ibc70a1f623f3c71c06478e75ef869fde3eecc741",kernel_creator.cc,"@@ -82,31 +82,19 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only, mlir::kernel_gen::transforms::CreateMaterializeBroadcastsPass()); pm.addNestedPass( mlir::kernel_gen::transforms::CreateUnfuseBatchNormPass()); - pm.addPass(mlir::mhlo::createLegalizeToLhloPass()); - // Moving `AllocOp`s and inserting missing `DeallocOp`s - pm.addNestedPass(::mlir::createBufferHoistingPass()); - pm.addNestedPass(::mlir::createBufferDeallocationPass()); - pm.addNestedPass(mlir::createCopyRemovalPass()); - pm.addPass(mlir::createCanonicalizerPass()); - pm.addPass(mlir::kernel_gen::transforms::CreateShapeToDescriptorsPass()); } else { pm.addNestedPass(mlir::mhlo::createLegalizeTFPass( /*allow_partial_conversion=*/false, /*legalize_chlo=*/false)); pm.addNestedPass(mlir::createTransformUnrankedHloPass()); pm.addNestedPass(mlir::mhlo::createChloLegalizeToHloPass()); pm.addNestedPass(mlir::createCanonicalizerPass()); - pm.addNestedPass(mlir::createCSEPass()); - pm.addPass(mlir::kernel_gen::transforms::CreateShapeToDescriptorsPass()); - // Clean up the IR created above. In particular, operations on descriptors - // are simplified here. - pm.addPass(mlir::createCSEPass()); - pm.addPass(mlir::kernel_gen::transforms::CreateBufferizePass()); - pm.addNestedPass( - mlir::kernel_gen::transforms::CreateParallelLoopsToSequential()); } + // Legalize only hlo operations to lhlo, keep the rest as tensors. + pm.addPass(mlir::kernel_gen::transforms::CreateHloBufferizePass()); // Clean up the IR for further processing. pm.addPass(mlir::createCanonicalizerPass()); + pm.addNestedPass(mlir::createCSEPass()); // We have to anticipate later unrolling in tiling to make sure that we get // the requested tiling after unrolling. Compute the new tiling here if // needed. @@ -160,7 +148,23 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only, } // Greedily map the remaining loop to GPU hardware dimensions. pm.addNestedPass<::mlir::FuncOp>(xla::mlir_gpu::createMapParallelLoopsPass()); - // Apply the mapping. + + // Now lower the shape computations, bufferize all remaining ops and insert + // deallocs. + pm.addNestedPass(::mlir::createBufferHoistingPass()); + pm.addNestedPass(mlir::createCopyRemovalPass()); + pm.addPass(mlir::createCanonicalizerPass()); + pm.addPass(mlir::kernel_gen::transforms::CreateShapeToDescriptorsPass()); + pm.addPass(mlir::createCanonicalizerPass()); + pm.addNestedPass(mlir::createCSEPass()); + pm.addPass(mlir::kernel_gen::transforms::CreateFinalBufferizePass()); + pm.addNestedPass(mlir::createPromoteBuffersToStackPass(64)); + // TODO(herhut): Enabled this to avoid leaks once fixed. + // pm.addNestedPass(::mlir::createBufferDeallocationPass()); + + // Apply the mapping and go to GPU. We cannot do this earlier due to missing + // interfaces on the GPU dialect. + // TODO(herhut) Implement interfaces. pm.addNestedPass<::mlir::FuncOp>(mlir::createParallelLoopToGpuPass()); // Some basic cleanup. @@ -190,7 +194,9 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only, mlir::kernel_gen::transforms::CreateEmbedMemRefPrintsPass()); } pm.addNestedPass<::mlir::FuncOp>(::mlir::createCanonicalizerPass()); - + // TODO(herhut): Remove this pass once the LowerToCFG pass can handle it. + pm.addNestedPass( + mlir::kernel_gen::transforms::CreateParallelLoopsToSequential()); pm.addPass(::mlir::createLowerToCFGPass()); // Map allocs, asserts, etc. to the tensorflow framework. 
pm.addPass(mlir::kernel_gen::tf_framework::CreateEmbedTFFrameworkPass()); ",0,train fa4b80602bbeb6f2d9b94f22b537643b9f482a20,tensorflow/tensorflow,"Shuffle passes in kernel generator to do partial bufferization. With this change, we first bufferize only HLO operations, optimize those and then lower and bufferize shape computations. PiperOrigin-RevId: 343062899 Change-Id: Ibc70a1f623f3c71c06478e75ef869fde3eecc741",bufferize.cc,"@@ -21,6 +21,7 @@ limitations under the License. #include ""mlir/Dialect/StandardOps/IR/Ops.h"" // from @llvm-project #include ""mlir/IR/Attributes.h"" // from @llvm-project #include ""mlir/IR/BlockAndValueMapping.h"" // from @llvm-project +#include ""mlir/IR/StandardTypes.h"" // from @llvm-project #include ""mlir/Transforms/DialectConversion.h"" // from @llvm-project #include ""tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h"" ",0,train fa4b80602bbeb6f2d9b94f22b537643b9f482a20,tensorflow/tensorflow,"Shuffle passes in kernel generator to do partial bufferization. With this change, we first bufferize only HLO operations, optimize those and then lower and bufferize shape computations. PiperOrigin-RevId: 343062899 Change-Id: Ibc70a1f623f3c71c06478e75ef869fde3eecc741",bufferize_pass.cc,"@@ -18,6 +18,8 @@ limitations under the License. #include +#include ""llvm/ADT/STLExtras.h"" +#include ""mlir/Dialect/Affine/IR/AffineOps.h"" // from @llvm-project #include ""mlir/Dialect/SCF/SCF.h"" // from @llvm-project #include ""mlir/Dialect/SCF/Transforms.h"" // from @llvm-project #include ""mlir/Dialect/Shape/IR/Shape.h"" // from @llvm-project @@ -49,6 +51,37 @@ namespace { #define GEN_PASS_CLASSES #include ""tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc"" +struct HloBufferizePass : public HloBufferizePassBase { + // TODO(b/173201243): Move to tablegen. + void getDependentDialects(DialectRegistry& registry) const override { + registry.insert(); + } + + public: + void runOnOperation() override { + OwningRewritePatternList patterns; + auto& context = getContext(); + ConversionTarget target(context); + target.addLegalDialect(); + target.addLegalDialect(); + target.addIllegalDialect(); + + BufferizeTypeConverter converter; + // Configure bufferize pattern for functions and lhlo. + mhlo::populateHLOToLHLOConversionPattern(&context, &converter, &patterns); + + // Configure legality and structural patterns. + populateBufferizeMaterializationLegality(target); + populateShapeStructuralTypeConversionsAndLegality(&context, converter, + patterns, target); + scf::populateSCFStructuralTypeConversionsAndLegality(&context, converter, + patterns, target); + if (failed(applyPartialConversion(getOperation(), target, + std::move(patterns)))) + signalPassFailure(); + } +}; + // TODO(herhut) : This could become a real pattern in bufferize pass. What we // would need to do is insert a copy to model the semantics correctly. The same // is true for the TensorLoad pattern that is already in there. Then buffer @@ -71,28 +104,36 @@ class UnrankedTensorStoreTestOnlyPattern } }; -struct BufferizePass : public BufferizePassBase { +struct FinalBufferizePass : public FinalBufferizePassBase { + // TODO(b/173201243): Move to tablegen. 
void getDependentDialects(DialectRegistry& registry) const override { - registry.insert(); + registry.insert(); } public: void runOnOperation() override { auto& context = getContext(); ConversionTarget target(context); - target.addLegalDialect(); - target.addLegalOp(); + target.addLegalDialect(); + target.addLegalOp(); target.addIllegalDialect(); target.addIllegalOp(); + // Certain operations are no longer legal on tensors but otherwise are. + target.addDynamicallyLegalOp([&](Operation* op) { + return llvm::none_of(op->getResultTypes(), + [](Type t) { return t.isa(); }); + }); target.addDynamicallyLegalOp([&](TensorStoreOp op) { return !op.tensor().getType().isa(); }); BufferizeTypeConverter converter; + // TODO(herhut): Move this legality configuration to bufferize itself? auto typesAreLegal = [&converter](Operation* op) { return converter.isLegal(op->getOperandTypes()) && converter.isLegal(op->getResultTypes()); @@ -111,6 +152,8 @@ struct BufferizePass : public BufferizePassBase { populateFuncOpTypeConversionPattern(patterns, &context, converter); populateCallOpTypeConversionPattern(patterns, &context, converter); populateStdBufferizePatterns(&context, converter, patterns); + populateEliminateBufferizeMaterializationsPatterns(&context, converter, + patterns); populateExtraStdBufferizePattern(&context, &converter, &patterns); populateShapeStructuralTypeConversionsAndLegality(&context, converter, patterns, target); @@ -127,8 +170,12 @@ struct BufferizePass : public BufferizePassBase { } // namespace -std::unique_ptr > CreateBufferizePass() { - return std::make_unique(); +std::unique_ptr > CreateHloBufferizePass() { + return std::make_unique(); +} + +std::unique_ptr > CreateFinalBufferizePass() { + return std::make_unique(); } } // namespace transforms ",0,train fa4b80602bbeb6f2d9b94f22b537643b9f482a20,tensorflow/tensorflow,"Shuffle passes in kernel generator to do partial bufferization. With this change, we first bufferize only HLO operations, optimize those and then lower and bufferize shape computations. PiperOrigin-RevId: 343062899 Change-Id: Ibc70a1f623f3c71c06478e75ef869fde3eecc741",passes.h,"@@ -47,9 +47,13 @@ std::unique_ptr > CreateTFKernelToLLVMPass(); // using memref descriptors. std::unique_ptr > CreateShapeToDescriptorsPass(); -// Pass to tranform computations on values to their corresponding parts on -// buffers. -std::unique_ptr > CreateBufferizePass(); +// Pass to tranform hlo-level computations on values to their corresponding +// parts on buffers. +std::unique_ptr> CreateHloBufferizePass(); + +// Pass to tranform late-dialect level computations (essentially all non-hlo +// dialects) on values to their corresponding parts on buffers. +std::unique_ptr> CreateFinalBufferizePass(); // Pass to materialize broadcasts. std::unique_ptr CreateMaterializeBroadcastsPass(); ",0,train 94e7e37a60e56464cedee82125691f2ba7b9be22,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-10-21 PiperOrigin-RevId: 275799990 Change-Id: I11e805303876cab4e0a47e4427a4fdcfb74706f2",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 10, 20) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 10, 21) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train 519189837b77181137505bf83054ddd962600f9b,tensorflow/tensorflow,"Making the tf.name_scope blocks related to the factor and weight vars configurable. By default they will not be scoped. PiperOrigin-RevId: 198759754",factorization_ops.py,"@@ -197,7 +197,8 @@ class WALSModel(object): row_weights=1, col_weights=1, use_factors_weights_cache=True, - use_gramian_cache=True): + use_gramian_cache=True, + use_scoped_vars=False): """"""Creates model for WALS matrix factorization. Args: @@ -239,6 +240,8 @@ class WALSModel(object): weights cache to take effect. use_gramian_cache: When True, the Gramians will be cached on the workers before the updates start. Defaults to True. + use_scoped_vars: When True, the factor and weight vars will also be nested + in a tf.name_scope. """""" self._input_rows = input_rows self._input_cols = input_cols @@ -251,18 +254,36 @@ class WALSModel(object): regularization * linalg_ops.eye(self._n_components) if regularization is not None else None) assert (row_weights is None) == (col_weights is None) - self._row_weights = WALSModel._create_weights( - row_weights, self._input_rows, self._num_row_shards, ""row_weights"") - self._col_weights = WALSModel._create_weights( - col_weights, self._input_cols, self._num_col_shards, ""col_weights"") self._use_factors_weights_cache = use_factors_weights_cache self._use_gramian_cache = use_gramian_cache - self._row_factors = self._create_factors( - self._input_rows, self._n_components, self._num_row_shards, row_init, - ""row_factors"") - self._col_factors = self._create_factors( - self._input_cols, self._n_components, self._num_col_shards, col_init, - ""col_factors"") + + if use_scoped_vars: + with ops.name_scope(""row_weights""): + self._row_weights = WALSModel._create_weights( + row_weights, self._input_rows, self._num_row_shards, ""row_weights"") + with ops.name_scope(""col_weights""): + self._col_weights = WALSModel._create_weights( + col_weights, self._input_cols, self._num_col_shards, ""col_weights"") + with ops.name_scope(""row_factors""): + self._row_factors = self._create_factors( + self._input_rows, self._n_components, self._num_row_shards, + row_init, ""row_factors"") + with ops.name_scope(""col_factors""): + self._col_factors = self._create_factors( + self._input_cols, self._n_components, self._num_col_shards, + col_init, ""col_factors"") + else: + self._row_weights = WALSModel._create_weights( + row_weights, self._input_rows, self._num_row_shards, ""row_weights"") + self._col_weights = WALSModel._create_weights( + col_weights, self._input_cols, self._num_col_shards, ""col_weights"") + self._row_factors = self._create_factors( + self._input_rows, self._n_components, self._num_row_shards, row_init, + ""row_factors"") + self._col_factors = self._create_factors( + self._input_cols, self._n_components, self._num_col_shards, col_init, + ""col_factors"") + self._row_gramian = self._create_gramian(self._n_components, ""row_gramian"") self._col_gramian = self._create_gramian(self._n_components, ""col_gramian"") with ops.name_scope(""row_prepare_gramian""): @@ -313,37 +334,36 @@ class WALSModel(object): @classmethod def _create_factors(cls, rows, cols, num_shards, init, name): """"""Helper function to create row and column factors."""""" - with ops.name_scope(name): - if 
callable(init): - init = init() - if isinstance(init, list): - assert len(init) == num_shards - elif isinstance(init, str) and init == ""random"": - pass - elif num_shards == 1: - init = [init] - sharded_matrix = [] - sizes = cls._shard_sizes(rows, num_shards) - assert len(sizes) == num_shards - - def make_initializer(i, size): - - def initializer(): - if init == ""random"": - return random_ops.random_normal([size, cols]) - else: - return init[i] + if callable(init): + init = init() + if isinstance(init, list): + assert len(init) == num_shards + elif isinstance(init, str) and init == ""random"": + pass + elif num_shards == 1: + init = [init] + sharded_matrix = [] + sizes = cls._shard_sizes(rows, num_shards) + assert len(sizes) == num_shards + + def make_initializer(i, size): - return initializer + def initializer(): + if init == ""random"": + return random_ops.random_normal([size, cols]) + else: + return init[i] - for i, size in enumerate(sizes): - var_name = ""%s_shard_%d"" % (name, i) - var_init = make_initializer(i, size) - sharded_matrix.append( - variable_scope.variable( - var_init, dtype=dtypes.float32, name=var_name)) + return initializer - return sharded_matrix + for i, size in enumerate(sizes): + var_name = ""%s_shard_%d"" % (name, i) + var_init = make_initializer(i, size) + sharded_matrix.append( + variable_scope.variable( + var_init, dtype=dtypes.float32, name=var_name)) + + return sharded_matrix @classmethod def _create_weights(cls, wt_init, num_wts, num_shards, name): @@ -384,26 +404,25 @@ class WALSModel(object): sizes = cls._shard_sizes(num_wts, num_shards) assert len(sizes) == num_shards - with ops.name_scope(name): - def make_wt_initializer(i, size): + def make_wt_initializer(i, size): - def initializer(): - if init_mode == ""scalar"": - return wt_init * array_ops.ones([size]) - else: - return wt_init[i] + def initializer(): + if init_mode == ""scalar"": + return wt_init * array_ops.ones([size]) + else: + return wt_init[i] - return initializer + return initializer - sharded_weight = [] - for i, size in enumerate(sizes): - var_name = ""%s_shard_%d"" % (name, i) - var_init = make_wt_initializer(i, size) - sharded_weight.append( - variable_scope.variable( - var_init, dtype=dtypes.float32, name=var_name)) + sharded_weight = [] + for i, size in enumerate(sizes): + var_name = ""%s_shard_%d"" % (name, i) + var_init = make_wt_initializer(i, size) + sharded_weight.append( + variable_scope.variable( + var_init, dtype=dtypes.float32, name=var_name)) - return sharded_weight + return sharded_weight @staticmethod def _create_gramian(n_components, name): ",0,train afab9ac5103929ad4d3d523021308ca650457ba5,tensorflow/tensorflow,Changed example and output,image_ops_impl.py,"@@ -3252,24 +3252,19 @@ def rgb_to_yuv(images): Outputs a tensor of the same shape as the `images` tensor, containing the YUV value of the pixels. The output is only well defined if the value in images are in [0,1]. - You need to scale your RGB images if their pixel values are not in the - required range. Below given example illustrates preprocessing of each channel - of images before feeding them to `rgb_to_yuv`. Usage Example: - >>> rgb_images = tf.random.uniform(shape=[100,64,64,3], maxval=255) - >>> preprocessed_rgb_images = tf.truediv( - ... tf.subtract( - ... rgb_images, - ... tf.reduce_min(rgb_images) - ... ), - ... tf.subtract( - ... tf.reduce_max(rgb_images), - ... tf.reduce_min(rgb_images) - ... ) - ... ) - >>> yub_tensor_images = tf.image.rgb_to_yuv(preprocessed_rgb_images) + >>> x = [[[0.1, 0.2, 0.3], + ... 
[0.4, 0.5, 0.6]], + ... [[0.7, 0.8, 0.9], + ... [0.10, 0.11, 0.12]]] + >>> tf.image.rgb_to_yuv(x) + Args: images: 2-D or higher rank. Image data to convert. Last dimension must be ",0,train 733bff53926717bb9583d4833ba062c58f27960f,tensorflow/tensorflow,"Add a tf.contrib.util.create_example utility for building Example protos. PiperOrigin-RevId: 155868794",__init__.py,"@@ -18,6 +18,7 @@ See @{$python/contrib.util} guide. @@constant_value +@@create_example @@make_tensor_proto @@make_ndarray @@ops_used_by_graph_def @@ -30,11 +31,11 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import +from tensorflow.contrib.util.create_example import create_example from tensorflow.python.framework.meta_graph import ops_used_by_graph_def from tensorflow.python.framework.meta_graph import stripped_op_list_for_graph from tensorflow.python.framework.tensor_util import constant_value from tensorflow.python.framework.tensor_util import make_tensor_proto from tensorflow.python.framework.tensor_util import MakeNdarray as make_ndarray -# pylint: disable=unused_import from tensorflow.python.util.all_util import remove_undocumented remove_undocumented(__name__) ",0,train 733bff53926717bb9583d4833ba062c58f27960f,tensorflow/tensorflow,"Add a tf.contrib.util.create_example utility for building Example protos. PiperOrigin-RevId: 155868794",create_example.py,"@@ -0,0 +1,61 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""""""Utilities for constructing Example protos. + +Takes ndarrays, lists, or tuples for each feature. + +@@create_example +"""""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.core.example import example_pb2 + + +def create_example(**features): + """"""Constructs a `tf.train.Example` from the given features. + + Args: + **features: Maps feature name to an integer, float, or string ndarray, or + another object convertible to an ndarray (list, tuple, etc). + + Returns: + A `tf.train.Example` with the features. + + Raises: + ValueError: if a feature is not integer, float, or string. + """""" + example = example_pb2.Example() + for name in features: + feature = example.features.feature[name] + values = np.asarray(features[name]) + # Encode unicode using UTF-8. 
+ if values.dtype.kind == 'U': + values = np.vectorize(lambda string: string.encode('utf-8'))(values) + + if values.dtype.kind == 'i': + feature.int64_list.value.extend(values.astype(np.int64).ravel()) + elif values.dtype.kind == 'f': + feature.float_list.value.extend(values.astype(np.float32).ravel()) + elif values.dtype.kind == 'S': + feature.bytes_list.value.extend(values.ravel()) + else: + raise ValueError('Feature ""%s"" has unexpected dtype: %s' % (name, + values.dtype)) + return example ",0,train 733bff53926717bb9583d4833ba062c58f27960f,tensorflow/tensorflow,"Add a tf.contrib.util.create_example utility for building Example protos. PiperOrigin-RevId: 155868794",create_example_test.py,"@@ -0,0 +1,86 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""""""Tests for the Example creation utilities."""""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib import util +from tensorflow.core.example import example_pb2 +from tensorflow.python.platform import googletest + + +class CreateExampleTest(googletest.TestCase): + + def testCreateExample_empty(self): + self.assertEqual(util.create_example(), example_pb2.Example()) + + # np.asarray([]) == np.array([], dtype=np.float64), but the dtype should not + # matter here. + actual = util.create_example(foo=[], bar=()) + expected = example_pb2.Example() + expected.features.feature['foo'].float_list.value.extend([]) + expected.features.feature['bar'].float_list.value.extend([]) + self.assertEqual(actual, expected) + + def testCreateExample_scalars(self): + actual = util.create_example(foo=3, bar=4.2, baz='x', qux=b'y') + expected = example_pb2.Example() + expected.features.feature['foo'].int64_list.value.append(3) + # 4.2 cannot be represented exactly in floating point. 
+ expected.features.feature['bar'].float_list.value.append(np.float32(4.2)) + expected.features.feature['baz'].bytes_list.value.append(b'x') + expected.features.feature['qux'].bytes_list.value.append(b'y') + self.assertEqual(actual, expected) + + def testCreateExample_listContainingString(self): + actual = util.create_example(foo=[3, 4.2, 'foo']) + # np.asarray([3, 4.2, 'foo']) == np.array(['3', '4.2', 'foo']) + expected = example_pb2.Example() + expected.features.feature['foo'].bytes_list.value.extend( + [b'3', b'4.2', b'foo']) + self.assertEqual(actual, expected) + + def testCreateExample_lists_tuples_ranges(self): + actual = util.create_example( + foo=[1, 2, 3, 4, 5], bar=(0.5, 0.25, 0.125), baz=range(3)) + expected = example_pb2.Example() + expected.features.feature['foo'].int64_list.value.extend([1, 2, 3, 4, 5]) + expected.features.feature['bar'].float_list.value.extend([0.5, 0.25, 0.125]) + expected.features.feature['baz'].int64_list.value.extend([0, 1, 2]) + self.assertEqual(actual, expected) + + def testCreateExample_ndarrays(self): + a = np.random.random((3, 4, 5)).astype(np.float32) + b = np.random.randint(low=1, high=10, size=(6, 5, 4)) + actual = util.create_example(A=a, B=b) + expected = example_pb2.Example() + expected.features.feature['A'].float_list.value.extend(a.ravel()) + expected.features.feature['B'].int64_list.value.extend(b.ravel()) + self.assertEqual(actual, expected) + + def testCreateExample_unicode(self): + actual = util.create_example(A=[u'\u4242', u'\u5555']) + expected = example_pb2.Example() + expected.features.feature['A'].bytes_list.value.extend( + [u'\u4242'.encode('utf-8'), u'\u5555'.encode('utf-8')]) + self.assertEqual(actual, expected) + + +if __name__ == '__main__': + googletest.main() ",0,train 5ae2d41e7a1daf4b00b24dda683fabf7c283df7c,tensorflow/tensorflow,"Checkpointable: Fix device placement when restoring name-based checkpoints. Just need to put the restore ops on a CPU. PiperOrigin-RevId: 188248198",checkpointable_utils.py,"@@ -493,8 +493,9 @@ class NameBasedSaverStatus(_LoadStatus): """"""Load the name-based training checkpoint using a new `tf.train.Saver`."""""" if session is None and not context.executing_eagerly(): session = ops.get_default_session() - saver_lib.Saver(self._object_saver._global_variable_names()).restore( # pylint: disable=protected-access - sess=session, save_path=self._save_path) + with ops.device(""/cpu:0""): + saver_lib.Saver(self._object_saver._global_variable_names()).restore( # pylint: disable=protected-access + sess=session, save_path=self._save_path) def initialize_or_restore(self, session=None): """"""Alias for `run_restore_ops`."""""" ",0,train 5ae2d41e7a1daf4b00b24dda683fabf7c283df7c,tensorflow/tensorflow,"Checkpointable: Fix device placement when restoring name-based checkpoints. Just need to put the restore ops on a CPU. 
PiperOrigin-RevId: 188248198",checkpointable_utils_test.py,"@@ -993,20 +993,21 @@ class CheckpointCompatibilityTests(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testLoadFromNameBasedSaver(self): """"""Save a name-based checkpoint, load it using the object-based API."""""" - save_path = self._write_name_based_checkpoint() - root = self._initialized_model() - self._set_sentinels(root) - with self.assertRaises(AssertionError): + with test_util.device(use_gpu=True): + save_path = self._write_name_based_checkpoint() + root = self._initialized_model() + self._set_sentinels(root) + with self.assertRaises(AssertionError): + self._check_sentinels(root) + object_saver = checkpointable_utils.CheckpointableSaver(root) + status = object_saver.restore(save_path) + with self.assertRaises(AssertionError): + status.assert_consumed() + status.run_restore_ops() + self._check_sentinels(root) + self._set_sentinels(root) + status.initialize_or_restore() self._check_sentinels(root) - object_saver = checkpointable_utils.CheckpointableSaver(root) - status = object_saver.restore(save_path) - with self.assertRaises(AssertionError): - status.assert_consumed() - status.run_restore_ops() - self._check_sentinels(root) - self._set_sentinels(root) - status.initialize_or_restore() - self._check_sentinels(root) # TODO(allenl): Test for the core name-based saver loading object-based # checkpoints once object-based checkpointing is in core. ",0,train 091e8500c02ac69c3f1eced6f923598ebfcc354c,tensorflow/tensorflow,"Mention workarounds for load_weights not loading name-based checkpoints in 1.x PiperOrigin-RevId: 247697625",util.py,"@@ -616,8 +616,10 @@ def streaming_restore(status, session=None): session = keras_backend.get_session() if isinstance(status, NameBasedSaverStatus): raise NotImplementedError( - ""Streaming restore not supported from name-based checkpoints. File a "" - ""feature request if this limitation bothers you."") + ""Streaming restore not supported from name-based checkpoints when "" + ""graph building. File a feature request if this limitation bothers "" + ""you. As a workaround, consider either using tf.train.Checkpoint to "" + ""load name-based checkpoints or enabling eager execution."") status.run_restore_ops(session=session) # pylint: disable=protected-access status._checkpoint.new_restore_ops_callback = ( ",0,test d1cde76080ee52e3b2fb99966d44b4af515a9846,tensorflow/tensorflow,"[OpenCL] Fixes SYCL profiler tests (#141) The profiler relies heavily on the canonical device being listed in the TFProf nodes, which is only set for those devices which return True from CountAsCPUTime, so we need this to return True for SYCL device nodes too. The check for whether the node will run on an Accelerator comes from IsPlacedOnAccelerator.",tfprof_node.cc,"@@ -25,7 +25,7 @@ bool CountAsAcceleratorTime(const string& device) { } bool CountAsCPUTime(const string& device) { - return RE2::FullMatch(device, "".*/(gpu|cpu):\\d+""); + return RE2::FullMatch(device, "".*/(gpu|cpu|device:sycl):\\d+""); } bool IsCanonicalDevice(const string& device) { return CountAsCPUTime(device); } @@ -133,7 +133,7 @@ void TFGraphNode::AddStepStat(int64 step, const string& device, // See run_metadata_test.py // It can be /job:0/replica:0/xxxx/gpu:0, or simply /gpu:0. // It can has some ad-hoc suffix, such as /stream:xx or /memcpy:xx. 
- if (IsCanonicalDevice(device)) { + if (IsCanonicalDevice(dev)) { if (!canonical_device_.empty()) { if (canonical_device_ != dev) { fprintf(stderr, ""Unexpected: graph node changed device: %s->%s.\n"", @@ -143,7 +143,11 @@ void TFGraphNode::AddStepStat(int64 step, const string& device, } else { canonical_device_ = dev; // TODO(xpan): Support things other than gpu? - host_device_ = StringReplace(dev, ""gpu:\\d+"", ""cpu:0""); + if (dev.find(""sycl"") != dev.npos) { + host_device_ = StringReplace(dev, ""device:sycl:\\d+"", ""cpu:0""); + } else { + host_device_ = StringReplace(dev, ""gpu:\\d+"", ""cpu:0""); + } AddOpType(canonical_device_); } } @@ -217,7 +221,8 @@ TensorShapeProto VecToShapeProto(const std::vector shape_vec) { } bool IsPlacedOnAccelerator(const string& device) { - return device.find(""gpu"") != device.npos; + return device.find(""gpu"") != device.npos || + device.find(""sycl"") != device.npos; } } // namespace tfprof } // namespace tensorflow ",0,train 74f306e3cdf653338ed40a08c38b50aed8ed810b,tensorflow/tensorflow,"Add more details in the tfl.pack error string. PiperOrigin-RevId: 283113195 Change-Id: I7a69145252792b742b9d1d66152aa3d6eff713e8",tfl_ops.cc,"@@ -720,7 +720,8 @@ static LogicalResult Verify(PackOp op) { for (Value *operand : op.getOperands()) { auto other_type = operand->getType().cast(); if (input_type != other_type) - return op.emitOpError(""operands should be of the same type""); + return op.emitOpError(""operands should be of the same type. got "") + << input_type << "", "" << other_type; } return success(); ",0,train 759125bceae56152b1060b0aa416d7dc6dad1fb2,tensorflow/tensorflow,Added utility methods to insert allocs and deallocs.,buffer_assignment.h,"@@ -94,6 +94,24 @@ struct BufferAssignmentPositions { /// inserted. Operation* getDeallocPosition() const { return deallocPosition; } + /// Inserts a new dialect-specific alloc operation that will be constructed in + /// the right place using the arguments provided. + template + AllocOpT insertAlloc(Value value, Args... args) const { + OpBuilder allocBuilder(value.getDefiningOp()); + allocBuilder.setInsertionPoint(allocPosition); + return allocBuilder.create(args...); + } + + /// Inserts a new dialect-specific dealloc operation that will be constructed + /// in the right place using the arguments provided. + template + DeallocOpT insertDealloc(Value value, Args... args) const { + OpBuilder deallocBuilder(value.getDefiningOp()); + deallocBuilder.setInsertionPointAfter(deallocPosition); + return deallocBuilder.create(args...); + } + private: Operation* allocPosition; Operation* deallocPosition; ",0,train 5d2c4009987a6b33a683d6cbf1ade560e1f5b59b,tensorflow/tensorflow,"[tf.data] Use a more efficient source in MapBenchmark. Currently, we use `Dataset.from_tensors(0).repeat(None)` as the source of dummy data in MapBenchmark. Consuming this dataset involves repeatedly creating and destroying a TensorDataset iterator, and the cost of doing this dominates the MapDataset execution time (for small chains). Switching to a `Dataset.range(num_elements)` has much lower overhead per element. 
From running the benchmark on my workstation (with increased num_elements), the execution time of ""MapBenchmark.chain_length_1_single_threaded"" reduces by more than 50%: Before: entry { name: ""MapBenchmark.chain_length_1_single_threaded"" iters: 5 wall_time: 1.71906495094e-06 extras { key: ""num_elements"" value { double_value: 1000000.0 } } } After: entry { name: ""MapBenchmark.chain_length_1_single_threaded"" iters: 5 wall_time: 8.35798978806e-07 extras { key: ""num_elements"" value { double_value: 1000000.0 } } } PiperOrigin-RevId: 282434351 Change-Id: I7f726be65af35c5401c8c9a54c0b84bf27b9fa0f",map_benchmark.py,"@@ -28,7 +28,7 @@ class MapBenchmark(benchmark_base.DatasetBenchmarkBase): def benchmark_chain_of_maps(self): def benchmark_helper(chain_length, map_fn, use_inter_op_parallelism, label): - dataset = dataset_ops.Dataset.from_tensors(0).repeat(None) + dataset = dataset_ops.Dataset.range(10000) for _ in range(chain_length): dataset = dataset_ops.MapDataset( dataset, map_fn, use_inter_op_parallelism=use_inter_op_parallelism) ",0,test cea9f19ebf1ff74177d91c9d18926af0f3e2ce13,tensorflow/tensorflow,"Sink standard dialect constants in sink_constants_to_control_flow pass This is required before exporting HLO dialect ops with standard dialect constant to XLA. Also, sink constants for sort op as well. Added a TODO to generalize this pass to handle more ops and non-const values defined outside. PiperOrigin-RevId: 324301911 Change-Id: I2a67a2cc5d1f58dc5fad11a319a2f4ca63a8f434",sink_constants_to_control_flow.cc,"@@ -21,6 +21,7 @@ limitations under the License. #include ""mlir/Pass/PassManager.h"" #include ""mlir/Support/LLVM.h"" #include ""mlir/Transforms/RegionUtils.h"" +#include ""mlir/Dialect/StandardOps/IR/Ops.h"" // from @llvm-project namespace mlir { namespace mhlo { @@ -29,6 +30,13 @@ namespace { // A pass that sinks constants implicitly captured in control flow regions. This // is necessary to export to XLA. +// TODO(hinsu): Generalize this pass to handle all the ops with regions. Any +// value used within the region that is defined outside of op's region should be +// sank to the regions and not just the constants. Ops such as If and While +// whose computations doesn't require fixed signature like Sort or Reduce have +// an option to pass outside values as operands of the op to avoid recomputing +// those within internally. Note that doing so is the only option in case of +// BlockArguments. class SinkConstantsToControlFlowPass : public mlir::PassWrapper { void runOnFunction() override { @@ -39,6 +47,8 @@ class SinkConstantsToControlFlowPass } else if (auto if_op = llvm::dyn_cast(op)) { SinkToRegion(&if_op.true_branch()); SinkToRegion(&if_op.false_branch()); + } else if (auto sort_op = llvm::dyn_cast(op)) { + SinkToRegion(&sort_op.comparator()); } }); } @@ -46,26 +56,26 @@ class SinkConstantsToControlFlowPass private: // Performs constant sinking into a region. static void SinkToRegion(Region* region) { - llvm::DenseMap sunk_constant; + llvm::DenseMap sunk_constant; visitUsedValuesDefinedAbove({*region}, [&](OpOperand* use) { Value constant = use->get(); - auto const_op = dyn_cast_or_null(constant.getDefiningOp()); - if (!const_op) return; + auto op = constant.getDefiningOp(); + if (!op || !isa(op)) return; auto map_entry = sunk_constant.try_emplace(constant, nullptr); if (!map_entry.second) { // This constant has already been cloned into the region, reuse it. 
- use->set(map_entry.first->getSecond().getResult()); - if (constant.use_empty()) const_op.erase(); + use->set(map_entry.first->getSecond()->getResult(0)); + if (op->use_empty()) op->erase(); return; } if (constant.hasOneUse()) { - const_op.getOperation()->moveBefore(®ion->front().front()); + op->moveBefore(®ion->front().front()); return; } - map_entry.first->getSecond() = const_op.clone(); + map_entry.first->getSecond() = op->clone(); region->front().getOperations().insert(region->front().begin(), map_entry.first->getSecond()); - use->set(map_entry.first->getSecond().getResult()); + use->set(map_entry.first->getSecond()->getResult(0)); }); } }; ",0,train bcb5a132684424ff678e9d64fc291f49ce7fcc4c,tensorflow/tensorflow,"Explicitly cast the types of a few variables in VLOG statements to avoid an issue where the compiler isn't sure of the type when building for arm64 computers. PiperOrigin-RevId: 207151595",virtual_scheduler.cc,"@@ -859,9 +859,10 @@ Costs VirtualScheduler::Summary() const { const auto& memory_cost = op_cost_pair.second.memory_time.count(); const bool is_op_cost_accurate = !op_cost_pair.second.inaccurate; if (cost) { // Skip printing out zero-cost ops. - VLOG(1) << strings::Printf("" + %30s : %c %10ld / %10ld / %10ld"", - op.c_str(), (is_op_cost_accurate ? ' ' : '~'), - cost, compute_cost, memory_cost); + VLOG(1) << strings::Printf( + "" + %30s : %c %10lld / %10lld / %10lld"", op.c_str(), + (is_op_cost_accurate ? ' ' : '~'), static_cast(cost), + static_cast(compute_cost), static_cast(memory_cost)); } } @@ -936,10 +937,12 @@ Costs VirtualScheduler::Summary() const { : 0.0; if (cost || mem_usage_percent > 1.0) { // Print out only non-zero cost ops or ops with > 1% memory usage. - VLOG(1) << strings::Printf("" + %30s : %c %10ld / %10ld / %10ld"", + VLOG(1) << strings::Printf("" + %30s : %c %10lld / %10lld / %10lld"", op.c_str(), - (is_op_cost_accurate ? ' ' : '~'), cost, - compute_cost, memory_cost) + (is_op_cost_accurate ? ' ' : '~'), + static_cast(cost), + static_cast(compute_cost), + static_cast(memory_cost)) << "" ("" << strings::HumanReadableNumBytes(op_mem_usage) << "" ["" << mem_usage_percent << ""%] "" << (persisent_ops.count(op) > 0 ? "": persistent op)"" : "")""); ",0,train b15bccccbcddef2fa576e14b7e67a06e10f11690,tensorflow/tensorflow,Addressing comments,mkl_batch_matmul_op.cc,"@@ -174,122 +174,105 @@ class BatchMatMulMkl : public OpKernel { } } - MklCblasGemmBatch(CblasRowMajor, adj_x_, adj_y_, m_array, n_array, k_array, - &a_array[0], lda_array, &b_array[0], ldb_array, - &c_array[0], ldc_array, 1, group_size); + MklCblasGemmBatch( + CblasRowMajor, adj_x_, adj_y_, m_array, n_array, k_array, + reinterpret_cast(&a_array[0]), lda_array, + reinterpret_cast(&b_array[0]), ldb_array, + reinterpret_cast(&c_array[0]), ldc_array, 1, group_size); } private: bool adj_x_; bool adj_y_; + template ::value || + std::is_same::value), + int>::type = 0> void MklCblasGemmBatch( const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB, const std::vector& M_Array, const std::vector& N_Array, - const std::vector& K_Array, const float** A_Array, - const std::vector& lda_Array, const float** B_Array, - const std::vector& ldb_Array, float** C_Array, + const std::vector& K_Array, const void** A_Array, + const std::vector& lda_Array, const void** B_Array, + const std::vector& ldb_Array, void** C_Array, const std::vector& ldc_Array, const MKL_INT group_count, const std::vector& group_size) { std::vector TransA_Array( group_size[0], TransA ? 
CblasTrans : CblasNoTrans); std::vector TransB_Array( group_size[0], TransB ? CblasTrans : CblasNoTrans); - std::vector alpha_Array(group_size[0], 1.0); - std::vector beta_Array(group_size[0], 0.0); - cblas_sgemm_batch(Layout, &TransA_Array[0], &TransB_Array[0], &M_Array[0], - &N_Array[0], &K_Array[0], &alpha_Array[0], A_Array, - &lda_Array[0], B_Array, &ldb_Array[0], &beta_Array[0], - C_Array, &ldc_Array[0], group_count, &group_size[0]); - } - -#ifdef ENABLE_MKLDNN_V1_2 - void MklCblasGemmBatch( - const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB, - const std::vector& M_Array, const std::vector& N_Array, - const std::vector& K_Array, const bfloat16** A_Array, - const std::vector& lda_Array, const bfloat16** B_Array, - const std::vector& ldb_Array, bfloat16** C_Array, - const std::vector& ldc_Array, const MKL_INT group_count, - const std::vector& group_size) { - std::vector TransA_Array(group_size[0], TransA); - std::vector TransB_Array(group_size[0], TransB); - std::vector alpha_Array(group_size[0], 1.0); - std::vector beta_Array(group_size[0], 0.0); - dnnl_gemm_batch(Layout, TransA_Array, TransB_Array, M_Array, - N_Array, K_Array, alpha_Array, A_Array, lda_Array, - B_Array, ldb_Array, beta_Array, C_Array, - ldc_Array, group_count, group_size); - } -#endif // ENABLE_MKLDNN_V1_2 - - void MklCblasGemmBatch( - const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB, - const std::vector& M_Array, const std::vector& N_Array, - const std::vector& K_Array, const double** A_Array, - const std::vector& lda_Array, const double** B_Array, - const std::vector& ldb_Array, double** C_Array, - const std::vector& ldc_Array, const MKL_INT group_count, - const std::vector& group_size) { - std::vector TransA_array( - group_size[0], TransA ? CblasTrans : CblasNoTrans); - std::vector TransB_array( - group_size[0], TransB ? 
CblasTrans : CblasNoTrans); - std::vector alpha_Array(group_size[0], 1.0); - std::vector beta_Array(group_size[0], 0.0); - cblas_dgemm_batch(Layout, &TransA_array[0], &TransB_array[0], &M_Array[0], - &N_Array[0], &K_Array[0], &alpha_Array[0], A_Array, - &lda_Array[0], B_Array, &ldb_Array[0], &beta_Array[0], - C_Array, &ldc_Array[0], group_count, &group_size[0]); + if (std::is_same::value) { + std::vector alpha_Array(group_size[0], 1.0); + std::vector beta_Array(group_size[0], 0.0); + cblas_sgemm_batch(Layout, &TransA_Array[0], &TransB_Array[0], &M_Array[0], + &N_Array[0], &K_Array[0], &alpha_Array[0], + reinterpret_cast(A_Array), &lda_Array[0], + reinterpret_cast(B_Array), &ldb_Array[0], + &beta_Array[0], reinterpret_cast(C_Array), + &ldc_Array[0], group_count, &group_size[0]); + } else { + std::vector alpha_Array(group_size[0], 1.0); + std::vector beta_Array(group_size[0], 0.0); + cblas_dgemm_batch( + Layout, &TransA_Array[0], &TransB_Array[0], &M_Array[0], &N_Array[0], + &K_Array[0], &alpha_Array[0], + reinterpret_cast(A_Array), &lda_Array[0], + reinterpret_cast(B_Array), &ldb_Array[0], + &beta_Array[0], reinterpret_cast(C_Array), &ldc_Array[0], + group_count, &group_size[0]); + } } + template ::value || + std::is_same::value), + int>::type = 0> void MklCblasGemmBatch( const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB, const std::vector& M_Array, const std::vector& N_Array, - const std::vector& K_Array, const complex64** A_Array, - const std::vector& lda_Array, const complex64** B_Array, - const std::vector& ldb_Array, complex64** C_Array, + const std::vector& K_Array, const void** A_Array, + const std::vector& lda_Array, const void** B_Array, + const std::vector& ldb_Array, void** C_Array, const std::vector& ldc_Array, const MKL_INT group_count, const std::vector& group_size) { std::vector TransA_array( group_size[0], TransA ? CblasConjTrans : CblasNoTrans); std::vector TransB_array( group_size[0], TransB ? CblasConjTrans : CblasNoTrans); - std::vector alpha_Array(group_size[0], {1.0f, 0.0f}); - std::vector beta_Array(group_size[0], {0.0f, 0.0f}); - cblas_cgemm_batch(Layout, &TransA_array[0], &TransB_array[0], &M_Array[0], - &N_Array[0], &K_Array[0], - static_cast(&alpha_Array[0]), - reinterpret_cast(A_Array), &lda_Array[0], - reinterpret_cast(B_Array), &ldb_Array[0], - static_cast(&beta_Array[0]), - reinterpret_cast(C_Array), &ldc_Array[0], - group_count, &group_size[0]); + std::vector alpha_Array(group_size[0], {1.0f, 0.0f}); + std::vector beta_Array(group_size[0], {0.0f, 0.0f}); + auto gemm_fn = (std::is_same::value) ? 
cblas_cgemm_batch + : cblas_zgemm_batch; + gemm_fn(Layout, &TransA_array[0], &TransB_array[0], &M_Array[0], + &N_Array[0], &K_Array[0], static_cast(&alpha_Array[0]), + reinterpret_cast(A_Array), &lda_Array[0], + reinterpret_cast(B_Array), &ldb_Array[0], + static_cast(&beta_Array[0]), + reinterpret_cast(C_Array), &ldc_Array[0], group_count, + &group_size[0]); } - void MklCblasGemmBatch( +#ifdef ENABLE_MKLDNN_V1_2 + void MklCblasGemmBatch( const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB, const std::vector& M_Array, const std::vector& N_Array, - const std::vector& K_Array, const complex128** A_Array, - const std::vector& lda_Array, const complex128** B_Array, - const std::vector& ldb_Array, complex128** C_Array, + const std::vector& K_Array, const void** A_Array, + const std::vector& lda_Array, const void** B_Array, + const std::vector& ldb_Array, void** C_Array, const std::vector& ldc_Array, const MKL_INT group_count, const std::vector& group_size) { - std::vector TransA_array( - group_size[0], TransA ? CblasConjTrans : CblasNoTrans); - std::vector TransB_array( - group_size[0], TransB ? CblasConjTrans : CblasNoTrans); - std::vector alpha_Array(group_size[0], {1.0f, 0.0f}); - std::vector beta_Array(group_size[0], {0.0f, 0.0f}); - cblas_zgemm_batch(Layout, &TransA_array[0], &TransB_array[0], &M_Array[0], - &N_Array[0], &K_Array[0], - static_cast(&alpha_Array[0]), - reinterpret_cast(A_Array), &lda_Array[0], - reinterpret_cast(B_Array), &ldb_Array[0], - static_cast(&beta_Array[0]), - reinterpret_cast(C_Array), &ldc_Array[0], - group_count, &group_size[0]); + std::vector TransA_Array(group_size[0], TransA); + std::vector TransB_Array(group_size[0], TransB); + std::vector alpha_Array(group_size[0], 1.0); + std::vector beta_Array(group_size[0], 0.0); + dnnl_gemm_batch( + Layout, TransA_Array, TransB_Array, M_Array, N_Array, K_Array, + alpha_Array, reinterpret_cast(A_Array), lda_Array, + reinterpret_cast(B_Array), ldb_Array, beta_Array, + reinterpret_cast(C_Array), ldc_Array, group_count, + group_size); } +#endif // ENABLE_MKLDNN_V1_2 }; #define REGISTER_BATCH_MATMUL_MKL(TYPE) \ ",0,test b15bccccbcddef2fa576e14b7e67a06e10f11690,tensorflow/tensorflow,Addressing comments,mkl_matmul_op.cc,"@@ -268,10 +268,10 @@ class MklMatMulOp : public OpKernel { // TODO(inteltf) Consider template specialization when adding/removing // additional types TF_CALL_float(REGISTER_CPU); -#ifndef ENABLE_MKLDNN_V1 +#if !defined(ENABLE_MKLDNN_V1) || defined(ENABLE_MKLDNN_V1_2) // MKLDNNv1 does not have support for bfloat16 GEMM. Only V1.2 has that support. 
TF_CALL_bfloat16(REGISTER_CPU); -#endif // ENABLE_MKLDNN_V1 +#endif // !defined(ENABLE_MKLDNN_V1) || defined(ENABLE_MKLDNN_V1_2) #ifndef INTEL_MKL_DNN_ONLY TF_CALL_double(REGISTER_CPU); ",0,test b15bccccbcddef2fa576e14b7e67a06e10f11690,tensorflow/tensorflow,Addressing comments,mkl_matmul_op_fused.cc,"@@ -187,7 +187,7 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase { void ExtendMklDnnMatMulFwdParams(OpKernelContext* ctx, MklDnnMatMulFwdParams& params) { -#ifndef ENABLE_MKL_DNN_V1 +#ifndef ENABLE_MKLDNN_V1 if (fused_ops_.size() == 2) { string post_op = fused_ops_[1]; @@ -203,7 +203,7 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase { ""Unsupported post-argument in MklFusedMatMul: "", post_op)); } } -#endif +#endif // !ENABLE_MKLDNN_V1 } private: ",0,test b15bccccbcddef2fa576e14b7e67a06e10f11690,tensorflow/tensorflow,Addressing comments,mkl_matmul_ops_common.h,"@@ -97,11 +97,8 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { context_.dst_mem->set_data_handle(static_cast(dst_data)); #ifdef ENABLE_MKLDNN_V1 - DCHECK_EQ(context_.fwd_primitives.size(), context_.net_args.size()); - for (size_t i = 0; i < context_.fwd_primitives.size(); ++i) { - context_.fwd_primitives.at(i).execute(*context_.fwd_stream, - context_.net_args.at(i)); - } + execute_primitives(context_.fwd_primitives, context_.fwd_stream, + context_.net_args); #else context_.fwd_stream->submit(context_.fwd_primitives); #endif // ENABLE_MKLDNN_V1 @@ -117,7 +114,7 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { // In MKL-DNN v1.x, memory format tags only provide a partial description // of the memory layout. Hence, these functions are disabled for v1.x. memory::format GetSrcMemoryFormat() const { return context_.src_fmt; } - memory::format GetweightMemoryFormat() const { return context_.weight_fmt; } + memory::format GetWeightMemoryFormat() const { return context_.weight_fmt; } #endif // ENABLE_MKLDNN_V1 std::shared_ptr @@ -132,7 +129,7 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { // Expected memory format for this primitive instance MEMORY_FORMAT src_fmt; MEMORY_FORMAT weight_fmt; -#endif // ENABLE_MKLDNN_V1 +#endif // !ENABLE_MKLDNN_V1 // MKL-DNN memory. std::shared_ptr src_mem; @@ -164,7 +161,7 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { #ifndef ENABLE_MKLDNN_V1 src_fmt(MEMORY_FORMAT::any), weight_fmt(MEMORY_FORMAT::any), -#endif // ENABLE_MKLDNN_V1 +#endif // !ENABLE_MKLDNN_V1 src_mem(nullptr), weight_mem(nullptr), bias_mem(nullptr), ",0,test b15bccccbcddef2fa576e14b7e67a06e10f11690,tensorflow/tensorflow,Addressing comments,mkl_qmatmul_op.cc,"@@ -243,11 +243,7 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase { // Check if src and weight data need to be reordered. Tinput* src_data = nullptr; -#ifdef ENABLE_MKLDNN_V1 if (IS_SRC_REORDER_NEEDED(src_md, matmul_fwd_pd, matmul_fwd)) { -#else - if (src_md.data.format != matmul_fwd->GetSrcMemoryFormat()) { -#endif src.SetUsrMem(src_md, &src_tensor); src.CheckReorderToOpMem(MEMORY_PD_WITHOUT_DATA( matmul_fwd_pd.get()->PRIMITIVE_DESC_SRC, this->cpu_engine_)); @@ -258,11 +254,7 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase { } Tweight* weight_data = nullptr; -#ifdef ENABLE_MKLDNN_V1 if (IS_WEIGHTS_REORDER_NEEDED(weight_md, matmul_fwd_pd, matmul_fwd)) { -#else - if (weight_md.data.format != matmul_fwd->GetweightMemoryFormat()) { -#endif bool is_weight_cached = false; // For batch size 1, MKL-DNN expects that weight format is OI whereas // TF default format is IO. 
So in that case convert weight from IO @@ -280,7 +272,7 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase { context, static_cast(weight_mkl_shape.GetTfDataFormat())); #else weight_data = GetCachedWeight( - context, static_cast(matmul_fwd->GetweightMemoryFormat())); + context, static_cast(matmul_fwd->GetWeightMemoryFormat())); #endif is_weight_cached = (weight_data != nullptr); } @@ -554,14 +546,10 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase { OP_REQUIRES_OK(context, context->allocate_persistent( DT_INT32, weight_mkl_format, &weight_oi_md, &weight_md_tensor_ptr)); -#ifdef ENABLE_MKLDNN_V1 - // Using the logic from filter caching in mkl_conv_ops.cc - weight_md_tensor_ptr->scalar()() = - static_cast(weight_mkl_shape.GetTfDataFormat()); -#else weight_md_tensor_ptr->scalar()() = - matmul_fwd_pd.get()->weights_primitive_desc().desc().data.format; -#endif // ENABLE_MKLDNN_V1 + static_cast(GET_TF_DATA_FORMAT( + weight_mkl_shape, + matmul_fwd_pd.get()->weights_primitive_desc().desc())); } Tweight* GetCachedWeight(OpKernelContext* context, int32 weight_mf) ",0,test b15bccccbcddef2fa576e14b7e67a06e10f11690,tensorflow/tensorflow,Addressing comments,mkl_types.h,"@@ -149,7 +149,7 @@ namespace tensorflow { #define IS_SRC_REORDER_NEEDED(src_md, op_pd, op) \ src_md.data.format != op->GetSrcMemoryFormat() #define IS_WEIGHTS_REORDER_NEEDED(weights_md, op_pd, op) \ - weights_md.data.format != op->GetWeightsMemoryFormat() + weights_md.data.format != op->GetWeightMemoryFormat() #define GET_MEMORY_DESC_FROM_MEM_PTR(mem_ptr) \ mem_ptr->get_primitive_desc().desc() #define GET_MEMORY_PRIMITIVE_DESC_FROM_MEM_PTR(mem_ptr) \ ",0,test 6f0f757c808266a184da4004faf6f66b66f36014,tensorflow/tensorflow,Replace TEST_F with XLA_TEST_F to allow for disabling (#12520),copy_test.cc,"@@ -56,30 +56,30 @@ class CopyOpTest : public HloTestBase { tensorflow::gtl::ArraySlice permutation); }; -TEST_F(CopyOpTest, CopyR0Bool) { TestCopyOp(*Literal::CreateR0(true)); } +XLA_TEST_F(CopyOpTest, CopyR0Bool) { TestCopyOp(*Literal::CreateR0(true)); } -TEST_F(CopyOpTest, CopyR1S0U32) { TestCopyOp(*Literal::CreateR1({})); } +XLA_TEST_F(CopyOpTest, CopyR1S0U32) { TestCopyOp(*Literal::CreateR1({})); } -TEST_F(CopyOpTest, CopyR1S3U32) { +XLA_TEST_F(CopyOpTest, CopyR1S3U32) { TestCopyOp(*Literal::CreateR1({1, 2, 3})); } -TEST_F(CopyOpTest, CopyR3F32_2x2x3) { +XLA_TEST_F(CopyOpTest, CopyR3F32_2x2x3) { TestCopyOp(*Literal::CreateR3({{{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}}, {{1.1f, 2.1f, 3.1f}, {6.1f, 3.5f, 2.8f}}})); } -TEST_F(CopyOpTest, CopyR4S32_2x2x3x2) { +XLA_TEST_F(CopyOpTest, CopyR4S32_2x2x3x2) { TestCopyOp(*Literal::CreateR4( {{{{1, -2}, {-4, 5}, {6, 7}}, {{8, 9}, {10, 11}, {12, 13}}}, {{{10, 3}, {7, -2}, {3, 6}}, {{2, 5}, {-11, 5}, {-2, -5}}}})); } -TEST_F(CopyOpTest, CopyR4S32_0x2x3x2) { +XLA_TEST_F(CopyOpTest, CopyR4S32_0x2x3x2) { TestCopyOp(*Literal::CreateR4FromArray4D(Array4D(0, 2, 3, 2))); } -TEST_F(CopyOpTest, CopyParameterScalar) { +XLA_TEST_F(CopyOpTest, CopyParameterScalar) { auto builder = HloComputation::Builder(TestName()); // Copy literal to device to use as parameter. 
@@ -102,7 +102,7 @@ TEST_F(CopyOpTest, CopyParameterScalar) { LiteralTestUtil::ExpectR0Near(42.0f, *result, error_spec_); } -TEST_F(CopyOpTest, CopyConstantR2Twice) { +XLA_TEST_F(CopyOpTest, CopyConstantR2Twice) { auto builder = HloComputation::Builder(TestName()); auto literal = Literal::CreateR2({{1.0, 2.0}, {3.0, 4.0}}); @@ -123,7 +123,7 @@ TEST_F(CopyOpTest, CopyConstantR2Twice) { error_spec_); } -TEST_F(CopyOpTest, CopyConstantR2DifferentLayouts) { +XLA_TEST_F(CopyOpTest, CopyConstantR2DifferentLayouts) { HloComputation::Builder builder(TestName()); std::unique_ptr literal = ",0,train ffb40e2bf2a47f299955041faf6bd5740baa69ef,tensorflow/tensorflow,"Ensure that PyFunc arrays are C-ordered. Change: 120625852",py_func_test.py,"@@ -153,5 +153,11 @@ class PyOpTest(tf.test.TestCase): self.assertEqual(sess.run(x), 1) self.assertEqual(sess.run(x), 2) + def testCOrder(self): + with self.test_session(): + val = [[1, 2], [3, 4]] + x, = tf.py_func(lambda: np.array(val, order=""F""), [], [tf.int64]) + self.assertAllEqual(val, x.eval()) + if __name__ == ""__main__"": tf.test.main() ",0,train ffb40e2bf2a47f299955041faf6bd5740baa69ef,tensorflow/tensorflow,"Ensure that PyFunc arrays are C-ordered. Change: 120625852",script_ops.py,"@@ -62,9 +62,9 @@ class FuncRegistry(object): # Ensures that we return either a single numpy array or a list of numpy # arrays. if isinstance(ret, (tuple, list)): - ret = [np.array(x) for x in ret] + ret = [np.array(x, order=""C"") for x in ret] else: - ret = np.array(ret) + ret = np.array(ret, order=""C"") return ret def size(self): ",0,train 3ed1e3029e68ca8cb6306c8f31182306741dcf0c,tensorflow/tensorflow,"Remove static_assert for type checking in FlatBufferVectorToTfLiteTypeArray. It turns out that std::is_same() has dropped the non-string argument in c++17. This breaks internal users that are building against qualcomm. PiperOrigin-RevId: 317790812 Change-Id: If56a61d20426670251b55f370a6b5fa886a49e21",micro_allocator.cc,"@@ -401,12 +401,9 @@ TfLiteStatus FlatBufferVectorToTfLiteTypeArray( kTfLiteArrayType** result) { TFLITE_DCHECK(error_reporter != nullptr); TFLITE_DCHECK(flatbuffer_array != nullptr); - // Only two conversions are supported - float and int32 - ensure that these - // match at compile time instead of duplicating functions here: - static_assert((std::is_same() && - std::is_same()) || - (std::is_same() && - std::is_same())); + // TODO(b/159668691): Consider adding type assertion or breaking this function + // into multiple functions for each type. std::is_same is c++11 and has a + // special updated constructor in c++17 that requires a string argument. if (FLATBUFFERS_LITTLEENDIAN) { // On little-endian machines, TfLite*Array happens to have the same memory // layout as flatbuffers:Vector, so we can ",0,train f4b06261c900c3217891eea6285d603fcf11776b,tensorflow/tensorflow,"Use self.handle inside ResourceVariable to allow tf.distribute to customize handle behavior I'm working on a new version of DistributedVariable which directly inherits from BaseResourceVariable. Its handle would return different resource tensors under different context, e.g. self.handle would be a replicated tensor under tpu context. This can avoid the need to use raw variable operations for special resource handles like tpu replicate handle or parallel device handle. 
PiperOrigin-RevId: 355663353 Change-Id: I16201f94ef27a0dc7ac1491c616d7bd68397123a",packed_distributed_variable.py,"@@ -252,7 +252,8 @@ class PackedVarAndDevice(object): self._device = device def __getattr__(self, name): - return getattr(self._var, name) + with ops.device(self._device): + return getattr(self._var, name) def var(self): return self._var ",0,train f4b06261c900c3217891eea6285d603fcf11776b,tensorflow/tensorflow,"Use self.handle inside ResourceVariable to allow tf.distribute to customize handle behavior I'm working on a new version of DistributedVariable which directly inherits from BaseResourceVariable. Its handle would return different resource tensors under different context, e.g. self.handle would be a replicated tensor under tpu context. This can avoid the need to use raw variable operations for special resource handles like tpu replicate handle or parallel device handle. PiperOrigin-RevId: 355663353 Change-Id: I16201f94ef27a0dc7ac1491c616d7bd68397123a",resource_variable_ops.py,"@@ -516,12 +516,12 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor): @property def device(self): """"""The device this variable is on."""""" - return self._handle.device + return self.handle.device @property def graph(self): """"""The `Graph` of this variable."""""" - return self._handle.graph + return self.handle.graph @property def name(self): @@ -596,7 +596,7 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor): @property def op(self): """"""The op for this variable."""""" - return self._handle.op + return self.handle.op @property def trainable(self): @@ -655,7 +655,7 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor): else: new_variable = copy_to_graph_uninitialized(self) obj_map = {self: new_variable} - resource_map = {self._handle: new_variable.handle} + resource_map = {self.handle: new_variable.handle} return obj_map, resource_map def _read_variable_op(self): @@ -663,8 +663,8 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor): def read_and_set_handle(): result = gen_resource_variable_ops.read_variable_op( - self._handle, self._dtype) - _maybe_set_handle_data(self._dtype, self._handle, result) + self.handle, self._dtype) + _maybe_set_handle_data(self._dtype, self.handle, result) return result if getattr(self, ""_caching_device"", None) is not None: @@ -678,7 +678,7 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor): # Note that if a control flow context is active the input of the read op # might not actually be the handle. This line bypasses it. tape.record_operation( - ""ReadVariableOp"", [result], [self._handle], + ""ReadVariableOp"", [result], [self.handle], backward_function=lambda x: [x], forward_function=lambda x: [x]) return result @@ -703,12 +703,12 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor): with ops.name_scope(""Gather"" if name is None else name) as name: variable_accessed(self) value = gen_resource_variable_ops.resource_gather( - self._handle, indices, dtype=self._dtype, name=name) + self.handle, indices, dtype=self._dtype, name=name) if self._dtype == dtypes.variant: # For DT_VARIANT types, the handle's shape_and_type[1:] stores the # variant's handle data. Extract it. 
- handle_data = get_eager_safe_handle_data(self._handle) + handle_data = get_eager_safe_handle_data(self.handle) if handle_data.is_set and len(handle_data.shape_and_type) > 1: value._handle_data = ( # pylint: disable=protected-access cpp_shape_inference_pb2.CppShapeInferenceResult.HandleData( @@ -722,7 +722,7 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor): if self.trainable: variable_accessed(self) value = gen_resource_variable_ops.resource_gather_nd( - self._handle, indices, dtype=self._dtype, name=name) + self.handle, indices, dtype=self._dtype, name=name) return array_ops.identity(value) @@ -855,7 +855,7 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor): def _lazy_read(self, op): variable_accessed(self) return _UnreadVariable( - handle=self._handle, + handle=self.handle, dtype=self.dtype, shape=self._shape, in_graph_mode=self._in_graph_mode, ",0,train bf31a56af7748b2a022ade234b98b41140d68248,tensorflow/tensorflow,"Add the {input, output}_details method to the Signature Runner in Python PiperOrigin-RevId: 392819614 Change-Id: I4ec5d522b172405f0db1c7ed04d273ecfe4e340e",interpreter.py,"@@ -269,6 +269,50 @@ class SignatureRunner(object): output_index, self._subgraph_index) return result + def get_input_details(self): + """"""Gets input tensor details. + + Returns: + A dictionary from input name to tensor details where each item is a + dictionary with details about an input tensor. Each dictionary contains + the following fields that describe the tensor: + + + `name`: The tensor name. + + `index`: The tensor index in the interpreter. + + `shape`: The shape of the tensor. + + `shape_signature`: Same as `shape` for models with known/fixed shapes. + If any dimension sizes are unkown, they are indicated with `-1`. + + `dtype`: The numpy data type (such as `np.int32` or `np.uint8`). + + `quantization`: Deprecated, use `quantization_parameters`. This field + only works for per-tensor quantization, whereas + `quantization_parameters` works in all cases. + + `quantization_parameters`: A dictionary of parameters used to quantize + the tensor: + ~ `scales`: List of scales (one if per-tensor quantization). + ~ `zero_points`: List of zero_points (one if per-tensor quantization). + ~ `quantized_dimension`: Specifies the dimension of per-axis + quantization, in the case of multiple scales/zero_points. + + `sparsity_parameters`: A dictionary of parameters used to encode a + sparse tensor. This is empty if the tensor is dense. + """""" + result = {} + for input_name, tensor_index in self._inputs.items(): + result[input_name] = self._interpreter._get_tensor_details(tensor_index) # pylint: disable=protected-access + return result + + def get_output_details(self): + """"""Gets output tensor details. + + Returns: + A dictionary from input name to tensor details where each item is a + dictionary with details about an output tensor. The dictionary contains + the same fields as described for `get_input_details()`. 
+ """""" + result = {} + for output_name, tensor_index in self._outputs: + result[output_name] = self._interpreter._get_tensor_details(tensor_index) # pylint: disable=protected-access + return result + @_tf_export('lite.experimental.OpResolverType') @enum.unique ",0,test bf31a56af7748b2a022ade234b98b41140d68248,tensorflow/tensorflow,"Add the {input, output}_details method to the Signature Runner in Python PiperOrigin-RevId: 392819614 Change-Id: I4ec5d522b172405f0db1c7ed04d273ecfe4e340e",lite_v2_test.py,"@@ -178,10 +178,23 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): add_signature_runner = interpreter.get_signature_runner('add') add_output = add_signature_runner(x=input_data) self.assertEqual(add_output['output_0'], 3) + input_details = add_signature_runner.get_input_details() + self.assertEqual(1, len(input_details)) + self.assertEqual('add_x:0', input_details['x']['name']) + self.assertEqual(np.float32, input_details['x']['dtype']) + self.assertTrue(([1] == input_details['x']['shape']).all()) + self.assertEqual((0.0, 0), input_details['x']['quantization']) sub_signature_runner = interpreter.get_signature_runner('sub') sub_output = sub_signature_runner(x=input_data) self.assertEqual(sub_output['output_0'], -2) + output_details = sub_signature_runner.get_output_details() + self.assertEqual(1, len(output_details)) + self.assertEqual('StatefulPartitionedCall:0', + output_details['output_0']['name']) + self.assertEqual(np.float32, output_details['output_0']['dtype']) + self.assertTrue(([1] == output_details['output_0']['shape']).all()) + self.assertEqual((0.0, 0), output_details['output_0']['quantization']) def _getIntegerQuantizeModel(self, num_filters=16): np.random.seed(0) ",0,test bf31a56af7748b2a022ade234b98b41140d68248,tensorflow/tensorflow,"Add the {input, output}_details method to the Signature Runner in Python PiperOrigin-RevId: 392819614 Change-Id: I4ec5d522b172405f0db1c7ed04d273ecfe4e340e",calibrator.py,"@@ -87,11 +87,13 @@ class Calibrator(object): # Convert signature based inputs to the tensor index based data. 
if not hasattr(self, ""_interpreter""): self._interpreter = Interpreter(model_content=self._model_content) - input_array = [None] * len(sample) + input_array = [] signature_runner = self._interpreter.get_signature_runner() - for input_name, value in sample.items(): - tensor_index = signature_runner._inputs[input_name] # pylint: disable=protected-access - input_array[tensor_index] = value + input_details = sorted( + signature_runner.get_input_details().items(), + key=lambda item: item[1][""index""]) + for input_name, input_detail in input_details: + input_array.append(sample[input_name]) elif isinstance(sample, list): input_array = sample else: ",0,test 6598d11e8b8ea3f33c65091d5ffdfacbbc98cfad,tensorflow/tensorflow,"Whitelist XlaBroadcastHelperOp and enable tests XlaDynamicUpdateSliceOp PiperOrigin-RevId: 311049106 Change-Id: I6f47f784f744ba3e60f3f377fa90412b1114d3b5",legalize_tf_with_tf2xla.cc,"@@ -165,6 +165,7 @@ static bool IsOpWhitelisted(Operation* op) { TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), ",0,train 6598d11e8b8ea3f33c65091d5ffdfacbbc98cfad,tensorflow/tensorflow,"Whitelist XlaBroadcastHelperOp and enable tests XlaDynamicUpdateSliceOp PiperOrigin-RevId: 311049106 Change-Id: I6f47f784f744ba3e60f3f377fa90412b1114d3b5",xla_ops_test.py,"@@ -51,7 +51,6 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase): equality_fn = self.assertAllClose equality_fn(result, expected, rtol=1e-3) - @test_util.disable_mlir_bridge('Not supported yet') def testAdd(self): for dtype in self.numeric_types: self._assertOpOutputMatchesExpected( ",0,train a3dfb6f36692c5a887c2ae10713f408772b00d2f,tensorflow/tensorflow,"Change ThenBlasLtMatmul implem to a template - This is required to ensure that the template version of DoBlasLtMatmul is called (which is important because it performs additional type checks).",stream.cc,"@@ -4809,28 +4809,79 @@ Stream &Stream::ThenBlasGemmStridedBatched( c, ldc, stride_c, batch_count); } -Stream& Stream::ThenBlasLtMatmul(const blas::IBlasLtMatmulPlan* plan, - const HostOrDeviceScalar& alpha, - DeviceMemoryBase a, DeviceMemoryBase b, - const HostOrDeviceScalar& beta, - DeviceMemoryBase c, - ScratchAllocator* scratch_allocator, - const blas::IBlasLtMatmulAlgorithm* algorithm, - DeviceMemoryBase bias, - blas::ProfileResult* output_profile_result) { +template +Stream& Stream::ThenBlasLtMatmulImpl( + const blas::IBlasLtMatmulPlan* plan, const HostOrDeviceScalar& alpha, + const DeviceMemory& a, const DeviceMemory& b, + const HostOrDeviceScalar& beta, DeviceMemory* c, + ScratchAllocator* scratch_allocator, + const blas::IBlasLtMatmulAlgorithm* algorithm, + const DeviceMemory& bias, + blas::ProfileResult* output_profile_result) { VLOG_CALL(PARAM(plan), PARAM(alpha), PARAM(a), PARAM(b), PARAM(beta), PARAM(c), PARAM(algorithm), PARAM(bias)); - ThenBlasWithProfileImpl&, DeviceMemoryBase, - DeviceMemoryBase, const HostOrDeviceScalar&, - DeviceMemoryBase, ScratchAllocator*, - const blas::IBlasLtMatmulAlgorithm*, DeviceMemoryBase> + ThenBlasWithProfileImpl< + const blas::IBlasLtMatmulPlan*, const HostOrDeviceScalar&, + const DeviceMemory&, const DeviceMemory&, + const HostOrDeviceScalar&, DeviceMemory*, ScratchAllocator*, + const blas::IBlasLtMatmulAlgorithm*, const DeviceMemory&> impl; return impl(this, &blas::BlasSupport::DoBlasLtMatmul, plan, alpha, a, b, beta, c, scratch_allocator, algorithm, bias, output_profile_result); } +// Explicit template instantiations for each supported type combination. 
+template Stream& Stream::ThenBlasLtMatmulImpl( + const blas::IBlasLtMatmulPlan*, const HostOrDeviceScalar&, + const DeviceMemory&, const DeviceMemory&, + const HostOrDeviceScalar&, DeviceMemory*, ScratchAllocator*, + const blas::IBlasLtMatmulAlgorithm*, const DeviceMemory&, + blas::ProfileResult*); + +template Stream& Stream::ThenBlasLtMatmulImpl( + const blas::IBlasLtMatmulPlan*, const HostOrDeviceScalar&, + const DeviceMemory&, const DeviceMemory&, + const HostOrDeviceScalar&, DeviceMemory*, + ScratchAllocator*, const blas::IBlasLtMatmulAlgorithm*, + const DeviceMemory&, blas::ProfileResult*); + +template Stream& Stream::ThenBlasLtMatmulImpl( + const blas::IBlasLtMatmulPlan*, const HostOrDeviceScalar&, + const DeviceMemory&, const DeviceMemory&, + const HostOrDeviceScalar&, DeviceMemory*, ScratchAllocator*, + const blas::IBlasLtMatmulAlgorithm*, const DeviceMemory&, + blas::ProfileResult*); + +template Stream& Stream::ThenBlasLtMatmulImpl( + const blas::IBlasLtMatmulPlan*, const HostOrDeviceScalar&, + const DeviceMemory&, const DeviceMemory&, + const HostOrDeviceScalar&, DeviceMemory*, ScratchAllocator*, + const blas::IBlasLtMatmulAlgorithm*, const DeviceMemory&, + blas::ProfileResult*); + +template Stream& +Stream::ThenBlasLtMatmulImpl, std::complex>( + const blas::IBlasLtMatmulPlan*, + const HostOrDeviceScalar>&, + const DeviceMemory>&, + const DeviceMemory>&, + const HostOrDeviceScalar>&, + DeviceMemory>*, ScratchAllocator*, + const blas::IBlasLtMatmulAlgorithm*, + const DeviceMemory>&, blas::ProfileResult*); + +template Stream& +Stream::ThenBlasLtMatmulImpl, std::complex>( + const blas::IBlasLtMatmulPlan*, + const HostOrDeviceScalar>&, + const DeviceMemory>&, + const DeviceMemory>&, + const HostOrDeviceScalar>&, + DeviceMemory>*, ScratchAllocator*, + const blas::IBlasLtMatmulAlgorithm*, + const DeviceMemory>&, blas::ProfileResult*); + Stream &Stream::ThenSetRngSeed(const uint8 *seed, uint64 seed_bytes) { VLOG_CALL(PARAM(seed), PARAM(seed_bytes)); ",0,train a3dfb6f36692c5a887c2ae10713f408772b00d2f,tensorflow/tensorflow,"Change ThenBlasLtMatmul implem to a template - This is required to ensure that the template version of DoBlasLtMatmul is called (which is important because it performs additional type checks).",stream.h,"@@ -1679,16 +1679,6 @@ class Stream { DeviceMemory> *b, int ldb); // See BlasSupport::DoBlatLtMatmul. - Stream& ThenBlasLtMatmul(const blas::IBlasLtMatmulPlan* plan, - const HostOrDeviceScalar& alpha, - DeviceMemoryBase a, DeviceMemoryBase b, - const HostOrDeviceScalar& beta, - DeviceMemoryBase c, - ScratchAllocator* scratch_allocator, - const blas::IBlasLtMatmulAlgorithm* algorithm, - DeviceMemoryBase bias, - blas::ProfileResult* output_profile_result); - // Note that we prevent alpha and beta from being used to deduce CType so that // they can be constructed implicitly from values of type CType. Without this, // type deduction would fail when this function is called with a value of type @@ -1703,8 +1693,8 @@ class Stream { const blas::IBlasLtMatmulAlgorithm* algorithm, const DeviceMemory& bias = {}, blas::ProfileResult* output_profile_result = nullptr) { - return ThenBlasLtMatmul(plan, alpha, a, b, beta, *c, scratch_allocator, - algorithm, bias, output_profile_result); + return ThenBlasLtMatmulImpl(plan, alpha, a, b, beta, c, scratch_allocator, + algorithm, bias, output_profile_result); } // See FftSupport::DoFft. 
@@ -2139,6 +2129,19 @@ class Stream { const dnn::BatchDescriptor &bias_descriptor, DeviceMemory *backward_bias_data); + // Implementation of ThenBlasLtMatmul that is shared by all types. + template + Stream& ThenBlasLtMatmulImpl(const blas::IBlasLtMatmulPlan* plan, + const HostOrDeviceScalar& alpha, + const DeviceMemory& a, + const DeviceMemory& b, + const HostOrDeviceScalar& beta, + DeviceMemory* c, + ScratchAllocator* scratch_allocator, + const blas::IBlasLtMatmulAlgorithm* algorithm, + const DeviceMemory& bias, + blas::ProfileResult* output_profile_result); + SE_DISALLOW_COPY_AND_ASSIGN(Stream); }; ",0,train 3db982b63419fec84084fc606bbaa0de3277b996,tensorflow/tensorflow,"Added mean 'Loss' to Estimator.Evaluate Change: 150085654",estimator.py,"@@ -36,6 +36,7 @@ from tensorflow.python.estimator import run_config from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import metrics as metrics_lib from tensorflow.python.ops import variables from tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging as logging @@ -602,13 +603,15 @@ class Estimator(object): estimator_spec = self._call_model_fn( features, labels, model_fn_lib.ModeKeys.EVAL) + self._verify_default_metric_key(model_fn_lib.MetricKeys.LOSS, + estimator_spec.eval_metric_ops) + estimator_spec.eval_metric_ops[ + model_fn_lib.MetricKeys.LOSS] = metrics_lib.mean(estimator_spec.loss) + update_op, eval_dict = _extract_metric_update_ops( estimator_spec.eval_metric_ops) - if ops.GraphKeys.GLOBAL_STEP in six.iterkeys(eval_dict): - raise ValueError( - 'Metric with name `global_step` is not allowed, because Estimator ' - 'already defines a default metric with the same name.') + self._verify_default_metric_key(ops.GraphKeys.GLOBAL_STEP, eval_dict) eval_dict[ops.GraphKeys.GLOBAL_STEP] = global_step_tensor eval_results = evaluation._evaluate_once( # pylint: disable=protected-access @@ -627,6 +630,12 @@ class Estimator(object): return eval_results + def _verify_default_metric_key(self, metric_key, eval_dict): + if metric_key in six.iterkeys(eval_dict): + raise ValueError( + 'Metric with name `%s` is not allowed, because Estimator ' + 'already defines a default metric with the same name.' % metric_key) + def _get_replica_device_setter(config): """"""Creates a replica device setter if required as a default device_fn. 
",0,test 3db982b63419fec84084fc606bbaa0de3277b996,tensorflow/tensorflow,"Added mean 'Loss' to Estimator.Evaluate Change: 150085654",estimator_test.py,"@@ -401,10 +401,8 @@ class EstimatorEvaluateTest(test.TestCase): 'metric_value': 2.}) est.train(dummy_input_fn, steps=5) scores = est.evaluate(dummy_input_fn, steps=1) - self.assertDictEqual( - {'metric': 2., - 'global_step': 5}, - scores) + self.assertIn('metric', scores) + self.assertAlmostEqual(2., scores['metric']) def test_steps0_raises_error(self): est = estimator.Estimator( @@ -431,6 +429,36 @@ class EstimatorEvaluateTest(test.TestCase): ValueError, 'Metric with name `global_step` is not allowed'): est.evaluate(dummy_input_fn, steps=1) + def test_global_step_is_reported(self): + est = estimator.Estimator( + model_fn=_model_fn_with_eval_metric_ops, + params={'metric_name': 'metric', + 'metric_value': 2.}) + est.train(dummy_input_fn, steps=5) + scores = est.evaluate(dummy_input_fn, steps=1) + self.assertIn('global_step', scores) + self.assertEqual(5, scores['global_step']) + + def test_loss_metric_is_reported(self): + + def _model_fn_with_incremental_loss(features, labels, mode): + _, _ = features, labels + local_weight = variables.Variable( + 0., name='local_weight', collections=[ops.GraphKeys.LOCAL_VARIABLES]) + # Loss will be 2, 4, 6, ... + loss = 2 * state_ops.assign_add(local_weight, 1.) + return model_fn_lib.EstimatorSpec( + mode, + loss=loss, + train_op=state_ops.assign_add(training.get_global_step(), 1)) + + est = estimator.Estimator(model_fn=_model_fn_with_incremental_loss) + est.train(dummy_input_fn, steps=1) + scores = est.evaluate(dummy_input_fn, steps=5) + self.assertIn(model_fn_lib.MetricKeys.LOSS, scores) + # Average loss will be (2 + 4 + 6 + 8 + 10)/5=6 + self.assertAlmostEqual(6., scores[model_fn_lib.MetricKeys.LOSS]) + def test_hooks_are_used(self): step_counter_hook = _StepCounterHook() @@ -454,10 +482,7 @@ class EstimatorEvaluateTest(test.TestCase): dummy_input_fn, steps=1, checkpoint_path=saver.latest_checkpoint(est1.model_dir)) - self.assertDictEqual( - {'metric': 2., - 'global_step': 5}, - scores) + self.assertEqual(5, scores['global_step']) def test_scaffold_is_used(self): ",0,test 3db982b63419fec84084fc606bbaa0de3277b996,tensorflow/tensorflow,"Added mean 'Loss' to Estimator.Evaluate Change: 150085654",model_fn.py,"@@ -47,6 +47,13 @@ class ModeKeys(object): PREDICT = 'infer' +class MetricKeys(object): + """"""Metric key strings."""""" + LOSS = 'loss' + AUC = 'auc' + ACCURACY = 'accuracy' + + class EstimatorSpec( collections.namedtuple('EstimatorSpec', [ 'predictions', 'loss', 'train_op', 'eval_metric_ops', ",0,test e383266c782e4bfaf6c18ffb91b94eb5f44155d1,tensorflow/tensorflow,"Add RaggedTensor dispatch for tf.debugging.assert_type. 
PiperOrigin-RevId: 423338351 Change-Id: I44c9423d0170a21d682f7cb20157247a26c18f4c",check_ops_test.py,"@@ -34,6 +34,7 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import gradients from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops.ragged import ragged_factory_ops from tensorflow.python.platform import test @@ -1534,6 +1535,14 @@ class AssertTypeTest(test.TestCase): sparse_float.dense_shape) self.evaluate(out) + @test_util.run_in_graph_and_eager_modes + def test_raggedtensor_doesnt_raise_when_correct_type(self): + x = ragged_factory_ops.constant([[1., 2.], [3.]]) + with ops.control_dependencies( + [check_ops.assert_type(x, dtypes.float32)]): + y = array_ops.identity(x) + self.assertAllEqual(x, y) + @test_util.run_in_graph_and_eager_modes def test_raises_when_wrong_type(self): floats = constant_op.constant([1.0, 2.0], dtype=dtypes.float16) @@ -1549,6 +1558,12 @@ class AssertTypeTest(test.TestCase): with self.assertRaisesRegexp(TypeError, ""must be of type.*float32""): check_ops.assert_type(sparse_float16, dtypes.float32) + @test_util.run_in_graph_and_eager_modes + def test_raggedtensor_raises_when_wrong_type(self): + x = ragged_factory_ops.constant([[1, 2], [3]]) + with self.assertRaisesRegex(TypeError, ""must be of type.*float32""): + check_ops.assert_type(x, dtypes.float32) + def test_raise_when_tf_type_is_not_dtype(self): # Test case for GitHub issue: # https://github.com/tensorflow/tensorflow/issues/45975 ",0,train e383266c782e4bfaf6c18ffb91b94eb5f44155d1,tensorflow/tensorflow,"Add RaggedTensor dispatch for tf.debugging.assert_type. PiperOrigin-RevId: 423338351 Change-Id: I44c9423d0170a21d682f7cb20157247a26c18f4c",ragged_check_ops.py,"@@ -0,0 +1,27 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""""""Asserts and Boolean Checks for RaggedTensors."""""" + +from tensorflow.python.ops import check_ops +from tensorflow.python.ops.ragged import ragged_tensor +from tensorflow.python.util import dispatch + + +@dispatch.dispatch_for_api(check_ops.assert_type) +def assert_type(tensor: ragged_tensor.Ragged, tf_type, message=None, name=None): + return check_ops.assert_type(tensor.flat_values, tf_type, + message=message, name=name) + + ",0,train e383266c782e4bfaf6c18ffb91b94eb5f44155d1,tensorflow/tensorflow,"Add RaggedTensor dispatch for tf.debugging.assert_type. 
PiperOrigin-RevId: 423338351 Change-Id: I44c9423d0170a21d682f7cb20157247a26c18f4c",ragged_dispatch_test.py,"@@ -26,6 +26,7 @@ from tensorflow.python.framework import random_seed from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import image_ops_impl @@ -903,8 +904,7 @@ class RaggedDispatchTest(test_util.TensorFlowTestCase, parameterized.TestCase): np.exp(5) / (np.exp(4) + np.exp(5)), ], ]), - rtol=1e-6, - ), + rtol=1e-6), ]) def testRaggedDispatch(self, op, @@ -1036,6 +1036,13 @@ class RaggedDispatchTest(test_util.TensorFlowTestCase, parameterized.TestCase): self.evaluate(variables.global_variables_initializer()) self.assertAllEqual(math_ops.add(x, v), [[11, 12], [13, 14, 15]]) + def testAssertType(self): + x = ragged_factory_ops.constant([[1., 2.], [3.]]) + with ops.control_dependencies( + [check_ops.assert_type(x, dtypes.float32)]): + y = array_ops.identity(x) + self.assertAllEqual(x, y) + if __name__ == '__main__': googletest.main() ",0,train e383266c782e4bfaf6c18ffb91b94eb5f44155d1,tensorflow/tensorflow,"Add RaggedTensor dispatch for tf.debugging.assert_type. PiperOrigin-RevId: 423338351 Change-Id: I44c9423d0170a21d682f7cb20157247a26c18f4c",ragged_ops.py,"@@ -27,6 +27,7 @@ circular dependencies. from tensorflow.python.ops.ragged import ragged_array_ops from tensorflow.python.ops.ragged import ragged_batch_gather_ops from tensorflow.python.ops.ragged import ragged_batch_gather_with_default_op +from tensorflow.python.ops.ragged import ragged_check_ops from tensorflow.python.ops.ragged import ragged_concat_ops from tensorflow.python.ops.ragged import ragged_conversion_ops from tensorflow.python.ops.ragged import ragged_dispatch ",0,train e3a4aa36027b779ca1011c6331f173cc15d16135,tensorflow/tensorflow,"Remove xw_plus_b from the API for TF 2.0. PiperOrigin-RevId: 221732670",nn_ops.py,"@@ -2299,7 +2299,7 @@ def _calc_bias_add_flops(graph, node): return ops.OpStats(""flops"", input_count) -@tf_export(""nn.xw_plus_b"") +@tf_export(v1=[""nn.xw_plus_b""]) def xw_plus_b(x, weights, biases, name=None): # pylint: disable=invalid-name """"""Computes matmul(x, weights) + biases. ",0,test 23d9a2b49d12ced9d8291f1097af92ae067bd4a6,tensorflow/tensorflow,"Remove unneeded copies now that int64 and int64_t are the same Just removed a couple of obvious ones. Larger/more uniform update coming later. PiperOrigin-RevId: 354637425 Change-Id: If7ae27acf47c81f1a39eb9120ddbfd0bae828a15",lower_static_tensor_list.cc,"@@ -216,14 +216,8 @@ struct ConvertConst : public OpConversionPattern { // If the list is empty, directly create the final result instead of // creating the tf.Pack op. tf.Pack op requires at least one operand. if (tensors.empty()) { - absl::InlinedVector tf_shape; - tf_shape.reserve(result_shape.size()); - for (int64_t dim : result_shape) { - tf_shape.push_back(dim); - } - tensorflow::Tensor tensor(list->element_dtype, - tensorflow::TensorShape(tf_shape)); + tensorflow::TensorShape(result_shape)); auto attr_or = tensorflow::ConvertTensor(tensor, &rewriter); if (!attr_or.ok()) return failure(); rewriter.replaceOpWithNewOp(op, attr_or.ValueOrDie()); ",0,test 23d9a2b49d12ced9d8291f1097af92ae067bd4a6,tensorflow/tensorflow,"Remove unneeded copies now that int64 and int64_t are the same Just removed a couple of obvious ones. 
Larger/more uniform update coming later. PiperOrigin-RevId: 354637425 Change-Id: If7ae27acf47c81f1a39eb9120ddbfd0bae828a15",shape_inference_utils.cc,"@@ -56,7 +56,6 @@ limitations under the License. #define DEBUG_TYPE ""tf-shape-inference-utils"" -using ::tensorflow::int64; using tensorflow::shape_inference::DimensionHandle; using tensorflow::shape_inference::InferenceContext; using tensorflow::shape_inference::ShapeHandle; @@ -83,12 +82,7 @@ NamedAttrList GetAllAttributesFromOperation(Operation* op) { // Extracts a PartialTensorShape from the MLIR type. Optional GetShapeFromMlirType(Type t) { if (auto ranked_type = t.dyn_cast()) { - // Convert the MLIR shape indices (int64_t) to TensorFlow indices - // (int64). - ArrayRef shape = ranked_type.getShape(); - SmallVector tf_shape(shape.begin(), shape.end()); - return tensorflow::PartialTensorShape( - MutableArrayRefToSpan(tf_shape)); + return tensorflow::PartialTensorShape(ranked_type.getShape()); } return None; } ",0,test 23d9a2b49d12ced9d8291f1097af92ae067bd4a6,tensorflow/tensorflow,"Remove unneeded copies now that int64 and int64_t are the same Just removed a couple of obvious ones. Larger/more uniform update coming later. PiperOrigin-RevId: 354637425 Change-Id: If7ae27acf47c81f1a39eb9120ddbfd0bae828a15",array_container_utils.h,"@@ -41,11 +41,6 @@ inline absl::Span ArrayRefToSpan(llvm::ArrayRef ref) { return absl::Span(ref.data(), ref.size()); } -template -inline absl::Span MutableArrayRefToSpan(llvm::MutableArrayRef ref) { - return absl::Span(ref.data(), ref.size()); -} - } // namespace mlir #endif // TENSORFLOW_COMPILER_MLIR_UTILS_ARRAY_CONTAINER_UTILS_H_ ",0,test 23d9a2b49d12ced9d8291f1097af92ae067bd4a6,tensorflow/tensorflow,"Remove unneeded copies now that int64 and int64_t are the same Just removed a couple of obvious ones. Larger/more uniform update coming later. PiperOrigin-RevId: 354637425 Change-Id: If7ae27acf47c81f1a39eb9120ddbfd0bae828a15",mlir_hlo_builder.cc,"@@ -49,16 +49,12 @@ static mlir::DenseIntElementsAttr GetI64ElementsAttr( absl::Span values, mlir::Builder* builder) { auto ty = mlir::RankedTensorType::get({static_cast(values.size())}, builder->getIntegerType(64)); - llvm::SmallVector mlir_values; - mlir_values.reserve(values.size()); - for (const auto& value : values) { - mlir_values.push_back(value); - } - return mlir::DenseIntElementsAttr::get(ty, mlir_values); + return mlir::DenseIntElementsAttr::get( + ty, llvm::makeArrayRef(values.data(), values.size())); } static mlir::DenseIntElementsAttr ConvertPadding( - absl::Span> padding, + absl::Span> padding, mlir::Builder* builder) { llvm::SmallVector elements; elements.reserve(padding.size() * 2); @@ -80,7 +76,7 @@ StatusOr MlirHloBuilder::MakeXlaOp(mlir::Value val) { return InvalidArgument(""unsupported type: %s"", ToString(ty).c_str()); } - int64 handle = reinterpret_cast(val.getAsOpaquePointer()); + int64_t handle = reinterpret_cast(val.getAsOpaquePointer()); handle_to_shape_[handle] = std::move(shape); return XlaOp(handle, this); } ",0,test 101d46ab716931f27c76b86c2f4d1e5780b43e64,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-05-13 PiperOrigin-RevId: 311289765 Change-Id: I6167b9a3d737248f831fbd4405339a9e59220944",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 5, 12) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 5, 13) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,test 136ccd3d83f1dbc27dbe80cab6fd8662964062dc,tensorflow/tensorflow,"Remove the check for optimizer in model_to_estimator. PiperOrigin-RevId: 247287177",distribute_strategy_test.py,"@@ -35,6 +35,7 @@ from tensorflow.python.framework import test_util from tensorflow.python.keras import testing_utils from tensorflow.python.keras.distribute import distributed_training_utils from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_keras +from tensorflow.python.keras.optimizer_v2 import rmsprop as rmsprop_keras from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.parsing_ops import gen_parsing_ops @@ -115,7 +116,7 @@ def multi_inputs_multi_outputs_model(): inputs=[input_a, input_b, input_m], outputs=[output_c, output_d]) model.compile( loss='categorical_crossentropy', - optimizer=gradient_descent.GradientDescentOptimizer(0.001), + optimizer=gradient_descent_keras.SGD(learning_rate=0.001), metrics={ 'dense_2': 'categorical_accuracy', 'dense_3': 'categorical_accuracy' @@ -371,7 +372,7 @@ class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase, keras_model.compile( loss='categorical_crossentropy', metrics=[keras.metrics.CategoricalAccuracy()], - optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.01), + optimizer=rmsprop_keras.RMSprop(learning_rate=0.01), cloning=cloning) config = run_config_lib.RunConfig( tf_random_seed=_RANDOM_SEED, @@ -405,7 +406,7 @@ class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase, keras_model.compile( loss='categorical_crossentropy', metrics=[keras.metrics.CategoricalAccuracy()], - optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.01), + optimizer=rmsprop_keras.RMSprop(learning_rate=0.01), cloning=cloning) config = run_config_lib.RunConfig( tf_random_seed=_RANDOM_SEED, @@ -477,36 +478,6 @@ class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase, eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1) self.assertLess(eval_results['loss'], baseline_eval_results['loss']) - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu - ], - mode=['graph'], - cloning=[True, False])) - def test_keras_optimizer_with_distribution_strategy(self, distribution, - cloning): - keras_model = simple_sequential_model() - keras_model.compile( - loss='categorical_crossentropy', - optimizer=keras.optimizers.rmsprop(lr=0.01), - cloning=cloning) - - config = run_config_lib.RunConfig( - tf_random_seed=_RANDOM_SEED, - model_dir=self._base_dir, - train_distribute=distribution) - with self.cached_session(): - est_keras = keras_lib.model_to_estimator( - keras_model=keras_model, config=config) - with self.assertRaisesRegexp(ValueError, - 'Only TensorFlow native optimizers are ' - 'supported with DistributionStrategy.'): - est_keras.train(input_fn=get_ds_train_input_fn, steps=_TRAIN_SIZE / 16) - - writer_cache.FileWriterCache.clear() - gfile.DeleteRecursively(self._config.model_dir) - class TestDistributionStrategyWithNumpyArrays(test.TestCase, parameterized.TestCase): ",0,test 5ff365a4cebce6f50f4cfeeab7490992f6089961,tensorflow/tensorflow,"Add user-defined initializers to the IndyLSTMCell. 
PiperOrigin-RevId: 204137901",rnn_cell.py,"@@ -3153,8 +3153,8 @@ class IndyGRUCell(rnn_cell_impl.LayerRNNCell): reuse: (optional) Python boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. - kernel_initializer: (optional) The initializer to use for the weight and - projection matrices. + kernel_initializer: (optional) The initializer to use for the weight + matrices applied to the input. bias_initializer: (optional) The initializer to use for the bias. name: String, the name of the layer. Layers with the same name will share weights, but to avoid mistakes we require reuse=True in such @@ -3287,6 +3287,8 @@ class IndyLSTMCell(rnn_cell_impl.LayerRNNCell): forget_bias=1.0, activation=None, reuse=None, + kernel_initializer=None, + bias_initializer=None, name=None, dtype=None): """"""Initialize the IndyLSTM cell. @@ -3300,6 +3302,9 @@ class IndyLSTMCell(rnn_cell_impl.LayerRNNCell): reuse: (optional) Python boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. + kernel_initializer: (optional) The initializer to use for the weight + matrix applied to the inputs. + bias_initializer: (optional) The initializer to use for the bias. name: String, the name of the layer. Layers with the same name will share weights, but to avoid mistakes we require reuse=True in such cases. @@ -3314,6 +3319,8 @@ class IndyLSTMCell(rnn_cell_impl.LayerRNNCell): self._num_units = num_units self._forget_bias = forget_bias self._activation = activation or math_ops.tanh + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer @property def state_size(self): @@ -3332,7 +3339,8 @@ class IndyLSTMCell(rnn_cell_impl.LayerRNNCell): # pylint: disable=protected-access self._kernel_w = self.add_variable( ""%s_w"" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME, - shape=[input_depth, 4 * self._num_units]) + shape=[input_depth, 4 * self._num_units], + initializer=self._kernel_initializer) self._kernel_u = self.add_variable( ""%s_u"" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME, shape=[1, 4 * self._num_units], @@ -3341,7 +3349,9 @@ class IndyLSTMCell(rnn_cell_impl.LayerRNNCell): self._bias = self.add_variable( rnn_cell_impl._BIAS_VARIABLE_NAME, shape=[4 * self._num_units], - initializer=init_ops.zeros_initializer(dtype=self.dtype)) + initializer=(self._bias_initializer + if self._bias_initializer is not None else + init_ops.zeros_initializer(dtype=self.dtype))) # pylint: enable=protected-access self.built = True ",0,train 7ea9843501d8938b7d58c5a95eacc3158b5784ec,tensorflow/tensorflow,"Optimize `tf.nn.embedding_lookup()` and `tf.gather()` when shapes are known. This avoids cross-device transfers of shape metadata, which is often statically known at graph construction time. As a result, the load on the parameter servers is reduced. Change: 117135698",array_grad.py,"@@ -174,10 +174,14 @@ ops.NoGradient(""ZerosLike"") @ops.RegisterGradient(""Gather"") def _GatherGrad(op, grad): - # op.inputs[0] can be large, so colocate the shape calculation with it. 
- with ops.colocate_with(op.inputs[0]): - dense_shape = array_ops.shape(op.inputs[0]) - values_shape = array_ops.concat(0, [[-1], dense_shape[1:]]) + if op.inputs[0].get_shape().is_fully_defined(): + dense_shape = constant_op.constant(op.inputs[0].get_shape().as_list()) + values_shape = [-1] + op.inputs[0].get_shape()[1:].as_list() + else: + # op.inputs[0] can be large, so colocate the shape calculation with it. + with ops.colocate_with(op.inputs[0]): + dense_shape = array_ops.shape(op.inputs[0]) + values_shape = array_ops.concat(0, [[-1], dense_shape[1:]]) values = array_ops.reshape(grad, values_shape) indices = array_ops.reshape(op.inputs[1], [-1]) ",0,train 7ea9843501d8938b7d58c5a95eacc3158b5784ec,tensorflow/tensorflow,"Optimize `tf.nn.embedding_lookup()` and `tf.gather()` when shapes are known. This avoids cross-device transfers of shape metadata, which is often statically known at graph construction time. As a result, the load on the parameter servers is reduced. Change: 117135698",embedding_ops.py,"@@ -105,8 +105,11 @@ def embedding_lookup(params, ids, partition_strategy=""mod"", name=None, else: dim_0_sizes = [] for p in xrange(np): - with ops.colocate_with(params[p]): - dim_0_sizes.append(array_ops.shape(params[p])[0]) + if params[p].get_shape()[0].value is not None: + dim_0_sizes.append(params[p].get_shape()[0].value) + else: + with ops.colocate_with(params[p]): + dim_0_sizes.append(array_ops.shape(params[p])[0]) num_total_ids = math_ops.reduce_sum( math_ops.cast(array_ops.pack(dim_0_sizes), flat_ids.dtype)) ids_per_partition = num_total_ids // np @@ -147,18 +150,22 @@ def embedding_lookup(params, ids, partition_strategy=""mod"", name=None, ret = data_flow_ops.dynamic_stitch(pindices, partitioned_result, name=name) # Reshape to reverse the flattening of ids. - # It's important that we compute params[0].shape on the right device - # to avoid data motion. - with ops.colocate_with(params[0]): - params_shape = array_ops.shape(params[0]) - ret = array_ops.reshape(ret, array_ops.concat(0, [ - array_ops.shape(ids), array_ops.slice(params_shape, [1], [-1])])) - # output shape = ids.shape + params[*].shape[1:] - # Normally the reshape is sufficient, but setting shape explicitly - # teaches shape inference that params[1:].get_shape() matters. element_shape = params[0].get_shape()[1:] for p in params[1:]: element_shape = element_shape.merge_with(p.get_shape()[1:]) + if element_shape.is_fully_defined(): + ret = array_ops.reshape(ret, array_ops.concat(0, [ + array_ops.shape(ids), element_shape])) + else: + # It's important that we compute params[0].shape on the right device + # to avoid data motion. + with ops.colocate_with(params[0]): + params_shape = array_ops.shape(params[0]) + ret = array_ops.reshape(ret, array_ops.concat(0, [ + array_ops.shape(ids), array_ops.slice(params_shape, [1], [-1])])) + # output shape = ids.shape + params[*].shape[1:] + # Normally the reshape is sufficient, but setting shape explicitly + # teaches shape inference that params[1:].get_shape() matters. ret.set_shape(ids.get_shape().concatenate(element_shape)) return ret ",0,train 19852793cecb4434791bba42d6e76dbb4b107e99,tensorflow/tensorflow,"Add complex data type support to tf.sparse.to_dense This PR tries to fix the issue raised in 53653 where tf.sparse.to_dense does not support complex64 or complex128 (tf.sparse.from_dense support complex dtypes). This PR adds complex64/complex128 support for tf.sparse.to_dense. This PR fixes 53653. 
Signed-off-by: Yong Tang ",sparse_to_dense_op.cc,"@@ -187,6 +187,8 @@ class SparseToDense : public OpKernel { TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS_ALL); REGISTER_KERNELS_ALL(bool); REGISTER_KERNELS_ALL(tstring); +REGISTER_KERNELS_ALL(complex64); +REGISTER_KERNELS_ALL(complex128); #undef REGISTER_KERNELS_ALL #undef REGISTER_KERNELS ",0,train b3cdbcf13659b8a6bfe83b61248ab0e429fd9273,tensorflow/tensorflow,Minor typo fix. (#2889),strip_unused.py,"@@ -57,7 +57,7 @@ tf.app.flags.DEFINE_boolean(""input_binary"", False, tf.app.flags.DEFINE_string(""output_graph"", """", """"""Output 'GraphDef' file name."""""") tf.app.flags.DEFINE_string(""input_node_names"", """", - """"""The name of the output nodes, comma separated."""""") + """"""The name of the input nodes, comma separated."""""") tf.app.flags.DEFINE_string(""output_node_names"", """", """"""The name of the output nodes, comma separated."""""") tf.app.flags.DEFINE_integer(""placeholder_type_enum"", ",0,train 308bb3c69b850535a49d49a63ca74d0a7ba61fc1,tensorflow/tensorflow,"Handle zero batch input in BatchNorm correctly if inside a DistributionStrategy scope. PiperOrigin-RevId: 240643242",zero_batch_test.py,"@@ -0,0 +1,109 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +""""""Test DistributionStrategy in the zero batch case."""""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import numpy as np + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.layers import normalization +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import variables +from tensorflow.python.ops.losses import losses +from tensorflow.python.platform import test +from tensorflow.python.training import gradient_descent + + +all_combinations = combinations.combine( + distribution=[ + strategy_combinations.one_device_strategy, + ], + mode=[""graph""]) + + +class NormalizationTest(test.TestCase, parameterized.TestCase): + + @combinations.generate( + combinations.times(all_combinations, + combinations.combine(fused=[True, False]))) + def testBNWithZeroBatchInput(self, distribution, fused): + with distribution.scope(), self.cached_session() as sess: + bn_list = [] + inputs = ops.convert_to_tensor( + np.random.random((0, 4, 4, 3)) + 100, dtype=dtypes.float32) + targets = ops.convert_to_tensor( + np.random.random((0, 4, 4, 3)), dtype=dtypes.float32) + + def step_fn(is_training, inputs, targets=None): + bn = normalization.BatchNormalization( + axis=3, epsilon=1e-3, momentum=0.9, fused=fused) + bn_list.append(bn) + outputs = bn.apply(inputs, training=is_training) + if not is_training: + return outputs + + loss = losses.mean_squared_error(targets, outputs) + optimizer = gradient_descent.GradientDescentOptimizer(0.01) + train_op = optimizer.minimize(loss) + with ops.control_dependencies([train_op]): + return array_ops.identity(loss) + + train_op = distribution.extended.call_for_each_replica( + step_fn, args=(True, inputs, targets)) + predict_op = distribution.extended.call_for_each_replica( + step_fn, args=(False, inputs)) + bn = bn_list[0] + + self.evaluate(variables.global_variables_initializer()) + + # Check for initial statistics and weights. + moving_mean, moving_var = self.evaluate( + [bn.moving_mean, bn.moving_variance]) + self.assertAllEqual([0, 0, 0], moving_mean) + self.assertAllEqual([1, 1, 1], moving_var) + + np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) + self.assertAllEqual([1, 1, 1], np_gamma) + self.assertAllEqual([0, 0, 0], np_beta) + + for _ in range(100): + np_output, _, _ = sess.run([train_op] + bn.updates) + self.assertEqual(0.0, np_output) + + # Verify that the statistics and weights are not changed after training. + moving_mean, moving_var = self.evaluate( + [bn.moving_mean, bn.moving_variance]) + self.assertAllEqual([0, 0, 0], moving_mean) + self.assertAllEqual([1, 1, 1], moving_var) + + np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta]) + self.assertAllEqual([1, 1, 1], np_gamma) + self.assertAllEqual([0, 0, 0], np_beta) + + # Test inference. + np_output = sess.run(predict_op) + self.assertEqual([], np_output.tolist()) + + +if __name__ == ""__main__"": + test.main() + ",0,train 308bb3c69b850535a49d49a63ca74d0a7ba61fc1,tensorflow/tensorflow,"Handle zero batch input in BatchNorm correctly if inside a DistributionStrategy scope. 
PiperOrigin-RevId: 240643242",normalization.py,"@@ -424,7 +424,7 @@ class BatchNormalizationBase(Layer): self._scope.set_partitioner(partitioner) self.built = True - def _assign_moving_average(self, variable, value, momentum): + def _assign_moving_average(self, variable, value, momentum, inputs_size): with ops.name_scope(None, 'AssignMovingAvg', [variable, value, momentum]) as scope: with ops.colocate_with(variable): @@ -433,12 +433,19 @@ class BatchNormalizationBase(Layer): decay = math_ops.cast(decay, variable.dtype.base_dtype) update_delta = ( variable - math_ops.cast(value, variable.dtype)) * decay + # TODO(b/129279393): Support zero batch input in non + # DistributionStrategy code as well. + if distribution_strategy_context.has_strategy(): + update_delta = tf_utils.smart_cond( + inputs_size > 0, + lambda: update_delta, lambda: K.zeros_like(update_delta)) return state_ops.assign_sub(variable, update_delta, name=scope) def _fused_batch_norm(self, inputs, training): """"""Returns the output of fused batch norm."""""" beta = self.beta if self.center else self._beta_const gamma = self.gamma if self.scale else self._gamma_const + inputs_size = array_ops.size(inputs) def _fused_batch_norm_training(): return nn.fused_batch_norm( @@ -482,21 +489,22 @@ class BatchNormalizationBase(Layer): strategy = distribution_strategy_context.get_strategy() mean_update = strategy.extended.update( self.moving_mean, self._assign_moving_average, - (mean, self.momentum)) + (mean, self.momentum, inputs_size)) variance_update = strategy.extended.update( self.moving_variance, self._assign_moving_average, - (variance, self.momentum)) + (variance, self.momentum, inputs_size)) else: mean_update = self._assign_moving_average(self.moving_mean, mean, - momentum) - variance_update = self._assign_moving_average(self.moving_variance, - variance, momentum) + momentum, inputs_size) + variance_update = self._assign_moving_average( + self.moving_variance, variance, momentum, inputs_size) self.add_update(mean_update, inputs=True) self.add_update(variance_update, inputs=True) return output - def _renorm_correction_and_moments(self, mean, variance, training): + def _renorm_correction_and_moments(self, mean, variance, training, + inputs_size): """"""Returns the correction and update values for renorm."""""" stddev = math_ops.sqrt(variance + self.epsilon) # Compute the average mean and standard deviation, as if they were @@ -527,7 +535,7 @@ class BatchNormalizationBase(Layer): lambda: d, lambda: array_ops.zeros_like(d)) - def _update_renorm_variable(var, weight, value): + def _update_renorm_variable(var, weight, value, inputs_size): """"""Updates a moving average and weight, returns the unbiased value."""""" value = array_ops.identity(value) def _do_update(): @@ -540,9 +548,10 @@ class BatchNormalizationBase(Layer): # Make sure the weight is not updated until before r and d computation. with ops.control_dependencies([value]): weight_value = array_ops.constant(1., dtype=weight.dtype) - new_var = self._assign_moving_average(var, value, self.renorm_momentum) - new_weight = self._assign_moving_average(weight, weight_value, - self.renorm_momentum) + new_var = self._assign_moving_average(var, value, self.renorm_momentum, + inputs_size) + new_weight = self._assign_moving_average( + weight, weight_value, self.renorm_momentum, inputs_size) # TODO(yuefengz): the updates to var and weighted can not be batched # together if we fetch their updated values here. Consider calculating # new values and delaying the updates. 
@@ -553,17 +562,26 @@ class BatchNormalizationBase(Layer): return tf_utils.smart_cond(training, _do_update, _fake_update) # TODO(yuefengz): colocate the operations - new_mean = _update_renorm_variable(self.renorm_mean, - self.renorm_mean_weight, mean) - new_stddev = _update_renorm_variable(self.renorm_stddev, - self.renorm_stddev_weight, stddev) + new_mean = _update_renorm_variable( + self.renorm_mean, self.renorm_mean_weight, mean, inputs_size) + new_stddev = _update_renorm_variable( + self.renorm_stddev, self.renorm_stddev_weight, stddev, inputs_size) # Make sqrt(moving_variance + epsilon) = new_stddev. new_variance = math_ops.square(new_stddev) - self.epsilon return (r, d, new_mean, new_variance) def _moments(self, inputs, reduction_axes, keep_dims): - return nn.moments(inputs, reduction_axes, keep_dims=keep_dims) + mean, variance = nn.moments(inputs, reduction_axes, keep_dims=keep_dims) + # TODO(b/129279393): Support zero batch input in non DistributionStrategy + # code as well. + if distribution_strategy_context.has_strategy(): + inputs_size = array_ops.size(inputs) + mean = tf_utils.smart_cond( + inputs_size > 0, lambda: mean, lambda: K.zeros_like(mean)) + variance = tf_utils.smart_cond( + inputs_size > 0, lambda: variance, lambda: K.zeros_like(variance)) + return mean, variance def call(self, inputs, training=None): if training is None: @@ -661,9 +679,10 @@ class BatchNormalizationBase(Layer): else: new_mean, new_variance = mean, variance + inputs_size = array_ops.size(inputs) if self.renorm: r, d, new_mean, new_variance = self._renorm_correction_and_moments( - new_mean, new_variance, training) + new_mean, new_variance, training, inputs_size) # When training, the normalized values (say, x) will be transformed as # x * gamma + beta without renorm, and (x * r + d) * gamma + beta # = x * (r * gamma) + (d * gamma + beta) with renorm. @@ -679,8 +698,8 @@ class BatchNormalizationBase(Layer): if in_eager_mode and not self.trainable: return return strategy.extended.update( - var, self._assign_moving_average, (value, self.momentum), - group=False) + var, self._assign_moving_average, + (value, self.momentum, inputs_size), group=False) # We need to unwrap the moving_mean or moving_variance in the case of # training being false to match the output of true_fn and false_fn # in the smart cond. @@ -697,7 +716,9 @@ class BatchNormalizationBase(Layer): """"""Compute the updates for mean and variance."""""" if in_eager_mode and not self.trainable: return - return self._assign_moving_average(var, value, self.momentum) + return self._assign_moving_average(var, value, self.momentum, + inputs_size) + mean_update = tf_utils.smart_cond( training, lambda: _do_update(self.moving_mean, new_mean), ",0,train 9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context. The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it. 
PiperOrigin-RevId: 298636976 Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",executor.cc,"@@ -1737,6 +1737,7 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) { params.inputs = &inputs; params.input_alloc_attrs = &input_alloc_attrs; params.runner = &runner_; + params.run_all_kernels_inline = run_all_kernels_inline_; params.stats_collector = stats_collector_; params.inc_num_deferred_ops_function = [this]() { mutex_lock lock(num_deferred_ops_mu_); ",0,train 9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context. The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it. PiperOrigin-RevId: 298636976 Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",function.cc,"@@ -532,6 +532,7 @@ class CallOp : public AsyncOpKernel { opts.step_container = ctx->step_container(); opts.stats_collector = ctx->stats_collector(); opts.runner = ctx->runner(); + opts.run_all_kernels_inline = ctx->run_all_kernels_inline(); opts.collective_executor = ctx->collective_executor(); std::vector args; args.reserve(ctx->num_inputs()); @@ -1021,6 +1022,7 @@ void FunctionLibraryRuntimeImpl::ExecutorArgsFromOptions( } exec_args->collective_executor = run_opts.collective_executor; exec_args->call_frame = frame; + exec_args->run_all_kernels_inline = run_opts.run_all_kernels_inline; } void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle, ",0,train 9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context. The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it. PiperOrigin-RevId: 298636976 Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",function_test.cc,"@@ -1872,6 +1872,67 @@ TEST_F(FunctionLibraryRuntimeTest, CrossDevice) { TensorShape({}))); } +class AreAllKernelsInlineOp : public OpKernel { + public: + using OpKernel::OpKernel; + + void Compute(OpKernelContext* ctx) override { + Tensor* output; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, {}, &output)); + output->scalar()() = ctx->run_all_kernels_inline(); + } +}; + +REGISTER_OP(""AreAllKernelsInline"").Output(""result : bool"").SetIsStateful(); +REGISTER_KERNEL_BUILDER(Name(""AreAllKernelsInline"").Device(DEVICE_CPU), + AreAllKernelsInlineOp); + +TEST_F(FunctionLibraryRuntimeTest, RunAllKernelsInline) { + // Create a function ""F"" that includes an AreAllKernelsInline op, and a + // function ""G"" that calls ""F"". 
+ auto f = FDH::Create( + // Name + ""F"", + // Args + {}, + // Return values + {""ret: bool""}, + // Attrs + {}, + // Nodes + {// y = AreAllKernelsInline() + {{""y""}, ""AreAllKernelsInline"", {}, {}}}, + {{""ret"", ""y:result:0""}}); + + auto g = FDH::Create( + // Name + ""G"", + // Args + {}, + // Return values + {""ret: bool""}, + // Attrs + {}, + // Nodes + {// y = F() + {{""y""}, ""F"", {}, {}}}, + {{""ret"", ""y:ret:0""}}); + + Init({f, g}); + FunctionLibraryRuntime::Handle handle; + TF_CHECK_OK(Instantiate(flr0_, ""G"", {}, &handle)); + + // Test that the `run_all_kernels_inline` flag is inherited by the kernel + // running inside the called function. + for (bool inline_option : {false, true}) { + FunctionLibraryRuntime::Options opts; + opts.run_all_kernels_inline = inline_option; + Tensor result; + TF_CHECK_OK(Run(flr0_, handle, opts, {}, {&result}, true)); + EXPECT_EQ(result.scalar()(), inline_option); + } +} + namespace { bool DoNothing(Graph* g) { return false; } ",0,train 9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context. The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it. PiperOrigin-RevId: 298636976 Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",function.h,"@@ -712,6 +712,10 @@ class FunctionLibraryRuntime { // If True, allow returning dead tensors. bool allow_dead_tensors = false; + // If True, hint that all kernels should be treated as ""inexpensive"", and + // hence executed on the scheduling thread. + bool run_all_kernels_inline = false; + // Returns a human readable representation of this. string DebugString() const; }; ",0,train 9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context. The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it. PiperOrigin-RevId: 298636976 Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",op_kernel.h,"@@ -732,6 +732,7 @@ class OpKernelContext { std::function)>* runner = nullptr; StepStatsCollectorInterface* stats_collector = nullptr; GraphCollector* graph_collector = nullptr; + bool run_all_kernels_inline = false; // TensorSliceReaderCache support. checkpoint::TensorSliceReaderCacheWrapper* slice_reader_cache = nullptr; @@ -867,6 +868,12 @@ class OpKernelContext { // If non-null, kernels should populate with any partition subgraphs created. GraphCollector* graph_collector() { return params_->graph_collector; } + // If True, hint that all kernels in functions called by this kernel, should + // be treated as ""inexpensive"", and hence executed on the scheduling thread. 
+ bool run_all_kernels_inline() const { + return params_->run_all_kernels_inline; + } + // Input to output forwarding. // Set the output Ref Tensor at output_index to be an alias of the ",0,train 9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context. The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it. PiperOrigin-RevId: 298636976 Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",batch_kernels.cc,"@@ -515,6 +515,7 @@ class BatchResource : public ResourceBase { opts.stats_collector = last_task_context->stats_collector(); opts.rendezvous = last_task_context->rendezvous(); opts.runner = last_task_context->runner(); + opts.run_all_kernels_inline = last_task_context->run_all_kernels_inline(); auto* flib = last_task_context->function_library(); std::vector combined_outputs; ",0,train 9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context. The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it. PiperOrigin-RevId: 298636976 Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",iterator_ops.cc,"@@ -835,6 +835,7 @@ class OneShotIteratorOp : public AsyncOpKernel { }); opts.step_container = &step_container; opts.runner = ctx->runner(); + opts.run_all_kernels_inline = ctx->run_all_kernels_inline(); Notification n; Status factory_status; std::vector return_values; ",0,train 9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context. The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it. PiperOrigin-RevId: 298636976 Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",map_defun_op.cc,"@@ -239,6 +239,7 @@ void MapDefunOp::SetRunOptions(OpKernelContext* ctx, } else { opts->runner = ctx->runner(); } + opts->run_all_kernels_inline = ctx->run_all_kernels_inline(); } Status MapDefunOp::SetupArgs(OpKernelContext* ctx, ",0,train 9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context. The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. 
This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it. PiperOrigin-RevId: 298636976 Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",single_threaded_executor.cc,"@@ -259,6 +259,7 @@ class SingleThreadedExecutorImpl : public Executor { Args::Runner runner_copy = args.runner; params.runner = &runner_copy; + params.run_all_kernels_inline = args.run_all_kernels_inline; params.stats_collector = args.stats_collector; // NOTE(mrry): We are assuming that the graph is loopless and condless. ",0,train 9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context. The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it. PiperOrigin-RevId: 298636976 Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",function_ops.cc,"@@ -253,6 +253,7 @@ class SymbolicGradientOp : public AsyncOpKernel { opts.rendezvous = ctx->rendezvous(); opts.cancellation_manager = ctx->cancellation_manager(); opts.runner = ctx->runner(); + opts.run_all_kernels_inline = ctx->run_all_kernels_inline(); opts.stats_collector = ctx->stats_collector(); opts.step_container = ctx->step_container(); opts.collective_executor = ctx->collective_executor(); @@ -365,6 +366,7 @@ void RemoteCallOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) { FunctionLibraryRuntime::Options opts; opts.runner = ctx->runner(); + opts.run_all_kernels_inline = ctx->run_all_kernels_inline(); opts.source_device = source_device; if (opts.source_device != target_device) { opts.remote_execution = true; ",0,train 9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context. The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it. PiperOrigin-RevId: 298636976 Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",functional_ops.cc,"@@ -107,6 +107,7 @@ void SetRunOptions(OpKernelContext* ctx, FunctionLibraryRuntime::Options* opts, opts->stats_collector = ctx->stats_collector(); } opts->runner = ctx->runner(); + opts->run_all_kernels_inline = ctx->run_all_kernels_inline(); opts->step_container = ctx->step_container(); } ",0,train 9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context. The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. 
This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it. PiperOrigin-RevId: 298636976 Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",partitioned_function_ops.cc,"@@ -241,6 +241,7 @@ void PartitionedCallOp::RunFunction(FunctionLibraryRuntime::Handle handle, // TODO(akshayka): Consider selecting a runner on a per-device basis, // i.e., using device-specific threadpools when available. run_opts.runner = ctx->runner(); + run_opts.run_all_kernels_inline = ctx->run_all_kernels_inline(); run_opts.source_device = lib->device() == nullptr ? """" : lib->device()->name(); run_opts.allow_dead_tensors = true; ",0,train 48f2893491fdc7352590a7d93b9ec317f2850c64,tensorflow/tensorflow,"Pass -v when invoking FileCheck. FileCheck has learned to output the debugging information we were printing, plus other info. See https://reviews.llvm.org/rL349418. PiperOrigin-RevId: 226941909",filecheck.cc,"@@ -48,7 +48,7 @@ StatusOr RunFileCheck(const string& input, const string& pattern) { tensorflow::SubProcess file_check_process; file_check_process.SetProgram(file_check_path, - {file_check_path, pattern_path}); + {file_check_path, ""-v"", pattern_path}); file_check_process.SetChannelAction(tensorflow::CHAN_STDIN, tensorflow::ACTION_PIPE); file_check_process.SetChannelAction(tensorflow::CHAN_STDERR, @@ -71,9 +71,7 @@ StatusOr RunFileCheck(const string& input, const string& pattern) { LOG(WARNING) << ""NOTE: FileCheck binary does not exist!""; } - LOG(WARNING) << ""FileCheck error: "" << standard_error; - LOG(WARNING) << ""FileCheck input was:""; - XLA_LOG_LINES(tensorflow::WARNING, input); + LOG(WARNING) << ""FileCheck error:\n"" << standard_error; LOG(WARNING) << ""FileCheck pattern was:""; XLA_LOG_LINES(tensorflow::WARNING, pattern); } else if (!standard_error.empty()) { ",0,train 1fa7b63eb3a45541551f398f2d076d5ef99e33dd,tensorflow/tensorflow,"Temporarily disable two tests due to overflow PiperOrigin-RevId: 387638876 Change-Id: If9293e2a806db6d70cc4fe2ed5d9bf47230ec52a",tensor_array_ops_test.py,"@@ -259,10 +259,11 @@ class TensorArrayTest(xla_test.XLATestCase): self.assertAllEqual(convert([[2.0, 201.0]]), d1) self.assertAllEqual(convert([[3.0, 301.0]]), d2) - @test_util.disable_control_flow_v2(""b/122315872 (split)"") - def testTensorArraySplitRead(self): - for dtype in self.numeric_tf_types: - self._testTensorArraySplitRead(dtype) + # Disable temporarily due to b/195023333 + # @test_util.disable_control_flow_v2(""b/122315872 (split)"") + # def testTensorArraySplitRead(self): + # for dtype in self.numeric_tf_types: + # self._testTensorArraySplitRead(dtype) @test_util.disable_control_flow_v2(""TensorArray.grad is not supported in v2"") def testTensorGradArrayWriteRead(self): @@ -1046,11 +1047,12 @@ class TensorArrayTest(xla_test.XLATestCase): self.assertAllEqual(convert([1.0, -1.0]), read_vals[0]) self.assertAllEqual(convert([10.0, -10.0]), read_vals[1]) - @test_util.disable_control_flow_v2(""b/122315734 (scatter)"") - def testTensorArrayScatterRead(self): - for dtype in self.numeric_tf_types: - self._testTensorArrayScatterRead(dtype) - self._testTensorArrayScatterRead(dtypes.bool) + # Disable temporarily due to b/195023333 + # @test_util.disable_control_flow_v2(""b/122315734 (scatter)"") + # def testTensorArrayScatterRead(self): + # for dtype in 
self.numeric_tf_types: + # self._testTensorArrayScatterRead(dtype) + # self._testTensorArrayScatterRead(dtypes.bool) @test_util.disable_control_flow_v2(""b/122315734 (scatter)"") def testTensorArrayScatterReadAndGradients(self): ",0,test 4d80b9758fc2ef64cb05ebd80d5fd1f21584413e,tensorflow/tensorflow,"[tf.data] Fixes RangeDataset cardinality when start_ == stop_. PiperOrigin-RevId: 396700532 Change-Id: Ie282649a1518a477208618170cb65dc744f38341",range_dataset_op.cc,"@@ -164,7 +164,9 @@ class RangeDatasetOp::Dataset : public DatasetBase { } int64_t Cardinality() const override { - if (step_ > 0) { + if (start_ == stop_) { + return 0; + } else if (step_ > 0) { return std::max(int64_t{0}, (stop_ - start_ - 1) / step_ + 1); } else { return std::max(int64_t{0}, (start_ - stop_ - 1) / -step_ + 1); ",0,train 4d80b9758fc2ef64cb05ebd80d5fd1f21584413e,tensorflow/tensorflow,"[tf.data] Fixes RangeDataset cardinality when start_ == stop_. PiperOrigin-RevId: 396700532 Change-Id: Ie282649a1518a477208618170cb65dc744f38341",cardinality_test.py,"@@ -115,6 +115,9 @@ def _test_combinations(): (""Range4"", lambda: dataset_ops.Dataset.range(10, 5), 0), (""Range5"", lambda: dataset_ops.Dataset.range(5, 10, 2), 3), (""Range6"", lambda: dataset_ops.Dataset.range(10, 5, -2), 3), + (""Range7"", lambda: dataset_ops.Dataset.range(0, 0, -2), 0), + (""Range8"", lambda: dataset_ops.Dataset.range(3, 3, 1), 0), + (""Range9"", lambda: dataset_ops.Dataset.range(-4, -4, 2), 0), (""Repeat1"", lambda: dataset_ops.Dataset.range(0).repeat(0), 0), (""Repeat2"", lambda: dataset_ops.Dataset.range(1).repeat(0), 0), (""Repeat3"", lambda: dataset_ops.Dataset.range(0).repeat(5), 0), ",0,train 82eafde35fce5aa7cbe57b6af49cb990f5edaf2c,tensorflow/tensorflow,"Fix negative axis issue with ragged tensor and reduce_sum This fix tries to address the issue raised in 27497 where `tf.reduce_sum` with multiple negative axes and ragged tensor does not produce correct result. The issue is that during reduce op, ragged tensor will reduce one axis at a time. However, for negative axis, sort result is reversed so order is different. This fix convert to positive before the sort to make sure the order. This fix fixes 27497. Signed-off-by: Yong Tang ",ragged_math_ops.py,"@@ -461,6 +461,12 @@ def _ragged_reduce_aggregate(reduce_op, elif len(axis) == 1: axis = axis[0] else: + # When reducing multiple axes, as we reduce one at a time (see below), + # the negative axis has to be converted to positive at the first run + # as the sort with negative axis will have different orders. + # See GitHub issue 27497. + axis = [ragged_util.get_positive_axis( + a, rt_input.shape.ndims) for a in axis] # When reducing multiple axes, just reduce one at a time. This is less # efficient, and only works for associative ops. (In particular, it # does not work for reduce_mean.) However, reducing multiple axes at ",0,train 8bf25a491b60d223bba11233de9e62f4b0db17e8,tensorflow/tensorflow,"Add a read-ahead cache to the GCS implementation of RandomAccessFile. In some cases TensorFlow reads the data via RandomAccessFile in really small chunks, which doesn't work very efficiently with HTTP requests. Adding a read-ahead cache significantly boosts the performance. Change: 125691397",gcs_file_system.cc,"@@ -16,6 +16,7 @@ limitations under the License. 
#include ""tensorflow/core/platform/cloud/gcs_file_system.h"" #include #include +#include #include #include #include @@ -80,19 +81,58 @@ Status ParseGcsPath(const string& fname, string* bucket, string* object) { return Status::OK(); } -/// GCS-based implementation of a random access file. +/// A GCS-based implementation of a random access file with a read-ahead buffer. class GcsRandomAccessFile : public RandomAccessFile { public: GcsRandomAccessFile(const string& bucket, const string& object, AuthProvider* auth_provider, - HttpRequest::Factory* http_request_factory) + HttpRequest::Factory* http_request_factory, + size_t read_ahead_bytes) : bucket_(bucket), object_(object), auth_provider_(auth_provider), - http_request_factory_(std::move(http_request_factory)) {} + http_request_factory_(std::move(http_request_factory)), + read_ahead_bytes_(read_ahead_bytes) {} + /// The implementation of reads with a read-ahead buffer. Status Read(uint64 offset, size_t n, StringPiece* result, char* scratch) const override { + if (offset >= buffer_start_offset_ && + offset + n <= buffer_start_offset_ + buffer_content_size_) { + // If the requested range is fully in the buffer, just return it. + std::memcpy(scratch, buffer_.get() + offset - buffer_start_offset_, n); + *result = StringPiece(scratch, n); + return Status::OK(); + } + + // Update the buffer content based on the new requested range. + auto buffer_size = n + read_ahead_bytes_; + buffer_.reset(new char[buffer_size]); + buffer_start_offset_ = offset; + buffer_content_size_ = 0; + StringPiece buffer_content; + TF_RETURN_IF_ERROR( + ReadFromGCS(offset, buffer_size, &buffer_content, buffer_.get())); + buffer_content_size_ = buffer_content.size(); + + // Set the results. + *result = StringPiece(scratch, std::min(buffer_content_size_, n)); + std::memcpy(scratch, buffer_.get(), result->size()); + + if (result->size() < n) { + // This is not an error per se. The RandomAccessFile interface expects + // that Read returns OutOfRange if fewer bytes were read than requested. + return errors::OutOfRange(strings::StrCat(""EOF reached, "", result->size(), + "" bytes were read out of "", n, + "" bytes requested."")); + } + return Status::OK(); + } + + private: + /// A helper function to actually read the data from GCS. + Status ReadFromGCS(uint64 offset, size_t n, StringPiece* result, + char* scratch) const { string auth_token; TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_, &auth_token)); @@ -105,22 +145,21 @@ class GcsRandomAccessFile : public RandomAccessFile { TF_RETURN_IF_ERROR(request->SetRange(offset, offset + n - 1)); TF_RETURN_IF_ERROR(request->SetResultBuffer(scratch, n, result)); TF_RETURN_IF_ERROR(request->Send()); - - if (result->size() < n) { - // This is not an error per se. The RandomAccessFile interface expects - // that Read returns OutOfRange if fewer bytes were read than requested. - return errors::OutOfRange(strings::StrCat(""EOF reached, "", result->size(), - "" bytes were read out of "", n, - "" bytes requested."")); - } return Status::OK(); } - private: string bucket_; string object_; AuthProvider* auth_provider_; HttpRequest::Factory* http_request_factory_; + const size_t read_ahead_bytes_; + + // The buffer-related members need to be mutable, because they are modified + // by the const Read() method. + mutable std::unique_ptr buffer_; + // The original file offset of the first byte in the buffer. 
+ mutable size_t buffer_start_offset_ = 0; + mutable size_t buffer_content_size_ = 0; }; /// \brief GCS-based implementation of a writeable file. @@ -233,16 +272,19 @@ GcsFileSystem::GcsFileSystem() GcsFileSystem::GcsFileSystem( std::unique_ptr auth_provider, - std::unique_ptr http_request_factory) + std::unique_ptr http_request_factory, + size_t read_ahead_bytes) : auth_provider_(std::move(auth_provider)), - http_request_factory_(std::move(http_request_factory)) {} + http_request_factory_(std::move(http_request_factory)), + read_ahead_bytes_(read_ahead_bytes) {} Status GcsFileSystem::NewRandomAccessFile( const string& fname, std::unique_ptr* result) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, &bucket, &object)); result->reset(new GcsRandomAccessFile(bucket, object, auth_provider_.get(), - http_request_factory_.get())); + http_request_factory_.get(), + read_ahead_bytes_)); return Status::OK(); } ",0,train 8bf25a491b60d223bba11233de9e62f4b0db17e8,tensorflow/tensorflow,"Add a read-ahead cache to the GCS implementation of RandomAccessFile. In some cases TensorFlow reads the data via RandomAccessFile in really small chunks, which doesn't work very efficiently with HTTP requests. Adding a read-ahead cache significantly boosts the performance. Change: 125691397",gcs_file_system.h,"@@ -30,7 +30,8 @@ class GcsFileSystem : public FileSystem { public: GcsFileSystem(); GcsFileSystem(std::unique_ptr auth_provider, - std::unique_ptr http_request_factory); + std::unique_ptr http_request_factory, + size_t read_ahead_bytes); Status NewRandomAccessFile( const string& filename, @@ -63,6 +64,11 @@ class GcsFileSystem : public FileSystem { private: std::unique_ptr auth_provider_; std::unique_ptr http_request_factory_; + + // The number of bytes to read ahead for buffering purposes in the + // RandomAccessFile implementation. Defaults to 256Mb. + const size_t read_ahead_bytes_ = 256 * 1024 * 1024; + TF_DISALLOW_COPY_AND_ASSIGN(GcsFileSystem); }; ",0,train 8bf25a491b60d223bba11233de9e62f4b0db17e8,tensorflow/tensorflow,"Add a read-ahead cache to the GCS implementation of RandomAccessFile. In some cases TensorFlow reads the data via RandomAccessFile in really small chunks, which doesn't work very efficiently with HTTP requests. Adding a read-ahead cache significantly boosts the performance. 
Change: 125691397",gcs_file_system_test.cc,"@@ -49,7 +49,7 @@ class FakeAuthProvider : public AuthProvider { } }; -TEST(GcsFileSystemTest, NewRandomAccessFile) { +TEST(GcsFileSystemTest, NewRandomAccessFile_NoReadAhead) { std::vector requests( {new FakeHttpRequest( ""Uri: https://bucket.storage.googleapis.com/random_access.txt\n"" @@ -63,7 +63,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile) { ""6789"")}); GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), std::unique_ptr( - new FakeHttpRequestFactory(&requests))); + new FakeHttpRequestFactory(&requests)), + 0 /* read ahead bytes */); std::unique_ptr file; TF_EXPECT_OK(fs.NewRandomAccessFile(""gs://bucket/random_access.txt"", &file)); @@ -82,6 +83,65 @@ TEST(GcsFileSystemTest, NewRandomAccessFile) { EXPECT_EQ(""6789"", result); } +TEST(GcsFileSystemTest, NewRandomAccessFile_WithReadAhead) { + std::vector requests( + {new FakeHttpRequest( + ""Uri: https://bucket.storage.googleapis.com/random_access.txt\n"" + ""Auth Token: fake_token\n"" + ""Range: 0-8\n"", + ""01234567""), + new FakeHttpRequest( + ""Uri: https://bucket.storage.googleapis.com/random_access.txt\n"" + ""Auth Token: fake_token\n"" + ""Range: 6-15\n"", + ""6789abcd""), + new FakeHttpRequest( + ""Uri: https://bucket.storage.googleapis.com/random_access.txt\n"" + ""Auth Token: fake_token\n"" + ""Range: 6-20\n"", + ""6789abcd""), + new FakeHttpRequest( + ""Uri: https://bucket.storage.googleapis.com/random_access.txt\n"" + ""Auth Token: fake_token\n"" + ""Range: 15-29\n"", + """")}); + GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), + std::unique_ptr( + new FakeHttpRequestFactory(&requests)), + 5 /* read ahead bytes */); + + std::unique_ptr file; + TF_EXPECT_OK(fs.NewRandomAccessFile(""gs://bucket/random_access.txt"", &file)); + + char scratch[100]; + StringPiece result; + + // Read the first chunk. The cache will be updated with 4 + 5 = 9 bytes. + TF_EXPECT_OK(file->Read(0, 4, &result, scratch)); + EXPECT_EQ(""0123"", result); + + // The second chunk will be fully loaded from the cache, no requests are made. + TF_EXPECT_OK(file->Read(4, 4, &result, scratch)); + EXPECT_EQ(""4567"", result); + + // The chunk is only partially cached -- the request will be made to + // reload the cache. 5 + 5 = 10 bytes will be requested. + TF_EXPECT_OK(file->Read(6, 5, &result, scratch)); + EXPECT_EQ(""6789a"", result); + + // The range can only be partially satisfied. An attempt to fill the cache + // with 10 + 5 = 15 bytes will be made. + EXPECT_EQ(errors::Code::OUT_OF_RANGE, + file->Read(6, 10, &result, scratch).code()); + EXPECT_EQ(""6789abcd"", result); + + // The range cannot be satisfied. An attempt to fill the cache + // with 10 + 5 = 15 bytes will be made. 
+ EXPECT_EQ(errors::Code::OUT_OF_RANGE, + file->Read(15, 10, &result, scratch).code()); + EXPECT_TRUE(result.empty()); +} + TEST(GcsFileSystemTest, NewWritableFile) { std::vector requests({new FakeHttpRequest( ""Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?"" @@ -91,7 +151,8 @@ TEST(GcsFileSystemTest, NewWritableFile) { """")}); GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), std::unique_ptr( - new FakeHttpRequestFactory(&requests))); + new FakeHttpRequestFactory(&requests)), + 0 /* read ahead bytes */); std::unique_ptr file; TF_EXPECT_OK(fs.NewWritableFile(""gs://bucket/path/writeable.txt"", &file)); @@ -116,7 +177,8 @@ TEST(GcsFileSystemTest, NewAppendableFile) { """")}); GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), std::unique_ptr( - new FakeHttpRequestFactory(&requests))); + new FakeHttpRequestFactory(&requests)), + 0 /* read ahead bytes */); std::unique_ptr file; TF_EXPECT_OK(fs.NewAppendableFile(""gs://bucket/path/appendable.txt"", &file)); @@ -142,7 +204,8 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) { content)}); GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), std::unique_ptr( - new FakeHttpRequestFactory(&requests))); + new FakeHttpRequestFactory(&requests)), + 0 /* read ahead bytes */); std::unique_ptr region; TF_EXPECT_OK(fs.NewReadOnlyMemoryRegionFromFile( @@ -166,7 +229,8 @@ TEST(GcsFileSystemTest, FileExists) { """", errors::NotFound(""404""))}); GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), std::unique_ptr( - new FakeHttpRequestFactory(&requests))); + new FakeHttpRequestFactory(&requests)), + 0 /* read ahead bytes */); EXPECT_TRUE(fs.FileExists(""gs://bucket/path/file1.txt"")); EXPECT_FALSE(fs.FileExists(""gs://bucket/path/file2.txt"")); @@ -176,7 +240,8 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles) { auto requests = CreateGetThreeChildrenRequest(); GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), std::unique_ptr( - new FakeHttpRequestFactory(&requests))); + new FakeHttpRequestFactory(&requests)), + 0 /* read ahead bytes */); std::vector children; TF_EXPECT_OK(fs.GetChildren(""gs://bucket/path/"", &children)); @@ -188,7 +253,8 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles_NoSlash) { auto requests = CreateGetThreeChildrenRequest(); GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), std::unique_ptr( - new FakeHttpRequestFactory(&requests))); + new FakeHttpRequestFactory(&requests)), + 0 /* read ahead bytes */); std::vector children; TF_EXPECT_OK(fs.GetChildren(""gs://bucket/path"", &children)); @@ -204,7 +270,8 @@ TEST(GcsFileSystemTest, GetChildren_Empty) { ""{}"")}); GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), std::unique_ptr( - new FakeHttpRequestFactory(&requests))); + new FakeHttpRequestFactory(&requests)), + 0 /* read ahead bytes */); std::vector children; TF_EXPECT_OK(fs.GetChildren(""gs://bucket/path/"", &children)); @@ -221,7 +288,8 @@ TEST(GcsFileSystemTest, DeleteFile) { """")}); GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), std::unique_ptr( - new FakeHttpRequestFactory(&requests))); + new FakeHttpRequestFactory(&requests)), + 0 /* read ahead bytes */); TF_EXPECT_OK(fs.DeleteFile(""gs://bucket/path/file1.txt"")); } @@ -234,7 +302,8 @@ TEST(GcsFileSystemTest, DeleteDir_Empty) { ""{}"")}); GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), std::unique_ptr( - new FakeHttpRequestFactory(&requests))); + new FakeHttpRequestFactory(&requests)), + 0 /* read ahead bytes */); TF_EXPECT_OK(fs.DeleteDir(""gs://bucket/path/"")); } @@ -248,7 +317,8 @@ 
TEST(GcsFileSystemTest, DeleteDir_NonEmpty) { "" { \""name\"": \""path/file1.txt\"" }]}"")}); GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), std::unique_ptr( - new FakeHttpRequestFactory(&requests))); + new FakeHttpRequestFactory(&requests)), + 0 /* read ahead bytes */); EXPECT_FALSE(fs.DeleteDir(""gs://bucket/path/"").ok()); } @@ -261,7 +331,8 @@ TEST(GcsFileSystemTest, GetFileSize) { strings::StrCat(""{\""size\"": \""1010\""}""))}); GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), std::unique_ptr( - new FakeHttpRequestFactory(&requests))); + new FakeHttpRequestFactory(&requests)), + 0 /* read ahead bytes */); uint64 size; TF_EXPECT_OK(fs.GetFileSize(""gs://bucket/file.txt"", &size)); @@ -284,7 +355,8 @@ TEST(GcsFileSystemTest, RenameFile) { """")}); GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), std::unique_ptr( - new FakeHttpRequestFactory(&requests))); + new FakeHttpRequestFactory(&requests)), + 0 /* read ahead bytes */); TF_EXPECT_OK( fs.RenameFile(""gs://bucket/path/src.txt"", ""gs://bucket/path/dst.txt"")); ",0,train 2147ea442a0f514ef81c91ba569de89bab5753ca,tensorflow/tensorflow,"[PJRT] Expose PyClient::runtime_type() to distinguish between ""stream_executor"" and ""tfrt"" backends. PiperOrigin-RevId: 370531014 Change-Id: I498827583a21e51de390e8d0d34ebe175b323a5a",pjrt_client.h,"@@ -57,6 +57,14 @@ static const PjRtPlatformId kGpuId = tensorflow::Fingerprint64(kGpuName); static const PjRtPlatformId kTpuId = tensorflow::Fingerprint64(kTpuName); enum PjRtRuntimeType { kStreamExecutor, kTfrt }; +static constexpr absl::string_view PjRtRuntimeTypeString(PjRtRuntimeType type) { + switch (type) { + case kStreamExecutor: + return ""stream_executor""; + case kTfrt: + return ""tfrt""; + } +} class PjRtClient; @@ -184,6 +192,8 @@ class PjRtClient { // Returns a string that identifies the platform (CPU/GPU/TPU). virtual absl::string_view platform_name() const = 0; + // Returns a string containing human-readable, platform-specific version info + // (e.g. the CUDA version on GPU or libtpu version on Cloud TPU). virtual absl::string_view platform_version() const = 0; // Returns an enum that identifies the type of runtime being used under this ",0,train 2147ea442a0f514ef81c91ba569de89bab5753ca,tensorflow/tensorflow,"[PJRT] Expose PyClient::runtime_type() to distinguish between ""stream_executor"" and ""tfrt"" backends. PiperOrigin-RevId: 370531014 Change-Id: I498827583a21e51de390e8d0d34ebe175b323a5a",py_client.h,"@@ -100,6 +100,9 @@ class PyClient : public std::enable_shared_from_this { absl::string_view platform_version() const { return pjrt_client_->platform_version(); } + absl::string_view runtime_type() const { + return PjRtRuntimeTypeString(pjrt_client_->runtime_type()); + } int addressable_device_count() const { return pjrt_client_->addressable_device_count(); } ",0,train 2147ea442a0f514ef81c91ba569de89bab5753ca,tensorflow/tensorflow,"[PJRT] Expose PyClient::runtime_type() to distinguish between ""stream_executor"" and ""tfrt"" backends. 
PiperOrigin-RevId: 370531014 Change-Id: I498827583a21e51de390e8d0d34ebe175b323a5a",xla.cc,"@@ -206,6 +206,7 @@ PYBIND11_MODULE(xla_extension, m) { py::class_> py_local_client(m, ""Client""); py_local_client.def_property_readonly(""platform"", &PyClient::platform_name) .def_property_readonly(""platform_version"", &PyClient::platform_version) + .def_property_readonly(""runtime_type"", &PyClient::runtime_type) .def(""device_count"", &PyClient::device_count) .def(""local_device_count"", &PyClient::addressable_device_count) .def(""devices"", &PyClient::Devices) ",0,train 11fb9e3fcca1567d4e09d9fa043d97e31b66d8a7,tensorflow/tensorflow,"Internal change PiperOrigin-RevId: 377602003 Change-Id: Ied5dd5cd7f963a157719f20600f0326e14a5a964",fake_quant_ops_functor.h,"@@ -87,13 +87,13 @@ struct FakeQuantWithMinMaxArgsFunctor { float nudged_min, nudged_max, nudged_scale; Nudge(min, max, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale); - const float quant_zero = floor(-nudged_min / nudged_scale + 0.5f); + const float inv_nudged_scale = 1.0f / nudged_scale; auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min); auto clamped_shifted = clamped - nudged_min; outputs.device(d) = - (clamped_shifted / nudged_scale - quant_zero + 0.5f).floor() * - nudged_scale; + (clamped_shifted * inv_nudged_scale + 0.5f).floor() * nudged_scale + + nudged_min; } }; @@ -138,14 +138,13 @@ struct FakeQuantWithMinMaxVarsFunctor { float nudged_min, nudged_max, nudged_scale; Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale); - const float quant_zero = floor(-nudged_min / nudged_scale + 0.5f); const auto nudged_scale_repl = inputs.constant(nudged_scale); const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min); const auto clamped_shifted = clamped - nudged_min; - outputs.device(d) = - (clamped_shifted / nudged_scale_repl - quant_zero + 0.5f).floor() * - nudged_scale_repl; + outputs.device(d) = (clamped_shifted / nudged_scale_repl + 0.5f).floor() * + nudged_scale_repl + + nudged_min; } }; @@ -213,15 +212,13 @@ struct FakeQuantWithMinMaxVarsPerChannelFunctor { float nudged_min, nudged_max, nudged_scale; Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale); - const float quant_zero = floor(-nudged_min / nudged_scale + 0.5f); - const auto clamped = inputs.chip<1>(i).cwiseMin(nudged_max).cwiseMax(nudged_min); const auto clamped_shifted = clamped - nudged_min; outputs.chip<1>(i).device(d) = - (clamped_shifted / nudged_scale - quant_zero + 0.5f).floor() * - nudged_scale; + (clamped_shifted / nudged_scale + 0.5f).floor() * nudged_scale + + nudged_min; } } }; ",0,train 11fb9e3fcca1567d4e09d9fa043d97e31b66d8a7,tensorflow/tensorflow,"Internal change PiperOrigin-RevId: 377602003 Change-Id: Ied5dd5cd7f963a157719f20600f0326e14a5a964",fake_quant_ops_test.cc,"@@ -54,9 +54,7 @@ class QuantOpsTest : public OpsTestBase { const bool narrow_range, const float min, const float max, const TensorShape& shape, const gtl::ArraySlice data, - gtl::ArraySlice expected_data, - const double atol = -1.0, - const double rtol = -1.0) { + gtl::ArraySlice expected_data) { TF_EXPECT_OK(NodeDefBuilder(""op"", ""FakeQuantWithMinMaxArgs"") .Input(FakeInput(DT_FLOAT)) // inputs .Attr(""min"", min) @@ -74,16 +72,14 @@ class QuantOpsTest : public OpsTestBase { Tensor* output = GetOutput(0); Tensor expected(allocator(), DT_FLOAT, shape); FillValues(&expected, expected_data); - ExpectClose(expected, *output, atol, rtol); + ExpectClose(expected, *output); } void 
RunTestFakeQuantWithMinMaxVars(const int num_bits, const bool narrow_range, const float min, const float max, const TensorShape& shape, const gtl::ArraySlice data, - gtl::ArraySlice expected_data, - const double atol = -1.0, - const double rtol = -1.0) { + gtl::ArraySlice expected_data) { TF_EXPECT_OK(NodeDefBuilder(""op"", ""FakeQuantWithMinMaxVars"") .Input(FakeInput(DT_FLOAT)) // inputs .Input(FakeInput(DT_FLOAT)) // min @@ -105,15 +101,14 @@ class QuantOpsTest : public OpsTestBase { Tensor* output = GetOutput(0); Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3})); FillValues(&expected, expected_data); - ExpectClose(expected, *output, atol, rtol); + ExpectClose(expected, *output); } void RunTestFakeQuantWithMinMaxVarsPerChannel( const int num_bits, const bool narrow_range, const TensorShape& minmax_shape, const gtl::ArraySlice min, const gtl::ArraySlice max, const TensorShape& shape, - const gtl::ArraySlice data, gtl::ArraySlice expected_data, - const double atol = -1.0, const double rtol = -1.0) { + const gtl::ArraySlice data, gtl::ArraySlice expected_data) { TF_EXPECT_OK(NodeDefBuilder(""op"", ""FakeQuantWithMinMaxVarsPerChannel"") .Input(FakeInput(DT_FLOAT)) // inputs .Input(FakeInput(DT_FLOAT)) // min @@ -135,30 +130,10 @@ class QuantOpsTest : public OpsTestBase { Tensor* output = GetOutput(0); Tensor expected(allocator(), DT_FLOAT, shape); FillValues(&expected, expected_data); - ExpectClose(expected, *output, atol, rtol); + ExpectClose(expected, *output); } }; -TEST_F(QuantOpsTest, WithArgsSymmetricRangeZeroInput_RegularRange) { - // Original quantization range: [-10, 10], scale: 20/255. - // Original zero point: 127.5, nudged zero point 128.0. - // Expected quantized values: 0.0. - RunTestFakeQuantWithMinMaxArgs(8, false, -10.0f, 10.0f, TensorShape({2, 3}), - {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0, - 0.0); -} - -TEST_F(QuantOpsTest, WithArgsSymmetricRangeZeroInput_NarrowRange) { - // Original quantization range: [-10, 10], scale: 20/254. - // Original zero point: 128., no nudging necessary. - // Expected quantized values: 0.0. - RunTestFakeQuantWithMinMaxArgs(8, true, -10.0f, 10.0f, TensorShape({2, 3}), - {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0, - 0.0); -} - TEST_F(QuantOpsTest, WithArgsNoNudging_RegularRange) { // Original quantization range: [-10 + 0 / 4, -10 + 255 / 4], scale: 1/4. // Original zero point: 40, no nudging necessary. @@ -506,26 +481,6 @@ TEST_F(QuantOpsTest, WithVars_ZeroMinAndMax) { {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}); } -TEST_F(QuantOpsTest, WithVarsSymmetricRangeZeroInput_RegularRange) { - // Original quantization range: [-10, 10], scale: 20/255. - // Original zero point: 127.5, nudged zero point 128. - // Expected quantized values: 0. - RunTestFakeQuantWithMinMaxVars(8, false, -10.0f, 10.0f, TensorShape({2, 3}), - {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0, - 0.0); -} - -TEST_F(QuantOpsTest, WithVarsSymmetricRangeZeroInput_NarrowRange) { - // Original quantization range: [-10, 10], scale: 20/254. - // Original zero point: 128., no nudging necessary. - // Expected quantized values: 0. - RunTestFakeQuantWithMinMaxVars(8, true, -10.0f, 10.0f, TensorShape({2, 3}), - {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, - {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0, - 0.0); -} - TEST_F(QuantOpsTest, WithVarsNoNudging_RegularRange) { // Original quantization range: [-10 + 0 / 4, -10 + 255 / 4], scale: 1/4. 
// Original zero point: 40, no nudging necessary. @@ -913,26 +868,6 @@ TEST_F(QuantOpsTest, WithVarsPerChannel_ZeroMinAndMax) { {0.0f, 0.0f, 0.0f, 0.0f}); } -TEST_F(QuantOpsTest, WithVarsPerChannelSymmetricRangeZeroInput_RegularRange) { - // Original quantization range: [-10, 10], scale: 20/255. - // Original zero point: 127.5, nudged zero point 128.0. - // Expected quantized values: 0. - RunTestFakeQuantWithMinMaxVarsPerChannel( - 8, false, TensorShape({4}), {-10.0f, -10.0f, -10.0f, -10.0f}, - {10.0f, 10.0f, 10.0f, 10.0f}, TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f}, - {0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0); -} - -TEST_F(QuantOpsTest, WithVarsPerChannelSymmetricRangeZeroInput_NarrowRange) { - // Original quantization range: [-10, 10], scale: 20/254. - // Original zero point: 128.0, no nudging necessary. - // Expected quantized values: 0. - RunTestFakeQuantWithMinMaxVarsPerChannel( - 8, true, TensorShape({4}), {-10.0f, -10.0f, -10.0f, -10.0f}, - {10.0f, 10.0f, 10.0f, 10.0f}, TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f}, - {0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0); -} - TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedDown_RegularRange) { // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4]. // Scale: 1/4, original zero point: 0.4, nudged to 0. ",0,train ff9fa17710e8f9ee2b74f4022f0ce72a8ca23f6b,tensorflow/tensorflow,"[Trackable Method Migration] Change `checkpoint_dependencies` property to `_trackable_children` method. PiperOrigin-RevId: 419693734 Change-Id: Iba7549714c3fcd984fb5e5bfe4517137678c8e4f",rnn_cell_test.py,"@@ -237,9 +237,8 @@ class RNNTest(test.TestCase): cell = Plus1RNNCell() full_dropout_cell = rnn_cell.DropoutWrapper( cell, input_keep_prob=1e-6, seed=0) - (name, dep), = full_dropout_cell._checkpoint_dependencies - self.assertIs(dep, cell) - self.assertEqual(""cell"", name) + self.assertIn(""cell"", full_dropout_cell._trackable_children()) + self.assertIs(full_dropout_cell._trackable_children()[""cell""], cell) batch_size = 2 input_size = 5 max_length = 8 @@ -2584,8 +2583,8 @@ class RNNCellTest(test.TestCase, parameterized.TestCase): ], state_is_tuple=False) self.assertEqual(cell.dtype, None) - self.assertEqual(""cell-0"", cell._checkpoint_dependencies[0].name) - self.assertEqual(""cell-1"", cell._checkpoint_dependencies[1].name) + self.assertIn(""cell-0"", cell._trackable_children()) + self.assertIn(""cell-1"", cell._trackable_children()) cell.get_config() # Should not throw an error g, out_m = cell(x, m) # Layer infers the input type. 
@@ -2830,10 +2829,10 @@ class RNNCellTest(test.TestCase, parameterized.TestCase): bias_initializer=init_ops.constant_initializer(0.5)) g, m_new = base_cell(x, m) wrapper_object = wrapper_type(base_cell) - (name, dep), = wrapper_object._checkpoint_dependencies wrapper_object.get_config() # Should not throw an error - self.assertIs(dep, base_cell) - self.assertEqual(""cell"", name) + + self.assertIn(""cell"", wrapper_object._trackable_children()) + self.assertIs(wrapper_object._trackable_children()[""cell""], base_cell) g_res, m_new_res = wrapper_object(x, m) self.evaluate([variables_lib.global_variables_initializer()]) @@ -2873,10 +2872,8 @@ class RNNCellTest(test.TestCase, parameterized.TestCase): m = array_ops.zeros([1, 3]) cell = rnn_cell_impl.GRUCell(3) wrapped_cell = wrapper_type(cell, ""/cpu:0"") - (name, dep), = wrapped_cell._checkpoint_dependencies wrapped_cell.get_config() # Should not throw an error - self.assertIs(dep, cell) - self.assertEqual(""cell"", name) + self.assertEqual(wrapped_cell._trackable_children()[""cell""], cell) outputs, _ = wrapped_cell(x, m) self.assertIn(""cpu:0"", outputs.device.lower()) ",0,train e9a33ba0e504838ce9781b7cfa4401d67cecff80,tensorflow/tensorflow,"Fix kernel logging macros so that they work when TF_LITE_STRIP_ERROR_STRINGS is defined PiperOrigin-RevId: 439244018",common.h,"@@ -173,8 +173,9 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (false) #else // TF_LITE_STRIP_ERROR_STRINGS -#define TF_LITE_KERNEL_LOG(context, ...) -#define TF_LITE_MAYBE_KERNEL_LOG(context, ...) +#define UNUSED(...) (void)sizeof(__VA_ARGS__) +#define TF_LITE_KERNEL_LOG(context, ...) UNUSED(__VA_ARGS__) +#define TF_LITE_MAYBE_KERNEL_LOG(context, ...) UNUSED(__VA_ARGS__) #endif // TF_LITE_STRIP_ERROR_STRINGS // Check whether value is true, and if not return kTfLiteError from ",0,test 28c9bc3ab1bbcba5056470519bd1118f2722e05b,tensorflow/tensorflow,"Slightly simplify base64 decoding function. Change: 131142953",base64.cc,"@@ -15,6 +15,7 @@ limitations under the License. #include ""tensorflow/core/lib/strings/base64.h"" +#include #include #include ""tensorflow/core/lib/core/errors.h"" @@ -57,38 +58,14 @@ inline uint32 Convert(char x) { return static_cast(z); } -Status DecodeOneChar(const char* codes, char* result) { - const uint32 packed = (Convert(codes[0]) << 2) | - (Convert(codes[1]) >> 4); +Status DecodeThreeChars(const char* codes, char* result) { + const uint32 packed = (Convert(codes[0]) << 18) | (Convert(codes[1]) << 12) | + (Convert(codes[2]) << 6) | (Convert(codes[3])); // Convert() return value has upper 25 bits set if input is invalid. // Therefore `packed` has high bits set iff at least one of code is invalid. 
if (TF_PREDICT_FALSE((packed & 0xFF000000) != 0)) { return errors::InvalidArgument(""Invalid character found in base64.""); } - *result = static_cast(packed); - return Status::OK(); -} - -Status DecodeTwoChars(const char* codes, char* result) { - const uint32 packed = (Convert(codes[0]) << 10) | - (Convert(codes[1]) << 4) | - (Convert(codes[2]) >> 2); - if (TF_PREDICT_FALSE((packed & 0xFF000000) != 0)) { - return errors::InvalidArgument(""Invalid character found in base64.""); - } - result[0] = static_cast(packed >> 8); - result[1] = static_cast(packed); - return Status::OK(); -} - -Status DecodeThreeChars(const char* codes, char* result) { - const uint32 packed = (Convert(codes[0]) << 18) | - (Convert(codes[1]) << 12) | - (Convert(codes[2]) << 6) | - (Convert(codes[3])); - if (TF_PREDICT_FALSE((packed & 0xFF000000) != 0)) { - return errors::InvalidArgument(""Invalid character found in base64.""); - } result[0] = static_cast(packed >> 16); result[1] = static_cast(packed >> 8); result[2] = static_cast(packed); @@ -106,7 +83,10 @@ Status Base64Decode(StringPiece data, string* decoded) { return Status::OK(); } - // max_decoded_size may overestimate by up to 3 bytes. + // This decoding procedure will write 3 * ceil(data.size() / 4) bytes to be + // output buffer, then truncate if necessary. Therefore we must overestimate + // and allocate sufficient amount. Currently max_decoded_size may overestimate + // by up to 3 bytes. const size_t max_decoded_size = 3 * (data.size() / 4) + 3; std::unique_ptr buffer(new char[max_decoded_size]); char* current = buffer.get(); @@ -135,25 +115,22 @@ Status Base64Decode(StringPiece data, string* decoded) { } } - switch (end - b64) { - case 4: - TF_RETURN_IF_ERROR(DecodeThreeChars(b64, current)); - current += 3; - break; - case 3: - TF_RETURN_IF_ERROR(DecodeTwoChars(b64, current)); - current += 2; - break; - case 2: - TF_RETURN_IF_ERROR(DecodeOneChar(b64, current)); - current += 1; - break; - default: // case 1 - // We may check this condition early by checking data.size() % 4 == 1. - return errors::InvalidArgument( - ""Base64 string length cannot be 1 modulo 4.""); + const int remain = end - b64; + if (TF_PREDICT_FALSE(remain == 1)) { + // We may check this condition early by checking data.size() % 4 == 1. + return errors::InvalidArgument( + ""Base64 string length cannot be 1 modulo 4.""); } + // A valid base64 character will replace paddings, if any. + char tail[4] = {kBase64UrlSafeChars[0], kBase64UrlSafeChars[0], + kBase64UrlSafeChars[0], kBase64UrlSafeChars[0]}; + // Copy tail of the input into the array, then decode. + std::memcpy(tail, b64, remain * sizeof(*b64)); + TF_RETURN_IF_ERROR(DecodeThreeChars(tail, current)); + // We know how many parsed characters are valid. + current += remain - 1; + decoded->assign(buffer.get(), current - buffer.get()); return Status::OK(); } ",0,train d8ee3cbf769323a185614d9c427f809d135c9830,tensorflow/tensorflow,"Make InTopK return False if any of the predictions are NaN or non-finite. 
Change: 114218051",in_topk_op.cc,"@@ -56,11 +56,20 @@ class InTopK : public OpKernel { const auto num_classes = predictions.dimension(1); for (int b = 0; b < size; b++) { T target_prediction = predictions(b, targets(b)); + bool cannot_say = !std::isfinite(target_prediction); int more_probable_classes = 0; - for (int i = 0; i < num_classes; ++i) { - if (predictions(b, i) > target_prediction) ++more_probable_classes; + if (!cannot_say) { + for (int i = 0; i < num_classes; ++i) { + T pred = predictions(b, i); + if (!std::isfinite(pred)) { + cannot_say = true; + break; + } else if (pred > target_prediction) { + ++more_probable_classes; + } + } } - out(b) = more_probable_classes < k_; + out(b) = cannot_say ? false : (more_probable_classes < k_); } } @@ -68,13 +77,11 @@ class InTopK : public OpKernel { int k_; }; -REGISTER_KERNEL_BUILDER(Name(""InTopK"") - .Device(DEVICE_CPU) - .TypeConstraint(""T""), - InTopK); -REGISTER_KERNEL_BUILDER(Name(""InTopK"") - .Device(DEVICE_CPU) - .TypeConstraint(""T""), - InTopK); +REGISTER_KERNEL_BUILDER( + Name(""InTopK"").Device(DEVICE_CPU).TypeConstraint(""T""), + InTopK); +REGISTER_KERNEL_BUILDER( + Name(""InTopK"").Device(DEVICE_CPU).TypeConstraint(""T""), + InTopK); } // namespace tensorflow ",0,train d8ee3cbf769323a185614d9c427f809d135c9830,tensorflow/tensorflow,"Make InTopK return False if any of the predictions are NaN or non-finite. Change: 114218051",in_topk_op_test.py,"@@ -55,6 +55,11 @@ class InTopKTest(tf.test.TestCase): target = np.asarray([0, 2]).astype(np.int64) self._validateInTopK(predictions, target, 2, [False, True]) + def testInTopNan(self): + predictions = [[0.1, float(""nan""), 0.2, 0.4], [0.1, 0.2, 0.3, float(""inf"")]] + target = [0, 2] + self._validateInTopK(predictions, target, 2, [False, False]) + if __name__ == ""__main__"": tf.test.main() ",0,train 786ec0a88129c4dc729994ac1e6956bdb8ac5da1,tensorflow/tensorflow,"Update GraphDef version to 873. PiperOrigin-RevId: 393607022 Change-Id: I01c66290c41847f9ee219dea196959841ed72cba",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 872 // Updated: 2021/8/28 +#define TF_GRAPH_DEF_VERSION 873 // Updated: 2021/8/29 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). 
// ",0,train e48dc7f745a932a6ae6c21bb7b24bee1e76f1e5f,tensorflow/tensorflow,Adding a trivial helper function.,kernels.cc,"@@ -439,6 +439,23 @@ void TF_OpKernelConstruction_GetAttrStringList(TF_OpKernelConstruction* ctx, } } +void TF_OpKernelConstruction_GetAttrTensorShape(TF_OpKernelConstruction* ctx, + const char* attr_name, int64_t* values, + size_t max_vals, + TF_Status* status) { + ::tensorflow::TensorShape shape; + auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelConstruction*>(ctx); + ::tensorflow::Status s = cc_ctx->GetAttr(attr_name, &shape); + ::tensorflow::Set_TF_Status_from_Status(status, s); + + if (!status->status.ok()) return; + + const auto len = std::min(max_vals, (size_t)shape.dims()); + for (int i = 0; i < len; ++i) { + values[i] = static_cast(shape.dim_size(i)); + } +} + bool TF_OpKernelConstruction_HasAttr(TF_OpKernelConstruction* ctx, const char* attr_name, TF_Status* status) { auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelConstruction*>(ctx); ",0,train e48dc7f745a932a6ae6c21bb7b24bee1e76f1e5f,tensorflow/tensorflow,Adding a trivial helper function.,kernels.h,"@@ -322,6 +322,18 @@ TF_CAPI_EXPORT extern void TF_OpKernelConstruction_GetAttrStringList( size_t* lengths, int max_values, void* storage, size_t storage_size, TF_Status* status); +// Interprets the named kernel construction attribute as a shape attribute and fills +// in `vals` with the size of each dimension. +// `vals` must point to an array of length at least `max_values` (ideally set +// to total_size from +// TF_OpKernelConstruction_GetAttrSize(ctx, attr_name, &list_size, +// &total_size)). +TF_CAPI_EXPORT extern void TF_OpKernelConstruction_GetAttrTensorShape( + TF_OpKernelConstruction* ctx, + const char* attr_name, int64_t* values, + size_t max_vals, + TF_Status* status); + // Return true if the kernel construction has the attr_name TF_CAPI_EXPORT extern bool TF_OpKernelConstruction_HasAttr( TF_OpKernelConstruction* ctx, const char* attr_name, TF_Status* status); ",0,train b7bc8650cd673e2b12ed2a9b5a81d8074cee1e2a,tensorflow/tensorflow,fixed grammar in dataset_ops and readers,dataset_ops.py,"@@ -278,9 +278,9 @@ class DatasetV2(object): Note that if `tensors` contains a NumPy array, and eager execution is not enabled, the values will be embedded in the graph as one or more `tf.constant` operations. For large datasets (> 1 GB), this can waste - memory and run into byte limits of graph serialization. If tensors contains - one or more large NumPy arrays, consider the alternative described in - [this guide](https://tensorflow.org/guide/datasets#consuming_numpy_arrays). + memory and run into byte limits of graph serialization. If `tensors` + contains one or more large NumPy arrays, consider the alternative described + in [this guide](https://tensorflow.org/guide/datasets#consuming_numpy_arrays). Args: tensors: A nested structure of tensors. @@ -297,9 +297,9 @@ class DatasetV2(object): Note that if `tensors` contains a NumPy array, and eager execution is not enabled, the values will be embedded in the graph as one or more `tf.constant` operations. For large datasets (> 1 GB), this can waste - memory and run into byte limits of graph serialization. If tensors contains - one or more large NumPy arrays, consider the alternative described in - [this guide](https://tensorflow.org/guide/datasets#consuming_numpy_arrays). + memory and run into byte limits of graph serialization. 
If `tensors` + contains one or more large NumPy arrays, consider the alternative described + in [this guide](https://tensorflow.org/guide/datasets#consuming_numpy_arrays). Args: tensors: A nested structure of tensors, each having the same size in the @@ -566,7 +566,7 @@ class DatasetV2(object): ``` Args: - *args: follow same semantics as python's xrange. + *args: follows the same semantics as python's xrange. len(args) == 1 -> start = 0, stop = args[0], step = 1 len(args) == 2 -> start = args[0], stop = args[1], step = 1 len(args) == 3 -> start = args[0], stop = args[1, stop = args[2] @@ -852,10 +852,10 @@ class DatasetV2(object): Raises: ValueError: if `num_shards` or `index` are illegal values. Note: error - checking is done on a best-effort basis, and aren't guaranteed to be - caught upon dataset creation. (e.g. providing in a placeholder tensor - bypasses the early checking, and will instead result in an error during - a session.run call.) + checking is done on a best-effort basis, and errors aren't guaranteed + to be caught upon dataset creation. (e.g. providing in a placeholder + tensor bypasses the early checking, and will instead result in an error + during a session.run call.) """""" num_shards = ops.convert_to_tensor( num_shards, name=""num_shards"", dtype=dtypes.int64) @@ -892,7 +892,7 @@ class DatasetV2(object): batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of consecutive elements of this dataset to combine in a single batch. drop_remainder: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing - whether the last batch should be dropped in the case its has fewer than + whether the last batch should be dropped in the case it has fewer than `batch_size` elements; the default behavior is not to drop the smaller batch. @@ -949,7 +949,7 @@ class DatasetV2(object): respective components. Defaults are `0` for numeric types and the empty string for string types. drop_remainder: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing - whether the last batch should be dropped in the case its has fewer than + whether the last batch should be dropped in the case it has fewer than `batch_size` elements; the default behavior is not to drop the smaller batch. @@ -1573,7 +1573,7 @@ class DatasetV1Adapter(DatasetV1): class Options(object): """"""Represents options for tf.data.Dataset. - An `Options` object can be for instance used to control which static + An `Options` object can be, for instance, used to control which static optimizations to apply or whether to use performance modeling to dynamically tune the parallelism of operations such as `tf.data.Dataset.map` or `tf.data.Dataset.interleave`. ",0,train b7bc8650cd673e2b12ed2a9b5a81d8074cee1e2a,tensorflow/tensorflow,fixed grammar in dataset_ops and readers,readers.py,"@@ -180,7 +180,7 @@ class TFRecordDatasetV2(dataset_ops.DatasetV2): def __init__(self, filenames, compression_type=None, buffer_size=None, num_parallel_reads=None): - """"""Creates a `TFRecordDataset` to read for one or more TFRecord files. + """"""Creates a `TFRecordDataset` to read one or more TFRecord files. NOTE: The `num_parallel_reads` argument can be used to improve performance when reading from a remote filesystem. ",0,train 2473778dbe885da80e49a78022cc7efb60c6789d,tensorflow/tensorflow,"Revert ""Internal change"" This reverts commit 44ee91f82d40c79c248defea21010e63f58e0857.",legalize_tf.cc,"@@ -50,6 +50,7 @@ limitations under the License. 
#include ""tensorflow/compiler/mlir/lite/utils/validators.h"" #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"" #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h"" +#include ""tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.h"" #include ""tensorflow/compiler/mlir/tensorflow/utils/mangling_util.h"" #include ""tensorflow/compiler/xla/status.h"" #include ""tensorflow/compiler/xla/statusor.h"" @@ -661,6 +662,9 @@ void LegalizeTF::runOnFunction() { auto* context = &getContext(); auto func = getFunction(); + // Add TF->TF lowering patterns. + TF::PopulateLoweringTFPatterns(context, &patterns); + // Add the generated patterns to the list. populateWithGenerated(context, patterns); patterns ",0,train 6d2df88d657c87fdce6365bce9b19f1c39b0b0b2,tensorflow/tensorflow,"Several Estimator changes: - support configurable input_fn calling in Estimator subclasses. - pass params and config to the input_fn. - allow callables for model_fn and input_fn. PiperOrigin-RevId: 159725554",tpu_estimator.py,"@@ -307,7 +307,7 @@ def _convert_model_fn_to_train_step(model_fn, dequeue_fn, mode, run_config): def _call_model_fn(features, labels): """"""Calls the model_fn with required parameters."""""" - model_fn_args = estimator_lib._model_fn_args(model_fn) # pylint: disable=protected-access + model_fn_args = estimator_lib._fn_args(model_fn) # pylint: disable=protected-access kwargs = {} if 'mode' in model_fn_args: kwargs['mode'] = mode ",0,test 6d2df88d657c87fdce6365bce9b19f1c39b0b0b2,tensorflow/tensorflow,"Several Estimator changes: - support configurable input_fn calling in Estimator subclasses. - pass params and config to the input_fn. - allow callables for model_fn and input_fn. PiperOrigin-RevId: 159725554",estimator.py,"@@ -52,7 +52,7 @@ from tensorflow.python.util import tf_inspect _VALID_MODEL_FN_ARGS = set( - ['features', 'labels', 'mode', 'params', 'config']) + ['features', 'labels', 'mode', 'params', 'self', 'config']) class Estimator(object): @@ -357,7 +357,7 @@ class Estimator(object): } def _assert_members_are_not_overridden(self): - allowed_overrides = set(['_create_global_step']) + allowed_overrides = set(['_call_input_fn', '_create_global_step']) estimator_members = set([m for m in Estimator.__dict__.keys() if not m.startswith('__')]) subclass_members = set(self.__class__.__dict__.keys()) @@ -485,7 +485,7 @@ class Estimator(object): return export_dir def _get_features_from_input_fn(self, input_fn): - result = input_fn() + result = self._call_input_fn(input_fn) if not ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS): logging.warning('Input graph does not contain a QueueRunner. ' 'That means predict yields forever. ' @@ -549,6 +549,29 @@ class Estimator(object): assert step.dtype.is_integer return step + def _call_input_fn(self, input_fn): + """"""Calls the input function. + + Args: + input_fn: The input function. + + Returns: + Either features or (features, labels) where features and labels are: + features - `Tensor` or dictionary of string feature name to `Tensor`. + labels - `Tensor` or dictionary of `Tensor` with labels. + + Raises: + ValueError: if input_fn takes invalid arguments. + """""" + input_fn_args = _fn_args(input_fn) + kwargs = {} + if 'params' in input_fn_args: + kwargs['params'] = self.params + if 'config' in input_fn_args: + kwargs['config'] = self.config + with ops.device('/cpu:0'): + return input_fn(**kwargs) + def _call_model_fn(self, features, labels, mode): """"""Calls model function. 
@@ -563,7 +586,7 @@ class Estimator(object): Raises: ValueError: if model_fn returns invalid objects. """""" - model_fn_args = _model_fn_args(self._model_fn) + model_fn_args = _fn_args(self._model_fn) kwargs = {} if 'mode' in model_fn_args: kwargs['mode'] = mode @@ -584,8 +607,7 @@ class Estimator(object): with ops.Graph().as_default() as g, g.device(self._device_fn): random_seed.set_random_seed(self._config.tf_random_seed) global_step_tensor = self._create_and_assert_global_step(g) - with ops.device('/cpu:0'): - features, labels = input_fn() + features, labels = self._call_input_fn(input_fn) estimator_spec = self._call_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN) ops.add_to_collection(ops.GraphKeys.LOSSES, estimator_spec.loss) @@ -666,7 +688,7 @@ class Estimator(object): with ops.Graph().as_default() as g: random_seed.set_random_seed(self._config.tf_random_seed) global_step_tensor = self._create_and_assert_global_step(g) - features, labels = input_fn() + features, labels = self._call_input_fn(input_fn) estimator_spec = self._call_model_fn( features, labels, model_fn_lib.ModeKeys.EVAL) @@ -749,7 +771,7 @@ def _get_replica_device_setter(config): return None -def _model_fn_args(fn): +def _fn_args(fn): """"""Get argument names for function-like object. Args: @@ -762,6 +784,9 @@ def _model_fn_args(fn): ValueError: if partial function has positionally bound arguments """""" _, fn = tf_decorator.unwrap(fn) + if hasattr(fn, '__call__') and tf_inspect.ismethod(fn.__call__): + # Handle callables. + return tuple(tf_inspect.getargspec(fn.__call__).args) if hasattr(fn, 'func') and hasattr(fn, 'keywords') and hasattr(fn, 'args'): # Handle functools.partial and similar objects. return tuple([ @@ -774,7 +799,7 @@ def _model_fn_args(fn): def _verify_model_fn_args(model_fn, params): """"""Verifies model fn arguments."""""" - args = set(_model_fn_args(model_fn)) + args = set(_fn_args(model_fn)) if 'features' not in args: raise ValueError('model_fn (%s) must include features argument.' % model_fn) if 'labels' not in args: ",0,test 6d2df88d657c87fdce6365bce9b19f1c39b0b0b2,tensorflow/tensorflow,"Several Estimator changes: - support configurable input_fn calling in Estimator subclasses. - pass params and config to the input_fn. - allow callables for model_fn and input_fn. PiperOrigin-RevId: 159725554",estimator_test.py,"@@ -120,6 +120,9 @@ class EstimatorInheritanceConstraintTest(test.TestCase): def __init__(self): super(_Estimator, self).__init__(model_fn=dummy_model_fn) + def _call_input_fn(self, input_fn): + return input_fn() + def _create_global_step(self, graph): pass @@ -325,6 +328,79 @@ def _make_input_fn(features, labels): class EstimatorTrainTest(test.TestCase): + def test_callable_model_fn(self): + expected_features = {'x': 42., 'y': 43.} + expected_labels = 44. 
+ + model_fn_call_count = [0] + + test_self = self + + class ModelFn(object): + + def __call__(self, features, labels): + model_fn_call_count[0] += 1 + test_self.assertItemsEqual(expected_features.keys(), features.keys()) + return _estimator_spec( + expected_features, expected_labels, features, labels, + model_fn_lib.ModeKeys.TRAIN) + + with self.assertRaisesRegexp(ValueError, 'does not include params'): + estimator.Estimator(model_fn=ModelFn(), params={'a': 'b'}) + est = estimator.Estimator(model_fn=ModelFn(), config=run_config.RunConfig()) + self.assertEqual(0, model_fn_call_count[0]) + est.train( + input_fn=_make_input_fn(expected_features, expected_labels), steps=1) + self.assertEqual(1, model_fn_call_count[0]) + + def test_callable_input_fn(self): + expected_params = {'batch_size': 10} + expected_config = run_config.RunConfig().replace(tf_random_seed=4321) + input_fn_call_count = [0] + + def _model_fn(features, labels, mode, params, config): + del params, config + return model_fn_global_step_incrementer(features, labels, mode) + + test_self = self + + class InputFn(object): + + def __call__(self, params, config): + input_fn_call_count[0] += 1 + test_self.assertEqual(expected_params, params) + test_self.assertEqual(4321, config.tf_random_seed) + return dummy_input_fn() + + est = estimator.Estimator(model_fn=_model_fn, + params=expected_params, + config=expected_config) + self.assertEqual(0, input_fn_call_count[0]) + est.train(InputFn(), steps=1) + self.assertEqual(1, input_fn_call_count[0]) + + def test_input_fn_args(self): + expected_params = {'batch_size': 10} + expected_config = run_config.RunConfig().replace(tf_random_seed=4321) + input_fn_call_count = [0] + + def _model_fn(features, labels, mode, params, config): + del params, config + return model_fn_global_step_incrementer(features, labels, mode) + + def _input_fn(params, config): + input_fn_call_count[0] += 1 + self.assertEqual(expected_params, params) + self.assertEqual(4321, config.tf_random_seed) + return dummy_input_fn() + + est = estimator.Estimator(model_fn=_model_fn, + params=expected_params, + config=expected_config) + self.assertEqual(0, input_fn_call_count[0]) + est.train(_input_fn, steps=1) + self.assertEqual(1, input_fn_call_count[0]) + def test_minimal_model_fn_args(self): expected_features = {'x': 42., 'y': 43.} expected_labels = 44. 
@@ -665,6 +741,29 @@ class _StepCounterHook(session_run_hook.SessionRunHook): class EstimatorEvaluateTest(test.TestCase): + def test_input_fn_args(self): + expected_params = {'batch_size': 10} + expected_config = run_config.RunConfig().replace(tf_random_seed=4321) + input_fn_call_count = [0] + + def _model_fn(features, labels, mode, params, config): + del params, config + return model_fn_global_step_incrementer(features, labels, mode) + + def _input_fn(params, config): + input_fn_call_count[0] += 1 + self.assertEqual(expected_params, params) + self.assertEqual(4321, config.tf_random_seed) + return dummy_input_fn() + + est = estimator.Estimator(model_fn=_model_fn, + params=expected_params, + config=expected_config) + est.train(dummy_input_fn, steps=1) + self.assertEqual(0, input_fn_call_count[0]) + est.evaluate(_input_fn, steps=1) + self.assertEqual(1, input_fn_call_count[0]) + def test_model_fn_must_return_estimator_spec(self): def _model_fn(features, labels, mode): _, _ = features, labels @@ -866,6 +965,33 @@ class EstimatorEvaluateTest(test.TestCase): class EstimatorPredictTest(test.TestCase): + def test_input_fn_args(self): + expected_params = {'batch_size': 10} + expected_config = run_config.RunConfig().replace(tf_random_seed=4321) + input_fn_call_count = [0] + + def _model_fn(features, labels, mode, params, config): + del features, labels, params, config + return model_fn_lib.EstimatorSpec( + mode, + loss=constant_op.constant(0.), + train_op=state_ops.assign_add(training.get_global_step(), 1), + predictions=constant_op.constant([[10.]])) + + def _input_fn(params, config): + input_fn_call_count[0] += 1 + self.assertEqual(expected_params, params) + self.assertEqual(4321, config.tf_random_seed) + return dummy_input_fn() + + est = estimator.Estimator(model_fn=_model_fn, + params=expected_params, + config=expected_config) + est.train(dummy_input_fn, steps=1) + self.assertEqual(0, input_fn_call_count[0]) + next(est.predict(_input_fn)) + self.assertEqual(1, input_fn_call_count[0]) + def test_no_trained_model_in_model_dir(self): est = estimator.Estimator(model_fn=model_fn_global_step_incrementer) with self.assertRaisesRegexp(ValueError, ",0,test 92e8484f0abbc0f18f0e7d2680085fa3c285bf31,tensorflow/tensorflow,Stubbed out memory allocation calls in audio library,fft_util.cc,"@@ -20,6 +20,8 @@ limitations under the License. 
#include ""kiss_fft.h"" #include ""tools/kiss_fftr.h"" +#include ""tensorflow/lite/experimental/microfrontend/lib/memory_util.h"" + int FftPopulateState(struct FftState* state, size_t input_size) { state->input_size = input_size; state->fft_size = 1; @@ -28,14 +30,14 @@ int FftPopulateState(struct FftState* state, size_t input_size) { } state->input = reinterpret_cast( - malloc(state->fft_size * sizeof(*state->input))); + microfrontend_alloc(state->fft_size * sizeof(*state->input))); if (state->input == nullptr) { fprintf(stderr, ""Failed to alloc fft input buffer\n""); return 0; } - state->output = reinterpret_cast( - malloc((state->fft_size / 2 + 1) * sizeof(*state->output) * 2)); + state->output = reinterpret_cast(microfrontend_alloc( + (state->fft_size / 2 + 1) * sizeof(*state->output) * 2)); if (state->output == nullptr) { fprintf(stderr, ""Failed to alloc fft output buffer\n""); return 0; @@ -49,7 +51,7 @@ int FftPopulateState(struct FftState* state, size_t input_size) { fprintf(stderr, ""Kiss memory sizing failed.\n""); return 0; } - state->scratch = malloc(scratch_size); + state->scratch = microfrontend_alloc(scratch_size); if (state->scratch == nullptr) { fprintf(stderr, ""Failed to alloc fft scratch buffer\n""); return 0; @@ -66,7 +68,7 @@ int FftPopulateState(struct FftState* state, size_t input_size) { } void FftFreeStateContents(struct FftState* state) { - free(state->input); - free(state->output); - free(state->scratch); + microfrontend_free(state->input); + microfrontend_free(state->output); + microfrontend_free(state->scratch); } ",0,train 92e8484f0abbc0f18f0e7d2680085fa3c285bf31,tensorflow/tensorflow,Stubbed out memory allocation calls in audio library,filterbank_util.c,"@@ -18,6 +18,8 @@ limitations under the License. #include #include +#include ""tensorflow/lite/experimental/microfrontend/lib/memory_util.h"" + #define kFilterbankIndexAlignment 4 #define kFilterbankChannelBlockSize 4 @@ -65,32 +67,32 @@ int FilterbankPopulateState(const struct FilterbankConfig* config, ? 
1 : kFilterbankIndexAlignment / sizeof(int16_t)); - state->channel_frequency_starts = - malloc(num_channels_plus_1 * sizeof(*state->channel_frequency_starts)); +state->channel_frequency_starts = + microfrontend_alloc(num_channels_plus_1 * sizeof(*state->channel_frequency_starts)); state->channel_weight_starts = - malloc(num_channels_plus_1 * sizeof(*state->channel_weight_starts)); + microfrontend_alloc(num_channels_plus_1 * sizeof(*state->channel_weight_starts)); state->channel_widths = - malloc(num_channels_plus_1 * sizeof(*state->channel_widths)); - state->work = malloc(num_channels_plus_1 * sizeof(*state->work)); + microfrontend_alloc(num_channels_plus_1 * sizeof(*state->channel_widths)); + state->work = microfrontend_alloc(num_channels_plus_1 * sizeof(*state->work)); float* center_mel_freqs = - malloc(num_channels_plus_1 * sizeof(*center_mel_freqs)); + microfrontend_alloc(num_channels_plus_1 * sizeof(*center_mel_freqs)); int16_t* actual_channel_starts = - malloc(num_channels_plus_1 * sizeof(*actual_channel_starts)); + microfrontend_alloc(num_channels_plus_1 * sizeof(*actual_channel_starts)); int16_t* actual_channel_widths = - malloc(num_channels_plus_1 * sizeof(*actual_channel_widths)); + microfrontend_alloc(num_channels_plus_1 * sizeof(*actual_channel_widths)); if (state->channel_frequency_starts == NULL || state->channel_weight_starts == NULL || state->channel_widths == NULL || center_mel_freqs == NULL || actual_channel_starts == NULL || actual_channel_widths == NULL) { - free(center_mel_freqs); - free(actual_channel_starts); - free(actual_channel_widths); + microfrontend_free(center_mel_freqs); + microfrontend_free(actual_channel_starts); + microfrontend_free(actual_channel_widths); fprintf(stderr, ""Failed to allocate channel buffers\n""); return 0; } - + CalculateCenterFrequencies(num_channels_plus_1, config->lower_band_limit, config->upper_band_limit, center_mel_freqs); @@ -165,9 +167,9 @@ int FilterbankPopulateState(const struct FilterbankConfig* config, // If the alloc failed, we also need to nuke the arrays. 
if (state->weights == NULL || state->unweights == NULL) { - free(center_mel_freqs); - free(actual_channel_starts); - free(actual_channel_widths); + microfrontend_free(center_mel_freqs); + microfrontend_free(actual_channel_starts); + microfrontend_free(actual_channel_widths); fprintf(stderr, ""Failed to allocate weights or unweights\n""); return 0; } @@ -200,9 +202,9 @@ int FilterbankPopulateState(const struct FilterbankConfig* config, } } - free(center_mel_freqs); - free(actual_channel_starts); - free(actual_channel_widths); + microfrontend_free(center_mel_freqs); + microfrontend_free(actual_channel_starts); + microfrontend_free(actual_channel_widths); if (state->end_index >= spectrum_size) { fprintf(stderr, ""Filterbank end_index is above spectrum size.\n""); return 0; @@ -211,10 +213,10 @@ int FilterbankPopulateState(const struct FilterbankConfig* config, } void FilterbankFreeStateContents(struct FilterbankState* state) { - free(state->channel_frequency_starts); - free(state->channel_weight_starts); - free(state->channel_widths); - free(state->weights); - free(state->unweights); - free(state->work); + microfrontend_free(state->channel_frequency_starts); + microfrontend_free(state->channel_weight_starts); + microfrontend_free(state->channel_widths); + microfrontend_free(state->weights); + microfrontend_free(state->unweights); + microfrontend_free(state->work); } ",0,train 92e8484f0abbc0f18f0e7d2680085fa3c285bf31,tensorflow/tensorflow,Stubbed out memory allocation calls in audio library,pcan_gain_control_util.c,"@@ -17,6 +17,8 @@ limitations under the License. #include #include +#include ""tensorflow/lite/experimental/microfrontend/lib/memory_util.h"" + #define kint16max 0x00007FFF void PcanGainControlFillConfigWithDefaults( @@ -52,7 +54,7 @@ int PcanGainControlPopulateState(const struct PcanGainControlConfig* config, } state->noise_estimate = noise_estimate; state->num_channels = num_channels; - state->gain_lut = malloc(kWideDynamicFunctionLUTSize * sizeof(int16_t)); + state->gain_lut = microfrontend_alloc(kWideDynamicFunctionLUTSize * sizeof(int16_t)); if (state->gain_lut == NULL) { fprintf(stderr, ""Failed to allocate gain LUT\n""); return 0; @@ -88,5 +90,5 @@ int PcanGainControlPopulateState(const struct PcanGainControlConfig* config, } void PcanGainControlFreeStateContents(struct PcanGainControlState* state) { - free(state->gain_lut); + microfrontend_free(state->gain_lut); } ",0,train 92e8484f0abbc0f18f0e7d2680085fa3c285bf31,tensorflow/tensorflow,Stubbed out memory allocation calls in audio library,window_util.c,"@@ -19,6 +19,8 @@ limitations under the License. 
#include #include +#include ""tensorflow/lite/experimental/microfrontend/lib/memory_util.h"" + // Some platforms don't have M_PI #ifndef M_PI #define M_PI 3.14159265358979323846 @@ -34,7 +36,7 @@ int WindowPopulateState(const struct WindowConfig* config, state->size = config->size_ms * sample_rate / 1000; state->step = config->step_size_ms * sample_rate / 1000; - state->coefficients = malloc(state->size * sizeof(*state->coefficients)); + state->coefficients = microfrontend_alloc(state->size * sizeof(*state->coefficients)); if (state->coefficients == NULL) { fprintf(stderr, ""Failed to allocate window coefficients\n""); return 0; @@ -51,13 +53,13 @@ int WindowPopulateState(const struct WindowConfig* config, } state->input_used = 0; - state->input = malloc(state->size * sizeof(*state->input)); + state->input = microfrontend_alloc(state->size * sizeof(*state->input)); if (state->input == NULL) { fprintf(stderr, ""Failed to allocate window input\n""); return 0; } - state->output = malloc(state->size * sizeof(*state->output)); + state->output = microfrontend_alloc(state->size * sizeof(*state->output)); if (state->output == NULL) { fprintf(stderr, ""Failed to allocate window output\n""); return 0; @@ -67,7 +69,7 @@ int WindowPopulateState(const struct WindowConfig* config, } void WindowFreeStateContents(struct WindowState* state) { - free(state->coefficients); - free(state->input); - free(state->output); + microfrontend_free(state->coefficients); + microfrontend_free(state->input); + microfrontend_free(state->output); } ",0,train 87610fd68852983e97f5475a364b08272d080e48,tensorflow/tensorflow,"Add mhlo python binding generator target This just invokes the generator backend & creates a filegroup. PiperOrigin-RevId: 377318653 Change-Id: I0f750cb8890a5259f7e87805fde45bc4b8cb7d3b",mhlo.py,"@@ -0,0 +1,18 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# pylint: disable=wildcard-import,relative-beyond-top-level +from ._mhlo_ops_gen import * +# pylint: enable=wildcard-import,relative-beyond-top-level ",0,train 8a25f427db3d3dc5c9ddffc775b4c7dd4a96a6f9,tensorflow/tensorflow,"Enabe BF16 SoftmaxGrad(Sum), and fix accuracy by accum type.",reduction_ops.h,"@@ -19,9 +19,9 @@ limitations under the License. // Functor definitions for Reduction ops, must be compilable by nvcc. #include -#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/tensor_types.h"" +#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" namespace tensorflow { namespace functor { @@ -58,6 +58,29 @@ struct ReduceEigenImpl { } }; +// Specialization for BF16 Reducer to fix accuracy. +// TODO: all BF16 Reducer should have specialization to fix accuracy. 
+#define CASTING_SPECIALIZATION(Reducer, ScalarType, IntermediateType) \ + template \ + struct ReduceEigenImpl> { \ + void operator()(const Device& d, OUT_T out, IN_T in, \ + const ReductionAxes& reduction_axes, \ + const Reducer& reducer) { \ + static_assert(std::is_same::value, \ + """"); \ + Reducer intermediate_reducer; \ + auto in_as_intermediate = in.template cast(); \ + out.device(d) = \ + in_as_intermediate.reduce(reduction_axes, intermediate_reducer) \ + .template cast(); \ + } \ + }; + +CASTING_SPECIALIZATION(Eigen::internal::SumReducer, bfloat16, float); +#undef CASTING_SPECIALIZATION + template struct ReduceEigenImpl(); fusion.AddPass(/*is_layout_sensitive=*/true, /*only_fusion_computations=*/true); + fusion.AddPass(); TF_RETURN_IF_ERROR(fusion.Run(hlo_module).status()); HloPassPipeline reduce_pipeline(""reduce-precision""); ",0,test 2cd247d20422a41c33e0f4be265eba2df537ed3b,tensorflow/tensorflow,"Handle positive and negative infinity in TopKV2. TopKV2 hides iota in the low bits of the input after converting from bf16 to f32. This usually works, but for positive and negative infinity or'ing in iota produces NANs. To handle positive and negative infinity, treat bf16 as integers in sign-magnitude format. Convert to two's complement. Sort in two's complement and convert back. Add an exhaustive unit test for bfloat16 to float conversion. PiperOrigin-RevId: 201421784",sort_ops_test.py,"@@ -81,7 +81,7 @@ class XlaSortOpTest(xla_test.XLATestCase): def testTopKZeros(self): """"""Tests that positive and negative zeros sort correctly."""""" - # Requires Sort HLO, which is not implemented on CPU or GPU. + # TODO(b/26783907): The Sort HLO is not implemented on CPU or GPU. if self.device in [""XLA_CPU"", ""XLA_GPU""]: return @@ -99,7 +99,32 @@ class XlaSortOpTest(xla_test.XLATestCase): {p: np.array([0., -0., 0., 3., -0., -4., 0., -0.], dtype=bfloat16)}) self.assertAllEqual( np.array([3., 0., 0., 0.], dtype=bfloat16), results[0]) - self.assertEqual(set([0, 2, 3, 6]), set(results[1])) + self.assertEqual(list([3, 0, 1, 2]), list(results[1])) + + def testTopKInfinities(self): + """"""Tests that positive and negative infinity sort correctly."""""" + # TODO(b/26783907): The Sort HLO is not implemented on CPU or GPU. + if self.device in [""XLA_CPU"", ""XLA_GPU""]: + return + + # Only bfloat16 is implemented. + bfloat16 = dtypes.bfloat16.as_numpy_dtype + if bfloat16 not in self.numeric_types: + return + + with self.test_session() as sess: + p = array_ops.placeholder(dtypes.bfloat16) + with self.test_scope(): + topk = nn_ops.top_k(p, k=6) + results = sess.run(topk, { + p: np.array( + [1, 2, float(""inf""), -float(""inf""), -1, -2], dtype=bfloat16) + }) + self.assertAllEqual( + np.array( + [float(""inf""), 2.0, 1.0, -1.0, -2.0, -float(""inf"")], + dtype=bfloat16), results[0]) + self.assertEqual(list([2, 1, 0, 4, 5, 3]), list(results[1])) if __name__ == ""__main__"": ",0,train 2cd247d20422a41c33e0f4be265eba2df537ed3b,tensorflow/tensorflow,"Handle positive and negative infinity in TopKV2. TopKV2 hides iota in the low bits of the input after converting from bf16 to f32. This usually works, but for positive and negative infinity or'ing in iota produces NANs. To handle positive and negative infinity, treat bf16 as integers in sign-magnitude format. Convert to two's complement. Sort in two's complement and convert back. Add an exhaustive unit test for bfloat16 to float conversion. 
PiperOrigin-RevId: 201421784",topk_op.cc,"@@ -61,42 +61,89 @@ class TopKOp : public XlaOpKernel { if (input_shape.dim_size(0) < k) { k = input_shape.dim_size(0); } - const xla::XlaOp input = context->Input(0); - xla::XlaOp iota; - OP_REQUIRES_OK(context, XlaHelpers::Iota(b, DT_INT32, n, &iota)); + const xla::XlaOp input_bf16 = context->Input(0); + xla::XlaOp iota_s32; + OP_REQUIRES_OK(context, XlaHelpers::Iota(b, DT_INT32, n, &iota_s32)); // TODO(b/73891930): add a key-value sort to HLO, rather than using // bit-packing tricks here. - // TODO(b/73891930): this implementation will convert Infs to NaNs. A - // key-value sort would avoid this; for now, it is no worse than, say, the - // CPU backend in fast-math mode. + + xla::XlaOp zero = b->ConstantR0(0); + + // max can either be 0x7FFFFFFF or 0x8000000. Neither choice is totally + // ideal. The implications of the choice are: + // + // 0x7FFFFFFF + // 1. +0.0 > -0.0 + // 2. The elements of the inputs and outputs are bitwise identical. + // 3. The sort is unstable since a later +0.0 will appear before an earlier + // -0.0. + // + // 0x8000000 + // 1. +0.0 == -0.0 + // 2. All -0.0 in the input are replaced with +0.0 in the output. + // 3. The sort is stable. + xla::XlaOp max = b->ConstantR0(0x80000000); + xla::XlaOp index_mask = b->ConstantR0(0x0000FFFF); + xla::XlaOp value_mask = b->ConstantR0(0xFFFF0000); + + // Convert to from bf16 to f32. The lower 16-bits are zero due to the + // definition of bf16. + xla::XlaOp input_f32 = b->ConvertElementType(input_bf16, xla::F32); + + // Negate the input to reverse sort it. The lower 16-bits are zero, because + // negating a float is just inverting the high-bit. + xla::XlaOp negative_input_f32 = b->Neg(input_f32); + + // Convert to a sign magnitude integer. The lower 16-bits are zero, since + // bitcast convert doesn't change any bits. + xla::XlaOp negative_input_sm32 = + b->BitcastConvertType(negative_input_f32, xla::S32); + + // Convert from sign magnitude integer to two's complement integer. The + // lower 16-bits are zero on both sides of the select. On the false side, + // the value is unchanged, and on the true side, the lower 16-bits of max + // are all zero, so the lower 16-bits of the result of the subtraction will + // also be zero. + xla::XlaOp negative_input_s32 = + b->Select(b->Lt(negative_input_sm32, zero), + b->Sub(max, negative_input_sm32), negative_input_sm32); + + // In order for the Or with iota_s32 to to work properly, the lower 16-bits + // of negative_input_32 must be zero. // Pack elements as: // * upper 16 bits are the value // * lower 16 bits are the index. - xla::XlaOp packed = b->BitcastConvertType( - b->Or(b->BitcastConvertType(b->ConvertElementType(input, xla::F32), - xla::S32), - iota), - xla::F32); + xla::XlaOp packed_s32 = b->Or(negative_input_s32, iota_s32); // TODO(phawkins): use a more efficient algorithm that does not require a // full sort. 
- xla::XlaOp sorted = b->Slice(b->Rev(b->Sort(packed), {0}), - /*start_indices=*/{0}, - /*limit_indices=*/{k}, - /*strides=*/{1}); - - // Unpack the value/index - xla::XlaOp x = b->BitcastConvertType(sorted, xla::S32); - xla::XlaOp indices = b->And(x, b->ConstantR0(0x0000FFFF)); - xla::XlaOp values = b->ConvertElementType( - b->BitcastConvertType(b->And(x, b->ConstantR0(0xFFFF0000)), - xla::F32), - xla::BF16); - - context->SetOutput(0, values); - context->SetOutput(1, indices); + xla::XlaOp sorted_s32 = b->Slice(b->Sort(packed_s32), + /*start_indices=*/{0}, + /*limit_indices=*/{k}, + /*strides=*/{1}); + + // Unpack the value/index. + xla::XlaOp indices_s32 = b->And(sorted_s32, index_mask); + xla::XlaOp negative_values_s32 = b->And(sorted_s32, value_mask); + + // Convert from two's complement integer to sign magnitude integer. + xla::XlaOp negative_values_sm32 = + b->Select(b->Lt(negative_values_s32, zero), + b->Sub(max, negative_values_s32), negative_values_s32); + + xla::XlaOp negative_values_f32 = + b->BitcastConvertType(negative_values_sm32, xla::F32); + + // Negate the values to get back the original inputs. + xla::XlaOp values_f32 = b->Neg(negative_values_f32); + + // Convert from f32 to bf16. + xla::XlaOp values_bf16 = b->ConvertElementType(values_f32, xla::BF16); + + context->SetOutput(0, values_bf16); + context->SetOutput(1, indices_s32); } private: ",0,train 2cd247d20422a41c33e0f4be265eba2df537ed3b,tensorflow/tensorflow,"Handle positive and negative infinity in TopKV2. TopKV2 hides iota in the low bits of the input after converting from bf16 to f32. This usually works, but for positive and negative infinity or'ing in iota produces NANs. To handle positive and negative infinity, treat bf16 as integers in sign-magnitude format. Convert to two's complement. Sort in two's complement and convert back. Add an exhaustive unit test for bfloat16 to float conversion. PiperOrigin-RevId: 201421784",convert_test.cc,"@@ -461,5 +461,26 @@ XLA_TEST_F(ConvertTest, ConvertS64U64) { ComputeAndCompareR1(&builder, unsigned_x, {}); } +XLA_TEST_F(ConvertTest, ConvertBF16F32) { + XlaBuilder builder(TestName()); + + std::vector all_bfloats(1 << 16); + for (int i = 0; i < all_bfloats.size(); ++i) { + all_bfloats[i].value = i; + } + + std::vector expected(all_bfloats.size()); + for (int i = 0; i < expected.size(); ++i) { + expected[i] = (1U << 16) * i; + } + + // Exhaustively test all bf16 to f32 conversions. + xla::XlaOp all_bfloats_bf16 = builder.ConstantR1(all_bfloats); + xla::XlaOp all_bfloats_f32 = + builder.ConvertElementType(all_bfloats_bf16, F32); + xla::XlaOp all_bfloats_u32 = builder.BitcastConvertType(all_bfloats_f32, U32); + ComputeAndCompareR1(&builder, expected, {}); +} + } // namespace } // namespace xla ",0,train 7a08a4067cdbbcffbd158a4f018bb064b36fe909,tensorflow/tensorflow,"Add a number of missing headers being transitively pulled in. This enables a few headers to be removed from implementations and in turn simplify the build graph some. PiperOrigin-RevId: 291452567 Change-Id: Ia29c0095f6444043f0f6fb7d91b3abd93dd983af",op_compatibility_test.cc,"@@ -22,6 +22,7 @@ limitations under the License. 
#include ""tensorflow/core/framework/op.h"" #include ""tensorflow/core/framework/types.h"" #include ""tensorflow/core/kernels/ops_testutil.h"" +#include ""tensorflow/core/lib/core/status_test_util.h"" #include ""tensorflow/core/lib/strings/str_util.h"" #include ""tensorflow/core/platform/test.h"" ",0,train 5f9852c3ea1046513369ef9c0b2a2c6c103b147d,tensorflow/tensorflow,"Fix how keras's model_to_estimator function is exported. Fix issue if estimator is not present when API files are generated but installed later. PiperOrigin-RevId: 222107827",__init__.py,"@@ -24,23 +24,54 @@ from tensorflow.python.util.tf_export import tf_export # As long as you depend //third_party/py/tensorflow:tensorflow target # everything will work as normal. -try: - from tensorflow.python.estimator import keras as keras_lib # pylint: disable=g-import-not-at-top - model_to_estimator = tf_export('keras.estimator.model_to_estimator')( - keras_lib.model_to_estimator) -except Exception: # pylint: disable=broad-except - - # pylint: disable=unused-argument - def stub_model_to_estimator(keras_model=None, - keras_model_path=None, - custom_objects=None, - model_dir=None, - config=None): + +# LINT.IfChange +@tf_export('keras.estimator.model_to_estimator') +def model_to_estimator( + keras_model=None, + keras_model_path=None, + custom_objects=None, + model_dir=None, + config=None): + """"""Constructs an `Estimator` instance from given keras model. + + For usage example, please see: + [Creating estimators from Keras + Models](https://tensorflow.org/guide/estimators#model_to_estimator). + + Args: + keras_model: A compiled Keras model object. This argument is mutually + exclusive with `keras_model_path`. + keras_model_path: Path to a compiled Keras model saved on disk, in HDF5 + format, which can be generated with the `save()` method of a Keras model. + This argument is mutually exclusive with `keras_model`. + custom_objects: Dictionary for custom objects. + model_dir: Directory to save `Estimator` model parameters, graph, summary + files for TensorBoard, etc. + config: `RunConfig` to config `Estimator`. + + Returns: + An Estimator from given keras model. + + Raises: + ValueError: if neither keras_model nor keras_model_path was given. + ValueError: if both keras_model and keras_model_path was given. + ValueError: if the keras_model_path is a GCS URI. + ValueError: if keras_model has not been compiled. + """""" + try: + from tensorflow_estimator.python.estimator import keras as keras_lib # pylint: disable=g-import-not-at-top + except ImportError: raise NotImplementedError( 'tf.keras.estimator.model_to_estimator function not available in your ' 'installation.') - # pylint: enable=unused-argument + keras_lib.model_to_estimator( + keras_model=keras_model, + keras_model_path=keras_model_path, + custom_objects=custom_objects, + model_dir=model_dir, + config=config) + +# LINT.ThenChange(//third_party/tensorflow_estimator/python/estimator/keras.py) - model_to_estimator = tf_export('keras.estimator.model_to_estimator')( - stub_model_to_estimator) ",0,train 821d738255cc7baf9330bd7265e72dca972be465,tensorflow/tensorflow,"Fix Linalg lowering to loops This CL makes lowering to loops always be a: ``` %D = linalg.dim %view, constant : !linalg.view<...> affine.for %ix = %c0 to %D { ... } ``` This form composes correctly with tiling and is also the proper way to emit loops from views that across function boundaries. The previous version that would extract the range_min/max/step was composing incorrectly with tiling (i.e. 
would shift by range_min both in the loop bounds and in the slice) and would not work across function boundaries. The relevant tests are updated and a new test `dot_view`---which lowers to loops from views passed as function parameters---is added. When additional context is available, the linalg.dim operations should be folded away but this is left for a future CL. -- PiperOrigin-RevId: 249634712",LinalgOps.h,"@@ -336,6 +336,10 @@ public: ArrayRef operands) { return impl->create(builder, loc, operands); } + Operation::operand_range getInputsAndOutputs() { + auto range = this->getOperation()->getOperands(); + return {range.begin(), range.begin() + getNumInputsAndOutputs()}; + } private: struct Concept { ",0,train 821d738255cc7baf9330bd7265e72dca972be465,tensorflow/tensorflow,"Fix Linalg lowering to loops This CL makes lowering to loops always be a: ``` %D = linalg.dim %view, constant : !linalg.view<...> affine.for %ix = %c0 to %D { ... } ``` This form composes correctly with tiling and is also the proper way to emit loops from views that across function boundaries. The previous version that would extract the range_min/max/step was composing incorrectly with tiling (i.e. would shift by range_min both in the loop bounds and in the slice) and would not work across function boundaries. The relevant tests are updated and a new test `dot_view`---which lowers to loops from views passed as function parameters---is added. When additional context is available, the linalg.dim operations should be folded away but this is left for a future CL. -- PiperOrigin-RevId: 249634712",Utils.h,"@@ -89,8 +89,16 @@ Value *createOrReturnView(FuncBuilder *b, Location loc, enum class RangePart { Min = 0, Max, Step }; Value *extractRangePart(Value *range, RangePart part); +/// Returns the values obtained by applying `map` to the list of values. +/// Performs simplifications and foldings where possible. +SmallVector applyMapToValues(FuncBuilder *b, Location loc, + AffineMap map, + ArrayRef values, + FunctionConstants &state); + /// Returns the values obtained by applying `map` to the list of range parts -/// extracted from `ranges`. +/// extracted from `ranges`. Performs simplifications and foldings where +/// possible. SmallVector applyMapToRangePart(FuncBuilder *b, Location loc, AffineMap map, ArrayRef ranges, ",0,train bdc6a138403e8257841e8dff6d6b9322bb65053a,tensorflow/tensorflow,"Peel once on all tiled_loop dimensions This CL assumes that the best single rule for loop peeling is to ensure that there is a single loop without any padding needed. All dimensions are peeled once to ensure that this loop exists. Other peeled loops could be peeled again to remove more padding, but the assumption is that in the common case, this will not be worth the IR size/compile time increase. This is a temporary rule of thumb until more advanced heuristics can be created with multiple different code generation strategies depending on the input. PiperOrigin-RevId: 399143335 Change-Id: Ie20653fc4d900c43b03107211c9cac4a3a89781c",tf_cpurt_peel_tiled_loops.cc,"@@ -40,17 +40,17 @@ struct PeelTiledLoop mlir::linalg::TiledLoopOp loop, mlir::PatternRewriter &rewriter) const override { if (loop->hasAttr(kWasPeeledAttr)) return mlir::failure(); - auto peeled_idx = loop.getNumLoops() - 1; - mlir::linalg::TiledLoopOp peel; - if (mlir::linalg::peelAndCanonicalizeTiledLoop(rewriter, loop, peeled_idx, - peel) - .failed()) - return mlir::failure(); - - // Ensure that the peeling doesn't keep occurring forever. 
auto true_attr = mlir::BoolAttr::get(rewriter.getContext(), true); loop->setAttr(kWasPeeledAttr, true_attr); - peel->setAttr(kWasPeeledAttr, true_attr); + for (int peeled_idx = loop.getNumLoops() - 1; peeled_idx >= 0; + peeled_idx--) { + mlir::linalg::TiledLoopOp peel; + // Mark the new loop if one was created + if (mlir::linalg::peelAndCanonicalizeTiledLoop(rewriter, loop, peeled_idx, + peel) + .succeeded()) + peel->setAttr(kWasPeeledAttr, true_attr); + } return mlir::success(); } }; ",0,train e0266dbf39deac09315b764524835299b513926c,tensorflow/tensorflow,"Use `static_cast` instead of C-style casts. PiperOrigin-RevId: 316738458 Change-Id: I54f2f2f43d31606246475df0eae8d20e673aee6b",types.h,"@@ -37,18 +37,18 @@ namespace tensorflow { // Alias tensorflow::string to std::string. using std::string; -static const uint8 kuint8max = ((uint8)0xFF); -static const uint16 kuint16max = ((uint16)0xFFFF); -static const uint32 kuint32max = ((uint32)0xFFFFFFFF); -static const uint64 kuint64max = ((uint64)0xFFFFFFFFFFFFFFFFull); -static const int8 kint8min = ((int8)~0x7F); -static const int8 kint8max = ((int8)0x7F); -static const int16 kint16min = ((int16)~0x7FFF); -static const int16 kint16max = ((int16)0x7FFF); -static const int32 kint32min = ((int32)~0x7FFFFFFF); -static const int32 kint32max = ((int32)0x7FFFFFFF); -static const int64 kint64min = ((int64)~0x7FFFFFFFFFFFFFFFll); -static const int64 kint64max = ((int64)0x7FFFFFFFFFFFFFFFll); +static const uint8 kuint8max = static_cast(0xFF); +static const uint16 kuint16max = static_cast(0xFFFF); +static const uint32 kuint32max = static_cast(0xFFFFFFFF); +static const uint64 kuint64max = static_cast(0xFFFFFFFFFFFFFFFFull); +static const int8 kint8min = static_cast(~0x7F); +static const int8 kint8max = static_cast(0x7F); +static const int16 kint16min = static_cast(~0x7FFF); +static const int16 kint16max = static_cast(0x7FFF); +static const int32 kint32min = static_cast(~0x7FFFFFFF); +static const int32 kint32max = static_cast(0x7FFFFFFF); +static const int64 kint64min = static_cast(~0x7FFFFFFFFFFFFFFFll); +static const int64 kint64max = static_cast(0x7FFFFFFFFFFFFFFFll); // A typedef for a uint64 used as a short fingerprint. typedef uint64 Fprint; ",0,test debc40442f13d96047bb0f64e5f8f6921b0baf2b,tensorflow/tensorflow,"Remove explicit static linking from tests that load a shared library. This was causing the .so files to have undefined symbols from core/framework. Change: 145479847",gru_ops_test.cc,"@@ -27,8 +27,10 @@ class GruOpsTest : public ::testing::Test { TF_Status* status = TF_NewStatus(); auto* lib = TF_LoadLibrary( ""tensorflow/contrib/rnn/python/ops/_gru_ops.so"", status); - CHECK_EQ(TF_OK, TF_GetCode(status)); + TF_Code code = TF_GetCode(status); + string status_msg(TF_Message(status)); TF_DeleteStatus(status); + ASSERT_EQ(TF_OK, code) << status_msg; TF_DeleteLibraryHandle(lib); } }; ",0,train debc40442f13d96047bb0f64e5f8f6921b0baf2b,tensorflow/tensorflow,"Remove explicit static linking from tests that load a shared library. This was causing the .so files to have undefined symbols from core/framework. 
Change: 145479847",lstm_ops_test.cc,"@@ -29,9 +29,11 @@ class LSTMOpsTest : public ::testing::Test { TF_Status* status = TF_NewStatus(); auto* lib = TF_LoadLibrary( ""tensorflow/contrib/rnn/python/ops/_lstm_ops.so"", status); - CHECK_EQ(TF_OK, TF_GetCode(status)); - TF_DeleteLibraryHandle(lib); + TF_Code code = TF_GetCode(status); + string status_msg(TF_Message(status)); TF_DeleteStatus(status); + ASSERT_EQ(TF_OK, code) << status_msg; + TF_DeleteLibraryHandle(lib); } }; ",0,train daa75aff18fd42598d1fb68f13da13042e886c07,tensorflow/tensorflow,"Allows TensorListScatter to scatter at non-contiguous indices to make it consistent with TensorArray.scatter. PiperOrigin-RevId: 227874653",list_kernels.h,"@@ -521,14 +521,31 @@ class TensorListScatter : public OpKernel { ""Specified a list with shape "", element_shape.DebugString(), "" from a tensor with shape "", output_shape.DebugString())); output_list.element_shape = element_shape; - output_list.tensors.reserve(indices.NumElements()); + + OP_REQUIRES(c, indices.NumElements() == input_tensor.shape().dim_size(0), + errors::InvalidArgument( + ""Invalid number of rows in input tensor. Expected: "", + indices.NumElements(), + "" Actual: "", input_tensor.shape().dim_size(0))); + + // Validate indices and resize output_list.tensors to fit the highest index. + { + size_t list_size = 0; + for (int index = 0; index < indices.NumElements(); ++index) { + const int i = indices.flat()(index); + OP_REQUIRES(c, i >= 0, + errors::InvalidArgument( + ""Indices in TensorListScatter must all be positive."")); + if (i >= list_size) { + list_size = i + 1; + } + } + output_list.tensors.resize(list_size, Tensor(DT_INVALID)); + } + for (int index = 0; index < indices.NumElements(); ++index) { const int i = indices.flat()(index); - OP_REQUIRES(c, i < input_tensor.shape().dim_size(0), - errors::InvalidArgument( - ""Trying to scatter index "", i, "" from tensor with "", - input_tensor.shape().dim_size(0), "" rows."")); - Tensor tmp = input_tensor.Slice(i, i + 1); + Tensor tmp = input_tensor.Slice(index, index + 1); TensorShape tmp_shape = tmp.shape(); tmp_shape.RemoveDim(0); OP_REQUIRES(c, tmp.CopyFrom(tmp, tmp_shape), @@ -541,7 +558,7 @@ class TensorListScatter : public OpKernel { // many small ondes. aligned.flat().device(c->eigen_device()) = tmp.unaligned_flat(); - output_list.tensors.push_back(aligned); + std::swap(output_list.tensors[i], aligned); } output_tensor->scalar()() = std::move(output_list); } ",0,test daa75aff18fd42598d1fb68f13da13042e886c07,tensorflow/tensorflow,"Allows TensorListScatter to scatter at non-contiguous indices to make it consistent with TensorArray.scatter. 
PiperOrigin-RevId: 227874653",list_ops_test.py,"@@ -290,6 +290,47 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): t = list_ops.tensor_list_gather(l, [], element_dtype=dtypes.float32) self.evaluate(t) + def testGatherGradWithNonContiguousIndices(self): + with backprop.GradientTape(persistent=True) as tape: + t = constant_op.constant([1.0, 2.0, 3.0]) + l = list_ops.tensor_list_from_tensor(t, element_shape=[]) + c = constant_op.constant(5.0) + tape.watch(c) + l = list_ops.tensor_list_set_item(l, 1, c) + t = list_ops.tensor_list_gather(l, [1], element_dtype=dtypes.float32) + self.assertAllEqual(self.evaluate(t), [5.0]) + s = t[0] * t[0] + dt = tape.gradient(s, c) + self.assertAllEqual(self.evaluate(dt), 10.0) + dl = tape.gradient(t, l) + dl_length = list_ops.tensor_list_length(dl) + self.assertAllEqual(self.evaluate(dl_length), 3) + + def testScatterOutputListSize(self): + c0 = constant_op.constant([1.0, 2.0]) + l = list_ops.tensor_list_scatter( + c0, [1, 3], ops.convert_to_tensor([], dtype=dtypes.int32)) + # TensorListScatter should return a list with size largest index + 1. + self.assertEqual(self.evaluate(list_ops.tensor_list_length(l)), 4) + + def testScatterWithInvalidRowsInInputTensorFails(self): + c0 = constant_op.constant([1.0, 2.0]) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + ""Invalid number of rows in input tensor. Expected: 3 Actual: 2""): + l = list_ops.tensor_list_scatter( + c0, [1, 0, 2], ops.convert_to_tensor([], dtype=dtypes.int32)) + self.evaluate(l) + + def testScatterWithNegativeIndicesFails(self): + c0 = constant_op.constant([1.0, 2.0]) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + ""Indices in TensorListScatter must all be positive.""): + l = list_ops.tensor_list_scatter( + c0, [-1, -2], ops.convert_to_tensor([], dtype=dtypes.int32)) + self.evaluate(l) + def testScatterGrad(self): with backprop.GradientTape() as tape: c0 = constant_op.constant([1.0, 2.0]) ",0,test daa75aff18fd42598d1fb68f13da13042e886c07,tensorflow/tensorflow,"Allows TensorListScatter to scatter at non-contiguous indices to make it consistent with TensorArray.scatter. PiperOrigin-RevId: 227874653",tensor_array_ops_test.py,"@@ -1359,7 +1359,6 @@ class TensorArrayTest(test.TestCase): def testSkipEagerTensorArrayEvalEmptyWithDefault(self): self._testTensorArrayEvalEmptyWithDefault() - @test_util.disable_control_flow_v2(""b/117943286"") @test_util.run_v1_only(""b/117943489"") def testSkipEagerTensorArrayScatterReadAndGradients(self): with self.session(use_gpu=True) as session: @@ -1387,8 +1386,8 @@ class TensorArrayTest(test.TestCase): self.assertAllEqual([10.0, -10.0], read_vals[1]) self.assertAllEqual([[2.0, 3.0], [4.0, 5.0]], grad_vals[0]) - @test_util.disable_control_flow_v2(""b/117943286"") - @test_util.run_v1_only(""b/117943286"") + @test_util.disable_control_flow_v2(""b/118890905"") + @test_util.run_v1_only(""b/118890905"") def testTensorArrayWriteGatherAndGradients(self): with self.session(use_gpu=True) as session: ta = tensor_array_ops.TensorArray( ",0,test daa75aff18fd42598d1fb68f13da13042e886c07,tensorflow/tensorflow,"Allows TensorListScatter to scatter at non-contiguous indices to make it consistent with TensorArray.scatter. 
PiperOrigin-RevId: 227874653",list_ops.py,"@@ -200,10 +200,16 @@ def _TensorListResizeGrad(op, dlist): @ops.RegisterGradient(""TensorListGather"") def _TensorListGatherGrad(op, dtensor): - _, indices = op.inputs - return gen_list_ops.tensor_list_scatter( - tensor=dtensor, indices=indices, - element_shape=ops.convert_to_tensor(-1, dtype=dtypes.int32)), None + input_list, indices = op.inputs + dlist = gen_list_ops.tensor_list_scatter( + tensor=dtensor, + indices=indices, + element_shape=ops.convert_to_tensor(-1, dtype=dtypes.int32)) + # TensorListScatter returns a list with size `max(indices) + 1` + # so we manually resize it to match the size of the input list. + input_list_size = gen_list_ops.tensor_list_length(input_list) + dlist = gen_list_ops.tensor_list_resize(dlist, input_list_size) + return dlist, None @ops.RegisterGradient(""TensorListScatter"") ",0,test dbdaee1bf6f0840a86ac1002248a1600850ba549,tensorflow/tensorflow,"Replace `DCHECK_LE(f_dim, feature_dims)` with corresponding `OP_REQUIRES` PiperOrigin-RevId: 411069705 Change-Id: I4866a80873d0be4ce43157c713ed476cb1445741",stats_ops.cc,"@@ -1692,6 +1692,15 @@ class BoostedTreesSparseAggregateStatsOp : public OpKernel { const int64_t stats_dims = logits_dims + hessians_dims; const int64_t num_sparse_entries = feature_indices_t->dim_size(0); const int32_t feature_dims = feature_shape(1); + for (int i = 0; i < num_sparse_entries; ++i) { + const int32_t f_dim = feature_indices(i, 1); + OP_REQUIRES( + context, f_dim <= feature_dims, + errors::InvalidArgument( + ""Got invalid feature index feature_indices("", i, ""1) = "", f_dim, + "" which is above "", feature_dims, + "" (from feature_shape: "", feature_shape_t->DebugString(), "")"")); + } OP_REQUIRES(context, num_sparse_entries <= batch_size * feature_dims, errors::InvalidArgument( ""feature_indices dim0 should be <= gradients dim0 * "" @@ -1735,7 +1744,6 @@ class BoostedTreesSparseAggregateStatsOp : public OpKernel { num_nodes, "", got "", instance, "")"")); // the feature dimension. const int32_t f_dim = feature_indices(i, 1); - DCHECK_LE(f_dim, feature_dims); // the bucket id of the value. 
const int32_t bucket_id = feature_values(i); ",0,train 4fa4001d457b1b7e3a38533defbebbed143c7a33,tensorflow/tensorflow,"Expose _log_and_record method to allow easier subclassing of StepCounter PiperOrigin-RevId: 182112167",tpu_estimator.py,"@@ -36,6 +36,7 @@ from tensorflow.contrib.tpu.python.tpu import tpu_feed from tensorflow.contrib.tpu.python.tpu import training_loop from tensorflow.contrib.tpu.python.tpu import util as util_lib +from tensorflow.core.framework.summary_pb2 import Summary from tensorflow.core.protobuf import config_pb2 from tensorflow.python.estimator import estimator as estimator_lib @@ -53,6 +54,7 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.summary import summary +from tensorflow.python.training import basic_session_run_hooks from tensorflow.python.training import evaluation from tensorflow.python.training import session_run_hook from tensorflow.python.training import training @@ -216,6 +218,10 @@ class _TPUContext(object): (mode == model_fn_lib.ModeKeys.EVAL and self._eval_batch_size is None)) + @property + def global_batch_size(self): + return self._train_batch_size + @property def batch_size_for_input_fn(self): """"""Returns the shard batch size for `input_fn`."""""" @@ -1317,6 +1323,31 @@ class _EvalMetrics(object): return eval_metric_ops, eval_update_ops +class ExamplesPerSecondHook(basic_session_run_hooks.StepCounterHook): + """"""Count examples during runtime."""""" + + def __init__(self, + batch_size, + every_n_steps=100, + every_n_secs=None, + output_dir=None, + summary_writer=None): + self._batch_size = batch_size + super(ExamplesPerSecondHook, self).__init__( + every_n_steps=every_n_steps, + every_n_secs=every_n_secs, + output_dir=output_dir, + summary_writer=summary_writer) + + def _log_and_record(self, elapsed_steps, elapsed_time, global_step): + examples_per_sec = self._batch_size * elapsed_steps / elapsed_time + if self._summary_writer is not None: + example_summary = Summary(value=[Summary.Value( + tag='examples_sec', simple_value=examples_per_sec)]) + self._summary_writer.add_summary(example_summary, global_step) + logging.info('examples/sec: %g', examples_per_sec) + + class TPUEstimator(estimator_lib.Estimator): """"""Estimator with TPU support. @@ -1534,8 +1565,8 @@ class TPUEstimator(estimator_lib.Estimator): if max_steps is not None: util_lib.check_positive_integer(max_steps, 'Train max_steps') - return [_TPUStopAtStepHook(self._iterations_per_training_loop, - steps, max_steps)] + return [_TPUStopAtStepHook(self._iterations_per_training_loop, steps, + max_steps)] def _convert_eval_steps_to_hooks(self, steps): with self._ctx.with_mode(model_fn_lib.ModeKeys.EVAL) as ctx: @@ -1547,11 +1578,11 @@ class TPUEstimator(estimator_lib.Estimator): util_lib.check_positive_integer(steps, 'Eval steps') - hooks = [] - hooks.append(evaluation._StopAfterNEvalsHook( # pylint: disable=protected-access - num_evals=steps)) - hooks.append(_SetEvalIterationsHook(steps)) - return hooks + return [ + evaluation._StopAfterNEvalsHook( # pylint: disable=protected-access + num_evals=steps), + _SetEvalIterationsHook(steps) + ] def _call_input_fn(self, input_fn, mode): """"""Calls the input function. 
@@ -1632,6 +1663,7 @@ class TPUEstimator(estimator_lib.Estimator): _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn)) hooks = [ TPUInfeedOutfeedSessionHook(ctx, enqueue_ops), + ExamplesPerSecondHook(self._ctx.global_batch_size), training.LoggingTensorHook( {'loss': array_ops.identity(loss), 'step': training.get_global_step()}, ",0,train 4fa4001d457b1b7e3a38533defbebbed143c7a33,tensorflow/tensorflow,"Expose _log_and_record method to allow easier subclassing of StepCounter PiperOrigin-RevId: 182112167",basic_session_run_hooks.py,"@@ -529,6 +529,14 @@ class StepCounterHook(session_run_hook.SessionRunHook): def before_run(self, run_context): # pylint: disable=unused-argument return SessionRunArgs(self._global_step_tensor) + def _log_and_record(self, elapsed_steps, elapsed_time, global_step): + steps_per_sec = elapsed_steps / elapsed_time + if self._summary_writer is not None: + summary = Summary(value=[Summary.Value( + tag=self._summary_tag, simple_value=steps_per_sec)]) + self._summary_writer.add_summary(summary, global_step) + logging.info(""%s: %g"", self._summary_tag, steps_per_sec) + def after_run(self, run_context, run_values): _ = run_context @@ -540,12 +548,7 @@ class StepCounterHook(session_run_hook.SessionRunHook): elapsed_time, elapsed_steps = self._timer.update_last_triggered_step( global_step) if elapsed_time is not None: - steps_per_sec = elapsed_steps / elapsed_time - if self._summary_writer is not None: - summary = Summary(value=[Summary.Value( - tag=self._summary_tag, simple_value=steps_per_sec)]) - self._summary_writer.add_summary(summary, global_step) - logging.info(""%s: %g"", self._summary_tag, steps_per_sec) + self._log_and_record(elapsed_steps, elapsed_time, global_step) # Check whether the global step has been increased. Here, we do not use the # timer.last_triggered_step as the timer might record a different global ",0,train 6a57baa5153f5f348c0cb577da8854d46282118d,tensorflow/tensorflow,"Plumb through stack_trace() in func execution. Also enable stack_trace propagation in ResourceHandle. PiperOrigin-RevId: 434850304",kernel_and_device.cc,"@@ -395,6 +395,7 @@ KernelAndDeviceFunc::PrepareForRun( opts->step_container = step_container; opts->collective_executor = collective_executor_ ? collective_executor_->get() : nullptr; + opts->stack_trace = stack_trace; opts->stats_collector = nullptr; opts->runner = get_runner(); ",0,train 6a57baa5153f5f348c0cb577da8854d46282118d,tensorflow/tensorflow,"Plumb through stack_trace() in func execution. Also enable stack_trace propagation in ResourceHandle. PiperOrigin-RevId: 434850304",executor.cc,"@@ -74,6 +74,7 @@ limitations under the License. 
#include ""tensorflow/core/profiler/lib/traceme_encode.h"" #include ""tensorflow/core/protobuf/error_codes.pb.h"" #include ""tensorflow/core/util/determinism.h"" +#include ""tensorflow/core/util/managed_stack_trace.h"" #include ""tensorflow/core/util/tensor_slice_reader_cache.h"" namespace tensorflow { @@ -373,6 +374,7 @@ class ExecutorState { ExecutorImpl::KernelStats* const kernel_stats_; CancellationManager* cancellation_manager_; CoordinationServiceAgent* coordination_service_agent_; + absl::optional stack_trace_ = absl::nullopt; // If not null, use this device to schedule intra-op operation std::unique_ptr user_device_; Executor::Args::Runner runner_; @@ -422,6 +424,7 @@ ExecutorState::ExecutorState( kernel_stats_(kernel_stats), cancellation_manager_(args.cancellation_manager), coordination_service_agent_(args.coordination_service_agent), + stack_trace_(args.stack_trace), runner_(args.runner), sync_on_finish_(args.sync_on_finish), run_all_kernels_inline_(args.run_all_kernels_inline), @@ -717,6 +720,7 @@ void ExecutorState::Process(TaggedNode tagged_node, params.tensor_store = tensor_store_; params.cancellation_manager = cancellation_manager_; params.coordination_service_agent = coordination_service_agent_; + params.stack_trace = stack_trace_; params.call_frame = call_frame_; params.function_library = immutable_state_.params().function_library; params.resource_manager = device->resource_manager(); ",0,train 6a57baa5153f5f348c0cb577da8854d46282118d,tensorflow/tensorflow,"Plumb through stack_trace() in func execution. Also enable stack_trace propagation in ResourceHandle. PiperOrigin-RevId: 434850304",executor.h,"@@ -30,6 +30,7 @@ limitations under the License. #include ""tensorflow/core/lib/core/threadpool_interface.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/macros.h"" +#include ""tensorflow/core/util/managed_stack_trace.h"" namespace tensorflow { @@ -105,6 +106,7 @@ class Executor { int64_t start_time_usecs = 0; // The deadline for the kernel to complete by. Empty if unspecified. absl::optional deadline; + absl::optional stack_trace = absl::nullopt; // If true, calls Sync() on the device. bool sync_on_finish = false; ",0,train 6a57baa5153f5f348c0cb577da8854d46282118d,tensorflow/tensorflow,"Plumb through stack_trace() in func execution. Also enable stack_trace propagation in ResourceHandle. PiperOrigin-RevId: 434850304",function.cc,"@@ -543,6 +543,7 @@ class CallOp : public AsyncOpKernel { opts.runner = ctx->runner(); opts.run_all_kernels_inline = ctx->run_all_kernels_inline(); opts.collective_executor = ctx->collective_executor(); + opts.stack_trace = ctx->stack_trace(); std::vector args; args.reserve(ctx->num_inputs()); for (int i = 0; i < ctx->num_inputs(); ++i) { @@ -1031,6 +1032,7 @@ void FunctionLibraryRuntimeImpl::ExecutorArgsFromOptions( exec_args->run_all_kernels_inline = run_opts.run_all_kernels_inline; exec_args->user_intra_op_threadpool = run_opts.user_intra_op_threadpool; exec_args->coordination_service_agent = run_opts.coordination_service_agent; + exec_args->stack_trace = run_opts.stack_trace; } void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle, ",0,train 6a57baa5153f5f348c0cb577da8854d46282118d,tensorflow/tensorflow,"Plumb through stack_trace() in func execution. Also enable stack_trace propagation in ResourceHandle. 
PiperOrigin-RevId: 434850304",single_threaded_executor.cc,"@@ -315,6 +315,7 @@ class SingleThreadedExecutorImpl : public Executor { params.resource_manager = device->resource_manager(); params.step_container = args.step_container; params.collective_executor = args.collective_executor; + params.stack_trace = args.stack_trace; params.slice_reader_cache = nullptr; // TODO(mrry): Too severe? params.inputs = &node_inputs; params.input_alloc_attrs = &input_alloc_attrs; ",0,train 6a57baa5153f5f348c0cb577da8854d46282118d,tensorflow/tensorflow,"Plumb through stack_trace() in func execution. Also enable stack_trace propagation in ResourceHandle. PiperOrigin-RevId: 434850304",function.h,"@@ -843,6 +843,8 @@ class FunctionLibraryRuntime { StepStatsCollectorInterface* stats_collector = nullptr; CoordinationServiceAgent* coordination_service_agent = nullptr; + absl::optional stack_trace = absl::nullopt; + std::function)>* runner = nullptr; // Parameters for remote function execution. ",0,train 6a57baa5153f5f348c0cb577da8854d46282118d,tensorflow/tensorflow,"Plumb through stack_trace() in func execution. Also enable stack_trace propagation in ResourceHandle. PiperOrigin-RevId: 434850304",resource_variable_ops.cc,"@@ -253,7 +253,8 @@ void VarHandleOp::Compute(OpKernelContext* ctx) { ResourceMgr* mgr = ctx->resource_manager(); ResourceHandle handle = ResourceHandle::MakeRefCountingHandle( resource, ctx->device()->name(), - std::vector{dtype_and_shape_}); + std::vector{dtype_and_shape_}, + ctx->stack_trace()); // TODO(b/203901837): See if we can abolish all code paths that lookup // anonymous variables and then stop publishing them to the manager. OP_REQUIRES_OK(ctx, mgr->CreateUnowned(handle.container(), ",0,train cc10ac9b7d593375a7cee0c167c20989dc29e8cf,tensorflow/tensorflow,remove unnecessary lambda,linalg_ops.py,"@@ -545,7 +545,7 @@ def norm(tensor, if is_matrix_norm and ord in [2, 2.0]: axes = list(range(rank)) perm_before = list(filter(lambda i: i not in axis, axes)) + list(axis) - perm_after = list(map(lambda i: perm_before.index(i), axes)) + perm_after = list(map(perm_before.index, axes)) result = array_ops.transpose(array_ops.expand_dims( math_ops.reduce_max(gen_linalg_ops.svd( array_ops.transpose(tensor, perm=perm_before), ",0,train f17620153c47370f30a84b99eaba82bef8cd7d8e,tensorflow/tensorflow,"Handle delayed variable initialization in MirroredStrategy. Test with RNN layer. Bug reported and solution suggested in #19069 PiperOrigin-RevId: 196718454",mirrored_strategy.py,"@@ -111,10 +111,13 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): kwargs[""name""] = ""%s/replica_%d"" % (var0name, i) # Initialize replicas with the same value: if context.executing_eagerly(): - initial_value = index[devices[0]].value() + kwargs[""initial_value""] = array_ops.identity( + index[devices[0]].value()) else: - initial_value = index[devices[0]].initial_value - kwargs[""initial_value""] = array_ops.identity(initial_value) + def initial_value_fn(device=d): + with ops.device(device): + return array_ops.identity(index[devices[0]].initial_value) + kwargs[""initial_value""] = initial_value_fn with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT): v = next_creator(*args, **kwargs) assert not isinstance(v, values.DistributedVariable) ",0,train f17620153c47370f30a84b99eaba82bef8cd7d8e,tensorflow/tensorflow,"Handle delayed variable initialization in MirroredStrategy. Test with RNN layer. 
Bug reported and solution suggested in #19069 PiperOrigin-RevId: 196718454",mirrored_strategy_multigpu_test.py,"@@ -28,9 +28,12 @@ from tensorflow.python.data.ops import dataset_ops from tensorflow.python.eager import context from tensorflow.python.eager import test from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.layers import core +from tensorflow.python.ops import rnn +from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.training import distribute as distribute_lib @@ -436,6 +439,30 @@ class MirroredStrategyVariableCreationTest(test.TestCase): self.assertEquals(""foo/"" + name + "":0"", v0.name) self.assertEquals(""tower_1/foo/"" + name + "":0"", v1.name) + def testDynamicRnnVariables(self): + def model_fn(): + inputs = constant_op.constant(2 * [2 * [[0.0, 1.0, 2.0, 3.0, 4.0]]]) + cell_fw = rnn_cell_impl.LSTMCell(300) + cell_bw = rnn_cell_impl.LSTMCell(300) + (outputs, _) = rnn.bidirectional_dynamic_rnn( + cell_fw, + cell_bw, + inputs, + dtype=dtypes.float32) + return outputs + + dist = mirrored_strategy.MirroredStrategy( + [""/device:GPU:0"", ""/device:CPU:0""]) + + with context.graph_mode(), dist.scope(): + result = dist.call_for_each_tower(model_fn, run_concurrently=False) + # Two variables are created by the RNN layer. + self.assertEquals(2, len(result)) + for v in result: + self.assertIsInstance(v, values.DistributedValues) + _, v1 = dist.unwrap(v) + self.assertStartsWith(v1.name, ""tower_1/"") + if __name__ == ""__main__"": test.main() ",0,train 2efd47de550fa1eceb12d36a87449c4cbdf2f861,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2022-03-08 PiperOrigin-RevId: 433145847",compat.py,"@@ -29,7 +29,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2022, 3, 7) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2022, 3, 8) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train e7e5bc9440792a187dd7847a72088ae50b9ad2be,tensorflow/tensorflow,"Fix issue using python flatbuffers library. 
PiperOrigin-RevId: 331633117 Change-Id: I92b1d4af9e046a6f0e610365ce95e90fa7e05921",util.py,"@@ -27,7 +27,6 @@ from absl import logging import six from six.moves import range -import flatbuffers from tensorflow.core.protobuf import config_pb2 as _config_pb2 from tensorflow.core.protobuf import graph_debug_info_pb2 from tensorflow.core.protobuf import meta_graph_pb2 as _meta_graph_pb2 @@ -578,7 +577,7 @@ def _convert_model_from_bytearray_to_object(model_bytearray): def _convert_model_from_object_to_bytearray(model_object): """"""Converts a tflite model from a parsable object into a bytearray."""""" # Initial size of the buffer, which will grow automatically if needed - builder = flatbuffers.Builder(1024) + builder = schema_fb.flatbuffers.Builder(1024) model_offset = model_object.Pack(builder) builder.Finish(model_offset, file_identifier=_TFLITE_FILE_IDENTIFIER) return bytes(builder.Output()) ",0,train e7e5bc9440792a187dd7847a72088ae50b9ad2be,tensorflow/tensorflow,"Fix issue using python flatbuffers library. PiperOrigin-RevId: 331633117 Change-Id: I92b1d4af9e046a6f0e610365ce95e90fa7e05921",flatbuffer_utils.py,"@@ -30,7 +30,6 @@ import os import random import re -import flatbuffers from tensorflow.lite.python import schema_py_generated as schema_fb _TFLITE_FILE_IDENTIFIER = b'TFL3' @@ -84,7 +83,7 @@ def read_model_with_mutable_tensors(input_tflite_file): def convert_object_to_bytearray(model_object): """"""Converts a tflite model from an object to a immutable bytearray."""""" # Initial size of the buffer, which will grow automatically if needed - builder = flatbuffers.Builder(1024) + builder = schema_fb.flatbuffers.Builder(1024) model_offset = model_object.Pack(builder) builder.Finish(model_offset, file_identifier=_TFLITE_FILE_IDENTIFIER) model_bytearray = bytes(builder.Output()) @@ -157,7 +156,7 @@ def randomize_weights(model, random_seed=0): def xxd_output_to_bytes(input_cc_file): - """"""Converts xxd output C++ source file to bytes (immutable) + """"""Converts xxd output C++ source file to bytes (immutable). Args: input_cc_file: Full path name to th C++ source file dumped by xxd @@ -196,7 +195,7 @@ def xxd_output_to_bytes(input_cc_file): def xxd_output_to_object(input_cc_file): - """"""Converts xxd output C++ source file to object + """"""Converts xxd output C++ source file to object. Args: input_cc_file: Full path name to th C++ source file dumped by xxd ",0,train e7e5bc9440792a187dd7847a72088ae50b9ad2be,tensorflow/tensorflow,"Fix issue using python flatbuffers library. PiperOrigin-RevId: 331633117 Change-Id: I92b1d4af9e046a6f0e610365ce95e90fa7e05921",test_utils.py,"@@ -21,7 +21,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import flatbuffers from tensorflow.lite.python import schema_py_generated as schema_fb TFLITE_SCHEMA_VERSION = 3 @@ -29,7 +28,7 @@ TFLITE_SCHEMA_VERSION = 3 def build_mock_flatbuffer_model(): """"""Creates a flatbuffer containing an example model."""""" - builder = flatbuffers.Builder(1024) + builder = schema_fb.flatbuffers.Builder(1024) schema_fb.BufferStart(builder) buffer0_offset = schema_fb.BufferEnd(builder) ",0,train a847d09b50066f03a9b45156044b1517367eeea4,tensorflow/tensorflow,"Preserve buffer allocation source locations in LHLO. 
PiperOrigin-RevId: 396558903 Change-Id: Ia1375ef8da22746cf07b7ef17dac2046e6dde496",mhlo_to_lhlo_with_xla.cc,"@@ -1577,6 +1577,16 @@ Status LhloDialectEmitter::Initialize() { NamedAttrList arg_attr_list; mlir::Type arg_type = MemRefType::get({alloc->size()}, i8_type_); + // Propagate source location information for every HLOInstruction that + // uses this allocation. + std::vector buf_locs; + buf_locs.reserve(alloc->assigned_buffers().size()); + for (const auto& entry : alloc->assigned_buffers()) { + const xla::HloValue* hlo_value = entry.first; + buf_locs.push_back(getLocation(hlo_value->instruction())); + } + mlir::Location loc = builder_.getFusedLoc(buf_locs); + if (alloc->is_entry_computation_parameter()) { arg_attr_list.set(""lmhlo.params"", builder_.getIndexAttr(alloc->parameter_number())); @@ -1615,7 +1625,7 @@ Status LhloDialectEmitter::Initialize() { } } } - block->addArgument(arg_type); + block->addArgument(arg_type, loc); allocations_[alloc] = block->getArguments().back(); args_attrs.push_back(arg_attr_list.getDictionary(builder_.getContext())); } ",0,test da5b8ea5bd378cad5e313a540ba40a102dcddf6d,tensorflow/tensorflow,"Use empty rather than alternative size checks (readability-container-size-empty) PiperOrigin-RevId: 369912337 Change-Id: I74688afc85c9999c9a31d14f02abcf909d915686",tf_ops_a_m.cc,"@@ -1618,7 +1618,7 @@ static LogicalResult inferConvReturnTypes( ""D tensor""); if (padding == tensorflow::Padding::EXPLICIT) { - if (explicit_padding.size() == 0) { + if (explicit_padding.empty()) { return emitOptionalError(location, ""requires attribute 'explicit_paddings' with "" ""'EXPLICIT' padding mode""); ",0,train da5b8ea5bd378cad5e313a540ba40a102dcddf6d,tensorflow/tensorflow,"Use empty rather than alternative size checks (readability-container-size-empty) PiperOrigin-RevId: 369912337 Change-Id: I74688afc85c9999c9a31d14f02abcf909d915686",cluster_formation.cc,"@@ -188,7 +188,7 @@ void BuildClusters(Block* block, OpBuilder builder) { llvm::MapVector nearest_clusters; for (Operation& op : llvm::make_early_inc_range(*block)) { auto device = GetDevice(&op); - if (device == """") continue; + if (device.empty()) continue; // If no cluster of same device has been formed yet, create a new cluster // with op alone. ",0,train da5b8ea5bd378cad5e313a540ba40a102dcddf6d,tensorflow/tensorflow,"Use empty rather than alternative size checks (readability-container-size-empty) PiperOrigin-RevId: 369912337 Change-Id: I74688afc85c9999c9a31d14f02abcf909d915686",tf_device_assignment.cc,"@@ -38,7 +38,7 @@ class SimpleTFDeviceAssignmentPass getFunction().walk([&](Operation* op) { if (auto device_attr = op->getAttrOfType(""device"")) { // We assign default device to ops with device attribute that is empty. - if (device_attr.getValue() == """") { + if (device_attr.getValue().empty()) { op->setAttr(""device"", builder.getStringAttr(default_device_)); } } else if (op->getDialect() == tf) { ",0,train d2c578c71901275323ba3c00c57ec2e91531a698,tensorflow/tensorflow,"[XLA:SPMD] Avoid designated initializer. It broke external build. 
PiperOrigin-RevId: 311447720 Change-Id: I460624dc2242deead277eb70fbd1c6a0701250f6",spmd_partitioner.h,"@@ -370,14 +370,15 @@ class SpmdPartitioningVisitor : public DfsHloVisitorWithDefault { int64 NewChannel() { return (*next_channel_id_)++; } PartitionedHlo::PartitioningState MakePartitioningState() { - return PartitionedHlo::PartitioningState{ - .b = &b_, - .module = module_, - .num_replicas = num_replicas_, - .partition_id = partition_id_, - .collective_ops_creator = collective_ops_creator_, - .next_channel_id = next_channel_id_, - .reshard_cache = &reshard_cache_}; + PartitionedHlo::PartitioningState state; + state.b = &b_; + state.module = module_; + state.num_replicas = num_replicas_; + state.partition_id = partition_id_; + state.collective_ops_creator = collective_ops_creator_; + state.next_channel_id = next_channel_id_; + state.reshard_cache = &reshard_cache_; + return state; } SpmdBuilder* builder() { return &b_; } ",0,train f24f5cd47493b3db9a8b053bd4723b18ce57ae0f,tensorflow/tensorflow,"Simplifies `testBatch` to eliminate testing timeouts. Change: 134301154",tensorflow_dataframe_test.py,"@@ -153,8 +153,8 @@ class TensorFlowDataFrameTestCase(tf.test.TestCase): tensorflow_df = df.TensorFlowDataFrame.from_pandas(pandas_df, shuffle=False) # Rebatch `df` into the following sizes successively. - batch_sizes = [8, 4, 7] - num_batches = 10 + batch_sizes = [4, 7] + num_batches = 3 final_batch_size = batch_sizes[-1] ",0,test eca0365de37ebed58d98e22b0b6542512b7f90c8,tensorflow/tensorflow,"Add examples for `tf.unstack`. PiperOrigin-RevId: 342952616 Change-Id: I5367754d272ea5b6e367becc19d6eebb3b9a9de9",array_ops.py,"@@ -1549,22 +1549,101 @@ ops.register_tensor_conversion_function((list, tuple), def unstack(value, num=None, axis=0, name=""unstack""): """"""Unpacks the given dimension of a rank-`R` tensor into rank-`(R-1)` tensors. - Unpacks `num` tensors from `value` by chipping it along the `axis` dimension. - If `num` is not specified (the default), it is inferred from `value`'s shape. - If `value.shape[axis]` is not known, `ValueError` is raised. + Unpacks tensors from `value` by chipping it along the `axis` dimension. - For example, given a tensor of shape `(A, B, C, D)`; - - If `axis == 0` then the i'th tensor in `output` is the slice - `value[i, :, :, :]` and each tensor in `output` will have shape `(B, C, D)`. - (Note that the dimension unpacked along is gone, unlike `split`). + >>> x = tf.reshape(tf.range(12), (3,4)) + >>> + >>> p, q, r = tf.unstack(x) + >>> p.shape.as_list() + [4] - If `axis == 1` then the i'th tensor in `output` is the slice - `value[:, i, :, :]` and each tensor in `output` will have shape `(A, C, D)`. - Etc. + >>> i, j, k, l = tf.unstack(x, axis=1) + >>> i.shape.as_list() + [3] This is the opposite of stack. + >>> x = tf.stack([i, j, k, l], axis=1) + + More generally if you have a tensor of shape `(A, B, C, D)`: + + >>> A, B, C, D = [2, 3, 4, 5] + >>> t = tf.random.normal(shape=[A, B, C, D]) + + The number of tensor returned is equal to the length of the target `axis`: + + >>> axis = 2 + >>> items = tf.unstack(t, axis=axis) + >>> len(items) == t.shape[axis] + True + + The shape of each result tensor is equal to the shape of the input tensor, + with the target `axis` removed. + + >>> items[0].shape.as_list() # [A, B, D] + [2, 3, 5] + + The value of each tensor `items[i]` is equal to the slice of `input` across + `axis` at index `i`: + + >>> for i in range(len(items)): + ... slice = t[:,:,i,:] + ... 
assert tf.reduce_all(slice == items[i]) + + #### Python iterable unpacking + + With eager execution you _can_ unstack the 0th axis of a tensor using python's + iterable unpacking: + + >>> t = tf.constant([1,2,3]) + >>> a,b,c = t + + `unstack` is still necessary because Iterable unpacking doesn't work in + a `@tf.function`: Symbolic tensors are not iterable. + + You need to use `tf.unstack` here: + + >>> @tf.function + ... def bad(t): + ... a,b,c = t + ... return a + >>> + >>> bad(t) + Traceback (most recent call last): + ... + OperatorNotAllowedInGraphError: ... + + >>> @tf.function + ... def good(t): + ... a,b,c = tf.unstack(t) + ... return a + >>> + >>> good(t).numpy() + 1 + + #### Unknown shapes + + Eager tensors have concrete values, so their shape is always known. + Inside a `tf.function` the symbolic tensors may have unknown shapes. + If the length of `axis` is unknown `tf.unstack` will fail because it cannot + handle an unknown number of tensors: + + >>> @tf.function(input_signature=[tf.TensorSpec([None], tf.float32)]) + ... def bad(t): + ... tensors = tf.unstack(t) + ... return tensors[0] + >>> + >>> bad(tf.constant([1,2,3])) + Traceback (most recent call last): + ... + ValueError: Cannot infer num from shape (None,) + + If you know the `axis` length you can pass it as the `num` argument. But this + must be a constant value. + + If you actually need a variable number of tensors in a single `tf.function` + trace, you will need to use exlicit loops and a `tf.TensorArray` instead. + Args: value: A rank `R > 0` `Tensor` to be unstacked. num: An `int`. The length of the dimension `axis`. Automatically inferred if @@ -1577,8 +1656,9 @@ def unstack(value, num=None, axis=0, name=""unstack""): The list of `Tensor` objects unstacked from `value`. Raises: + ValueError: If `axis` is out of the range `[-R, R)`. ValueError: If `num` is unspecified and cannot be inferred. - ValueError: If `axis` is out of the range [-R, R). + InvalidArgumentError: If `num` does not match the shape of `value`. """""" if num is None: value = ops.convert_to_tensor(value) ",0,test 7e8073610db8019414bdfee2d9043e65bc698484,tensorflow/tensorflow,"[NFC] Expose GetNcclCollectivePermuteConfig() as a static method of NcclCollectivePermuteThunk. PiperOrigin-RevId: 393457374 Change-Id: I91fd782edb99d33be0c3f6d3fedaf6b9d660dce0",nccl_collective_permute_thunk.h,"@@ -56,6 +56,10 @@ struct NcclCollectivePermuteConfig : public NcclCollectiveConfig { // Thunk that performs a NCCL-based collective permute. class NcclCollectivePermuteThunk : public NcclCollectiveThunk { public: + static NcclCollectivePermuteConfig GetNcclCollectivePermuteConfig( + mlir::lmhlo::CollectivePermuteOp op, int64_t replica_count, + int64_t partition_count); + NcclCollectivePermuteThunk(ThunkInfo thunk_info, mlir::lmhlo::CollectivePermuteOp op, int64_t replica_count, int64_t partition_count, @@ -81,10 +85,6 @@ class NcclCollectivePermuteThunk : public NcclCollectiveThunk { const NcclCollectiveConfig& config() const override { return config_; } private: - static NcclCollectivePermuteConfig GetNcclCollectivePermuteConfig( - mlir::lmhlo::CollectivePermuteOp op, int64_t replica_count, - int64_t partition_count); - const NcclCollectivePermuteConfig config_; const Buffer buffer_; }; ",0,test 2f7455d56c8328fd1b232e5bca68b636e0a34822,tensorflow/tensorflow,"Update minimum op version for TF 2.2.0 branch cut. 
PiperOrigin-RevId: 296328883 Change-Id: I3deda696e7ad2c35cbd580decd72ca79e91963e4",op_version.cc,"@@ -89,7 +89,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kGatherNd, 1}, ""1.14.0""}, {{OperatorType::kSvdf, 1}, ""1.5.0""}, {{OperatorType::kSvdf, 2}, ""1.14.0""}, - {{OperatorType::kSvdf, 3}, kPendingReleaseOpVersion}, + {{OperatorType::kSvdf, 3}, ""2.2.0""}, {{OperatorType::kL2Normalization, 1}, ""1.5.0""}, {{OperatorType::kL2Normalization, 2}, ""1.14.0""}, {{OperatorType::kL2Pool, 1}, ""1.5.0""}, @@ -137,7 +137,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kRelu6, 2}, ""1.14.0""}, {{OperatorType::kResizeBilinear, 1}, ""1.7.0""}, {{OperatorType::kResizeBilinear, 2}, ""1.14.0""}, - {{OperatorType::kResizeBilinear, 3}, kPendingReleaseOpVersion}, + {{OperatorType::kResizeBilinear, 3}, ""2.2.0""}, {{OperatorType::kResizeNearestNeighbor, 1}, ""1.13.1""}, {{OperatorType::kResizeNearestNeighbor, 2}, ""1.14.0""}, {{OperatorType::kSqueeze, 1}, ""1.6.0""}, @@ -171,7 +171,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kCTCBeamSearchDecoder, 1}, ""1.11.0""}, {{OperatorType::kUnpack, 1}, ""1.11.0""}, {{OperatorType::kUnpack, 2}, ""1.14.0""}, - {{OperatorType::kUnpack, 3}, kPendingReleaseOpVersion}, + {{OperatorType::kUnpack, 3}, ""2.2.0""}, {{OperatorType::kLeakyRelu, 1}, ""1.13.1""}, {{OperatorType::kLogistic, 1}, ""1.14.0""}, {{OperatorType::kLogistic, 2}, ""1.14.0""}, @@ -198,10 +198,10 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kLess, 2}, ""1.14.0""}, {{OperatorType::kLessEqual, 1}, ""1.14.0""}, {{OperatorType::kLessEqual, 2}, ""1.14.0""}, - {{OperatorType::kSegmentSum, 1}, kPendingReleaseOpVersion}, + {{OperatorType::kSegmentSum, 1}, ""2.2.0""}, {{OperatorType::kSelect, 1}, ""1.14.0""}, {{OperatorType::kSelect, 2}, ""1.14.0""}, - {{OperatorType::kSelectV2, 1}, kPendingReleaseOpVersion}, + {{OperatorType::kSelectV2, 1}, ""2.2.0""}, {{OperatorType::kFloorDiv, 1}, ""1.14.0""}, {{OperatorType::kFloorDiv, 2}, ""1.14.0""}, {{OperatorType::kFloor, 1}, ""1.9.0""}, @@ -232,7 +232,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kHardSwish, 1}, ""1.15.0""}, {{OperatorType::kFill, 1}, ""1.13.0""}, {{OperatorType::kReverseV2, 1}, ""1.14.0""}, - {{OperatorType::kReverseV2, 2}, kPendingReleaseOpVersion}, + {{OperatorType::kReverseV2, 2}, ""2.2.0""}, {{OperatorType::kRank, 1}, ""1.14.0""}, }); ",0,train 8933b8a21280696ab119b63263babdb54c298538,tensorflow/tensorflow,"Fix a null pointer exception caused by branching on uninitialized data. This is due to not checking that the params for the quantization exists. If there is no quantization, we should not access the `.params` field. 
PiperOrigin-RevId: 385173491 Change-Id: I8fc476c4b274fdb21ba741caa0fbc6d1b8840663",depthwise_conv.cc,"@@ -176,6 +176,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { if (data_type != kTfLiteFloat32) { TF_LITE_ENSURE_EQ(context, filter->quantization.type, kTfLiteAffineQuantization); + TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization); const auto* affine_quantization = reinterpret_cast( filter->quantization.params); @@ -195,6 +196,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } if (is_hybrid) { + TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization); const auto* affine_quantization = reinterpret_cast( filter->quantization.params); @@ -495,6 +497,7 @@ TfLiteStatus EvalHybridPerChannel(TfLiteContext* context, TfLiteNode* node, op_params.weights_offset = 0; op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; + TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization); const auto* affine_quantization = reinterpret_cast(filter->quantization.params); if (kernel_type == kReference) { ",0,train 0131d1a7d052ff5104c8c4ab22944b95ece130ed,tensorflow/tensorflow,"Add absl::Cord support to open source TensorFlow PiperOrigin-RevId: 341926653 Change-Id: Id6174cf149526cd07670bebb2be6c91dbbf11a50",cord.h,"@@ -16,6 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_CORD_H_ #define TENSORFLOW_CORE_PLATFORM_DEFAULT_CORD_H_ -// TODO(ebrevdo): Fill this in. +#include ""absl/strings/cord.h"" #endif // TENSORFLOW_CORE_PLATFORM_DEFAULT_CORD_H_ ",0,train 4aaab50552a3cdb4b785653f071ae6c7193992ca,tensorflow/tensorflow,CLN: fix coding style,array_grad.py,"@@ -763,9 +763,10 @@ def _ExtractImagePatchesGrad(op, grad): (1, rows_out, cols_out, ksize_r * ksize_c)) # Construct mapping table for indices: (input -> output). - idx_matrix = array_ops.concat([array_ops.expand_dims(input_idx_patched, axis=-1), - array_ops.expand_dims(output_idx, axis=-1)], - axis=-1) + idx_matrix = array_ops.concat( + [array_ops.expand_dims(input_idx_patched, axis=-1), + array_ops.expand_dims(output_idx, axis=-1)], + axis=-1) idx_map = array_ops.reshape(idx_matrix, (-1, 2)) sp_shape = (input_indices_num, output_indices_num) ",0,train dbc129a925f936e3179c93a7908bc01132c9a61e,tensorflow/tensorflow,"Add MLIR generated Softplus GPU kernels. Not enabled by default yet. PiperOrigin-RevId: 383389507 Change-Id: Ie0c81f5c95ce7a3a2514d6e00c524c92830cc15d",gpu_op_softplus.cc,"@@ -0,0 +1,25 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" +#include ""tensorflow/core/kernels/mlir_generated/base_gpu_op.h"" + +namespace tensorflow { + +GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Softplus, DT_HALF); +GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Softplus, DT_FLOAT); +GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Softplus, DT_DOUBLE); + +} // namespace tensorflow ",0,train dbc129a925f936e3179c93a7908bc01132c9a61e,tensorflow/tensorflow,"Add MLIR generated Softplus GPU kernels. Not enabled by default yet. PiperOrigin-RevId: 383389507 Change-Id: Ie0c81f5c95ce7a3a2514d6e00c524c92830cc15d",gpu_unary_ops_test.cc,"@@ -899,6 +899,28 @@ GENERATE_DEFAULT_TEST(Sinh, DT_FLOAT, DT_FLOAT, std::sinh, GENERATE_DEFAULT_TEST(Sinh, DT_DOUBLE, DT_DOUBLE, std::sinh, test::OpsTestConfig()) +/// Test `tf.Softplus`. + +// Reference implementation +template +T baseline_softplus(T x) { + T epsilon = std::numeric_limits::epsilon(); + T threshold = 2 + std::log(epsilon); + if (x > -threshold && x < threshold) { + return std::exp(x); + } + return std::log1p(std::exp(x)); +} + +GENERATE_DEFAULT_TEST_2(Softplus, DT_HALF, DT_FLOAT, DT_HALF, DT_FLOAT, + baseline_softplus, test::OpsTestConfig()) + +GENERATE_DEFAULT_TEST(Softplus, DT_FLOAT, DT_FLOAT, baseline_softplus, + test::OpsTestConfig()) + +GENERATE_DEFAULT_TEST(Softplus, DT_DOUBLE, DT_DOUBLE, baseline_softplus, + test::OpsTestConfig()) + /// Test `tf.Sqrt`. GENERATE_DEFAULT_TEST(Sqrt, DT_FLOAT, DT_FLOAT, std::sqrt, ",0,train dbc129a925f936e3179c93a7908bc01132c9a61e,tensorflow/tensorflow,"Add MLIR generated Softplus GPU kernels. Not enabled by default yet. PiperOrigin-RevId: 383389507 Change-Id: Ie0c81f5c95ce7a3a2514d6e00c524c92830cc15d",softplus_op.cc,"@@ -91,13 +91,14 @@ TF_CALL_FLOAT_TYPES(REGISTER_KERNELS); (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) // Forward declarations of the functor specializations for GPU. namespace functor { -#define DECLARE_GPU_SPEC(T) \ - template <> \ - void Softplus::operator()( \ - const GPUDevice& d, typename TTypes::ConstTensor features, \ - typename TTypes::Tensor activations); \ - extern template struct Softplus; \ - \ +#define DECLARE_SOFTPLUS_GPU_SPEC(T) \ + template <> \ + void Softplus::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor features, \ + typename TTypes::Tensor activations); \ + extern template struct Softplus; + +#define DECLARE_SOFTPLUS_GRAD_GPU_SPEC(T) \ template <> \ void SoftplusGrad::operator()( \ const GPUDevice& d, typename TTypes::ConstTensor gradients, \ @@ -105,20 +106,34 @@ namespace functor { typename TTypes::Tensor backprops); \ extern template struct SoftplusGrad; -TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) +TF_CALL_GPU_NUMBER_TYPES(DECLARE_SOFTPLUS_GPU_SPEC); +#endif + +TF_CALL_GPU_NUMBER_TYPES(DECLARE_SOFTPLUS_GRAD_GPU_SPEC); } // namespace functor // Registration of the GPU implementations. 
-#define REGISTER_GPU_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name(""Softplus"").Device(DEVICE_GPU).TypeConstraint(""T""), \ - SoftplusOp); \ +#define REGISTER_SOFTPLUS_GPU_KERNELS(type) \ + REGISTER_KERNEL_BUILDER( \ + Name(""Softplus"").Device(DEVICE_GPU).TypeConstraint(""T""), \ + SoftplusOp); + +#define REGISTER_SOFTPLUS_GRAD_GPU_KERNELS(type) \ REGISTER_KERNEL_BUILDER( \ Name(""SoftplusGrad"").Device(DEVICE_GPU).TypeConstraint(""T""), \ SoftplusGradOp); -TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); -#undef REGISTER_GPU_KERNELS +#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \ + !defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) +TF_CALL_GPU_NUMBER_TYPES(REGISTER_SOFTPLUS_GPU_KERNELS); +#endif + +TF_CALL_GPU_NUMBER_TYPES(REGISTER_SOFTPLUS_GRAD_GPU_KERNELS); + +#undef REGISTER_SOFTPLUS_GPU_KERNELS +#undef REGISTER_SOFTPLUS_GRAD_GPU_KERNELS #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM ",0,train 453c9d6bc11fd09523ea0c2ebee8e82b94f76654,tensorflow/tensorflow,"[tf:tfrt] Do not outline too large clusters PiperOrigin-RevId: 416263997 Change-Id: I516c2f960450aa7bcf5f5e3459bb22953aea03c0",clustering.cc,"@@ -826,6 +826,9 @@ mlir::LogicalResult VerifyCluster(const Cluster& cluster) { (void)inserted; } + // TODO(b/202247905): Large clusters can lead to a very long compilation time. + if (ops.size() > 10) return failure(); + // TODO(ezhulenev): This is a temporary workaround to disable forming clusters // with known compilation problems. for (Operation* op : ops) { ",0,train 27de8e717c1bec91398f5a6be6c7287b657fc960,tensorflow/tensorflow,"Improve shape function for CudnnRNNParamsSize In cudnn_rnn_ops.cc, the CudnnRNNParamsSize does not have restrictions on num_layers, num_units, and input_size, though they all should be scalars. This fix adds the shape check of num_layers, num_units, and input_size for CudnnRNNParamsSize. Signed-off-by: Yong Tang ",cudnn_rnn_ops.cc,"@@ -52,6 +52,12 @@ REGISTER_OP(""CudnnRNNParamsSize"") .Attr(""seed2: int = 0"") .Output(""params_size: S"") .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + // num_layers, num_units, and input_size should be scalars. + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + c->set_output(0, c->Vector(1)); return Status::OK(); }); ",0,test 20bab61688b60300eafb2c7cc48b9ad542bcb1a4,tensorflow/tensorflow,"Update tensorflow/core/kernels/mkl_relu_op.cc Co-Authored-By: guizili0 ",mkl_relu_op.cc,"@@ -1368,7 +1368,7 @@ class MklLeakyReluGradOp : public MklReluGradOpBase { OP_REQUIRES_OK(context, context->GetAttr(""alpha"", &alpha)); OP_REQUIRES( context, alpha <= 1, - errors::InvalidArgument(""MKL LeakyRelu only support alpha <= 1. "" + errors::InvalidArgument(""MKL LeakyRelu only supports alpha <= 1. "" ""alpha is: "", alpha)); ",0,train 0822126d7e0b9cd612dffaf5a89eb930e15e37f9,tensorflow/tensorflow,"Add FunctionSpec to def_funcion.PolymorphicFunction. In the future, this should be consolidated with function.PolymorphicFunction's FunctionSpec. PiperOrigin-RevId: 226170883",def_function.py,"@@ -236,6 +236,10 @@ class PolymorphicFunction(object): """""" self._python_function = python_function self._input_signature = input_signature + # TODO(vbardiovsky): Both _stateful_fn and _stateless_fn are populating the + # same FunctionSpec. Consider removing it from both and passing in instead. 
+ self._function_spec = function_lib.FunctionSpec.from_function_and_signature( + python_function, input_signature) self._autograph = autograph self._experimental_autograph_options = experimental_autograph_options if self._experimental_autograph_options is not None: @@ -265,15 +269,8 @@ class PolymorphicFunction(object): def _canonicalize_function_inputs(self, args, kwds): """"""Canonicalize the inputs to the Python function."""""" - if not self._stateful_fn: - raise ValueError( - ""_canonicalize_function_inputs must be called only after _initialize "" - ""has run."") - # pylint: disable=protected-access if self._input_signature is None or args or kwds: - return self._stateful_fn._function_spec.canonicalize_function_inputs( - *args, **kwds) - # pylint: enable=protected-access + return self._function_spec.canonicalize_function_inputs(*args, **kwds) # pylint: disable=protected-access # If an input signature is defined, we may need to fetch a concrete function # without any inputs specified. In this case args and kwds should be ignored # but running _canonicalize_function_inputs would raise an exception. @@ -405,6 +402,10 @@ class PolymorphicFunction(object): def input_signature(self): return self._input_signature + @property + def function_spec(self): + return self._function_spec + def get_initialization_function(self, *args, **kwargs): """"""Returns a `Function` object which initializes this function's variables. ",0,train 54ac76671b51b3aae688c4906101c1334cd95c4f,tensorflow/tensorflow,Change Grappler optimizers run by the TRT converter to avoid constant duplication,base_test.py,"@@ -126,16 +126,15 @@ class SimpleMultiEnginesTest(trt_test.TfTrtIntegrationTestBase): class SimpleMultiEnginesTest2(trt_test.TfTrtIntegrationTestBase): def GraphFn(self, inp): - """"""Create a graph containing two segment."""""" + """"""Create a graph containing two segments."""""" n = inp for i in range(2): c = constant_op.constant(1.0, name=""c%d"" % i) n = math_ops.add(n, c, name=""add%d"" % i) n = math_ops.mul(n, n, name=""mul%d"" % i) - edge = self.trt_incompatible_op(n, name=""incompatible"") - with ops.control_dependencies([edge]): - c = constant_op.constant(1.0, name=""c2"") - n = math_ops.add(n, c, name=""add2"") + n = self.trt_incompatible_op(n, name=""incompatible"") + c = constant_op.constant(1.0, name=""c2"") + n = math_ops.add(n, c, name=""add2"") n = math_ops.mul(n, n, name=""mul2"") c = constant_op.constant(1.0, name=""c3"") n = math_ops.add(n, c, name=""add3"") ",0,train 54ac76671b51b3aae688c4906101c1334cd95c4f,tensorflow/tensorflow,Change Grappler optimizers run by the TRT converter to avoid constant duplication,combined_nms_test.py,"@@ -33,15 +33,10 @@ class CombinedNmsTest(trt_test.TfTrtIntegrationTestBase): self.num_boxes = 200 def GraphFn(self, boxes, scores): - max_output_size_per_class = 3 max_total_size = 3 score_threshold = 0.1 iou_threshold = 0.5 # Shapes - max_output_size_per_class_tensor = constant_op.constant( - max_output_size_per_class, - dtype=dtypes.int32, - name='max_output_size_per_class') max_total_size_tensor = constant_op.constant( max_total_size, dtype=dtypes.int32, name='max_total_size') iou_threshold_tensor = constant_op.constant( @@ -51,7 +46,7 @@ class CombinedNmsTest(trt_test.TfTrtIntegrationTestBase): nms_output = image_ops_impl.combined_non_max_suppression( boxes, scores, - max_output_size_per_class_tensor, + max_total_size_tensor, max_total_size_tensor, iou_threshold_tensor, score_threshold_tensor, @@ -86,8 +81,7 @@ class 
CombinedNmsTest(trt_test.TfTrtIntegrationTestBase): return { 'TRTEngineOp_0': [ 'combined_nms/CombinedNonMaxSuppression', - 'max_output_size_per_class', 'max_total_size', 'iou_threshold', - 'score_threshold' + 'max_total_size', 'iou_threshold', 'score_threshold' ] } else: ",0,train 54ac76671b51b3aae688c4906101c1334cd95c4f,tensorflow/tensorflow,Change Grappler optimizers run by the TRT converter to avoid constant duplication,quantization_test.py,"@@ -117,8 +117,8 @@ class NonQuantizedPrecisionsWithRangesTest(trt_test.TfTrtIntegrationTestBase): def ExpectedEnginesToBuild(self, run_params): """"""Return the expected engines to build."""""" # The fake quant ops are not supported in FP32/FP16 mode, and will split the - # graph into three TRT segments. - return [""TRTEngineOp_0"", ""TRTEngineOp_1"", ""TRTEngineOp_2"", ""TRTEngineOp_3""] + # graph into two TRT segments. + return [""TRTEngineOp_0"", ""TRTEngineOp_1""] def ExpectedAbsoluteTolerance(self, run_params): """"""The absolute tolerance to compare floating point results."""""" ",0,train 54ac76671b51b3aae688c4906101c1334cd95c4f,tensorflow/tensorflow,Change Grappler optimizers run by the TRT converter to avoid constant duplication,reshape_transpose_test.py,"@@ -15,6 +15,7 @@ """"""Basic tests for TF-TensorRT integration."""""" from tensorflow.python.compiler.tensorrt.test import tf_trt_integration_test_base as trt_test +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -32,9 +33,10 @@ class ReshapeTest(trt_test.TfTrtIntegrationTestBase): # conversion. # # These reshapes happen at batch dimension, thus conversion should fail. + orig_shape = constant_op.constant([-1, 24, 24, 2], name=""original_shape"") for shape in [[2, 50, 24, 24, 2], [-1, 50, 24, 24, 2], [2, 50, -1, 24, 2]]: incompatible_reshape = array_ops.reshape(inp, shape) - reshape_back = array_ops.reshape(incompatible_reshape, [-1, 24, 24, 2]) + reshape_back = array_ops.reshape(incompatible_reshape, orig_shape) outputs.append(self.trt_incompatible_op(reshape_back)) # Add another block with many reshapes that don't change the batch # dimension. ",0,train 54ac76671b51b3aae688c4906101c1334cd95c4f,tensorflow/tensorflow,Change Grappler optimizers run by the TRT converter to avoid constant duplication,trt_convert.py,"@@ -306,10 +306,9 @@ def _get_tensorrt_rewriter_config(conversion_params, rewriter_config_with_trt.remapping = False if not disable_non_trt_optimizers: - # Layout optimizer may add Const nodes followed by Reshape nodes, thus we - # need to run constant folding again. 
rewriter_config_with_trt.optimizers.extend( - [""constfold"", ""layout"", ""constfold""]) + [""pruning"", ""debug_stripper"", ""layout"", ""dependency"", ""constfold"", + ""common_subgraph_elimination""]) rewriter_config_with_trt.meta_optimizer_iterations = ( rewriter_config_pb2.RewriterConfig.ONE) ",0,train 54ac76671b51b3aae688c4906101c1334cd95c4f,tensorflow/tensorflow,Change Grappler optimizers run by the TRT converter to avoid constant duplication,trt_convert_test.py,"@@ -274,7 +274,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase): self.assertEqual( { ""add"": ""AddV2"", - ""add/ReadVariableOp"": ""Const"", + ""v1"": ""Const"", ""add_1"": ""AddV2"", ""add_2"": ""AddV2"", ""input1"": ""Placeholder"", @@ -806,7 +806,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase): node_name_to_op = {node.name: node.op for node in output_graph_def.node} self.assertEqual( { - ""add/ReadVariableOp"": ""Const"", + ""v1"": ""Const"", ""input1"": ""Placeholder"", ""input2"": ""Placeholder"", ""add"": ""AddV2"", ",0,train 7a26883cb88478e17d0e23ff9e4058aa853426de,tensorflow/tensorflow,"Update GraphDef version to 724. PiperOrigin-RevId: 366415693 Change-Id: Ib42da615ac33d8551cfcf8dfd685b6e7d1bf8eb1",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 723 // Updated: 2021/4/1 +#define TF_GRAPH_DEF_VERSION 724 // Updated: 2021/4/2 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // ",0,train ec3f158515788d9a86dc1bad479f6dfc3be2879a,tensorflow/tensorflow,"Catch non-worker-preemption errors in the preemption handler. PiperOrigin-RevId: 358928837 Change-Id: I8c6a248a89579ca4418ba73fae17fcb9f7acd5ea",cluster_coordinator.py,"@@ -622,6 +622,7 @@ class WorkerPreemptionHandler(object): self._cluster_update_lock = threading.Lock() self._cluster_due_for_update_or_finish = threading.Event() self._worker_up_cond = threading.Condition(self._cluster_update_lock) + self._error_from_recovery = None self._should_preemption_thread_run = True self._preemption_handler_thread = threading.Thread( target=self._preemption_handler, @@ -680,6 +681,14 @@ class WorkerPreemptionHandler(object): with self._cluster_update_lock: self._cluster_due_for_update_or_finish.set() self._worker_up_cond.wait(_WORKER_MAXIMUM_RECOVERY_SEC) + if self._error_from_recovery: + # TODO(yuefengz): there is only one worker that will get this error. + # Ideally we shuold let all workers notified by `_worker_up_cond` get + # this error. + try: + raise self._error_from_recovery + finally: + self._error_from_recovery = None logging.info(""Worker %s has been recovered."", worker_device_name) if on_recovery_fn: @@ -717,7 +726,15 @@ class WorkerPreemptionHandler(object): if self._should_preemption_thread_run: self._cluster_due_for_update_or_finish.clear() except Exception as e: # pylint: disable=broad-except - self._validate_preemption_failure(e) + try: + self._validate_preemption_failure(e) + except Exception as e: # pylint: disable=broad-except + # In this case, a parameter server fails. So we raise this error to + # the caller of `wait_on_failure`. 
+ self._error_from_recovery = e + self._worker_up_cond.notify_all() + if self._should_preemption_thread_run: + self._cluster_due_for_update_or_finish.clear() # NOTE: Since the first RPC (GetStatus) of update_server_def is # currently blocking by default, error should only happen if: # (1) More workers failed while waiting for the previous workers to ",0,train ec3f158515788d9a86dc1bad479f6dfc3be2879a,tensorflow/tensorflow,"Catch non-worker-preemption errors in the preemption handler. PiperOrigin-RevId: 358928837 Change-Id: I8c6a248a89579ca4418ba73fae17fcb9f7acd5ea",fault_tolerance_test.py,"@@ -446,10 +446,26 @@ class BaseFaultToleranceTest(object): # pylint: disable=missing-docstring self.assertGreaterEqual(model.iterations.numpy(), 10) def testPSFailureWhileRecoveryFromWokerFailure(self): - # Only by adding this empty test, can the problem of b/180348454 be - # reproduced. - # TODO(yuefengz): fill in this test. - pass + model = self._create_model_and_run_indefinitely() + + time.sleep(1) + self.assertFalse(self.cluster_coord.done()) + + def kill(task): + self._cluster.kill_task(task, 0) + self.sleep(1) + self._cluster.start_task(task, 0) + + kill_thread_1 = threading.Thread(target=kill, args=(""worker"",)) + kill_thread_2 = threading.Thread(target=kill, args=(""ps"",)) + kill_thread_1.start() + kill_thread_2.start() + kill_thread_1.join() + kill_thread_2.join() + + with self.assertRaises( + (errors.UnavailableError, errors.InvalidArgumentError)): + model.join_training_functions() def testNumpyFetchedAfterWorkerFailure(self): ",0,train 05471ab95fc86834d171a3df23bd4397266a985e,tensorflow/tensorflow,Update losses_test.py,losses_test.py,"@@ -1806,7 +1806,7 @@ class HuberLossTest(test.TestCase): class BinaryTruePositivesViaControlFlow(losses.Loss): def __init__(self, reduction=losses_utils.ReductionV2.AUTO): - super().__init__(reduction=reduction) + super(BinaryTruePositivesViaControlFlow, self).__init__(reduction=reduction) def call(self, y_true, y_pred): y_true = math_ops.cast(y_true, dtypes.bool) ",0,train f6bf10607fc0bd00e94704e1ae20f06f34b81df3,tensorflow/tensorflow,"[tf.data] Fix a bug in prefetch dataset serialization logic. PiperOrigin-RevId: 313453820 Change-Id: I573d4288fbb10b7491778ce4edf24241f5e35fa1",prefetch_dataset_op.cc,"@@ -100,9 +100,13 @@ class PrefetchDatasetOp::Dataset : public DatasetBase { TF_RETURN_IF_ERROR(b->AddScalar(buffer_size_, &buffer_size)); AttrValue slack_period_attr; b->BuildAttrValue(slack_period_, &slack_period_attr); - TF_RETURN_IF_ERROR(b->AddDataset( - this, {input_graph_node, buffer_size}, - {std::make_pair(kSlackPeriod, slack_period_attr)}, output)); + AttrValue legacy_autotune_attr; + b->BuildAttrValue(legacy_autotune_, &legacy_autotune_attr); + TF_RETURN_IF_ERROR( + b->AddDataset(this, {input_graph_node, buffer_size}, + {std::make_pair(kSlackPeriod, slack_period_attr), + std::make_pair(kLegacyAutotune, legacy_autotune_attr)}, + output)); return Status::OK(); } ",0,train d93a6f392597928113895ace200ee1e0b6a13b5f,tensorflow/tensorflow,Add test and fix one error for writable file,gcs_filesystem.cc,"@@ -151,7 +151,7 @@ static void SyncImpl(const std::string& bucket, const std::string& object, *offset = static_cast(metadata->size()); } outfile->clear(); - outfile->seekp(std::ios::end); + outfile->seekp(0, std::ios::end); TF_SetStatus(status, TF_OK, """"); } else { std::string temporary_object = @@ -275,11 +275,6 @@ uint64_t Length(const TF_ReadOnlyMemoryRegion* region) { // SECTION 4. 
Implementation for `TF_Filesystem`, the actual filesystem // ---------------------------------------------------------------------------- namespace tf_gcs_filesystem { -typedef struct GCSFile { - gcs::Client gcs_client; // owned - bool compose; -} GCSFile; - // TODO(vnvo2409): Add lazy-loading and customizing parameters. void Init(TF_Filesystem* filesystem, TF_Status* status) { google::cloud::StatusOr client = ",0,train d93a6f392597928113895ace200ee1e0b6a13b5f,tensorflow/tensorflow,Add test and fix one error for writable file,gcs_filesystem.h,"@@ -28,7 +28,27 @@ int64_t Read(const TF_RandomAccessFile* file, uint64_t offset, size_t n, char* buffer, TF_Status* status); } // namespace tf_random_access_file +namespace tf_writable_file { +void Cleanup(TF_WritableFile* file); +void Append(const TF_WritableFile* file, const char* buffer, size_t n, + TF_Status* status); +int64_t Tell(const TF_WritableFile* file, TF_Status* status); +void Flush(const TF_WritableFile* file, TF_Status* status); +void Sync(const TF_WritableFile* file, TF_Status* status); +void Close(const TF_WritableFile* file, TF_Status* status); +} // namespace tf_writable_file + +namespace tf_read_only_memory_region { +void Cleanup(TF_ReadOnlyMemoryRegion* region); +const void* Data(const TF_ReadOnlyMemoryRegion* region); +uint64_t Length(const TF_ReadOnlyMemoryRegion* region); +} // namespace tf_read_only_memory_region + namespace tf_gcs_filesystem { +typedef struct GCSFile { + google::cloud::storage::Client gcs_client; // owned + bool compose; +} GCSFile; void Init(TF_Filesystem* filesystem, TF_Status* status); void Cleanup(TF_Filesystem* filesystem); void NewRandomAccessFile(const TF_Filesystem* filesystem, const char* path, @@ -37,6 +57,10 @@ void NewWritableFile(const TF_Filesystem* filesystem, const char* path, TF_WritableFile* file, TF_Status* status); void NewAppendableFile(const TF_Filesystem* filesystem, const char* path, TF_WritableFile* file, TF_Status* status); +void NewReadOnlyMemoryRegionFromFile(const TF_Filesystem* filesystem, + const char* path, + TF_ReadOnlyMemoryRegion* region, + TF_Status* status); } // namespace tf_gcs_filesystem #endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_GCS_GCS_FILESYSTEM_H_ ",0,train d93a6f392597928113895ace200ee1e0b6a13b5f,tensorflow/tensorflow,Add test and fix one error for writable file,gcs_filesystem_test.cc,"@@ -93,38 +93,53 @@ class GCSFilesystemTest : public ::testing::Test { }; std::string GCSFilesystemTest::tmp_dir_; -::testing::AssertionResult WriteToServer(const std::string& path, size_t length, - gcs::Client* gcs_client, +::testing::AssertionResult WriteToServer(const std::string& path, size_t offset, + size_t length, gcs::Client* gcs_client, TF_Status* status) { std::string bucket, object; ParseGCSPath(path, false, &bucket, &object, status); - if (TF_GetCode(status) != TF_OK) { + if (TF_GetCode(status) != TF_OK) return ::testing::AssertionFailure() << TF_Message(status); - } auto writer = gcs_client->WriteObject(bucket, object); - writer.write(content, length); + writer.write(content + offset, length); writer.Close(); - if (writer.metadata()) { + if (writer.metadata()) return ::testing::AssertionSuccess(); - } else { + else return ::testing::AssertionFailure() << writer.metadata().status().message(); - } } -::testing::AssertionResult CompareSubString(int64_t offset, size_t n, +::testing::AssertionResult CompareSubString(int64_t offset, size_t length, absl::string_view result, size_t read) { // Result isn't a null-terminated string so we have to wrap it inside a // 
`string_view` - if (n == read && content_view.substr(offset, n) == - absl::string_view(result).substr(0, read)) { + if (length == read && content_view.substr(offset, length) == + absl::string_view(result).substr(0, read)) return ::testing::AssertionSuccess(); - } else { + else return ::testing::AssertionFailure() << ""Result: "" << absl::string_view(result).substr(0, read) - << "" Read:"" << read; + << "" Read: "" << read; +} + +::testing::AssertionResult CompareWithServer(const std::string& path, + size_t offset, size_t length, + gcs::Client* gcs_client, + TF_Status* status) { + std::string bucket, object; + ParseGCSPath(path, false, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) + return ::testing::AssertionFailure() << TF_Message(status); + + auto reader = gcs_client->ReadObject(bucket, object); + if (!reader) + return ::testing::AssertionFailure() << reader.status().message(); + else { + std::string content{std::istreambuf_iterator{reader}, {}}; + return CompareSubString(offset, length, content, content.length()); } } @@ -162,9 +177,10 @@ TEST_F(GCSFilesystemTest, RandomAccessFile) { ASSERT_EQ(TF_GetCode(status_), TF_NOT_FOUND) << TF_Message(status_); TF_SetStatus(status_, TF_OK, """"); - auto gcs_client = static_cast(filesystem_->plugin_filesystem); - ASSERT_TRUE( - WriteToServer(filepath, content_view.length(), gcs_client, status_)); + auto gcs_file = + static_cast(filesystem_->plugin_filesystem); + ASSERT_TRUE(WriteToServer(filepath, 0, content_view.length(), + &gcs_file->gcs_client, status_)); read = tf_random_access_file::Read(file, 0, content_view.length(), result, status_); @@ -185,6 +201,97 @@ TEST_F(GCSFilesystemTest, RandomAccessFile) { delete file; } +TEST_F(GCSFilesystemTest, WritableFile) { + std::string filepath = GetURIForPath(""a_file""); + TF_WritableFile* file = new TF_WritableFile; + tf_gcs_filesystem::NewWritableFile(filesystem_, filepath.c_str(), file, + status_); + ASSERT_TF_OK(status_); + tf_writable_file::Append(file, content, 4, status_); + ASSERT_TF_OK(status_); + auto length = tf_writable_file::Tell(file, status_); + ASSERT_EQ(length, 4); + ASSERT_TF_OK(status_); + tf_writable_file::Flush(file, status_); + ASSERT_TF_OK(status_); + + auto gcs_file = + static_cast(filesystem_->plugin_filesystem); + ASSERT_TRUE( + CompareWithServer(filepath, 0, 4, &gcs_file->gcs_client, status_)); + + tf_writable_file::Append(file, content + 4, 4, status_); + ASSERT_TF_OK(status_); + length = tf_writable_file::Tell(file, status_); + ASSERT_EQ(length, 8); + ASSERT_TF_OK(status_); + tf_writable_file::Flush(file, status_); + ASSERT_TF_OK(status_); + ASSERT_TRUE( + CompareWithServer(filepath, 0, 8, &gcs_file->gcs_client, status_)); + + tf_writable_file::Close(file, status_); + ASSERT_TF_OK(status_); + tf_writable_file::Cleanup(file); + + // Testing for compose objects + gcs_file->compose = true; + filepath = GetURIForPath(""b_file""); + tf_gcs_filesystem::NewWritableFile(filesystem_, filepath.c_str(), file, + status_); + ASSERT_TF_OK(status_); + tf_writable_file::Append(file, content, 4, status_); + ASSERT_TF_OK(status_); + length = tf_writable_file::Tell(file, status_); + ASSERT_EQ(length, 4); + ASSERT_TF_OK(status_); + tf_writable_file::Flush(file, status_); + ASSERT_TF_OK(status_); + ASSERT_TRUE( + CompareWithServer(filepath, 0, 4, &gcs_file->gcs_client, status_)); + + tf_writable_file::Append(file, content + 4, 4, status_); + ASSERT_TF_OK(status_); + length = tf_writable_file::Tell(file, status_); + ASSERT_EQ(length, 8); + ASSERT_TF_OK(status_); + 
tf_writable_file::Flush(file, status_); + ASSERT_TF_OK(status_); + ASSERT_TRUE( + CompareWithServer(filepath, 0, 8, &gcs_file->gcs_client, status_)); + + tf_writable_file::Close(file, status_); + ASSERT_TF_OK(status_); + tf_writable_file::Cleanup(file); + delete file; +} + +TEST_F(GCSFilesystemTest, ReadOnlyMemoryRegion) { + std::string path = GetURIForPath(""a_file""); + auto gcs_file = + static_cast(filesystem_->plugin_filesystem); + ASSERT_TRUE(WriteToServer(path, 0, 0, &gcs_file->gcs_client, status_)); + TF_ReadOnlyMemoryRegion* region = new TF_ReadOnlyMemoryRegion; + tf_gcs_filesystem::NewReadOnlyMemoryRegionFromFile(filesystem_, path.c_str(), + region, status_); + ASSERT_EQ(TF_GetCode(status_), TF_INVALID_ARGUMENT) << TF_Message(status_); + + TF_SetStatus(status_, TF_OK, """"); + ASSERT_TRUE(WriteToServer(path, 0, content_view.length(), + &gcs_file->gcs_client, status_)); + tf_gcs_filesystem::NewReadOnlyMemoryRegionFromFile(filesystem_, path.c_str(), + region, status_); + ASSERT_TF_OK(status_); + auto length = tf_read_only_memory_region::Length(region); + ASSERT_EQ(length, content_view.length()); + auto data = + static_cast(tf_read_only_memory_region::Data(region)); + ASSERT_TRUE(CompareSubString(0, content_view.length(), data, length)); + + tf_read_only_memory_region::Cleanup(region); + delete region; +} + } // namespace } // namespace tensorflow ",0,train 8649852b75ed43fd62e1429086e4a8f5dd6d38ee,tensorflow/tensorflow,"Fix CPU NCHW BiasAddGrad op when height and width are 1. Before it outputted uninitialized memory. There was a special case in ReduceMiddleDimensions which incorrectly used ReduceOuterDimensions. If a ReduceInnerDimensions struct existed, that could have been used instead to make the special case work, but it doesn't exist, so this change removes the special case. PiperOrigin-RevId: 396563734 Change-Id: I8ce437e8d26d0ecc9f44390de17c71b49ee74ee8",redux_functor.h,"@@ -230,11 +230,6 @@ struct ReduceMiddleDimensions { input.template flat().template cast().reshape( output_dims); return; - } else if (1 == inner_dim) { - // Equivalent to ReduceOuterDimensions. - const ReduceOuterDimensions redux; - redux(device, input_dims, input, output); - return; } // Compute block size along the outer dimension for efficiency. ",0,test 8649852b75ed43fd62e1429086e4a8f5dd6d38ee,tensorflow/tensorflow,"Fix CPU NCHW BiasAddGrad op when height and width are 1. Before it outputted uninitialized memory. There was a special case in ReduceMiddleDimensions which incorrectly used ReduceOuterDimensions. If a ReduceInnerDimensions struct existed, that could have been used instead to make the special case work, but it doesn't exist, so this change removes the special case. 
PiperOrigin-RevId: 396563734 Change-Id: I8ce437e8d26d0ecc9f44390de17c71b49ee74ee8",bias_op_base.py,"@@ -254,7 +254,7 @@ class BiasAddTestBase(test.TestCase): self._testGradient(np_input, bias, dtype, data_format, use_gpu) def testGradientTensor4D(self): - for (data_format, use_gpu) in [(""NHWC"", False)]: + for (data_format, use_gpu) in [(""NHWC"", False), (""NCHW"", False)]: for dtype in (dtypes.float16, dtypes.float32, dtypes.float64): np_input = np.arange( 1.0, 49.0, @@ -273,6 +273,13 @@ class BiasAddTestBase(test.TestCase): self._testGradient(np_input, np.random.rand(64).astype(dtype.as_numpy_dtype), dtype, data_format, use_gpu) + np_input = np.arange( + 1.0, 129.0, + dtype=dtype.as_numpy_dtype).reshape([4, 1, 1, + 32]).astype(np.float32) + self._testGradient(np_input, + np.random.rand(32).astype(dtype.as_numpy_dtype), + dtype, data_format, use_gpu) def testGradientTensor5D(self): for (data_format, use_gpu) in [(""NHWC"", False), (""NHWC"", True), ",0,test d96e762f330b3646150ee811058be39345d1124e,tensorflow/tensorflow,clarified the DispatchServer creation process,data_service_ops.py,"@@ -318,12 +318,26 @@ def distribute(processing_mode, a ""one_epoch"" mode which partitions the dataset across the tf.data workers, so that the consumers see each element of the dataset only once. + To see the distributed operations in action, the `DispatchServer` should be + started first so that tf.data workers can register to it. + + ``` + dispatcher = tf.data.experimental.service.DispatchServer(port=5000) + print(dispatcher.target) # prints grpc://localhost:5000 + + dispatcher_address = dispatcher.target.split(""://"")[1] + worker = tf.data.experimental.service.WorkerServer( + port=0, dispatcher_address=dispatcher_address) + ``` + + Now, when the operations on a `tf.data.Dataset` can distributed to the worker. + ``` dataset = tf.data.Dataset.range(5) dataset = dataset.map(lambda x: x*x) dataset = dataset.apply( tf.data.experimental.service.distribute(""parallel_epochs"", - ""grpc://dataservice:5000"")) + dispatcher.target)) dataset = dataset.map(lambda x: x+1) for element in dataset: @@ -331,7 +345,7 @@ def distribute(processing_mode, ``` In the above example, the first two lines (before the call to `distribute`) - will be executed on tf.data workers, and the elements provided over + will be executed on the tf.data worker, and the elements are provided over RPC. The remaining transformations (after the call to `distribute`) will be executed locally. @@ -339,9 +353,10 @@ def distribute(processing_mode, datasets. Instead of each dataset creating its own job, all datasets with the same `job_name` will consume from the same job. A new job will be created for each iteration of the dataset (with each repetition of - `Dataset.repeat` counting as a new iteration). Suppose two training workers - (in either a single client or multi-client setup) iterate over the below - dataset, and there is a single tf.data worker: + `Dataset.repeat` counting as a new iteration). 
Suppose the `DispatchServer` + is serving on `dataservice:5000` and two training workers (in either a single + client or multi-client setup) iterate over the below dataset, and there is a + single tf.data worker: ``` range5_dataset = tf.data.Dataset.range(5) ",0,train d96e762f330b3646150ee811058be39345d1124e,tensorflow/tensorflow,clarified the DispatchServer creation process,distribute.py,"@@ -460,8 +460,7 @@ def batch_sizes_for_worker(global_batch_size, num_workers, worker_0 = floor * worker_0 + array_ops.concat([ array_ops.ones(num_ceil, dtype=dtypes.int64), array_ops.zeros(num_subbatches - num_ceil, dtype=dtypes.int64) - ], - axis=0) + ], axis=0) return array_ops.concat([worker_0[offset:], worker_0[:offset]], axis=0) ",0,train d96e762f330b3646150ee811058be39345d1124e,tensorflow/tensorflow,clarified the DispatchServer creation process,distribute_options.py,"@@ -80,6 +80,5 @@ class DistributeOptions(options.OptionsBase): num_devices = options.create_option( name=""num_devices"", ty=int, - docstring= - ""The number of devices attached to this input pipeline. This will be "" - ""automatically set by MultiDeviceIterator."") + docstring=""The number of devices attached to this input pipeline. "" + ""This will be automatically set by MultiDeviceIterator."") ",0,train 13e153172c0afc1e24a98db98df07ea0cb680d8d,tensorflow/tensorflow,format code,non_max_suppression_op.cc,"@@ -197,10 +197,12 @@ void DoNonMaxSuppressionOp(OpKernelContext* context, const Tensor& scores, scale = static_cast(-1.0) / soft_nms_sigma; } - auto suppress_weight = [similarity_threshold, scale, is_soft_nms](const T sim) { + auto suppress_weight = [similarity_threshold, scale, + is_soft_nms](const T sim) { const T weight = static_cast(std::exp(static_cast(scale * sim * sim))); - return is_soft_nms || sim <= similarity_threshold ? weight : static_cast(0.0); + return is_soft_nms || sim <= similarity_threshold ? weight + : static_cast(0.0); }; std::vector selected; ",0,train 8f0570b9627f12fc95b02eca70e6267735f9c717,tensorflow/tensorflow,"Remove unneeded quote for forward slash based on review comment. Signed-off-by: Yong Tang ",tensor_shape_div_test.py,"@@ -40,8 +40,8 @@ class DimensionDivTest(test_util.TensorFlowTestCase): """"""Without from __future__ import division, __rdiv__ is used."""""" if six.PY2: # Old division exists only in Python 2 two = tensor_shape.Dimension(2) - message = (r""unsupported operand type\(s\) for \/: "" - r""'int' and 'Dimension', please use \/\/ instead"") + message = (r""unsupported operand type\(s\) for /: "" + r""'int' and 'Dimension', please use // instead"") with self.assertRaisesRegexp(TypeError, message): _ = 6 / two ",0,train 8f0570b9627f12fc95b02eca70e6267735f9c717,tensorflow/tensorflow,"Remove unneeded quote for forward slash based on review comment. Signed-off-by: Yong Tang ",tensor_shape_test.py,"@@ -209,16 +209,16 @@ class DimensionTest(test_util.TensorFlowTestCase): # Note: This test is related to GitHub issue 25790. 
six = tensor_shape.Dimension(6) two = tensor_shape.Dimension(2) - message = (r""unsupported operand type\(s\) for \/: "" - r""'Dimension' and 'Dimension', please use \/\/ instead"") + message = (r""unsupported operand type\(s\) for /: "" + r""'Dimension' and 'Dimension', please use // instead"") with self.assertRaisesRegexp(TypeError, message): _ = six / two - message = (r""unsupported operand type\(s\) for \/: "" - r""'Dimension' and 'int', please use \/\/ instead"") + message = (r""unsupported operand type\(s\) for /: "" + r""'Dimension' and 'int', please use // instead"") with self.assertRaisesRegexp(TypeError, message): _ = six / 2 - message = (r""unsupported operand type\(s\) for \/: "" - r""'int' and 'Dimension', please use \/\/ instead"") + message = (r""unsupported operand type\(s\) for /: "" + r""'int' and 'Dimension', please use // instead"") with self.assertRaisesRegexp(TypeError, message): _ = 6 / two ",0,train 2cbbe2ae0d4ab61d8f08f1eb31417e4a163395c7,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-01-23 PiperOrigin-RevId: 291116711 Change-Id: Iaccd5467e1581192360210fdcc01ca6b40bb713a",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 1, 22) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 1, 23) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train 34b08363810df7dbb678902a5158358cc006e514,tensorflow/tensorflow,"Fix gru kernel test for msan. PiperOrigin-RevId: 247150783",unidirectional_sequence_gru_test.cc,"@@ -31,11 +31,13 @@ using ::testing::ElementsAreArray; class GRUOpModel : public SingleOpModel { public: - explicit GRUOpModel(const std::vector>& input_shapes, - const TensorType& weight_type = TensorType_FLOAT32) { + explicit GRUOpModel(int n_batch, int n_input, int n_output, + const std::vector>& input_shapes, + const TensorType& weight_type = TensorType_FLOAT32) + : n_batch_(n_batch), n_input_(n_input), n_output_(n_output) { input_ = AddInput(TensorType_FLOAT32); input_state_ = - AddInput(TensorData{TensorType_FLOAT32, {n_batch_, n_output_}}, true); + AddInput(TensorData{TensorType_FLOAT32, {n_batch, n_output}}, true); gate_weight_ = AddInput(TensorType_FLOAT32); gate_bias_ = AddInput(TensorType_FLOAT32); candidate_weight_ = AddInput(TensorType_FLOAT32); @@ -100,7 +102,8 @@ TEST(GRUTest, SimpleTest) { const int n_input = 2; const int n_output = 3; - GRUOpModel m({{n_time, n_batch, n_input}, + GRUOpModel m(n_batch, n_input, n_output, + {{n_time, n_batch, n_input}, {n_batch, n_output}, {2 * n_output, n_input + n_output}, {2 * n_output}, ",0,train 1700ac827237992143144a5763a72d56b2da7127,tensorflow/tensorflow,"Use correct module when calling reduce_prod. PiperOrigin-RevId: 158544698",util.py,"@@ -544,7 +544,7 @@ def fill_lower_triangular(x, validate_args=False, name=""fill_lower_triangular""): m = np.prod(batch_shape).astype(np.int32) else: batch_shape = array_ops.shape(x)[:-1] - m = array_ops.reduce_prod(array_ops.shape(x)[:-1]) + m = math_ops.reduce_prod(array_ops.shape(x)[:-1]) batch_ids = math_ops.range(m) # Assemble the tril_ids into batch,tril_id pairs. 
",0,train ec26ef5fbc463ffee3321b34c68eac08e4b4c64e,tensorflow/tensorflow,"Add shape assertion to categorical crossentropy loss. PiperOrigin-RevId: 292243729 Change-Id: I88da74f303e46075b3934749664129b03b8774b2",backend.py,"@@ -4571,6 +4571,7 @@ def categorical_crossentropy(target, output, from_logits=False, axis=-1): dtype=float32) """""" + target.shape.assert_is_compatible_with(output.shape) if from_logits: return nn.softmax_cross_entropy_with_logits_v2( labels=target, logits=output, axis=axis) ",0,train ec26ef5fbc463ffee3321b34c68eac08e4b4c64e,tensorflow/tensorflow,"Add shape assertion to categorical crossentropy loss. PiperOrigin-RevId: 292243729 Change-Id: I88da74f303e46075b3934749664129b03b8774b2",losses_test.py,"@@ -875,6 +875,15 @@ class CategoricalCrossentropyTest(test.TestCase): expected_value = 400.0 * label_smoothing / 3.0 self.assertAlmostEqual(self.evaluate(loss), expected_value, 3) + def test_shape_mismatch(self): + y_true = constant_op.constant([[0], [1], [2]]) + y_pred = constant_op.constant([[.9, .05, .05], [.5, .89, .6], + [.05, .01, .94]]) + + cce_obj = keras.losses.CategoricalCrossentropy() + with self.assertRaisesRegexp(ValueError, 'Shapes .+ are incompatible'): + cce_obj(y_true, y_pred) + @test_util.run_all_in_graph_and_eager_modes class SparseCategoricalCrossentropyTest(test.TestCase): ",0,train 121dd0762284314d739296de70b1ea1979cd5949,tensorflow/tensorflow,"Dispatch while loops based exclusively on the test closure, not the modified state. Upgrade the tests from v1-only mode. PiperOrigin-RevId: 232817358",control_flow.py,"@@ -329,7 +329,6 @@ class ControlFlowTransformer(converter.Base): cond_closure = set() for s in cond_scope.read: cond_closure |= s.support_set - cond_closure -= loop_state loop_state, state_ssf, state_ast_tuple, ssf_map = self._state_constructs( loop_state, reserved_symbols) ",0,test 121dd0762284314d739296de70b1ea1979cd5949,tensorflow/tensorflow,"Dispatch while loops based exclusively on the test closure, not the modified state. Upgrade the tests from v1-only mode. PiperOrigin-RevId: 232817358",control_flow_test.py,"@@ -33,8 +33,7 @@ class ControlFlowTest(converter_testing.TestCase): inputs = (inputs,) with self.converted(test_fn, control_flow, {}, constant_op.constant) as result: - with self.cached_session() as sess: - self.assertEqual(sess.run(result.test_fn(*inputs)), expected) + self.assertEqual(self.evaluate(result.test_fn(*inputs)), expected) @test_util.run_deprecated_v1 def test_while_basic(self): @@ -78,6 +77,33 @@ class ControlFlowTest(converter_testing.TestCase): self.assertTransformedResult(test_fn, constant_op.constant(5), 0) + @test_util.run_deprecated_v1 + def test_while_dispatches_by_cond_only(self): + + class TensorIncompatibleNumeric(object): + """"""Works in arithmetic expression, but errors out with TF ops."""""" + + def __init__(self, val): + self.val = val + + def __add__(self, other): + return TensorIncompatibleNumeric(self.val + other) + + def test_fn(n, s): + while n > 0: + n -= 1 + s += n + return s + + self.assertTransformedResult(test_fn, (constant_op.constant(5), 0), 10) + with self.converted(test_fn, control_flow, {}) as result: + # n alone controls the staging. When the loop is not staged, Python + # knows how to add the two objects. But when staged, tf.while_loop will + # not know how to deal with the TensorIncompatibleNumeric object. 
+ self.assertEqual(result.test_fn(5, TensorIncompatibleNumeric(0)).val, 10) + with self.assertRaises(TypeError): + result.test_fn(constant_op.constant(5), TensorIncompatibleNumeric(0)) + @test_util.run_deprecated_v1 def test_if_basic(self): @@ -112,11 +138,10 @@ class ControlFlowTest(converter_testing.TestCase): return obj with self.converted(test_fn, control_flow, {}) as result: - with self.cached_session() as sess: - res_obj = result.test_fn(constant_op.constant(1), TestClass(0, 0)) - self.assertEqual(sess.run((res_obj.a, res_obj.b)), (-1, 0)) - res_obj = result.test_fn(constant_op.constant(-1), TestClass(0, 0)) - self.assertEqual(sess.run((res_obj.a, res_obj.b)), (0, -2)) + res_obj = result.test_fn(constant_op.constant(1), TestClass(0, 0)) + self.assertEqual(self.evaluate((res_obj.a, res_obj.b)), (-1, 0)) + res_obj = result.test_fn(constant_op.constant(-1), TestClass(0, 0)) + self.assertEqual(self.evaluate((res_obj.a, res_obj.b)), (0, -2)) @test_util.run_deprecated_v1 def test_if_single_output(self): @@ -223,5 +248,7 @@ class ControlFlowTest(converter_testing.TestCase): return z self.assertTransformedResult(test_fn, [3, 3], 7) + + if __name__ == '__main__': test.main() ",0,test 121dd0762284314d739296de70b1ea1979cd5949,tensorflow/tensorflow,"Dispatch while loops based exclusively on the test closure, not the modified state. Upgrade the tests from v1-only mode. PiperOrigin-RevId: 232817358",control_flow.py,"@@ -153,8 +153,7 @@ def while_stmt(test, body, init_state, extra_deps, opts=None): # TODO(mdan): Consider adding a generic mechanism for dynamic dispatch. # That could be something as simple as a collection of dispatch rules, with # some prioritization. - if any(tensor_util.is_tensor(v) - for v in nest.flatten(init_state + extra_deps)): + if any(tensor_util.is_tensor(v) for v in nest.flatten(extra_deps)): return _tf_while_stmt(test, body, init_state, opts) else: return _py_while_stmt(test, body, init_state, opts) ",0,test 121dd0762284314d739296de70b1ea1979cd5949,tensorflow/tensorflow,"Dispatch while loops based exclusively on the test closure, not the modified state. Upgrade the tests from v1-only mode. PiperOrigin-RevId: 232817358",control_flow_test.py,"@@ -65,30 +65,41 @@ class WhileLoopTest(test.TestCase): def test_tensor(self): n = constant_op.constant(5) results = control_flow.while_stmt( - test=lambda i, sum: i < n, - body=lambda i, sum: (i + 1, sum + i,), + test=lambda i, s: i < n, + body=lambda i, s: (i + 1, s + i,), init_state=(0, 0), extra_deps=(n,)) - with self.cached_session(): - self.assertEqual((5, 10), self.evaluate(results)) + self.assertEqual((5, 10), self.evaluate(results)) @test_util.run_deprecated_v1 - def test_tensor_dict_state(self): + def test_python_with_tensor_state(self): n = 5 - init_state = {'i': constant_op.constant(0), 'sum': constant_op.constant(0)} results = control_flow.while_stmt( - test=lambda s: s['i'] < n, - body=lambda s: ({'i': s['i'] + 1, 'sum': s['sum'] + s['i']},), - init_state=(init_state,), + test=lambda i, s: i < n, + body=lambda i, s: (i + 1, s + i), + init_state=(0, constant_op.constant(0)), extra_deps=()) - with self.cached_session(): - self.assertEqual(({'i': 5, 'sum': 10},), self.evaluate(results)) + result_i, result_s = results + self.assertEqual(5, result_i) + self.assertEqual(10, self.evaluate(result_s)) + + @test_util.run_deprecated_v1 + def test_python_due_to_hidden_cond_type(self): + n = 5 + + # TODO(b/124002646): Improve the error message. 
+ with self.assertRaises(Exception): + control_flow.while_stmt( + test=lambda i, s: i < n, + body=lambda i, s: (i + 1, s + i), + init_state=(constant_op.constant(0), constant_op.constant(0)), + extra_deps=()) def test_python(self): n = 5 results = control_flow.while_stmt( - test=lambda i, sum: i < n, - body=lambda i, sum: (i + 1, sum + i), + test=lambda i, s: i < n, + body=lambda i, s: (i + 1, s + i), init_state=(0, 0), extra_deps=(n,)) self.assertEqual((5, 10), results) ",0,test 39c5b0470e7e6b6f79c8f55d89ea46585168f2a8,tensorflow/tensorflow,"Really delete old checkpoints this time. Follows up on cl/188187349, which fixed checkpoint management for tf.train.Saver when executing eagerly. Except I was recreating the tf.train.Saver objects each save, so tfe.Checkpoint and friends did not benefit from that change. Keeps the same tf.train.Saver around when executing eagerly. This limits object graph mutations just like when graph building; if there are complaints I can assign to Saver._var_list instead, since eager tf.train.Saver is not specialized to its var_list argument. PiperOrigin-RevId: 189211552",checkpointable_utils.py,"@@ -602,8 +602,7 @@ class CheckpointableSaver(object): """""" named_variables, graph_proto = _serialize_object_graph( self._root_checkpointable) - in_graph_mode = not context.executing_eagerly() - if in_graph_mode: + if not context.executing_eagerly(): if session is None: session = ops.get_default_session() if self._object_graph_feed_tensor is None: @@ -622,17 +621,17 @@ class CheckpointableSaver(object): named_variables[_OBJECT_GRAPH_PROTO_KEY] = _NoRestoreSaveable( tensor=object_graph_tensor, name=_OBJECT_GRAPH_PROTO_KEY) - if not in_graph_mode or self._last_save_object_graph != graph_proto: - if self._last_save_object_graph is not None and in_graph_mode: + if self._last_save_object_graph != graph_proto: + if self._last_save_object_graph is not None: raise NotImplementedError( ""Using a single Saver to save a mutated object graph is not "" ""currently supported when graph building. Use a different Saver "" - ""when the object graph changes (save ops will be duplicated), or "" - ""file a feature request if this limitation bothers you."") + ""when the object graph changes (save ops will be duplicated when "" + ""graph building), or file a feature request if this limitation "" + ""bothers you."") saver = saver_lib.Saver(var_list=named_variables) - if in_graph_mode: - self._last_save_saver = saver - self._last_save_object_graph = graph_proto + self._last_save_saver = saver + self._last_save_object_graph = graph_proto else: saver = self._last_save_saver with ops.device(""/cpu:0""): ",0,train 39c5b0470e7e6b6f79c8f55d89ea46585168f2a8,tensorflow/tensorflow,"Really delete old checkpoints this time. Follows up on cl/188187349, which fixed checkpoint management for tf.train.Saver when executing eagerly. Except I was recreating the tf.train.Saver objects each save, so tfe.Checkpoint and friends did not benefit from that change. Keeps the same tf.train.Saver around when executing eagerly. This limits object graph mutations just like when graph building; if there are complaints I can assign to Saver._var_list instead, since eager tf.train.Saver is not specialized to its var_list argument. 
PiperOrigin-RevId: 189211552",checkpointable_utils_test.py,"@@ -849,6 +849,26 @@ class CheckpointingTests(test.TestCase): saver.save(checkpoint_prefix) self.assertEqual(before_ops, graph.get_operations()) + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testCheckpointCleanup(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, ""ckpt"") + obj = checkpointable.Checkpointable() + obj.var = variable_scope.get_variable(name=""v"", initializer=0.) + self.evaluate(checkpointable_utils.gather_initializers(obj)) + saver = checkpointable_utils.Checkpoint(obj=obj) + for _ in range(10): + saver.save(checkpoint_prefix) + expected_filenames = [""checkpoint""] + for checkpoint_number in range(6, 11): + expected_filenames.append(""ckpt-%d.index"" % (checkpoint_number,)) + expected_filenames.append( + ""ckpt-%d.data-00000-of-00001"" % (checkpoint_number,)) + six.assertCountEqual( + self, + expected_filenames, + os.listdir(checkpoint_directory)) + def testManyRestoresGraph(self): """"""Restores after the first should not modify the graph."""""" with context.graph_mode(): ",0,train a1e78629fa0b461273d0ff4c5b45e01ee4b8836d,tensorflow/tensorflow,"Fix to handle Reshape Layer in experimental TFLite writer library. Changes: 1. Updated handling of ReshapeParams. 2. Added write_lib tests to check different scenarios. PiperOrigin-RevId: 323950640 Change-Id: I20c4a5dcd3d80c591366edb7341634c0b13ffd45",option_writer_generator.cc,"@@ -265,6 +265,29 @@ void GenerateImportForResizeBilinearOp(FILE* fp) { "" }\n break;\n""); } +// Reshape Op infers output shape either from Parameter or from shape tensor +// that's is an additional input. When we have this additional shape tensor as +// input we don't have the parameter present in this layer. In case of more than +// one input we import an empty vector for the parameters. +void GenerateImportForReshapeOp(FILE* fp) { + fprintf(fp, + "" case BuiltinOperator_RESHAPE: {\n"" + "" const auto* params = reinterpret_cast(builtin_op_data);\n"" + "" flatbuffers::Offset union_type;\n"" + "" if (node.inputs->size > 1) {\n"" + "" union_type = CreateReshapeOptions(*fbb).Union();\n"" + "" } else {\n"" + "" auto val0 = fbb->CreateVector(std::vector(params->shape, "" + ""params->shape + params->num_dimensions));\n"" + "" union_type = CreateReshapeOptions(*fbb, "" + ""val0).Union();\n"" + "" }\n"" + "" return std::make_pair(BuiltinOptions_ReshapeOptions, "" + ""union_type);\n"" + "" }\n break;\n""); +} + void GenerateImportForOp(FILE* fp, const std::string& op_name, const std::string& option_name, const std::string& option_type, @@ -276,6 +299,13 @@ void GenerateImportForOp(FILE* fp, const std::string& op_name, return; } + // Special case Reshape that may have 'new_shape' field missing from the + // parameters. + if (struct_name == ""TfLiteReshapeParams"") { + GenerateImportForReshapeOp(fp); + return; + } + fprintf(fp, "" case BuiltinOperator_%s: {\n"", op_name.c_str()); if (options->num_elems != 0) { fprintf(fp, ",0,train a1e78629fa0b461273d0ff4c5b45e01ee4b8836d,tensorflow/tensorflow,"Fix to handle Reshape Layer in experimental TFLite writer library. Changes: 1. Updated handling of ReshapeParams. 2. Added write_lib tests to check different scenarios. 
PiperOrigin-RevId: 323950640 Change-Id: I20c4a5dcd3d80c591366edb7341634c0b13ffd45",writer_lib.cc,"@@ -31,7 +31,7 @@ namespace tflite { std::pair> CreateBuiltinUnion( flatbuffers::FlatBufferBuilder* fbb, enum BuiltinOperator op, - void* builtin_op_data) { + void* builtin_op_data, const TfLiteNode& node) { switch (op) { #include ""tensorflow/lite/experimental/writer/option_writer_generated.h"" } @@ -82,7 +82,7 @@ SubgraphWriter::ExportOperators(flatbuffers::FlatBufferBuilder* fbb) { // builtin auto builtin_options_and_type = CreateBuiltinUnion( fbb, static_cast(registration.builtin_code), - node.builtin_data); + node.builtin_data, node); builtin_options = builtin_options_and_type.second; builtin_options_type = builtin_options_and_type.first; } else { ",0,train a1e78629fa0b461273d0ff4c5b45e01ee4b8836d,tensorflow/tensorflow,"Fix to handle Reshape Layer in experimental TFLite writer library. Changes: 1. Updated handling of ReshapeParams. 2. Added write_lib tests to check different scenarios. PiperOrigin-RevId: 323950640 Change-Id: I20c4a5dcd3d80c591366edb7341634c0b13ffd45",writer_lib_test.cc,"@@ -15,6 +15,9 @@ limitations under the License. #include ""tensorflow/lite/experimental/writer/writer_lib.h"" +#include +#include + #include #include ""tensorflow/lite/c/common.h"" #include ""tensorflow/lite/interpreter.h"" @@ -184,6 +187,83 @@ TEST(Writer, PerTensorQuantizedModelTest) { CHECK_EQ(new_interpreter->AllocateTensors(), kTfLiteOk); } +struct ReshapeTestPattern { + int num_inputs; + bool is_param_valid; +}; + +class ReshapeLayerTest : public ::testing::TestWithParam {}; + +TEST_P(ReshapeLayerTest, ReshapeLayerTest) { + const auto param = GetParam(); + Interpreter interpreter; + const int total_tensors = param.num_inputs + 1; + interpreter.AddTensors(total_tensors); + int output_shape[] = {1, 2, 3}; + interpreter.SetTensorParametersReadWrite(/*tensor_index=*/0, kTfLiteFloat32, + /*name=*/""a"", /*dims=*/{6}, + TfLiteQuantization()); + ASSERT_LE(param.num_inputs, 2); + if (param.num_inputs == 2) { + interpreter.SetTensorParametersReadOnly( + /*tensor_index=*/1, kTfLiteInt32, /*name=*/""b"", /*dims=*/{3}, + TfLiteQuantization(), reinterpret_cast(output_shape), + sizeof(output_shape)); + } + interpreter.SetTensorParametersReadWrite(/*tensor_index=*/total_tensors - 1, + kTfLiteFloat32, /*name=*/""c"", + /*dims=*/{3}, TfLiteQuantization()); + + std::vector input_tensors(param.num_inputs); + std::iota(input_tensors.begin(), input_tensors.end(), 0); + + interpreter.SetInputs(input_tensors); + interpreter.SetOutputs({total_tensors - 1}); + const char* initial_data = """"; + tflite::ops::builtin::BuiltinOpResolver resolver; + TfLiteReshapeParams* builtin_data = reinterpret_cast( + malloc(sizeof(TfLiteReshapeParams))); + if (param.is_param_valid) { + builtin_data->num_dimensions = 3; + for (int dim = 0; dim < builtin_data->num_dimensions; ++dim) { + builtin_data->shape[dim] = output_shape[dim]; + } + } + const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_RESHAPE, 1); + interpreter.AddNodeWithParameters(input_tensors, + /*outputs=*/{total_tensors - 1}, + initial_data, /*init_data_size=*/0, + reinterpret_cast(builtin_data), reg); + + SubgraphWriter writer(&interpreter.primary_subgraph()); + std::stringstream ss; + ss << ""/tmp/test_reshape_"" << param.num_inputs << param.is_param_valid + << "".tflite""; + std::string filename = ss.str(); + writer.Write(filename); + std::unique_ptr model = + FlatBufferModel::BuildFromFile(filename.c_str()); + InterpreterBuilder builder(*model, resolver); + 
std::unique_ptr new_interpreter; + builder(&new_interpreter); + ASSERT_EQ(new_interpreter->AllocateTensors(), kTfLiteOk); +} + +INSTANTIATE_TEST_SUITE_P( + Writer, ReshapeLayerTest, + ::testing::Values(ReshapeTestPattern{/*num_inputs=*/2, + /*is_param_valid=*/true}, + ReshapeTestPattern{/*num_inputs=*/2, + /*is_param_valid=*/false}, + ReshapeTestPattern{/*num_inputs=*/1, + /*is_param_valid=*/true}), + [](const ::testing::TestParamInfo& info) { + std::stringstream ss; + ss << ""num_inputs_"" << info.param.num_inputs << ""_valid_param_"" + << info.param.is_param_valid; + std::string name = ss.str(); + return name; + }); } // namespace tflite int main(int argc, char** argv) { ",0,train 25337d2065bd3ef79b9018714c0cb5af46ca06dc,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2018-12-12 PiperOrigin-RevId: 225140840",compat.py,"@@ -32,7 +32,7 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 11) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 12) @tf_export(""compat.forward_compatible"") ",0,test 0fc6825fbe34165d5792e938b5f724a58d638ab2,tensorflow/tensorflow,"Support NNAPI Burst mode in the delegate execution provider. PiperOrigin-RevId: 369781496 Change-Id: I53f3d0c587e22bd47a498e7e0fc9c4f16c689011",nnapi_delegate_provider.cc,"@@ -36,6 +36,8 @@ class NnapiDelegateProvider : public DelegateProvider { ToolParam::Create(true)); default_params_.AddParam(""nnapi_allow_fp16"", ToolParam::Create(false)); + default_params_.AddParam(""nnapi_use_burst_mode"", + ToolParam::Create(false)); } std::vector CreateFlags(ToolParams* params) const final; @@ -65,7 +67,13 @@ std::vector NnapiDelegateProvider::CreateFlags(ToolParams* params) const { CreateFlag(""disable_nnapi_cpu"", params, ""Disable the NNAPI CPU device""), CreateFlag(""nnapi_allow_fp16"", params, - ""Allow fp32 computation to be run in fp16"")}; + ""Allow fp32 computation to be run in fp16""), + CreateFlag( + ""nnapi_use_burst_mode"", params, + ""use NNAPI Burst mode if supported. Burst mode allows accelerators "" + ""to efficiently manage resources, which would significantly reduce "" + ""overhead especially if the same delegate instance is to be used for "" + ""multiple inferences."")}; return flags; } @@ -93,6 +101,8 @@ void NnapiDelegateProvider::LogParams(const ToolParams& params, verbose); LOG_TOOL_PARAM(params, bool, ""nnapi_allow_fp16"", ""Allow fp16 in NNAPI"", verbose); + LOG_TOOL_PARAM(params, bool, ""nnapi_use_burst_mode"", + ""Use burst mode in NNAPI"", verbose); } TfLiteDelegatePtr NnapiDelegateProvider::CreateTfLiteDelegate( @@ -112,6 +122,10 @@ TfLiteDelegatePtr NnapiDelegateProvider::CreateTfLiteDelegate( options.allow_fp16 = true; } + if (params.Get(""nnapi_use_burst_mode"")) { + options.use_burst_computation = true; + } + std::string string_execution_preference = params.Get(""nnapi_execution_preference""); // Only set execution preference if user explicitly passes one. Otherwise, ",0,train a96417300ab274f90dd223c0507a162bb5e7e521,tensorflow/tensorflow,"Replacing the deprecated notifyAll() with notify_all(). This is preparing for the upcoming tf release with python 3.10 support. 
PiperOrigin-RevId: 413513658 Change-Id: I8bfb4e7f2ab52e80d4dbcfb8fb24cd268cd6ca5e",cluster_coordinator.py,"@@ -329,7 +329,7 @@ class _CoordinatedClosureQueue(object): def stop(self): with self._queue_lock: self._should_process_closures = False - self._closures_queued_condition.notifyAll() + self._closures_queued_condition.notify_all() self._watchdog.stop() def _cancel_all_closures(self): @@ -408,9 +408,9 @@ class _CoordinatedClosureQueue(object): raise AssertionError(""There is no inflight closures to mark_finished."") self._inflight_closure_count -= 1 if self._inflight_closure_count == 0: - self._no_inflight_closure_condition.notifyAll() + self._no_inflight_closure_condition.notify_all() if self._queue.empty() and self._inflight_closure_count == 0: - self._stop_waiting_condition.notifyAll() + self._stop_waiting_condition.notify_all() self._watchdog.report_closure_done() def put_back(self, closure): @@ -426,7 +426,7 @@ class _CoordinatedClosureQueue(object): self._closures_queued_condition.notify() self._inflight_closure_count -= 1 if self._inflight_closure_count == 0: - self._no_inflight_closure_condition.notifyAll() + self._no_inflight_closure_condition.notify_all() def wait(self, timeout=None): """"""Wait for all closures to be finished before returning. @@ -459,8 +459,8 @@ class _CoordinatedClosureQueue(object): self._error = e self._inflight_closure_count -= 1 if self._inflight_closure_count == 0: - self._no_inflight_closure_condition.notifyAll() - self._stop_waiting_condition.notifyAll() + self._no_inflight_closure_condition.notify_all() + self._stop_waiting_condition.notify_all() def done(self): """"""Returns true if the queue is empty and there is no inflight closure. ",0,train a96417300ab274f90dd223c0507a162bb5e7e521,tensorflow/tensorflow,"Replacing the deprecated notifyAll() with notify_all(). This is preparing for the upcoming tf release with python 3.10 support. PiperOrigin-RevId: 413513658 Change-Id: I8bfb4e7f2ab52e80d4dbcfb8fb24cd268cd6ca5e",lock_util.py,"@@ -99,7 +99,7 @@ class GroupLock(object): self._ready.acquire() self._group_member_counts[group_id] -= 1 if self._group_member_counts[group_id] == 0: - self._ready.notifyAll() + self._ready.notify_all() self._ready.release() def _another_group_active(self, group_id): ",0,train 74ee9cb1effdee27fca298d7979676064b2c8c8e,tensorflow/tensorflow,"Make TRTEngineOp node names unique. Add a unique graph sequence number to TRTEngineOp node names to avoid name collision. Since the TRTEngineOp node names are used as the cache keys for the resource cache objects for the operation, this can avoid mapping two different TRTEngineOp nodes to the same cache objects. Fix affected tests. PiperOrigin-RevId: 304524561 Change-Id: I6a7f8c5f484f883f6c3d02df4967bbed5f758467",convert_graph.cc,"@@ -617,11 +617,6 @@ std::pair GetDeviceAndAllocator(const ConversionParams& params, return std::make_pair(cuda_device_id, dev_allocator); } -int64 GetNextGraphSequenceNumber() { - static std::atomic graph_sequence_num; - return graph_sequence_num++; -} - // Entry function from optimization pass. Status ConvertAfterShapes(const ConversionParams& params) { // Sanity checks. 
@@ -671,12 +666,10 @@ Status ConvertAfterShapes(const ConversionParams& params) { std::vector engine_bytes_size; segment::SegmentNodesVector converted_segments; converted_segments.reserve(initial_segments.size()); - string engine_name_prefix = - StrCat(""TRTEngineOp_"", GetNextGraphSequenceNumber(), ""_""); for (size_t t = 0; t < initial_segments.size(); t++) { auto& curr_segment = initial_segments.at(t); EngineInfo curr_engine; - curr_engine.engine_name = StrCat(engine_name_prefix, t); + curr_engine.engine_name = StrCat(""TRTEngineOp_"", t); Status status = GetEngineInfo(&graph, *params.graph_properties, curr_segment, node_map, reverse_topo_order, &curr_engine); ",0,train 74ee9cb1effdee27fca298d7979676064b2c8c8e,tensorflow/tensorflow,"Make TRTEngineOp node names unique. Add a unique graph sequence number to TRTEngineOp node names to avoid name collision. Since the TRTEngineOp node names are used as the cache keys for the resource cache objects for the operation, this can avoid mapping two different TRTEngineOp nodes to the same cache objects. Fix affected tests. PiperOrigin-RevId: 304524561 Change-Id: I6a7f8c5f484f883f6c3d02df4967bbed5f758467",tf_trt_integration_test_base.py,"@@ -522,25 +522,6 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): logging.info(""Writing graph to %s/%s"", temp_dir, graph_name) graph_io.write_graph(gdef, temp_dir, graph_name) - # Remove the graph sequence number prefix from the name only if the name has - # a prefix TRTEngineOp_n_. When expecting_prefix is true, assert such a - # prefix exists. - def _RemoveGraphSequenceNumberImpl(self, name, expecting_prefix): - match = re.search(r""TRTEngineOp_\d+_"", name) - has_prefix = match and name.startswith(match.group(0)) - assert (not expecting_prefix) or has_prefix - if has_prefix: - parts = name.split(""_"", maxsplit=2) - assert len(parts) == 3 - return parts[0] + ""_"" + parts[2] - return name - - def _RemoveGraphSequenceNumber(self, name): - return self._RemoveGraphSequenceNumberImpl(name, True) - - def _MayRemoveGraphSequenceNumber(self, name): - return self._RemoveGraphSequenceNumberImpl(name, False) - def _VerifyConnections(self, expected_engines, original_gdef, converted_gdef): old_to_new_node_map = { self._ToString(node.name): self._ToString(node.name) @@ -598,14 +579,11 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): # Compute the actual mapping from each node to its input nodes. 
actual_input_map = {} for node in converted_gdef.node: - name_str = node.name - if node.op == ""TRTEngineOp"": - name_str = self._RemoveGraphSequenceNumber(name_str) + name_str = self._ToString(node.name) actual_input_map[name_str] = set() input_set = actual_input_map[name_str] for inp in node.input: (prefix, node_name) = _InputName(inp) - node_name = self._MayRemoveGraphSequenceNumber(node_name) input_set.add(prefix + node_name) self.assertEqual( @@ -650,8 +628,7 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): self.assertIn(function_name, functions) if not IsQuantizationWithCalibration and not is_dynamic_engine: self.assertTrue(len(node.attr[""serialized_segment""].s), node.name) - self.assertIn( - self._RemoveGraphSequenceNumber(node.name), expected_engines) + self.assertIn(node.name, expected_engines) self.assertEqual( self._ToBytes(run_params.precision_mode), node.attr[""precision_mode""].s, node.name) @@ -685,8 +662,7 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): node.name for node in gdef_to_verify.node if node.op == ""TRTEngineOp"" ] for func in gdef_to_verify.library.function: - if not re.search(r""TRTEngineOp_\d+_\d+_native_segment"", - func.signature.name): + if not re.search(r""TRTEngineOp_\d+_native_segment"", func.signature.name): for node in func.node_def: all_op_names.append(node.name) if node.op == ""TRTEngineOp"": @@ -694,12 +670,9 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): # Remove the function name prefix. def _Canonicalize(names): return set(self._ToString(name.split(""/"")[-1]) for name in names) - # Remove the graph sequence number prefix from all the names. - def _RemoveGraphSequenceNumber(names): - return set(self._RemoveGraphSequenceNumber(name) for name in names) all_op_names = _Canonicalize(all_op_names) - trt_op_names = _RemoveGraphSequenceNumber(_Canonicalize(trt_op_names)) + trt_op_names = _Canonicalize(trt_op_names) if isinstance(expected_engines, dict): # For simplicity we don't verify the connections inside the engine in ",0,train 74ee9cb1effdee27fca298d7979676064b2c8c8e,tensorflow/tensorflow,"Make TRTEngineOp node names unique. Add a unique graph sequence number to TRTEngineOp node names to avoid name collision. Since the TRTEngineOp node names are used as the cache keys for the resource cache objects for the operation, this can avoid mapping two different TRTEngineOp nodes to the same cache objects. Fix affected tests. PiperOrigin-RevId: 304524561 Change-Id: I6a7f8c5f484f883f6c3d02df4967bbed5f758467",trt_convert_test.py,"@@ -20,7 +20,6 @@ from __future__ import print_function import gc import os -import re import tempfile from absl.testing import parameterized @@ -311,24 +310,6 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase): converter.save(output_saved_model_dir=output_saved_model_dir) return output_graph_def - # Remove the graph sequence number prefix from the name only if the name has - # a prefix TRTEngineOp_n_. - def _MayRemoveGraphSequenceNumber(self, name): - prefix = re.search(r""TRTEngineOp_\d+_"", name) - if prefix and name.startswith(prefix.group(0)): - parts = name.split(""_"", maxsplit=2) - assert len(parts) == 3 - return parts[0] + ""_"" + parts[2] - return name - - # Return the unique TRTEngineOp in the given graph def. 
- def _GetUniqueTRTEngineOp(self, graph_def): - trt_engine_nodes = [ - node for node in graph_def.node if node.op == ""TRTEngineOp"" - ] - assert len(trt_engine_nodes) == 1 - return trt_engine_nodes[0] - def _TestTrtGraphConverter(self, device, output_saved_model_dir=None, @@ -349,10 +330,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase): graph_defs_to_verify.append(saved_model_graph_def) for graph_def in graph_defs_to_verify: - node_name_to_op = { - self._MayRemoveGraphSequenceNumber(node.name): node.op - for node in graph_def.node - } + node_name_to_op = {node.name: node.op for node in graph_def.node} self.assertEqual( { ""input1"": ""Placeholder"", @@ -456,13 +434,13 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase): trt_op_names = [] for node in graph_def.node: if node.op == ""TRTEngineOp"": - trt_op_names.append(self._MayRemoveGraphSequenceNumber(node.name)) + trt_op_names.append(node.name) if check_fn: check_fn(node) for func in graph_def.library.function: for node in func.node_def: if node.op == ""TRTEngineOp"": - trt_op_names.append(self._MayRemoveGraphSequenceNumber(node.name)) + trt_op_names.append(node.name) if check_fn: check_fn(node) self.assertEqual(1, len(trt_op_names)) @@ -495,15 +473,11 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase): # Verify the converted GraphDef and ConcreteFunction. self._CheckTrtOps(converter._converted_func) # pylint: disable=protected-access - trt_engine_name = self._GetUniqueTRTEngineOp( - converter._converted_graph_def).name - # Save the converted model without any TRT engine cache. output_saved_model_dir = self.mkdtemp() converter.save(output_saved_model_dir) unexpected_asset_file = os.path.join( - output_saved_model_dir, - ""assets/trt-serialized-engine."" + trt_engine_name) + output_saved_model_dir, ""assets/trt-serialized-engine.TRTEngineOp_0"") self.assertFalse(os.path.exists(unexpected_asset_file)) # Run the converted function to populate the engine cache. 
@@ -516,8 +490,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase): output_saved_model_dir = self.mkdtemp() converter.save(output_saved_model_dir) expected_asset_file = os.path.join( - output_saved_model_dir, - ""assets/trt-serialized-engine."" + trt_engine_name) + output_saved_model_dir, ""assets/trt-serialized-engine.TRTEngineOp_0"") self.assertTrue(os.path.exists(expected_asset_file)) self.assertTrue(os.path.getsize(expected_asset_file)) @@ -593,9 +566,6 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase): converter.convert(calibration_input_fn=_CalibrationInputFn) - trt_engine_name = self._GetUniqueTRTEngineOp( - converter._converted_graph_def).name - def _CheckFn(node): self.assertTrue(len(node.attr[""calibration_data""].s), node.name) @@ -613,8 +583,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase): output_saved_model_dir = self.mkdtemp() converter.save(output_saved_model_dir) expected_asset_file = os.path.join( - output_saved_model_dir, - ""assets/trt-serialized-engine."" + trt_engine_name) + output_saved_model_dir, ""assets/trt-serialized-engine.TRTEngineOp_0"") self.assertTrue(os.path.exists(expected_asset_file)) self.assertTrue(os.path.getsize(expected_asset_file)) @@ -666,9 +635,6 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase): converter = self._CreateConverterV2(input_saved_model_dir) converter.convert() - trt_engine_name = self._GetUniqueTRTEngineOp( - converter._converted_graph_def).name - def _InputFn(): yield np_input1, np_input2 @@ -679,7 +645,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase): def _DestroyCache(): with ops.device(""GPU:0""): handle = gen_trt_ops.create_trt_resource_handle( - resource_name=trt_engine_name) + resource_name=""TRTEngineOp_0"") gen_resource_variable_ops.destroy_resource_op( handle, ignore_lookup_error=False) ",0,train 2b559a9a086f7e8e79557c642c6d4f5115f855c5,tensorflow/tensorflow,"Improves constant shape inference for resource variables. PiperOrigin-RevId: 223367586",resource_variable_ops_test.py,"@@ -29,6 +29,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -137,6 +138,14 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): self.evaluate(v[0].assign(2.0)) self.assertAllEqual(self.evaluate(v), [2.0, 2.0]) + @test_util.run_in_graph_and_eager_modes + def testVariableShape(self): + v = resource_variable_ops.ResourceVariable([1., 1.]) + self.assertAllEqual( + tensor_util.constant_value( + resource_variable_ops.variable_shape(v.handle)), + [2]) + def testDifferentAssignGraph(self): with ops.Graph().as_default(): v = resource_variable_ops.ResourceVariable(1.0) ",0,train 2b559a9a086f7e8e79557c642c6d4f5115f855c5,tensorflow/tensorflow,"Improves constant shape inference for resource variables. 
PiperOrigin-RevId: 223367586",resource_variable_ops.py,"@@ -26,6 +26,7 @@ from tensorflow.core.framework import variable_pb2 from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context from tensorflow.python.eager import tape +from tensorflow.python.framework import constant_op from tensorflow.python.framework import cpp_shape_inference_pb2 from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -64,6 +65,7 @@ def eager_safe_variable_handle(shape, dtype, shared_name, name, graph_mode): name=name, container=container) if graph_mode: + handle._handle_data = get_resource_handle_data(handle) # pylint: disable=protected-access return handle # We do not want two distinct ResourceVariable objects for the same @@ -1410,13 +1412,23 @@ def _ReadGrad(_, grad): return grad +def variable_shape(handle, out_type=dtypes.int32): + if getattr( + handle, ""_handle_data"", None) is None or not handle._handle_data.is_set: + return gen_resource_variable_ops.variable_shape(handle, out_type=out_type) + shape_proto = handle._handle_data.shape_and_type[0].shape + if shape_proto.unknown_rank or any(x.size == -1 for x in shape_proto.dim): + return gen_resource_variable_ops.variable_shape(handle, out_type=out_type) + return constant_op.constant([x.size for x in shape_proto.dim], dtype=out_type) + + @ops.RegisterGradient(""ResourceGather"") def _GatherGrad(op, grad): """"""Gradient for gather op."""""" # Build appropriately shaped IndexedSlices handle = op.inputs[0] indices = op.inputs[1] - params_shape = gen_resource_variable_ops.variable_shape(handle) + params_shape = variable_shape(handle) size = array_ops.expand_dims(array_ops.size(indices), 0) values_shape = array_ops.concat([size, params_shape[1:]], 0) values = array_ops.reshape(grad, values_shape) ",0,train 4721480639b185cc9ce2eb1dbbcd25984a068453,tensorflow/tensorflow,spelling docstring for predict,training.py,"@@ -859,7 +859,7 @@ class Model(network.Network, version_utils.ModelVersionSelector): (Dataset, generator, Sequence) is given in the `Unpacking behavior for iterator-like inputs` section of `Model.fit`. batch_size: Integer or `None`. - Number of samples per gradient update. + Number of samples per batch. If unspecified, `batch_size` will default to 32. Do not specify the `batch_size` if your data is in the form of symbolic tensors, dataset, ",0,test b76fbd5d4d3ed92209e124746850004099687219,tensorflow/tensorflow,"remove left-over debug printf statement PiperOrigin-RevId: 266378933",fully_connected.cc,"@@ -61,7 +61,6 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, int exponent; QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent); data->output_shift = -exponent; - printf(""%d \n"", data->output_multiplier); TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( context, params->activation, output, &data->output_activation_min, &data->output_activation_max)); ",0,train 3acc8eaf602b3e9a009f54e1e0164644dd793831,tensorflow/tensorflow,"Add sanity check for resize-bilinear input shape. PiperOrigin-RevId: 245618186",resize_bilinear.cc,"@@ -40,9 +40,12 @@ TfLiteStatus ResizeOutputTensor(TfLiteContext* context, const TfLiteTensor* input, const TfLiteTensor* size, TfLiteTensor* output) { + const int32* size_data = GetTensorData(size); + // Sanity check, the up/down sampling size should always be positive. 
+ TF_LITE_ENSURE(context, size_data[0] > 0); + TF_LITE_ENSURE(context, size_data[1] > 0); TfLiteIntArray* output_size = TfLiteIntArrayCreate(4); output_size->data[0] = input->dims->data[0]; - const int32* size_data = GetTensorData(size); output_size->data[1] = size_data[0]; output_size->data[2] = size_data[1]; output_size->data[3] = input->dims->data[3]; ",0,train 7cb3e1328e859187bd59879477f2dfb820fb98d4,tensorflow/tensorflow,"Add lowering for tf.XlaEinSum PiperOrigin-RevId: 400872318 Change-Id: I45d2e1bc35d98880e4eb668ea926072b1696a788",legalize_tf_with_tf2xla.cc,"@@ -266,7 +266,6 @@ bool IsOpAllowedTf2XlaFallback(Operation* op) { TypeID::get(), TypeID::get(), TypeID::get(), - TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), ",0,train 7cb3e1328e859187bd59879477f2dfb820fb98d4,tensorflow/tensorflow,"Add lowering for tf.XlaEinSum PiperOrigin-RevId: 400872318 Change-Id: I45d2e1bc35d98880e4eb668ea926072b1696a788",xla_legalize_tf.cc,"@@ -164,6 +164,7 @@ const llvm::DenseSet &MlirPreferredOps() { TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), ",0,train 7cb3e1328e859187bd59879477f2dfb820fb98d4,tensorflow/tensorflow,"Add lowering for tf.XlaEinSum PiperOrigin-RevId: 400872318 Change-Id: I45d2e1bc35d98880e4eb668ea926072b1696a788",randomized_tests.cc,"@@ -3497,6 +3497,56 @@ TEST_F(OpTest, XlaDotV2) { }); } +TEST_F(OpTest, XlaEinsum) { + Repeatedly([this]() { + std::string equation; + std::vector lhs_dims, rhs_dims; + + enum EinsumType { matmul, batchmatmul, dot, outer }; + int op_kind = Choose({matmul, batchmatmul, dot, outer}); + switch (op_kind) { + case matmul: + case batchmatmul: { + std::vector dims; + if (op_kind == matmul) { + equation = ""ij,jk->ik""; + dims = RandomDims(2, 2); + } else { + equation = ""...ij,...jk->...ik""; + dims = RandomDims(2); + } + int64_t ndims = dims.size(); + int64_t inner_dim = RandomDim(); + lhs_dims = dims; + rhs_dims = dims; + lhs_dims[ndims - 1] = inner_dim; + rhs_dims[ndims - 2] = inner_dim; + break; + } + case dot: { + equation = ""i,i->""; + std::vector dims = RandomDims(1, 1); + lhs_dims = dims; + rhs_dims = dims; + break; + } + case outer: { + equation = ""i,j->ij""; + lhs_dims = RandomDims(1, 1); + rhs_dims = RandomDims(1, 1); + break; + } + } + + auto dtype = Choose(kAllXlaTypes); + return ExpectTfAndXlaOutputsAreClose(OpTestBuilder(""XlaEinsum"") + .RandomInput(dtype, lhs_dims) + .RandomInput(dtype, rhs_dims) + .Attr(""equation"", equation) + .Attr(""T"", dtype)); + }); +} + TEST_F(OpTest, ZerosLike) { GTEST_SKIP() << ""b/201095155""; Repeatedly([this]() { ",0,train 7cb3e1328e859187bd59879477f2dfb820fb98d4,tensorflow/tensorflow,"Add lowering for tf.XlaEinSum PiperOrigin-RevId: 400872318 Change-Id: I45d2e1bc35d98880e4eb668ea926072b1696a788",einsum_op.cc,"@@ -30,32 +30,8 @@ constexpr std::array kEinsumTypes = { {DT_INT32, DT_INT64, DT_UINT64, DT_HALF, DT_BFLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}}; -// Kernel which compiles XlaEinsum, an einsum op accepting two inputs. 
-class XlaEinsumOp : public XlaOpKernel { - public: - explicit XlaEinsumOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr(""equation"", &equation_)); - } - - ~XlaEinsumOp() override = default; - - void Compile(XlaOpKernelContext* ctx) override { - xla::XlaOp lhs = ctx->Input(0); - if (equation_.find(',') == equation_.npos) { - ctx->SetOutput(0, xla::Einsum(lhs, equation_)); - } else { - xla::XlaOp rhs = ctx->Input(1); - ctx->SetOutput(0, xla::Einsum(lhs, rhs, equation_)); - } - } - - private: - string equation_; - TF_DISALLOW_COPY_AND_ASSIGN(XlaEinsumOp); -}; - REGISTER_XLA_OP(Name(""XlaEinsum"").TypeConstraint(""T"", kEinsumTypes), - XlaEinsumOp); + MlirXlaOpKernel); REGISTER_XLA_OP(Name(""Einsum"").TypeConstraint(""T"", kEinsumTypes), MlirXlaOpKernel); ",0,train 87c225ef0e8b1eac47dac471c8b6307ebd1f79be,tensorflow/tensorflow,"Add verifier for HLO Iota op. Also fixes a bug in tf.RandomShuffle legalization caught by verifier. PiperOrigin-RevId: 296109247 Change-Id: Icea818f51a6eab91f65efb65aa07f9639d9704a6",hlo_ops.cc,"@@ -202,6 +202,20 @@ OpFoldResult IotaOp::fold(ArrayRef operands) { return DenseIntElementsAttr::get(output_type, values); } +static LogicalResult Verify(IotaOp op) { + auto shape = op.getType().cast(); + if (!shape.hasRank()) return success(); + + if (shape.getRank() == 0) + return op.emitOpError() << ""does not support scalars.""; + + auto iota_dimension = op.iota_dimension().getSExtValue(); + if (iota_dimension >= shape.getRank() || iota_dimension < 0) + return op.emitOpError() << ""iota dimension cannot go beyond the output "" + ""rank or be negative.""; + return success(); +} + //===----------------------------------------------------------------------===// // AbsOp //===----------------------------------------------------------------------===// ",0,train 87c225ef0e8b1eac47dac471c8b6307ebd1f79be,tensorflow/tensorflow,"Add verifier for HLO Iota op. Also fixes a bug in tf.RandomShuffle legalization caught by verifier. PiperOrigin-RevId: 296109247 Change-Id: Icea818f51a6eab91f65efb65aa07f9639d9704a6",legalize_tf.cc,"@@ -3362,7 +3362,7 @@ class ConvertRandomShuffleOp : public OpRewritePattern { auto indices_type = RankedTensorType::get({first_dim_size}, rewriter.getIntegerType(32)); Value indices = rewriter.create( - op.getLoc(), indices_type, rewriter.getI64IntegerAttr(first_dim_size)); + op.getLoc(), indices_type, rewriter.getI64IntegerAttr(0)); // Generate random numbers to be used as swaps for the indices. Value swaps = CreateRngUniform32(op.getLoc(), first_dim_size, 0, ",0,train d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client. This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger. (Actually, to the TensorFlow variant of the standard Google logger.) This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true). PiperOrigin-RevId: 179729641",curl_http_request.cc,"@@ -17,6 +17,7 @@ limitations under the License. #include ""tensorflow/core/platform/cloud/curl_http_request.h"" +#include ""third_party/absl/strings/string_view.h"" #include ""tensorflow/core/lib/core/errors.h"" #include ""tensorflow/core/lib/gtl/map_util.h"" #include ""tensorflow/core/lib/strings/scanner.h"" @@ -24,13 +25,12 @@ limitations under the License. 
#include ""tensorflow/core/platform/types.h"" #include ""tensorflow/core/public/version.h"" +using absl::string_view; + namespace tensorflow { namespace { -// Set to 1 to enable verbose debug output from curl. -constexpr uint64 kVerboseOutput = 0; - // Proxy to the real libcurl implementation. class LibCurlProxy : public LibCurl { public: @@ -114,6 +114,29 @@ class LibCurlProxy : public LibCurl { return ::curl_easy_strerror(errornum); } }; + +int CurlDebugCallback(CURL* handle, curl_infotype type, char* data, size_t size, + void* userptr) { + switch (type) { + case CURLINFO_HEADER_IN: + LOG(INFO) << ""< "" << string_view(data, size); + break; + + case CURLINFO_HEADER_OUT: + LOG(INFO) << ""> "" << string_view(data, size); + break; + + case CURLINFO_TEXT: + LOG(INFO) << ""* "" << string_view(data, size); + break; + + default: + // We are not currently interested in the other CURLINFO_* types. + break; + } + + return 0; +} } // namespace CurlHttpRequest::CurlHttpRequest() : CurlHttpRequest(LibCurlProxy::Load()) {} @@ -129,7 +152,6 @@ CurlHttpRequest::CurlHttpRequest(LibCurl* libcurl, Env* env) // default in //third_party:curl.BUILD and can be customized via an // environment variable. - libcurl_->curl_easy_setopt(curl_, CURLOPT_VERBOSE, kVerboseOutput); libcurl_->curl_easy_setopt( curl_, CURLOPT_USERAGENT, strings::StrCat(""TensorFlow/"", TF_VERSION_STRING).c_str()); @@ -164,6 +186,18 @@ CurlHttpRequest::~CurlHttpRequest() { } } +void CurlHttpRequest::SetVerboseLogging(bool enabled) { + if (enabled) { + libcurl_->curl_easy_setopt(curl_, CURLOPT_VERBOSE, static_cast(1)); + libcurl_->curl_easy_setopt(curl_, CURLOPT_DEBUGFUNCTION, + reinterpret_cast(CurlDebugCallback)); + } else { + libcurl_->curl_easy_setopt(curl_, CURLOPT_VERBOSE, static_cast(0)); + libcurl_->curl_easy_setopt(curl_, CURLOPT_DEBUGFUNCTION, + static_cast(nullptr)); + } +} + string CurlHttpRequest::EscapeString(const string& str) { char* out_char_str = libcurl_->curl_easy_escape(curl_, str.c_str(), 0); string out_str(out_char_str); ",0,train d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client. This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger. (Actually, to the TensorFlow variant of the standard Google logger.) This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true). PiperOrigin-RevId: 179729641",curl_http_request.h,"@@ -140,6 +140,8 @@ class CurlHttpRequest : public HttpRequest { void SetTimeouts(uint32 connection, uint32 inactivity, uint32 total) override; + void SetVerboseLogging(bool enabled) override; + private: /// A write callback in the form which can be accepted by libcurl. static size_t WriteCallback(const void* ptr, size_t size, size_t nmemb, ",0,train d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client. This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger. (Actually, to the TensorFlow variant of the standard Google logger.) This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true). 
PiperOrigin-RevId: 179729641",curl_http_request_test.cc,"@@ -318,6 +318,33 @@ TEST(CurlHttpRequestTest, GetRequest_Direct) { EXPECT_EQ(200, http_request.GetResponseCode()); } +TEST(CurlHttpRequestTest, GetRequest_Verbose) { + FakeLibCurl libcurl(""get response"", 200); + CurlHttpRequest http_request(&libcurl); + + std::vector scratch(kTestContent.begin(), kTestContent.end()); + scratch.reserve(100); + + http_request.SetVerboseLogging(true); + http_request.SetUri(""http://www.testuri.com""); + http_request.AddAuthBearerHeader(""fake-bearer""); + http_request.SetRange(100, 199); + http_request.SetResultBuffer(&scratch); + TF_EXPECT_OK(http_request.Send()); + + EXPECT_EQ(""get response"", string(scratch.begin(), scratch.end())); + + // Check interactions with libcurl. + EXPECT_TRUE(libcurl.is_initialized_); + EXPECT_EQ(""http://www.testuri.com"", libcurl.url_); + EXPECT_EQ(""100-199"", libcurl.range_); + EXPECT_EQ("""", libcurl.custom_request_); + EXPECT_EQ(1, libcurl.headers_->size()); + EXPECT_EQ(""Authorization: Bearer fake-bearer"", (*libcurl.headers_)[0]); + EXPECT_FALSE(libcurl.is_post_); + EXPECT_EQ(200, http_request.GetResponseCode()); +} + TEST(CurlHttpRequestTest, GetRequest_Empty) { FakeLibCurl libcurl("""", 200); CurlHttpRequest http_request(&libcurl); ",0,train d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client. This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger. (Actually, to the TensorFlow variant of the standard Google logger.) This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true). PiperOrigin-RevId: 179729641",gcs_dns_cache_test.cc,"@@ -56,6 +56,7 @@ class TestHttpRequest : public HttpRequest { void SetTimeouts(uint32 connection, uint32 inactivity, uint32 total) override {} + void SetVerboseLogging(bool enabled) override {} std::map resolve_overrides_; }; ",0,train d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client. This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger. (Actually, to the TensorFlow variant of the standard Google logger.) This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true). PiperOrigin-RevId: 179729641",gcs_file_system.cc,"@@ -25,6 +25,7 @@ limitations under the License. #ifdef _WIN32 #include //for _mktemp #endif +#include ""third_party/absl/strings/numbers.h"" #include ""include/json/json.h"" #include ""tensorflow/core/lib/core/errors.h"" #include ""tensorflow/core/lib/gtl/map_util.h"" @@ -117,6 +118,9 @@ constexpr char kReadRequestTimeout[] = ""GCS_READ_REQUEST_TIMEOUT_SECS""; // The environment variable to configure the overall request timeout for // upload requests. constexpr char kWriteRequestTimeout[] = ""GCS_WRITE_REQUEST_TIMEOUT_SECS""; +// If set to true, then each HTTP request will log verbose output. +// This is for debugging only. 
+constexpr char kLogHttpRequestVerbose[] = ""GCS_LOG_HTTP_REQUEST_VERBOSE""; // TODO: DO NOT use a hardcoded path Status GetTmpFilename(string* filename) { @@ -604,6 +608,10 @@ bool GetEnvVar(const char* varname, bool (*convert)(StringPiece, T*), return convert(env_value, value); } +bool SimpleAtob(StringPiece text, bool* result) { + return absl::SimpleAtob(absl::string_view(text.data(), text.size()), result); +} + } // namespace GcsFileSystem::GcsFileSystem() @@ -684,6 +692,11 @@ GcsFileSystem::GcsFileSystem() if (GetEnvVar(kWriteRequestTimeout, strings::safe_strtou32, &timeout_value)) { timeouts_.write = timeout_value; } + + bool log_verbose = false; + if (GetEnvVar(kLogHttpRequestVerbose, SimpleAtob, &log_verbose)) { + log_http_request_verbose_ = log_verbose; + } } GcsFileSystem::GcsFileSystem( @@ -1389,6 +1402,10 @@ Status GcsFileSystem::CreateHttpRequest(std::unique_ptr* request) { new_request->AddAuthBearerHeader(auth_token); + if (log_http_request_verbose_) { + new_request->SetVerboseLogging(true); + } + *request = std::move(new_request); return Status::OK(); } ",0,train d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client. This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger. (Actually, to the TensorFlow variant of the standard Google logger.) This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true). PiperOrigin-RevId: 179729641",gcs_file_system.h,"@@ -195,6 +195,9 @@ class GcsFileSystem : public FileSystem { /// The initial delay for exponential backoffs when retrying failed calls. const int64 initial_retry_delay_usec_ = 1000000L; + /// Controls whether we enable verbose logging in CurlHttpRequests. + bool log_http_request_verbose_ = false; + TF_DISALLOW_COPY_AND_ASSIGN(GcsFileSystem); }; ",0,train d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client. This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger. (Actually, to the TensorFlow variant of the standard Google logger.) This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true). PiperOrigin-RevId: 179729641",http_request.h,"@@ -140,6 +140,13 @@ class HttpRequest { virtual void SetTimeouts(uint32 connection, uint32 inactivity, uint32 total) = 0; + /// \brief Enables verbose logging for this HTTP request. + /// + /// The implementation is free to do whatever it wants with this request. + /// Generally, the implementation should use this as a hint to emit debug + /// logging somewhere. + virtual void SetVerboseLogging(bool enabled) = 0; + TF_DISALLOW_COPY_AND_ASSIGN(HttpRequest); }; ",0,train d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client. This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger. (Actually, to the TensorFlow variant of the standard Google logger.) This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true). 
PiperOrigin-RevId: 179729641",http_request_fake.h,"@@ -167,6 +167,8 @@ class FakeHttpRequest : public CurlHttpRequest { inactivity, "" "", total, ""\n""); } + virtual void SetVerboseLogging(bool enabled) override {} + private: string actual_request() const { string s; ",0,train 70743f654dc34f1765879f65b28d30e9d09c6954,tensorflow/tensorflow,"Delete test that loads weights between two models of different types. This test assumes the following: 1. The list of layers is the same between the two models --> This isn't always the case because functional models include the input layer in the list, while sequential models don't. or 2. The checkpointed weights are loaded in a specific order (this was the cause of the flakiness) PiperOrigin-RevId: 288607470 Change-Id: Ic8db57a65f4f4910e8d404ab108ff4d686c56f2b",models_test.py,"@@ -28,7 +28,6 @@ from tensorflow.python import keras from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_spec from tensorflow.python.keras import backend as K from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import metrics @@ -338,37 +337,6 @@ class CheckpointingTests(keras_parameterized.TestCase): model.load_weights(save_prefix) self.assertEqual(12., self.evaluate(beta1_power)) - @keras_parameterized.run_with_all_model_types(exclude_models=['subclass']) - def test_layer_tracking(self): - with self.cached_session(): - model = _get_model(input_shape=(4,)) - - if testing_utils.get_model_type() == 'subclass': - # Subclassed model must be built separately. - model._set_inputs(tensor_spec.TensorSpec((None, 4))) - - # Ensure that checkpoints are compatible with another model with the same - # layers, even if the model isn't built until after initialization. - layers = _get_layers(input_shape=None, add_input_layer=False) - model2 = models.Sequential(layers) - # Build model by calling it. - model2.predict_on_batch(np.random.random((10, 4))) - - model_path = os.path.join(self.get_temp_dir(), 'model_ckpt') - model.save_weights(model_path) - model2_path = os.path.join(self.get_temp_dir(), 'model2_ckpt') - model2.save_weights(model2_path) - - # Check that the checkpoints are compatible with both models. - model.load_weights(model2_path) - self.assertAllClose(self.evaluate(model.weights), - self.evaluate(model2.weights)) - - model.load_weights(model_path) - model2.load_weights(model_path) - self.assertAllClose(self.evaluate(model.weights), - self.evaluate(model2.weights)) - @keras_parameterized.run_all_keras_modes class TestModelBackend(keras_parameterized.TestCase): ",0,train d63e3ea9a26fc049c654a966d0ebc56bc2747729,tensorflow/tensorflow,"Rollback of ""Replace a few calls of Session `run` with `evaluate`"" for distribute_coordinator_test to fix breakage. PiperOrigin-RevId: 222017627",distribute_coordinator_test.py,"@@ -235,7 +235,7 @@ class DistributeCoordinatorTestBase(test.TestCase): result = math_ops.add_n(xs) variables.global_variables_initializer().run() - result_value = self.evaluate(result) + result_value = sess.run(result) self.assertEqual(result_value, expected) if result_value == expected: self._result_correct += 1 @@ -294,7 +294,7 @@ class DistributeCoordinatorTestBase(test.TestCase): if len(uninit_vars) == 0: break - self.evaluate(train_op) + sess.run(train_op) # Synchronize workers after one step to make sure they all have finished # training. 
@@ -327,7 +327,7 @@ class DistributeCoordinatorTestBase(test.TestCase): # The monitored session will run init or ready ops. with monitored_session.MonitoredSession() as sess: - self.evaluate(train_op) + sess.run(train_op) # Synchronize workers after one step to make sure they all have finished # training. ",0,train 5ea9724314362fe80760cf226addc7e4a2539493,tensorflow/tensorflow,"Minor typo fix. Change: 117611495",convolutional.py,"@@ -150,7 +150,7 @@ def main(argv=None): # pylint: disable=unused-argument shape=(EVAL_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)) # The variables below hold all the trainable weights. They are passed an - # initial value which will be assigned when when we call: + # initial value which will be assigned when we call: # {tf.initialize_all_variables().run()} conv1_weights = tf.Variable( tf.truncated_normal([5, 5, NUM_CHANNELS, 32], # 5x5 filter, depth 32. ",0,train 7954fb8fd1104dd6e78781a895aea1022357da72,tensorflow/tensorflow,"Do not fail when TPUReplicateMetadata op is missing. The rationale behind this change is that functionalizing control flow in TF v1 models adds functions to function library with nodes that have '_tpu_replicate' attrribute. The TPU cluster formation pass throws error on these functions when it does not find the TPUReplicateMetadata op associated with a _tpu_replicate attribute. PiperOrigin-RevId: 292986754 Change-Id: I203c4e62db96c835bdad3f669500674dc4fce8c5",tpu_cluster_formation.cc,"@@ -414,10 +414,12 @@ LogicalResult FormClustersInBlock(Block* block, auto cluster_metadata = metadata_map.find(cluster.getFirst()); // No TPUReplicateMetadata for a `_tpu_replicate` attribute. - if (cluster_metadata == metadata_map.end()) - return cluster_ops.front()->emitError() - << ""TPUReplicateMetadata for associated '"" << kTPUReplicateAttr - << ""' attribute '"" << cluster.getFirst() << ""' is missing""; + if (cluster_metadata == metadata_map.end()) { + cluster_ops.front()->emitWarning() + << ""TPUReplicateMetadata for associated '"" << kTPUReplicateAttr + << ""' attribute '"" << cluster.getFirst() << ""' is missing""; + continue; + } llvm::SmallSetVector preceding_users = CollectClusterPrecedingUsers(block, cluster_ops); ",0,test f6e707ca2d5514fc21aa10c83e509f7077c73f4d,tensorflow/tensorflow,"Supporting conversion of argument attributes along their types. This fixes a bug: previously, during conversion function argument attributes were neither beings passed through nor converted. This fix extends DialectConversion to allow for simultaneous conversion of the function type and the argument attributes. This was important when lowering MLIR to LLVM where attribute information (e.g. noalias) needs to be preserved in MLIR(LLVMDialect). Longer run it seems reasonable that we want to convert both the function attribute and its type and the argument attributes, but that requires a small refactoring in Function.h to aggregate these three fields in an inner struct, which will require some discussion. PiperOrigin-RevId: 236709409",Function.h,"@@ -46,6 +46,10 @@ class Function : public llvm::ilist_node_with_parent { public: Function(Location location, StringRef name, FunctionType type, ArrayRef attrs = {}); + Function(Location location, StringRef name, FunctionType type, + ArrayRef attrs, + ArrayRef argAttrs); + ~Function(); /// The source location the function was defined or derived from. @@ -198,6 +202,10 @@ public: argAttrs[index].setAttrs(getContext(), attributes); } + /// Return all argument attributes of this function. 
+ MutableArrayRef getAllArgAttrs() { return argAttrs; } + ArrayRef getAllArgAttrs() const { return argAttrs; } + /// Return the specified attribute if present, null otherwise. Attribute getAttr(Identifier name) const { return attrs.get(name); } Attribute getAttr(StringRef name) const { return attrs.get(name); } ",0,train f6e707ca2d5514fc21aa10c83e509f7077c73f4d,tensorflow/tensorflow,"Supporting conversion of argument attributes along their types. This fixes a bug: previously, during conversion function argument attributes were neither beings passed through nor converted. This fix extends DialectConversion to allow for simultaneous conversion of the function type and the argument attributes. This was important when lowering MLIR to LLVM where attribute information (e.g. noalias) needs to be preserved in MLIR(LLVMDialect). Longer run it seems reasonable that we want to convert both the function attribute and its type and the argument attributes, but that requires a small refactoring in Function.h to aggregate these three fields in an inner struct, which will require some discussion. PiperOrigin-RevId: 236709409",DialectConversion.h,"@@ -191,7 +191,9 @@ protected: /// The default behavior of this function is to call convertType on individual /// function operands and results, and then create a new MLIR function type /// from those. - virtual FunctionType convertFunctionSignatureType(FunctionType t); + virtual std::pair> + convertFunctionSignatureType(FunctionType t, + ArrayRef argAttrs); }; } // end namespace mlir ",0,train bf0b5b619d633fcff14cc11243297537e83d77d2,tensorflow/tensorflow,add an extra check in case the rank is static,py_builtins.py,"@@ -478,12 +478,16 @@ def _tf_sorted(iterable, key, reverse): direction = 'DESCENDING' if key is not UNSPECIFIED: mapped = parallel_ops.vectorized_map(key, iterable) + if mapped.shape.rank is not None and mapped.shape.rank != 1: + raise ValueError('sort only supports only 1D tensors') with ops.control_dependencies( - [check_ops.assert_rank_v2(mapped, 1, 'only support 1-D tensor')]): + [check_ops.assert_rank_v2(mapped, 1, 'sort only supports only 1D tensors')]): order = sort_ops.argsort(mapped, direction=direction) return array_ops.gather_v2(iterable, order) + if iterable.shape.rank is not None and iterable.shape.rank != 1: + raise ValueError('sort only supports only 1D tensors') with ops.control_dependencies( - [check_ops.assert_rank_v2(iterable, 1, 'only support 1-D tensor')]): + [check_ops.assert_rank_v2(iterable, 1, 'sort only supports only 1D tensors')]): return sort_ops.sort(iterable, direction=direction) ",0,train 6b8469f225837eff0ecc6a92cc74d1605f3d4dae,tensorflow/tensorflow,"Add go_backwards support for keras fused lstm PiperOrigin-RevId: 297287353 Change-Id: Idaebe3d0c84fc8be03651233a4af7c9cd46a23ca",lstm_utils.cc,"@@ -95,6 +95,14 @@ Value Transpose2D(OpBuilder* builder, Value value_to_transpose, return Transpose(builder, value_to_transpose, perm, type, location); } +Value Reverse(OpBuilder* builder, Value value_to_reverse, int axis, + RankedTensorType type, mlir::Location location) { + auto axis_op = CreateI32SplatConst(builder, {1}, axis, location); + // The result type will be the same as the input. 
+ return builder->create(location, type, value_to_reverse, + axis_op); +} + ArrayRef GetRankedTensorShape(Value value) { return value.getType().cast().getShape(); } @@ -615,6 +623,16 @@ LogicalResult ConvertKerasLSTMLayer(mlir::FuncOp func_op, OpBuilder* builder) { final_input_type = final_inputs.getType().dyn_cast(); } + // Handle go_backwards: + // LSTM in Keras semantic will reverse the input sequence if it's go_backwards + auto go_backwards_attr = func_op.getAttrOfType(""tf.go_backwards""); + + if (go_backwards_attr != nullptr && go_backwards_attr.getValue()) { + // We assume input is already in {time, batch, size} layout. + final_inputs = + Reverse(builder, final_inputs, 0, final_input_type, func_op.getLoc()); + } + int batch = final_input_type.getDimSize(1); int time = final_input_type.getDimSize(0); ",0,train f72bf79d9da541165f88161bd0a973c085ec924d,tensorflow/tensorflow,"Automated rollback of commit 05b15600b3c4472ec79aa865ea1d313c87b68a21 PiperOrigin-RevId: 273779697",training.py,"@@ -291,6 +291,22 @@ class Model(network.Network): self._experimental_run_tf_function = kwargs.pop( 'experimental_run_tf_function', True) + # Prepare Session arguments (legacy). + kwargs.pop('cloning', None) # Legacy DistStrat argument, never used. + allowed_kwargs = {'feed_dict', 'fetches', 'options', 'run_metadata'} + unknown_kwargs = set(kwargs.keys()) - allowed_kwargs + if unknown_kwargs: + raise TypeError( + 'Invalid keyword argument(s) in `compile`: %s' % (unknown_kwargs,)) + self._function_kwargs = kwargs + if self._function_kwargs: + self._experimental_run_tf_function = False + if self.run_eagerly: + raise ValueError( + 'Session keyword arguments are not supported ' + 'when `run_eagerly=True`. You passed the following ' + 'Session arguments: %s' % (self._function_kwargs,)) + self._set_optimizer(optimizer) is_any_optimizer_v1 = any(isinstance(opt, optimizers.Optimizer) for opt in nest.flatten(self.optimizer)) @@ -416,8 +432,6 @@ class Model(network.Network): # Functions for train, test and predict will # be compiled lazily when required. # This saves time when the user is not using all functions. 
- self._function_kwargs = kwargs - self.train_function = None self.test_function = None self.predict_function = None ",0,train f72bf79d9da541165f88161bd0a973c085ec924d,tensorflow/tensorflow,"Automated rollback of commit 05b15600b3c4472ec79aa865ea1d313c87b68a21 PiperOrigin-RevId: 273779697",training_test.py,"@@ -244,6 +244,38 @@ class CompileTest(keras_parameterized.TestCase): run_eagerly=testing_utils.should_run_eagerly(), experimental_run_tf_function=testing_utils.should_run_tf_function()) + @keras_parameterized.run_all_keras_modes + def test_compile_with_session_kwargs(self): + model = testing_utils.get_small_sequential_mlp( + num_hidden=10, num_classes=2, input_dim=3) + + # Test that unknown arguments are not accepted + with self.assertRaisesRegexp( + TypeError, + r'Invalid keyword argument'): + model.compile( + optimizer='adam', + loss='mse', + foo=True) + + if testing_utils.should_run_eagerly(): + # Test that Session kwargs cannot be used with run_eagerly + with self.assertRaisesRegexp( + ValueError, + r'not supported when `run_eagerly=True`'): + model.compile( + optimizer='adam', + loss='mse', + run_eagerly=True, + feed_dict={}) + else: + # Test that Session kwargs trigger legacy path execution + model.compile( + optimizer='adam', + loss='mse', + feed_dict={}) + self.assertFalse(model._experimental_run_tf_function) + class TrainingTest(keras_parameterized.TestCase): ",0,train 87b040bcdf94d3b0799ad433c97e636b55f2d27b,tensorflow/tensorflow,"Free queues when Initialize() fails. All queue ops except for RandomShuffleQueue do not unref correctly when Initialize() fails. Since the caller does not free the queue when returned status is fail, this is memory leak. Fixing all other queue ops as done in RandomShuffleQueue. Change: 133673012",fifo_queue_op.cc,"@@ -50,8 +50,13 @@ class FIFOQueueOp : public QueueOp { return [this](QueueInterface** ret) { FIFOQueue* queue = new FIFOQueue(capacity_, component_types_, component_shapes_, cinfo_.name()); - *ret = queue; - return queue->Initialize(); + Status s = queue->Initialize(); + if (s.ok()) { + *ret = queue; + } else { + queue->Unref(); + } + return s; }; } ",0,train 87b040bcdf94d3b0799ad433c97e636b55f2d27b,tensorflow/tensorflow,"Free queues when Initialize() fails. All queue ops except for RandomShuffleQueue do not unref correctly when Initialize() fails. Since the caller does not free the queue when returned status is fail, this is memory leak. Fixing all other queue ops as done in RandomShuffleQueue. Change: 133673012",padding_fifo_queue_op.cc,"@@ -42,7 +42,8 @@ namespace tensorflow { // tensor of handles to Queues in the corresponding device. class PaddingFIFOQueueOp : public QueueOp { public: - explicit PaddingFIFOQueueOp(OpKernelConstruction* context) : QueueOp(context) { + explicit PaddingFIFOQueueOp(OpKernelConstruction* context) + : QueueOp(context) { OP_REQUIRES_OK(context, context->GetAttr(""shapes"", &component_shapes_)); for (const auto& shape : component_shapes_) { OP_REQUIRES(context, shape.dims() >= 0, @@ -56,8 +57,13 @@ class PaddingFIFOQueueOp : public QueueOp { return [this](QueueInterface** ret) { PaddingFIFOQueue* queue = new PaddingFIFOQueue( capacity_, component_types_, component_shapes_, cinfo_.name()); - *ret = queue; - return queue->Initialize(); + Status s = queue->Initialize(); + if (s.ok()) { + *ret = queue; + } else { + queue->Unref(); + } + return s; }; } ",0,train 87b040bcdf94d3b0799ad433c97e636b55f2d27b,tensorflow/tensorflow,"Free queues when Initialize() fails. 
All queue ops except for RandomShuffleQueue do not unref correctly when Initialize() fails. Since the caller does not free the queue when returned status is fail, this is memory leak. Fixing all other queue ops as done in RandomShuffleQueue. Change: 133673012",priority_queue_op.cc,"@@ -53,8 +53,13 @@ class PriorityQueueOp : public QueueOp { return [this](QueueInterface** ret) { PriorityQueue* queue = new PriorityQueue( capacity_, component_types_, component_shapes_, cinfo_.name()); - *ret = queue; - return queue->Initialize(); + Status s = queue->Initialize(); + if (s.ok()) { + *ret = queue; + } else { + queue->Unref(); + } + return s; }; } ",0,train 90393adbc0366515b9903407c9aa1a70799508c6,tensorflow/tensorflow,"Fix the forwardprop docstring Labels were incorrectly broadcasting against predictions in the regression examples Fixes #46848. PiperOrigin-RevId: 355742564 Change-Id: I532d3f19ed38e1e9c06a0b1d009bd867e0d25983",forwardprop.py,"@@ -234,12 +234,13 @@ class ForwardAccumulator(): Consider a simple linear regression: >>> x = tf.constant([[2.0, 3.0], [1.0, 4.0]]) + >>> targets = tf.constant([[1.], [-1.]]) >>> dense = tf.keras.layers.Dense(1) >>> dense.build([None, 2]) >>> with tf.autodiff.ForwardAccumulator( ... primals=dense.kernel, ... tangents=tf.constant([[1.], [0.]])) as acc: - ... loss = tf.reduce_sum((dense(x) - tf.constant([1., -1.])) ** 2.) + ... loss = tf.reduce_sum((dense(x) - targets) ** 2.) >>> acc.jvp(loss) @@ -258,9 +259,10 @@ class ForwardAccumulator(): invocations: >>> x = tf.constant([[2.0, 3.0], [1.0, 4.0]]) + >>> targets = tf.constant([[1.], [-1.]]) >>> dense = tf.keras.layers.Dense(1) >>> dense.build([None, 2]) - >>> loss_fn = lambda: tf.reduce_sum((dense(x) - tf.constant([1., -1.])) ** 2.) + >>> loss_fn = lambda: tf.reduce_sum((dense(x) - targets) ** 2.) >>> kernel_fprop = [] >>> with tf.autodiff.ForwardAccumulator( ... dense.kernel, tf.constant([[1.], [0.]])) as acc: ",0,train abd645085b1dd1496df847b05a1934d471a2f2c0,tensorflow/tensorflow,"Use the correct device ordinal to check whether the device the executable was built for is equivalent to the device the it will run on. Before this patch, if the device to run on was provided via a stream without setting the device ordinal in the ExecutableRunOptions, we would check the default device against the device the executable was built for. PiperOrigin-RevId: 206892902",local_client.cc,"@@ -101,11 +101,14 @@ Status LocalExecutable::ValidateExecutionOptions( } } - // Verify that the device the executable was built for is equivalent to the - // device it will run on. - int run_device_ordinal = run_options.device_ordinal() == -1 - ? backend_->default_device_ordinal() - : run_options.device_ordinal(); + // Verify that the device the executable was built for is equivalent + // to the device it will run on. + int run_device_ordinal = run_options.device_ordinal(); + if (run_device_ordinal == -1) { + run_device_ordinal = run_options.stream() != nullptr + ? 
run_options.stream()->parent()->device_ordinal() + : backend_->default_device_ordinal(); + } TF_ASSIGN_OR_RETURN(bool devices_equivalent, backend_->devices_equivalent( run_device_ordinal, build_options_.device_ordinal())); ",0,test 87bdd515ecd1991cb11d5dac654f47033f520768,tensorflow/tensorflow,Prefer tf.ones and tf.zeros over tf.fill,normalization.py,"@@ -1270,15 +1270,12 @@ class LayerNormalization(Layer): inputs = array_ops.reshape(inputs, squeezed_shape) - def _set_const_tensor(val, dtype, shape): - return array_ops.fill(shape, constant_op.constant(val, dtype=dtype)) - # self.gamma and self.beta have the wrong shape for fused_batch_norm, so # we cannot pass them as the scale and offset parameters. Therefore, we # create two constant tensors in correct shapes for fused_batch_norm and # later construct a separate calculation on the scale and offset. - scale = _set_const_tensor(1.0, self.dtype, [pre_dim]) - offset = _set_const_tensor(0.0, self.dtype, [pre_dim]) + scale = array_ops.ones([pre_dim], dtype=self.dtype) + offset = array_ops.zeros([pre_dim], dtype=self.dtype) # Compute layer normalization using the fused_batch_norm function. outputs, _, _ = nn.fused_batch_norm( ",0,train 87bdd515ecd1991cb11d5dac654f47033f520768,tensorflow/tensorflow,Prefer tf.ones and tf.zeros over tf.fill,array_grad.py,"@@ -77,10 +77,11 @@ def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index): # with 0's everywhere and 1 in the concat dim position. # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now) mask = array_ops.concat([ - array_ops.fill(array_ops.expand_dims(concat_dim, 0), 0), [1], - array_ops.fill(shape_of_shape - concat_dim - 1, 0) + array_ops.zeros(array_ops.expand_dims(concat_dim, 0), + dtype=dtypes.int32), [1], + array_ops.zeros(shape_of_shape - concat_dim - 1, dtype=dtypes.int32) ], 0) - begin = array_ops.fill(shape_of_shape, 0) + begin = array_ops.zeros(shape_of_shape, dtype=dtypes.int32) return mask, begin def _ExtractInputShapes(inputs): ",0,train 87bdd515ecd1991cb11d5dac654f47033f520768,tensorflow/tensorflow,Prefer tf.ones and tf.zeros over tf.fill,embedding_ops.py,"@@ -525,8 +525,8 @@ def embedding_lookup_sparse(params, embeddings = array_ops.gather(embeddings, idx) # Reshape weights to allow broadcast - ones = array_ops.fill( - array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1) + ones_shape = array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0) + ones = array_ops.ones(ones_shape, dtype=dtypes.int32) bcast_weights_shape = array_ops.concat([array_ops.shape(weights), ones], 0) ",0,train 87bdd515ecd1991cb11d5dac654f47033f520768,tensorflow/tensorflow,Prefer tf.ones and tf.zeros over tf.fill,gradients_util.py,"@@ -28,7 +28,6 @@ from tensorflow.python import pywrap_tfe from tensorflow.python.eager import backprop from tensorflow.python.eager import backprop_util from tensorflow.python.eager import context -from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import function as framework_function from tensorflow.python.framework import ops @@ -172,9 +171,8 @@ def _DefaultGradYs(grad_ys, ""Gradients of complex tensors must set grad_ys (y.dtype = %r)"" % y.dtype) new_grad_ys.append( - array_ops.fill( - array_ops.shape(y), - constant_op.constant(1, dtype=y.dtype, name=""grad_ys_%d"" % i))) + array_ops.ones( + array_ops.shape(y), dtype=y.dtype, name=""grad_ys_%d"" % i)) continue if y.dtype.is_floating or y.dtype.is_integer: if not grad_y.dtype.is_floating and not 
grad_y.dtype.is_integer: ",0,train 87bdd515ecd1991cb11d5dac654f47033f520768,tensorflow/tensorflow,Prefer tf.ones and tf.zeros over tf.fill,image_ops_impl.py,"@@ -5548,7 +5548,8 @@ def non_max_suppression_padded_v2(boxes, array_ops.gather(array_ops.reshape(sorted_indices, [-1]), gather_idx), [batch_size, -1]) - invalid_index = array_ops.fill([batch_size, max_output_size], 0) + invalid_index = array_ops.zeros( + [batch_size, max_output_size], dtype=dtypes.int32) idx_index = array_ops.expand_dims(math_ops.range(max_output_size), 0) num_valid_expanded = array_ops.expand_dims(num_valid, 1) idx = array_ops.where(idx_index < num_valid_expanded, ",0,train 87bdd515ecd1991cb11d5dac654f47033f520768,tensorflow/tensorflow,Prefer tf.ones and tf.zeros over tf.fill,math_grad.py,"@@ -330,9 +330,10 @@ def _SegmentMeanGrad(op, grad): input_rank = array_ops.rank(op.inputs[0]) ones_shape = array_ops.concat([ array_ops.shape(op.inputs[1]), - array_ops.fill(array_ops.expand_dims(input_rank - 1, 0), 1) + array_ops.ones( + array_ops.expand_dims(input_rank - 1, 0), dtype=dtypes.int32) ], 0) - ones = array_ops.fill(ones_shape, constant_op.constant(1, dtype=grad.dtype)) + ones = array_ops.ones(ones_shape, dtype=grad.dtype) scaled_grad = math_ops.divide(grad, math_ops.segment_sum(ones, op.inputs[1])) return array_ops.gather(scaled_grad, op.inputs[1]), None ",0,train 87bdd515ecd1991cb11d5dac654f47033f520768,tensorflow/tensorflow,Prefer tf.ones and tf.zeros over tf.fill,math_ops.py,"@@ -4169,7 +4169,7 @@ def reduced_shape(input_shape, axes): ], # [1, 2] [ input_shape, # [2, 3, 5, 7] - array_ops.fill(axes_shape, 1) + array_ops.ones(axes_shape, dtype=dtypes.int32) ]) # [1, 1] ",0,train e305ac4b75a9523bf047fdaef75159f13bd04b86,tensorflow/tensorflow,"[tflite] add int8 input/output to label_image More and more models, such as MobilenetV3's EdgeTPU ones, are using post-training full integer quantization. With this patch, I can get reasonable results. ./label_image_int8 -m mobilenet_edgetpu_224_1.0_int8.tflite Loaded model mobilenet_edgetpu_224_1.0_int8.tflite resolved reporter INFO: Initialized TensorFlow Lite runtime. invoked average time: 15.363 ms 0.867188: 653 military uniform 0.0390625: 835 suit 0.015625: 458 bow tie 0.0078125: 907 Windsor tie 0.00390625: 716 pickelhaube",bitmap_helpers.h,"@@ -31,10 +31,12 @@ void resize(T* out, uint8_t* in, int image_height, int image_width, int wanted_channels, Settings* s); // explicit instantiation -template void resize(uint8_t*, unsigned char*, int, int, int, int, int, - int, Settings*); template void resize(float*, unsigned char*, int, int, int, int, int, int, Settings*); +template void resize(int8_t*, unsigned char*, int, int, int, int, int, + int, Settings*); +template void resize(uint8_t*, unsigned char*, int, int, int, int, int, + int, Settings*); } // namespace label_image } // namespace tflite ",0,train e305ac4b75a9523bf047fdaef75159f13bd04b86,tensorflow/tensorflow,"[tflite] add int8 input/output to label_image More and more models, such as MobilenetV3's EdgeTPU ones, are using post-training full integer quantization. With this patch, I can get reasonable results. ./label_image_int8 -m mobilenet_edgetpu_224_1.0_int8.tflite Loaded model mobilenet_edgetpu_224_1.0_int8.tflite resolved reporter INFO: Initialized TensorFlow Lite runtime. 
invoked average time: 15.363 ms 0.867188: 653 military uniform 0.0390625: 835 suit 0.015625: 458 bow tie 0.0078125: 907 Windsor tie 0.00390625: 716 pickelhaube",bitmap_helpers_impl.h,"@@ -82,10 +82,19 @@ void resize(T* out, uint8_t* in, int image_height, int image_width, auto output_number_of_pixels = wanted_height * wanted_width * wanted_channels; for (int i = 0; i < output_number_of_pixels; i++) { - if (s->input_floating) - out[i] = (output[i] - s->input_mean) / s->input_std; - else - out[i] = (uint8_t)output[i]; + switch (s->input_type) { + case kTfLiteFloat32: + out[i] = (output[i] - s->input_mean) / s->input_std; + break; + case kTfLiteInt8: + out[i] = output[i] - 128; + break; + case kTfLiteUInt8: + out[i] = output[i]; + break; + default: + break; + } } } ",0,train e305ac4b75a9523bf047fdaef75159f13bd04b86,tensorflow/tensorflow,"[tflite] add int8 input/output to label_image More and more models, such as MobilenetV3's EdgeTPU ones, are using post-training full integer quantization. With this patch, I can get reasonable results. ./label_image_int8 -m mobilenet_edgetpu_224_1.0_int8.tflite Loaded model mobilenet_edgetpu_224_1.0_int8.tflite resolved reporter INFO: Initialized TensorFlow Lite runtime. invoked average time: 15.363 ms 0.867188: 653 military uniform 0.0390625: 835 suit 0.015625: 458 bow tie 0.0078125: 907 Windsor tie 0.00390625: 716 pickelhaube",get_top_n.h,"@@ -27,10 +27,12 @@ void get_top_n(T* prediction, int prediction_size, size_t num_results, bool input_floating); // explicit instantiation so that we can use them otherwhere -template void get_top_n(uint8_t*, int, size_t, float, - std::vector>*, bool); template void get_top_n(float*, int, size_t, float, - std::vector>*, bool); + std::vector>*, int); +template void get_top_n(int8_t*, int, size_t, float, + std::vector>*, int); +template void get_top_n(uint8_t*, int, size_t, float, + std::vector>*, int); } // namespace label_image } // namespace tflite ",0,train e305ac4b75a9523bf047fdaef75159f13bd04b86,tensorflow/tensorflow,"[tflite] add int8 input/output to label_image More and more models, such as MobilenetV3's EdgeTPU ones, are using post-training full integer quantization. With this patch, I can get reasonable results. ./label_image_int8 -m mobilenet_edgetpu_224_1.0_int8.tflite Loaded model mobilenet_edgetpu_224_1.0_int8.tflite resolved reporter INFO: Initialized TensorFlow Lite runtime. invoked average time: 15.363 ms 0.867188: 653 military uniform 0.0390625: 835 suit 0.015625: 458 bow tie 0.0078125: 907 Windsor tie 0.00390625: 716 pickelhaube",get_top_n_impl.h,"@@ -30,19 +30,30 @@ extern bool input_floating; template void get_top_n(T* prediction, int prediction_size, size_t num_results, float threshold, std::vector>* top_results, - bool input_floating) { + int input_type) { // Will contain top N results in ascending order. std::priority_queue, std::vector>, std::greater>> top_result_pq; const long count = prediction_size; // NOLINT(runtime/int) + float value = 0.0; + for (int i = 0; i < count; ++i) { - float value; - if (input_floating) - value = prediction[i]; - else - value = prediction[i] / 255.0; + switch (input_type) { + case kTfLiteFloat32: + value = prediction[i]; + break; + case kTfLiteInt8: + // value = prediction[i] / 128.0; + value = (prediction[i] + 128) / 256.0; + break; + case kTfLiteUInt8: + value = prediction[i] / 255.0; + break; + default: + break; + } // Only add it if it beats the threshold and has a chance at being in // the top N. 
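The e305ac4b get_top_n change in the rows above maps int8 predictions with (prediction + 128) / 256.0 before thresholding, alongside the existing float32 and uint8 branches. A small NumPy sketch of that per-type mapping, for illustration only (the real tool reads the type from the interpreter's output tensor):

import numpy as np

def normalize_scores(pred, input_type):
    # Mirrors the switch added in get_top_n_impl.h; int8 is shifted into [0, 1)
    # rather than divided by 128, matching the patch.
    pred = np.asarray(pred)
    if input_type == "float32":
        return pred.astype(np.float32)
    if input_type == "int8":
        return (pred.astype(np.float32) + 128.0) / 256.0
    if input_type == "uint8":
        return pred.astype(np.float32) / 255.0
    raise ValueError(f"unhandled input type: {input_type}")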
if (value < threshold) { ",0,train e305ac4b75a9523bf047fdaef75159f13bd04b86,tensorflow/tensorflow,"[tflite] add int8 input/output to label_image More and more models, such as MobilenetV3's EdgeTPU ones, are using post-training full integer quantization. With this patch, I can get reasonable results. ./label_image_int8 -m mobilenet_edgetpu_224_1.0_int8.tflite Loaded model mobilenet_edgetpu_224_1.0_int8.tflite resolved reporter INFO: Initialized TensorFlow Lite runtime. invoked average time: 15.363 ms 0.867188: 653 military uniform 0.0390625: 835 suit 0.015625: 458 bow tie 0.0078125: 907 Windsor tie 0.00390625: 716 pickelhaube",label_image.cc,"@@ -221,13 +221,18 @@ void RunInference(Settings* s) { int wanted_width = dims->data[2]; int wanted_channels = dims->data[3]; - switch (interpreter->tensor(input)->type) { + s->input_type = interpreter->tensor(input)->type; + switch (s->input_type) { case kTfLiteFloat32: - s->input_floating = true; resize(interpreter->typed_tensor(input), in.data(), image_height, image_width, image_channels, wanted_height, wanted_width, wanted_channels, s); break; + case kTfLiteInt8: + resize(interpreter->typed_tensor(input), in.data(), + image_height, image_width, image_channels, wanted_height, + wanted_width, wanted_channels, s); + break; case kTfLiteUInt8: resize(interpreter->typed_tensor(input), in.data(), image_height, image_width, image_channels, wanted_height, @@ -238,7 +243,6 @@ void RunInference(Settings* s) { << interpreter->tensor(input)->type << "" yet""; exit(-1); } - auto profiler = absl::make_unique(s->max_profiling_buffer_entries); interpreter->SetProfiler(profiler.get()); @@ -290,16 +294,22 @@ void RunInference(Settings* s) { switch (interpreter->tensor(output)->type) { case kTfLiteFloat32: get_top_n(interpreter->typed_output_tensor(0), output_size, - s->number_of_results, threshold, &top_results, true); + s->number_of_results, threshold, &top_results, + s->input_type); + break; + case kTfLiteInt8: + get_top_n(interpreter->typed_output_tensor(0), + output_size, s->number_of_results, threshold, + &top_results, s->input_type); break; case kTfLiteUInt8: get_top_n(interpreter->typed_output_tensor(0), output_size, s->number_of_results, threshold, - &top_results, false); + &top_results, s->input_type); break; default: LOG(FATAL) << ""cannot handle output type "" - << interpreter->tensor(input)->type << "" yet""; + << interpreter->tensor(output)->type << "" yet""; exit(-1); } ",0,train e305ac4b75a9523bf047fdaef75159f13bd04b86,tensorflow/tensorflow,"[tflite] add int8 input/output to label_image More and more models, such as MobilenetV3's EdgeTPU ones, are using post-training full integer quantization. With this patch, I can get reasonable results. ./label_image_int8 -m mobilenet_edgetpu_224_1.0_int8.tflite Loaded model mobilenet_edgetpu_224_1.0_int8.tflite resolved reporter INFO: Initialized TensorFlow Lite runtime. 
invoked average time: 15.363 ms 0.867188: 653 military uniform 0.0390625: 835 suit 0.015625: 458 bow tie 0.0078125: 907 Windsor tie 0.00390625: 716 pickelhaube",label_image.h,"@@ -26,7 +26,7 @@ struct Settings { bool verbose = false; bool accel = false; bool old_accel = false; - bool input_floating = false; + int input_type = kTfLiteFloat32; bool profiling = false; bool allow_fp16 = false; bool gl_backend = false; @@ -37,7 +37,6 @@ struct Settings { tflite::FlatBufferModel* model; string input_bmp_name = ""./grace_hopper.bmp""; string labels_file_name = ""./labels.txt""; - string input_layer_type = ""uint8_t""; int number_of_threads = 4; int number_of_results = 5; int max_profiling_buffer_entries = 1024; ",0,train cdb5cd1786f295e699789cc822bca2e52a4cb81c,tensorflow/tensorflow,"Do not use fused batch norm in the 5D case. https://github.com/tensorflow/tensorflow/commit/27d26a8d86bceda282ad9ba3e3116a00759d4ebc added support for using fused batch norm for 5D tensors, but this caused a regression in UNet. It's unclear why, but perhaps it is due to the fact fused batch norm uses Bessel's correction and nonfused batch norm does not. PiperOrigin-RevId: 342728653 Change-Id: I23c705c73ac4f55c1c799d1530d1e9c6a9928ea0",normalization.py,"@@ -248,6 +248,7 @@ class BatchNormalizationBase(Layer): axis = [self.axis] if isinstance(self.axis, int) else self.axis # Axis -3 is equivalent to 1, and axis -1 is equivalent to 3, because the # input rank is required to be 4 (which is checked later). + # TODO(b/173253101): Once the input rank can be 5, update this check. if len(axis) > 1 or axis[0] not in (-3, -1, 1, 3): raise ValueError('Passing `fused=True` is only supported when axis is 1 ' 'or 3. Got axis %s' % (axis,)) @@ -331,16 +332,19 @@ class BatchNormalizationBase(Layer): # TODO(yaozhang): if input is not 4D, reshape it to 4D and reshape the # output back to its original shape accordingly. if self._USE_V2_BEHAVIOR: + # TODO(b/173253101): Using fused in the 5D case is currently disabled + # due to a regression on UNet, so it is only currently only supported in + # the 4D case. if self.fused is None: - self.fused = ndims in (4, 5) - elif self.fused and ndims not in (4, 5): + self.fused = ndims == 4 + elif self.fused and ndims != 4: raise ValueError('Batch normalization layers with `fused=True` only ' 'support 4D or 5D input tensors. ' 'Received tensor with shape: %s' % (tuple(input_shape),)) else: assert self.fused is not None - self.fused = (ndims in (4, 5) and self._fused_can_be_used()) + self.fused = (ndims == 4 and self._fused_can_be_used()) # TODO(chrisying): fused batch norm is currently not supported for # multi-axis batch norm and by extension virtual batches. In some cases, # it might be possible to use fused batch norm but would require reshaping ",0,train cdb5cd1786f295e699789cc822bca2e52a4cb81c,tensorflow/tensorflow,"Do not use fused batch norm in the 5D case. https://github.com/tensorflow/tensorflow/commit/27d26a8d86bceda282ad9ba3e3116a00759d4ebc added support for using fused batch norm for 5D tensors, but this caused a regression in UNet. It's unclear why, but perhaps it is due to the fact fused batch norm uses Bessel's correction and nonfused batch norm does not. 
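The hypothesis stated just above (and checked by the test_bessels_correction test in the next row) comes down to the fused kernel tracking the Bessel-corrected sample variance while the nonfused path tracks the population variance; a two-line NumPy check of the gap for the test's input [0, 2]:

import numpy as np

x = np.array([0.0, 2.0])
print(x.var())          # 1.0, population variance, what the nonfused path uses
print(x.var(ddof=1))    # 2.0, Bessel-corrected, what the fused kernel uses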
PiperOrigin-RevId: 342728653 Change-Id: I23c705c73ac4f55c1c799d1530d1e9c6a9928ea0",normalization_test.py,"@@ -241,6 +241,31 @@ class BatchNormalizationTest(keras_parameterized.TestCase): self.assertAllClose(model.bn.moving_mean.numpy(), [0.047], atol=3e-3) self.assertAllClose(model.bn.moving_variance.numpy(), [0.9], atol=3e-2) + @combinations.generate(combinations.combine(mode=['eager'])) + def test_bessels_correction(self): + # Bessel's correction is currently only used in the fused case. In the + # future, it may be used in the nonfused case as well. + + x = constant_op.constant([0., 2.], shape=[2, 1, 1, 1]) + layer = normalization_v2.BatchNormalization( + momentum=0.5, moving_variance_initializer='zeros') + layer(x, training=True) + self.assertTrue(layer.fused) + # Since fused is used, Bessel's correction is used. The variance of [0, 2] + # is 2 with Bessel's correction. Since the momentum is 0.5, the variance is + # 2 * 0.5 == 1. + self.assertAllEqual(self.evaluate(layer.moving_variance), [1.]) + + x = constant_op.constant([0., 2.], shape=[2, 1, 1, 1, 1]) + layer = normalization_v2.BatchNormalization( + momentum=0.5, moving_variance_initializer='zeros') + layer(x, training=True) + self.assertFalse(layer.fused) + # Since fused is not used, Bessel's correction is not used. The variance of + # [0, 2] is 1 without Bessel's correction. Since the momentum is 0.5, the + # variance is 1 * 0.5 == 0.5. + self.assertAllEqual(self.evaluate(layer.moving_variance), [0.5]) + class BatchNormalizationV1Test(keras_parameterized.TestCase): @@ -291,6 +316,12 @@ class BatchNormalizationV2Test(keras_parameterized.TestCase): norm(inp) self.assertEqual(norm.fused, False) + norm = normalization_v2.BatchNormalization() + self.assertIsNone(norm.fused) + inp = keras.layers.Input(shape=(4, 4, 4, 4)) + norm(inp) + self.assertEqual(norm.fused, False) + norm = normalization_v2.BatchNormalization(virtual_batch_size=2) self.assertEqual(norm.fused, False) inp = keras.layers.Input(shape=(4, 4, 4)) ",0,train 193ff560137f885a18398e83d4d490b0f9ea610a,tensorflow/tensorflow,"[XLA] [NFC] Add more information to buffer assignment: print parameter and output shape PiperOrigin-RevId: 285440385 Change-Id: Ia993b9fd0218820403e4d9edb2da44e72c8677fd",buffer_assignment.cc,"@@ -298,6 +298,38 @@ static bool CompareHloValuesById(const HloValue* a, const HloValue* b) { return a->id() < b->id(); } +// Returns parameter instruction corresponding to the allocation or nullptr. +static const HloInstruction* GetEntryParameterInstruction( + const BufferAllocation& alloc) { + for (const auto& p : alloc.assigned_buffers()) { + const HloValue* value = p.first; + const HloInstruction* instr = value->instruction(); + if (instr->opcode() == HloOpcode::kParameter && + instr->parent() == instr->parent()->parent()->entry_computation()) { + return instr; + } + } + return nullptr; +} + +// Returns root module output instruction corresponding to the allocation or +// nullptr. 
+static const HloInstruction* GetOutputInstruction( + const BufferAllocation& alloc) { + for (const auto& p : alloc.assigned_buffers()) { + const HloValue* value = p.first; + for (const HloPosition& position : value->positions()) { + const HloInstruction* instr = position.instruction; + if (position.index.empty() && + instr->parent()->root_instruction() == instr && + instr->parent()->IsEntryComputation()) { + return instr; + } + } + } + return nullptr; +} + string BufferAllocation::ToString() const { string output; StrAppendFormat(&output, ""allocation %d: %p, size %d"", index_, this, size()); @@ -305,8 +337,15 @@ string BufferAllocation::ToString() const { StrAppend(&output, "", color "", color().value()); } if (is_entry_computation_parameter()) { - StrAppend(&output, "", parameter "", parameter_number(), "" at ShapeIndex "", - param_shape_index().ToString()); + const HloInstruction* param = GetEntryParameterInstruction(*this); + CHECK(param); + StrAppend(&output, "", parameter "", parameter_number(), "", shape |"", + param->shape().ToString(/*print_layout=*/false), + ""| at ShapeIndex "", param_shape_index().ToString()); + } + if (const HloInstruction* instr = GetOutputInstruction(*this)) { + StrAppend(&output, "", output shape is |"", + instr->shape().ToString(/*print_layout=*/false), ""|""); } if (is_constant()) { StrAppend(&output, "", constant""); ",0,train 452952e289084468d06431db433ce5fbd031dfac,tensorflow/tensorflow,"Automated rollback of commit 31df1ce7dee077a5acaba2ddd43959665a8ae323 PiperOrigin-RevId: 235552900",callbacks.py,"@@ -1222,8 +1222,6 @@ class TensorBoard(Callback): with self._train_writer.as_default(): with summary_ops_v2.always_record_summaries(): summary_ops_v2.graph(K.get_graph()) - if self.model._is_graph_network: # pylint: disable=protected-access - summary_ops_v2.keras_model('keras', self.model, step=0) def _close_writers(self): """"""Close all remaining open file writers owned by this callback. ",0,train a5ac44a0da3fb5e325195577149f27a4dae9ae4a,tensorflow/tensorflow,Add GetNumberOfEngineInputs function,utils.cc,"@@ -165,5 +165,21 @@ string GetLoadedTensorRTVersion() { return absl::StrCat(major, ""."", minor, ""."", patch); } +int GetNumberOfEngineInputs( + const nvinfer1::ICudaEngine *engine) { + int n_bindings = engine->getNbBindings(); + int n_input = 0; + for (int i=0; i < n_bindings; i++) { + if (engine->bindingIsInput(i)) n_input++; + } + // According to TensorRT 7 doc: ""If the engine has been built for K profiles, + // the first getNbBindings() / K bindings are used by profile number 0, the + // following getNbBindings() / K bindings are used by profile number 1 etc."" + // Therefore, to get the number of input tensors, we need to divide by the + // the number of profiles. + int n_profiles = engine->getNbOptimizationProfiles(); + return n_input / n_profiles; +} + } // namespace tensorrt } // namespace tensorflow ",0,train a5ac44a0da3fb5e325195577149f27a4dae9ae4a,tensorflow/tensorflow,Add GetNumberOfEngineInputs function,utils.h,"@@ -106,6 +106,11 @@ string GetLinkedTensorRTVersion(); // TensorRT library version information {Maj, Min, Patch}. string GetLoadedTensorRTVersion(); +// Returns the number of inputs for the engine, which also correspends to the +// number of input tensors for the network. This can differ from the number of +// input bindings, because each profile has a set of bindings. 
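The utils.h comment ending here explains that TensorRT exposes one full set of bindings per optimization profile, so the number of network inputs is the input-binding count divided by the profile count. A language-neutral sketch of that arithmetic (Python, purely illustrative; the names are not part of the C++ header):

def num_engine_inputs(binding_is_input, n_profiles):
    # binding_is_input: one flag per binding across all profiles;
    # n_profiles: number of optimization profiles (>= 1).
    n_input_bindings = sum(1 for is_input in binding_is_input if is_input)
    return n_input_bindings // n_profiles

# Example: 2 network inputs, 1 output, 3 profiles -> 9 bindings, 6 of them inputs.
assert num_engine_inputs([True, True, False] * 3, 3) == 2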
+int GetNumberOfEngineInputs(const nvinfer1::ICudaEngine *engine); + #endif // GOOGLE_CUDA && GOOGLE_TENSORRT } // namespace tensorrt ",0,train 63c1befb8930f8f5f34ae9f2b10e8ae870493d86,tensorflow/tensorflow,Improve docs for tf.nn.depthwise_conv2d_native,nn_ops.cc,"@@ -831,11 +831,13 @@ a different filter to each input channel (expanding from 1 channel to `channel_multiplier` channels for each), then concatenates the results together. Thus, the output has `in_channels * channel_multiplier` channels. +``` for k in 0..in_channels-1 for q in 0..channel_multiplier-1 output[b, i, j, k * channel_multiplier + q] = sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] * filter[di, dj, k, q] +``` Must have `strides[0] = strides[3] = 1`. For the most common case of the same horizontal and vertices strides, `strides = [1, stride, stride, 1]`. ",0,train 1e1f1aa3c2a505c6d44051291b7bf1b05a0923f7,tensorflow/tensorflow,Update network_test.py,network_test.py,"@@ -131,8 +131,8 @@ class NetworkConstructionTest(keras_parameterized.TestCase): def test_get_layer(self): # create a simple network x = input_layer_lib.Input(shape=(32,)) - dense_a = keras.layers.Dense(4, name='dense_a') - dense_b = keras.layers.Dense(2, name='dense_b') + dense_a = layers.Dense(4, name='dense_a') + dense_b = layers.Dense(2, name='dense_b') y = dense_b(dense_a(x)) network = network_lib.Network(x, y, name='dense_network') ",0,test b51bba348602ef9e9d4e6269d019a40ec3d74c30,tensorflow/tensorflow,"Qualify uses of std::string PiperOrigin-RevId: 324233556 Change-Id: I5537b9bd231c0ea1f267406a9b19ec943928e8cd",device_name_utils.h,"@@ -46,8 +46,8 @@ namespace tensorflow { class DeviceNameUtils { public: // Returns a fully qualified device name given the parameters. - static string FullName(const string& job, int replica, int task, - const string& type, int id); + static std::string FullName(const std::string& job, int replica, int task, + const std::string& type, int id); struct ParsedName { void Clear() { @@ -79,13 +79,13 @@ class DeviceNameUtils { } bool has_job = false; - string job; + std::string job; bool has_replica = false; int replica = 0; bool has_task = false; int task = 0; bool has_type = false; - string type; + std::string type; bool has_id = false; int id = 0; }; @@ -107,7 +107,7 @@ class DeviceNameUtils { // an error and *canonical_name is set to """". static Status CanonicalizeDeviceName(StringPiece fullname, StringPiece basename, - string* canonical_name); + std::string* canonical_name); // Returns true if ""name"" specifies any non-trivial constraint on the device. static bool HasSomeDetails(const ParsedName& name) { @@ -163,11 +163,11 @@ class DeviceNameUtils { static const ParsedName AddressSpace(const ParsedName& name); // Returns the local device given its ""type"" and ""id"". - static string LocalName(StringPiece type, int id); + static std::string LocalName(StringPiece type, int id); // Returns a short local device name (cpu:0, gpu:1, etc) based on // the given fullname. - static string LocalName(StringPiece fullname); + static std::string LocalName(StringPiece fullname); // If ""name"" is a valid local device name (cpu:0, gpu:1, etc.), // fills in parsed.type and parsed.id accordingly. Returns true iff @@ -181,13 +181,14 @@ class DeviceNameUtils { // component into *device. This function will still return true if // the task component is empty, but it requires the relative device // component to be fully specified. 
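The nn_ops.cc doc fix a few rows above wraps the depthwise_conv2d_native pseudo-code in a fenced block; below is a direct NumPy transcription of that formula for a single image with VALID padding (the dropped batch dimension and the fixed padding are simplifications for illustration, not part of the original doc):

import numpy as np

def depthwise_conv2d_valid(x, f, stride=1):
    # x: [H, W, C] input, f: [kh, kw, C, M] filter; output: [H', W', C * M].
    kh, kw, c, m = f.shape
    h, w, _ = x.shape
    oh = (h - kh) // stride + 1
    ow = (w - kw) // stride + 1
    out = np.zeros((oh, ow, c * m), dtype=np.result_type(x, f))
    for i in range(oh):
        for j in range(ow):
            for k in range(c):
                patch = x[i * stride:i * stride + kh, j * stride:j * stride + kw, k]
                for q in range(m):
                    # output[i, j, k*M + q] = sum_{di, dj} input[...] * filter[di, dj, k, q]
                    out[i, j, k * m + q] = np.sum(patch * f[:, :, k, q])
    return out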
- static bool SplitDeviceName(StringPiece name, string* task, string* device); + static bool SplitDeviceName(StringPiece name, std::string* task, + std::string* device); // Get the task name from ParsedName. Return false if the task component is // not fully specified. - static bool GetTaskName(const ParsedName& pn, string* task); + static bool GetTaskName(const ParsedName& pn, std::string* task); - static string ParsedNameToString(const ParsedName& pn); + static std::string ParsedNameToString(const ParsedName& pn); // Returns canonical and legacy full names for the given parsed // device name 'pn'. The returned string names are often useful to @@ -202,8 +203,8 @@ class DeviceNameUtils { // Returns name of the CPU:0 device on the same host as the device // `device_name`. - static Status DeviceNameToCpuDeviceName(const string& device_name, - string* host_device_name); + static Status DeviceNameToCpuDeviceName(const std::string& device_name, + std::string* host_device_name); }; std::ostream& operator<<(std::ostream& os, ",0,train b51bba348602ef9e9d4e6269d019a40ec3d74c30,tensorflow/tensorflow,"Qualify uses of std::string PiperOrigin-RevId: 324233556 Change-Id: I5537b9bd231c0ea1f267406a9b19ec943928e8cd",padding.h,"@@ -53,12 +53,12 @@ Status CheckValidPadding(Padding padding_type, // Return the string containing the list of valid padding types, that can be // used as an Attr() in REGISTER_OP. -string GetPaddingAttrString(); +std::string GetPaddingAttrString(); // Like GetPaddingAttrString(), but also includes EXPLICIT. -string GetPaddingAttrStringWithExplicit(); +std::string GetPaddingAttrStringWithExplicit(); -string GetExplicitPaddingsAttrString(); +std::string GetExplicitPaddingsAttrString(); // Sets padding value based on the given string padding value. Status GetPaddingFromString(StringPiece str_value, Padding* value); ",0,train b51bba348602ef9e9d4e6269d019a40ec3d74c30,tensorflow/tensorflow,"Qualify uses of std::string PiperOrigin-RevId: 324233556 Change-Id: I5537b9bd231c0ea1f267406a9b19ec943928e8cd",tensor_format.h,"@@ -97,18 +97,18 @@ enum FilterTensorFormat { // Parse tensor format from the given string. // Return true if the parsing succeeds, and false if it fails. -bool FormatFromString(const string& format_str, TensorFormat* format); +bool FormatFromString(const std::string& format_str, TensorFormat* format); // Parse tensor format from the given string. // Return true if the parsing succeeds, and false if it fails. -bool FilterFormatFromString(const string& format_str, +bool FilterFormatFromString(const std::string& format_str, FilterTensorFormat* format); // Convert a tensor format into string. -string ToString(TensorFormat format); +std::string ToString(TensorFormat format); // Convert a filter tensor format into string. -string ToString(FilterTensorFormat format); +std::string ToString(FilterTensorFormat format); // Returns the number of spatial dims of a tensor of rank 'num_dims' and tensor // format 'format'. @@ -504,13 +504,13 @@ inline void GetExplicitPaddingForDim( } // Return the string that specifies the data format for convnet operations. -string GetConvnetDataFormatAttrString(); -string GetConvnet3dDataFormatAttrString(); +std::string GetConvnetDataFormatAttrString(); +std::string GetConvnet3dDataFormatAttrString(); // Return the string that specifies the filter format for convnet operations. 
-string GetConvnetFilterFormatAttrString(); -string GetConvnet3dFilterFormatAttrString(); -string GetConvnetDataFormat2D3DAttrString(); +std::string GetConvnetFilterFormatAttrString(); +std::string GetConvnet3dFilterFormatAttrString(); +std::string GetConvnetDataFormat2D3DAttrString(); // Returns a tensor shape for the specified format and dimension sizes. // Works for both 2D and 3D operations. The output shapes are as follows: ",0,train b51bba348602ef9e9d4e6269d019a40ec3d74c30,tensorflow/tensorflow,"Qualify uses of std::string PiperOrigin-RevId: 324233556 Change-Id: I5537b9bd231c0ea1f267406a9b19ec943928e8cd",util.h,"@@ -49,12 +49,12 @@ class MovingAverage { // Returns a string printing bytes in ptr[0..n). The output looks // like ""00 01 ef cd cd ef"". -string PrintMemory(const char* ptr, size_t n); +std::string PrintMemory(const char* ptr, size_t n); // Given a flattened index into a tensor, computes a string s so that // StrAppend(""tensor"", s) is a Python indexing expression. E.g., // ""tensor"", ""tensor[i]"", ""tensor[i, j]"", etc. -string SliceDebugString(const TensorShape& shape, const int64 flat); +std::string SliceDebugString(const TensorShape& shape, const int64 flat); // disable MKL in runtime #ifdef INTEL_MKL ",0,train eb2f6d0410c70a383b60505cea518758d910a006,tensorflow/tensorflow,"VLOG(2) instead of VLOG(1) for detailed op printouts. PiperOrigin-RevId: 157291238",virtual_scheduler.cc,"@@ -475,9 +475,9 @@ Costs VirtualScheduler::Summary() const { } // Also log the op description and their corresponding counts. - VLOG(1) << ""Node description, counts, cost:""; + VLOG(2) << ""Node description, counts, cost:""; for (const auto& item : op_counts_) { - VLOG(1) << ""Node: "" << item.first << "", Count: "" << item.second + VLOG(2) << ""Node: "" << item.first << "", Count: "" << item.second << "", Individual Cost: "" << op_costs_.at(item.first); } ",0,train d2ce989ee65ce40d8cba8e446eaf64f8a5105adf,tensorflow/tensorflow,"Update tensorflow/core/kernels/data/experimental/snapshot_util.cc Co-authored-by: Mihai Maruseac ",snapshot_util.cc,"@@ -514,8 +514,8 @@ class Reader::NestedDataset : public DatasetBase { Status GetNextInternal(IteratorContext* ctx, std::vector* out_tensors, bool* end_of_sequence) override { - const int64 dataset_datasets_size = dataset()->datasets_.size(); - *end_of_sequence = dataset_datasets_size == index_; + const int64 num_datasets = dataset()->datasets_.size(); + *end_of_sequence = num_datasets == index_; if (!*end_of_sequence) { Tensor tensor(DT_VARIANT, TensorShape({})); ",0,train c3830c42bd7ac47eb130b09bfdb75e900f767474,tensorflow/tensorflow,"Renaming `consumers` to `_consumers` in the `CompositeTensor` interface to avoid leaking it into the public API of classes that implement the interface. The consequence of this change is that subclasses of `CompositeTensors` do not necessarily provide the `consumers` method. The existing subclasses -- `SparseTensor`, `RaggedTensor`, and `IndexedSlices` -- do, for legacy reasons. PiperOrigin-RevId: 246651256",composite_tensor.py,"@@ -102,7 +102,7 @@ class CompositeTensor(object): """"""Returns True if this tensor's components belong to a TF graph."""""" raise NotImplementedError(""CompositeTensor._is_symbolic_tensor"") - def consumers(self): + def _consumers(self): """"""Returns a list of `Operation`s that consume this `CompositeTensor`. 
Returns: ",0,train c3830c42bd7ac47eb130b09bfdb75e900f767474,tensorflow/tensorflow,"Renaming `consumers` to `_consumers` in the `CompositeTensor` interface to avoid leaking it into the public API of classes that implement the interface. The consequence of this change is that subclasses of `CompositeTensors` do not necessarily provide the `consumers` method. The existing subclasses -- `SparseTensor`, `RaggedTensor`, and `IndexedSlices` -- do, for legacy reasons. PiperOrigin-RevId: 246651256",ops.py,"@@ -1771,6 +1771,9 @@ class IndexedSlices(_TensorLike, composite_tensor.CompositeTensor): def _is_graph_tensor(self): return hasattr(self._values, ""graph"") + def consumers(self): + return self._consumers() + IndexedSlicesValue = collections.namedtuple( ""IndexedSlicesValue"", [""values"", ""indices"", ""dense_shape""]) ",0,train c3830c42bd7ac47eb130b09bfdb75e900f767474,tensorflow/tensorflow,"Renaming `consumers` to `_consumers` in the `CompositeTensor` interface to avoid leaking it into the public API of classes that implement the interface. The consequence of this change is that subclasses of `CompositeTensors` do not necessarily provide the `consumers` method. The existing subclasses -- `SparseTensor`, `RaggedTensor`, and `IndexedSlices` -- do, for legacy reasons. PiperOrigin-RevId: 246651256",sparse_tensor.py,"@@ -260,6 +260,9 @@ class SparseTensor(_TensorLike, composite_tensor.CompositeTensor): def _is_graph_tensor(self): return hasattr(self._values, ""graph"") + def consumers(self): + return self._consumers() + SparseTensorValue = collections.namedtuple(""SparseTensorValue"", [""indices"", ""values"", ""dense_shape""]) ",0,train c3830c42bd7ac47eb130b09bfdb75e900f767474,tensorflow/tensorflow,"Renaming `consumers` to `_consumers` in the `CompositeTensor` interface to avoid leaking it into the public API of classes that implement the interface. The consequence of this change is that subclasses of `CompositeTensors` do not necessarily provide the `consumers` method. The existing subclasses -- `SparseTensor`, `RaggedTensor`, and `IndexedSlices` -- do, for legacy reasons. PiperOrigin-RevId: 246651256",ragged_tensor.py,"@@ -1863,6 +1863,9 @@ class RaggedTensor(composite_tensor.CompositeTensor): def _is_graph_tensor(self): return hasattr(self._values, ""graph"") + def consumers(self): + return self._consumers() + def is_ragged(value): """"""Returns true if `value` is a ragged tensor or ragged tensor value."""""" ",0,train 1d3d92dfdecd38daf068583b39aef7811a604601,tensorflow/tensorflow,"No-op refactor and comment fix. PiperOrigin-RevId: 221786311",train.py,"@@ -1071,8 +1071,19 @@ def get_sequential_train_hooks(train_steps=namedtuples.GANTrainSteps(1, 1)): return get_hooks +def _num_joint_steps(train_steps): + g_steps = train_steps.generator_train_steps + d_steps = train_steps.discriminator_train_steps + # Get the number of each type of step that should be run. + num_d_and_g_steps = min(g_steps, d_steps) + num_g_steps = g_steps - num_d_and_g_steps + num_d_steps = d_steps - num_d_and_g_steps + + return num_d_and_g_steps, num_g_steps, num_d_steps + + def get_joint_train_hooks(train_steps=namedtuples.GANTrainSteps(1, 1)): - """"""Returns a hooks function for sequential GAN training. + """"""Returns a hooks function for joint GAN training. When using these train hooks, IT IS RECOMMENDED TO USE `use_locking=True` ON ALL OPTIMIZERS TO AVOID RACE CONDITIONS. 
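The c3830c42 rows around this point keep the interface hook private (_consumers) while the legacy subclasses (SparseTensor, RaggedTensor, IndexedSlices) retain a public consumers() wrapper for backwards compatibility. A toy sketch of that pattern; the class names and placeholder body here are invented for illustration:

class CompositeValue:
    def _consumers(self):
        # Interface-level hook; kept private so it does not leak into the
        # public API of every implementing class.
        raise NotImplementedError

class LegacySparseValue(CompositeValue):
    def _consumers(self):
        return []  # placeholder: a real subclass would walk its component ops

    def consumers(self):
        # Re-exported publicly only on the legacy subclasses.
        return self._consumers()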
@@ -1105,12 +1116,7 @@ def get_joint_train_hooks(train_steps=namedtuples.GANTrainSteps(1, 1)): Returns: A function that takes a GANTrainOps tuple and returns a list of hooks. """""" - g_steps = train_steps.generator_train_steps - d_steps = train_steps.discriminator_train_steps - # Get the number of each type of step that should be run. - num_d_and_g_steps = min(g_steps, d_steps) - num_g_steps = g_steps - num_d_and_g_steps - num_d_steps = d_steps - num_d_and_g_steps + num_d_and_g_steps, num_g_steps, num_d_steps = _num_joint_steps(train_steps) def get_hooks(train_ops): g_op = train_ops.generator_train_op ",0,test 0c46c7bcb05b1502bda869db371bee198d5be28a,tensorflow/tensorflow,"Switch to doxygen-friendly comments in generated C++ op code. Change: 144859450",cc_op_gen.cc,"@@ -76,9 +76,9 @@ string ToGuard(const std::string& path) { } // Change: Into: -// ABC // ABC -// // -// DEF // DEF +// ABC /// ABC +// /// +// DEF /// DEF string MakeComment(StringPiece text, StringPiece indent) { string ret; while (!text.empty()) { @@ -89,9 +89,9 @@ string MakeComment(StringPiece text, StringPiece indent) { if (text[newline] != ' ') last_non_space = newline; } if (last_non_space == -1) { - strings::StrAppend(&ret, indent, ""//\n""); + strings::StrAppend(&ret, indent, ""///\n""); } else { - strings::StrAppend(&ret, indent, ""// "", + strings::StrAppend(&ret, indent, ""/// "", text.substr(0, last_non_space + 1), ""\n""); } text.remove_prefix(newline + 1); ",0,train 4316054cc62b030832cd0fd3cdd175e92f232ebf,tensorflow/tensorflow,"[tf.data] Only enables optimization `map_parallelization` on the main dataset pipeline. PiperOrigin-RevId: 349334274 Change-Id: I40dd2650a59dc4677de33d0a62aee4b104412450",map_parallelization.cc,"@@ -65,6 +65,12 @@ Status MapParallelization::OptimizeAndCollectStats(Cluster* cluster, } MutableGraphView graph(output); + // If the GrapplerItem is derived from a FunctionDef, we don't optimize it, + // because we only want to enable extra map parallelism on the main dataset + // pipeline. + if (graph_utils::IsItemDerivedFromFunctionDef(item, graph)) + return Status::OK(); + absl::flat_hash_set nodes_to_delete; FunctionLibraryDefinition function_library(OpRegistry::Global(), item.graph.library()); ",0,train 4316054cc62b030832cd0fd3cdd175e92f232ebf,tensorflow/tensorflow,"[tf.data] Only enables optimization `map_parallelization` on the main dataset pipeline. 
PiperOrigin-RevId: 349334274 Change-Id: I40dd2650a59dc4677de33d0a62aee4b104412450",map_parallelization_test.cc,"@@ -57,12 +57,15 @@ TEST_P(AutotuneSetting, MapParallelizationTest) { NDef(""stop"", ""Const"", {}, {{""value"", 10}, {""dtype"", DT_INT32}}), NDef(""step"", ""Const"", {}, {{""value"", 1}, {""dtype"", DT_INT32}}), NDef(""range"", ""RangeDataset"", {""start"", ""stop"", ""step""}, {}), - MakeMapNode(""map"", ""range"", stateless_fun_name)}, + MakeMapNode(""map"", ""range"", stateless_fun_name), + NDef(""Sink"", ""Identity"", {""map""}, {})}, // FunctionLib { test::function::XTimesTwo(), }); + item.fetch.push_back(""Sink""); + GraphDef output; TF_ASSERT_OK(OptimizeWithMapParallelization(item, &output, autotune)); EXPECT_EQ(graph_utils::ContainsNodeWithOp(""ParallelMapDataset"", output), @@ -72,6 +75,39 @@ TEST_P(AutotuneSetting, MapParallelizationTest) { INSTANTIATE_TEST_SUITE_P(Test, AutotuneSetting, ::testing::Values(false, true)); +class FromFunctionDef : public ::testing::TestWithParam {}; + +TEST_P(FromFunctionDef, MapParallelizationTest) { + const string op = GetParam(); + bool from_function_def = (op == ""_Retval""); + + using test::function::NDef; + GrapplerItem item; + item.graph = test::function::GDef( + {NDef(""start"", ""Const"", {}, {{""value"", 0}, {""dtype"", DT_INT32}}), + NDef(""stop"", ""Const"", {}, {{""value"", 10}, {""dtype"", DT_INT32}}), + NDef(""step"", ""Const"", {}, {{""value"", 1}, {""dtype"", DT_INT32}}), + NDef(""range"", ""RangeDataset"", {""start"", ""stop"", ""step""}, {}), + MakeMapNode(""map"", ""range"", stateless_fun_name), + NDef(""Sink"", op, {""map""}, {})}, + // FunctionLib + { + test::function::XTimesTwo(), + }); + + item.fetch.push_back(""Sink""); + + GraphDef output; + TF_ASSERT_OK(OptimizeWithMapParallelization(item, &output, true)); + EXPECT_EQ(graph_utils::ContainsNodeWithOp(""ParallelMapDataset"", output), + !from_function_def); + EXPECT_EQ(graph_utils::ContainsGraphNodeWithName(""map"", output), + from_function_def); +} + +INSTANTIATE_TEST_SUITE_P(Test, FromFunctionDef, + ::testing::Values(""Identity"", ""_Retval"")); + TEST(ParallelizeAssert, MapParallelizationTest) { using test::function::NDef; GrapplerItem item; @@ -83,13 +119,16 @@ TEST(ParallelizeAssert, MapParallelizationTest) { NDef(""range"", ""RangeDataset"", {""start"", ""stop"", ""step""}, {}), MakeMapNode(""map1"", ""range"", stateful_fun_name), MakeMapNode(""map2"", ""map1"", stateless_fun_name), - NDef(""cache"", ""CacheDataset"", {""map2"", ""filename""}, {})}, + NDef(""cache"", ""CacheDataset"", {""map2"", ""filename""}, {}), + NDef(""Sink"", ""Identity"", {""cache""}, {})}, // FunctionLib { test::function::XTimesTwo(), test::function::RandomUniform(), }); + item.fetch.push_back(""Sink""); + GraphDef output; TF_ASSERT_OK(OptimizeWithMapParallelization(item, &output, true)); EXPECT_TRUE(graph_utils::ContainsNodeWithOp(""ParallelMapDataset"", output)); ",0,train 4316054cc62b030832cd0fd3cdd175e92f232ebf,tensorflow/tensorflow,"[tf.data] Only enables optimization `map_parallelization` on the main dataset pipeline. 
PiperOrigin-RevId: 349334274 Change-Id: I40dd2650a59dc4677de33d0a62aee4b104412450",map_parallelization_test.py,"@@ -108,24 +108,49 @@ class MapParallelizationTest(test_base.DatasetTestBase, parameterized.TestCase): combinations.combine(apply_autotune=[None, True, False]))) def testAutotuneOption(self, apply_autotune): next_nodes = [""ParallelMap""] if (apply_autotune is not False) else [""Map""] # pylint: disable=g-bool-id-comparison + dataset = dataset_ops.Dataset.range(4).apply( + testing.assert_next(next_nodes)).map(lambda x: x + 2) + + options = dataset_ops.Options() + options.experimental_optimization.apply_default_optimizations = False + options.experimental_optimization.map_parallelization = True + if apply_autotune is not None: + options.experimental_optimization.autotune = apply_autotune + dataset = dataset.with_options(options) + self.assertDatasetProduces(dataset, expected_output=[2, 3, 4, 5]) + + @combinations.generate(test_base.default_test_combinations()) + def testNoParallelizationInsideInterleave(self): def func(i): - ds = dataset_ops.Dataset.range(i).apply( - testing.assert_next(next_nodes)).map(lambda x: x + 1) + ds = dataset_ops.Dataset.range(i).apply(testing.assert_next( + [""Map""])).map(lambda x: x + 1) return ds dataset = dataset_ops.Dataset.range(1, 4).interleave( - map_func=func, cycle_length=4, block_length=5) - dataset = dataset.apply( - testing.assert_next(next_nodes)).map(lambda x: x * 2) + map_func=func, cycle_length=2, block_length=2) + options = dataset_ops.Options() + options.experimental_optimization.apply_default_optimizations = False + options.experimental_optimization.map_parallelization = True + dataset = dataset.with_options(options) + + self.assertDatasetProduces(dataset, expected_output=[1, 1, 2, 1, 2, 3]) + @combinations.generate(test_base.default_test_combinations()) + def testNoParallelizationInsideFlatMap(self): + + def func(i): + ds = dataset_ops.Dataset.range(i).apply(testing.assert_next( + [""Map""])).map(lambda x: x + 1) + return ds + + dataset = dataset_ops.Dataset.range(1, 4).flat_map(map_func=func) options = dataset_ops.Options() options.experimental_optimization.apply_default_optimizations = False options.experimental_optimization.map_parallelization = True - if apply_autotune is not None: - options.experimental_optimization.autotune = apply_autotune dataset = dataset.with_options(options) - self.assertDatasetProduces(dataset, expected_output=[2, 2, 4, 2, 4, 6]) + + self.assertDatasetProduces(dataset, expected_output=[1, 1, 2, 1, 2, 3]) if __name__ == ""__main__"": ",0,train 21f5c12e3d9c5b0c2f4c45c70a3da08b4edf212d,tensorflow/tensorflow,"Fix PrepareForStrCat() for types that are AlphaNum constructible but not implicitly convertible. In the latest dev version of Eigen, `Eigen::half` is implicitly convertible to float. This makes `std::is_constructible` true (using the float constructor), but since `Eigen::half` is not implicitly convertible to `AlphaNum` directly, this leads to the compile error: ``` ./third_party/tensorflow/core/platform/errors.h:107:1: error: no matching function for call to 'PrepareForStrCat' ... 
./third_party/tensorflow/core/platform/errors.h:54:33: note: candidate function not viable: no known conversion from 'Eigen::half' to 'const strings::AlphaNum' for 1st argument inline const strings::AlphaNum& PrepareForStrCat(const strings::AlphaNum& a) { ^ ./third_party/tensorflow/core/platform/errors.h:49:1: note: candidate template ignored: requirement '!std::is_constructible_v' was not satisfied [with T = Eigen::half] PrepareForStrCat(const T& t) { ^ ``` The same error occurs for any type implicitly convertible to int/float/double/string/etc... To fix this, we need to change the condition to `!is_convertible`, so that if `T` *is* implicitly convertible to `AlphaNum`, it will use the `AlphaNum` version, otherwise it will use the stream operator version. See [MR !278](https://gitlab.com/libeigen/eigen/-/merge_requests/278). PiperOrigin-RevId: 343357980 Change-Id: Ibad29a54105a70c473ca4c9b205fce8356d155ab",errors.h,"@@ -44,7 +44,7 @@ namespace internal { // Eventually absl::strings will have native support for this and we will be // able to completely remove PrepareForStrCat(). template -typename std::enable_if::value, +typename std::enable_if::value, std::string>::type PrepareForStrCat(const T& t) { std::stringstream ss; ",0,train bee0d8ead2c4445546c089fd59c5c9ff98bbae0a,tensorflow/tensorflow,"Add support for legalizing mhlo.slice to lmhlo.slice PiperOrigin-RevId: 330153599 Change-Id: I8b62f003b20742ab11fce19f50e38039be898606",map_hlo_to_lhlo_op.h,"@@ -69,6 +69,7 @@ MAP_HLO_TO_LHLO(RsqrtOp); MAP_HLO_TO_LHLO(SelectOp); MAP_HLO_TO_LHLO(SignOp); MAP_HLO_TO_LHLO(SinOp); +MAP_HLO_TO_LHLO(SliceOp); MAP_HLO_TO_LHLO(SqrtOp); MAP_HLO_TO_LHLO(SubOp); MAP_HLO_TO_LHLO(TanhOp); ",0,train bee0d8ead2c4445546c089fd59c5c9ff98bbae0a,tensorflow/tensorflow,"Add support for legalizing mhlo.slice to lmhlo.slice PiperOrigin-RevId: 330153599 Change-Id: I8b62f003b20742ab11fce19f50e38039be898606",hlo_legalize_to_lhlo.cc,"@@ -497,6 +497,7 @@ void populateHLOToLHLOConversionPattern( HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, + HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, ",0,train d0c647ff2f6f3398252c9831c8b49e8a2c3c8db5,tensorflow/tensorflow,"Fix misleading comment. PiperOrigin-RevId: 188450336",ir_array.h,"@@ -76,8 +76,7 @@ class IrArray { llvm::IRBuilder<>* ir_builder); // Constructs an index from the given multi-dimensional index and the shape - // that it indexes into. Also, computes the linear index according to - // ""shape"". + // that it indexes into. // // Precondition: ""shape"" has a layout. Index(tensorflow::gtl::ArraySlice multidim, ",0,train 23ddb5c69a6c98e8654b6114b1aa33606460638a,tensorflow/tensorflow,"Move to do into comment. PiperOrigin-RevId: 223398784",op_hint.py,"@@ -104,9 +104,9 @@ class OpHint(object): that make up the pseudo op. A similar process is done to any output that is to be exported from the current op. - TODO(aselle): When TensorFlow functions functionality works for arbitrary - constructs, this mechanism can be retired and changed to use python defun's. """""" + # TODO(aselle): When TensorFlow functions functionality works for arbitrary + # constructs, this mechanism can be retired and changed to use python defun's. # Attr constants that are used for representation in the GraphDef. These # will be used on every Identity op that is involved in a total OpHint. ",0,train 010506f4feb93ff210fe92d5b48b8b6da56fea9b,tensorflow/tensorflow,"Fix docstring typos in tf.distributions.bijectors.Bijector. 
PiperOrigin-RevId: 171756150",bijector_impl.py,"@@ -158,7 +158,7 @@ class Bijector(object): # Evaluate forward transformation. fwd_x = my_bijector.forward(x) x == my_bijector.inverse(fwd_x) - x != my_bijector.forward(fwd_x) # Not equal because g(x) != g(g(x)). + x != my_bijector.forward(fwd_x) # Not equal because x != g(g(x)). ``` - Computing a log-likelihood: @@ -275,7 +275,7 @@ class Bijector(object): implies `g^{-1}` is differentiable in the image of `g`. Applying the chain rule to `y = g(x) = g(g^{-1}(y))` yields `I = g'(g^{-1}(y))*g^{-1}'(y)`. - The same theorem also implies `g{-1}'` is non-singular therefore: + The same theorem also implies `g^{-1}'` is non-singular therefore: `inv[ g'(g^{-1}(y)) ] = g^{-1}'(y)`. The claim follows from [properties of determinant]( https://en.wikipedia.org/wiki/Determinant#Multiplicativity_and_matrix_groups). ",0,train 5ef5f683cafbf3d961cf900ea848621dee393625,tensorflow/tensorflow,Deprecated tf.Session removed in input_data.py,input_data.py,"@@ -122,7 +122,7 @@ def load_wav_file(filename): Returns: Numpy array holding the sample data as floats between -1.0 and 1.0. """""" - with tf.Session(graph=tf.Graph()) as sess: + with tf.compat.v1.Session(graph=tf.Graph()) as sess: wav_filename_placeholder = tf.placeholder(tf.string, []) wav_loader = io_ops.read_file(wav_filename_placeholder) wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1) @@ -139,7 +139,7 @@ def save_wav_file(filename, wav_data, sample_rate): wav_data: 2D array of float PCM-encoded audio data. sample_rate: Samples per second to encode in the file. """""" - with tf.Session(graph=tf.Graph()) as sess: + with tf.compat.v1.Session(graph=tf.Graph()) as sess: wav_filename_placeholder = tf.placeholder(tf.string, []) sample_rate_placeholder = tf.placeholder(tf.int32, []) wav_data_placeholder = tf.placeholder(tf.float32, [None, 1]) @@ -349,7 +349,7 @@ class AudioProcessor(object): background_dir = os.path.join(self.data_dir, BACKGROUND_NOISE_DIR_NAME) if not os.path.exists(background_dir): return self.background_data - with tf.Session(graph=tf.Graph()) as sess: + with tf.compat.v1.Session(graph=tf.Graph()) as sess: wav_filename_placeholder = tf.placeholder(tf.string, []) wav_loader = io_ops.read_file(wav_filename_placeholder) wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1) @@ -654,7 +654,7 @@ class AudioProcessor(object): words_list = self.words_list data = np.zeros((sample_count, desired_samples)) labels = [] - with tf.Session(graph=tf.Graph()) as sess: + with tf.compat.v1.Session(graph=tf.Graph()) as sess: wav_filename_placeholder = tf.placeholder(tf.string, []) wav_loader = io_ops.read_file(wav_filename_placeholder) wav_decoder = contrib_audio.decode_wav( ",0,train 795652118936b28b2ae3c1da63fd2efb5f445adc,tensorflow/tensorflow,"Reduce conservatism for Send/Recv ops Split SendRecv effect into Send effect and Recv effect since we don't need any dependencies between Send and Recv ops. Note that we do need dependencies between different Send ops and different Recv ops unless we know that they generate different rendezvous keys. 
PiperOrigin-RevId: 425389580 Change-Id: Iccaca22d3abce26f9a9dfe1b10e9089211833823",tf_side_effects.h,"@@ -68,8 +68,12 @@ struct GeneratorOp : public ::mlir::SideEffects::Resource::Base { StringRef getName() final { return """"; } }; -struct SendRecv : public ::mlir::SideEffects::Resource::Base { - StringRef getName() final { return """"; } +struct Send : public ::mlir::SideEffects::Resource::Base { + StringRef getName() final { return """"; } +}; + +struct Recv : public ::mlir::SideEffects::Resource::Base { + StringRef getName() final { return """"; } }; struct RandomGenerator ",0,train 800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration This addresses compiler warnings. -- PiperOrigin-RevId: 246386939",NestedMatcher.h,"@@ -63,8 +63,8 @@ struct NestedMatch { ArrayRef getMatchedChildren() { return matchedChildren; } private: - friend class NestedPattern; - friend class NestedPatternContext; + friend struct NestedPattern; + friend struct NestedPatternContext; /// Underlying global bump allocator managed by a NestedPatternContext. static llvm::BumpPtrAllocator *&allocator(); @@ -116,8 +116,8 @@ struct NestedPattern { unsigned getDepth() const; private: - friend class NestedPatternContext; - friend class NestedMatch; + friend struct NestedPatternContext; + friend struct NestedMatch; friend struct State; /// Underlying global bump allocator managed by a NestedPatternContext. ",0,train 800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration This addresses compiler warnings. -- PiperOrigin-RevId: 246386939",Utils.h,"@@ -39,7 +39,7 @@ class AffineForOp; class Block; class FlatAffineConstraints; class Location; -class MemRefAccess; +struct MemRefAccess; class Operation; class Value; ",0,train 800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration This addresses compiler warnings. -- PiperOrigin-RevId: 246386939",Builders.h,"@@ -239,7 +239,7 @@ private: /// Base class for ValueHandle, OperationHandle and BlockHandle. /// Not meant to be used outside of these classes. -struct CapturableHandle { +class CapturableHandle { protected: CapturableHandle() = default; }; ",0,train 800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration This addresses compiler warnings. -- PiperOrigin-RevId: 246386939",Helpers.h,"@@ -113,7 +113,8 @@ private: /// Assigning to an IndexedValue emits an actual `Store` operation, while /// converting an IndexedValue to a ValueHandle emits an actual `Load` /// operation. -template struct TemplatedIndexedValue { +template class TemplatedIndexedValue { +public: explicit TemplatedIndexedValue(Type t) : base(t) {} explicit TemplatedIndexedValue(Value *v) : TemplatedIndexedValue(ValueHandle(v)) {} ",0,train 800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration This addresses compiler warnings. 
-- PiperOrigin-RevId: 246386939",AffineExpr.h,"@@ -36,11 +36,11 @@ class IntegerSet; namespace detail { -class AffineExprStorage; -class AffineBinaryOpExprStorage; -class AffineDimExprStorage; -class AffineSymbolExprStorage; -class AffineConstantExprStorage; +struct AffineExprStorage; +struct AffineBinaryOpExprStorage; +struct AffineDimExprStorage; +struct AffineSymbolExprStorage; +struct AffineConstantExprStorage; } // namespace detail ",0,train 800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration This addresses compiler warnings. -- PiperOrigin-RevId: 246386939",Location.h,"@@ -34,12 +34,12 @@ class Identifier; namespace detail { -class LocationStorage; -class UnknownLocationStorage; -class FileLineColLocationStorage; -class NameLocationStorage; -class CallSiteLocationStorage; -class FusedLocationStorage; +struct LocationStorage; +struct UnknownLocationStorage; +struct FileLineColLocationStorage; +struct NameLocationStorage; +struct CallSiteLocationStorage; +struct FusedLocationStorage; } // namespace detail ",0,train 800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration This addresses compiler warnings. -- PiperOrigin-RevId: 246386939",Operation.h,"@@ -34,7 +34,7 @@ class BlockAndValueMapping; class Location; class MLIRContext; class OperandIterator; -class OperationState; +struct OperationState; class ResultIterator; class ResultTypeIterator; ",0,train 800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration This addresses compiler warnings. -- PiperOrigin-RevId: 246386939",StandardTypes.h,"@@ -22,7 +22,7 @@ #include ""mlir/Support/LLVM.h"" namespace llvm { -class fltSemantics; +struct fltSemantics; } // namespace llvm namespace mlir { ",0,train a0fc1302d25c7f14c7893374a97751ab97373e9a,tensorflow/tensorflow,"Change zip(...)[1] to list(zip(...))[1], for python 3 compatibility. PiperOrigin-RevId: 167654035",backprop.py,"@@ -186,7 +186,7 @@ def _aggregate_grads(gradients): ret.append(g_list[0]) else: # TODO(xpan): Aggregate IndexedSlices. - ret.append((g_list[0][0], math_ops.add_n(zip(*g_list)[1]))) + ret.append((g_list[0][0], math_ops.add_n(list(zip(*g_list))[1]))) return ret ",0,test b8b7f916f79dce0f6d7c65aabf7f6b0c8092574a,tensorflow/tensorflow,"Fix fp16 tf.linalg.band_part bug. The issue were some output values were uninitialized when they should have been 0. Unfortunately, I could not get this to reproduce in a unit test, but it was occurring in the Transformer Keras model. PiperOrigin-RevId: 252733927",matrix_band_part_op.cc,"@@ -148,7 +148,8 @@ struct MatrixBandPartFunctor { const bool in_place = input.data() == output.data(); auto compute_shard = [=, &input, &output](int64 begin, int64 end) { if (!in_place) { - std::fill(output.data() + begin * n, output.data() + end * n, Scalar()); + std::fill(output.data() + begin * n, output.data() + end * n, + Scalar(0)); } const int64 batch_begin = begin / m; const int64 batch_end = (end + m - 1) / m; @@ -167,11 +168,11 @@ struct MatrixBandPartFunctor { if (in_place) { if (band_start > 0) { std::fill(&output(batch, row, 0), &output(batch, row, band_start), - Scalar()); + Scalar(0)); } if (band_end < n) { std::fill(&output(batch, row, band_end), &output(batch, row, n), - Scalar()); + Scalar(0)); } } else { if (band_start < band_end) { ",0,train b8b7f916f79dce0f6d7c65aabf7f6b0c8092574a,tensorflow/tensorflow,"Fix fp16 tf.linalg.band_part bug. 
The issue were some output values were uninitialized when they should have been 0. Unfortunately, I could not get this to reproduce in a unit test, but it was occurring in the Transformer Keras model. PiperOrigin-RevId: 252733927",matrix_band_part_op_gpu.cu.cc,"@@ -42,7 +42,7 @@ __global__ void MatrixBandPartKernel(const int num_threads, const int band_start = (num_lower_diags < 0 ? 0 : row - num_lower_diags); const int band_end = (num_upper_diags < 0 ? n : row + num_upper_diags + 1); if (col < band_start || col >= band_end) { - output_ptr[index] = Scalar(); + output_ptr[index] = Scalar(0); } else { output_ptr[index] = input_ptr[index]; } ",0,train 9664ba19296e58f4437feab4d4b2789cc1e38fd4,tensorflow/tensorflow,"[XLA/GPU] Fix row reduction codegen: we only need 32 bytes of shared memory PiperOrigin-RevId: 297881063 Change-Id: I40a924779b56c0b50ebf4b66fa7bf9202a833b19",ir_emitter_unnested.cc,"@@ -2156,9 +2156,9 @@ void IrEmitterUnnested::EmitPrologueForReduction( reduce_inst->shape().element_type(), module_); llvm::Type* buffer_type = [&] { if (reduction_info->IsRowReduction()) { - // Allocate __shared__ cache[num_partial_results][num_threads]. + // Allocate __shared__ cache[num_partial_results][kWarpSize]. return llvm::ArrayType::get( - llvm::ArrayType::get(primitive_type, num_threads_x), + llvm::ArrayType::get(primitive_type, kWarpSize), num_partial_results); } else { // Allocate __shared__ ",0,train 9664ba19296e58f4437feab4d4b2789cc1e38fd4,tensorflow/tensorflow,"[XLA/GPU] Fix row reduction codegen: we only need 32 bytes of shared memory PiperOrigin-RevId: 297881063 Change-Id: I40a924779b56c0b50ebf4b66fa7bf9202a833b19",gpu_kernel_tiling_test.cc,"@@ -815,6 +815,30 @@ ENTRY %primitive_computation_svd.38 (constant_5: f32[3,29,29], fusion.3: pred[3] EXPECT_TRUE(RunAndCompareNoHloPasses(kHloString, ErrorSpec{0.001})); } +TEST_F(GpuKernelTilingTest, RowReductionCorrectShmemUsage) { + const char *const kHloString = R""( + HloModule RowReduce + + Sum { + x.1 = f32[] parameter(0) + y.1 = f32[] parameter(1) + ROOT add.1 = f32[] add(x.1, y.1) + } + + ENTRY reduce.1 { + parameter = f32[1048576] parameter(0) + init_value = f32[] constant(0) + ROOT reduce = f32[] reduce(parameter, init_value), dimensions={0}, to_apply=Sum + } + )""; + auto hlo_module = ParseAndReturnVerifiedModule(kHloString).ValueOrDie(); + auto expected_ir = R""( +; CHECK: shared_cache_{{[0-9]*}} = private addrspace({{[0-9]*}}) global [1 x [32 x float]] + )""; + CompileAndVerifyIr(std::move(hlo_module), expected_ir, + /*match_optimized_ir=*/true); +} + } // namespace } // namespace gpu } // namespace xla ",0,train 866bade85dec9a231350201fb147f9381eee55c7,tensorflow/tensorflow,"Remove deprecation on int64 type. I have half a million lines of log-spew in my CI from this and clearly there has been ~no effort in the codebase to actually stop using it. Please do not deprecate such a critical type until at least its own codebase has substantially migrated off of it. PiperOrigin-RevId: 399093429 Change-Id: I807d9e6c79c4c33f4a50f88ba28fe5fda194ddd4",integral_types.h,"@@ -26,7 +26,7 @@ namespace tensorflow { typedef signed char int8; typedef short int16; typedef int int32; -[[deprecated(""Use int64_t instead."")]] typedef ::std::int64_t int64; +typedef ::std::int64_t int64; typedef unsigned char uint8; typedef unsigned short uint16; ",0,train dce7bc4c68929bf890912471a8c1ebb0d86ce044,tensorflow/tensorflow,"TFLite GPU Delegate: Implement unit tests for slice operation. 
PiperOrigin-RevId: 252670406",slice_test.cc,"@@ -0,0 +1,174 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""tensorflow/lite/delegates/gpu/gl/kernels/slice.h"" + +#include + +#include +#include +#include ""tensorflow/lite/delegates/gpu/common/operations.h"" +#include ""tensorflow/lite/delegates/gpu/gl/kernels/test_util.h"" + +using ::testing::FloatNear; +using ::testing::Pointwise; + +namespace tflite { +namespace gpu { +namespace gl { +namespace { + +TEST(SliceTest, Identity) { + TensorRefFloat32 input; + input.type = DataType::FLOAT32; + input.ref = 0; + input.shape = BHWC(1, 1, 2, 2); + + TensorRefFloat32 output; + output.type = DataType::FLOAT32; + output.ref = 1; + output.shape = BHWC(1, 1, 2, 2); + + SliceAttributes attr; + attr.starts = HWC(0, 0, 0); + attr.ends = HWC(1, 2, 2); + attr.strides = HWC(1, 1, 1); + + SingleOpModel model({ToString(OperationType::SLICE), attr}, {input}, + {output}); + ASSERT_TRUE(model.PopulateTensor(0, {1, 2, 3, 4})); + ASSERT_TRUE(model.Invoke(*NewSliceNodeShader())); + EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {1, 2, 3, 4})); +} + +TEST(SliceTest, NegativeEnds) { + TensorRefFloat32 input; + input.type = DataType::FLOAT32; + input.ref = 0; + input.shape = BHWC(1, 1, 2, 2); + + TensorRefFloat32 output; + output.type = DataType::FLOAT32; + output.ref = 1; + output.shape = BHWC(1, 1, 2, 2); + + SliceAttributes attr; + attr.starts = HWC(0, 0, 0); + attr.ends = HWC(1, -1, -1); + attr.strides = HWC(1, 1, 1); + + SingleOpModel model({ToString(OperationType::SLICE), attr}, {input}, + {output}); + ASSERT_TRUE(model.PopulateTensor(0, {1, 2, 3, 4})); + ASSERT_TRUE(model.Invoke(*NewSliceNodeShader())); + EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {1, 2, 3, 4})); +} + +TEST(SliceTest, NegativeEndsNonZeroStarts) { + TensorRefFloat32 input; + input.type = DataType::FLOAT32; + input.ref = 0; + input.shape = BHWC(1, 1, 2, 2); + + TensorRefFloat32 output; + output.type = DataType::FLOAT32; + output.ref = 1; + output.shape = BHWC(1, 1, 1, 1); + + SliceAttributes attr; + attr.starts = HWC(0, 1, 0); + attr.ends = HWC(0, 1, 1); + attr.strides = HWC(1, 1, 1); + + SingleOpModel model({ToString(OperationType::SLICE), attr}, {input}, + {output}); + ASSERT_TRUE(model.PopulateTensor(0, {1, 2, 3, 4})); + ASSERT_TRUE(model.Invoke(*NewSliceNodeShader())); + EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {3})); +} + +TEST(SliceTest, StridesByHeight) { + TensorRefFloat32 input; + input.type = DataType::FLOAT32; + input.ref = 0; + input.shape = BHWC(1, 4, 1, 1); + + TensorRefFloat32 output; + output.type = DataType::FLOAT32; + output.ref = 1; + output.shape = BHWC(1, 2, 1, 1); + + SliceAttributes attr; + attr.starts = HWC(0, 0, 0); + attr.ends = HWC(-1, -1, -1); + attr.strides = HWC(2, 1, 1); + + SingleOpModel model({ToString(OperationType::SLICE), attr}, {input}, + {output}); + 
ASSERT_TRUE(model.PopulateTensor(0, {1, 2, 3, 4})); + ASSERT_TRUE(model.Invoke(*NewSliceNodeShader())); + EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {1, 3})); +} + +TEST(SliceTest, StridesByWidth) { + TensorRefFloat32 input; + input.type = DataType::FLOAT32; + input.ref = 0; + input.shape = BHWC(1, 1, 4, 1); + + TensorRefFloat32 output; + output.type = DataType::FLOAT32; + output.ref = 1; + output.shape = BHWC(1, 1, 2, 1); + + SliceAttributes attr; + attr.starts = HWC(0, 1, 0); + attr.ends = HWC(-1, -1, -1); + attr.strides = HWC(1, 2, 1); + + SingleOpModel model({ToString(OperationType::SLICE), attr}, {input}, + {output}); + ASSERT_TRUE(model.PopulateTensor(0, {1, 2, 3, 4})); + ASSERT_TRUE(model.Invoke(*NewSliceNodeShader())); + EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {2, 4})); +} + +TEST(SliceTest, StridesByChannels) { + TensorRefFloat32 input; + input.type = DataType::FLOAT32; + input.ref = 0; + input.shape = BHWC(1, 1, 1, 4); + + TensorRefFloat32 output; + output.type = DataType::FLOAT32; + output.ref = 1; + output.shape = BHWC(1, 1, 1, 1); + + SliceAttributes attr; + attr.starts = HWC(0, 0, 2); + attr.ends = HWC(-1, -1, -1); + attr.strides = HWC(1, 1, 3); + + SingleOpModel model({ToString(OperationType::SLICE), attr}, {input}, + {output}); + ASSERT_TRUE(model.PopulateTensor(0, {1, 2, 3, 4})); + ASSERT_TRUE(model.Invoke(*NewSliceNodeShader())); + EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {3})); +} + +} // namespace +} // namespace gl +} // namespace gpu +} // namespace tflite ",0,train 1dbe0671a05257244fd9eae5701092c24540d872,tensorflow/tensorflow,"[XLA] fix bug in conditional_code_motion.cc by checking whether an instruction is dead before removing it. The bug has to do with instructions in alternative branches (other than branch(0)) of a conditional may be placed into boundaries to move out multiple times, if they happen to be identical to those in branch(0) and are shared multiple times (while those in branch(0) are not shared). The fix tries to avoid deleting them if they are already deleted, or if they still have uses inside their conditional branch. PiperOrigin-RevId: 330045927 Change-Id: I7a786eaa77085dd65609cc8639019874140474c0",conditional_code_motion.cc,"@@ -97,6 +97,17 @@ class BoundaryVisitor { absl::flat_hash_set visited_; }; +template +int64 CountNonLeafOps(const OpCollection& ops) { + absl::flat_hash_set op_set; + for (auto op : ops) { + if (!op_set.contains(op) && op->opcode() != HloOpcode::kConstant) { + op_set.insert(op); + } + } + return op_set.size(); +} + // Returns estimation of potential reuses carried by a given pair of // instructions. Use different integers to classify different levels // of reuses This is used as a placeholder only, assuming all @@ -120,7 +131,7 @@ int64 ReusesCarriedBy(HloInstruction* op, HloInstruction* user) { return 10; default: // Assume fusion will not happen anyway if user count > 1) - if (op->user_count() > 1) { + if (CountNonLeafOps(op->users()) > 1) { return 0; } return 10; @@ -508,8 +519,16 @@ StatusOr ConditionalCodeMotion::MoveInstructionOut( VLOG(2) << ""computation is :"" << computation->ToString() << ""\n""; // Remove hoisted instructions from the branches. for (auto b2 : to_move_out) { - VLOG(2) << ""Removing boundary:"" << b2.ToString() << ""\n""; - TF_RETURN_IF_ERROR(computation->RemoveInstruction(b2.operands()[i])); + auto instr_to_remove = b2.operands()[i]; + // Double check to make sure it is safe to delete the instruction. 
+ // Complications may arise due to some operations in the alternative + // branches (branches 1..n) being placed into the boundaries multiple + // times. + if (!computation->IsMarkedAsDead(instr_to_remove) && + instr_to_remove->user_count() == 0) { + VLOG(2) << ""Removing boundary:"" << b2.ToString() << ""\n""; + TF_RETURN_IF_ERROR(computation->RemoveInstruction(instr_to_remove)); + } } } // Change conditional instruction shape to the shape of the new root. @@ -847,17 +866,6 @@ class GroupConnectedBoundaries { } return b2; } - int64 CountNonLeafOps(const xla::HloInstruction::InstructionVector& ops) { - int64 count = 0; - absl::flat_hash_set op_set; - for (auto op : ops) { - if (!op_set.contains(op) && op->opcode() != HloOpcode::kConstant) { - count++; - op_set.insert(op); - } - } - return count; - } // This function is reused both for moving the boundary outside or into a // conditional. As the result, the readability is somewhat compromised. // It might be nice to refactor this function to factor the outside-inside ",0,train 1dbe0671a05257244fd9eae5701092c24540d872,tensorflow/tensorflow,"[XLA] fix bug in conditional_code_motion.cc by checking whether an instruction is dead before removing it. The bug has to do with instructions in alternative branches (other than branch(0)) of a conditional may be placed into boundaries to move out multiple times, if they happen to be identical to those in branch(0) and are shared multiple times (while those in branch(0) are not shared). The fix tries to avoid deleting them if they are already deleted, or if they still have uses inside their conditional branch. PiperOrigin-RevId: 330045927 Change-Id: I7a786eaa77085dd65609cc8639019874140474c0",conditional_code_motion_test.cc,"@@ -828,6 +828,99 @@ ENTRY main { op::GetTupleElement(op::Conditional(), 1)))); } +TEST_F(ConditionalCodeMotionTest, MoveReplicatedTupleEntryOut) { + absl::string_view hlo_string = + R""( +HloModule RemoveIdenticalInstruction + +%add.64 (x.139: bf16[], y.139: bf16[]) -> bf16[] { + %x.139 = bf16[]{:T(512)} parameter(0) + %y.139 = bf16[]{:T(512)} parameter(1) + ROOT %add.44073 = bf16[]{:T(512)} add(bf16[]{:T(512)} %x.139, bf16[]{:T(512)} %y.139) +} + +%add.181 (x.256: bf16[], y.256: bf16[]) -> bf16[] { + %x.256 = bf16[]{:T(512)} parameter(0) + %y.256 = bf16[]{:T(512)} parameter(1) + ROOT %add.44842 = bf16[]{:T(512)} add(bf16[]{:T(512)} %x.256, bf16[]{:T(512)} %y.256) +} + +on_true { + arg_tuple.1 = (bf16[2,54,168,128], bf16[2,52,168,128]) parameter(0) + get-tuple-element.11 = bf16[2,54,168,128] get-tuple-element(arg_tuple.1), index=0 + get-tuple-element.12 = bf16[2,52,168,128] get-tuple-element(arg_tuple.1), index=1 + convolution.1 = bf16[3,3,128,128] convolution(bf16[2,54,168,128] + get-tuple-element.11, bf16[2,52,168,128] + get-tuple-element.12), window={size=52x168 pad=0_0x1_1}, + dim_labels=f01b_i01o->01bf + all-reduce.1 = bf16[3,3,128,128] + all-reduce(bf16[3,3,128,128] %convolution.1), + channel_id=188, replica_groups={{0,1}}, use_global_device_ids=true, + to_apply=%add.64 + convert.1 = f32[3,3,128,128] convert(bf16[3,3,128,128] %all-reduce.1) + all-reduce.3 = bf16[3,3,128,128] + all-reduce(bf16[3,3,128,128] %convolution.1), + channel_id=188, replica_groups={{0,1}}, use_global_device_ids=true, + to_apply=%add.64 + convert.3 = f32[3,3,128,128] convert(bf16[3,3,128,128] %all-reduce.3) + ROOT tuple.1 = (f32[3,3,128,128], f32[3,3,128,128]) tuple(convert.1, convert.3) +} + +on_false { + arg_tuple.2 = (bf16[2,86,104,128], bf16[2,84,104,128]) parameter(0) + get-tuple-element.21 = 
bf16[2,86,104,128] + get-tuple-element(arg_tuple.2), index=0 + get-tuple-element.22 = bf16[2,84,104,128] + get-tuple-element(arg_tuple.2), index=1 + convolution.2 = bf16[3,3,128,128] + convolution(bf16[2,86,104,128] get-tuple-element.21, bf16[2,84,104,128] + get-tuple-element.22), window={size=84x104 pad=0_0x1_1}, + dim_labels=f01b_i01o->01bf + all-reduce.2 = bf16[3,3,128,128] + all-reduce(bf16[3,3,128,128] %convolution.2), + channel_id=485, replica_groups={{0,1}}, use_global_device_ids=true, + to_apply=%add.181 + convert.2 = f32[3,3,128,128] + convert(bf16[3,3,128,128] %all-reduce.2) + ROOT tuple.2 = (f32[3,3,128,128], f32[3,3,128,128]) tuple(convert.2, convert.2) +} + +ENTRY main { + pred.1 = pred[] parameter(0) + arg_tuple.3 = (bf16[2,54,168,128], bf16[2,52,168,128]) parameter(1) + arg_tuple.4 = (bf16[2,86,104,128], bf16[2,84,104,128]) parameter(2) + conditional = (f32[3,3,128,128], f32[3,3,128,128]) + conditional(pred.1, arg_tuple.3, arg_tuple.4), true_computation=on_true, + false_computation=on_false + get-first-index = f32[3,3,128,128] + get-tuple-element(conditional), index=0 + add.1 = f32[3,3,128,128] add(f32[3,3,128,128] get-first-index, f32[3,3,128,128] get-first-index) + ROOT result = (f32[3,3,128,128]) tuple(add.1) +} +)""; + auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); + ConditionalCodeMotion pass(true, true); + ASSERT_TRUE(pass.Run(&*module).ValueOrDie()); + const HloInstruction* conditional = + FindInstruction(module.get(), ""conditional""); + const HloComputation* on_true = conditional->branch_computation(0); + ASSERT_EQ(on_true->instruction_count(), 5); + const HloComputation* on_false = conditional->branch_computation(1); + ASSERT_EQ(on_false->instruction_count(), 5); + + // Checks if conditional shape has changed. + ASSERT_TRUE(ShapeUtil::Compatible( + conditional->shape(), ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape( + BF16, {3, 3, 128, 128})}))); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + AllOf(op::Tuple(op::Add( + op::Convert(op::AllReduce(op::GetTupleElement(op::Conditional()))), + op::Convert( + op::AllReduce(op::GetTupleElement(op::Conditional()))))))); +} + } // namespace conditional_opt } // namespace xla ",0,train 491fb62d90f080d4daf32b5539ec9b4a2de71c6c,tensorflow/tensorflow,"Add cost estimator tests for the BiasAdd, ReLU, and Conv2D operations. PiperOrigin-RevId: 186705930",op_level_cost_estimator.cc,"@@ -245,6 +245,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() { {""Add"", Eigen::internal::functor_traits< Eigen::internal::scalar_sum_op>::Cost}, {""ApproximateEqual"", 1}, + {""BiasAdd"", Eigen::internal::functor_traits< + Eigen::internal::scalar_sum_op>::Cost}, {""Div"", Eigen::internal::functor_traits< Eigen::internal::scalar_quotient_op>::Cost}, {""Equal"", 1}, ",0,train 491fb62d90f080d4daf32b5539ec9b4a2de71c6c,tensorflow/tensorflow,"Add cost estimator tests for the BiasAdd, ReLU, and Conv2D operations. PiperOrigin-RevId: 186705930",op_level_cost_estimator_test.cc,"@@ -99,47 +99,81 @@ OpContext DescribeBatchMatMul(const std::vector& dims_a, // Wrangles the minimum number of proto fields to set up a 4D Tensor for cost // estimation purposes. 
void DescribeTensor4D(int dim0, int dim1, int dim2, int dim3, - OpInfo* op_features) { - auto input = op_features->add_inputs(); - auto shape = input->mutable_shape(); + OpInfo::TensorProperties* tensor) { + auto shape = tensor->mutable_shape(); shape->add_dim()->set_size(dim0); shape->add_dim()->set_size(dim1); shape->add_dim()->set_size(dim2); shape->add_dim()->set_size(dim3); - input->set_dtype(DT_FLOAT); + tensor->set_dtype(DT_FLOAT); } -// Returns an OpInfo for Conv2D with the minimum set of fields set up. +// DescribeConvolution constructs an OpContext for a Conv2D applied to an input +// tensor with shape (batch, ix, iy, iz1) and a kernel tensor with shape +// (kx, ky, iz2, oz). OpContext DescribeConvolution(int batch, int ix, int iy, int iz1, int iz2, int kx, int ky, int oz) { OpContext op_context; SetCpuDevice(&op_context.op_info); op_context.op_info.set_op(""Conv2D""); - DescribeTensor4D(batch, ix, iy, iz1, &op_context.op_info); - DescribeTensor4D(kx, ky, iz2, oz, &op_context.op_info); + DescribeTensor4D(batch, ix, iy, iz1, op_context.op_info.add_inputs()); + DescribeTensor4D(kx, ky, iz2, oz, op_context.op_info.add_inputs()); + + return op_context; +} + +// DescribeUnaryOp constructs an OpContext for the given operation applied to +// a 4-tensor with shape (size1, 1, 1, 1). +OpContext DescribeUnaryOp(const string& op, int size1) { + OpContext op_context; + SetCpuDevice(&op_context.op_info); + op_context.op_info.set_op(op); + + DescribeTensor4D(size1, 1, 1, 1, op_context.op_info.add_inputs()); + DescribeTensor4D(size1, 1, 1, 1, op_context.op_info.add_outputs()); + return op_context; } -OpContext DescribeOp(const string& op, int size1, int size2) { +// DescribeBinaryOp constructs an OpContext for the given operation applied to +// a 4-tensor with dimensions (size1, 1, 1, 1) and a 4-tensor with dimensions +// (2 * size1, size2, 1, 1). +// +// The choice of dimension here is arbitrary, and is used strictly to test the +// cost model for applying elementwise operations to tensors with unequal +// dimension values. +OpContext DescribeBinaryOp(const string& op, int size1, int size2) { OpContext op_context; SetCpuDevice(&op_context.op_info); op_context.op_info.set_op(op); - DescribeTensor4D(size1, 1, 1, 1, &op_context.op_info); - DescribeTensor4D(2 * size1, size2, 1, 1, &op_context.op_info); + DescribeTensor4D(size1, 1, 1, 1, op_context.op_info.add_inputs()); + DescribeTensor4D(2 * size1, size2, 1, 1, op_context.op_info.add_inputs()); + DescribeTensor4D(2 * size1, size2, 1, 1, op_context.op_info.add_outputs()); - auto output = op_context.op_info.add_outputs(); - auto shape = output->mutable_shape(); - shape->add_dim()->set_size(2 * size1); - shape->add_dim()->set_size(size2); - shape->add_dim()->set_size(1); - shape->add_dim()->set_size(1); - output->set_dtype(DT_FLOAT); + return op_context; +} +// DescribeBiasAdd constructs an OpContext for a BiasAdd applied to a 4-tensor +// with dimensions (1, 1, size2, size1) and a bias with dimension (size1), +// according to the constraint that the bias must be 1D with size equal to that +// of the last dimension of the input value. 
+OpContext DescribeBiasAdd(int size1, int size2) { + OpContext op_context; SetCpuDevice(&op_context.op_info); + op_context.op_info.set_op(""BiasAdd""); + + DescribeTensor4D(1, 1, size2, size1, op_context.op_info.add_inputs()); + DescribeTensor4D(1, 1, size2, size1, op_context.op_info.add_outputs()); + + auto bias = op_context.op_info.add_inputs(); + bias->mutable_shape()->add_dim()->set_size(size1); + bias->set_dtype(DT_FLOAT); + return op_context; } + } // namespace class OpLevelCostEstimatorTest : public ::testing::Test { @@ -166,8 +200,24 @@ class OpLevelCostEstimatorTest : public ::testing::Test { OpLevelCostEstimator estimator_; }; +TEST_F(OpLevelCostEstimatorTest, BiasAddExecutionTime) { + auto cost = PredictCosts(DescribeBiasAdd(1000, 10)); + EXPECT_EQ(Costs::Duration(8400), cost.memory_time); + EXPECT_EQ(Costs::Duration(1000), cost.compute_time); + EXPECT_EQ(Costs::Duration(9400), cost.execution_time); + EXPECT_FALSE(cost.inaccurate); +} + +TEST_F(OpLevelCostEstimatorTest, Conv2DExecutionTime) { + auto cost = PredictCosts(DescribeConvolution(16, 19, 19, 48, 48, 5, 5, 256)); + EXPECT_EQ(Costs::Duration(233780), cost.memory_time); + EXPECT_EQ(Costs::Duration(354877440), cost.compute_time); + EXPECT_EQ(Costs::Duration(355111220), cost.execution_time); + EXPECT_FALSE(cost.inaccurate); +} + TEST_F(OpLevelCostEstimatorTest, DummyExecutionTime) { - auto cost = PredictCosts(DescribeOp(""Dummy"", 1000, 1)); + auto cost = PredictCosts(DescribeBinaryOp(""Dummy"", 1000, 1)); EXPECT_EQ(Costs::Duration(2000), cost.memory_time); EXPECT_EQ(Costs::Duration(0), cost.compute_time); EXPECT_EQ(Costs::Duration(2000), cost.execution_time); @@ -176,7 +226,7 @@ TEST_F(OpLevelCostEstimatorTest, DummyExecutionTime) { TEST_F(OpLevelCostEstimatorTest, ExecutionTimeSumOrMax) { SetComputeMemoryOverlap(true); - auto cost = PredictCosts(DescribeOp(""Dummy"", 1000, 1)); + auto cost = PredictCosts(DescribeBinaryOp(""Dummy"", 1000, 1)); EXPECT_EQ(Costs::Duration(2000), cost.memory_time); EXPECT_EQ(Costs::Duration(0), cost.compute_time); EXPECT_EQ(Costs::Duration(2000), cost.execution_time); // max(2000, 200) @@ -185,7 +235,7 @@ TEST_F(OpLevelCostEstimatorTest, ExecutionTimeSumOrMax) { } TEST_F(OpLevelCostEstimatorTest, MulExecutionTime) { - auto cost = PredictCosts(DescribeOp(""Mul"", 1000, 1)); + auto cost = PredictCosts(DescribeBinaryOp(""Mul"", 1000, 1)); EXPECT_EQ(Costs::Duration(2000), cost.memory_time); EXPECT_EQ(Costs::Duration(200), cost.compute_time); EXPECT_EQ(Costs::Duration(2200), cost.execution_time); @@ -193,7 +243,7 @@ TEST_F(OpLevelCostEstimatorTest, MulExecutionTime) { } TEST_F(OpLevelCostEstimatorTest, MulBroadcastExecutionTime) { - auto cost = PredictCosts(DescribeOp(""Mul"", 1000, 2)); + auto cost = PredictCosts(DescribeBinaryOp(""Mul"", 1000, 2)); EXPECT_EQ(Costs::Duration(3600), cost.memory_time); EXPECT_EQ(Costs::Duration(400), cost.compute_time); EXPECT_EQ(Costs::Duration(4000), cost.execution_time); @@ -201,13 +251,21 @@ TEST_F(OpLevelCostEstimatorTest, MulBroadcastExecutionTime) { } TEST_F(OpLevelCostEstimatorTest, ModExecutionTime) { - auto cost = PredictCosts(DescribeOp(""Mod"", 1000, 1)); + auto cost = PredictCosts(DescribeBinaryOp(""Mod"", 1000, 1)); EXPECT_EQ(Costs::Duration(2000), cost.memory_time); EXPECT_EQ(Costs::Duration(1600), cost.compute_time); EXPECT_EQ(Costs::Duration(3600), cost.execution_time); EXPECT_FALSE(cost.inaccurate); } +TEST_F(OpLevelCostEstimatorTest, ReluExecutionTime) { + auto cost = PredictCosts(DescribeUnaryOp(""Relu"", 1000)); + EXPECT_EQ(Costs::Duration(800), 
cost.memory_time); + EXPECT_EQ(Costs::Duration(100), cost.compute_time); + EXPECT_EQ(Costs::Duration(900), cost.execution_time); + EXPECT_FALSE(cost.inaccurate); +} + TEST_F(OpLevelCostEstimatorTest, UnknownOrPartialShape) { EXPECT_FALSE(PredictCosts(DescribeMatMul(2, 4, 7, 7)).inaccurate); EXPECT_TRUE(PredictCosts(DescribeMatMul(-1, 4, 7, 7)).inaccurate); ",0,train 6a20edf95fcaf45c46385eaf649e814a571737ed,tensorflow/tensorflow,"backward compatibility: Disallow changes to an OpDef attribute's default value. PiperOrigin-RevId: 180611380",op_compatibility_test.cc,"@@ -163,6 +163,18 @@ class OpCompatibilityTest : public OpsTestBase { ExpectIncompatible(old_op_def, *new_op_def, compatibility_error); } + + void ExpectDefaultChangeFailure(const OpDef& old_op_def, + const string& compatibility_error) { + // This should be all that is needed to get compatibility. + const OpDef* new_op_def = RegisteredOpDef(); + AddDefaultsToNodeDef(*new_op_def, node_def()); + + // Validate that the NodeDef is valid. + TF_ASSERT_OK(ValidateNodeDef(*node_def(), *new_op_def)); + + ExpectIncompatible(old_op_def, *new_op_def, compatibility_error); + } }; // Should be compatible if the Op hasn't changed (sanity check). @@ -260,40 +272,6 @@ TEST_F(OpCompatibilityTest, AttrOrder) { EXPECT_EQ(""attr_order = AttrOrder[a=7, b=true]()"", Result()); } -// Should be able to add a default to an attr. -REGISTER_OP(""AddDefault"").Output(""ndef: string"").Attr(""a: int = 1234""); -REGISTER_KERNEL_BUILDER(Name(""AddDefault"").Device(DEVICE_CPU), TestKernel); - -TEST_F(OpCompatibilityTest, AddDefault) { - OpRegistrationData old_op; - TF_ASSERT_OK(OpDefBuilder(""AddDefault"") - .Output(""ndef: string"") - .Attr(""a: int"") - .Finalize(&old_op)); - TF_ASSERT_OK(NodeDefBuilder(""add_default"", &old_op.op_def) - .Attr(""a"", 765) - .Finalize(node_def())); - ExpectSuccess(old_op.op_def); - EXPECT_EQ(""add_default = AddDefault[a=765]()"", Result()); -} - -// Should be able to remove a default from an attr, *as long as that -// attr has always existed*. -REGISTER_OP(""RemoveDefault"").Output(""ndef: string"").Attr(""a: int""); -REGISTER_KERNEL_BUILDER(Name(""RemoveDefault"").Device(DEVICE_CPU), TestKernel); - -TEST_F(OpCompatibilityTest, RemoveDefault) { - OpRegistrationData old_op; - TF_ASSERT_OK(OpDefBuilder(""RemoveDefault"") - .Output(""ndef: string"") - .Attr(""a: int = 91"") - .Finalize(&old_op)); - TF_ASSERT_OK( - NodeDefBuilder(""remove_default"", &old_op.op_def).Finalize(node_def())); - ExpectSuccess(old_op.op_def); - EXPECT_EQ(""remove_default = RemoveDefault[a=91]()"", Result()); -} - // Should be able to make an input/output polymorphic. // Changing from int32 -> T (where T: type = DT_INT32 by default). REGISTER_OP(""TypePolymorphic"") @@ -1054,9 +1032,56 @@ TEST_F(OpCompatibilityTest, RenameOutputListFails) { ""Output signature mismatch 'old:T' vs. 'new:T'""); } -// Changing an attr's default is not technically illegal, but should -// be forbidden if it the attr ever didn't exist since it likely -// affects semantics. +// Should not be able to add a default to an attr. 
+REGISTER_OP(""AddDefault"").Output(""ndef: string"").Attr(""a: int = 1234""); +REGISTER_KERNEL_BUILDER(Name(""AddDefault"").Device(DEVICE_CPU), TestKernel); + +TEST_F(OpCompatibilityTest, AddDefault) { + OpRegistrationData old_op; + TF_ASSERT_OK(OpDefBuilder(""AddDefault"") + .Output(""ndef: string"") + .Attr(""a: int"") + .Finalize(&old_op)); + TF_ASSERT_OK(NodeDefBuilder(""add_default"", &old_op.op_def) + .Attr(""a"", 765) + .Finalize(node_def())); + ExpectDefaultChangeFailure( + old_op.op_def, + ""Attr 'a' has added/removed it's default; from no default to 1234""); +} + +// Should not be able to remove a default from an attr. +REGISTER_OP(""RemoveDefault"").Output(""ndef: string"").Attr(""a: int""); +REGISTER_KERNEL_BUILDER(Name(""RemoveDefault"").Device(DEVICE_CPU), TestKernel); + +TEST_F(OpCompatibilityTest, RemoveDefault) { + OpRegistrationData old_op; + TF_ASSERT_OK(OpDefBuilder(""RemoveDefault"") + .Output(""ndef: string"") + .Attr(""a: int = 91"") + .Finalize(&old_op)); + TF_ASSERT_OK( + NodeDefBuilder(""remove_default"", &old_op.op_def).Finalize(node_def())); + ExpectDefaultChangeFailure( + old_op.op_def, + ""Attr 'a' has added/removed it's default; from 91 to no default""); +} + +// Should not be able to change a default for an attr. +REGISTER_OP(""ChangeDefault"").Output(""ndef: string"").Attr(""a: int = 1""); +REGISTER_KERNEL_BUILDER(Name(""ChangeDefault"").Device(DEVICE_CPU), TestKernel); + +TEST_F(OpCompatibilityTest, ChangeDefault) { + OpRegistrationData old_op; + TF_ASSERT_OK(OpDefBuilder(""ChangeDefault"") + .Output(""ndef: string"") + .Attr(""a: int = 2"") + .Finalize(&old_op)); + TF_ASSERT_OK( + NodeDefBuilder(""change_default"", &old_op.op_def).Finalize(node_def())); + ExpectDefaultChangeFailure( + old_op.op_def, ""Attr 'a' has changed it's default value; from 2 to 1""); +} } // namespace } // namespace tensorflow ",0,test 6a20edf95fcaf45c46385eaf649e814a571737ed,tensorflow/tensorflow,"backward compatibility: Disallow changes to an OpDef attribute's default value. PiperOrigin-RevId: 180611380",op_def_util.cc,"@@ -449,6 +449,11 @@ string AllowedStr(const OpDef::AttrDef& attr) { return SummarizeAttrValue(attr.allowed_values()); } +string DefaultAttrStr(const OpDef::AttrDef& attr) { + if (!attr.has_default_value()) return ""no default""; + return SummarizeAttrValue(attr.default_value()); +} + bool HigherMinimum(const OpDef::AttrDef& old_attr, const OpDef::AttrDef& new_attr) { // Anything -> no restriction : not more restrictive. 
@@ -610,6 +615,16 @@ Status OpDefCompatible(const OpDef& old_op, const OpDef& new_op) { VALIDATE(!HigherMinimum(old_attr, *new_attr), ""Attr '"", old_attr.name(), ""' has a higher minimum; from "", MinStr(old_attr), "" to "", MinStr(*new_attr)); + VALIDATE(old_attr.has_default_value() == new_attr->has_default_value(), + ""Attr '"", old_attr.name(), ""' has added/removed it's default; "", + ""from "", DefaultAttrStr(old_attr), "" to "", + DefaultAttrStr(*new_attr)); + VALIDATE(!old_attr.has_default_value() || + AreAttrValuesEqual(old_attr.default_value(), + new_attr->default_value()), + ""Attr '"", old_attr.name(), ""' has changed it's default value; "", + ""from "", DefaultAttrStr(old_attr), "" to "", + DefaultAttrStr(*new_attr)); } for (const auto& new_attr : new_op.attr()) { ",0,test 3ac00edd723ea54d42b015273913f8f616b4cbea,tensorflow/tensorflow,"[lite] Update TAC and add options for inlining and legalizing to TFLite PiperOrigin-RevId: 397205785 Change-Id: Ie0940174b4e995b3e2eb27571d4369b9d6870a2d",tac_module.cc,"@@ -29,8 +29,8 @@ namespace TFL { namespace tac { namespace { // TODO(b/177376459): We should make this configureable. -void AddExportTFLPass(mlir::OpPassManager* pass_manager) { - pass_manager->addPass(mlir::createInlinerPass()); +void AddExportTFLPass(mlir::OpPassManager* pass_manager, bool enable_inliner) { + if (enable_inliner) pass_manager->addPass(mlir::createInlinerPass()); pass_manager->addPass(mlir::createSymbolDCEPass()); pass_manager->addNestedPass(mlir::createCanonicalizerPass()); pass_manager->addNestedPass(mlir::createCSEPass()); @@ -46,19 +46,21 @@ void TacModule::AddTACPass(mlir::OpPassManager* pass_manager, /*fold_all_constants=*/false)); pass_manager->addPass( mlir::TFL::tac::CreateAlternativeSubgraphPass(device_specs)); - // After we creat the alternative subgraph, we can still do canonicalization - // legalization & other optimizations as long as we're not inlining the - // function. - // And in fact, we probably need to do the proper legalization, for the - // compute cost to work. (in case we added some TF ops) - pass_manager->addPass(mlir::TFL::CreatePrepareTFPass( - /*unfold_batch_matmul=*/true, - /*allow_bf16_and_f16_type_legalization=*/false)); - pass_manager->addNestedPass(mlir::createCanonicalizerPass()); - pass_manager->addPass( - mlir::TFL::CreateLegalizeTFPass(/*run_tfl_runtime_verification=*/true)); - pass_manager->addPass( - mlir::TFL::CreateOptimizePass(/*enable_canonicalization=*/true)); + if (options_.legalize_to_tflite_ops) { + // After we creat the alternative subgraph, we can still do canonicalization + // legalization & other optimizations as long as we're not inlining the + // function. + // And in fact, we probably need to do the proper legalization, for the + // compute cost to work. 
(in case we added some TF ops) + pass_manager->addPass(mlir::TFL::CreatePrepareTFPass( + /*unfold_batch_matmul=*/true, + /*allow_bf16_and_f16_type_legalization=*/false)); + pass_manager->addNestedPass(mlir::createCanonicalizerPass()); + pass_manager->addPass( + mlir::TFL::CreateLegalizeTFPass(/*run_tfl_runtime_verification=*/true)); + pass_manager->addPass( + mlir::TFL::CreateOptimizePass(/*enable_canonicalization=*/true)); + } pass_manager->addPass(mlir::TFL::tac::CreateComputeCostPass()); pass_manager->addPass(mlir::TFL::tac::CreatePickSubgraphsPass()); @@ -79,7 +81,7 @@ absl::Status TacModule::RunTacPasses(mlir::ModuleOp* module, bool debug_mode) { mlir::OpPassManager::Nesting::Implicit); AddTACPass(&pm, options_.hardware_backends); if (!debug_mode) { - AddExportTFLPass(&pm); + AddExportTFLPass(&pm, options_.enable_inliner); } mlir::StatusScopedDiagnosticHandler statusHandler(module->getContext(), ",0,train 3ac00edd723ea54d42b015273913f8f616b4cbea,tensorflow/tensorflow,"[lite] Update TAC and add options for inlining and legalizing to TFLite PiperOrigin-RevId: 397205785 Change-Id: Ie0940174b4e995b3e2eb27571d4369b9d6870a2d",tac_module.h,"@@ -48,6 +48,10 @@ class TacModule { // This will output different alternative subgraphs in mlir format for debug // purpose. bool debug_mode = false; + // Whether to enable inliner passes or not. + bool enable_inliner = false; + // Whether to legalize ops to TFLite ops before exporting. + bool legalize_to_tflite_ops = false; }; virtual ~TacModule() {} ",0,train 3ac00edd723ea54d42b015273913f8f616b4cbea,tensorflow/tensorflow,"[lite] Update TAC and add options for inlining and legalizing to TFLite PiperOrigin-RevId: 397205785 Change-Id: Ie0940174b4e995b3e2eb27571d4369b9d6870a2d",tac_translate.cc,"@@ -126,6 +126,8 @@ absl::Status TargetAwareConversionMain() { if (!output_mlir || inline_subgraphs) { options.debug_mode = false; } + options.enable_inliner = true; + options.legalize_to_tflite_ops = true; mlir::TFL::tac::TacModule tac_module(options); mlir::DialectRegistry registry; mlir::RegisterAllTensorFlowDialects(registry); ",0,train 1f10b08b375f6d9c4800dc4183ef836b3d729605,tensorflow/tensorflow,"Allow using DNN to only train the embeddings and using the tree model for the final prediction. PiperOrigin-RevId: 197462585",dnn_tree_combined_estimator.py,"@@ -45,6 +45,7 @@ from tensorflow.python.training import training_util _DNN_LEARNING_RATE = 0.001 + def _get_optimizer(optimizer): if callable(optimizer): return optimizer() @@ -73,6 +74,7 @@ def _dnn_tree_combined_model_fn(features, dnn_input_layer_partitioner=None, dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, + predict_with_tree_only=False, tree_feature_columns=None, tree_center_bias=False, use_core_versions=False): @@ -108,6 +110,8 @@ def _dnn_tree_combined_model_fn(features, as a feature to the tree. dnn_steps_to_train: Number of steps to train dnn for before switching to gbdt. + predict_with_tree_only: Whether to use only the tree model output as the + final prediction. tree_feature_columns: An iterable containing all the feature columns used by the model's boosted trees. If dnn_input_layer_to_tree is set to True, these features are in addition to dnn_feature_columns. 
@@ -132,8 +136,7 @@ def _dnn_tree_combined_model_fn(features, dnn_parent_scope = ""dnn"" dnn_partitioner = dnn_input_layer_partitioner or ( partitioned_variables.min_max_variable_partitioner( - max_partitions=config.num_ps_replicas, - min_slice_size=64 << 20)) + max_partitions=config.num_ps_replicas, min_slice_size=64 << 20)) with variable_scope.variable_scope( dnn_parent_scope, @@ -171,8 +174,7 @@ def _dnn_tree_combined_model_fn(features, _add_hidden_layer_summary(net, hidden_layer_scope.name) previous_layer = net with variable_scope.variable_scope( - ""logits"", - values=(previous_layer,)) as logits_scope: + ""logits"", values=(previous_layer,)) as logits_scope: dnn_logits = layers.fully_connected( previous_layer, head.logits_dimension, @@ -190,8 +192,7 @@ def _dnn_tree_combined_model_fn(features, optimizer=_get_optimizer(dnn_optimizer), name=dnn_parent_scope, variables=ops.get_collection( - ops.GraphKeys.TRAINABLE_VARIABLES, - scope=dnn_parent_scope), + ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope), # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) @@ -230,7 +231,10 @@ def _dnn_tree_combined_model_fn(features, update_op = state_ops.assign_add(global_step, 1).op return update_op - tree_train_logits = dnn_logits + tree_logits + if predict_with_tree_only: + tree_train_logits = tree_logits + else: + tree_train_logits = dnn_logits + tree_logits def _no_train_op_fn(loss): """"""Returns a no-op."""""" @@ -288,10 +292,10 @@ def _dnn_tree_combined_model_fn(features, finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor() model_fn_ops.training_hooks.extend([ - trainer_hooks.SwitchTrainOp( - dnn_train_op, dnn_steps_to_train, tree_train_op), - trainer_hooks.StopAfterNTrees( - num_trees, attempted_trees, finalized_trees)]) + trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train, + tree_train_op), + trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees) + ]) return model_fn_ops @@ -318,6 +322,7 @@ class DNNBoostedTreeCombinedClassifier(estimator.Estimator): dnn_input_layer_partitioner=None, dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, + predict_with_tree_only=False, tree_feature_columns=None, tree_center_bias=False, use_core_versions=False): @@ -360,6 +365,8 @@ class DNNBoostedTreeCombinedClassifier(estimator.Estimator): as a feature to the tree. dnn_steps_to_train: Number of steps to train dnn for before switching to gbdt. + predict_with_tree_only: Whether to use only the tree model output as the + final prediction. tree_feature_columns: An iterable containing all the feature columns used by the model's boosted trees. If dnn_input_layer_to_tree is set to True, these features are in addition to dnn_feature_columns. 
@@ -377,16 +384,32 @@ class DNNBoostedTreeCombinedClassifier(estimator.Estimator): def _model_fn(features, labels, mode, config): return _dnn_tree_combined_model_fn( - features, labels, mode, head, dnn_hidden_units, dnn_feature_columns, - tree_learner_config, num_trees, tree_examples_per_layer, config, - dnn_optimizer, dnn_activation_fn, dnn_dropout, - dnn_input_layer_partitioner, dnn_input_layer_to_tree, - dnn_steps_to_train, tree_feature_columns, tree_center_bias, - use_core_versions) + features=features, + labels=labels, + mode=mode, + head=head, + dnn_hidden_units=dnn_hidden_units, + dnn_feature_columns=dnn_feature_columns, + tree_learner_config=tree_learner_config, + num_trees=num_trees, + tree_examples_per_layer=tree_examples_per_layer, + config=config, + dnn_optimizer=dnn_optimizer, + dnn_activation_fn=dnn_activation_fn, + dnn_dropout=dnn_dropout, + dnn_input_layer_partitioner=dnn_input_layer_partitioner, + dnn_input_layer_to_tree=dnn_input_layer_to_tree, + dnn_steps_to_train=dnn_steps_to_train, + predict_with_tree_only=predict_with_tree_only, + tree_feature_columns=tree_feature_columns, + tree_center_bias=tree_center_bias, + use_core_versions=use_core_versions) super(DNNBoostedTreeCombinedClassifier, self).__init__( - model_fn=_model_fn, model_dir=model_dir, - config=config, feature_engineering_fn=feature_engineering_fn) + model_fn=_model_fn, + model_dir=model_dir, + config=config, + feature_engineering_fn=feature_engineering_fn) class DNNBoostedTreeCombinedRegressor(estimator.Estimator): @@ -410,6 +433,7 @@ class DNNBoostedTreeCombinedRegressor(estimator.Estimator): dnn_input_layer_partitioner=None, dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, + predict_with_tree_only=False, tree_feature_columns=None, tree_center_bias=False, use_core_versions=False): @@ -452,6 +476,8 @@ class DNNBoostedTreeCombinedRegressor(estimator.Estimator): as a feature to the tree. dnn_steps_to_train: Number of steps to train dnn for before switching to gbdt. + predict_with_tree_only: Whether to use only the tree model output as the + final prediction. tree_feature_columns: An iterable containing all the feature columns used by the model's boosted trees. If dnn_input_layer_to_tree is set to True, these features are in addition to dnn_feature_columns. 
@@ -474,16 +500,32 @@ class DNNBoostedTreeCombinedRegressor(estimator.Estimator): def _model_fn(features, labels, mode, config): return _dnn_tree_combined_model_fn( - features, labels, mode, head, dnn_hidden_units, dnn_feature_columns, - tree_learner_config, num_trees, tree_examples_per_layer, config, - dnn_optimizer, dnn_activation_fn, dnn_dropout, - dnn_input_layer_partitioner, dnn_input_layer_to_tree, - dnn_steps_to_train, tree_feature_columns, tree_center_bias, - use_core_versions) + features=features, + labels=labels, + mode=mode, + head=head, + dnn_hidden_units=dnn_hidden_units, + dnn_feature_columns=dnn_feature_columns, + tree_learner_config=tree_learner_config, + num_trees=num_trees, + tree_examples_per_layer=tree_examples_per_layer, + config=config, + dnn_optimizer=dnn_optimizer, + dnn_activation_fn=dnn_activation_fn, + dnn_dropout=dnn_dropout, + dnn_input_layer_partitioner=dnn_input_layer_partitioner, + dnn_input_layer_to_tree=dnn_input_layer_to_tree, + dnn_steps_to_train=dnn_steps_to_train, + predict_with_tree_only=predict_with_tree_only, + tree_feature_columns=tree_feature_columns, + tree_center_bias=tree_center_bias, + use_core_versions=use_core_versions) super(DNNBoostedTreeCombinedRegressor, self).__init__( - model_fn=_model_fn, model_dir=model_dir, - config=config, feature_engineering_fn=feature_engineering_fn) + model_fn=_model_fn, + model_dir=model_dir, + config=config, + feature_engineering_fn=feature_engineering_fn) class DNNBoostedTreeCombinedEstimator(estimator.Estimator): @@ -508,6 +550,7 @@ class DNNBoostedTreeCombinedEstimator(estimator.Estimator): dnn_input_layer_partitioner=None, dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, + predict_with_tree_only=False, tree_feature_columns=None, tree_center_bias=False, use_core_versions=False): @@ -545,6 +588,8 @@ class DNNBoostedTreeCombinedEstimator(estimator.Estimator): as a feature to the tree. dnn_steps_to_train: Number of steps to train dnn for before switching to gbdt. + predict_with_tree_only: Whether to use only the tree model output as the + final prediction. tree_feature_columns: An iterable containing all the feature columns used by the model's boosted trees. If dnn_input_layer_to_tree is set to True, these features are in addition to dnn_feature_columns. @@ -553,15 +598,32 @@ class DNNBoostedTreeCombinedEstimator(estimator.Estimator): use_core_versions: Whether feature columns and loss are from the core (as opposed to contrib) version of tensorflow. 
"""""" + def _model_fn(features, labels, mode, config): return _dnn_tree_combined_model_fn( - features, labels, mode, head, dnn_hidden_units, dnn_feature_columns, - tree_learner_config, num_trees, tree_examples_per_layer, config, - dnn_optimizer, dnn_activation_fn, dnn_dropout, - dnn_input_layer_partitioner, dnn_input_layer_to_tree, - dnn_steps_to_train, tree_feature_columns, tree_center_bias, - use_core_versions) + features=features, + labels=labels, + mode=mode, + head=head, + dnn_hidden_units=dnn_hidden_units, + dnn_feature_columns=dnn_feature_columns, + tree_learner_config=tree_learner_config, + num_trees=num_trees, + tree_examples_per_layer=tree_examples_per_layer, + config=config, + dnn_optimizer=dnn_optimizer, + dnn_activation_fn=dnn_activation_fn, + dnn_dropout=dnn_dropout, + dnn_input_layer_partitioner=dnn_input_layer_partitioner, + dnn_input_layer_to_tree=dnn_input_layer_to_tree, + dnn_steps_to_train=dnn_steps_to_train, + predict_with_tree_only=predict_with_tree_only, + tree_feature_columns=tree_feature_columns, + tree_center_bias=tree_center_bias, + use_core_versions=use_core_versions) super(DNNBoostedTreeCombinedEstimator, self).__init__( - model_fn=_model_fn, model_dir=model_dir, - config=config, feature_engineering_fn=feature_engineering_fn) + model_fn=_model_fn, + model_dir=model_dir, + config=config, + feature_engineering_fn=feature_engineering_fn) ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. PiperOrigin-RevId: 215500760",resolve_constant_binary.cc,"@@ -191,6 +191,14 @@ void EvaluateBinaryOperatorOnConstantInputs(Model* model, bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) { const auto binary_it = model->operators.begin() + op_index; const auto* binary_op = binary_it->get(); + + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. + if (!IsDiscardableArray(*model, binary_op->outputs[0])) { + return false; + } + // Test for binary ops of types that we know how to resolve if (binary_op->type != OperatorType::kAdd && binary_op->type != OperatorType::kMul && ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). 
It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. PiperOrigin-RevId: 215500760",resolve_constant_concatenation.cc,"@@ -144,6 +144,13 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) { const auto* concat_op = static_cast(concat_base_op); + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. + if (!IsDiscardableArray(*model, concat_op->outputs[0])) { + return false; + } + for (const string& input_name : concat_op->inputs) { // We only expect constant unquantized arrays as input, otherwise we return. // We also make sure the shapes of the input arrays are known and they are ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. PiperOrigin-RevId: 215500760",resolve_constant_fake_quant.cc,"@@ -69,6 +69,13 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) { const auto* fakequant_op = static_cast(fakequant_base_op); + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. + if (!IsDiscardableArray(*model, fakequant_op->outputs[0])) { + return false; + } + // Yield until the fakequant MinMax has been resolved. if (!fakequant_op->minmax) { return false; ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. 
PiperOrigin-RevId: 215500760",resolve_constant_fill.cc,"@@ -52,6 +52,13 @@ bool ResolveConstantFill::Run(Model* model, std::size_t op_index) { CHECK_EQ(op->inputs.size(), 2); CHECK_EQ(op->outputs.size(), 1); + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. + if (!IsDiscardableArray(*model, op->outputs[0])) { + return false; + } + auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. PiperOrigin-RevId: 215500760",resolve_constant_gather.cc,"@@ -71,6 +71,14 @@ bool ResolveConstantGather::Run(Model* model, std::size_t op_index) { CHECK_GE(op->inputs.size(), 2); CHECK_EQ(op->outputs.size(), 1); + + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. + if (!IsDiscardableArray(*model, op->outputs[0])) { + return false; + } + auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. PiperOrigin-RevId: 215500760",resolve_constant_pack.cc,"@@ -59,6 +59,14 @@ bool ResolveConstantPack::Run(Model* model, std::size_t op_index) { CHECK_GE(op->inputs.size(), 1); CHECK_EQ(op->outputs.size(), 1); + + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. 
+ if (!IsDiscardableArray(*model, op->outputs[0])) { + return false; + } + auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. PiperOrigin-RevId: 215500760",resolve_constant_random_uniform.cc,"@@ -70,6 +70,13 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) { CHECK_EQ(op->inputs.size(), 1); CHECK_EQ(op->outputs.size(), 1); + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. + if (!IsDiscardableArray(*model, op->outputs[0])) { + return false; + } + auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. PiperOrigin-RevId: 215500760",resolve_constant_range.cc,"@@ -28,6 +28,14 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) { auto* op = static_cast(base_op); CHECK_EQ(op->inputs.size(), 3); + + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. + if (!IsDiscardableArray(*model, op->outputs[0])) { + return false; + } + const auto& start_array = model->GetArray(op->inputs[0]); if (!start_array.has_shape()) { // Yield until all input dims have been resolved. ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. 
If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. PiperOrigin-RevId: 215500760",resolve_constant_reshape.cc,"@@ -33,6 +33,13 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) { CHECK_EQ(op->inputs.size(), 2); CHECK_EQ(op->outputs.size(), 1); + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. + if (!IsDiscardableArray(*model, op->outputs[0])) { + return false; + } + // We require constant inputs. if (!IsConstantParameterArray(*model, op->inputs[0]) || !IsConstantParameterArray(*model, op->inputs[1])) { ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. PiperOrigin-RevId: 215500760",resolve_constant_select.cc,"@@ -37,6 +37,14 @@ bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) { CHECK_GE(op->inputs.size(), 3); CHECK_EQ(op->outputs.size(), 1); + + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. + if (!IsDiscardableArray(*model, op->outputs[0])) { + return false; + } + auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. 
Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. PiperOrigin-RevId: 215500760",resolve_constant_shape_or_rank.cc,"@@ -27,6 +27,14 @@ bool ResolveConstantShapeOrRank::Run(Model* model, std::size_t op_index) { } CHECK_EQ(op->outputs.size(), 1); + + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. + if (!IsDiscardableArray(*model, op->outputs[0])) { + return false; + } + auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been resolved ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. PiperOrigin-RevId: 215500760",resolve_constant_slice.cc,"@@ -96,6 +96,14 @@ bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) { const SliceOperator* op = static_cast(base_op); CHECK_EQ(op->outputs.size(), 1); + + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. + if (!IsDiscardableArray(*model, op->outputs[0])) { + return false; + } + auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. 
PiperOrigin-RevId: 215500760",resolve_constant_strided_slice.cc,"@@ -114,6 +114,14 @@ bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) { static_cast(base_op); CHECK_EQ(op->outputs.size(), 1); + + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. + if (!IsDiscardableArray(*model, op->outputs[0])) { + return false; + } + auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. PiperOrigin-RevId: 215500760",resolve_constant_tile.cc,"@@ -105,6 +105,13 @@ bool ResolveConstantTile::Run(Model* model, std::size_t op_index) { } const auto* op = static_cast(base_op); + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. + if (!IsDiscardableArray(*model, op->outputs[0])) { + return false; + } + CHECK_GE(op->inputs.size(), 2); CHECK_EQ(op->outputs.size(), 1); auto& output_array = model->GetArray(op->outputs[0]); ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. PiperOrigin-RevId: 215500760",resolve_constant_transpose.cc,"@@ -111,6 +111,14 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) { CHECK_EQ(op->inputs.size(), 2); CHECK_EQ(op->outputs.size(), 1); + + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. 
+ if (!IsDiscardableArray(*model, op->outputs[0])) { + return false; + } + auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. ",0,train 8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a discardable array. If it's not discardable, it means that the user wants this array to keep existing in a way that is observable to them, i.e. not as weights. Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable). It seems that so far we have been relying on accidental ordering of graph transformations for such state arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array. So I don't have a test for this, but this seems to be tightening existing behavior, and should be good to have as long as it does not regress anything. PiperOrigin-RevId: 215500760",resolve_constant_unary.cc,"@@ -48,6 +48,14 @@ bool CopyMinMaxFromFirstInput(const Operator& op, Model* model) { bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { const auto unary_it = model->operators.begin() + op_index; const auto* unary_op = unary_it->get(); + + // If the output of this op is a non-discardable array such as an input_array + // or a state array of the model, then this is a job for RemoveUnusedOp, not + // for constants-propagation. + if (!IsDiscardableArray(*model, unary_op->outputs[0])) { + return false; + } + // Test for unary ops of types that we know how to resolve. switch (unary_op->type) { case OperatorType::kCast: ",0,train 60f965adb6c0393fe6d2ce4b990af6ffa58c0852,tensorflow/tensorflow,"s/tf.contrib.eager.GradientTape/tf.GradientTape/ PiperOrigin-RevId: 201372249",gradients_impl.py,"@@ -548,9 +548,8 @@ def _GradientsHelper(ys, src_graph=None): """"""Implementation of gradients()."""""" if context.executing_eagerly(): - raise RuntimeError(""tf.gradients not supported when eager execution "" - ""is enabled. Use tf.contrib.eager.GradientTape "" - ""instead."") + raise RuntimeError(""tf.gradients is not supported when eager execution "" + ""is enabled. Use tf.GradientTape instead."") if src_graph is None: src_graph = ops.get_default_graph() ",0,train 832f57b14e5dfbad9946d039cd20a32a0314d9bb,tensorflow/tensorflow,"Fix tpu_strategy_tests on Cloud TPU. PiperOrigin-RevId: 302738954 Change-Id: Ib3164d271186fe976d6154e1a0ae02bf0002f2fc",tpu_strategy_test.py,"@@ -310,10 +310,11 @@ class TPUStrategyTest(test.TestCase): bar(1) - # TODO(b/152251070): Re-enable once modified to work on Cloud TPU. - def disable_test_using_external_variable_inside_tf_function(self): + def test_using_external_variable_inside_tf_function(self): strategy = get_tpu_strategy() - dataset = dataset_ops.Dataset.range(10, output_type=dtypes.float32).batch(2) + dataset = dataset_ops.Dataset.range( + strategy.num_replicas_in_sync * 2, + output_type=dtypes.float32).batch(strategy.num_replicas_in_sync) input_iterator = iter(strategy.experimental_distribute_dataset(dataset)) v = variables.Variable(2.0) @@ -330,12 +331,12 @@ class TPUStrategyTest(test.TestCase): expected_result, strategy.experimental_local_results(train_step(next(input_iterator)))) - # TODO(b/152251070): Re-enable once modified to work on Cloud TPU. 
- def disable_test_keras_metric_outside_strategy_scope_per_replica(self): + def test_keras_metric_outside_strategy_scope_per_replica(self): strategy = get_tpu_strategy() metric = keras.metrics.Mean(""test_metric"", dtype=dtypes.float32) - dataset = dataset_ops.Dataset.range(10).batch(2) + dataset = dataset_ops.Dataset.range(strategy.num_replicas_in_sync * + 2).batch(2) dataset = strategy.experimental_distribute_dataset(dataset) @def_function.function ",0,test 71bbebbf4d04c1bcb6ed44e2156087c9fec06e9e,tensorflow/tensorflow,Moved final StatusGroup method calls,status_group_fuzz.cc,"@@ -55,9 +55,7 @@ extern ""C"" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { } sg.as_summary_status(); - sg.as_concatenated_status(); - sg.AttachLogMessages(); return 0; ",0,test 074b66af3415cb3c60336b0a94f23aec04a715e3,tensorflow/tensorflow,"Change `dim` to `axis` for cosine_distance (#12801) * Change `dim` to `axis` for cosine_distance This fix changes `dim` to `axis` for cosine_distance so that the args are consistent with other methods in TensorFlow. The backward-compatibility has been maintained in the fix. This fix fixes 8205. Signed-off-by: Yong Tang * Change `dim` to `axis` for tf.losses.cosine_distance so that args are consistent with other TensorFlow methods. Signed-off-by: Yong Tang * Update API goldens and address review feedback This commit updates API goldens so that `//tensorflow/tools/api/tests:api_compatibility_test` could pass. Review feedback has also been addressed. Signed-off-by: Yong Tang ",loss_ops.py,"@@ -28,6 +28,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import nn_ops from tensorflow.python.util.deprecation import deprecated +from tensorflow.python.util.deprecation import deprecated_args __all__ = [""absolute_difference"", ""add_loss"", @@ -623,8 +624,9 @@ def mean_pairwise_squared_error( @deprecated(""2016-12-30"", ""Use tf.losses.cosine_distance instead."") +@deprecated_args(None, ""dim is deprecated, use axis instead"", ""dim"") def cosine_distance( - predictions, labels=None, dim=None, weights=1.0, scope=None): + predictions, labels=None, axis=None, weights=1.0, scope=None, dim=None): """"""Adds a cosine-distance loss to the training procedure. Note that the function assumes that `predictions` and `labels` are already @@ -633,10 +635,11 @@ def cosine_distance( Args: predictions: An arbitrary matrix. labels: A `Tensor` whose shape matches 'predictions' - dim: The dimension along which the cosine distance is computed. + axis: The dimension along which the cosine distance is computed. weights: Coefficients for the loss a scalar, a tensor of shape [batch_size] or a tensor whose shape matches `predictions`. scope: The scope for the operations performed in computing the loss. + dim: The old (deprecated) name for `axis`. Returns: A scalar `Tensor` representing the loss value. @@ -645,8 +648,12 @@ def cosine_distance( ValueError: If `predictions` shape doesn't match `labels` shape, or `weights` is `None`. 
"""""" - if dim is None: - raise ValueError(""`dim` cannot be None."") + if dim is not None: + if axis is not None: + raise ValueError(""Cannot specify both 'axis' and 'dim'"") + axis = dim + if axis is None and dim is None: + raise ValueError(""You must specify 'axis'."") with ops.name_scope(scope, ""cosine_distance_loss"", [predictions, labels, weights]) as scope: predictions.get_shape().assert_is_compatible_with(labels.get_shape()) @@ -655,5 +662,5 @@ def cosine_distance( labels = math_ops.to_float(labels) radial_diffs = math_ops.multiply(predictions, labels) - losses = 1 - math_ops.reduce_sum(radial_diffs, reduction_indices=[dim,]) + losses = 1 - math_ops.reduce_sum(radial_diffs, reduction_indices=[axis,]) return compute_weighted_loss(losses, weights, scope=scope) ",0,train 074b66af3415cb3c60336b0a94f23aec04a715e3,tensorflow/tensorflow,"Change `dim` to `axis` for cosine_distance (#12801) * Change `dim` to `axis` for cosine_distance This fix changes `dim` to `axis` for cosine_distance so that the args are consistent with other methods in TensorFlow. The backward-compatibility has been maintained in the fix. This fix fixes 8205. Signed-off-by: Yong Tang * Change `dim` to `axis` for tf.losses.cosine_distance so that args are consistent with other TensorFlow methods. Signed-off-by: Yong Tang * Update API goldens and address review feedback This commit updates API goldens so that `//tensorflow/tools/api/tests:api_compatibility_test` could pass. Review feedback has also been addressed. Signed-off-by: Yong Tang ",losses_impl.py,"@@ -27,6 +27,7 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import nn_ops from tensorflow.python.ops import weights_broadcast_ops from tensorflow.python.ops.losses import util +from tensorflow.python.util.deprecation import deprecated_args class Reduction(object): @@ -230,10 +231,12 @@ def absolute_difference( losses, weights, scope, loss_collection, reduction=reduction) +@deprecated_args(None, ""dim is deprecated, use axis instead"", ""dim"") def cosine_distance( - labels, predictions, dim=None, weights=1.0, scope=None, + labels, predictions, axis=None, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES, - reduction=Reduction.SUM_BY_NONZERO_WEIGHTS): + reduction=Reduction.SUM_BY_NONZERO_WEIGHTS, + dim=None): """"""Adds a cosine-distance loss to the training procedure. Note that the function assumes that `predictions` and `labels` are already @@ -242,13 +245,14 @@ def cosine_distance( Args: labels: `Tensor` whose shape matches 'predictions' predictions: An arbitrary matrix. - dim: The dimension along which the cosine distance is computed. + axis: The dimension along which the cosine distance is computed. weights: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `losses` dimension). scope: The scope for the operations performed in computing the loss. loss_collection: collection to which this loss will be added. reduction: Type of reduction to apply to loss. + dim: The old (deprecated) name for `axis`. Returns: Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same @@ -256,10 +260,14 @@ def cosine_distance( Raises: ValueError: If `predictions` shape doesn't match `labels` shape, or - `dim`, `labels`, `predictions` or `weights` is `None`. + `axis`, `labels`, `predictions` or `weights` is `None`. 
"""""" - if dim is None: - raise ValueError(""`dim` cannot be None."") + if dim is not None: + if axis is not None: + raise ValueError(""Cannot specify both 'axis' and 'dim'"") + axis = dim + if axis is None and dim is None: + raise ValueError(""You must specify 'axis'."") if labels is None: raise ValueError(""labels must not be None."") if predictions is None: @@ -271,7 +279,7 @@ def cosine_distance( predictions.get_shape().assert_is_compatible_with(labels.get_shape()) radial_diffs = math_ops.multiply(predictions, labels) - losses = 1 - math_ops.reduce_sum(radial_diffs, axis=(dim,), keep_dims=True) + losses = 1 - math_ops.reduce_sum(radial_diffs, axis=(axis,), keep_dims=True) return compute_weighted_loss( losses, weights, scope, loss_collection, reduction=reduction) ",0,train 221a5146df1afcc72d0d2490af487868031ba037,tensorflow/tensorflow,"Enable tuple type sharding when using a single element InfeedDequeueTuple. This will enable spatial partitioning for single input Infeeds. PiperOrigin-RevId: 243594818",xla_sharding.py,"@@ -120,9 +120,14 @@ class Sharding(object): tile_assignment_dimensions=tile_assignment_dims, tile_assignment_devices=range(num_devices))) - def apply_to_tensor(self, tensor): - """"""Applies this Sharding attribute to `tensor`."""""" - if len(tensor.op.outputs) > 1: + def apply_to_tensor(self, tensor, assign_tuple_sharding=False): + """"""Applies this Sharding attribute to `tensor`. + + Args: + tensor: A tf.Tensor to split. + assign_tuple_sharding: If the sharding type should be a tuple. + """""" + if len(tensor.op.outputs) > 1 or assign_tuple_sharding: proto = self._get_or_create_tuple_proto(tensor.op) # We can't mutate an element of old_proto.tuple_shardings, so create # a new proto. @@ -166,21 +171,30 @@ class Sharding(object): # tensor = xla_sharding.replicate(tensor) -def replicate(tensor): - Sharding.replicate().apply_to_tensor(tensor) +def replicate(tensor, assign_tuple_sharding=False): + Sharding.replicate().apply_to_tensor( + tensor, + assign_tuple_sharding=assign_tuple_sharding) return tensor -def assign_device(tensor, device): - Sharding.assign_device(device).apply_to_tensor(tensor) +def assign_device(tensor, device, assign_tuple_sharding=False): + Sharding.assign_device(device).apply_to_tensor( + tensor, + assign_tuple_sharding=assign_tuple_sharding) return tensor -def tile(tensor, tile_assignment): - Sharding.tile(tile_assignment).apply_to_tensor(tensor) +def tile(tensor, tile_assignment, assign_tuple_sharding=False): + Sharding.tile(tile_assignment).apply_to_tensor( + tensor, + assign_tuple_sharding=assign_tuple_sharding + ) return tensor -def split(tensor, split_dimension, num_devices): - Sharding.split(tensor, split_dimension, num_devices).apply_to_tensor(tensor) +def split(tensor, split_dimension, num_devices, assign_tuple_sharding=False): + Sharding.split(tensor, split_dimension, num_devices).apply_to_tensor( + tensor, + assign_tuple_sharding=assign_tuple_sharding) return tensor ",0,train 221a5146df1afcc72d0d2490af487868031ba037,tensorflow/tensorflow,"Enable tuple type sharding when using a single element InfeedDequeueTuple. This will enable spatial partitioning for single input Infeeds. PiperOrigin-RevId: 243594818",tpu_feed.py,"@@ -86,6 +86,8 @@ def partition_or_replicate_on_host(tensor, dims): def _tag_sharding_attribute_for_dequeued_tensor(tensor, dims): """"""Tags appropriate XLA sharding attribute to the dequeued tensor. + The sharding attribute of the dequeued tensor will be a tuple. + Args: tensor: The dequeued tensor on TPU. 
dims: A list of integer describes how the tensor is partitioned. @@ -94,12 +96,15 @@ def _tag_sharding_attribute_for_dequeued_tensor(tensor, dims): The same tensor with the xla_sharding attribute. """""" if dims is None: - return xla_sharding.replicate(tensor) + return xla_sharding.replicate(tensor, assign_tuple_sharding=True) elif np.prod(dims) == 1: - return xla_sharding.assign_device(tensor, 0) + return xla_sharding.assign_device(tensor, 0, assign_tuple_sharding=True) else: tile_assignment = np.arange(np.prod(dims)).reshape(dims) - return xla_sharding.tile(tensor=tensor, tile_assignment=tile_assignment) + return xla_sharding.tile( + tensor=tensor, + tile_assignment=tile_assignment, + assign_tuple_sharding=True) def tag_sharding_attribute_for_dequeued_tensors(dequeues, dims): ",0,train f3b389ca9369e81c60ffa0615f1f87b19c19df85,tensorflow/tensorflow,"Automated rollback of commit b9a6fea1f0a501b226394431d0377eef0b40c4b0 PiperOrigin-RevId: 257837218",meta_optimizer.cc,"@@ -32,8 +32,8 @@ limitations under the License. #include ""tensorflow/core/grappler/optimizers/debug_stripper.h"" #include ""tensorflow/core/grappler/optimizers/dependency_optimizer.h"" #include ""tensorflow/core/grappler/optimizers/function_optimizer.h"" -#include ""tensorflow/core/grappler/optimizers/generic_layout_optimizer.h"" #include ""tensorflow/core/grappler/optimizers/implementation_selector.h"" +#include ""tensorflow/core/grappler/optimizers/layout_optimizer.h"" #include ""tensorflow/core/grappler/optimizers/loop_optimizer.h"" #include ""tensorflow/core/grappler/optimizers/memory_optimizer.h"" #include ""tensorflow/core/grappler/optimizers/model_pruner.h"" @@ -121,7 +121,7 @@ std::unique_ptr MetaOptimizer::MakeNewOptimizer( MK_OPT(""constfold"", new ConstantFolding(cpu_device_)); MK_OPT(""shape"", new ShapeOptimizer()); MK_OPT(""remap"", new Remapper(cfg_.remapping())); - MK_OPT(""layout"", new GenericLayoutOptimizer()); + MK_OPT(""layout"", new LayoutOptimizer()); MK_OPT(""auto_mixed_precision"", new AutoMixedPrecision(cfg_.auto_mixed_precision())); MK_OPT(""memory"", new MemoryOptimizer(RewriterConfig::MANUAL)); @@ -193,7 +193,7 @@ Status MetaOptimizer::InitializeOptimizers( MakeUnique(cfg_.dependency_optimization())); } if (cfg_.layout_optimizer() != RewriterConfig::OFF) { - optimizers->push_back(MakeUnique()); + optimizers->push_back(MakeUnique()); } if (AutoMixedPrecisionEnabled(cfg_.auto_mixed_precision())) { optimizers->push_back( @@ -267,7 +267,7 @@ Status MetaOptimizer::InitializeCustomGraphOptimizers( TF_RETURN_IF_ERROR(custom_optimizer->Init(&optimizer_config)); optimizers->push_back(std::move(custom_optimizer)); } else { - // If there are no custom optimizers with given name, try to initialize a + // If there are no custom optimizers with given name, try to initalize a // default optimizer. This way, custom configurable optimizers can be // mixed with default optimizers in any order. auto optimizer = MakeNewOptimizer(optimizer_config.name()); ",0,train f96dcc1584ada70a1e58513dab4af82fe54cb3fa,tensorflow/tensorflow,"Make tf.group() a tf.no_op() Change: 118211924",control_flow_ops_py_test.py,"@@ -1468,6 +1468,11 @@ class ControlFlowTest(tf.test.TestCase): self.assertAllClose([0.0], v1_val) self.assertAllClose([1.0], v2_val) + def testGroupEmpty(self): + op = tf.group() + self.assertEqual(op.type, ""NoOp"") + self.assertEqual(op.control_inputs, []) + def testMergeShapes(self): # All inputs unknown. 
p1 = tf.placeholder(tf.float32) ",0,test f96dcc1584ada70a1e58513dab4af82fe54cb3fa,tensorflow/tensorflow,"Make tf.group() a tf.no_op() Change: 118211924",control_flow_ops.py,"@@ -1685,7 +1685,7 @@ def group(*inputs, **kwargs): See also `tuple` and `with_dependencies`. Args: - *inputs: One or more tensors to group. + *inputs: Zero or more tensors to group. **kwargs: Optional parameters to pass when constructing the NodeDef. name: A name for this operation (optional). @@ -1693,16 +1693,16 @@ def group(*inputs, **kwargs): An Operation that executes all its inputs. Raises: - ValueError: If an unknown keyword argument is provided, or if there are - no inputs. + ValueError: If an unknown keyword argument is provided. """""" name = kwargs.pop(""name"", None) if kwargs: raise ValueError(""Unknown keyword arguments: "" + "", "".join(kwargs.keys())) - if not inputs: - # TODO(touts): Would make sense to return a NoOp. - raise ValueError(""No inputs provided"") with ops.op_scope(inputs, name, ""group_deps"") as name: + # Grouping no inputs means do nothing + if not inputs: + return no_op(name=name) + # Sorts *inputs according to their devices. ops_on_device = {} # device -> operations specified on the device. for inp in inputs: ",0,test 97249979d9a76ae05d590f9cbe199c0b47712b4f,tensorflow/tensorflow,"bug fix: evaluate nodes before swap the original graph PiperOrigin-RevId: 190291844",constant_folding_test.cc,"@@ -1922,6 +1922,8 @@ TEST_F(ConstantFoldingTest, PartialFolding_Concat) { item.fetch = {""concat0"", ""concat1"", ""concat2"", ""concat3"", ""concat4"", ""concat5"", ""concat6"", ""concat7"", ""concat8"", ""concat9""}; + auto tensors_expected = EvaluateNodes(item.graph, {""concat0""}); + EXPECT_EQ(1, tensors_expected.size()); ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); @@ -1971,9 +1973,7 @@ TEST_F(ConstantFoldingTest, PartialFolding_Concat) { } } - auto tensors_expected = EvaluateNodes(item.graph, {""concat0""}); auto tensors = EvaluateNodes(output, {""concat0""}); - EXPECT_EQ(1, tensors_expected.size()); EXPECT_EQ(1, tensors.size()); test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); } ",0,train ea02fb88d2abe11b1a7779abb0a7d50e07f9d7b8,tensorflow/tensorflow,"Unify num_accelerators for all Cluster Resolvers PiperOrigin-RevId: 224843723",cluster_resolver.py,"@@ -22,6 +22,8 @@ import abc import six +from tensorflow.python.client import session +from tensorflow.python.framework import ops from tensorflow.python.training.server_lib import ClusterSpec @@ -32,6 +34,14 @@ def format_master_url(master, rpc_layer=None): return master +def get_accelerator_devices(master, config_proto): + # TODO(frankchn): Add support for eager mode as well as graph mode. + with ops.Graph().as_default(): + with session.Session(master, config=config_proto) as s: + devices = s.list_devices() + return devices + + @six.add_metaclass(abc.ABCMeta) class ClusterResolver(object): """"""Abstract class for all implementations of ClusterResolvers. @@ -91,7 +101,6 @@ class ClusterResolver(object): """""" raise NotImplementedError() - @abc.abstractmethod def num_accelerators(self, task_type=None, task_index=None, @@ -119,7 +128,9 @@ class ClusterResolver(object): config_proto: (Optional) Configuration for starting a new session to query how many accelerator cores it has. 
"""""" - raise NotImplementedError() + master = self.master(task_type, task_index) + devices = get_accelerator_devices(master, config_proto) + return sum(1 for d in devices if d.device_type == accelerator_type) @abc.abstractproperty def environment(self): ",0,train ea02fb88d2abe11b1a7779abb0a7d50e07f9d7b8,tensorflow/tensorflow,"Unify num_accelerators for all Cluster Resolvers PiperOrigin-RevId: 224843723",cluster_resolver_test.py,"@@ -18,11 +18,64 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.client import session +from tensorflow.python.distribute.cluster_resolver import ClusterResolver from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver from tensorflow.python.distribute.cluster_resolver import UnionClusterResolver from tensorflow.python.platform import test from tensorflow.python.training import server_lib +mock = test.mock + + +class MockBaseClusterResolver(ClusterResolver): + + def cluster_spec(self): + return None + + def master(self, task_type=None, task_index=None, rpc_layer=None): + return """" + + def environment(self): + return """" + + +class BaseClusterResolverTest(test.TestCase): + + @mock.patch.object(session.BaseSession, ""list_devices"") + def testNumAcceleratorsSuccess(self, mock_list_devices): + device_names = [ + ""/job:worker/task:0/device:GPU:0"", + ""/job:worker/task:0/device:GPU:1"", + ""/job:worker/task:0/device:GPU:2"", + ""/job:worker/task:0/device:GPU:3"", + ] + device_list = [ + session._DeviceAttributes( + name, ""GPU"", 1024, 0) for name in device_names + ] + mock_list_devices.return_value = device_list + + resolver = MockBaseClusterResolver() + self.assertEqual(resolver.num_accelerators(), 4) + + @mock.patch.object(session.BaseSession, ""list_devices"") + def testNumAcceleratorsFilterSuccess(self, mock_list_devices): + device_names = [ + ""/job:worker/task:0/device:TPU:0"", + ""/job:worker/task:0/device:TPU:1"", + ""/job:worker/task:0/device:TPU:2"", + ""/job:worker/task:0/device:TPU:3"", + ] + device_list = [ + session._DeviceAttributes( + name, ""TPU"", 1024, 0) for name in device_names + ] + mock_list_devices.return_value = device_list + + resolver = MockBaseClusterResolver() + self.assertEqual(resolver.num_accelerators(), 0) + class UnionClusterResolverTest(test.TestCase): # TODO(frankchn): Transform to parameterized test after it is included in the ",0,train ea02fb88d2abe11b1a7779abb0a7d50e07f9d7b8,tensorflow/tensorflow,"Unify num_accelerators for all Cluster Resolvers PiperOrigin-RevId: 224843723",gce_cluster_resolver.py,"@@ -51,7 +51,6 @@ class GceClusterResolver(ClusterResolver): task_type='worker', task_index=0, rpc_layer='grpc', - num_accelerators=0, credentials='default', service=None): """"""Creates a new GceClusterResolver object. @@ -73,8 +72,6 @@ class GceClusterResolver(ClusterResolver): can be distinguished from each other. rpc_layer: The RPC layer TensorFlow should use to communicate across instances. - num_accelerators: Number of accelerators (GPUs) present per - instance. credentials: GCE Credentials. If nothing is specified, this defaults to GoogleCredentials.get_application_default(). 
service: The GCE API object returned by the googleapiclient.discovery @@ -90,7 +87,6 @@ class GceClusterResolver(ClusterResolver): self._task_type = task_type self._task_index = task_index self._rpc_layer = rpc_layer - self._num_accelerators = num_accelerators self._port = port self._credentials = credentials @@ -201,12 +197,3 @@ class GceClusterResolver(ClusterResolver): @rpc_layer.setter def rpc_layer(self, rpc_layer): self._rpc_layer = rpc_layer - - def num_accelerators(self, - task_type=None, - task_index=None, - accelerator_type='GPU', - config_proto=None): - # Unused - del task_type, task_index, accelerator_type, config_proto - return self._num_accelerators ",0,train ea02fb88d2abe11b1a7779abb0a7d50e07f9d7b8,tensorflow/tensorflow,"Unify num_accelerators for all Cluster Resolvers PiperOrigin-RevId: 224843723",kubernetes_cluster_resolver.py,"@@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.client import device_lib from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver from tensorflow.python.distribute.cluster_resolver.cluster_resolver import format_master_url from tensorflow.python.training import server_lib @@ -167,16 +166,3 @@ class KubernetesClusterResolver(ClusterResolver): on internal systems. """""" return '' - - def num_accelerators(self, - task_type=None, - task_index=None, - accelerator_type='GPU', - config_proto=None): - # TODO(frankchn): Make querying non-local accelerators work - if task_type is not None or task_index is not None: - raise NotImplementedError('Querying non-local accelerators is not yet' - 'implemented.') - - local_devices = device_lib.list_local_devices(config_proto) - return sum(d.device_type == accelerator_type for d in local_devices) ",0,train ea02fb88d2abe11b1a7779abb0a7d50e07f9d7b8,tensorflow/tensorflow,"Unify num_accelerators for all Cluster Resolvers PiperOrigin-RevId: 224843723",tfconfig_cluster_resolver.py,"@@ -54,8 +54,7 @@ class TFConfigClusterResolver(ClusterResolver): task_type=None, task_index=None, rpc_layer=None, - environment=None, - num_accelerators=0): + environment=None): """"""Creates a new TFConfigClusterResolver. Args: @@ -66,17 +65,11 @@ class TFConfigClusterResolver(ClusterResolver): rpc_layer: (String, optional) Overrides the rpc layer TensorFlow uses. environment: (String, optional) Overrides the environment TensorFlow operates in. - num_accelerators: (Integer, optional) Specifies the number of - accelerators (e.g. GPUs, TPUs, others) that each node has. """""" - # TODO(frankchn): num_accelerators is a stop-gap and will be removed - # in favor of autodetection of devices soon. - self._task_type = task_type self._task_index = task_index self._rpc_layer = rpc_layer self._environment = environment - self._num_accelerators = num_accelerators @property def task_type(self): @@ -117,16 +110,6 @@ class TFConfigClusterResolver(ClusterResolver): def rpc_layer(self, rpc_layer): self._rpc_layer = rpc_layer - def num_accelerators(self, - task_type=None, - task_index=None, - accelerator_type='GPU', - config_proto=None): - # TODO(frankchn): Connect to server (w/ session_config) in the future. - # Unused, we do not connect to another server here right now. - del task_type, task_index, accelerator_type, config_proto - return self._num_accelerators - def cluster_spec(self): """"""Returns a ClusterSpec based on the TF_CONFIG environment variable. 
",0,train ea02fb88d2abe11b1a7779abb0a7d50e07f9d7b8,tensorflow/tensorflow,"Unify num_accelerators for all Cluster Resolvers PiperOrigin-RevId: 224843723",tfconfig_cluster_resolver_test.py,"@@ -168,13 +168,11 @@ class TFConfigClusterResolverTest(test.TestCase): } """""" - cluster_resolver = TFConfigClusterResolver(task_type='ps', task_index=0, - num_accelerators=8) + cluster_resolver = TFConfigClusterResolver(task_type='ps', task_index=0) self.assertEqual('grpc://ps0:2222', cluster_resolver.master()) self.assertEqual('ps', cluster_resolver.task_type) self.assertEqual(0, cluster_resolver.task_index) - self.assertEqual(8, cluster_resolver.num_accelerators()) cluster_resolver.task_type = 'worker' cluster_resolver.task_index = 1 ",0,train ea02fb88d2abe11b1a7779abb0a7d50e07f9d7b8,tensorflow/tensorflow,"Unify num_accelerators for all Cluster Resolvers PiperOrigin-RevId: 224843723",tpu_cluster_resolver.py,"@@ -25,11 +25,10 @@ import re from six.moves.urllib.request import Request from six.moves.urllib.request import urlopen -from tensorflow.python.client import session from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver from tensorflow.python.distribute.cluster_resolver.cluster_resolver import format_master_url +from tensorflow.python.distribute.cluster_resolver.cluster_resolver import get_accelerator_devices from tensorflow.python.framework import errors -from tensorflow.python.framework import ops from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib from tensorflow.python.util import compat @@ -451,17 +450,16 @@ class TPUClusterResolver(ClusterResolver): retrieve the system metadata. Raises: - RuntimeError: If this is used with a non-TPU accelerator_type. + RuntimeError: If we cannot talk to a TPU worker after retrying or if the + number of TPU devices per host is different. """""" retry_count = 1 # TODO(b/120564445): Replace with standard library for retries. while True: try: - with ops.Graph().as_default(): - with session.Session(self.master(), config=config_proto) as s: - devices = s.list_devices() - device_details = _get_device_dict_and_cores(devices) - break + device_details = _get_device_dict_and_cores( + get_accelerator_devices(self.master(), config_proto=config_proto)) + break except errors.DeadlineExceededError: error_message = ('Failed to connect to master. The TPU might not be ' 'ready (e.g. still scheduling) or the master ' ",0,train add7a1a911b430ed14f8b6a1609dd3796587d131,tensorflow/tensorflow,"Make tf.contrib.proto.* TF2-friendly. This included fixing a bug where shape inference caught an incorrect shape, but since eager mode doesn't run shape inference the core code caused a segfault. 
PiperOrigin-RevId: 237316781",decode_proto_op_test_base.py,"@@ -296,14 +296,13 @@ class DecodeProtoOpTestBase(test_base.ProtoOpTestBase, parameterized.TestCase): field_names = ['sizes'] field_types = [dtypes.int32] - with self.cached_session() as sess: - ctensor, vtensor = self._decode_module.decode_proto( - batch, - message_type=msg_type, - field_names=field_names, - output_types=field_types, - sanitize=sanitize) - with self.assertRaisesRegexp(errors.DataLossError, - 'Unable to parse binary protobuf' - '|Failed to consume entire buffer'): - _ = sess.run([ctensor] + vtensor) + with self.assertRaisesRegexp( + errors.DataLossError, 'Unable to parse binary protobuf' + '|Failed to consume entire buffer'): + self.evaluate( + self._decode_module.decode_proto( + batch, + message_type=msg_type, + field_names=field_names, + output_types=field_types, + sanitize=sanitize)) ",0,train add7a1a911b430ed14f8b6a1609dd3796587d131,tensorflow/tensorflow,"Make tf.contrib.proto.* TF2-friendly. This included fixing a bug where shape inference caught an incorrect shape, but since eager mode doesn't run shape inference the core code caused a segfault. PiperOrigin-RevId: 237316781",encode_proto_op_test_base.py,"@@ -30,7 +30,9 @@ from google.protobuf import text_format from tensorflow.contrib.proto.python.kernel_tests import proto_op_test_base as test_base from tensorflow.contrib.proto.python.kernel_tests import test_example_pb2 +from tensorflow.python.eager import context from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.ops import array_ops @@ -50,56 +52,86 @@ class EncodeProtoOpTestBase(test_base.ProtoOpTestBase, parameterized.TestCase): self._decode_module = decode_module self._encode_module = encode_module + def testBadSizesShape(self): + if context.executing_eagerly(): + expected_error = (errors.InvalidArgumentError, + r'Invalid shape for field double_value.') + else: + expected_error = (ValueError, + r'Shape must be at least rank 2 but is rank 0') + with self.assertRaisesRegexp(*expected_error): + self.evaluate( + self._encode_module.encode_proto( + sizes=1, + values=[np.double(1.0)], + message_type='tensorflow.contrib.proto.TestValue', + field_names=['double_value'])) + def testBadInputs(self): # Invalid field name - with self.cached_session(): - with self.assertRaisesOpError('Unknown field: non_existent_field'): - self._encode_module.encode_proto( - sizes=[[1]], - values=[np.array([[0.0]], dtype=np.int32)], - message_type='tensorflow.contrib.proto.TestValue', - field_names=['non_existent_field']).eval() + with self.assertRaisesOpError('Unknown field: non_existent_field'): + self.evaluate( + self._encode_module.encode_proto( + sizes=[[1]], + values=[np.array([[0.0]], dtype=np.int32)], + message_type='tensorflow.contrib.proto.TestValue', + field_names=['non_existent_field'])) # Incorrect types. - with self.cached_session(): - with self.assertRaisesOpError( - 'Incompatible type for field double_value.'): - self._encode_module.encode_proto( - sizes=[[1]], - values=[np.array([[0.0]], dtype=np.int32)], - message_type='tensorflow.contrib.proto.TestValue', - field_names=['double_value']).eval() + with self.assertRaisesOpError('Incompatible type for field double_value.'): + self.evaluate( + self._encode_module.encode_proto( + sizes=[[1]], + values=[np.array([[0.0]], dtype=np.int32)], + message_type='tensorflow.contrib.proto.TestValue', + field_names=['double_value'])) # Incorrect shapes of sizes. 
- with self.cached_session(): + for sizes_value in 1, np.array([[[0, 0]]]): with self.assertRaisesOpError( r'sizes should be batch_size \+ \[len\(field_names\)\]'): - sizes = array_ops.placeholder(dtypes.int32) - values = array_ops.placeholder(dtypes.float64) - self._encode_module.encode_proto( - sizes=sizes, - values=[values], - message_type='tensorflow.contrib.proto.TestValue', - field_names=['double_value']).eval(feed_dict={ - sizes: [[[0, 0]]], - values: [[0.0]] - }) + if context.executing_eagerly(): + self.evaluate( + self._encode_module.encode_proto( + sizes=sizes_value, + values=[np.array([[0.0]])], + message_type='tensorflow.contrib.proto.TestValue', + field_names=['double_value'])) + else: + with self.cached_session(): + sizes = array_ops.placeholder(dtypes.int32) + values = array_ops.placeholder(dtypes.float64) + self._encode_module.encode_proto( + sizes=sizes, + values=[values], + message_type='tensorflow.contrib.proto.TestValue', + field_names=['double_value']).eval(feed_dict={ + sizes: sizes_value, + values: [[0.0]] + }) # Inconsistent shapes of values. - with self.cached_session(): - with self.assertRaisesOpError( - 'Values must match up to the last dimension'): - sizes = array_ops.placeholder(dtypes.int32) - values1 = array_ops.placeholder(dtypes.float64) - values2 = array_ops.placeholder(dtypes.int32) - (self._encode_module.encode_proto( - sizes=[[1, 1]], - values=[values1, values2], - message_type='tensorflow.contrib.proto.TestValue', - field_names=['double_value', 'int32_value']).eval(feed_dict={ - values1: [[0.0]], - values2: [[0], [0]] - })) + with self.assertRaisesOpError('Values must match up to the last dimension'): + if context.executing_eagerly(): + self.evaluate( + self._encode_module.encode_proto( + sizes=[[1, 1]], + values=[np.array([[0.0]]), + np.array([[0], [0]])], + message_type='tensorflow.contrib.proto.TestValue', + field_names=['double_value', 'int32_value'])) + else: + with self.cached_session(): + values1 = array_ops.placeholder(dtypes.float64) + values2 = array_ops.placeholder(dtypes.int32) + (self._encode_module.encode_proto( + sizes=[[1, 1]], + values=[values1, values2], + message_type='tensorflow.contrib.proto.TestValue', + field_names=['double_value', 'int32_value']).eval(feed_dict={ + values1: [[0.0]], + values2: [[0], [0]] + })) def _testRoundtrip(self, in_bufs, message_type, fields): ",0,train add7a1a911b430ed14f8b6a1609dd3796587d131,tensorflow/tensorflow,"Make tf.contrib.proto.* TF2-friendly. This included fixing a bug where shape inference caught an incorrect shape, but since eager mode doesn't run shape inference the core code caused a segfault. PiperOrigin-RevId: 237316781",encode_proto_op.cc,"@@ -525,11 +525,16 @@ class EncodeProtoOp : public OpKernel { ctx, proto_utils::IsCompatibleType(field_descs_[i]->type(), v.dtype()), errors::InvalidArgument( - ""Incompatible type for field "" + field_names_[i] + - "". Saw dtype: "", - DataTypeString(v.dtype()), + ""Incompatible type for field "", field_names_[i], + "". Saw dtype: "", DataTypeString(v.dtype()), "" but field type is: "", field_descs_[i]->type_name())); + OP_REQUIRES( + ctx, TensorShapeUtils::IsMatrixOrHigher(v.shape()), + errors::InvalidArgument(""Invalid shape for field "", field_names_[i], + "". Saw shape "", v.shape().DebugString(), + "" but it should be at least a matrix."")); + // All value tensors must have the same shape prefix (i.e. batch size). 
TensorShape shape_prefix = v.shape(); shape_prefix.RemoveDim(shape_prefix.dims() - 1); ",0,train 016e44afb875b8316b0d7239ebab1f92882aaf82,tensorflow/tensorflow,Typo correction in resize_bicubic_op.cc,resize_bicubic_op.cc,"@@ -130,7 +130,7 @@ class CachedInterpolation { } // We use 2 hands and walk through, copying from one to another where // we already have values. - // Invarient, new_indicies_hand <= cached_values_hand + // Invariant, new_indicies_hand <= cached_values_hand const std::array new_x_indices{{x_0, x_1, x_2, x_3}}; int cached_values_hand = 0; int new_indicies_hand = 0; ",0,test 410ef4f3b097d3ff47d2bc342bb3ac5bc9aedf72,tensorflow/tensorflow,Fix typo,mnist.py,"@@ -153,7 +153,7 @@ def evaluation(logits, labels): """""" # For a classifier model, we can use the in_top_k Op. # It returns a bool tensor with shape [batch_size] that is true for - # the examples where the label's is was in the top k (here k=1) + # the examples where the label is in the top k (here k=1) # of all logits for that example. correct = tf.nn.in_top_k(logits, labels, 1) # Return the number of true entries. ",0,train 86c8647f110220835c7783f96bf563fcc369378b,tensorflow/tensorflow,"Proxy decorator_target.__get__ in TFDecorator Prior to this change TFDecorator.__get__ mimicked the behaviour of functions via partial(self.__call__, instance). This is no needed as calling __get__ on a function would have a ~similar effect; and in fact incorrect if target implements a custom __get__ method. PiperOrigin-RevId: 234957239",tf_decorator.py,"@@ -59,7 +59,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import functools as _functools import traceback as _traceback @@ -212,8 +211,8 @@ class TFDecorator(object): else: self.__doc__ = '' - def __get__(self, obj, objtype): - return _functools.partial(self.__call__, obj) + def __get__(self, instance, owner): + return self._decorated_target.__get__(instance, owner) def __call__(self, *args, **kwargs): return self._decorated_target(*args, **kwargs) ",0,train 86c8647f110220835c7783f96bf563fcc369378b,tensorflow/tensorflow,"Proxy decorator_target.__get__ in TFDecorator Prior to this change TFDecorator.__get__ mimicked the behaviour of functions via partial(self.__call__, instance). This is no needed as calling __get__ on a function would have a ~similar effect; and in fact incorrect if target implements a custom __get__ method. PiperOrigin-RevId: 234957239",tf_decorator_test.py,"@@ -170,6 +170,17 @@ class TfDecoratorTest(test.TestCase): self.assertEqual('Return parameters.', TestDecoratedClass().return_params.__doc__) + def testTarget__get__IsProxied(self): + class Descr(object): + + def __get__(self, instance, owner): + return self + + class Foo(object): + foo = tf_decorator.TFDecorator('Descr', Descr()) + + self.assertIsInstance(Foo.foo, Descr) + def test_wrapper(*args, **kwargs): return test_function(*args, **kwargs) ",0,train 39ef9c36e9ba11ddfc222eec57027d478f26b6f7,tensorflow/tensorflow,"In interactive_graphviz, more complete help command",interactive_graphviz.cc,"@@ -168,7 +168,9 @@ void DoHelpCommand() { is specified, the new computation contains nodes up to nodes above the root. help - Prints this usage information.)"" + Prints this usage information. 
+ quit + Exit the application.)"" << std::endl; } ",0,train b4a71efb3b1ebf0184bfc18d6b423e8aae952010,tensorflow/tensorflow,"[MLIR][KernelGen] Add experimental JIT-compiled div kernels for i8, ui32, and ui64 on GPU PiperOrigin-RevId: 404234804 Change-Id: I8f391254bdcea06429bd88b42b54ad2b18501b54",gpu_binary_ops_test.cc,"@@ -347,6 +347,26 @@ GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES( test::DefaultInputNonZero(), baseline_div, test::OpsTestConfig().ExpectStrictlyEqual()) +// These kernels are JIT-compiled. +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) +GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES( + Div, + /*test_name=*/Int8, int8_t, int8_t, test::DefaultInput(), + test::DefaultInputNonZero(), baseline_div, + test::OpsTestConfig().ExpectStrictlyEqual()) +GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES( + Div, + /*test_name=*/Uint32, uint32_t, uint32_t, test::DefaultInput(), + test::DefaultInputNonZero(), baseline_div, + test::OpsTestConfig().ExpectStrictlyEqual()) +GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES( + Div, + /*test_name=*/Uint64, uint64_t, uint64_t, test::DefaultInput(), + test::DefaultInputNonZero(), baseline_div, + test::OpsTestConfig().ExpectStrictlyEqual()) +#endif + // The following tests don't work with Eigen kernels if the Eigen kernels are // compiled with nvcc. #if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) ",0,train b4a71efb3b1ebf0184bfc18d6b423e8aae952010,tensorflow/tensorflow,"[MLIR][KernelGen] Add experimental JIT-compiled div kernels for i8, ui32, and ui64 on GPU PiperOrigin-RevId: 404234804 Change-Id: I8f391254bdcea06429bd88b42b54ad2b18501b54",gpu_op_div.cc,"@@ -28,6 +28,13 @@ GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Div, DT_INT64); GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Div, DT_COMPLEX64); GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Div, DT_COMPLEX128); +// These kernels are JIT-compiled. +#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) +GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Div, DT_INT8); +GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Div, DT_UINT32); +GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Div, DT_UINT64); +#endif + REGISTER_ALIASED_GPU_KERNEL(RealDiv, Div, DT_HALF, DT_HALF); REGISTER_ALIASED_GPU_KERNEL(RealDiv, Div, DT_FLOAT, DT_FLOAT); REGISTER_ALIASED_GPU_KERNEL(RealDiv, Div, DT_DOUBLE, DT_DOUBLE); ",0,train 69f60d4c8cb5edb6fdc63b837b6db29562d28744,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2018-10-09 PiperOrigin-RevId: 216323343",compat.py,"@@ -26,7 +26,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 8) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 9) @tf_export(""compat.forward_compatible"") ",0,train e18c81a0c5e20c170c4de5d1c484034eafd2aa55,tensorflow/tensorflow,"Fix sign errors in tf.contrib.distributions.logistic.{log_cdf, log_survival_function} and add accompanying tests. 
Fixes #10131 PiperOrigin-RevId: 157259406",logistic_test.py,"@@ -71,6 +71,52 @@ class LogisticTest(test.TestCase): self.assertEqual(cdf.get_shape(), (6,)) self.assertAllClose(cdf.eval(), expected_cdf) + def testLogisticLogCDF(self): + with self.test_session(): + batch_size = 6 + np_loc = np.array([2.0] * batch_size, dtype=np.float32) + loc = constant_op.constant(np_loc) + scale = 1.5 + + dist = logistic.Logistic(loc, scale) + x = np.array([2.5, 2.5, 4.0, 0.1, 1.0, 2.0], dtype=np.float32) + logcdf = dist.log_cdf(x) + expected_logcdf = stats.logistic.logcdf(x, np_loc, scale) + + self.assertEqual(logcdf.get_shape(), (6,)) + self.assertAllClose(logcdf.eval(), expected_logcdf) + + def testLogisticSurvivalFunction(self): + with self.test_session(): + batch_size = 6 + np_loc = np.array([2.0] * batch_size, dtype=np.float32) + loc = constant_op.constant(np_loc) + scale = 1.5 + + dist = logistic.Logistic(loc, scale) + x = np.array([2.5, 2.5, 4.0, 0.1, 1.0, 2.0], dtype=np.float32) + survival_function = dist.survival_function(x) + expected_survival_function = stats.logistic.sf(x, np_loc, scale) + + self.assertEqual(survival_function.get_shape(), (6,)) + self.assertAllClose(survival_function.eval(), expected_survival_function) + + def testLogisticLogSurvivalFunction(self): + with self.test_session(): + batch_size = 6 + np_loc = np.array([2.0] * batch_size, dtype=np.float32) + loc = constant_op.constant(np_loc) + scale = 1.5 + + dist = logistic.Logistic(loc, scale) + x = np.array([2.5, 2.5, 4.0, 0.1, 1.0, 2.0], dtype=np.float32) + logsurvival_function = dist.log_survival_function(x) + expected_logsurvival_function = stats.logistic.logsf(x, np_loc, scale) + + self.assertEqual(logsurvival_function.get_shape(), (6,)) + self.assertAllClose(logsurvival_function.eval(), + expected_logsurvival_function) + def testLogisticMean(self): with self.test_session(): loc = [2.0, 1.5, 1.0] ",0,train e18c81a0c5e20c170c4de5d1c484034eafd2aa55,tensorflow/tensorflow,"Fix sign errors in tf.contrib.distributions.logistic.{log_cdf, log_survival_function} and add accompanying tests. Fixes #10131 PiperOrigin-RevId: 157259406",logistic.py,"@@ -190,13 +190,13 @@ class Logistic(distribution.Distribution): return math_ops.exp(self._log_prob(x)) def _log_cdf(self, x): - return nn_ops.softplus(-self._z(x)) + return -nn_ops.softplus(-self._z(x)) def _cdf(self, x): return math_ops.sigmoid(self._z(x)) def _log_survival_function(self, x): - return nn_ops.softplus(self._z(x)) + return -nn_ops.softplus(self._z(x)) def _survival_function(self, x): return math_ops.sigmoid(-self._z(x)) ",0,train e3930fc11f042416a34ed5526bc506e1e0e32660,tensorflow/tensorflow,"Add user_ops.my_fact to the new TensorFlow API. PiperOrigin-RevId: 189415577",user_ops.py,"@@ -23,8 +23,10 @@ from tensorflow.python.ops import gen_user_ops as _gen_user_ops # go/tf-wildcard-import from tensorflow.python.ops.gen_user_ops import * # pylint: disable=wildcard-import +from tensorflow.python.util.tf_export import tf_export +@tf_export('user_ops.my_fact') def my_fact(): """"""Example of overriding the generated code for an Op."""""" return _gen_user_ops.fact() ",0,train e3930fc11f042416a34ed5526bc506e1e0e32660,tensorflow/tensorflow,"Add user_ops.my_fact to the new TensorFlow API. PiperOrigin-RevId: 189415577",api_compatibility_test.py,"@@ -268,17 +268,6 @@ class ApiCompatibilityTest(test.TestCase): for filename in golden_file_list } - # user_ops is an empty module. It is currently available in TensorFlow API - # but we don't keep empty modules in the new API. 
- # We delete user_ops from golden_proto_dict to make sure assert passes - # when diffing new API against goldens. - # TODO(annarev): remove user_ops from goldens once we switch to new API. - tf_module = golden_proto_dict['tensorflow'].tf_module - for i in range(len(tf_module.member)): - if tf_module.member[i].name == 'user_ops': - del tf_module.member[i] - break - # Diff them. Do not fail if called with update. # If the test is run to update goldens, only report diffs but do not fail. self._AssertProtoDictEquals( ",0,train 7fdcb7d20e3ced8e415cf18ad3cdd519ff34403c,tensorflow/tensorflow,"Convert `DCHECK` for L2 metric to an `OP_REQUIRES`. PiperOrigin-RevId: 411121699 Change-Id: Ie0e981aabb7e0db5deb93db55269a34519fead0f",stats_ops.cc,"@@ -326,7 +326,8 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel { OP_REQUIRES(context, l2_t->NumElements() == 1, errors::InvalidArgument(""l2 argument must be a scalar"")); const auto l2 = l2_t->scalar()(); - DCHECK_GE(l2, 0); + OP_REQUIRES(context, l2 >= 0, + errors::InvalidArgument(""l2 = "", l2, "" but it should be >= 0"")); const Tensor* tree_complexity_t; OP_REQUIRES_OK(context, ",0,test 5890405631a44f53a2d4d5c0ce5b625e1ae340cb,tensorflow/tensorflow,"Move IsZeroVector and BatchQuantizeFloats calls to top. Update Calibration and FP16 versions to be consistent with the Float version. PiperOrigin-RevId: 319050730 Change-Id: I1f026189f0b71570f230e794a690dca5be30d597",lstm_eval.cc,"@@ -511,6 +511,12 @@ inline void LstmStepFloat( float* cell_gate_scratch = scratch2; float* output_gate_scratch = scratch3; + const bool is_input_all_zeros = + tensor_utils::IsZeroVector(input_ptr, n_batch * n_input); + const bool is_aux_input_all_zeros = + (aux_input_ptr == nullptr || + tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input)); + // Initialize scratch buffers with bias for regular lstm or initialize with // zero for layer norm lstm. if (use_layer_norm) { @@ -535,7 +541,7 @@ inline void LstmStepFloat( // For each batch and cell: compute input_weight * input. // Skip if input is all zeros. - if (!tensor_utils::IsZeroVector(input_ptr, n_batch * n_input)) { + if (!is_input_all_zeros) { if (!use_cifg) { tensor_utils::MatrixBatchVectorMultiplyAccumulate( input_to_input_weights_ptr, n_cell, n_input, input_ptr, n_batch, @@ -555,8 +561,7 @@ inline void LstmStepFloat( // For each batch and cell: compute aux_input_weight * aux_input. // Skip if auxiliary input is not available or all zeros. - if (aux_input_ptr != nullptr && - !tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input)) { + if (!is_aux_input_all_zeros) { if (!use_cifg) { tensor_utils::MatrixBatchVectorMultiplyAccumulate( aux_input_to_input_weights_ptr, n_cell, n_aux_input, aux_input_ptr, @@ -807,28 +812,6 @@ inline void LstmStepHybrid( float* cell_gate_scratch = scratch2; float* output_gate_scratch = scratch3; - // Initialize scratch buffers with bias for regular lstm or initialize with - // zero for layer norm lstm. 
- if (use_layer_norm) { - if (!use_cifg) { - std::fill_n(input_gate_scratch, n_cell * n_batch, 0.0f); - } - std::fill_n(forget_gate_scratch, n_cell * n_batch, 0.0f); - std::fill_n(cell_gate_scratch, n_cell * n_batch, 0.0f); - std::fill_n(output_gate_scratch, n_cell * n_batch, 0.0f); - } else { - if (!use_cifg) { - tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, - n_batch, input_gate_scratch); - } - tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch, - forget_gate_scratch); - tensor_utils::VectorBatchVectorAssign(cell_gate_bias_ptr, n_cell, n_batch, - cell_gate_scratch); - tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch, - output_gate_scratch); - } - int32_t* input_to_input_row_sums = nullptr; int32_t* input_to_forget_row_sums = nullptr; int32_t* input_to_cell_row_sums = nullptr; @@ -896,10 +879,53 @@ inline void LstmStepHybrid( } } - if (!tensor_utils::IsZeroVector(input_ptr, n_batch * n_input)) { + const bool is_input_all_zeros = + tensor_utils::IsZeroVector(input_ptr, n_batch * n_input); + const bool is_aux_input_all_zeros = + (aux_input_ptr == nullptr || + tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input)); + const bool is_output_state_all_zeros = + tensor_utils::IsZeroVector(output_state_ptr, n_batch * n_output); + + if (!is_input_all_zeros) { tensor_utils::BatchQuantizeFloats(input_ptr, n_batch, n_input, quantized_input_ptr, input_sf, input_zp, asymmetric_quantize_inputs); + } + if (!is_aux_input_all_zeros) { + tensor_utils::BatchQuantizeFloats(aux_input_ptr, n_batch, n_aux_input, + quantized_aux_input_ptr, aux_input_sf, + aux_input_zp, asymmetric_quantize_inputs); + } + if (!is_output_state_all_zeros) { + tensor_utils::BatchQuantizeFloats( + output_state_ptr, n_batch, n_output, quantized_output_state_ptr, + output_state_sf, output_state_zp, asymmetric_quantize_inputs); + } + + // Initialize scratch buffers with bias for regular lstm or initialize with + // zero for layer norm lstm. + if (use_layer_norm) { + if (!use_cifg) { + std::fill_n(input_gate_scratch, n_cell * n_batch, 0.0f); + } + std::fill_n(forget_gate_scratch, n_cell * n_batch, 0.0f); + std::fill_n(cell_gate_scratch, n_cell * n_batch, 0.0f); + std::fill_n(output_gate_scratch, n_cell * n_batch, 0.0f); + } else { + if (!use_cifg) { + tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, + n_batch, input_gate_scratch); + } + tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch, + forget_gate_scratch); + tensor_utils::VectorBatchVectorAssign(cell_gate_bias_ptr, n_cell, n_batch, + cell_gate_scratch); + tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch, + output_gate_scratch); + } + + if (!is_input_all_zeros) { if (!use_cifg) { tensor_utils::MatrixBatchVectorMultiplyAccumulate( input_to_input_weights_ptr, n_cell, n_input, quantized_input_ptr, @@ -933,12 +959,7 @@ inline void LstmStepHybrid( // For each batch and cell: compute aux_input_weight * aux_input. // Skip if auxiliary input is not available or all zeros. 
- if (aux_input_ptr != nullptr && - !tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input)) { - tensor_utils::BatchQuantizeFloats(aux_input_ptr, n_batch, n_aux_input, - quantized_aux_input_ptr, aux_input_sf, - aux_input_zp, asymmetric_quantize_inputs); - + if (!is_aux_input_all_zeros) { if (!use_cifg) { tensor_utils::MatrixBatchVectorMultiplyAccumulate( aux_input_to_input_weights_ptr, n_cell, n_aux_input, @@ -973,11 +994,7 @@ inline void LstmStepHybrid( context); } - if (!tensor_utils::IsZeroVector(output_state_ptr, n_batch * n_output)) { - // Save quantization and matmul computation for all zero input. - tensor_utils::BatchQuantizeFloats( - output_state_ptr, n_batch, n_output, quantized_output_state_ptr, - output_state_sf, output_state_zp, asymmetric_quantize_inputs); + if (!is_output_state_all_zeros) { // For each batch and cell: compute recurrent_weight * output_state. if (!use_cifg) { tensor_utils::MatrixBatchVectorMultiplyAccumulate( ",0,train 5890405631a44f53a2d4d5c0ce5b625e1ae340cb,tensorflow/tensorflow,"Move IsZeroVector and BatchQuantizeFloats calls to top. Update Calibration and FP16 versions to be consistent with the Float version. PiperOrigin-RevId: 319050730 Change-Id: I1f026189f0b71570f230e794a690dca5be30d597",lstm.cc,"@@ -62,7 +62,7 @@ void UpdateLstmCellFloat(int n_batch, int n_cell, float* cell_state, } } -void CalculateLstmOutputFloat( +void CalculateLstmOutputCalibration( int n_batch, int n_cell, int n_output, const float* cell_state, const float* output_gate, TfLiteFusedActivation activation, const float* projection_weights, const float* projection_bias, @@ -97,7 +97,7 @@ void CalculateLstmOutputFloat( } } -inline void LstmStepWithAuxInput( +inline void LstmStepCalibration( const float* input_ptr, const float* input_to_input_weights_ptr, const float* input_to_forget_weights_ptr, const float* input_to_cell_weights_ptr, @@ -126,18 +126,19 @@ inline void LstmStepWithAuxInput( float* scratch1, float* scratch2, float* scratch3, float* output_ptr, Logger* logger, const std::vector& intermediate_tensor_indexes, ErrorReporter* error_reporter) { - // Make named scratch buffers for the different gates. - float* input_gate_scratch = scratch0; - float* forget_gate_scratch = scratch1; - float* cell_gate_scratch = scratch2; - float* output_gate_scratch = scratch3; - + ruy::profiler::ScopeLabel label(""LstmStepCalibration""); // Since we have already checked that weights are all there or none, we can // check the existence of only one to the get the condition. const bool use_cifg = (input_to_input_weights_ptr == nullptr); const bool use_peephole = (cell_to_output_weights_ptr != nullptr); const bool use_layer_norm = (forget_layer_norm_coefficients_ptr != nullptr); + // Make named scratch buffers for the different gates. + float* input_gate_scratch = scratch0; + float* forget_gate_scratch = scratch1; + float* cell_gate_scratch = scratch2; + float* output_gate_scratch = scratch3; + // Initialize scratch buffers with bias for regular lstm or initialize with // zero for layer norm lstm. if (use_layer_norm) { @@ -177,7 +178,8 @@ inline void LstmStepWithAuxInput( input_to_output_weights_ptr, n_cell, n_input, input_ptr, n_batch, output_gate_scratch); - // If auxiliary input is available then compute aux_input_weight * aux_input + // For each batch and cell: compute aux_input_weight * aux_input. + // Skip if auxiliary input is not available. 
if (aux_input_ptr != nullptr) { if (!use_cifg) { tensor_utils::MatrixBatchVectorMultiplyAccumulate( @@ -293,11 +295,11 @@ inline void LstmStepWithAuxInput( tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell, output_gate_scratch); - CalculateLstmOutputFloat(n_batch, n_cell, n_output, cell_state_ptr, - output_gate_scratch, params->activation, - projection_weights_ptr, projection_bias_ptr, - params->proj_clip, output_state_ptr, scratch2, - logger, intermediate_tensor_indexes, error_reporter); + CalculateLstmOutputCalibration( + n_batch, n_cell, n_output, cell_state_ptr, output_gate_scratch, + params->activation, projection_weights_ptr, projection_bias_ptr, + params->proj_clip, output_state_ptr, scratch2, logger, + intermediate_tensor_indexes, error_reporter); // Copy output_state to the output. Note that the output batch rows may not be // contiguous (output_batch_leading_dim != n_output). @@ -307,7 +309,7 @@ inline void LstmStepWithAuxInput( } } -TfLiteStatus EvalFloat( +TfLiteStatus EvalCalibration( const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights, const TfLiteTensor* input_to_forget_weights, const TfLiteTensor* input_to_cell_weights, @@ -392,7 +394,7 @@ TfLiteStatus EvalFloat( float* output_ptr_time = GetTensorData(output) + t_rel * output_step + output_offset; - LstmStepWithAuxInput( + LstmStepCalibration( input_ptr, GetTensorData(input_to_input_weights), GetTensorData(input_to_forget_weights), GetTensorData(input_to_cell_weights), @@ -454,7 +456,7 @@ TfLiteStatus EvalFloat( float* cell_gate_scratch_ptr = cell_gate_scratch + b * n_cell; float* output_gate_scratch_ptr = output_gate_scratch + b * n_cell; - LstmStepWithAuxInput( + LstmStepCalibration( input_ptr, GetTensorData(input_to_input_weights), GetTensorData(input_to_forget_weights), GetTensorData(input_to_cell_weights), @@ -587,7 +589,7 @@ TfLiteStatus lstm_eval(TfLiteContext* context, TfLiteNode* node, Logger* logger, switch (input_to_output_weights->type) { case kTfLiteFloat32: { - return EvalFloat( + return EvalCalibration( input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights, recurrent_to_forget_weights, ",0,train 246c1fab4668b85ab059d5c734be15cb1db4f1d1,tensorflow/tensorflow,"Fix string type inference for array, asarray and zero_like, and empty_like. tf only has a var-length string dtype, thus all fixed length numpy dtypes 'U8', 'S8' loose their length between a tf.string conversion. We can only gurrantee no 'loss' of data during the conversion. ones_like is updated too, to be consistent with other unary array creation functions. However ones_like throws an error rather than using a string ""1""s as numpy do. I don't intend making ones_like like numpy with this CL. PiperOrigin-RevId: 391407698 Change-Id: I1fbde2c08df210ebf3841b27d9e55c3c75c72282",np_array_ops.py,"@@ -66,13 +66,8 @@ def zeros(shape, dtype=float): # pylint: disable=redefined-outer-name @np_utils.np_doc('zeros_like') def zeros_like(a, dtype=None): # pylint: disable=missing-docstring - if dtype is None: - # We need to let np_utils.result_type decide the dtype, not tf.zeros_like - dtype = np_utils.result_type(a) - else: - # TF and numpy has different interpretations of Python types such as - # `float`, so we let `np_utils.result_type` decide. 
- dtype = np_utils.result_type(dtype) + dtype = np_utils.result_type_unary(a, dtype) + dtype = dtypes.as_dtype(dtype) # Work around b/149877262 return array_ops.zeros_like(a, dtype) @@ -86,10 +81,7 @@ def ones(shape, dtype=float): # pylint: disable=redefined-outer-name @np_utils.np_doc('ones_like') def ones_like(a, dtype=None): - if dtype is None: - dtype = np_utils.result_type(a) - else: - dtype = np_utils.result_type(dtype) + dtype = np_utils.result_type_unary(a, dtype) return array_ops.ones_like(a, dtype) @@ -161,8 +153,7 @@ def _array_internal(val, dtype=None, copy=True, ndmin=0): # pylint: disable=red result_t = val if not isinstance(result_t, ops.Tensor): - if not dtype: - dtype = np_utils.result_type(result_t) + dtype = np_utils.result_type_unary(result_t, dtype) # We can't call `convert_to_tensor(result_t, dtype=dtype)` here because # convert_to_tensor doesn't allow incompatible arguments such as (5.5, int) # while np.array allows them. We need to convert-then-cast. ",0,train 246c1fab4668b85ab059d5c734be15cb1db4f1d1,tensorflow/tensorflow,"Fix string type inference for array, asarray and zero_like, and empty_like. tf only has a var-length string dtype, thus all fixed length numpy dtypes 'U8', 'S8' loose their length between a tf.string conversion. We can only gurrantee no 'loss' of data during the conversion. ones_like is updated too, to be consistent with other unary array creation functions. However ones_like throws an error rather than using a string ""1""s as numpy do. I don't intend making ones_like like numpy with this CL. PiperOrigin-RevId: 391407698 Change-Id: I1fbde2c08df210ebf3841b27d9e55c3c75c72282",np_array_ops_test.py,"@@ -86,7 +86,7 @@ class ArrayCreationTest(test.TestCase): self.all_types = [ int, float, np.int16, np.int32, np.int64, np.float16, np.float32, - np.float64 + np.float64, np.complex64, np.complex128 ] source_array_data = [ @@ -1262,6 +1262,51 @@ class ArrayMathTest(test.TestCase, parameterized.TestCase): else: self.assertAllEqual(result, expected_result) + +class StringArrayTest(test.TestCase, parameterized.TestCase): + + StringParameters = parameterized.named_parameters( # pylint: disable=invalid-name + # Tensorflow always encodes python string into bytes, regardless of + # requested dtype. 
+ ('str_u8', 'abcde\U0001f005', 'U8', b'abcde\xf0\x9f\x80\x85'), + ('str_s8', 'abcde\U0001f005', 'S8', b'abcde\xf0\x9f\x80\x85'), + ('str_none', 'abcde\U0001f005', None, b'abcde\xf0\x9f\x80\x85'), + ('zstr_u8', '\0abcde\U0001f005', 'U8', b'\0abcde\xf0\x9f\x80\x85'), + ('zstr_s8', '\0abcde\U0001f005', 'S8', b'\0abcde\xf0\x9f\x80\x85'), + ('zstr_none', '\0abcde\U0001f005', None, b'\0abcde\xf0\x9f\x80\x85'), + ('bytes_u8', b'abcdef', 'U8', b'abcdef'), + ('bytes_s8', b'abcdef', 'S8', b'abcdef'), + ('bytes_none', b'abcdef', None, b'abcdef'), + ('zbytes_u8', b'\0abcdef', 'U8', b'\0abcdef'), + ('zbytes_s8', b'\0abcdef', 'S8', b'\0abcdef'), + ('zbytes_none', b'\0abcdef', None, b'\0abcdef'), + ) + + @StringParameters + def testArray(self, a, dtype, a_as_bytes): + b = np_array_ops.array(a, dtype=dtype) + self.assertIsInstance(b.numpy(), bytes) + self.assertEqual(b.numpy(), a_as_bytes) + + @StringParameters + def testAsArray(self, a, dtype, a_as_bytes): + b = np_array_ops.asarray(a, dtype=dtype) + self.assertIsInstance(b.numpy(), bytes) + self.assertEqual(b.numpy(), a_as_bytes) + + @StringParameters + def testZerosLike(self, a, dtype, unused_a_as_bytes): + b = np_array_ops.zeros_like(a, dtype=dtype) + self.assertIsInstance(b.numpy(), bytes) + self.assertEqual(b.numpy(), b'') + + @StringParameters + def testEmptyLike(self, a, dtype, unused_a_as_bytes): + b = np_array_ops.empty_like(a, dtype=dtype) + self.assertIsInstance(b.numpy(), bytes) + self.assertEqual(b.numpy(), b'') + + if __name__ == '__main__': ops.enable_eager_execution() ops.enable_numpy_style_type_promotion() ",0,train 246c1fab4668b85ab059d5c734be15cb1db4f1d1,tensorflow/tensorflow,"Fix string type inference for array, asarray and zero_like, and empty_like. tf only has a var-length string dtype, thus all fixed length numpy dtypes 'U8', 'S8' loose their length between a tf.string conversion. We can only gurrantee no 'loss' of data during the conversion. ones_like is updated too, to be consistent with other unary array creation functions. However ones_like throws an error rather than using a string ""1""s as numpy do. I don't intend making ones_like like numpy with this CL. PiperOrigin-RevId: 391407698 Change-Id: I1fbde2c08df210ebf3841b27d9e55c3c75c72282",np_utils.py,"@@ -513,6 +513,24 @@ def result_type(*arrays_and_dtypes): # pylint: disable=missing-function-docstri return np_dtypes._result_type(*arrays_and_dtypes) # pylint: disable=protected-access +def result_type_unary(a, dtype): # pylint: disable=missing-function-docstring + """"""Find the result type from a single input and a dtype."""""" + if dtype: + # We need to let np_utils.result_type decide the dtype, not tf.zeros_like + return result_type(dtype) + + # np_utils.result_type treats string inputs as dtype strings, not as strings. + # but for unary we want to treat it as a string input. + if isinstance(a, str): + return np.unicode_ + elif isinstance(a, bytes): + return np.bytes_ + + # TF and numpy has different interpretations of Python types such as + # `float`, so we let `np_utils.result_type` decide. + return result_type(a) + + def _result_type_binary(t1, t2): # pylint: disable=missing-function-docstring """"""A specialization of result_type for 2 arguments for performance reasons."""""" try: ",0,train db82f8d7a38bef9a5603eecc8911c005d669794c,tensorflow/tensorflow,"Add all valid fusibles of the original ops to fusibles of a newly created fusion op in multi-output fusion. 
PiperOrigin-RevId: 290461690 Change-Id: I80312f9cdeeb0432291c7016b81ae91ce27c1ab0",multi_output_fusion.cc,"@@ -158,8 +158,6 @@ HloInstruction* MultiOutputFusion::CreateFusion(HloInstruction* base, base->shape(), HloInstruction::FusionKind::kLoop, base)); // Update candidate_ and all_fusion_candidates_. - std::vector> new_fusibles = - GetNewFusibles(base, to_fuse); int64 index; if (candidates_index_.contains(input_fusion)) { index = candidates_index_[input_fusion]; @@ -170,13 +168,6 @@ HloInstruction* MultiOutputFusion::CreateFusion(HloInstruction* base, all_fusion_candidates_.push_back(input_fusion); } - // Update the worklist_. - FusionCandidate& candidate_node = candidates_[index]; - for (auto it : new_fusibles) { - candidate_node.fusibles.emplace_back(it.first, it.second); - worklist_.emplace(input_fusion, it.first, it.second); - } - reachability_->Replace(base, input_fusion); TF_CHECK_OK(computation()->ReplaceInstruction(base, input_fusion)); return input_fusion; @@ -199,13 +190,19 @@ bool MultiOutputFusion::IsProfitableOperand(HloInstruction* instr) { } std::vector> -MultiOutputFusion::GetNewFusibles(HloInstruction* fusion, - HloInstruction* fused) { +MultiOutputFusion::GetNewFusibles(HloInstruction* instr1, + HloInstruction* instr2) { + HloInstruction* fusion = instr1; + HloInstruction* fused = instr2; + if (is_fused(instr1)) { + fusion = instr2; + fused = instr1; + } + FusionCandidate& fusion_node = candidates_[get_candidate_id(fusion)]; FusionCandidate& fused_node = candidates_[get_candidate_id(fused)]; - // Update the fusible list for fusion. Variable new_fusibles keeps - // track of the new or changed entries. + // The second entry of the pair is an old profit value. std::vector> new_fusibles; absl::flat_hash_set in_list; auto it = fusion_node.fusibles.begin(); @@ -216,11 +213,7 @@ MultiOutputFusion::GetNewFusibles(HloInstruction* fusion, continue; } in_list.insert(instr); - int64 profit = GetProfit(instr, fusion); - if (profit > it->second) { - it->second = profit; - new_fusibles.emplace_back(instr, profit); - } + new_fusibles.emplace_back(instr, it->second); ++it; } @@ -235,16 +228,17 @@ MultiOutputFusion::GetNewFusibles(HloInstruction* fusion, if (in_list.contains(instr)) { continue; } - int64 profit = GetProfit(instr, fusion); - fusion_node.fusibles.emplace_back(instr, profit); - new_fusibles.emplace_back(instr, profit); + // Set old profit to zero because instr is not originally fusible to + // fusion_node. + new_fusibles.emplace_back(instr, 0); } fused_node.fusibles.clear(); return new_fusibles; } -void MultiOutputFusion::Update(HloInstruction* instr1, HloInstruction* instr2) { +void MultiOutputFusion::UpdateBeforeFuse(HloInstruction* instr1, + HloInstruction* instr2) { HloInstruction* fusion = instr1; HloInstruction* fused = instr2; if (is_fused(instr1)) { @@ -264,13 +258,34 @@ void MultiOutputFusion::Update(HloInstruction* instr1, HloInstruction* instr2) { // Update the reachability graph. UpdateReachability(fusion, fused, all_fusion_candidates_, [this](HloInstruction* instr) { return is_fused(instr); }); +} - std::vector> new_fusibles = - GetNewFusibles(fusion, fused); - - // Update the worklist_. 
+void MultiOutputFusion::UpdateAfterFuse( + HloInstruction* fusion, + const std::vector>& new_fusibles, + bool new_fusion_node) { + FusionCandidate& candidate_node = candidates_[candidates_index_[fusion]]; for (auto it : new_fusibles) { - worklist_.emplace(fusion, it.first, it.second); + int64 profit = GetProfit(it.first, fusion); + if (new_fusion_node) { + // If `fusion' is a new fusion node, then add all fusibles. + if (profit > 0) { + candidate_node.fusibles.emplace_back(it.first, profit); + worklist_.emplace(fusion, it.first, profit); + } + } else { + if (profit > it.second) { + // If the new profit is higher than the old profit, add the fusible + // into worklist. + worklist_.emplace(fusion, it.first, profit); + } + if (it.second == 0) { + // If the old profit is zero, that means `it.first' is not + // originally fusible to the base op of `fusion', so we must add it + // to candidate_node.fusibles. + candidate_node.fusibles.emplace_back(it.first, profit); + } + } } } @@ -388,17 +403,23 @@ bool MultiOutputFusion::Perform() { << instr2->fused_instructions_computation()->ToString( HloPrintOptions().set_indent_amount(1)); } - Update(instr1, instr2); - HloInstruction* ret = Fuse(instr1, instr2); - if (ret != instr1) { + UpdateBeforeFuse(instr1, instr2); + std::vector> new_fusibles = + GetNewFusibles(instr1, instr2); + HloInstruction* fusion = Fuse(instr1, instr2); + if (fusion != instr1) { set_is_fused(instr1); } - if (ret != instr2) { + if (fusion != instr2) { set_is_fused(instr2); } + UpdateAfterFuse( + fusion, new_fusibles, + /*new_fusion_node=*/(fusion != instr1) && (fusion != instr2)); + changed = true; - VLOG(2) << ""After fusion, \t this: "" << ret->name() << ""\n"" - << ret->fused_instructions_computation()->ToString( + VLOG(2) << ""After fusion, \t this: "" << fusion->name() << ""\n"" + << fusion->fused_instructions_computation()->ToString( HloPrintOptions().set_indent_amount(1)); } } ",0,train db82f8d7a38bef9a5603eecc8911c005d669794c,tensorflow/tensorflow,"Add all valid fusibles of the original ops to fusibles of a newly created fusion op in multi-output fusion. PiperOrigin-RevId: 290461690 Change-Id: I80312f9cdeeb0432291c7016b81ae91ce27c1ab0",multi_output_fusion.h,"@@ -110,11 +110,12 @@ class MultiOutputFusion : public HloModulePass { // InstructionFusion instead. virtual bool DoProducerConsumerMultiOutputFusion(); - // Return a list of new fusible instructions that can be fused into `fusion' - // fused with `fused'. The second entry in the vector is a profit value from - // fusing the corresponding instruction. + // Return a list of fusible instructions that can be fused into the fusion of + // instr1 and instr2. The second entry in the vector is an old profit value + // from fusing the corresponding instruction and the base op of the new + // fusion. std::vector> GetNewFusibles( - HloInstruction* fusion, HloInstruction* fused); + HloInstruction* instr1, HloInstruction* instr2); // Create a new fusion instruction and add `base' into it. // Prepare for fusing `to_fuse' into the created fusion by updating @@ -140,9 +141,16 @@ class MultiOutputFusion : public HloModulePass { bool operator<(const ToBeFused& rhs) const { return score < rhs.score; } }; - // Update the internal data structures after instr1 and instr2 are fused into + // Update the internal data structures before instr1 and instr2 are fused into // one fusion instruction. 
- void Update(HloInstruction* instr1, HloInstruction* instr2); + void UpdateBeforeFuse(HloInstruction* instr1, HloInstruction* instr2); + + // Update the internal data structures after instructions are fused into + // one fusion instruction. + void UpdateAfterFuse( + HloInstruction* fusion, + const std::vector>& new_fusibles, + bool new_fusion_node); int64 get_candidate_id(HloInstruction* instr) { return FindOrDie(candidates_index_, instr); ",0,train 33bcb53486aa286ad16b0d1d7a2715febf696364,tensorflow/tensorflow,"Allow clients to pass a filename to the constructor of Saver. PiperOrigin-RevId: 156741424",saver.py,"@@ -656,7 +656,7 @@ class BaseSaverBuilder(object): restore_sequentially: A Bool, which if true, causes restore of different variables to happen sequentially within each device. filename: If known at graph construction time, filename used for variable - loading/saving. + loading/saving. If None, then the default name ""model"" will be used. Returns: A SaverDef proto. @@ -674,7 +674,7 @@ class BaseSaverBuilder(object): with ops.name_scope(name, ""save"", [saveable.op for saveable in saveables]) as name: # Add the Constant string tensor for the filename. - filename_tensor = constant_op.constant(filename) + filename_tensor = constant_op.constant(filename or ""model"") # Add the save ops. if sharded: @@ -1033,7 +1033,8 @@ class Saver(object): allow_empty=False, write_version=saver_pb2.SaverDef.V2, pad_step_number=False, - save_relative_paths=False): + save_relative_paths=False, + filename=None): """"""Creates a `Saver`. The constructor adds ops to save and restore variables. @@ -1109,6 +1110,8 @@ class Saver(object): save_relative_paths: If `True`, will write relative paths to the checkpoint state file. This is needed if the user wants to copy the checkpoint directory and reload from the copied directory. + filename: If known at graph construction time, filename used for variable + loading/saving. Raises: TypeError: If `var_list` is invalid. @@ -1132,6 +1135,7 @@ class Saver(object): self._is_empty = None self._write_version = write_version self._pad_step_number = pad_step_number + self._filename = filename if not defer_build: self.build() if self.saver_def: @@ -1164,7 +1168,8 @@ class Saver(object): max_to_keep=self._max_to_keep, keep_checkpoint_every_n_hours=self._keep_checkpoint_every_n_hours, name=self._name, - restore_sequentially=self._restore_sequentially) + restore_sequentially=self._restore_sequentially, + filename=self._filename) elif self.saver_def and self._name: # Since self._name is used as a name_scope by builder(), we are # overloading the use of this field to represent the ""import_scope"" as ",0,train 33bcb53486aa286ad16b0d1d7a2715febf696364,tensorflow/tensorflow,"Allow clients to pass a filename to the constructor of Saver. 
PiperOrigin-RevId: 156741424",saver_test.py,"@@ -236,6 +236,15 @@ class SaverTest(test.TestCase): self.assertEqual(b""k1"", v2.keys().eval()) self.assertEqual(30.0, v2.values().eval()) + def testFilenameTensor(self): + v0 = variables.Variable(0, name=""v0"") + filename = b""somerandomfilename"" + save = saver_module.Saver({""v0"": v0}, filename=filename) + with self.test_session() as sess: + tensor = sess.graph.get_tensor_by_name( + save.saver_def.filename_tensor_name) + self.assertEqual(sess.run(tensor), filename) + def testInvalidPath(self): v0 = variables.Variable(0, name=""v0"") for ver in (saver_pb2.SaverDef.V1, saver_pb2.SaverDef.V2): ",0,train 0bfdc5429e1c23fdcc94168ef734a152fb981fff,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-10-24 PiperOrigin-RevId: 276443625 Change-Id: I20e8893026dac243b663496e15ab17a936d7c2d8",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 10, 23) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 10, 24) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train 87a80d8bd2ac0235b2f6632dbc024b0509520897,tensorflow/tensorflow,Addressing review comments,mkl_conv_ops.cc,"@@ -66,6 +66,7 @@ struct MklConvFwdParams { memory::dims padding_left; memory::dims padding_right; MKL_TENSOR_FORMAT tf_fmt; + bool native_format; string dtypes = string(""""); struct PostOpParam { string name; @@ -79,7 +80,7 @@ struct MklConvFwdParams { memory::dims bias_dims, memory::dims dst_dims, memory::dims strides, memory::dims dilations, memory::dims padding_left, memory::dims padding_right, - MKL_TENSOR_FORMAT tf_fmt) + MKL_TENSOR_FORMAT tf_fmt, bool native_format) : src_dims(src_dims), filter_dims(filter_dims), bias_dims(bias_dims), @@ -88,13 +89,13 @@ struct MklConvFwdParams { dilations(dilations), padding_left(padding_left), padding_right(padding_right), - tf_fmt(tf_fmt) {} + tf_fmt(tf_fmt), + native_format(native_format) {} }; // With quantization, input, filter, and output can have different types // so we use different template parameter for each type -template +template class MklConvFwdPrimitive : public MklPrimitive { public: explicit MklConvFwdPrimitive(const MklConvFwdParams& convFwdDims) @@ -233,7 +234,7 @@ class MklConvFwdPrimitive : public MklPrimitive { void Setup(const MklConvFwdParams& convFwdDims) { MEMORY_FORMAT user_data_fmt; - if (native_format) { + if (convFwdDims.native_format) { user_data_fmt = MklTensorFormatToMklDnnDataFormat(convFwdDims.tf_fmt); } else { // Create memory descriptors for convolution data w/ no specified format @@ -370,31 +371,29 @@ class MklConvFwdPrimitive : public MklPrimitive { // TODO(nhasabni): We should not require passing a type to MklPrimitiveFactory. // But removing the need for type in MklPrimitiveFactory is going to require // change to every MKL op. So not doing it now. Instead passing float. 
-template +template class MklConvFwdPrimitiveFactory : public MklPrimitiveFactory { public: - static MklConvFwdPrimitive* - Get(const MklConvFwdParams& convFwdDims, bool do_not_cache) { - MklConvFwdPrimitive* - conv_fwd = nullptr; + static MklConvFwdPrimitive* Get( + const MklConvFwdParams& convFwdDims, bool do_not_cache) { + MklConvFwdPrimitive* conv_fwd = nullptr; if (do_not_cache) { // Always create a new primitive - conv_fwd = new MklConvFwdPrimitive(convFwdDims); + conv_fwd = + new MklConvFwdPrimitive(convFwdDims); } else { // Try to find a suitable one in pool - conv_fwd = dynamic_cast< - MklConvFwdPrimitive*>( - MklConvFwdPrimitiveFactory::GetInstance() - .GetConvFwd(convFwdDims)); + conv_fwd = + dynamic_cast*>( + MklConvFwdPrimitiveFactory::GetInstance() + .GetConvFwd(convFwdDims)); if (conv_fwd == nullptr) { - conv_fwd = new MklConvFwdPrimitive(convFwdDims); - MklConvFwdPrimitiveFactory::GetInstance() + conv_fwd = new MklConvFwdPrimitive( + convFwdDims); + MklConvFwdPrimitiveFactory::GetInstance() .SetConvFwd(convFwdDims, conv_fwd); } } @@ -426,7 +425,7 @@ class MklConvFwdPrimitiveFactory : public MklPrimitiveFactory { key_creator.AddAsKey(convFwdDims.padding_left); key_creator.AddAsKey(convFwdDims.padding_right); key_creator.AddAsKey(convFwdDims.dtypes); - if (native_format) { + if (convFwdDims.native_format) { key_creator.AddAsKey(convFwdDims.tf_fmt); } @@ -689,23 +688,22 @@ class MklConvOp : public OpKernel { IsConv1x1StrideNot1(filter_dims, strides)); // Get a conv2d fwd from primitive pool - MklConvFwdPrimitive* - conv_fwd = nullptr; + MklConvFwdPrimitive* conv_fwd = + nullptr; memory::dims bias_dims = {}; if (fuse_biasadd_) { conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims); } - MklConvFwdParams convFwdDims(src_dims, filter_dims, - fuse_biasadd_ ? bias_dims : NONE_DIMS, - dst_dims_mkl_order, strides, dilations, - padding_left, padding_right, tf_fmt); + MklConvFwdParams convFwdDims( + src_dims, filter_dims, fuse_biasadd_ ? bias_dims : NONE_DIMS, + dst_dims_mkl_order, strides, dilations, padding_left, padding_right, + tf_fmt, native_format); // TODO(mdfaijul): Extend the basic parameters for data types and fusions this->ExtendConvFwdParams(context, convFwdDims); conv_fwd = - MklConvFwdPrimitiveFactory::Get(convFwdDims, - do_not_cache); + MklConvFwdPrimitiveFactory::Get( + convFwdDims, do_not_cache); // Allocate output tensors `dst_tensor` and `filter_out_tensor` MklDnnShape output_mkl_shape; std::shared_ptr conv_fwd_pd = conv_fwd->GetPrimitiveDesc(); ",0,train 8077ae1d1e8bfe6a5cc55df07ad82ae91f431d2e,tensorflow/tensorflow,Minor changes in comments,mkl_util.h,"@@ -33,7 +33,7 @@ limitations under the License. 
#endif #ifdef INTEL_MKL_ML_ONLY -// Using pragma as #warning doesn't work with all compilers +// Using pragma message since #warning doesn't work with all compilers #pragma message(""Compiling for INTEL MKL ML only will be deprecated soon."") #pragma message(""Please use MKL DNN (the default option for --config=mkl)"") #endif ",0,train 4900e8a17367aa8c158e5e783d4776ffc206a77b,tensorflow/tensorflow,"Automated rollback of commit 1f856fb5d978177123ddd5ac5a3e4bb669288d65 PiperOrigin-RevId: 232750954",backend.py,"@@ -4143,8 +4143,8 @@ def conv1d(x, x = nn.convolution( input=x, filter=kernel, - dilation_rate=dilation_rate, - strides=strides, + dilation_rate=(dilation_rate,), + strides=(strides,), padding=padding, data_format=tf_data_format) if data_format == 'channels_first' and tf_data_format == 'NWC': ",0,train 4900e8a17367aa8c158e5e783d4776ffc206a77b,tensorflow/tensorflow,"Automated rollback of commit 1f856fb5d978177123ddd5ac5a3e4bb669288d65 PiperOrigin-RevId: 232750954",nn_ops.py,"@@ -65,9 +65,7 @@ def _get_sequence(value, n, channel_index, name): return value elif current_n == 1: value = list((value[0],) * n) - elif current_n == n: - value = list(value) - else: + elif current_n != n: raise ValueError(""{} should be of length 1, {} or {} but was {}"".format( name, n, n + 2, current_n)) @@ -883,14 +881,21 @@ def convolution( filter = deprecated_argument_lookup(""filters"", filters, ""filter"", filter) dilation_rate = deprecated_argument_lookup( ""dilations"", dilations, ""dilation_rate"", dilation_rate) - return convolution_internal( - input, - filter, - strides=strides, - padding=padding, - data_format=data_format, - dilations=dilation_rate, - name=name) + # pylint: enable=line-too-long + with ops.name_scope(name, ""convolution"", [input, filter]) as name: + input = ops.convert_to_tensor(input, name=""input"") # pylint: disable=redefined-builtin + input_shape = input.get_shape() + filter = ops.convert_to_tensor(filter, name=""filter"") # pylint: disable=redefined-builtin + filter_shape = filter.get_shape() + op = Convolution( + input_shape, + filter_shape, + padding, + strides=strides, + dilation_rate=dilation_rate, + name=name, + data_format=data_format) + return op(input, filter) @tf_export(""nn.convolution"", v1=[]) @@ -902,15 +907,14 @@ def convolution_v2( data_format=None, dilations=None, name=None): - return convolution_internal( + return convolution( input, # pylint: disable=redefined-builtin filters, - strides=strides, padding=padding, - data_format=data_format, - dilations=dilations, - name=name) - + strides=strides, + dilation_rate=dilations, + name=name, + data_format=data_format) convolution_v2.__doc__ = deprecation.rewrite_argument_docstring( deprecation.rewrite_argument_docstring( @@ -918,67 +922,6 @@ convolution_v2.__doc__ = deprecation.rewrite_argument_docstring( ""filter"", ""filters"") -def convolution_internal( - input, # pylint: disable=redefined-builtin - filters, - strides=None, - padding=""VALID"", - data_format=None, - dilations=None, - name=None): - """"""Internal function which performs rank agnostic convolution."""""" - with ops.name_scope(name, ""convolution"", [input, filter]) as name: - if input.shape is not None: - n = len(input.shape) - 2 - elif filters.shape is not None: - n = len(filters.shape) - 2 - else: - raise ValueError(""rank of input or filter must be known"") - - if n < 1 or n > 3: - raise ValueError( - ""Input tensor must be of rank 3, 4 or 5 but was {}."".format(n + 2)) - - if data_format is None: - channel_index = n + 1 - else: - channel_index = 1 
if data_format.startswith(""NC"") else n + 1 - - strides = _get_sequence(strides, n, channel_index, ""strides"") - dilations = _get_sequence(dilations, n, channel_index, ""dilations"") - - conv_ops = {1: conv1d, 2: gen_nn_ops.conv2d, 3: gen_nn_ops.conv3d} - - if all(i == 1 for i in dilations): - # fast path if no dilation as gradient only supported on GPU for dilations - op = conv_ops.get(n) - return op( - input, - filters, - strides, - padding=padding, - data_format=data_format, - dilations=dilations, - name=name) - else: - if channel_index == 1: - strides = strides[2:] - dilations = dilations[2:] - else: - strides = strides[1:-1] - dilations = dilations[1:-1] - - op = Convolution( - tensor_shape.as_shape(input.shape), - tensor_shape.as_shape(filters.shape), - padding, - strides=strides, - dilation_rate=dilations, - name=name, - data_format=data_format) - return op(input, filters) - - class Convolution(object): """"""Helper class for convolution. @@ -4097,9 +4040,10 @@ def conv1d( entries by which the filter is moved right at each step. padding: 'SAME' or 'VALID' use_cudnn_on_gpu: An optional `bool`. Defaults to `True`. - data_format: An optional `string` from `""NWC"", ""NCW""`. Defaults to `""NWC""`, - the data is stored in the order of [batch, in_width, in_channels]. The - `""NCW""` format stores data as [batch, in_channels, in_width]. + data_format: An optional `string` from `""NWC"", ""NCW""`. Defaults + to `""NWC""`, the data is stored in the order of + [batch, in_width, in_channels]. The `""NCW""` format stores + data as [batch, in_channels, in_width]. name: A name for the operation (optional). input: Alias for value. dilations: An int or list of `ints` that has length `1` or `3` which @@ -4182,9 +4126,10 @@ def conv1d_v2( stride: An int or list of `ints` that has length `1` or `3`. The number of entries by which the filter is moved right at each step. padding: 'SAME' or 'VALID' - data_format: An optional `string` from `""NWC"", ""NCW""`. Defaults to `""NWC""`, - the data is stored in the order of [batch, in_width, in_channels]. The - `""NCW""` format stores data as [batch, in_channels, in_width]. + data_format: An optional `string` from `""NWC"", ""NCW""`. Defaults + to `""NWC""`, the data is stored in the order of + [batch, in_width, in_channels]. The `""NCW""` format stores + data as [batch, in_channels, in_width]. dilations: An int or list of `ints` that has length `1` or `3` which defaults to 1. The dilation factor for each dimension of input. If set to k > 1, there will be k-1 skipped cells between each filter element on that ",0,train 75ba615684492a49e67fd2c2a59af4ee0e56838b,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-02-04 PiperOrigin-RevId: 293094987 Change-Id: I3359d88507c68e1a9d96de43890f53f14c816f07",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 2, 3) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 2, 4) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train 07e0f88dd1ea60039267f4aeb57d6e24128e8c3b,tensorflow/tensorflow,"Whitelist InTopKV2, NextAfter and XlaKeyValueSort ops for the fallback path Enabled relevant tests. 
PiperOrigin-RevId: 335374607 Change-Id: I109c39459944648317c3a5274be4b5fe6c6e9586",legalize_tf_with_tf2xla.cc,"@@ -151,6 +151,7 @@ bool IsOpAllowedTf2XlaFallback(Operation* op) { TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), @@ -177,6 +178,7 @@ bool IsOpAllowedTf2XlaFallback(Operation* op) { TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), @@ -241,6 +243,7 @@ bool IsOpAllowedTf2XlaFallback(Operation* op) { TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get() ",0,train 07e0f88dd1ea60039267f4aeb57d6e24128e8c3b,tensorflow/tensorflow,"Whitelist InTopKV2, NextAfter and XlaKeyValueSort ops for the fallback path Enabled relevant tests. PiperOrigin-RevId: 335374607 Change-Id: I109c39459944648317c3a5274be4b5fe6c6e9586",binary_ops_test.py,"@@ -474,7 +474,6 @@ class BinaryOpsTest(xla_test.XLATestCase): expected=np.array([1 << 32, 1 << 36, 1 << 32, 1 << 36], dtype=np.int64)) - @test_util.disable_mlir_bridge(""Enable tf.NextAfter Compilation"") def testNextAfter(self): for dtype in self.numeric_types: if dtype in [np.float32, np.float64]: ",0,train f0b228a36dacaef00f7215df0d8ab3d3a84749a2,tensorflow/tensorflow,"TFL MCU: Move reference L2Normalization implementation into its own file. so that we won't need to import all the dependencies. This CL simply copies the existing code into the new file. PiperOrigin-RevId: 307134277 Change-Id: Idf7f9dffe6d6505337caefc736dce372aa014b14",common.h,"@@ -29,6 +29,8 @@ limitations under the License. namespace tflite { +constexpr int kReverseShift = -1; + inline void GetActivationMinMax(FusedActivationFunctionType ac, float* output_activation_min, float* output_activation_max) { ",0,train f0b228a36dacaef00f7215df0d8ab3d3a84749a2,tensorflow/tensorflow,"TFL MCU: Move reference L2Normalization implementation into its own file. so that we won't need to import all the dependencies. This CL simply copies the existing code into the new file. PiperOrigin-RevId: 307134277 Change-Id: Idf7f9dffe6d6505337caefc736dce372aa014b14",fully_connected.h,"@@ -23,8 +23,6 @@ limitations under the License. namespace tflite { namespace reference_ops { -const int kReverseShift = -1; - inline void FullyConnected( const FullyConnectedParams& params, const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& weights_shape, ",0,train f0b228a36dacaef00f7215df0d8ab3d3a84749a2,tensorflow/tensorflow,"TFL MCU: Move reference L2Normalization implementation into its own file. so that we won't need to import all the dependencies. This CL simply copies the existing code into the new file. PiperOrigin-RevId: 307134277 Change-Id: Idf7f9dffe6d6505337caefc736dce372aa014b14",l2normalization.h,"@@ -41,8 +41,8 @@ inline void L2Normalization(int32_t input_zero_point, int32_t outer_size, } int32_t inv_l2norm_multiplier; int inv_l2norm_shift; - GetInvSqrtQuantizedMultiplierExp(acc, /*reverse_shift*/ -1, - &inv_l2norm_multiplier, &inv_l2norm_shift); + GetInvSqrtQuantizedMultiplierExp(acc, kReverseShift, &inv_l2norm_multiplier, + &inv_l2norm_shift); for (int inner_index = 0; inner_index < depth; ++inner_index) { int32_t input = ",0,train f0b228a36dacaef00f7215df0d8ab3d3a84749a2,tensorflow/tensorflow,"TFL MCU: Move reference L2Normalization implementation into its own file. so that we won't need to import all the dependencies. This CL simply copies the existing code into the new file. 
PiperOrigin-RevId: 307134277 Change-Id: Idf7f9dffe6d6505337caefc736dce372aa014b14",l2normalization.h,"@@ -0,0 +1,88 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_ + +#include + +#include ""tensorflow/lite/c/common.h"" +#include ""tensorflow/lite/kernels/internal/common.h"" +#include ""tensorflow/lite/kernels/internal/types.h"" + +namespace tflite { + +namespace reference_ops { + +inline void L2Normalization(const tflite::L2NormalizationParams& op_params, + const RuntimeShape& input_shape, + const float* input_data, + const RuntimeShape& output_shape, + float* output_data, float epsilon = 1e-6) { + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = + MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = + MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + for (int i = 0; i < outer_size; ++i) { + float squared_l2_norm = 0; + for (int c = 0; c < depth; ++c) { + const float val = input_data[depth * i + c]; + squared_l2_norm += val * val; + } + float l2_norm = std::sqrt(squared_l2_norm); + l2_norm = std::max(l2_norm, epsilon); + for (int c = 0; c < depth; ++c) { + output_data[depth * i + c] = input_data[depth * i + c] / l2_norm; + } + } +} + +inline void L2Normalization(const tflite::L2NormalizationParams& op_params, + const RuntimeShape& input_shape, + const uint8* input_data, + const RuntimeShape& output_shape, + uint8* output_data) { + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int depth = + MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + const int outer_size = + MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int32 input_zero_point = op_params.input_zero_point; + + for (int i = 0; i < outer_size; ++i) { + int32 square_l2_norm = 0; + for (int c = 0; c < depth; c++) { + int32 diff = input_data[depth * i + c] - input_zero_point; + square_l2_norm += diff * diff; + } + int32 inv_l2norm_multiplier; + int inv_l2norm_shift; + GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift, + &inv_l2norm_multiplier, &inv_l2norm_shift); + for (int c = 0; c < depth; c++) { + int32 diff = input_data[depth * i + c] - input_zero_point; + int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp( + 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); + int32 unclamped_output_val = 128 + rescaled_diff; + int32 output_val = std::min(255, std::max(0, unclamped_output_val)); + output_data[depth * i + c] = static_cast(output_val); + } + } +} + + +} // namespace reference_ops +} // namespace tflite +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_ ",0,train f0b228a36dacaef00f7215df0d8ab3d3a84749a2,tensorflow/tensorflow,"TFL MCU: Move reference 
L2Normalization implementation into its own file. so that we won't need to import all the dependencies. This CL simply copies the existing code into the new file. PiperOrigin-RevId: 307134277 Change-Id: Idf7f9dffe6d6505337caefc736dce372aa014b14",reference_ops.h,"@@ -42,6 +42,7 @@ limitations under the License. #include ""tensorflow/lite/kernels/internal/reference/dequantize.h"" #include ""tensorflow/lite/kernels/internal/reference/floor.h"" #include ""tensorflow/lite/kernels/internal/reference/fully_connected.h"" +#include ""tensorflow/lite/kernels/internal/reference/l2normalization.h"" #include ""tensorflow/lite/kernels/internal/reference/logistic.h"" #include ""tensorflow/lite/kernels/internal/reference/maximum_minimum.h"" #include ""tensorflow/lite/kernels/internal/reference/mul.h"" @@ -294,62 +295,6 @@ inline void QuantizeLeakyRelu(const LeakyReluParams& params, } } -inline void L2Normalization(const tflite::L2NormalizationParams& op_params, - const RuntimeShape& input_shape, - const float* input_data, - const RuntimeShape& output_shape, - float* output_data, float epsilon = 1e-6) { - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int depth = - MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); - for (int i = 0; i < outer_size; ++i) { - float squared_l2_norm = 0; - for (int c = 0; c < depth; ++c) { - const float val = input_data[depth * i + c]; - squared_l2_norm += val * val; - } - float l2_norm = std::sqrt(squared_l2_norm); - l2_norm = std::max(l2_norm, epsilon); - for (int c = 0; c < depth; ++c) { - output_data[depth * i + c] = input_data[depth * i + c] / l2_norm; - } - } -} - -inline void L2Normalization(const tflite::L2NormalizationParams& op_params, - const RuntimeShape& input_shape, - const uint8* input_data, - const RuntimeShape& output_shape, - uint8* output_data) { - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int depth = - MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int32 input_zero_point = op_params.input_zero_point; - for (int i = 0; i < outer_size; ++i) { - int32 square_l2_norm = 0; - for (int c = 0; c < depth; c++) { - int32 diff = input_data[depth * i + c] - input_zero_point; - square_l2_norm += diff * diff; - } - int32 inv_l2norm_multiplier; - int inv_l2norm_shift; - GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift, - &inv_l2norm_multiplier, &inv_l2norm_shift); - for (int c = 0; c < depth; c++) { - int32 diff = input_data[depth * i + c] - input_zero_point; - int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp( - 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); - int32 unclamped_output_val = 128 + rescaled_diff; - int32 output_val = std::min(255, std::max(0, unclamped_output_val)); - output_data[depth * i + c] = static_cast(output_val); - } - } -} - // T is expected to be either float or int. template inline void AddN(const RuntimeShape& input_shape, const size_t num_inputs, ",0,train 1c6f10152fe850463422108f03b6b022b8f24ccc,tensorflow/tensorflow,"Split up conv_ops_fused kernels. This improves build times by allowing the double, float, and half implementations to build in parallel. PiperOrigin-RevId: 235576953",conv_ops_fused_double.cc,"@@ -0,0 +1,39 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""tensorflow/core/framework/register_types.h"" +#include ""tensorflow/core/kernels/conv_ops_fused_impl.h"" + +namespace tensorflow { + +// If we're using the alternative GEMM-based implementation of Conv2D for the +// CPU implementation, don't register this EigenTensor-based version. +// TODO(b/119765980): Upgrade upstream Eigen to set `m_can_use_xsmm=false` for +// contractions with non-default contraction output kernels. +#if !defined(USE_GEMM_FOR_CONV) && !defined(EIGEN_USE_LIBXSMM) +TF_CALL_double(REGISTER_FUSED_CPU_CONV2D); +#endif // !USE_GEMM_FOR_CONV + +#if GOOGLE_CUDA + +namespace functor { +DECLARE_FUNCTOR_GPU_SPEC(double); +} // namespace functor + +TF_CALL_double(REGISTER_FUSED_GPU_CONV2D); + +#endif // GOOGLE_CUDA + +} // namespace tensorflow ",0,train 1c6f10152fe850463422108f03b6b022b8f24ccc,tensorflow/tensorflow,"Split up conv_ops_fused kernels. This improves build times by allowing the double, float, and half implementations to build in parallel. PiperOrigin-RevId: 235576953",conv_ops_fused_float.cc,"@@ -0,0 +1,39 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""tensorflow/core/framework/register_types.h"" +#include ""tensorflow/core/kernels/conv_ops_fused_impl.h"" + +namespace tensorflow { + +// If we're using the alternative GEMM-based implementation of Conv2D for the +// CPU implementation, don't register this EigenTensor-based version. +// TODO(b/119765980): Upgrade upstream Eigen to set `m_can_use_xsmm=false` for +// contractions with non-default contraction output kernels. +#if !defined(USE_GEMM_FOR_CONV) && !defined(EIGEN_USE_LIBXSMM) +TF_CALL_float(REGISTER_FUSED_CPU_CONV2D); +#endif // !USE_GEMM_FOR_CONV + +#if GOOGLE_CUDA + +namespace functor { +DECLARE_FUNCTOR_GPU_SPEC(float); +} // namespace functor + +TF_CALL_float(REGISTER_FUSED_GPU_CONV2D); + +#endif // GOOGLE_CUDA + +} // namespace tensorflow ",0,train 1c6f10152fe850463422108f03b6b022b8f24ccc,tensorflow/tensorflow,"Split up conv_ops_fused kernels. This improves build times by allowing the double, float, and half implementations to build in parallel. PiperOrigin-RevId: 235576953",conv_ops_fused_half.cc,"@@ -0,0 +1,29 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""tensorflow/core/framework/register_types.h"" +#include ""tensorflow/core/kernels/conv_ops_fused_impl.h"" + +namespace tensorflow { + +#if GOOGLE_CUDA + +namespace functor { +DECLARE_FUNCTOR_GPU_SPEC(Eigen::half); +} // namespace functor + +#endif // GOOGLE_CUDA + +} // namespace tensorflow ",0,train 1c6f10152fe850463422108f03b6b022b8f24ccc,tensorflow/tensorflow,"Split up conv_ops_fused kernels. This improves build times by allowing the double, float, and half implementations to build in parallel. PiperOrigin-RevId: 235576953",conv_ops_fused_impl.h,"@@ -28,6 +28,9 @@ limitations under the License. // // NOTE: GPU only supports fusion of Conv2D + BiasAdd + . +#ifndef TENSORFLOW_CORE_KERNELS_CONV_OPS_FUSED_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_CONV_OPS_FUSED_IMPL_H_ + #define USE_EIGEN_TENSOR #define EIGEN_USE_THREADS @@ -63,7 +66,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -namespace { // Supported Conv2D fusions. Not all of them supported on all type of devices. enum class FusedComputationType { // NOTE(ezhulenev): CuDNN `cudnnConvolutionBiasActivationForward` supports @@ -463,12 +465,12 @@ class FusedConvParameters : public ConvParameters { se::dnn::ActivationMode activation_mode_; }; -bool operator==(const FusedConvParameters& lhs, +inline bool operator==(const FusedConvParameters& lhs, const FusedConvParameters& rhs) { return lhs.get_data_as_tuple() == rhs.get_data_as_tuple(); } -bool operator!=(const FusedConvParameters& lhs, +inline bool operator!=(const FusedConvParameters& lhs, const FusedConvParameters& rhs) { return !(lhs == rhs); } @@ -482,7 +484,7 @@ using AutoTuneFusedConv = AutoTuneSingleton; -int64 ConvolveScratchSize() { +inline int64 ConvolveScratchSize() { static int64 convolve_scratch_size = GetDnnWorkspaceLimit( // default value is in bytes despite the name of the environment variable ""TF_CUDNN_WORKSPACE_LIMIT_IN_MB"", 1LL << 32 // 4GB @@ -822,8 +824,6 @@ struct LaunchFusedConv2DOp { #endif // GOOGLE_CUDA -} // namespace - template class FusedConv2DOp : public OpKernel { public: @@ -962,22 +962,9 @@ class FusedConv2DOp : public OpKernel { Name(""_FusedConv2D"").Device(DEVICE_CPU).TypeConstraint(""T""), \ FusedConv2DOp); -// If we're using the alternative GEMM-based implementation of Conv2D for the -// CPU implementation, don't register this EigenTensor-based version. -// TODO(b/119765980): Upgrade upstream Eigen to set `m_can_use_xsmm=false` for -// contractions with non-default contraction output kernels. -#if !defined(USE_GEMM_FOR_CONV) && !defined(EIGEN_USE_LIBXSMM) -TF_CALL_float(REGISTER_FUSED_CPU_CONV2D); -TF_CALL_double(REGISTER_FUSED_CPU_CONV2D); -#endif // !USE_GEMM_FOR_CONV - -#undef REGISTER_FUSED_CPU_CONV2D - #if GOOGLE_CUDA -// Forward declarations of the functor specializations for GPU. 
-namespace functor { -#define DECLARE_GPU_SPEC(T) \ +#define DECLARE_FUNCTOR_GPU_SPEC(T) \ template <> \ void TransformFilter::operator()( \ const GPUDevice& d, FilterTensorFormat dst_filter_format, \ @@ -992,23 +979,14 @@ namespace functor { typename TTypes::Tensor out, TensorFormat data_format); \ extern template struct PadInput -DECLARE_GPU_SPEC(float); -DECLARE_GPU_SPEC(Eigen::half); -DECLARE_GPU_SPEC(double); -#undef DECLARE_GPU_SPEC -} // namespace functor - // Registration of the GPU implementations. #define REGISTER_FUSED_GPU_CONV2D(T) \ REGISTER_KERNEL_BUILDER( \ Name(""_FusedConv2D"").Device(DEVICE_GPU).TypeConstraint(""T""), \ FusedConv2DOp); -TF_CALL_float(REGISTER_FUSED_GPU_CONV2D); -TF_CALL_double(REGISTER_FUSED_GPU_CONV2D); - -#undef REGISTER_FUSED_GPU_CONV2D - #endif // GOOGLE_CUDA } // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_CONV_OPS_FUSED_IMPL_H_ ",0,train d72d45b6701279e12a0dd8325f143162b9060f33,tensorflow/tensorflow,"Add default_batch_size to ExportEstimator. Change: 127753127",monitors.py,"@@ -837,11 +837,15 @@ class GraphDump(BaseMonitor): class ExportMonitor(EveryN): """"""Monitor that exports Estimator every N steps."""""" + # TODO(philstahlfeld): Investigate switching export.export_estimator + # configuration values to **kwargs so that updates to the export_estimator + # function don't have to be reflected here. def __init__(self, every_n_steps, export_dir, exports_to_keep=5, - signature_fn=None): + signature_fn=None, + default_batch_size=1): """"""Initializes ExportMonitor. Args: @@ -851,11 +855,13 @@ class ExportMonitor(EveryN): signature_fn: Function that given `Tensor` of `Example` strings, `dict` of `Tensor`s for features and `dict` of `Tensor`s for predictions and returns default and named exporting signautres. + default_batch_size: Default batch size of the `Example` placeholder. """""" super(ExportMonitor, self).__init__(every_n_steps=every_n_steps) self.export_dir = export_dir self.exports_to_keep = exports_to_keep self.signature_fn = signature_fn + self._default_batch_size = default_batch_size def every_n_step_end(self, step, outputs): super(ExportMonitor, self).every_n_step_end(step, outputs) @@ -863,7 +869,8 @@ class ExportMonitor(EveryN): export.export_estimator(self._estimator, self.export_dir, exports_to_keep=self.exports_to_keep, - signature_fn=self.signature_fn) + signature_fn=self.signature_fn, + default_batch_size=self._default_batch_size) except (RuntimeError, TypeError): # Currently we are not syncronized with saving checkpoints, which leads to # runtime errors when we are calling export on the same global step. 
@@ -875,7 +882,8 @@ class ExportMonitor(EveryN): export.export_estimator(self._estimator, self.export_dir, exports_to_keep=self.exports_to_keep, - signature_fn=self.signature_fn) + signature_fn=self.signature_fn, + default_batch_size=self._default_batch_size) class CheckpointSaver(EveryN): ",0,train ec36e9c3efbf0cf84cee0ef43164d914b2e626c5,tensorflow/tensorflow,"Apply clang-tidy fixes for llvm-qualified-auto in fusion_utils.cc (NFC) PiperOrigin-RevId: 418588406 Change-Id: I0fc78fb3fac74d194710108cbcba3c371c560643",fusion_utils.cc,"@@ -351,8 +351,8 @@ void FusionPattern::calculateOperandsAndResults() { // Supports using EquivalenceClasses for Value bool operator<(const ValueWrapper& lhs, const ValueWrapper& rhs) { - auto lhs_value = lhs.getValue().getAsOpaquePointer(); - auto rhs_value = rhs.getValue().getAsOpaquePointer(); + auto* lhs_value = lhs.getValue().getAsOpaquePointer(); + auto* rhs_value = rhs.getValue().getAsOpaquePointer(); return lhs_value < rhs_value; } ",0,train 4b0687d70c4bcab5ec2837345bd0115a0b356946,tensorflow/tensorflow,"[XLA] Add --xla_hlo_profile_last_run flag to replay_computation. When using replay_computation for profiling, you usually only want to do one or two warmup runs and then profile the last run of your model. This flag makes that possible. PiperOrigin-RevId: 189208924",replay_computation.cc,"@@ -40,6 +40,7 @@ limitations under the License. #include ""tensorflow/compiler/xla/client/global_data.h"" #include ""tensorflow/compiler/xla/client/lib/testing.h"" #include ""tensorflow/compiler/xla/client/local_client.h"" +#include ""tensorflow/compiler/xla/execution_options_util.h"" #include ""tensorflow/compiler/xla/literal_util.h"" #include ""tensorflow/compiler/xla/service/session.pb.h"" #include ""tensorflow/compiler/xla/shape_util.h"" @@ -66,6 +67,7 @@ struct Options { bool use_fake_data = false; bool print_result = true; int num_runs = 1; + bool xla_hlo_profile_last_run = false; }; // Invokes the given computation passing arbitrary data for every (unbound) @@ -122,16 +124,21 @@ StatusOr> ReplayComputation( std::unique_ptr result; for (int i = 0; i < opts.num_runs; ++i) { ExecutionProfile profile; + ExecutionOptions execution_options = CreateDefaultExecutionOptions(); + if (opts.xla_hlo_profile_last_run && i == opts.num_runs - 1) { + execution_options.mutable_debug_options()->set_xla_hlo_profile(true); + } + if (opts.print_result) { - TF_ASSIGN_OR_RETURN(result, client->ExecuteAndTransfer( - computation, execute_arguments, - /*execution_options=*/nullptr, &profile)); + TF_ASSIGN_OR_RETURN( + result, client->ExecuteAndTransfer(computation, execute_arguments, + &execution_options, &profile)); } else { // If we're not printing the result, execute the computation but don't // bother retrieving the result. This can be a significant speedup. 
TF_RETURN_IF_ERROR(client ->Execute(computation, execute_arguments, - /*execution_options=*/nullptr, &profile) + &execution_options, &profile) .status()); } LOG(INFO) << ""Execution took "" @@ -191,6 +198,9 @@ int main(int argc, char** argv) { ""Number of times to run each computation""), tensorflow::Flag(""fake_infeed_shape"", &opts.fake_infeed_shape, ""Shape of fake data to construct for (infinite) infeed""), + tensorflow::Flag( + ""xla_hlo_profile_last_run"", &opts.xla_hlo_profile_last_run, + ""Pass --xla_hlo_profile the last time we run the computation.""), }; xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); bool parse_ok = tensorflow::Flags::Parse(&argc, argv, flag_list); ",0,train f292f31b57480d0b33f5c0feb5fb128e43c865dc,tensorflow/tensorflow,"Disabling benchmarkScanDefun for TFRT due to lack of MLIR lowering support. PiperOrigin-RevId: 324694962 Change-Id: I2398161dff9403ac115a031c5942f753daff7871",benchmarks_test.py,"@@ -1260,6 +1260,8 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._run(scan, 100) + @test_util.disable_tfrt( + ""tf.While not supported in TF to CoreRT lowing. b/162685874"") def benchmarkScanDefun(self): elems = math_ops.range(1600) ",0,train bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`. This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available. PiperOrigin-RevId: 238523747",ir_emitter.cc,"@@ -302,7 +302,7 @@ Status IrEmitter::HandleGetTupleElement(HloInstruction* get_tuple_element) { const Shape& shape = get_tuple_element->shape(); emitted_value_[get_tuple_element] = llvm_ir::EmitGetTupleElement( shape, get_tuple_element->tuple_index(), MinimumAlignmentForShape(shape), - GetEmittedValueFor(operand), &b_, module_); + GetEmittedValueFor(operand), &b_); return Status::OK(); } @@ -322,7 +322,7 @@ Status IrEmitter::HandleTupleSelect(HloInstruction* tuple_select) { TF_RETURN_IF_ERROR(EmitTargetAddressForOp(tuple_select)); llvm_ir::EmitTupleSelect(GetIrArrayFor(tuple_select), GetIrArrayFor(pred), GetEmittedValueFor(on_true), - GetEmittedValueFor(on_false), &b_, module_); + GetEmittedValueFor(on_false), &b_); return Status::OK(); } @@ -345,8 +345,7 @@ Status IrEmitter::HandleInfeed(HloInstruction* instruction) { assignment_.GetUniqueSlice(infeed, {1})); llvm::Value* token_address = EmitBufferPointer( token_slice, ShapeUtil::GetTupleElementShape(infeed->shape(), 1)); - llvm_ir::EmitTuple(GetIrArrayFor(infeed), {data_address, token_address}, &b_, - module_); + llvm_ir::EmitTuple(GetIrArrayFor(infeed), {data_address, token_address}, &b_); if (data_shape.IsTuple()) { TF_RET_CHECK(!ShapeUtil::IsNestedTuple(data_shape)); @@ -377,7 +376,7 @@ Status IrEmitter::HandleInfeed(HloInstruction* instruction) { } llvm_ir::EmitTuple(llvm_ir::IrArray(data_address, data_shape), - tuple_element_addresses, &b_, module_); + tuple_element_addresses, &b_); } else { TF_RETURN_IF_ERROR( EmitXfeedTransfer(XfeedKind::kInfeed, data_shape, data_address)); @@ -498,7 +497,7 @@ Status IrEmitter::HandleOutfeed(HloInstruction* outfeed) { ShapeUtil::GetTupleElementShape(operand_shape, i); llvm::Value* tuple_element = llvm_ir::EmitGetTupleElement( tuple_element_shape, i, MinimumAlignmentForShape(tuple_element_shape), - value, &b_, module_); + value, &b_); 
TF_RETURN_IF_ERROR(EmitXfeedTransfer(XfeedKind::kOutfeed, tuple_element_shape, tuple_element)); } @@ -621,8 +620,7 @@ Status IrEmitter::HandleSort(HloInstruction* hlo) { GetProfileCountersArgument(), less_than_function}); if (sort->values_count() > 0) { - llvm_ir::EmitTuple(GetIrArrayFor(sort), destination_addresses, &b_, - module_); + llvm_ir::EmitTuple(GetIrArrayFor(sort), destination_addresses, &b_); } return Status::OK(); } @@ -633,7 +631,7 @@ Status IrEmitter::HandleTuple(HloInstruction* tuple) { for (auto operand : tuple->operands()) { base_ptrs.push_back(GetEmittedValueFor(operand)); } - llvm_ir::EmitTuple(GetIrArrayFor(tuple), base_ptrs, &b_, module_); + llvm_ir::EmitTuple(GetIrArrayFor(tuple), base_ptrs, &b_); return Status::OK(); } @@ -1349,7 +1347,7 @@ Status IrEmitter::HandleAllReduce(HloInstruction* crs) { MemCpy(operand_ptrs.back(), /*DstAlign=*/1, in_ptr, /*SrcAlign=*/1, ShapeUtil::ByteSizeOf(operand_shape)); } - llvm_ir::EmitTuple(GetIrArrayFor(crs), operand_ptrs, &b_, module_); + llvm_ir::EmitTuple(GetIrArrayFor(crs), operand_ptrs, &b_); return Status::OK(); } @@ -2289,7 +2287,7 @@ Status IrEmitter::HandleCustomCall(HloInstruction* custom_call) { llvm::Value* addr = EmitBufferPointer(slice, elem_shape); base_ptrs.push_back(addr); } - llvm_ir::EmitTuple(GetIrArrayFor(custom_call), base_ptrs, &b_, module_); + llvm_ir::EmitTuple(GetIrArrayFor(custom_call), base_ptrs, &b_); } auto* output_address_arg = PointerCast(GetEmittedValueFor(custom_call), i8_ptr_type); @@ -2980,7 +2978,7 @@ Status IrEmitter::EmitTargetElementLoop( for (int64 i = 0; i < output_arrays.size(); ++i) { tuple_operand_ptrs.push_back(output_arrays[i].GetBasePointer()); } - llvm_ir::EmitTuple(target_array, tuple_operand_ptrs, &b_, module_); + llvm_ir::EmitTuple(target_array, tuple_operand_ptrs, &b_); } else { if (ShouldEmitParallelLoopFor(*target_op)) { ",0,train bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`. This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available. PiperOrigin-RevId: 238523747",hlo_to_ir_bindings.cc,"@@ -135,11 +135,11 @@ llvm::Value* HloToIrBindings::EmitGetTupleElement(const HloInstruction* gte, if (gte->operand(0)->opcode() != HloOpcode::kGetTupleElement) { return llvm_ir::EmitGetTupleElement( gte->shape(), gte->tuple_index(), /*alignment=*/1, - GetTypedIrValue(*gte->operand(0), {}, base_ptr), b_, module_); + GetTypedIrValue(*gte->operand(0), {}, base_ptr), b_); } return llvm_ir::EmitGetTupleElement( gte->shape(), gte->tuple_index(), /*alignment=*/1, - EmitGetTupleElement(gte->operand(0), base_ptr), b_, module_); + EmitGetTupleElement(gte->operand(0), base_ptr), b_); } // Returns true if `value` has a name that should not be changed. ",0,train bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`. This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available. 
PiperOrigin-RevId: 238523747",ir_emitter.cc,"@@ -115,7 +115,7 @@ Status IrEmitter::HandleGetTupleElement(HloInstruction* get_tuple_element) { get_tuple_element->shape(), get_tuple_element->tuple_index(), // TODO(b/26344050): tighten the alignment here // based on the real element type. - /*alignment=*/1, GetBasePointer(*operand), &b_, module_)); + /*alignment=*/1, GetBasePointer(*operand), &b_)); return Status::OK(); } @@ -144,7 +144,7 @@ Status IrEmitter::HandleTuple(HloInstruction* tuple) { for (const HloInstruction* operand : tuple->operands()) { base_ptrs.push_back(GetBasePointer(*operand)); } - llvm_ir::EmitTuple(GetIrArray(*tuple, *tuple), base_ptrs, &b_, module_); + llvm_ir::EmitTuple(GetIrArray(*tuple, *tuple), base_ptrs, &b_); return Status::OK(); } @@ -434,7 +434,7 @@ Status IrEmitter::HandleTupleSelect(HloInstruction* tuple_select) { llvm_ir::EmitTupleSelect(GetIrArray(*tuple_select, *tuple_select), GetIrArray(*pred, *tuple_select), GetBasePointer(*on_true), GetBasePointer(*on_false), - &b_, module_); + &b_); return Status::OK(); } ",0,train bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`. This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available. PiperOrigin-RevId: 238523747",ir_emitter_nested.cc,"@@ -123,7 +123,7 @@ Status IrEmitterNested::EmitTargetElementLoop( ConstructIrArrayForOutputs(hlo); TF_RETURN_IF_ERROR( llvm_ir::LoopEmitter(element_generator, target_arrays, &b_).EmitLoop()); - llvm_ir::EmitTuple(GetIrArray(hlo, hlo), target_arrays, &b_, module_); + llvm_ir::EmitTuple(GetIrArray(hlo, hlo), target_arrays, &b_); return Status::OK(); } return llvm_ir::LoopEmitter(element_generator, GetIrArray(hlo, hlo), &b_) ",0,train bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`. This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available. PiperOrigin-RevId: 238523747",ir_emitter_unnested.cc,"@@ -2201,7 +2201,7 @@ Status IrEmitterUnnested::EmitTargetElementLoopInThunk( // kernel *anyway*. std::vector output_arrays = ConstructIrArrayForOutputs(hlo); KernelSupportLibrary{&b_}.If(""emit_mof_tuple"", IsBlock0Thread0(&b_), [&] { - llvm_ir::EmitTuple(GetIrArray(hlo, hlo), output_arrays, &b_, module_); + llvm_ir::EmitTuple(GetIrArray(hlo, hlo), output_arrays, &b_); }); // For multioutput fusion, we need to emit each operand and the root. @@ -3103,8 +3103,7 @@ LaunchDimensions IrEmitterUnnested::EmitKernel( if (!reduction_info && unnested_hlo->IsMultiOutputFusion()) { KernelSupportLibrary{&b_}.If(""emit_mof_tuple"", IsBlock0Thread0(&b_), [&] { llvm_ir::EmitTuple(GetIrArray(*unnested_hlo, *unnested_hlo), - ConstructIrArrayForOutputs(*unnested_hlo), &b_, - module_); + ConstructIrArrayForOutputs(*unnested_hlo), &b_); }); } ",0,train bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`. 
This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available. PiperOrigin-RevId: 238523747",fused_ir_emitter.cc,"@@ -121,9 +121,9 @@ Status FusedIrEmitter::HandleGetTupleElement( } // Lookup tuple element pointer. - return llvm_ir::EmitGetTupleElement( - get_tuple_element->shape(), get_tuple_element->tuple_index(), - /*alignment=*/1, tuple_ptr, b_, module_); + return llvm_ir::EmitGetTupleElement(get_tuple_element->shape(), + get_tuple_element->tuple_index(), + /*alignment=*/1, tuple_ptr, b_); }; if (!get_tuple_element->shape().IsTuple()) { ",0,train bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`. This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available. PiperOrigin-RevId: 238523747",tuple_ops.cc,"@@ -29,9 +29,14 @@ limitations under the License. namespace xla { namespace llvm_ir { +static llvm::Module* getModuleFromBuilder(llvm::IRBuilder<>* b) { + return b->GetInsertBlock()->getModule(); +} + void EmitTupleSelect(const IrArray& select, const IrArray& pred, llvm::Value* on_true, llvm::Value* on_false, - llvm::IRBuilder<>* b, llvm::Module* module) { + llvm::IRBuilder<>* b) { + llvm::Module* module = getModuleFromBuilder(b); CHECK(ShapeUtil::IsScalar(pred.GetShape())); llvm::LoadInst* pred_value = @@ -65,7 +70,8 @@ void EmitTupleSelect(const IrArray& select, const IrArray& pred, } void EmitTuple(const IrArray& tuple, absl::Span operands, - llvm::IRBuilder<>* b, llvm::Module* module) { + llvm::IRBuilder<>* b) { + llvm::Module* module = getModuleFromBuilder(b); for (size_t i = 0; i < operands.size(); ++i) { auto* store = b->CreateStore( b->CreatePointerCast(operands[i], PrimitiveTypeToIrType(TUPLE, module)), @@ -76,18 +82,19 @@ void EmitTuple(const IrArray& tuple, absl::Span operands, } void EmitTuple(const IrArray& tuple, absl::Span buffers, - llvm::IRBuilder<>* b, llvm::Module* module) { + llvm::IRBuilder<>* b) { std::vector buffer_ptrs; buffer_ptrs.reserve(buffers.size()); absl::c_transform( buffers, std::back_inserter(buffer_ptrs), [](const llvm_ir::IrArray& buffer) { return buffer.GetBasePointer(); }); - llvm_ir::EmitTuple(tuple, buffer_ptrs, b, module); + llvm_ir::EmitTuple(tuple, buffer_ptrs, b); } llvm::Value* EmitGetTupleElement(const Shape& target_shape, int64 index, int alignment, llvm::Value* operand, - llvm::IRBuilder<>* b, llvm::Module* module) { + llvm::IRBuilder<>* b) { + llvm::Module* module = getModuleFromBuilder(b); llvm::Value* element_ptr = b->CreateInBoundsGEP(operand, {b->getInt64(0), b->getInt64(index)}); llvm::LoadInst* src_buffer = b->CreateLoad(element_ptr); ",0,train bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`. This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available. PiperOrigin-RevId: 238523747",tuple_ops.h,"@@ -61,17 +61,17 @@ namespace llvm_ir { // output[i] = pred ? 
tuple_on_true[i] : tuple_on_false[i] void EmitTupleSelect(const IrArray& select, const IrArray& pred, llvm::Value* on_true, llvm::Value* on_false, - llvm::IRBuilder<>* b, llvm::Module* module); + llvm::IRBuilder<>* b); // A tuple is an array of pointers, one for each operand. Each pointer points to // the output buffer of its corresponding operand. void EmitTuple(const IrArray& tuple, absl::Span operands, - llvm::IRBuilder<>* b, llvm::Module* module); + llvm::IRBuilder<>* b); // Similar to EmitTuple above, except that the output buffers are provided in // the form of IrArray. void EmitTuple(const IrArray& tuple, absl::Span buffers, - llvm::IRBuilder<>* b, llvm::Module* module); + llvm::IRBuilder<>* b); // A tuple is an array of pointers, one for each operand. Each pointer points to // the output buffer of its corresponding operand. A GetTupleElement instruction @@ -79,7 +79,7 @@ void EmitTuple(const IrArray& tuple, absl::Span buffers, // Returns an llvm value representing a pointer to the tuple element buffer. llvm::Value* EmitGetTupleElement(const Shape& target_shape, int64 index, int alignment, llvm::Value* operand, - llvm::IRBuilder<>* b, llvm::Module* module); + llvm::IRBuilder<>* b); } // namespace llvm_ir } // namespace xla ",0,train bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes. The classes in tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h have two implementations. This is achieved by using a #if directive to provide the implementation directly in each header files, or alternately, a no-op implementation on mobile platforms from one of the header files mobile_{counter,gauge,percentile_sampler,sampler}.h. I believe the intent is that the mobile*.h be used only via the main header files, and never be included directly, but nothing was preventing this. If someone had included one of the mobile*.h files directly, and the resulting object file were linked with one that used the primary header files on a non-mobile platform, it may cause problems. There would have been no error at compile or link time, yet the classes would be defined in two different ways, leading to an unchecked ODR violation and undefined results. For example, the linker potentially could pick an arbitrary version of each routine in the class. This change tries to avoid the potential problem in two ways: - by restricting the visibility of the mobile_*.h variants (for bazel builds, at least); and - by causing the mobile_*.h files to use #error if they appear not to have been used on a mobile platform, or not included from their respective primary header files. Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime build rule. PiperOrigin-RevId: 341680724 Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",counter.h,"@@ -24,7 +24,10 @@ limitations under the License. // We replace this implementation with a null implementation for mobile // platforms. #ifdef IS_MOBILE_PLATFORM +#define TENSORFLOW_INCLUDED_FROM_COUNTER_H // prevent accidental use of + // mobile_counter.h #include ""tensorflow/core/lib/monitoring/mobile_counter.h"" +#undef TENSORFLOW_INCLUDED_FROM_COUNTER_H #else #include ",0,train bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes. The classes in tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h have two implementations. 
This is achieved by using a #if directive to provide the implementation directly in each header files, or alternately, a no-op implementation on mobile platforms from one of the header files mobile_{counter,gauge,percentile_sampler,sampler}.h. I believe the intent is that the mobile*.h be used only via the main header files, and never be included directly, but nothing was preventing this. If someone had included one of the mobile*.h files directly, and the resulting object file were linked with one that used the primary header files on a non-mobile platform, it may cause problems. There would have been no error at compile or link time, yet the classes would be defined in two different ways, leading to an unchecked ODR violation and undefined results. For example, the linker potentially could pick an arbitrary version of each routine in the class. This change tries to avoid the potential problem in two ways: - by restricting the visibility of the mobile_*.h variants (for bazel builds, at least); and - by causing the mobile_*.h files to use #error if they appear not to have been used on a mobile platform, or not included from their respective primary header files. Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime build rule. PiperOrigin-RevId: 341680724 Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",gauge.h,"@@ -24,7 +24,10 @@ limitations under the License. // We replace this implementation with a null implementation for mobile // platforms. #ifdef IS_MOBILE_PLATFORM +#define TENSORFLOW_INCLUDED_FROM_GAUGE_H // prevent accidental use of + // mobile_gauge.h #include ""tensorflow/core/lib/monitoring/mobile_gauge.h"" +#undef TENSORFLOW_INCLUDED_FROM_GAUGE_H #else #include ",0,train bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes. The classes in tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h have two implementations. This is achieved by using a #if directive to provide the implementation directly in each header files, or alternately, a no-op implementation on mobile platforms from one of the header files mobile_{counter,gauge,percentile_sampler,sampler}.h. I believe the intent is that the mobile*.h be used only via the main header files, and never be included directly, but nothing was preventing this. If someone had included one of the mobile*.h files directly, and the resulting object file were linked with one that used the primary header files on a non-mobile platform, it may cause problems. There would have been no error at compile or link time, yet the classes would be defined in two different ways, leading to an unchecked ODR violation and undefined results. For example, the linker potentially could pick an arbitrary version of each routine in the class. This change tries to avoid the potential problem in two ways: - by restricting the visibility of the mobile_*.h variants (for bazel builds, at least); and - by causing the mobile_*.h files to use #error if they appear not to have been used on a mobile platform, or not included from their respective primary header files. Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime build rule. PiperOrigin-RevId: 341680724 Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",mobile_counter.h,"@@ -18,6 +18,14 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_LIB_MONITORING_MOBILE_COUNTER_H_ #define TENSORFLOW_CORE_LIB_MONITORING_MOBILE_COUNTER_H_ +#if !defined(IS_MOBILE_PLATFORM) || !defined(TENSORFLOW_INCLUDED_FROM_COUNTER_H) +// If this header file were included directly, and something else included its +// non-mobile counterpart, there could be an unchecked ODR violation on the +// classes below. +#error do not include mobile_counter.h directly; use counter.h instead +#endif // !defined(IS_MOBILE_PLATFORM) || + // !defined(TENSORFLOW_INCLUDED_FROM_COUNTER_H) + #include ""tensorflow/core/lib/core/status.h"" #include ""tensorflow/core/platform/macros.h"" #include ""tensorflow/core/platform/types.h"" ",0,train bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes. The classes in tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h have two implementations. This is achieved by using a #if directive to provide the implementation directly in each header files, or alternately, a no-op implementation on mobile platforms from one of the header files mobile_{counter,gauge,percentile_sampler,sampler}.h. I believe the intent is that the mobile*.h be used only via the main header files, and never be included directly, but nothing was preventing this. If someone had included one of the mobile*.h files directly, and the resulting object file were linked with one that used the primary header files on a non-mobile platform, it may cause problems. There would have been no error at compile or link time, yet the classes would be defined in two different ways, leading to an unchecked ODR violation and undefined results. For example, the linker potentially could pick an arbitrary version of each routine in the class. This change tries to avoid the potential problem in two ways: - by restricting the visibility of the mobile_*.h variants (for bazel builds, at least); and - by causing the mobile_*.h files to use #error if they appear not to have been used on a mobile platform, or not included from their respective primary header files. Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime build rule. PiperOrigin-RevId: 341680724 Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",mobile_gauge.h,"@@ -18,6 +18,14 @@ limitations under the License. #ifndef TENSORFLOW_CORE_LIB_MONITORING_MOBILE_GAUGE_H_ #define TENSORFLOW_CORE_LIB_MONITORING_MOBILE_GAUGE_H_ +#if !defined(IS_MOBILE_PLATFORM) || !defined(TENSORFLOW_INCLUDED_FROM_GAUGE_H) +// If this header file were included directly, and something else included its +// non-mobile counterpart, there could be an unchecked ODR violation on the +// classes below. +#error do not include mobile_gauge.h directly; use gauge.h instead +#endif // !defined(IS_MOBILE_PLATFORM) || + // !defined(TENSORFLOW_INCLUDED_FROM_GAUGE_H) + #include ""tensorflow/core/lib/core/status.h"" #include ""tensorflow/core/platform/macros.h"" #include ""tensorflow/core/platform/types.h"" ",0,train bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes. The classes in tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h have two implementations. This is achieved by using a #if directive to provide the implementation directly in each header files, or alternately, a no-op implementation on mobile platforms from one of the header files mobile_{counter,gauge,percentile_sampler,sampler}.h. 
I believe the intent is that the mobile*.h be used only via the main header files, and never be included directly, but nothing was preventing this. If someone had included one of the mobile*.h files directly, and the resulting object file were linked with one that used the primary header files on a non-mobile platform, it may cause problems. There would have been no error at compile or link time, yet the classes would be defined in two different ways, leading to an unchecked ODR violation and undefined results. For example, the linker potentially could pick an arbitrary version of each routine in the class. This change tries to avoid the potential problem in two ways: - by restricting the visibility of the mobile_*.h variants (for bazel builds, at least); and - by causing the mobile_*.h files to use #error if they appear not to have been used on a mobile platform, or not included from their respective primary header files. Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime build rule. PiperOrigin-RevId: 341680724 Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",mobile_percentile_sampler.h,"@@ -13,9 +13,20 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +// Null implementation of the PercentileSampler metric for mobile platforms. + #ifndef TENSORFLOW_CORE_LIB_MONITORING_MOBILE_PERCENTILE_SAMPLER_H_ #define TENSORFLOW_CORE_LIB_MONITORING_MOBILE_PERCENTILE_SAMPLER_H_ +#if !defined(IS_MOBILE_PLATFORM) || \ + !defined(TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H) +// If this header file were included directly, and something else included its +// non-mobile counterpart, there could be an unchecked ODR violation on the +// classes below. +#error do not include mobile_percentile_sampler.h directly; use percetile_sampler.h instead +#endif // !defined(IS_MOBILE_PLATFORM) || + // !defined(TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H) + #include ""tensorflow/core/lib/core/status.h"" #include ""tensorflow/core/lib/monitoring/collection_registry.h"" #include ""tensorflow/core/lib/monitoring/metric_def.h"" ",0,train bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes. The classes in tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h have two implementations. This is achieved by using a #if directive to provide the implementation directly in each header files, or alternately, a no-op implementation on mobile platforms from one of the header files mobile_{counter,gauge,percentile_sampler,sampler}.h. I believe the intent is that the mobile*.h be used only via the main header files, and never be included directly, but nothing was preventing this. If someone had included one of the mobile*.h files directly, and the resulting object file were linked with one that used the primary header files on a non-mobile platform, it may cause problems. There would have been no error at compile or link time, yet the classes would be defined in two different ways, leading to an unchecked ODR violation and undefined results. For example, the linker potentially could pick an arbitrary version of each routine in the class. 
This change tries to avoid the potential problem in two ways: - by restricting the visibility of the mobile_*.h variants (for bazel builds, at least); and - by causing the mobile_*.h files to use #error if they appear not to have been used on a mobile platform, or not included from their respective primary header files. Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime build rule. PiperOrigin-RevId: 341680724 Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",mobile_sampler.h,"@@ -18,6 +18,14 @@ limitations under the License. #ifndef TENSORFLOW_CORE_LIB_MONITORING_MOBILE_SAMPLER_H_ #define TENSORFLOW_CORE_LIB_MONITORING_MOBILE_SAMPLER_H_ +#if !defined(IS_MOBILE_PLATFORM) || !defined(TENSORFLOW_INCLUDED_FROM_SAMPLER_H) +// If this header file were included directly, and something else included its +// non-mobile counterpart, there could be an unchecked ODR violation on the +// classes below. +#error do not include mobile_sampler.h directly; use sampler.h to include it instead +#endif // !defined(IS_MOBILE_PLATFORM) || + // !defined(TENSORFLOW_INCLUDED_FROM_SAMPLER_H) + #include #include ""tensorflow/core/framework/summary.pb.h"" ",0,train bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes. The classes in tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h have two implementations. This is achieved by using a #if directive to provide the implementation directly in each header files, or alternately, a no-op implementation on mobile platforms from one of the header files mobile_{counter,gauge,percentile_sampler,sampler}.h. I believe the intent is that the mobile*.h be used only via the main header files, and never be included directly, but nothing was preventing this. If someone had included one of the mobile*.h files directly, and the resulting object file were linked with one that used the primary header files on a non-mobile platform, it may cause problems. There would have been no error at compile or link time, yet the classes would be defined in two different ways, leading to an unchecked ODR violation and undefined results. For example, the linker potentially could pick an arbitrary version of each routine in the class. This change tries to avoid the potential problem in two ways: - by restricting the visibility of the mobile_*.h variants (for bazel builds, at least); and - by causing the mobile_*.h files to use #error if they appear not to have been used on a mobile platform, or not included from their respective primary header files. Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime build rule. PiperOrigin-RevId: 341680724 Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",percentile_sampler.h,"@@ -24,7 +24,11 @@ limitations under the License. // We replace this implementation with a null implementation for mobile // platforms. #ifdef IS_MOBILE_PLATFORM +#define TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H // prevent accidental use + // of +// mobile_percentile_sampler.h #include ""tensorflow/core/lib/monitoring/mobile_percentile_sampler.h"" +#undef TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H #else #include ",0,train bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes. The classes in tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h have two implementations. 
This is achieved by using a #if directive to provide the implementation directly in each header files, or alternately, a no-op implementation on mobile platforms from one of the header files mobile_{counter,gauge,percentile_sampler,sampler}.h. I believe the intent is that the mobile*.h be used only via the main header files, and never be included directly, but nothing was preventing this. If someone had included one of the mobile*.h files directly, and the resulting object file were linked with one that used the primary header files on a non-mobile platform, it may cause problems. There would have been no error at compile or link time, yet the classes would be defined in two different ways, leading to an unchecked ODR violation and undefined results. For example, the linker potentially could pick an arbitrary version of each routine in the class. This change tries to avoid the potential problem in two ways: - by restricting the visibility of the mobile_*.h variants (for bazel builds, at least); and - by causing the mobile_*.h files to use #error if they appear not to have been used on a mobile platform, or not included from their respective primary header files. Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime build rule. PiperOrigin-RevId: 341680724 Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",sampler.h,"@@ -24,7 +24,10 @@ limitations under the License. // We replace this implementation with a null implementation for mobile // platforms. #ifdef IS_MOBILE_PLATFORM +#define TENSORFLOW_INCLUDED_FROM_SAMPLER_H // prevent accidental use of + // mobile_sampler.h #include ""tensorflow/core/lib/monitoring/mobile_sampler.h"" +#undef TENSORFLOW_INCLUDED_FROM_SAMPLER_H #else #include ",0,train 4ec3fcdc87687d33c1597aff9296041a6bb00434,tensorflow/tensorflow,"Adds support for explicitly assigning the replica to the VariableDeviceChooser. This is necessary for when the device with replica is set in a surrounding arg_scope. PiperOrigin-RevId: 200567897",variables.py,"@@ -712,7 +712,8 @@ class VariableDeviceChooser(object): num_tasks=0, job_name='ps', device_type='CPU', - device_index=0): + device_index=0, + replica=None): """"""Initialize VariableDeviceChooser. Usage: @@ -733,12 +734,15 @@ class VariableDeviceChooser(object): self._job_name = job_name self._device_type = device_type self._device_index = device_index + self._replica = replica self._num_tasks = num_tasks self._next_task_id = 0 def __call__(self, op): - device_spec = tf_device.DeviceSpec(device_type=self._device_type, - device_index=self._device_index) + device_spec = tf_device.DeviceSpec( + replica=self._replica, + device_type=self._device_type, + device_index=self._device_index) if self._num_tasks > 0: task_id = self._next_task_id self._next_task_id = (self._next_task_id + 1) % self._num_tasks ",0,train 4ec3fcdc87687d33c1597aff9296041a6bb00434,tensorflow/tensorflow,"Adds support for explicitly assigning the replica to the VariableDeviceChooser. This is necessary for when the device with replica is set in a surrounding arg_scope. 
PiperOrigin-RevId: 200567897",variables_test.py,"@@ -506,6 +506,35 @@ class VariablesTest(test.TestCase): self.assertDeviceEqual(e.device, '/job:ps/task:1/cpu:0') self.assertDeviceEqual(e.initial_value.device, '/cpu:99') + def testVariableWithVariableDeviceChooserWithReplica(self): + + with ops.Graph().as_default(): + device_fn = variables_lib2.VariableDeviceChooser(replica=3, num_tasks=2) + with arg_scope([variables_lib2.variable], device=device_fn): + a = variables_lib2.variable('a', []) + b = variables_lib2.variable('b', []) + c = variables_lib2.variable('c', [], device='cpu:12') + d = variables_lib2.variable('d', []) + with ops.device('cpu:99'): + e_init = constant_op.constant(12) + e = variables_lib2.variable('e', initializer=e_init) + # The values below highlight how the VariableDeviceChooser puts initial + # values on the same device as the variable job. + self.assertDeviceEqual(a.device, '/job:ps/replica:3/task:0/cpu:0') + self.assertEqual(a.initial_value.op.colocation_groups(), + a.op.colocation_groups()) + self.assertDeviceEqual(b.device, '/job:ps/replica:3/task:1/cpu:0') + self.assertEqual(b.initial_value.op.colocation_groups(), + b.op.colocation_groups()) + self.assertDeviceEqual(c.device, '/cpu:12') + self.assertEqual(c.initial_value.op.colocation_groups(), + c.op.colocation_groups()) + self.assertDeviceEqual(d.device, '/job:ps/replica:3/task:0/cpu:0') + self.assertEqual(d.initial_value.op.colocation_groups(), + d.op.colocation_groups()) + self.assertDeviceEqual(e.device, '/job:ps/replica:3/task:1/cpu:0') + self.assertDeviceEqual(e.initial_value.device, '/cpu:99') + def testVariableGPUPlacement(self): with ops.Graph().as_default(): @@ -930,8 +959,8 @@ class AssignFromCheckpointTest(test.TestCase): return saver.save(sess, checkpoint_dir, global_step=global_step) def testLoadExistingVariables(self): - model_dir = tempfile.mkdtemp(prefix=os.path.join(self.get_temp_dir(), - 'load_existing_variables')) + model_dir = tempfile.mkdtemp( + prefix=os.path.join(self.get_temp_dir(), 'load_existing_variables')) init_value0 = 10.0 init_value1 = 20.0 @@ -944,8 +973,8 @@ class AssignFromCheckpointTest(test.TestCase): var1 = variables_lib2.variable('my_var1', shape=[]) vars_to_restore = {'v0': var0, 'v1': var1} - op, feed_dict = variables_lib2.assign_from_checkpoint(model_path, - vars_to_restore) + op, feed_dict = variables_lib2.assign_from_checkpoint( + model_path, vars_to_restore) # Initialize the variables. sess.run(variables_lib.global_variables_initializer()) @@ -960,8 +989,8 @@ class AssignFromCheckpointTest(test.TestCase): # Tests restoring PartitionedVariables and tests using a dictionary # of lists as the assign_from_checkpoint() var_list param. def testLoadPartitionedVariables(self): - model_dir = tempfile.mkdtemp(prefix=os.path.join( - self.get_temp_dir(), 'load_partitioned_variables')) + model_dir = tempfile.mkdtemp( + prefix=os.path.join(self.get_temp_dir(), 'load_partitioned_variables')) init_value0 = np.array([[10.0, 11.0], [12.0, 13.0]]) init_value1 = np.array([20.0]) # Partitioned into 1 part, edge case. 
@@ -974,15 +1003,14 @@ class AssignFromCheckpointTest(test.TestCase): partitioner = partitioned_variables.variable_axis_size_partitioner(2) var0 = variables_lib2.variable( 'var0', shape=init_value0.shape, partitioner=partitioner) - var0full = variables_lib2.variable( - 'var0full', shape=init_value0.shape) + var0full = variables_lib2.variable('var0full', shape=init_value0.shape) var1 = variables_lib2.variable( 'var1', shape=init_value1.shape, partitioner=partitioner) # Convert var0 and var1 into a list of underlying variables. vars_to_restore = {'var0': list(var0) + [var0full], 'var1': list(var1)} - op, feed_dict = variables_lib2.assign_from_checkpoint(model_path, - vars_to_restore) + op, feed_dict = variables_lib2.assign_from_checkpoint( + model_path, vars_to_restore) # Initialize the variables. sess.run(variables_lib.global_variables_initializer()) @@ -992,16 +1020,18 @@ class AssignFromCheckpointTest(test.TestCase): # Request and test the variable values. PartitionedVariables can't # be evaled so we wrap them in an identity. - self.assertTrue(np.array_equal( - init_value0, array_ops.identity(var0).eval())) - self.assertTrue(np.array_equal( - init_value0, var0full.eval())) - self.assertTrue(np.array_equal( - init_value1, array_ops.identity(var1).eval())) + self.assertTrue( + np.array_equal(init_value0, + array_ops.identity(var0).eval())) + self.assertTrue(np.array_equal(init_value0, var0full.eval())) + self.assertTrue( + np.array_equal(init_value1, + array_ops.identity(var1).eval())) def testRaisesValueErrorIfAVariableIsntFound(self): - model_dir = tempfile.mkdtemp(prefix=os.path.join( - self.get_temp_dir(), 'raises_value_error_if_var_isnt_found')) + model_dir = tempfile.mkdtemp( + prefix=os.path.join(self.get_temp_dir(), + 'raises_value_error_if_var_isnt_found')) init_value0 = 10.0 init_value1 = 20.0 @@ -1019,8 +1049,9 @@ class AssignFromCheckpointTest(test.TestCase): variables_lib2.assign_from_checkpoint(model_path, vars_to_restore) def testInitFromCheckpointWithScopes(self): - model_dir = tempfile.mkdtemp(prefix=os.path.join( - self.get_temp_dir(), 'init_from_checkpoint_with_scopes')) + model_dir = tempfile.mkdtemp( + prefix=os.path.join(self.get_temp_dir(), + 'init_from_checkpoint_with_scopes')) init_value0 = np.asarray( [1.0, 3.0, 9.0], dtype=np.float32).reshape((1, 3, 1)) @@ -1038,8 +1069,8 @@ class AssignFromCheckpointTest(test.TestCase): var1 = variables_lib2.variable('my_var1', shape=init_value1.shape) vars_to_restore = {'layer0/v0': var0, 'layer1/v1': var1} - op, feed_dict = variables_lib2.assign_from_checkpoint(model_path, - vars_to_restore) + op, feed_dict = variables_lib2.assign_from_checkpoint( + model_path, vars_to_restore) # Initialize the variables. 
sess.run(variables_lib.global_variables_initializer()) @@ -1081,8 +1112,8 @@ class AssignFromCheckpointFnTest(test.TestCase): return saver.save(sess, checkpoint_dir, global_step=global_step) def testLoadExistingVariables(self): - model_dir = tempfile.mkdtemp(prefix=os.path.join(self.get_temp_dir(), - 'load_existing_variables')) + model_dir = tempfile.mkdtemp( + prefix=os.path.join(self.get_temp_dir(), 'load_existing_variables')) if gfile.Exists(model_dir): gfile.DeleteRecursively(model_dir) @@ -1097,8 +1128,8 @@ class AssignFromCheckpointFnTest(test.TestCase): var1 = variables_lib2.variable('my_var1', shape=[]) vars_to_restore = {'v0': var0, 'v1': var1} - init_fn = variables_lib2.assign_from_checkpoint_fn(model_path, - vars_to_restore) + init_fn = variables_lib2.assign_from_checkpoint_fn( + model_path, vars_to_restore) # Initialize the variables. sess.run(variables_lib.global_variables_initializer()) @@ -1111,8 +1142,9 @@ class AssignFromCheckpointFnTest(test.TestCase): self.assertEqual(init_value1, var1.eval()) def testLoadExistingVariablesDifferentShapeDefaultDoesNotAllowReshape(self): - model_dir = tempfile.mkdtemp(prefix=os.path.join( - self.get_temp_dir(), 'load_existing_vars_no_reshape')) + model_dir = tempfile.mkdtemp( + prefix=os.path.join(self.get_temp_dir(), + 'load_existing_vars_no_reshape')) if gfile.Exists(model_dir): gfile.DeleteRecursively(model_dir) @@ -1127,8 +1159,8 @@ class AssignFromCheckpointFnTest(test.TestCase): var1 = variables_lib2.variable('my_var1', shape=[]) vars_to_restore = {'v0': var0, 'v1': var1} - init_fn = variables_lib2.assign_from_checkpoint_fn(model_path, - vars_to_restore) + init_fn = variables_lib2.assign_from_checkpoint_fn( + model_path, vars_to_restore) # Initialize the variables. sess.run(variables_lib.global_variables_initializer()) @@ -1138,9 +1170,10 @@ class AssignFromCheckpointFnTest(test.TestCase): init_fn(sess) def testLoadExistingVariablesDifferentShapeAllowReshape(self): - model_dir = tempfile.mkdtemp(prefix=os.path.join( - self.get_temp_dir(), - 'load_existing_variables_different_shape_allow_reshape')) + model_dir = tempfile.mkdtemp( + prefix=os.path.join( + self.get_temp_dir(), + 'load_existing_variables_different_shape_allow_reshape')) if gfile.Exists(model_dir): gfile.DeleteRecursively(model_dir) @@ -1169,8 +1202,8 @@ class AssignFromCheckpointFnTest(test.TestCase): self.assertEqual(init_value1, var1.eval()) def testNotFoundError(self): - model_dir = tempfile.mkdtemp(prefix=os.path.join(self.get_temp_dir(), - 'not_found_error')) + model_dir = tempfile.mkdtemp( + prefix=os.path.join(self.get_temp_dir(), 'not_found_error')) if gfile.Exists(model_dir): gfile.DeleteRecursively(model_dir) @@ -1186,8 +1219,8 @@ class AssignFromCheckpointFnTest(test.TestCase): var2 = variables_lib2.variable('my_var2', shape=[]) vars_to_restore = {'v0': var0, 'v1': var1, 'v2': var2} - init_fn = variables_lib2.assign_from_checkpoint_fn(model_path, - vars_to_restore) + init_fn = variables_lib2.assign_from_checkpoint_fn( + model_path, vars_to_restore) # Initialize the variables. 
sess.run(variables_lib.global_variables_initializer()) @@ -1197,8 +1230,8 @@ class AssignFromCheckpointFnTest(test.TestCase): init_fn(sess) def testMissingVariablesList(self): - model_dir = tempfile.mkdtemp(prefix=os.path.join(self.get_temp_dir(), - 'missing_variables_list')) + model_dir = tempfile.mkdtemp( + prefix=os.path.join(self.get_temp_dir(), 'missing_variables_list')) if gfile.Exists(model_dir): gfile.DeleteRecursively(model_dir) @@ -1228,8 +1261,8 @@ class AssignFromCheckpointFnTest(test.TestCase): self.assertEqual(init_value1, var1.eval()) def testMissingVariablesDict(self): - model_dir = tempfile.mkdtemp(prefix=os.path.join(self.get_temp_dir(), - 'missing_variables_dict')) + model_dir = tempfile.mkdtemp( + prefix=os.path.join(self.get_temp_dir(), 'missing_variables_dict')) if gfile.Exists(model_dir): gfile.DeleteRecursively(model_dir) @@ -1279,9 +1312,8 @@ class ZeroInitializerOpTest(test.TestCase): def testZeroInitializer(self): for dtype in (dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64): for use_init in (False, True): - self._testZeroInitializer( - [10, 20], array_ops.ones( - [10, 20], dtype=dtype), use_init) + self._testZeroInitializer([10, 20], array_ops.ones( + [10, 20], dtype=dtype), use_init) class ZeroVarInitializerOpTest(test.TestCase): ",0,train 5de9d3c392d5531eb3bbcefd007fcc25db7448cd,tensorflow/tensorflow,"Added the ability to wait for queues to start running before returning from QueueRunner::Start(). This provides a reliable way to check the value of the status_ variable. Change: 137769682",queue_runner.cc,"@@ -48,6 +48,7 @@ Status QueueRunner::Init(const QueueRunnerDef& queue_runner_def) { thread_pool_.reset(new thread::ThreadPool( Env::Default(), SanitizeThreadSuffix(queue_name_), runs_)); should_stop_ = false; + return Status::OK(); } @@ -57,11 +58,29 @@ QueueRunner::~QueueRunner() { Join(); } -Status QueueRunner::Start(Session* sess) { +Status QueueRunner::Start(Session* sess) { return Start(sess, 0); } + +Status QueueRunner::Start(Session* sess, int wait_for) { + counter_.reset(new BlockingCounter(runs_)); for (const string& enqueue_op : enqueue_op_names_) { thread_pool_->Schedule( std::bind(&QueueRunner::Run, this, sess, enqueue_op)); } + // Wait for up to 'wait_for' milliseconds. + if (wait_for > 0) { + if (!counter_->WaitFor(std::chrono::milliseconds(wait_for))) { + return Status(error::DEADLINE_EXCEEDED, + ""Queues not fed before the timeout""); + } + // Check the status of the queue runner as well as the result of the enqueue + // operations. + mutex_lock l(mu_); + if (!enqueue_status_.ok()) { + return enqueue_status_; + } else { + return status_; + } + } return Status::OK(); } @@ -76,13 +95,23 @@ Status QueueRunner::Stop(Session* sess) { Status QueueRunner::Join() { thread_pool_.reset(); + mutex_lock l(mu_); return status_; } void QueueRunner::Run(Session* sess, const string& enqueue_op) { bool decremented = false; + bool first_iteration = true; while (!should_stop_.load()) { auto status = sess->Run({}, {}, {enqueue_op}, nullptr); + if (first_iteration) { + if (!status.ok()) { + mutex_lock l(mu_); + enqueue_status_ = status; + } + counter_->DecrementCount(); + first_iteration = false; + } if (status.ok()) { continue; } else if (queue_closed_exception_types_.count( @@ -114,6 +143,7 @@ void QueueRunner::Run(Session* sess, const string& enqueue_op) { // subsequent queues. 
Stop(sess); } + first_iteration = false; } if (!decremented) { ",0,train 5de9d3c392d5531eb3bbcefd007fcc25db7448cd,tensorflow/tensorflow,"Added the ability to wait for queues to start running before returning from QueueRunner::Start(). This provides a reliable way to check the value of the status_ variable. Change: 137769682",queue_runner.h,"@@ -21,6 +21,7 @@ limitations under the License. #include #include +#include ""tensorflow/core/lib/core/blocking_counter.h"" #include ""tensorflow/core/lib/core/error_codes.pb.h"" #include ""tensorflow/core/lib/core/status.h"" #include ""tensorflow/core/lib/core/threadpool.h"" @@ -46,6 +47,10 @@ class QueueRunner { // Starts the queue runner with the given session. Status Start(Session* sess); + // Starts the queue runner with the given session, and wait for up to the + // specified time (in milliseconds) for the queues to start to fill up. + Status Start(Session* sess, int wait_for); + // Requests to stop and runs the cancel op. Status Stop(Session* sess); @@ -78,7 +83,9 @@ class QueueRunner { mutex mu_; // TODO(yuefengz): implement c++ coordinator. int runs_ = 0; - Status status_; + Status status_ GUARDED_BY(mu_); + Status enqueue_status_ GUARDED_BY(mu_); + std::unique_ptr counter_; }; } // namespace tensorflow ",0,train 5de9d3c392d5531eb3bbcefd007fcc25db7448cd,tensorflow/tensorflow,"Added the ability to wait for queues to start running before returning from QueueRunner::Start(). This provides a reliable way to check the value of the status_ variable. Change: 137769682",queue_runner_test.cc,"@@ -317,5 +317,22 @@ TEST(QueueRunnerTest, EmptyEnqueueOps) { Code::INVALID_ARGUMENT); } +TEST(QueueRunnerTest, StartTimeout) { + GraphDef graph_def = BuildDoubleQueueGraph(); + SessionOptions options; + std::unique_ptr session(NewSession(options)); + TF_CHECK_OK(session->Create(graph_def)); + + QueueRunnerDef queue_runner_def = BuildQueueRunnerDef( + kQueueName1, {kEnqueueOp1}, kCloseOp1, kCancelOp1, {}); + + std::unique_ptr qr; + TF_EXPECT_OK(QueueRunner::New(queue_runner_def, &qr)); + // This will timeout since queue0 is not fed and queue1 is fetching data from + // queue0. + EXPECT_EQ(qr->Start(session.get(), 1).code(), Code::DEADLINE_EXCEEDED); + session->Close(); +} + } // namespace } // namespace tensorflow ",0,train 5de9d3c392d5531eb3bbcefd007fcc25db7448cd,tensorflow/tensorflow,"Added the ability to wait for queues to start running before returning from QueueRunner::Start(). This provides a reliable way to check the value of the status_ variable. Change: 137769682",blocking_counter.h,"@@ -31,7 +31,7 @@ class BlockingCounter { DCHECK_EQ((initial_count << 1) >> 1, initial_count); } - ~BlockingCounter() { DCHECK_EQ(state_ >> 1, 0); } + ~BlockingCounter() {} inline void DecrementCount() { unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2; @@ -53,6 +53,20 @@ class BlockingCounter { cond_var_.wait(l); } } + // Wait for the specified time, return false iff the count has not dropped to + // zero before the timeout expired. 
+ inline bool WaitFor(std::chrono::milliseconds ms) { + unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel); + if ((v >> 1) == 0) return true; + mutex_lock l(mu_); + while (!notified_) { + const std::cv_status status = cond_var_.wait_for(l, ms); + if (status == std::cv_status::timeout) { + return false; + } + } + return true; + } private: mutex mu_; ",0,train 9cd3b856a732c62e803ad60d2464e5043a9be7c1,tensorflow/tensorflow,Just exporting linalg.normalize,nn_impl.py,"@@ -436,7 +436,7 @@ def swish(features): return features * math_ops.sigmoid(features) -@tf_export(""math.normalize"", ""linalg.normalize"", ""nn.normalize"") +@tf_export(""linalg.normalize"") def normalize(tensor, ord='euclidean', axis=None, ",0,test a68bf328ff6d4261203f2aa723d77174a771a0ec,tensorflow/tensorflow,minor spelling tweaks for eager execution docs (#16355),evaluator.py,"@@ -178,7 +178,7 @@ class Evaluator(object): call_op: An op that updates evaluation state on a mini-batch of examples. Must generate an tf.errors.OutOfRangeError when done. results_op: A dictionary of tensors that compute the final evaluation - results from the evaulation state. + results from the evaluation state. sess: The Session to run the evaluation in. Defaults to the default Session. ",0,test a68bf328ff6d4261203f2aa723d77174a771a0ec,tensorflow/tensorflow,minor spelling tweaks for eager execution docs (#16355),resnet50.py,"@@ -97,7 +97,7 @@ class _ConvBlock(tfe.Network): Args: kernel_size: the kernel size of middle conv layer at main path - filters: list of integers, the filterss of 3 conv layer at main path + filters: list of integers, the filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names data_format: data_format for the input ('channels_first' or ",0,test a68bf328ff6d4261203f2aa723d77174a771a0ec,tensorflow/tensorflow,minor spelling tweaks for eager execution docs (#16355),rnn_ptb.py,"@@ -88,7 +88,7 @@ class Embedding(tf.layers.Layer): class PTBModel(tfe.Network): - """"""LSTM for word language modelling. + """"""LSTM for word language modeling. Model described in: (Zaremba, et. al.) Recurrent Neural Network Regularization @@ -340,7 +340,7 @@ if __name__ == ""__main__"": parser.add_argument( ""--logdir"", type=str, default="""", help=""Directory for checkpoint."") parser.add_argument( - ""--epoch"", type=int, default=20, help=""Number of epoches."") + ""--epoch"", type=int, default=20, help=""Number of epochs."") parser.add_argument(""--batch-size"", type=int, default=20, help=""Batch size."") parser.add_argument( ""--seq-len"", type=int, default=35, help=""Sequence length."") ",0,test a68bf328ff6d4261203f2aa723d77174a771a0ec,tensorflow/tensorflow,minor spelling tweaks for eager execution docs (#16355),data.py,"@@ -51,11 +51,11 @@ def get_non_parenthesis_words(items): """"""Get the non-parenthesis items from a SNLI parsed sentence. Args: - items: Data items from a parsed SNLI setence, with parentheses. E.g., + items: Data items from a parsed SNLI sentence, with parentheses. E.g., [""("", ""Man"", ""("", ""("", ""("", ""("", ""("", ""wearing"", ""pass"", "")"", ... Returns: - A list of non-parenthis word items, all converted to lower case. E.g., + A list of non-parentheses word items, all converted to lower case. E.g., [""man"", ""wearing"", ""pass"", ... 
"""""" return [x.lower() for x in items if x not in PARENTHESES and x] @@ -201,7 +201,7 @@ def load_word_vectors(data_root, vocab): def calculate_bins(length2count, min_bin_size): - """"""Cacluate bin boundaries given a histogram of lengths and mininum bin size. + """"""Calculate bin boundaries given a histogram of lengths and minimum bin size. Args: length2count: A `dict` mapping length to sentence count. @@ -335,9 +335,9 @@ class SnliData(object): # The sorting above and the batching here makes sure that sentences of # similar max lengths are batched together, minimizing the inefficiency # due to uneven max lengths. The sentences are batched differently in - # each call to get_generator() due to the shuffling before sotring + # each call to get_generator() due to the shuffling before sorting # above. The pad_and_reverse_word_ids() and pad_transitions() functions - # take care of any remaning unevenness of the max sentence lengths. + # take care of any remaining unevenness of the max sentence lengths. end = min(begin + batch_size, len(labels)) # Transpose, because the SPINN model requires time-major, instead of # batch-major. ",0,test a68bf328ff6d4261203f2aa723d77174a771a0ec,tensorflow/tensorflow,minor spelling tweaks for eager execution docs (#16355),network_test.py,"@@ -688,7 +688,7 @@ class NetworkTest(test.TestCase): net2(one) # Layer names typically are globally unique rather than being unique within # the scope of their first use. However, within a Network they must be named - # locally so that previous Layer consutrciton does not interfere with + # locally so that previous Layer construction does not interfere with # variable naming (e.g. add a Layer construction before the Network, # suddenly your previously saved checkpoint is incompatible). self.assertEqual(""dense"", net1.l1.name) ",0,test a68bf328ff6d4261203f2aa723d77174a771a0ec,tensorflow/tensorflow,minor spelling tweaks for eager execution docs (#16355),saver.py,"@@ -82,7 +82,7 @@ def restore_variables_on_create(save_path, map_func=None): map_func_wrapper = lambda self, x: x else: if not callable(map_func): - raise ValueError(""map_func must be callaled."") + raise ValueError(""map_func must be callable."") map_func_wrapper = lambda self, x: map_func(x) ckpt_var_cache = dict() ",0,test a844366fa89373a29590f38f0a0a15e9aff1694b,tensorflow/tensorflow,"Added fix for Mali G710 matching to G71 in gpu_info. 
PiperOrigin-RevId: 414020574 Change-Id: Ib1ff48a2639462ccfbc27801daf3b3bf1fe2c5e9",gpu_info.cc,"@@ -105,20 +105,21 @@ AdrenoGpu GetAdrenoGpuVersion(const std::string& gpu_description) { } MaliGpu GetMaliGpuVersion(const std::string& gpu_description) { - const std::map kMapping = { + // Order must be preserved + const std::vector> kMapping = { {""t604"", MaliGpu::kT604}, {""t622"", MaliGpu::kT622}, {""t624"", MaliGpu::kT624}, {""t628"", MaliGpu::kT628}, {""t658"", MaliGpu::kT658}, {""t678"", MaliGpu::kT678}, {""t720"", MaliGpu::kT720}, {""t760"", MaliGpu::kT760}, {""t820"", MaliGpu::kT820}, {""t830"", MaliGpu::kT830}, {""t860"", MaliGpu::kT860}, {""t880"", MaliGpu::kT880}, - {""g31"", MaliGpu::kG31}, {""g51"", MaliGpu::kG51}, - {""g71"", MaliGpu::kG71}, {""g52"", MaliGpu::kG52}, + {""g310"", MaliGpu::kG310}, {""g31"", MaliGpu::kG31}, + {""g510"", MaliGpu::kG510}, {""g51"", MaliGpu::kG51}, + {""g52"", MaliGpu::kG52}, {""g57"", MaliGpu::kG57}, + {""g610"", MaliGpu::kG610}, {""g68"", MaliGpu::kG68}, + {""g710"", MaliGpu::kG710}, {""g71"", MaliGpu::kG71}, {""g72"", MaliGpu::kG72}, {""g76"", MaliGpu::kG76}, - {""g57"", MaliGpu::kG57}, {""g77"", MaliGpu::kG77}, - {""g68"", MaliGpu::kG68}, {""g78"", MaliGpu::kG78}, - {""g310"", MaliGpu::kG310}, {""g510"", MaliGpu::kG510}, - {""g610"", MaliGpu::kG610}, {""g710"", MaliGpu::kG710}, + {""g77"", MaliGpu::kG77}, {""g78"", MaliGpu::kG78}, }; for (const auto& v : kMapping) { if (gpu_description.find(v.first) != std::string::npos) { ",0,test 419ebe51e023e871590b19eb4df1c1fdbe9da51e,tensorflow/tensorflow,"[XLA:Python] Allow multiple partitions to correspond to a single executable. XLA SPMD partitioning only produces a single executable, rather than one per partition. PiperOrigin-RevId: 292646667 Change-Id: I810c80578bbd9fe7e785aa4798a849ecaba8db30",local_client.cc,"@@ -693,9 +693,12 @@ PyLocalExecutable::PyLocalExecutable( const int num_replicas = device_assignment_->replica_count(); const int num_partitions = device_assignment_->computation_count(); - CHECK_EQ(num_partitions, executables_.size()) - << ""Number of executables "" << executables_.size() - << "" did not match number of partitions "" << num_partitions; + // SPMD sharding produces a single executable for multiple partitions. + if (executables_.size() > 1) { + CHECK_EQ(num_partitions, executables_.size()) + << ""Number of executables "" << executables_.size() + << "" did not match number of partitions "" << num_partitions; + } for (int replica = 0; replica < num_replicas; ++replica) { for (int partition = 0; partition < num_partitions; ++partition) { @@ -789,8 +792,11 @@ StatusOr> PyLocalExecutable::ExecuteHelper( auto compute_reservation = std::make_shared( device_state->compute_semaphore().ScopedAcquire(1)); + // SPMD sharding produces a single executable for multiple partitions. + int executable_idx = executables_.size() > 1 ? 
partition : 0; + StatusOr result_buffer_or_status = - executables_[partition]->RunAsync(argument_buffer_ptrs, options); + executables_[executable_idx]->RunAsync(argument_buffer_ptrs, options); VLOG(1) << ""Replica "" << replica << "" partition "" << partition << "" completed; ok="" << result_buffer_or_status.ok(); @@ -820,7 +826,7 @@ StatusOr> PyLocalExecutable::ExecuteHelper( device_state->ThenRelease( device_state->compute_stream(), - std::make_tuple(executables_[partition], compute_reservation, + std::make_tuple(executables_[executable_idx], compute_reservation, device_assignment_)); return absl::make_unique( result_buffer.on_host_shape(), result_buffer.on_device_shape(), ",0,train 775ee0b3f7e96a295920b8723cfb42d7e9d8cacb,tensorflow/tensorflow,"Extend `lmhlo.fusion` op rewrite pattern to `lmhlo.scatter` and `lmhlo.sort`. PiperOrigin-RevId: 404871130 Change-Id: I024d97c22643d9c313b015d0e1cd7ba6d1e778c3",kernel_ops_pattern.cc,"@@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Pattern to lower lmhlo.fusion ops to gpu dialect. +// Pattern to lower lmhlo ops with help of the ir emitter to gpu device code +// and gpu dialect ops (gpu.launch_func and gpu.memcpy). #include #include @@ -54,6 +55,7 @@ namespace tensorflow { using mlir::ArrayRef; +using mlir::Operation; using mlir::SmallVector; using mlir::Value; using mlir::memref::GetGlobalOp; @@ -67,8 +69,8 @@ using ConstantInfo = xla::gpu::GpuExecutable::ConstantInfo; namespace { -// Replaces all lmhlo.fusion ops within a module with tfrt_gpu.launch ops. -struct FusionRewritePattern : mlir::OpRewritePattern { +// Replaces lmhlo ops within a module with gpu.launch_func and gpu.memcpy ops. +struct KernelOpsPattern : mlir::OpRewritePattern { using OpRewritePattern::OpRewritePattern; mlir::LogicalResult matchAndRewrite( @@ -76,8 +78,8 @@ struct FusionRewritePattern : mlir::OpRewritePattern { }; struct RewriteData { - mlir::lmhlo::FusionOp fusion_op; - mlir::SetVector captures; + Operation* op; + mlir::SmallVector arguments; std::vector allocations; std::unique_ptr thunks; std::vector constants; @@ -93,13 +95,13 @@ static llvm::Error MakeError(xla::Status status) { return MakeError(status.error_message()); } -// Clones `fusion_op` into a function within a module with `captures` arguments. -// The `get_global_ops` are the def ops of `captures`, or null otherwise. +// Clones `op` into a function within a module with `arguments`. +// The `get_global_ops` are the def ops of `arguments`, or null otherwise. static std::tuple CloneToModule( - mlir::lmhlo::FusionOp fusion_op, mlir::ValueRange captures, + Operation* op, mlir::ValueRange arguments, mlir::MutableArrayRef get_global_ops) { - auto loc = fusion_op->getLoc(); - auto* context = fusion_op->getContext(); + auto loc = op->getLoc(); + auto* context = op->getContext(); mlir::OpBuilder builder(context); mlir::OwningModuleRef module_op = builder.create(loc); @@ -115,8 +117,8 @@ static std::tuple CloneToModule( } auto func_type = builder.getType( - mlir::TypeRange(captures), mlir::TypeRange()); - auto func_name = fusion_op->getParentOfType().getName(); + mlir::TypeRange(arguments), mlir::TypeRange()); + auto func_name = op->getParentOfType().getName(); auto func_op = builder.create(loc, func_name, func_type); // Annotate the function arguments if they refer to a memref.global op. 
for (auto pair : llvm::enumerate(get_global_ops)) { @@ -128,14 +130,14 @@ static std::tuple CloneToModule( builder.setInsertionPointToEnd(func_op.addEntryBlock()); mlir::BlockAndValueMapping mapping; - for (const auto& pair : llvm::zip_first(captures, func_op.getArguments())) + for (const auto& pair : llvm::zip_first(arguments, func_op.getArguments())) mapping.map(std::get<0>(pair), std::get<1>(pair)); // Clone the memref.get_global ops. for (auto get_global_op : get_global_ops) { if (!get_global_op) continue; mapping.map(get_global_op, builder.clone(*get_global_op)->getResult(0)); } - auto* clone = builder.clone(*fusion_op, mapping); + auto* clone = builder.clone(*op, mapping); auto name_loc = mlir::NameLoc::get(builder.getIdentifier(func_name)); clone->setLoc(mlir::FusedLoc::get(context, {loc, name_loc})); builder.create(loc); @@ -145,11 +147,10 @@ static std::tuple CloneToModule( // Converts the argument's shaped types into buffer allocations. static llvm::Expected> GetAllocations( - const mlir::SetVector& captures, - ArrayRef get_global_ops) { + ArrayRef arguments, ArrayRef get_global_ops) { std::vector allocations; - allocations.reserve(captures.size()); - for (Value argument : captures) { + allocations.reserve(arguments.size()); + for (Value argument : arguments) { mlir::ShapedType type = argument.getType().dyn_cast(); if (!type || !type.hasStaticShape()) return MakeError(""Expected static shapes""); @@ -208,23 +209,25 @@ Emit(mlir::FuncOp func_op, absl::Span allocations, std::move(ir_emitter_context.constants())); } -// Returns the data to rewrite fusion_op without changing the IR. -static llvm::Expected Match(mlir::lmhlo::FusionOp fusion_op) { +// Returns the data to rewrite op without changing the IR. +static llvm::Expected Match(Operation* op) { + mlir::SmallVector arguments = op->getOperands(); mlir::SetVector captures; - getUsedValuesDefinedAbove(fusion_op->getRegions(), captures); + getUsedValuesDefinedAbove(op->getRegions(), captures); + llvm::copy(captures, std::back_inserter(arguments)); - // Collect captures that are defined by a memref.get_global op. The created - // module's annotations make the ir emitter recognize them as constants. + // Collect arguments that are defined by a memref.get_global op. The + // created module's annotations make the ir emitter recognize them as + // constants. 
SmallVector get_global_ops; - get_global_ops.reserve(captures.size()); + get_global_ops.reserve(arguments.size()); llvm::transform( - captures, std::back_inserter(get_global_ops), + arguments, std::back_inserter(get_global_ops), [](Value argument) { return argument.getDefiningOp(); }); - auto allocations = GetAllocations(captures, get_global_ops); + auto allocations = GetAllocations(arguments, get_global_ops); if (!allocations) return allocations.takeError(); - auto module_op = - CloneToModule(fusion_op, captures.getArrayRef(), get_global_ops); + auto module_op = CloneToModule(op, arguments, get_global_ops); xla::HloModuleConfig hlo_module_config; xla::DebugOptions options = xla::GetDebugOptionsFromFlags(); @@ -265,21 +268,23 @@ static llvm::Expected Match(mlir::lmhlo::FusionOp fusion_op) { hlo_module_config, libdevice_dir); if (!ptx.ok()) return MakeError(ptx.status()); - return RewriteData{ - fusion_op, std::move(captures), std::move(*allocations), - std::move(thunks), std::move(constants), std::move(*ptx)}; + return RewriteData{op, + std::move(arguments), + std::move(*allocations), + std::move(thunks), + std::move(constants), + std::move(*ptx)}; } -// Replaces fusion_op with gpu.launch_func. -static void Rewrite(mlir::lmhlo::FusionOp fusion_op, - mlir::PatternRewriter& rewriter, - mlir::SymbolTable& symbol_table, ArrayRef captures, +// Replaces op with gpu.launch_func and gpu.memcpy ops. +static void Rewrite(Operation* op, mlir::PatternRewriter& rewriter, + mlir::SymbolTable& symbol_table, ArrayRef arguments, ThunkSequence* thunks, ArrayRef constants, mlir::StringRef gpu_module_data) { mlir::OpBuilder::InsertionGuard guard(rewriter); - auto loc = fusion_op->getLoc(); + auto loc = op->getLoc(); - rewriter.setInsertionPoint(fusion_op->getParentOfType()); + rewriter.setInsertionPoint(op->getParentOfType()); auto gpu_module = rewriter.create(loc, ""gpu_module""); symbol_table.insert(gpu_module); gpu_module->setAttr(tfrt::gpu::getGpuBinaryAttrName(), @@ -302,7 +307,7 @@ static void Rewrite(mlir::lmhlo::FusionOp fusion_op, static_cast(thunk.get()); auto get_argument = [&](const xla::BufferAllocation::Slice& slice) { assert(slice.offset() == 0 && slice.size() == copy_thunk->size_bytes()); - Value result = captures[slice.index()]; + Value result = arguments[slice.index()]; // Annotate defining memref.get_global with the gpu_module symbol. // Unlike kernel thunks below, which use the global in the kernel only. 
if (auto op = result.getDefiningOp()) { @@ -311,7 +316,7 @@ static void Rewrite(mlir::lmhlo::FusionOp fusion_op, } return result; }; - rewriter.setInsertionPoint(fusion_op); + rewriter.setInsertionPoint(op); rewriter.create( loc, mlir::TypeRange(), mlir::ValueRange(), get_argument(copy_thunk->destination()), @@ -321,11 +326,11 @@ static void Rewrite(mlir::lmhlo::FusionOp fusion_op, const auto* kernel_thunk = static_cast(thunk.get()); rewriter.setInsertionPointToStart(gpu_module.getBody()); - SmallVector arguments; - for (auto argument : kernel_thunk->arguments()) - arguments.push_back(captures[argument->index()]); + SmallVector kernel_args; + for (auto kernel_arg : kernel_thunk->arguments()) + kernel_args.push_back(arguments[kernel_arg->index()]); auto func_type = rewriter.getType( - mlir::TypeRange(mlir::ValueRange(arguments)), mlir::TypeRange()); + mlir::TypeRange(mlir::ValueRange(kernel_args)), mlir::TypeRange()); mlir::gpu::GPUFuncOp kernel_func = rewriter.create( loc, kernel_thunk->kernel_name(), func_type); kernel_func->setAttr(mlir::gpu::GPUDialect::getKernelFuncAttrName(), @@ -333,7 +338,7 @@ static void Rewrite(mlir::lmhlo::FusionOp fusion_op, rewriter.setInsertionPointToEnd(&kernel_func.getBody().back()); rewriter.create(loc); - rewriter.setInsertionPoint(fusion_op); + rewriter.setInsertionPoint(op); auto make_const_idx = [&](int64_t value) { auto attr = rewriter.getIndexAttr(value); return rewriter.create(loc, attr).getResult(); @@ -349,28 +354,34 @@ static void Rewrite(mlir::lmhlo::FusionOp fusion_op, rewriter.create( loc, kernel_func, grid_size, block_size, - /*shared_memory_size_bytes=*/nullptr, arguments); + /*shared_memory_size_bytes=*/nullptr, kernel_args); } - rewriter.eraseOp(fusion_op); + rewriter.eraseOp(op); } -mlir::LogicalResult FusionRewritePattern::matchAndRewrite( +mlir::LogicalResult KernelOpsPattern::matchAndRewrite( mlir::ModuleOp module_op, mlir::PatternRewriter& rewriter) const { SmallVector rewrites; - // Gather data to rewrite each lmhlo.fusion op without changing the IR. - auto callback = [&](mlir::lmhlo::FusionOp fusion_op) -> mlir::WalkResult { - auto data = Match(fusion_op); - if (!data) - return rewriter.notifyMatchFailure(fusion_op, toString(data.takeError())); - rewrites.emplace_back(std::move(*data)); - return mlir::success(); + // Get data to rewrite kernel ops without changing the IR. + auto walk = [&](auto concrete_op) { + return module_op.walk([&](decltype(concrete_op) op) -> mlir::WalkResult { + auto data = Match(op); + if (!data) + return rewriter.notifyMatchFailure(op, toString(data.takeError())); + rewrites.emplace_back(std::move(*data)); + return mlir::success(); + }); }; - if (module_op.walk(callback).wasInterrupted()) return mlir::failure(); + if (walk(mlir::lmhlo::FusionOp()).wasInterrupted() || + walk(mlir::lmhlo::ScatterOp()).wasInterrupted() || + walk(mlir::lmhlo::SortOp()).wasInterrupted()) + return mlir::failure(); - if (rewrites.empty()) - return rewriter.notifyMatchFailure(module_op, ""No lmhlo.fusion ops""); + if (rewrites.empty()) { + return rewriter.notifyMatchFailure(module_op, ""No kernel ops""); + } // Mark module as gpu.container_module. rewriter.updateRootInPlace(module_op, [&] { @@ -378,18 +389,18 @@ mlir::LogicalResult FusionRewritePattern::matchAndRewrite( rewriter.getUnitAttr()); }); - // Replace the lmhlo.fusion ops with gpu.launch_func. + // Replace the kernel ops with gpu.launch_func. 
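// A standalone toy sketch (plain C++, not MLIR) of the dispatch idiom used by
// matchAndRewrite above: one generic lambda is instantiated per concrete op
// type, so lmhlo.fusion, lmhlo.scatter and lmhlo.sort can all reuse the same
// Match-and-collect logic. Every name below is invented for illustration.
#include <iostream>
#include <string>
#include <vector>

struct FusionOp  { std::string name = "lmhlo.fusion"; };
struct ScatterOp { std::string name = "lmhlo.scatter"; };
struct SortOp    { std::string name = "lmhlo.sort"; };

int main() {
  std::vector<std::string> rewrites;
  // The generic lambda plays the role of the walk helper: its body is shared,
  // and the default-constructed argument only serves to fix the op type.
  auto walk = [&](auto concrete_op) {
    rewrites.push_back(concrete_op.name + " -> gpu.launch_func");
    return false;  // "walk was not interrupted"
  };
  if (walk(FusionOp()) || walk(ScatterOp()) || walk(SortOp())) return 1;
  for (const auto& r : rewrites) std::cout << r << "\n";
  return 0;
}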
mlir::SymbolTable symbol_table(module_op); for (const auto& data : rewrites) { - Rewrite(data.fusion_op, rewriter, symbol_table, data.captures.getArrayRef(), - data.thunks.get(), data.constants, data.gpu_module_data); + Rewrite(data.op, rewriter, symbol_table, data.arguments, data.thunks.get(), + data.constants, data.gpu_module_data); } return mlir::success(); } -void populateFusionConversionPattern(mlir::RewritePatternSet& patterns) { - patterns.add(patterns.getContext()); +void populateKernelOpsPattern(mlir::RewritePatternSet& patterns) { + patterns.add(patterns.getContext()); } } // namespace tensorflow ",0,test 775ee0b3f7e96a295920b8723cfb42d7e9d8cacb,tensorflow/tensorflow,"Extend `lmhlo.fusion` op rewrite pattern to `lmhlo.scatter` and `lmhlo.sort`. PiperOrigin-RevId: 404871130 Change-Id: I024d97c22643d9c313b015d0e1cd7ba6d1e778c3",lmhlo_to_gpu_binary.cc,"@@ -30,7 +30,7 @@ limitations under the License. namespace tensorflow { -void populateFusionConversionPattern(mlir::RewritePatternSet&); +void populateKernelOpsPattern(mlir::RewritePatternSet&); namespace { @@ -42,7 +42,7 @@ struct ConvertLmhloToGpuBinaryPass private: void runOnOperation() override { mlir::RewritePatternSet patterns(&getContext()); - populateFusionConversionPattern(patterns); + populateKernelOpsPattern(patterns); if (failed(applyOpPatternsAndFold(getOperation(), std::move(patterns)))) return signalPassFailure(); } ",0,test 706a5baa6e633ffbbcdf49f69e3ef88421001a76,tensorflow/tensorflow,"Add partial shape inference for values that are used as shapes. With this change, the shape inference for `tf.reshape()` will correctly observe that, for example: ```python x = tf.placeholder(tf.float32) y = tf.placeholder(tf.int32) z = tf.reshape(x, [y, 37]) print(z.get_shape()) # ==> (?, 37) ``` Partially addresses #2938. Change: 125875146",tensor_util.py,"@@ -628,3 +628,55 @@ def constant_value(tensor): # conservatively prevent it from being fed. tensor.graph.prevent_feeding(tensor) return ret + + +def constant_value_as_shape(tensor): # pylint: disable=invalid-name + """"""A version of `constant_value()` that returns a `TensorShape`. + + This version should be used when a constant tensor value is + interpreted as a (possibly partial) shape, e.g. in the shape + function for `tf.reshape()`. By explicitly requesting a + `TensorShape` as the return value, it is possible to represent + unknown dimensions; by contrast, `constant_value()` is + all-or-nothing. + + Args: + tensor: The rank-1 Tensor to be evaluated. + + Returns: + A `TensorShape` based on the constant value of the given `tensor`. + """""" + shape = tensor.get_shape().with_rank(1) + if tensor.get_shape() == [0]: + return tensor_shape.scalar() + elif tensor.op.type == ""Shape"": + return tensor.op.inputs[0].get_shape() + elif tensor.op.type == ""Pack"": + ret = tensor_shape.scalar() # Empty list. + for pack_input in tensor.op.inputs: + # `pack_input` must be a scalar. Attempt to evaluate it, and append it + # to `ret`. + pack_input_val = constant_value(pack_input) + if pack_input_val is None or pack_input_val < 0: + new_dim = tensor_shape.Dimension(None) + else: + new_dim = tensor_shape.Dimension(pack_input_val) + ret = ret.concatenate([new_dim]) + return ret + elif tensor.op.type == ""Concat"": + # We assume that `tensor.op.inputs[0]` evaluates to 0, as this is + # the only legal value when concatenating vectors, and it will + # have been checked by a previous shape function. + ret = tensor_shape.scalar() # Empty list. 
+ for concat_input in tensor.op.inputs[1:]: + # `concat_input` must be a vector. Attempt to evaluate it as a shape, + # and concatenate it with `ret`. + ret = ret.concatenate(constant_value_as_shape(concat_input)) + return ret + else: + ret = tensor_shape.unknown_shape(shape[0].value) + value = constant_value(tensor) + if value is not None: + ret = ret.merge_with(tensor_shape.TensorShape( + [d if d != -1 else None for d in value])) + return ret ",0,train 706a5baa6e633ffbbcdf49f69e3ef88421001a76,tensorflow/tensorflow,"Add partial shape inference for values that are used as shapes. With this change, the shape inference for `tf.reshape()` will correctly observe that, for example: ```python x = tf.placeholder(tf.float32) y = tf.placeholder(tf.int32) z = tf.reshape(x, [y, 37]) print(z.get_shape()) # ==> (?, 37) ``` Partially addresses #2938. Change: 125875146",tensor_util_test.py,"@@ -565,5 +565,38 @@ class ConstantValueTest(tf.test.TestCase): self.assertIs(None, c_val) +class ConstantValueAsShapeTest(tf.test.TestCase): + + def testConstant(self): + np_val = np.random.rand(3).astype(np.int32) + tf_val = tf.constant(np_val) + self.assertEqual(tf.TensorShape(np_val), + tensor_util.constant_value_as_shape(tf_val)) + + tf_val = tf.constant([], dtype=tf.int32) + self.assertEqual(tf.TensorShape([]), + tensor_util.constant_value_as_shape(tf_val)) + + def testShape(self): + tf_val = tf.shape(tf.constant(0.0, shape=[1, 2, 3])) + c_val = tensor_util.constant_value_as_shape(tf_val) + self.assertEqual(tf.TensorShape([1, 2, 3]), c_val) + + def testPack(self): + tf_val = tf.pack([tf.constant(16), 37, tf.placeholder(tf.int32)]) + c_val = tensor_util.constant_value_as_shape(tf_val) + self.assertEqual([16, 37, None], c_val.as_list()) + + def testConcat(self): + tf_val = tf.concat(0, [[16, 37], tf.placeholder(tf.int32, shape=(2,))]) + c_val = tensor_util.constant_value_as_shape(tf_val) + self.assertEqual([16, 37, None, None], c_val.as_list()) + + tf_val = tf.concat(0, + [[16, 37], tf.placeholder(tf.int32, shape=(1,)), [48]]) + c_val = tensor_util.constant_value_as_shape(tf_val) + self.assertEqual([16, 37, None, 48], c_val.as_list()) + + if __name__ == ""__main__"": tf.test.main() ",0,train 706a5baa6e633ffbbcdf49f69e3ef88421001a76,tensorflow/tensorflow,"Add partial shape inference for values that are used as shapes. With this change, the shape inference for `tf.reshape()` will correctly observe that, for example: ```python x = tf.placeholder(tf.float32) y = tf.placeholder(tf.int32) z = tf.reshape(x, [y, 37]) print(z.get_shape()) # ==> (?, 37) ``` Partially addresses #2938. Change: 125875146",reshape_op_test.py,"@@ -99,11 +99,6 @@ class ReshapeTest(tf.test.TestCase): self._testBothReshape(x, [1, -1, 5]) def testErrors(self): - x = tf.constant(0.0, shape=[1, 0, 3]) - with self.assertRaisesRegexp( - ValueError, ""cannot infer the missing input size""): - tf.reshape(x, [0, -1, 5]) - y = tf.constant(0.0, shape=[23, 29, 31]) with self.assertRaisesRegexp(ValueError, ""isn't divisible by 17""): tf.reshape(y, [17, -1]) @@ -128,6 +123,20 @@ class ReshapeTest(tf.test.TestCase): y = tf.reshape(x, tf.placeholder(tf.int32, shape=(3,))) self.assertEqual([None, None, None], y.get_shape().as_list()) + # Unknown input shape, partial new shape using `tf.pack()`. + y = tf.reshape(x, [tf.placeholder(tf.int32), 37]) + self.assertEqual([None, 37], y.get_shape().as_list()) + + # Unknown input shape, partial new shape using `tf.concat()`. 
+ y = tf.reshape(x, tf.concat(0, [tf.placeholder(tf.int32, shape=(2,)), + [37, 42]])) + self.assertEqual([None, None, 37, 42], y.get_shape().as_list()) + + # Unknown input shape, partial new shape using `tf.shape()`. + y = tf.reshape(x, tf.shape(tf.placeholder(tf.float32, + shape=[None, 37, None]))) + self.assertEqual([None, 37, None], y.get_shape().as_list()) + if __name__ == ""__main__"": tf.test.main() ",0,train 706a5baa6e633ffbbcdf49f69e3ef88421001a76,tensorflow/tensorflow,"Add partial shape inference for values that are used as shapes. With this change, the shape inference for `tf.reshape()` will correctly observe that, for example: ```python x = tf.placeholder(tf.float32) y = tf.placeholder(tf.int32) z = tf.reshape(x, [y, 37]) print(z.get_shape()) # ==> (?, 37) ``` Partially addresses #2938. Change: 125875146",array_ops.py,"@@ -1780,45 +1780,38 @@ def _ReshapeShape(op): num_elements *= dim else: num_elements = tensor_shape.Dimension(None) - new_shape_shape = op.inputs[1].get_shape().with_rank(1) - new_shape = tensor_util.constant_value(op.inputs[1]) - if new_shape is None: - # Attempt to infer the rank of the output from the length of - # new_shape. - return [tensor_shape.unknown_shape(ndims=new_shape_shape[0].value)] - new_shape = np.reshape(new_shape, -1).tolist() - if -1 not in new_shape: + new_shape = tensor_util.constant_value_as_shape(op.inputs[1]) + if new_shape.ndims is None: + # We have no information about the shape of the output. + return [new_shape] + if None not in new_shape.as_list(): # The new shape is fully defined. if (num_elements.value is not None and num_elements.value != np.prod(new_shape)): raise ValueError( ""Cannot reshape a tensor with %d elements to shape %s (%d elements)"" % (num_elements.value, new_shape, np.prod(new_shape))) - return [tensor_shape.TensorShape(new_shape)] elif num_elements.value is not None: # We know the number of elements, so we can calculate the missing # dimension in the new_shape. known_elements = 1 - unknown_index = None + unknown_indices = [] for i, dim in enumerate(new_shape): - if dim == -1: - unknown_index = i + if dim.value is None: + unknown_indices.append(i) else: - known_elements *= dim - if known_elements == 0: - raise ValueError(""cannot infer the missing input size for "" - ""an empty tensor unless all specified "" - ""input sizes are non-zero"") - if num_elements % known_elements != 0: - raise ValueError(""input has %s elements, which isn't divisible by %d"" % - (num_elements, known_elements)) - new_shape[unknown_index] = num_elements // known_elements - return [tensor_shape.TensorShape(new_shape)] - else: - # We don't know the input shape, but we know n-1 of the dimensions - # in the new shape. - new_shape[new_shape.index(-1)] = None - return [tensor_shape.TensorShape(new_shape)] + known_elements *= dim.value + if known_elements != 0: + if num_elements % known_elements != 0: + raise ValueError(""input has %s elements, which isn't divisible by %d"" % + (num_elements, known_elements)) + if len(unknown_indices) == 1: + unknown_index = unknown_indices[0] + new_shape = new_shape.merge_with( + new_shape[:unknown_index].concatenate( + [num_elements // known_elements]).concatenate( + new_shape[unknown_index+1:])) + return [new_shape] @ops.RegisterShape(""BroadcastGradientArgs"") ",0,train 15fe88c0ed7ae2e024b345a5929e277398b66dad,tensorflow/tensorflow,"[MLIR] Move documentation closer to FunctionPasses for consistency. 
PiperOrigin-RevId: 273463381",hlo_legalize_to_lhlo.cc,"@@ -159,6 +159,46 @@ void populateHLOToLHLOConversionPattern(MLIRContext* context, context); } +// Lowers from HLO dialect to LHLO dialect allocating/deallocating temporary +// buffers if necessary. +// +// Example fusion with HLO ops. +// +// func @fusion(%arg0: memref<2x2xf32>, +// %arg1: memref<2x2xf32>, +// %arg2: memref<2x2xf32>, +// %arg3: memref<2x2xf32>) { +// ""xla_lhlo.fusion""() ({ +// %0 = tensor_load %arg1 : memref<2x2xf32> +// %1 = tensor_load %arg2 : memref<2x2xf32> +// %2 = ""xla_hlo.add""(%0, %1) {name = ""add""} : +// (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> +// %3 = tensor_load %arg0 : memref<2x2xf32> +// %4 = ""xla_hlo.mul""(%2, %3) {name = ""multiply""} : +// (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> +// tensor_store %4, %arg3 : memref<2x2xf32> +// ""xla_lhlo.terminator""() : () -> () +// }) {name = ""fusion""} : () -> () +// return +// } +// +// Transformed fusion with LHLO ops. +// func @fusion(%arg0: memref<2x2xf32>, +// %arg1: memref<2x2xf32>, +// %arg2: memref<2x2xf32>, +// %arg3: memref<2x2xf32>) { +// ""xla_lhlo.fusion""() ( { +// %0 = alloc() {temp = true} : memref<2x2xf32> +// ""xla_lhlo.add""(%arg1, %arg2, %0) : +// (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () +// ""xla_lhlo.mul""(%0, %arg0, %arg3) : +// (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () +// dealloc %0 : memref<2x2xf32> +// ""xla_lhlo.terminator""() : () -> () +// }) {name = ""fusion""} : () -> () +// return +// } +// } struct HloLegalizeToLhlo : public FunctionPass { void runOnFunction() override { OwningRewritePatternList patterns; ",0,train 15fe88c0ed7ae2e024b345a5929e277398b66dad,tensorflow/tensorflow,"[MLIR] Move documentation closer to FunctionPasses for consistency. PiperOrigin-RevId: 273463381",lhlo_legalize_to_linalg.cc,"@@ -126,25 +126,6 @@ Operation* GetLinalgBodyOp(Location loc, Type element_type, : nullptr; } -// Converts LHLO ops to Linalg generic. -// Sample result for xla_lhlo::AddOp. -// -// ""xla_lhlo.add""(%arg1, %arg2, %out) : -// (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () -// -// will be converted to -// -// #map0 = (d0, d1) -> (d0, d1) -// ""linalg.generic""(%arg1, %arg2, %out) ( { -// ^bb0(%arg4: f32, %arg5: f32): -// %0 = addf %arg4, %arg5 : f32 -// ""linalg.yield""(%0) : (f32) -> () -// }) { -// indexing_maps = [#map0, #map0, #map0], -// n_loop_types = [2, 0, 0], -// n_views = [3, 1] -// } : (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () -// } template class LhloToLinalgOpConverter : public ConversionPattern { public: @@ -229,6 +210,25 @@ void populateLHLOToLinalgConversionPattern(MLIRContext* context, LhloToLinalgOpConverter>(context); } +// Converts LHLO ops to Linalg generic. +// Sample result for xla_lhlo::AddOp. 
+// +// ""xla_lhlo.add""(%arg1, %arg2, %out) : +// (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () +// +// will be converted to +// +// #map0 = (d0, d1) -> (d0, d1) +// ""linalg.generic""(%arg1, %arg2, %out) ( { +// ^bb0(%arg4: f32, %arg5: f32): +// %0 = addf %arg4, %arg5 : f32 +// ""linalg.yield""(%0) : (f32) -> () +// }) { +// indexing_maps = [#map0, #map0, #map0], +// n_loop_types = [2, 0, 0], +// n_views = [3, 1] +// } : (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () +// } struct LhloLegalizeToLinalg : public FunctionPass { void runOnFunction() override { OwningRewritePatternList patterns; ",0,train 15fe88c0ed7ae2e024b345a5929e277398b66dad,tensorflow/tensorflow,"[MLIR] Move documentation closer to FunctionPasses for consistency. PiperOrigin-RevId: 273463381",passes.h,"@@ -45,44 +45,6 @@ std::unique_ptr> createLegalizeToStdPass(); // Lowers from HLO dialect to LHLO dialect allocating/deallocating temporary // buffers if necessary. -// -// Example fusion with HLO ops. -// -// func @fusion(%arg0: memref<2x2xf32>, -// %arg1: memref<2x2xf32>, -// %arg2: memref<2x2xf32>, -// %arg3: memref<2x2xf32>) { -// ""xla_lhlo.fusion""() ({ -// %0 = tensor_load %arg1 : memref<2x2xf32> -// %1 = tensor_load %arg2 : memref<2x2xf32> -// %2 = ""xla_hlo.add""(%0, %1) {name = ""add""} : -// (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> -// %3 = tensor_load %arg0 : memref<2x2xf32> -// %4 = ""xla_hlo.mul""(%2, %3) {name = ""multiply""} : -// (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> -// tensor_store %4, %arg3 : memref<2x2xf32> -// ""xla_lhlo.terminator""() : () -> () -// }) {name = ""fusion""} : () -> () -// return -// } -// -// Transformed fusion with LHLO ops. -// func @fusion(%arg0: memref<2x2xf32>, -// %arg1: memref<2x2xf32>, -// %arg2: memref<2x2xf32>, -// %arg3: memref<2x2xf32>) { -// ""xla_lhlo.fusion""() ( { -// %0 = alloc() {temp = true} : memref<2x2xf32> -// ""xla_lhlo.add""(%arg1, %arg2, %0) : -// (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () -// ""xla_lhlo.mul""(%0, %arg0, %arg3) : -// (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () -// dealloc %0 : memref<2x2xf32> -// ""xla_lhlo.terminator""() : () -> () -// }) {name = ""fusion""} : () -> () -// return -// } -// } std::unique_ptr> createLegalizeToLhloPass(); } // namespace xla_hlo ",0,train 666277f9a4a3a11e9350555e0974ae827a438cf9,tensorflow/tensorflow,"Support batched inputs to SparseSoftmaxCrossEntropyWithLogitsGradientFunction. This requires broadcasting the incoming grads to the softmax grad. PiperOrigin-RevId: 332372721 Change-Id: Ifa048f20d16de9997dec3d8111360d27b55ea941",gradient_checker_test.cc,"@@ -155,6 +155,12 @@ TEST_P(GradientCheckerTest, TestGradCheckMul) { } TEST_P(GradientCheckerTest, TestGradCheckSoftmax) { + bool use_function = !std::get<2>(GetParam()); + if (use_function) { + // TODO(b/168850692): Enable this. + GTEST_SKIP() << ""Can't take gradient of "" + ""SparseSoftmaxCrossEntropyWithLogits in tracing mode.""; + } std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); ",0,train 666277f9a4a3a11e9350555e0974ae827a438cf9,tensorflow/tensorflow,"Support batched inputs to SparseSoftmaxCrossEntropyWithLogitsGradientFunction. This requires broadcasting the incoming grads to the softmax grad. 
PiperOrigin-RevId: 332372721 Change-Id: Ifa048f20d16de9997dec3d8111360d27b55ea941",mnist_gradients_test.cc,"@@ -390,6 +390,12 @@ TEST_P(CppGradients, TestReluGrad) { } TEST_P(CppGradients, TestSoftmaxLossGrad) { + bool use_function = !std::get<2>(GetParam()); + if (use_function) { + // TODO(b/168850692): Enable this. + GTEST_SKIP() << ""Can't take gradient of "" + ""SparseSoftmaxCrossEntropyWithLogits in tracing mode.""; + } std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); @@ -458,6 +464,12 @@ TEST_P(CppGradients, TestSoftmaxLossGrad) { } TEST_P(CppGradients, TestMNISTGrad) { + bool use_function = !std::get<2>(GetParam()); + if (use_function) { + // TODO(b/168850692): Enable this. + GTEST_SKIP() << ""Can't take gradient of "" + ""SparseSoftmaxCrossEntropyWithLogits in tracing mode.""; + } std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); AbstractContextPtr ctx; @@ -605,6 +617,12 @@ TEST_P(CppGradients, TestScalarMul) { } TEST_P(CppGradients, TestMNIST_Training) { + bool use_function = !std::get<2>(GetParam()); + if (use_function) { + // TODO(b/168850692): Enable this. + GTEST_SKIP() << ""Can't take gradient of "" + ""SparseSoftmaxCrossEntropyWithLogits in tracing mode.""; + } std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); ",0,train 666277f9a4a3a11e9350555e0974ae827a438cf9,tensorflow/tensorflow,"Support batched inputs to SparseSoftmaxCrossEntropyWithLogitsGradientFunction. This requires broadcasting the incoming grads to the softmax grad. PiperOrigin-RevId: 332372721 Change-Id: Ifa048f20d16de9997dec3d8111360d27b55ea941",nn_grad.cc,"@@ -14,9 +14,15 @@ limitations under the License. ==============================================================================*/ #include ""tensorflow/c/experimental/gradients/nn_grad.h"" +#include ""absl/types/span.h"" +#include ""tensorflow/c/eager/abstract_tensor_handle.h"" +#include ""tensorflow/c/eager/immediate_execution_context.h"" +#include ""tensorflow/c/eager/immediate_execution_tensor_handle.h"" #include ""tensorflow/c/experimental/ops/array_ops.h"" #include ""tensorflow/c/experimental/ops/math_ops.h"" #include ""tensorflow/c/experimental/ops/nn_ops.h"" +#include ""tensorflow/core/lib/llvm_rtti/llvm_rtti.h"" +#include ""tensorflow/core/platform/errors.h"" using std::vector; using tensorflow::ops::Mul; @@ -54,9 +60,31 @@ class ReluGradientFunction : public GradientFunction { vector forward_outputs; }; -class SparseSoftmaxCrossEntropyLossGradientFunction : public GradientFunction { +Status BroadcastMul(AbstractContext* ctx, AbstractTensorHandle* vec, + AbstractTensorHandle* mat, + absl::Span outputs) { + if (!isa(ctx)) { + // TODO(b/168850692): Fix this. 
+ return errors::Unimplemented( + ""BroadcastMul is not supported in tracing mode yet.""); + } + auto imm_ctx = dyn_cast(ctx); + AbstractTensorPtr minus_1(imm_ctx->CreateInt32Scalar(-1)); + ImmediateTensorHandlePtr dim(imm_ctx->CreateLocalHandle(minus_1.get())); + vector expand_dims_outputs(1); + TF_RETURN_IF_ERROR(ops::ExpandDims(ctx, {vec, dim.get()}, + absl::MakeSpan(expand_dims_outputs), + ""ExpandDims"")); + TF_RETURN_IF_ERROR( + ops::Mul(ctx, {expand_dims_outputs[0], mat}, outputs, ""Mul"")); + expand_dims_outputs[0]->Unref(); + return Status::OK(); +} + +class SparseSoftmaxCrossEntropyWithLogitsGradientFunction + : public GradientFunction { public: - explicit SparseSoftmaxCrossEntropyLossGradientFunction( + explicit SparseSoftmaxCrossEntropyWithLogitsGradientFunction( vector f_outputs) : forward_outputs(f_outputs) {} @@ -65,12 +93,10 @@ class SparseSoftmaxCrossEntropyLossGradientFunction : public GradientFunction { grad_outputs->resize(2); // Grad for Softmax Input - std::string name = ""Mul_Softmax_Grad""; vector mul_outputs(1); - TF_RETURN_IF_ERROR( - ops::Mul(ctx->ctx, {grad_inputs[0], forward_outputs[1]}, - absl::MakeSpan(mul_outputs), - name.c_str())); // upstream_grad * local softmax grad + TF_RETURN_IF_ERROR(BroadcastMul( + ctx->ctx, grad_inputs[0], forward_outputs[1], + absl::MakeSpan(mul_outputs))); // upstream_grad * local softmax grad (*grad_outputs)[0] = mul_outputs[0]; // Grad for labels is null @@ -78,7 +104,7 @@ class SparseSoftmaxCrossEntropyLossGradientFunction : public GradientFunction { return Status::OK(); } - ~SparseSoftmaxCrossEntropyLossGradientFunction() override {} + ~SparseSoftmaxCrossEntropyWithLogitsGradientFunction() override {} private: vector forward_outputs; @@ -98,7 +124,7 @@ BackwardFunction* ReluRegisterer(const ForwardOperation& op) { BackwardFunction* SparseSoftmaxCrossEntropyWithLogitsRegisterer( const ForwardOperation& op) { auto gradient_function = - new SparseSoftmaxCrossEntropyLossGradientFunction(op.outputs); + new SparseSoftmaxCrossEntropyWithLogitsGradientFunction(op.outputs); auto default_gradients = new PassThroughDefaultGradients(op); return new BackwardFunction(gradient_function, default_gradients); } ",0,train add0043e9d6233d9fabf2676e449d26ecd257ec5,tensorflow/tensorflow,"- Fix typo in evaluator PiperOrigin-RevId: 199164433",hlo_evaluator_typed_visitor.h,"@@ -1962,7 +1962,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { // TODO(b/74360564): This is implementation defined behavior, but is // currently respected by all implementations. Change this if we ever decide - // to oficially document different behavior. + // to officially document different behavior. for (int64 i = 0; i < start.size(); ++i) { start[i] = std::min( std::max(int64{0}, start[i]), ",0,test efd35d70d22d370bf4a997cbf53a8030031a48da,tensorflow/tensorflow,"[MLIR] Convert FuncOp signature with unranked types in HLO->LHLO conversion. 
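The diff below hinges on a small legality predicate: a FuncOp is only considered legal once every input type passes the same check (here, "is a memref"), expressed as a single all_of over the signature. A standalone sketch of that shape of check, with plain strings standing in for MLIR types:

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

// Stand-in for the isMemRefType lambda: true when the type is a memref, which
// covers ranked (memref<2x2xf32>) and unranked (memref<*xf32>) cases alike.
bool IsMemRefType(const std::string& type) {
  return type.rfind("memref", 0) == 0;
}

int main() {
  std::vector<std::string> inputs = {"memref<2x2xf32>", "memref<*xf32>"};
  bool legal = std::all_of(inputs.begin(), inputs.end(), IsMemRefType);
  std::printf("%s\n", legal ? "legal" : "needs conversion");
  return 0;
}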
PiperOrigin-RevId: 320146856 Change-Id: Ic534e97b2eecbd4573b91ff48ef90d38bbacd9a4",hlo_legalize_to_lhlo.cc,"@@ -391,16 +391,15 @@ struct HloLegalizeToLhlo target.addIllegalDialect(); BufferAssignmentTypeConverter converter; + auto isMemRefType = [](Type type) { return type.isa(); }; target.addDynamicallyLegalOp([&](FuncOp op) { auto inputs = op.getType().getInputs(); - return llvm::all_of(inputs, - [](Type input) { return input.isa(); }) && + return llvm::all_of(inputs, isMemRefType) && converter.isLegal(&op.getBody()); }); target.addDynamicallyLegalOp([&](mlir::ReturnOp returnOp) { return std::all_of(returnOp.operand_type_begin(), - returnOp.operand_type_end(), - [](Type type) { return type.isa(); }); + returnOp.operand_type_end(), isMemRefType); }); auto module = getOperation(); ",0,train bc5eddee3dda337c5b9287691b55d7a363b65c7b,tensorflow/tensorflow,"Add Ragged support to tf.math.sigmoid operation. PiperOrigin-RevId: 381407240 Change-Id: Ic64d0603d85da24b436ab190a9b7c0ba34c9412c",ragged_dispatch.py,"@@ -331,6 +331,7 @@ _UNARY_ELEMENTWISE_OPS = [ math_ops.rsqrt, math_ops.saturate_cast, math_ops.sign, + math_ops.sigmoid, math_ops.sin, math_ops.sinh, math_ops.sqrt, ",0,train bc5eddee3dda337c5b9287691b55d7a363b65c7b,tensorflow/tensorflow,"Add Ragged support to tf.math.sigmoid operation. PiperOrigin-RevId: 381407240 Change-Id: Ic64d0603d85da24b436ab190a9b7c0ba34c9412c",ragged_dispatch_test.py,"@@ -756,9 +756,9 @@ class RaggedDispatchTest(test_util.TensorFlowTestCase, parameterized.TestCase): 'math.reduce_any', 'math.reduce_max', 'math.reduce_mean', 'math.reduce_variance', 'math.reduce_std', 'math.reduce_min', 'math.reduce_prod', 'math.reduce_sum', 'math.rint', 'math.round', - 'math.rsqrt', 'math.sign', 'math.sin', 'math.sinh', 'math.sqrt', - 'math.square', 'math.squared_difference', 'math.subtract', 'math.tan', - 'math.truediv', 'math.unsorted_segment_max', + 'math.rsqrt', 'math.sign', 'math.sigmoid', 'math.sin', 'math.sinh', + 'math.sqrt', 'math.square', 'math.squared_difference', 'math.subtract', + 'math.tan', 'math.truediv', 'math.unsorted_segment_max', 'math.unsorted_segment_mean', 'math.unsorted_segment_min', 'math.unsorted_segment_prod', 'math.unsorted_segment_sqrt_n', 'math.unsorted_segment_sum', 'one_hot', 'ones_like', 'rank', 'realdiv', ",0,train bc5eddee3dda337c5b9287691b55d7a363b65c7b,tensorflow/tensorflow,"Add Ragged support to tf.math.sigmoid operation. PiperOrigin-RevId: 381407240 Change-Id: Ic64d0603d85da24b436ab190a9b7c0ba34c9412c",ragged_tensor_test_ops.py,"@@ -64,6 +64,7 @@ UNARY_FLOAT_OPS = [ math_ops.round, math_ops.rsqrt, math_ops.sign, + math_ops.sigmoid, math_ops.sin, math_ops.sinh, math_ops.sqrt, ",0,train 2775ac493806fefa4e7c2fd798be5b1f87e01a94,tensorflow/tensorflow,"Extend tensor_list with basic support for appending to TensorArrays. This allows handling list-type operations on lists that we haven't created, e.g. received as parameters. PiperOrigin-RevId: 188094077",tensor_list.py,"@@ -18,7 +18,26 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.framework import ops from tensorflow.python.ops import list_ops +from tensorflow.python.ops import tensor_array_ops + + +def dynamic_list_append(target, element): + """"""Converts a list append call inline."""""" + if isinstance(target, tensor_array_ops.TensorArray): + return target.write(target.size(), element) + # TODO(mdan): What's the right way to check this? + # TODO(mdan): We may not need this branch. 
+ # It may be possible to use TensorList alone if the loop body will not + # require wrapping it, although we'd have to think about an autoboxing + # mechanism for lists received as parameter. + if isinstance(target, ops.Tensor): + return list_ops.tensor_list_push_back(target, element) + + # Python targets (including TensorList): fallback to their original append. + target.append(element) + return target class TensorList(object): ",0,test 2775ac493806fefa4e7c2fd798be5b1f87e01a94,tensorflow/tensorflow,"Extend tensor_list with basic support for appending to TensorArrays. This allows handling list-type operations on lists that we haven't created, e.g. received as parameters. PiperOrigin-RevId: 188094077",tensor_list_test.py,"@@ -21,13 +21,41 @@ from __future__ import print_function from tensorflow.contrib.py2tf.utils import tensor_list as tl from tensorflow.python.client.session import Session from tensorflow.python.eager import context +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework.constant_op import constant +from tensorflow.python.ops import list_ops +from tensorflow.python.ops import tensor_array_ops from tensorflow.python.platform import test class TensorListTest(test.TestCase): + def _shape(self, shape_tuple): + return constant(shape_tuple, dtypes.int32) + + def test_dynamic_list_append(self): + l = [] + l = tl.dynamic_list_append(l, 1) + self.assertListEqual(l, [1]) + + l = list_ops.empty_tensor_list(self._shape(()), dtypes.int32) + l = tl.dynamic_list_append(l, 1) + s = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32) + with self.test_session() as sess: + self.assertAllEqual(sess.run(s), [1]) + + l = tensor_array_ops.TensorArray(dtypes.int32, size=0, dynamic_size=True) + l = tl.dynamic_list_append(l, 1) + s = l.stack() + with self.test_session() as sess: + self.assertAllEqual(sess.run(s), [1]) + + l = tl.TensorList(self._shape(()), dtypes.int32) + l = tl.dynamic_list_append(l, 1) + with self.test_session() as sess: + self.assertAllEqual(sess.run(l[0]), 1) + def test_list_append_python(self): with context.eager_mode(): a = constant(3.0) ",0,test 41e4c2033d0ca35587724abe4940db0bee2d6d96,tensorflow/tensorflow,"[tf:tfrt] Add [1, 0, 2] case to transpose benchmarks The [1, 0, 2] case was missing. 
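For reference, a [1, 0, 2] permutation swaps the two outermost dimensions of a 3-D tensor and leaves the innermost one in place. A minimal sketch of how such a permutation acts on a shape (toy dimensions, not the benchmark inputs):

#include <array>
#include <cstdio>

int main() {
  const std::array<int, 3> perm = {1, 0, 2};    // output dim d comes from input dim perm[d]
  const std::array<int, 3> shape = {32, 16, 8};
  std::array<int, 3> transposed{};
  for (int d = 0; d < 3; ++d) transposed[d] = shape[perm[d]];
  std::printf("[%d, %d, %d]\n", transposed[0], transposed[1], transposed[2]);  // [16, 32, 8]
  return 0;
}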
PiperOrigin-RevId: 427990241 Change-Id: Icf9a59f13cf4b0043f850b52a6d441c99f77bf6a",transpose_op_benchmark.cc,"@@ -122,6 +122,15 @@ BM(Tfrt(Transpose_small_1x2x0, Transpose3D({1, 2, 0}), ""compute"", Inputs({32, 32, 16}))); BM(Eigen(Transpose_small_1x2x0, Shuffle<3>({1, 2, 0}), Inputs({32, 32, 16}))); +// Small 3D Transpose: [1, 0, 2] +BM(Jitrt(Transpose_small_1x0x2, Transpose3D({1, 0, 2}), ""compute"", + Inputs({32, 32, 16}))); +BM(JitrtV(Transpose_small_1x0x2, Transpose3D({1, 0, 2}), ""compute"", + Inputs({32, 32, 16}))); +BM(Tfrt(Transpose_small_1x0x2, Transpose3D({1, 0, 2}), ""compute"", + Inputs({32, 32, 16}))); +BM(Eigen(Transpose_small_1x0x2, Shuffle<3>({1, 0, 2}), Inputs({32, 32, 16}))); + // 2D Transpose: [1, 0] BM(Jitrt(Transpose_1x0, Transpose2D(), ""compute"", Inputs({4096, 4096}))); BM(JitrtV(Transpose_1x0, Transpose2D(), ""compute"", Inputs({4096, 4096}))); @@ -164,4 +173,13 @@ BM(Tfrt(Transpose_1x2x0, Transpose3D({1, 2, 0}), ""compute"", Inputs({256, 256, 256}))); BM(Eigen(Transpose_1x2x0, Shuffle<3>({1, 2, 0}), Inputs({256, 256, 256}))); +// 3D Transpose: [1, 0, 2] +BM(Jitrt(Transpose_1x0x2, Transpose3D({1, 0, 2}), ""compute"", + Inputs({256, 256, 256}))); +BM(JitrtV(Transpose_1x0x2, Transpose3D({1, 0, 2}), ""compute"", + Inputs({256, 256, 256}))); +BM(Tfrt(Transpose_1x0x2, Transpose3D({1, 0, 2}), ""compute"", + Inputs({256, 256, 256}))); +BM(Eigen(Transpose_1x0x2, Shuffle<3>({1, 0, 2}), Inputs({256, 256, 256}))); + } // namespace tensorflow ",0,test 50c58837d2be9aa218736ebe5eacb499bcbe7052,tensorflow/tensorflow,Fix: fixed LSTMBlockCell cuda kernel,lstm_ops_gpu.cu.cc,"@@ -350,8 +350,8 @@ __global__ void lstm_gates_bprop( di[cid] = di_local; dgates[gid + 0 * cell_size] = di_local; - dgates[gate_c_offset(gate_layout, cell_size)] = dci_local; - dgates[gate_f_offset(gate_layout, cell_size)] = df_local; + dgates[gid + gate_c_offset(gate_layout, cell_size)] = dci_local; + dgates[gid + gate_f_offset(gate_layout, cell_size)] = df_local; dgates[gid + 3 * cell_size] = do_local; cs_prev_grad[cid] = dcs_local * f_local; ",0,test 50c58837d2be9aa218736ebe5eacb499bcbe7052,tensorflow/tensorflow,Fix: fixed LSTMBlockCell cuda kernel,rnn_grad_test.py,"@@ -66,6 +66,60 @@ class RNNGradTest(test.TestCase): self.assertAllEqual(w_grad, w_ifco_grad) self.assertAllEqual(b_grad, b_ifco_grad) + @test_util.deprecated_graph_mode_only + def testLSTMBlockCell(self): + batch_size = np.random.randint(1, 32) + input_size = np.random.randint(1, 32) + hidden_size = np.random.randint(1, 32) + w = deterministic_random_uniform( + [input_size + hidden_size, 4 * hidden_size]) + b = deterministic_random_uniform([4 * hidden_size]) + x = deterministic_random_uniform([batch_size, input_size]) + cs_prev = h_prev = deterministic_random_uniform([batch_size, hidden_size]) + w_peephole = array_ops.zeros(cs_prev.shape[1:], dtype=w.dtype) + cs_grad = deterministic_random_uniform([batch_size, hidden_size]) + h_grad = deterministic_random_uniform([batch_size, hidden_size]) + + outputs = [] + grads = [] + for use_gpu in [False, True]: + with self.cached_session(use_gpu=use_gpu): + output = gen_rnn_ops.lstm_block_cell( + x=x, + cs_prev=cs_prev, + h_prev=h_prev, + w=w, + wci=w_peephole, + wcf=w_peephole, + wco=w_peephole, + b=b, + forget_bias=1.0, + cell_clip=0.0, + use_peephole=False) + (i, cs, f, o, ci, co, _) = output + grad = gen_rnn_ops.lstm_block_cell_grad( + x=x, + cs_prev=cs_prev, + h_prev=h_prev, + w=w, + wci=w_peephole, + wcf=w_peephole, + wco=w_peephole, + b=b, + i=i, + cs=cs, + f=f, + o=o, + ci=ci, + co=co, + 
cs_grad=cs_grad, + h_grad=h_grad, + use_peephole=False) + outputs.append(output) + grads.append(grad) + self.assertAllClose(outputs[0], outputs[1]) + self.assertAllClose(grads[0], grads[1]) + def _lstm_block(self, op, w, b, x, cs_prev, h_prev): w_peephole = array_ops.zeros(cs_prev.shape[1:], dtype=w.dtype) _, all_cs, _, _, _, _, all_h = op( ",0,test 871d07bafaba5af6ea68ae5a86e36ced5a52a32a,tensorflow/tensorflow,"Add basic support for declarative Linalg transformations Linalg ops provide a good anchor for pattern matching/rewriting transformations. This CL adds a simple example of how multi-level tiling may be specified by attaching a simple StringAttr to ops as they are transformed so we can easily specify partial lowering to control transformation application. This is a first stab at taking advantage of higher-level information contained in Linalg ops and will evolve in the future. PiperOrigin-RevId: 277497958 Change-Id: I6d504c7c39373d49cb8e5ae3b596c24d1efd2581",Passes.h,"@@ -41,6 +41,8 @@ std::unique_ptr> createLinalgPromotionPass(); std::unique_ptr> createLowerLinalgToLoopsPass(); std::unique_ptr> createLowerLinalgToLLVMPass(); + +std::unique_ptr> createLinalgTransformsPass(); } // namespace linalg } // namespace mlir ",0,train a703b288d8a8bae22a3cb4587e3ee1f98235a0a5,tensorflow/tensorflow,"Simplify reset_uids() Remove unnecessary dict clearance code",backend.py,"@@ -211,10 +211,8 @@ def get_uid(prefix=''): def reset_uids(): """"""Resets graph identifiers. """""" - per_graph_object_name_uids = PER_GRAPH_OBJECT_NAME_UIDS - keys = list(per_graph_object_name_uids.keys()) - for key in keys: - del per_graph_object_name_uids[key] + + PER_GRAPH_OBJECT_NAME_UIDS.clear() @keras_export('keras.backend.clear_session') ",0,train e576acf5dbd7b800d3b6aa4de4b69952a9e2c0fb,tensorflow/tensorflow,"Internal-only change. PiperOrigin-RevId: 224362520",tpu_cluster_resolver.py,"@@ -197,13 +197,14 @@ class TPUClusterResolver(ClusterResolver): elif tpu == 'local' or not tpu: # Google environment, where the TPU is attached to the host. self._environment = 'google' - elif tpu.startswith('/bns'): + elif tpu.startswith('/bns') or tpu.startswith('uptc://'): # Google environment, where we reach the TPU through BNS. self._environment = 'google' # If TPU is in the Google environment or exists locally, we don't use any # RPC layer. - if tpu.startswith('/bns') or tpu == 'local' or not tpu: + if tpu.startswith('/bns') or tpu.startswith( + 'uptc://') or tpu == 'local' or not tpu: self.rpc_layer = None else: self.rpc_layer = 'grpc' ",0,train 76319741cd303273a542eae0cdf78df61e2c4e83,tensorflow/tensorflow,"[MLIR][NFC] Use CallOpInterface::resolveCallable() to reduce some code clutter - Use this to reduce the nesting of if's needed to get to the FuncOp for a call - Add helper functions to get attached FuncOp for WhileOp PiperOrigin-RevId: 323365892 Change-Id: If6c6d4f1c8359c5df50366f90cbcb67fc9311771",prepare_composite_functions_tf.cc,"@@ -239,18 +239,14 @@ LogicalResult CheckOutputConsumer( LogicalResult CheckFusableKerasLstm(FuncOp lstm_func, ModuleOp module) { for (auto func : module.getOps()) { - auto result = func.walk([&](Operation* op) { - if (auto call_op = dyn_cast(op)) { - CallInterfaceCallable callable = call_op.getCallableForCallee(); - if (auto sym = callable.dyn_cast()) { - if (sym.getRootReference() == lstm_func.getName()) { - // Keras LSTM have 5 outputs. - // We should make sure only the first or the second output are - // consumed. 
- if (failed(CheckOutputConsumer(call_op, 5, {0, 1}))) - return WalkResult::interrupt(); - } - } + if (func == lstm_func) continue; + auto result = func.walk([&](CallOpInterface op) { + if (dyn_cast(op.resolveCallable()) == lstm_func) { + // Keras LSTM have 5 outputs. + // We should make sure only the first or the second output are + // consumed. + if (failed(CheckOutputConsumer(op.getOperation(), 5, {0, 1}))) + return WalkResult::interrupt(); } return WalkResult::advance(); }); ",0,train 76319741cd303273a542eae0cdf78df61e2c4e83,tensorflow/tensorflow,"[MLIR][NFC] Use CallOpInterface::resolveCallable() to reduce some code clutter - Use this to reduce the nesting of if's needed to get to the FuncOp for a call - Add helper functions to get attached FuncOp for WhileOp PiperOrigin-RevId: 323365892 Change-Id: If6c6d4f1c8359c5df50366f90cbcb67fc9311771",executor_tpuv1_inline_tpu_island.cc,"@@ -61,11 +61,11 @@ void TPUBridgeExecutorIslandInlining::runOnOperation() { LLVM_DEBUG(llvm::dbgs() << ""Found call to inline: "" << *call_op.getOperation() << ""\n""); - FuncOp called_func = dyn_cast_or_null( - symbol_table.lookupSymbolIn(getOperation(), call_op.f())); + auto call_interface = cast(call_op.getOperation()); + auto called_func = + dyn_cast_or_null(call_interface.resolveCallable()); - if (failed(inlineCall(inliner, - cast(call_op.getOperation()), + if (failed(inlineCall(inliner, call_interface, cast(called_func.getOperation()), called_func.getCallableRegion(), /* shouldCloneInlinedRegion = */ false))) { ",0,train 76319741cd303273a542eae0cdf78df61e2c4e83,tensorflow/tensorflow,"[MLIR][NFC] Use CallOpInterface::resolveCallable() to reduce some code clutter - Use this to reduce the nesting of if's needed to get to the FuncOp for a call - Add helper functions to get attached FuncOp for WhileOp PiperOrigin-RevId: 323365892 Change-Id: If6c6d4f1c8359c5df50366f90cbcb67fc9311771",optimize_global_tensors.cc,"@@ -68,9 +68,8 @@ bool IsResource(Value value) { return IsResourceType(value.getType()); } class ResourceAnalyzer { public: explicit ResourceAnalyzer(ModuleOp module) { - SymbolTable symbol_table(module); for (auto func : module.getOps()) { - AnalyzeFunc(func, symbol_table); + AnalyzeFunc(func); } } @@ -89,7 +88,7 @@ class ResourceAnalyzer { // written"". Do this recursively across the chain of funcs via call or control // flow ops. // TODO(ashwinm): Move to iterative traversal. - LogicalResult AnalyzeFunc(FuncOp func, const SymbolTable& symbol_table) { + LogicalResult AnalyzeFunc(FuncOp func) { // Avoid infinite recursion. 
if (!discovered_.insert(func).second) { return success(); @@ -104,24 +103,20 @@ class ResourceAnalyzer { return; } if (auto call = dyn_cast(op)) { - if (auto sym = op->getAttrOfType(""f"")) { - PropagatePotentiallyWrittenUpFromCallee( - sym.cast().getValue(), call.getArgOperands(), - symbol_table); + if (auto func = dyn_cast(call.resolveCallable())) { + PropagatePotentiallyWrittenUpFromCallee(func, call.getArgOperands()); } return; } if (auto if_op = dyn_cast(op)) { - for (auto callee : {if_op.then_branch(), if_op.else_branch()}) { - PropagatePotentiallyWrittenUpFromCallee(callee, if_op.input(), - symbol_table); + for (auto callee : {if_op.then_func(), if_op.else_func()}) { + PropagatePotentiallyWrittenUpFromCallee(callee, if_op.input()); } return; } if (auto while_op = dyn_cast(op)) { - for (auto callee : {while_op.cond(), while_op.body()}) { - PropagatePotentiallyWrittenUpFromCallee(callee, while_op.input(), - symbol_table); + for (auto callee : {while_op.cond_func(), while_op.body_func()}) { + PropagatePotentiallyWrittenUpFromCallee(callee, while_op.input()); } return; } @@ -149,15 +144,13 @@ class ResourceAnalyzer { }); } - // Given a funcOp associated with the callee and operands from the + // Given a FuncOp associated with the callee and operands from the // corresponding callOp, propagate the potentially written decision to the // callOp's operands, if the corresponding func's arguments are potentially // written resources. void PropagatePotentiallyWrittenUpFromCallee( - StringRef callee, Operation::operand_range propagate_to, - const SymbolTable& symbol_table) { - auto func = symbol_table.lookup(callee); - AnalyzeFunc(func, symbol_table); + FuncOp func, Operation::operand_range propagate_to) { + AnalyzeFunc(func); for (auto t : llvm::zip(func.getArguments(), propagate_to)) { if (!IsResource(std::get<0>(t))) { continue; ",0,train 76319741cd303273a542eae0cdf78df61e2c4e83,tensorflow/tensorflow,"[MLIR][NFC] Use CallOpInterface::resolveCallable() to reduce some code clutter - Use this to reduce the nesting of if's needed to get to the FuncOp for a call - Add helper functions to get attached FuncOp for WhileOp PiperOrigin-RevId: 323365892 Change-Id: If6c6d4f1c8359c5df50366f90cbcb67fc9311771",shape_inference.cc,"@@ -39,6 +39,7 @@ limitations under the License. #include ""mlir/IR/StandardTypes.h"" // from @llvm-project #include ""mlir/IR/SymbolTable.h"" // from @llvm-project #include ""mlir/IR/Value.h"" // from @llvm-project +#include ""mlir/Interfaces/CallInterfaces.h"" // from @llvm-project #include ""mlir/Pass/Pass.h"" // from @llvm-project #include ""mlir/Pass/PassRegistry.h"" // from @llvm-project #include ""mlir/Support/LLVM.h"" // from @llvm-project @@ -243,14 +244,11 @@ bool RefineResultType(Operation* op, Value result, // Infers the shape from a (Stateful)PartionedCall operation by looking up the // called function and propagating the return type. -bool InferShapeForCall(Operation* op) { - auto call_op = cast(op); - CallInterfaceCallable callable = call_op.getCallableForCallee(); - SymbolRefAttr sym = callable.dyn_cast(); - if (!sym) return false; - FuncOp func = dyn_cast(SymbolTable::lookupNearestSymbolFrom(op, sym)); +bool InferShapeForCall(CallOpInterface call_op) { + FuncOp func = dyn_cast(call_op.resolveCallable()); if (!func) return false; + Operation* op = call_op.getOperation(); bool changed = false; // Map each of the results of the call to the returned type of the // function. @@ -533,7 +531,7 @@ class ShapeInference { // like predicate). 
LogicalResult PropagateShapeToFunctions( ModuleOp module, Operation::operand_type_range input_types, - ArrayRef func_names, int64_t max_iteration); + ArrayRef functions, int64_t max_iteration); // Propagates shapes to regions given the shapes of the inputs of the regions. // All regions provided in `regions` are assumed to have inputs of type @@ -555,13 +553,13 @@ class ShapeInference { // // TODO(b/154065712): Move this to a more general inter-procedural constant // folding pass. - void PropagateConstantToCallee(CallOpInterface call_op, - SymbolRefAttr callee_sym, ModuleOp module); + void PropagateConstantToCallee(CallOpInterface call_op, FuncOp func, + ModuleOp module); // Propagates any constant return value of the callee function to the call // op's corresponding result. - void PropagateConstantFromCallee(CallOpInterface call_op, - SymbolRefAttr callee_sym, ModuleOp module); + void PropagateConstantFromCallee(CallOpInterface call_op, FuncOp func, + ModuleOp module); // Tries to compute the result of folding the op. This doesn't actually // perform constant folding, it is just computes the equivalent constants. @@ -779,9 +777,7 @@ bool ShapeInference::InferShapeForSingleOperation(Operation* op) { // Handle call operations by looking up callee and infering return shape as // needed. - if (isa( - op)) - return InferShapeForCall(op); + if (auto call = dyn_cast(op)) return InferShapeForCall(call); // tf.Cast are only inferred if they have at least one user in the TF dialect // or feeding into the function return. This is necessary to avoid inserting @@ -984,14 +980,13 @@ bool ShapeInference::InferShapeForSingleOperation(Operation* op) { LogicalResult ShapeInference::PropagateShapeToFunctions( ModuleOp module, Operation::operand_type_range input_types, - ArrayRef func_names, int64_t max_iteration) { + ArrayRef functions, int64_t max_iteration) { bool all_succeeded = true; auto types = llvm::to_vector<4>(input_types); // If shape propagation fails for one function, return failure, but do not // early exit and attempt to propagate shapes for all provided functions to // have a best-effort propagation. - for (auto func_name : func_names) { - FuncOp func = module.lookupSymbol(func_name); + for (FuncOp func : functions) { auto func_uses = SymbolTable::getSymbolUses(func, &module.getBodyRegion()); if (!llvm::hasSingleElement(func_uses.getValue())) { int num_uses = std::distance(func_uses->begin(), func_uses->end()); @@ -1046,12 +1041,9 @@ LogicalResult ShapeInference::PropagateShapeToRegions( } void ShapeInference::PropagateConstantToCallee(CallOpInterface call_op, - SymbolRefAttr callee_sym, - ModuleOp module) { - auto func = module.lookupSymbol(callee_sym.getRootReference()); + FuncOp func, ModuleOp module) { auto func_uses = SymbolTable::getSymbolUses(func, &module.getBodyRegion()); - int num_uses = std::distance(func_uses->begin(), func_uses->end()); - if (num_uses != 1) return; + if (!llvm::hasSingleElement(func_uses.getValue())) return; OpBuilder builder(&func.front().front()); Operation* op = call_op.getOperation(); @@ -1077,9 +1069,7 @@ void ShapeInference::PropagateConstantToCallee(CallOpInterface call_op, } void ShapeInference::PropagateConstantFromCallee(CallOpInterface call_op, - SymbolRefAttr callee_sym, - ModuleOp module) { - auto func = module.lookupSymbol(callee_sym.getRootReference()); + FuncOp func, ModuleOp module) { // If the return value is a constant, use the constant as the value of // the call return. 
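// A standalone sketch (plain C++, not MLIR) of the idea behind
// PropagateConstantFromCallee: when the callee does nothing but return a known
// constant, every call result can be replaced by that constant at the call
// site. The types below are invented for illustration.
#include <cstdio>
#include <optional>

struct Callee {
  std::optional<int> constant_return;  // set when the body is just `return <const>`
};

int EvaluateCall(const Callee& callee, int runtime_value) {
  // Fold the call result when the constant is known, otherwise fall back to
  // whatever the call would have produced at runtime.
  return callee.constant_return.value_or(runtime_value);
}

int main() {
  Callee folds{42}, opaque{std::nullopt};
  std::printf("%d %d\n", EvaluateCall(folds, -1), EvaluateCall(opaque, 7));  // 42 7
  return 0;
}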
Operation* op = call_op.getOperation(); @@ -1111,28 +1101,29 @@ LogicalResult ShapeInference::PropagateShapeIntoAttachedFunctions( if (auto if_op = dyn_cast(op)) { return PropagateShapeToFunctions( module, drop_begin(if_op.getOperandTypes(), 1), - {if_op.then_branch(), if_op.else_branch()}, max_iteration); + {if_op.then_func(), if_op.else_func()}, max_iteration); } else if (auto case_op = dyn_cast(op)) { - SmallVector branches; - for (Attribute branch : case_op.branches()) - branches.push_back(branch.cast().getValue()); + SmallVector branches; + for (Attribute branch : case_op.branches()) { + auto sym = branch.cast(); + branches.push_back(SymbolTable::lookupNearestSymbolFrom(op, sym)); + } return PropagateShapeToFunctions(module, drop_begin(case_op.getOperandTypes(), 1), branches, max_iteration); } else if (auto while_op = dyn_cast(op)) { - return PropagateShapeToFunctions(module, while_op.getOperandTypes(), - {while_op.cond(), while_op.body()}, - max_iteration); + return PropagateShapeToFunctions( + module, while_op.getOperandTypes(), + {while_op.cond_func(), while_op.body_func()}, max_iteration); } else if (auto call_op = dyn_cast(op)) { - CallInterfaceCallable callable = call_op.getCallableForCallee(); - if (SymbolRefAttr sym = callable.dyn_cast()) { - PropagateConstantToCallee(call_op, sym, module); - if (failed(PropagateShapeToFunctions( - module, call_op.getArgOperands().getTypes(), - {sym.getRootReference()}, max_iteration))) { + if (auto func = dyn_cast(call_op.resolveCallable())) { + PropagateConstantToCallee(call_op, func, module); + if (failed(PropagateShapeToFunctions(module, + call_op.getArgOperands().getTypes(), + {func}, max_iteration))) { return failure(); } - PropagateConstantFromCallee(call_op, sym, module); + PropagateConstantFromCallee(call_op, func, module); return success(); } } ",0,train 3edab0abb1213f88507692042a320abc695ff674,tensorflow/tensorflow,"Remove reshape of sparse tensor indices in for maybe_batch. PiperOrigin-RevId: 191310753",input.py,"@@ -515,8 +515,7 @@ def _store_sparse_tensors(tensor_list, enqueue_many, keep_input, def _sparse_values_to_keep(t, keep_input): """"""Convert a per-row `keep_input` vector to a per-value one."""""" # Get the rows of every value in the sparse Tensor. - row_values = array_ops.reshape( - t.indices, [array_ops.shape(t.indices)[0], -1])[:, 0] + row_values = t.indices[:, 0] # The value should be kept iff the row should be kept. return array_ops.gather(keep_input, row_values) if keep_input.shape.ndims == 1: ",0,train d48d6758481ee0e24bd60996daceb241d272d310,tensorflow/tensorflow,"Move tpu fingerprint lookup to OSS PiperOrigin-RevId: 361656725 Change-Id: I4c879184f93dc89405659bd0a4ea5acd1332f614",tpu_fingerprint_lookup.cc,"@@ -0,0 +1,72 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include ""tensorflow/core/tpu/kernels/tpu_fingerprint_lookup.h"" + +namespace tensorflow { +namespace tpu { + +TpuFingerprintLookup* TpuFingerprintLookup::Create() { + return new TpuFingerprintLookup(); +} + +bool TpuFingerprintLookup::RegisterKeyValuePair(uint64 key, std::string value) { + absl::MutexLock lock(&mu_); + bool is_successful = false; + VLOG(2) << ""registering key ("" << key << "") with value: "" << value; + auto it = key_to_value_.find(key); + if (it == key_to_value_.end()) { + // A new key. If the value is not seen before, register key-value and + // value-key pairs. Otherwise, skip registration. + auto maybe_existing_key = value_to_key_.find(value); + if (maybe_existing_key == value_to_key_.end()) { + key_to_value_.emplace(key, value); + value_to_key_.emplace(value, key); + is_successful = true; + } else { + // The value is registered before with a different key. Skip registration. + if (maybe_existing_key->second != key) { + VLOG(2) << ""The value ("" << value + << "") is associated with an existing key ( "" + << maybe_existing_key->second + << ""), which does not match the requesting key ("" << key + << "").""; + } + } + } else { + // The key is registered before, no actions needed. For debugging purpose, + // check if existing value agrees with the value. + if (it->second != value) { + VLOG(2) << ""The key ("" << key + << "") has been registered and the requesting value ( "" << value + << "" and the existing"" << it->second << "") doesn't match.""; + } + } + DCHECK(key_to_value_.size() == value_to_key_.size()); + + return is_successful; +} + +absl::optional TpuFingerprintLookup::Lookup(uint64 key) { + absl::MutexLock lock(&mu_); + auto it = key_to_value_.find(key); + if (it == key_to_value_.end()) { + return absl::optional{}; + } else { + return it->second; + } +} + +} // namespace tpu +} // namespace tensorflow ",0,test d48d6758481ee0e24bd60996daceb241d272d310,tensorflow/tensorflow,"Move tpu fingerprint lookup to OSS PiperOrigin-RevId: 361656725 Change-Id: I4c879184f93dc89405659bd0a4ea5acd1332f614",tpu_fingerprint_lookup.h,"@@ -0,0 +1,84 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_FINGERPRINT_LOOKUP_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_FINGERPRINT_LOOKUP_H_ + +#include +#include + +#include ""absl/base/integral_types.h"" +#include ""absl/container/flat_hash_map.h"" +#include ""absl/container/node_hash_map.h"" +#include ""absl/strings/string_view.h"" +#include ""absl/synchronization/mutex.h"" +#include ""tensorflow/core/framework/resource_mgr.h"" + +namespace tensorflow { +namespace tpu { + +// A class that holds the key-value pair of fingerprints. By calling the +// Register method, this class can map the key to the value. Note that this +// class holds invariant key-value pairs. 
That is, it does not allow updating +// key-value pairs, nor N-key-to-1-value and 1-key-to-M-value pairs. If such +// cases occur, the class keeps the earliest registered pairs and discards any +// violating pairs. +// +// Example: +// TpuFingerprintLookup fingerprint_lookup; +// +// // Register value with key. +// fingerprint_lookup.RegisterKeyValuePair(""key1"", ""program1""); +// +// // Lookup fingerprint with key. +// std::string fingerprint = fingerprint_lookup.Lookup(""key1""); +// +// TODO(chiachenc): use templates and add Unregister methods. +class TpuFingerprintLookup : public ResourceBase { + public: + // Creates an instance of TpuFingerprintLookup. + static TpuFingerprintLookup* Create(); + + // Register value with tag. Return true if successfully registering a + // key-value pair; otherwise, return false. + bool RegisterKeyValuePair(uint64 key, std::string value); + + // Look up fingerprint with key. Return absl::optional{} if + // not found. + absl::optional Lookup(uint64 key); + + size_t num_valid() { + absl::MutexLock lock(&mu_); + return key_to_value_.size(); + } + + std::string DebugString() const override { return ""TpuFingerprintLookup""; } + + private: + explicit TpuFingerprintLookup() {} + + absl::Mutex mu_; + // Main storage for lookup + absl::node_hash_map key_to_value_ ABSL_GUARDED_BY(mu_); + + // An auxiliary storage to ensure 1-to-1 and invariant key-value pair + absl::node_hash_map value_to_key_ ABSL_GUARDED_BY(mu_); + + TpuFingerprintLookup(const TpuFingerprintLookup&) = delete; + TpuFingerprintLookup& operator=(const TpuFingerprintLookup&) = delete; +}; +} // namespace tpu +} // namespace tensorflow +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_FINGERPRINT_LOOKUP_H_ ",0,test 5c7047ad96f3dc9ff13c1feeb7ade5e1e831e645,tensorflow/tensorflow,"[tf.lite] Restrict Interpreter initialization methods Begin making private Interpreter initialization methods currently marked public, starting with: * Interpreter::SetExecutionPlan * Interpreter::ReserveNodes Follow-up changes will introduce a test-only helper for accessing such methods for test purposes, but otherwise such methods should only be accessible to the InterpreterBuilder. PiperOrigin-RevId: 375160379 Change-Id: Ia48289115e35eed2df26226fbedc4db02b210bed",subgraph.h,"@@ -72,11 +72,6 @@ class Subgraph { // interpreter. TfLiteStatus SetVariables(std::vector variables); - // Ensure the internal node storage memory allocates at least `count` - // spots for node. NOTE, this doesn't actually add operators. This is an - // efficiency optimization that is subject to change. - void ReserveNodes(int count); - // Adds a node with the given parameters and returns the index of the new // node in `node_index` (optionally). Interpreter will take ownership of // `builtin_data` and destroy it with `free`. Ownership of 'init_data' @@ -137,10 +132,6 @@ class Subgraph { bool is_variable = false, const size_t rank_dims_signature = 0, const int* dims_signature = nullptr); - // WARNING: Experimental interface, subject to change - // Overrides execution plan. This bounds checks indices sent in. - TfLiteStatus SetExecutionPlan(const std::vector& new_plan); - // Get a mutable tensor data structure. 
TfLiteTensor* tensor(int tensor_index) { if (tensor_index < 0 || @@ -351,6 +342,7 @@ class Subgraph { const std::string& GetName() const; private: + friend class InterpreterBuilder; friend class TestDelegate; // SubgraphAwareProfiler wraps an actual TFLite profiler, such as a // BufferedProfiler instance, and takes care of event profiling/tracing in a @@ -395,6 +387,16 @@ class Subgraph { const int64_t subgraph_index_; }; + // Ensure the internal node storage memory allocates at least `count` + // spots for node. NOTE, this doesn't actually add operators. This is an + // efficiency optimization that is subject to change. + // Note: Only used during initialization. + void ReserveNodes(int count); + + // Overrides execution plan. This bounds checks indices sent in. + // Note: Only used during initialization. + TfLiteStatus SetExecutionPlan(const std::vector& new_plan); + // Prevent 'context_' from accessing functions that are only available to // delegated kernels. void SwitchToKernelContext(); ",0,train 5c7047ad96f3dc9ff13c1feeb7ade5e1e831e645,tensorflow/tensorflow,"[tf.lite] Restrict Interpreter initialization methods Begin making private Interpreter initialization methods currently marked public, starting with: * Interpreter::SetExecutionPlan * Interpreter::ReserveNodes Follow-up changes will introduce a test-only helper for accessing such methods for test purposes, but otherwise such methods should only be accessible to the InterpreterBuilder. PiperOrigin-RevId: 375160379 Change-Id: Ia48289115e35eed2df26226fbedc4db02b210bed",interpreter.cc,"@@ -229,10 +229,6 @@ TfLiteStatus Interpreter::AllocateTensors() { return primary_subgraph().AllocateTensors(); } -void Interpreter::ReserveNodes(int count) { - primary_subgraph().ReserveNodes(count); -} - void Interpreter::AddSubgraphs(int subgraphs_to_add, int* first_new_subgraph_index) { const size_t base_index = subgraphs_.size(); ",0,train 5c7047ad96f3dc9ff13c1feeb7ade5e1e831e645,tensorflow/tensorflow,"[tf.lite] Restrict Interpreter initialization methods Begin making private Interpreter initialization methods currently marked public, starting with: * Interpreter::SetExecutionPlan * Interpreter::ReserveNodes Follow-up changes will introduce a test-only helper for accessing such methods for test purposes, but otherwise such methods should only be accessible to the InterpreterBuilder. PiperOrigin-RevId: 375160379 Change-Id: Ia48289115e35eed2df26226fbedc4db02b210bed",interpreter.h,"@@ -125,11 +125,6 @@ class Interpreter { /// interpreter. TfLiteStatus SetVariables(std::vector variables); - /// Ensure the internal node storage memory allocates at least `count` - /// spots for node. NOTE, this doesn't actually add operators. This is an - /// efficiency optimization that is subject to change. - void ReserveNodes(int count); - /// Adds a node with the given parameters and returns the index of the new /// node in `node_index` (optionally). Interpreter will take ownership of /// `builtin_data` and destroy it with `free`. Ownership of 'init_data' @@ -242,12 +237,6 @@ class Interpreter { return primary_subgraph().execution_plan(); } -#ifndef DOXYGEN_ - /// WARNING: Experimental interface, subject to change - /// Overrides execution plan. This bounds checks indices sent in. - TfLiteStatus SetExecutionPlan(const std::vector& new_plan); -#endif // DOXYGEN_SKIP - /// Get a mutable tensor data structure. 
// TODO(aselle): Create a safe ArrayHandle interface to avoid exposing this // read/write access to structure @@ -717,6 +706,10 @@ class Interpreter { return -1; } + // Overrides execution plan. This bounds checks indices sent in. + // Note: Only used during initialization. + TfLiteStatus SetExecutionPlan(const std::vector& new_plan); + // Sets the profiler to all subgraphs. void SetSubgraphProfiler(); ",0,train 5c7047ad96f3dc9ff13c1feeb7ade5e1e831e645,tensorflow/tensorflow,"[tf.lite] Restrict Interpreter initialization methods Begin making private Interpreter initialization methods currently marked public, starting with: * Interpreter::SetExecutionPlan * Interpreter::ReserveNodes Follow-up changes will introduce a test-only helper for accessing such methods for test purposes, but otherwise such methods should only be accessible to the InterpreterBuilder. PiperOrigin-RevId: 375160379 Change-Id: Ia48289115e35eed2df26226fbedc4db02b210bed",interpreter_test.cc,"@@ -1182,7 +1182,7 @@ TEST_F(InterpreterTest, ExternalBackendContextClearsCachesOnDelete) { // node graph that can be executed in either [0,1] order or [1,0] order. // The CopyOp records when it is invoked in the class member run_order_ // so we can test whether the execution plan was honored. -class TestExecutionPlan : public ::testing::Test { +class TestExecutionPlan : public InterpreterTest { // Encapsulates the node ids and provides them to a C primitive data type // Allocatable with placement new, but never destructed, so make sure this // doesn't own any heap allocated data. This is then is used as op local @@ -1276,8 +1276,6 @@ class TestExecutionPlan : public ::testing::Test { } protected: - Interpreter interpreter_; - // list of node_ids that were run std::vector run_order_; }; @@ -1290,21 +1288,21 @@ TEST_F(TestExecutionPlan, DefaultExecutionPlan) { TEST_F(TestExecutionPlan, ReversedExecutionPlan) { // Check reversed order - interpreter_.SetExecutionPlan({1, 0}); + SetExecutionPlan({1, 0}); ASSERT_EQ(interpreter_.Invoke(), kTfLiteOk); ASSERT_EQ(run_order_, std::vector({1, 0})); } TEST_F(TestExecutionPlan, SubsetExecutionPlan) { // Check running only node index 1 - interpreter_.SetExecutionPlan({1}); + SetExecutionPlan({1}); ASSERT_EQ(interpreter_.Invoke(), kTfLiteOk); ASSERT_EQ(run_order_, std::vector({1})); } TEST_F(TestExecutionPlan, NullExecutionPlan) { // Check nothing executed. - interpreter_.SetExecutionPlan({}); + SetExecutionPlan({}); ASSERT_EQ(interpreter_.Invoke(), kTfLiteOk); ASSERT_EQ(run_order_, std::vector()); } ",0,train 5c7047ad96f3dc9ff13c1feeb7ade5e1e831e645,tensorflow/tensorflow,"[tf.lite] Restrict Interpreter initialization methods Begin making private Interpreter initialization methods currently marked public, starting with: * Interpreter::SetExecutionPlan * Interpreter::ReserveNodes Follow-up changes will introduce a test-only helper for accessing such methods for test purposes, but otherwise such methods should only be accessible to the InterpreterBuilder. 
PiperOrigin-RevId: 375160379 Change-Id: Ia48289115e35eed2df26226fbedc4db02b210bed",interpreter_test_util.h,"@@ -60,6 +60,10 @@ class InterpreterTest : public ::testing::Test { interpreter_.SetSignatureDef({signature}); } + TfLiteStatus SetExecutionPlan(const std::vector& new_plan) { + return interpreter_.SetExecutionPlan(new_plan); + } + Interpreter interpreter_; }; ",0,train 0632e92abc4f08ffacf6802205f9880accf7ecd2,tensorflow/tensorflow,"DOCFIX: hmc.sample_chain kwarg num_steps_between_results docstring seemed to indicate a different type of thinning than what is actually going on. PiperOrigin-RevId: 186349630",hmc_impl.py,"@@ -109,10 +109,13 @@ def sample_chain( Note: `target_log_prob_fn` is called exactly twice. - Only one out of every `num_steps_between_samples + 1` steps is included in the - returned results. This ""thinning"" comes at a cost of reduced statistical - power, while reducing memory requirements and autocorrelation. For more - discussion see [1]. + Since HMC states are correlated, it is sometimes desirable to produce + additional intermediate states, and then discard them, ending up with a set of + states with decreased autocorrelation. See [1]. Such ""thinning"" is made + possible by setting `num_steps_between_results > 0`. The chain then takes + `num_steps_between_results` extra steps between the steps that make it into + the results. The extra steps are never materialized (in calls to `sess.run`), + and thus do not increase memory requirements. [1]: ""Statistically efficient thinning of a Markov chain sampler."" Art B. Owen. April 2017. @@ -225,10 +228,8 @@ def sample_chain( Default value: 0 (i.e., no burn-in). num_steps_between_results: Integer number of chain steps between collecting a result. Only one out of every `num_steps_between_samples + 1` steps is - included in the returned results. This ""thinning"" comes at a cost of - reduced statistical power, while reducing memory requirements and - autocorrelation. For more discussion see [1]. - Default value: 0 (i.e., no subsampling). + included in the returned results. The number of returned chain states is + still equal to `num_results`. Default value: 0 (i.e., no thinning). seed: Python integer to seed the random number generator. current_target_log_prob: (Optional) `Tensor` representing the value of `target_log_prob_fn` at the `current_state`. The only reason to specify ",0,train f6533187f2bba0e6717d5a3c7cd018b311392a56,tensorflow/tensorflow,"Improve error message in ColocateResourceAndRefEdges. PiperOrigin-RevId: 237395227",colocation_graph.cc,"@@ -212,7 +212,9 @@ Status Member::EnsureCompatibilityAcrossResourceEdge( ""Cannot place the graph because a reference or resource edge "" ""connects colocation groups with incompatible assigned devices: "", DeviceNameUtils::ParsedNameToString(src_root.assigned_device_name_), - "" vs "", DeviceNameUtils::ParsedNameToString(assigned_device_name_)); + "" vs "", DeviceNameUtils::ParsedNameToString(assigned_device_name_), + "". 
The edge src node is "", src.name(), "" , and the dst node is "", + dst.name()); } if (DeviceNameUtils::AreCompatibleDevNames(src_root.requested_device_name_, ",0,train f436e2a967a66b81239aec398a847dc266dbd08e,tensorflow/tensorflow,"Fix minor typos exection -> execution hitogram -> histogram opertor -> operator",c_api_experimental_test.cc,"@@ -267,10 +267,10 @@ TEST(CAPI, MonitoringMultipleSampler) { TFE_MonitoringSamplerCellAdd(cell1, 2.0); TF_Buffer* result1 = TF_NewBuffer(); TFE_MonitoringSamplerCellValue(cell1, result1); - tensorflow::HistogramProto hitogram1; - EXPECT_TRUE(hitogram1.ParseFromString( + tensorflow::HistogramProto histogram1; + EXPECT_TRUE(histogram1.ParseFromString( {reinterpret_cast(result1->data), result1->length})); - EXPECT_EQ(hitogram1.sum(), 3.0); + EXPECT_EQ(histogram1.sum(), 3.0); delete result1; auto* sampler2 = TFE_MonitoringNewSampler2(""test/sampler2"", buckets, status, @@ -281,10 +281,10 @@ TEST(CAPI, MonitoringMultipleSampler) { TFE_MonitoringSamplerCellAdd(cell2, 3.0); TF_Buffer* result2 = TF_NewBuffer(); TFE_MonitoringSamplerCellValue(cell2, result2); - tensorflow::HistogramProto hitogram2; - EXPECT_TRUE(hitogram2.ParseFromString( + tensorflow::HistogramProto histogram2; + EXPECT_TRUE(histogram2.ParseFromString( {reinterpret_cast(result2->data), result2->length})); - EXPECT_EQ(hitogram2.sum(), 5.0); + EXPECT_EQ(histogram2.sum(), 5.0); delete result2; TFE_MonitoringDeleteBuckets(buckets); ",0,train f436e2a967a66b81239aec398a847dc266dbd08e,tensorflow/tensorflow,"Fix minor typos exection -> execution hitogram -> histogram opertor -> operator",client.h,"@@ -42,7 +42,7 @@ class Client { // Compile the computation with the given argument shapes and returns the // handle to the compiled executable. The compiled executable is cached on the - // service, and the returned handle can be used for exection without + // service, and the returned handle can be used for execution without // re-compile. // * The shape and layout of the arguments being executed with will affect how // the computation is compiled. If argument_shapes is empty, the parameters' ",0,train f436e2a967a66b81239aec398a847dc266dbd08e,tensorflow/tensorflow,"Fix minor typos exection -> execution hitogram -> histogram opertor -> operator",runtime_matmul_mkl.cc,"@@ -110,7 +110,7 @@ __xla_cpu_runtime_MKLSingleThreadedMatMulF32(const void* run_options_ptr, int64 m, int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { - // Set the thread number to 1 for single threaded excution. + // Set the thread number to 1 for single threaded execution. int prev_num_threads = mkl_set_num_threads_local(1); MatMulF32(nullptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); // Set thread number back to the previous number. @@ -123,7 +123,7 @@ __xla_cpu_runtime_MKLSingleThreadedMatMulF64(const void* run_options_ptr, double* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { - // Set the thread number to 1 for single threaded excution. + // Set the thread number to 1 for single threaded execution. int prev_num_threads = mkl_set_num_threads_local(1); MatMulF64(nullptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); // Set thread number back to the previous number. 
",0,train f436e2a967a66b81239aec398a847dc266dbd08e,tensorflow/tensorflow,"Fix minor typos exection -> execution hitogram -> histogram opertor -> operator",distributed_training_utils.py,"@@ -864,7 +864,7 @@ def _make_execution_function_with_cloning(model, mode): distributed_function = _make_graph_execution_function(model, mode) # We cache the distributed execution function on the model since creating - # distributed models and exection functions are expensive. + # distributed models and execution functions are expensive. distributed_model._distributed_function = distributed_function return distributed_function ",0,train 3583bf48e9032aa21861b6dbddfd06ea383f8f2a,tensorflow/tensorflow,Add definition of what PS stands for,test_util.py,"@@ -2708,6 +2708,19 @@ def create_local_cluster(num_workers, ps_config=None): """"""Create and start local servers and return the associated `Server` objects. + ""PS"" stands for ""parameter server"": a task responsible for storing and + updating the model's parameters. Other tasks send updates to these parameters + as they work on optimizing the parameters. This particular division of labor + between tasks is not required, but is common for distributed training. + + Read more at https://www.tensorflow.org/guide/extend/architecture + + TODO: image from https://www.tensorflow.org/images/diag1.svg + + Figure 2 illustrates the interaction of these components. + ""/job:worker/task:0"" and ""/job:ps/task:0"" are both tasks with worker services. + + Example: ```python workers, _ = tf.test.create_local_cluster(num_workers=2, num_ps=2) ",0,test bbed106374e5db95057f19fd17113811bd85c4b3,tensorflow/tensorflow,Fix a typo,map_dataset_op_test.cc,"@@ -550,7 +550,7 @@ TEST_P(ParameterizedMapDatasetOpTest, Roundtrip) { TF_EXPECT_OK(iterator->Save(serialization_ctx.get(), &writer)); TF_EXPECT_OK(writer.Flush()); VariantTensorDataReader reader(&data); - TF_EXPECT_OK(RestoreIterator(iterator_ctx.get(), &reader, ""Iterator"", + TF_EXPECT_OK(RestoreIterator(iterator_context.get(), &reader, ""Iterator"", *map_dataset, &iterator)); while (cur_iteration <= breakpoint) { ",0,train 39357a6f3d6cc75bb8eee002bd7c05b117a05968,tensorflow/tensorflow,"Allow grayscale images to be 2D, either [H, W] or [H, 1] PiperOrigin-RevId: 248224634",image_ops_impl.py,"@@ -157,21 +157,21 @@ def _Check3DImage(image, require_static=True): def _Assert3DImage(image): """"""Assert that we are working with a properly shaped image. - Performs the check statically if possible (i.e. if the shape - is statically known). Otherwise adds a control dependency - to an assert op that checks the dynamic shape. + Performs the check statically if possible (i.e. if the shape + is statically known). Otherwise adds a control dependency + to an assert op that checks the dynamic shape. - Args: - image: 3-D Tensor of shape [height, width, channels] + Args: + image: 3-D Tensor of shape [height, width, channels] - Raises: - ValueError: if `image.shape` is not a 3-vector. + Raises: + ValueError: if `image.shape` is not a 3-vector. - Returns: - If the shape of `image` could be verified statically, `image` is - returned unchanged, otherwise there will be a control dependency - added that asserts the correct dynamic shape. - """""" + Returns: + If the shape of `image` could be verified statically, `image` is + returned unchanged, otherwise there will be a control dependency + added that asserts the correct dynamic shape. 
+ """""" return control_flow_ops.with_dependencies( _Check3DImage(image, require_static=False), image) @@ -179,20 +179,20 @@ def _Assert3DImage(image): def _AssertAtLeast3DImage(image): """"""Assert that we are working with a properly shaped image. - Performs the check statically if possible (i.e. if the shape - is statically known). Otherwise adds a control dependency - to an assert op that checks the dynamic shape. + Performs the check statically if possible (i.e. if the shape + is statically known). Otherwise adds a control dependency + to an assert op that checks the dynamic shape. - Args: - image: >= 3-D Tensor of size [*, height, width, depth] + Args: + image: >= 3-D Tensor of size [*, height, width, depth] - Raises: - ValueError: if image.shape is not a [>= 3] vector. + Raises: + ValueError: if image.shape is not a [>= 3] vector. - Returns: - If the shape of `image` could be verified statically, `image` is - returned unchanged, otherwise there will be a control dependency - added that asserts the correct dynamic shape. + Returns: + If the shape of `image` could be verified statically, `image` is + returned unchanged, otherwise there will be a control dependency + added that asserts the correct dynamic shape. """""" return control_flow_ops.with_dependencies( _CheckAtLeast3DImage(image, require_static=False), image) @@ -241,40 +241,37 @@ def _CheckAtLeast3DImage(image, require_static=True): def _AssertGrayscaleImage(image): - """"""Assert that we are working with a properly shaped - - grayscale image. + """"""Assert that we are working with a properly shaped grayscale image. - Performs the check statically if possible (i.e. if the shape - is statically known). Otherwise adds a control dependency - to an assert op that checks the dynamic shape. + Performs the check statically if possible (i.e. if the shape + is statically known). Otherwise adds a control dependency + to an assert op that checks the dynamic shape. - Args: - image: >= 3-D Tensor of size [*, height, width, depth] + Args: + image: >= 2-D Tensor of size [*, 1] - Raises: - ValueError: if image.shape is not a [>= 3] vector or if - last dimension is not size 1. + Raises: + ValueError: if image.shape is not a [>= 2] vector or if + last dimension is not size 1. - Returns: - If the shape of `image` could be verified statically, `image` is - returned unchanged, otherwise there will be a control dependency - added that asserts the correct dynamic shape. + Returns: + If the shape of `image` could be verified statically, `image` is + returned unchanged, otherwise there will be a control dependency + added that asserts the correct dynamic shape. """""" return control_flow_ops.with_dependencies( _CheckGrayscaleImage(image, require_static=False), image) def _CheckGrayscaleImage(image, require_static=True): - """"""Assert that we are working with properly shaped - - grayscale image. + """"""Assert that we are working with properly shaped grayscale image. Args: - image: >= 3-D Tensor of size [*, height, width, depth] + image: >= 2-D Tensor of size [*, 1] + require_static: Boolean, whether static shape is required. Raises: - ValueError: if image.shape is not a [>= 3] vector or if + ValueError: if image.shape is not a [>= 2] vector or if last dimension is not size 1. 
Returns: @@ -283,11 +280,11 @@ def _CheckGrayscaleImage(image, require_static=True): """""" try: if image.get_shape().ndims is None: - image_shape = image.get_shape().with_rank(3) + image_shape = image.get_shape().with_rank(2) else: - image_shape = image.get_shape().with_rank_at_least(3) + image_shape = image.get_shape().with_rank_at_least(2) except ValueError: - raise ValueError('A grayscale image must be at least three-dimensional.') + raise ValueError('A grayscale image must be at least two-dimensional.') if require_static and not image_shape.is_fully_defined(): raise ValueError('\'image\' must be fully defined.') if image_shape.is_fully_defined(): @@ -302,7 +299,7 @@ def _CheckGrayscaleImage(image, require_static=True): check_ops.assert_greater_equal( array_ops.rank(image), 3, - message='A grayscale image must be at least three-dimensional.') + message='A grayscale image must be at least two-dimensional.') ] else: return [] @@ -468,6 +465,7 @@ def _flip(image, flip_index, scope_name): image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor of shape `[height, width, channels]`. flip_index: 0 For vertical, 1 for horizontal. + scope_name: string, scope name. Returns: A tensor of the same type and shape as `image`. @@ -2168,7 +2166,7 @@ def decode_image(contents, expand_animations: Controls the shape of the returned op's output. If `True`, the returned op will produce a 3-D tensor for PNG, JPEG, and BMP files; and a 4-D tensor for all GIFs, whether animated or not. - If, `False`, the returned op will produce a 3-D tensor for all file + If, `False`, the returned op will produce a 3-D tensor for all file types and will truncate animated GIFs to the first frame. Returns: ",0,train 39357a6f3d6cc75bb8eee002bd7c05b117a05968,tensorflow/tensorflow,"Allow grayscale images to be 2D, either [H, W] or [H, 1] PiperOrigin-RevId: 248224634",image_ops_test.py,"@@ -234,13 +234,13 @@ class GrayscaleToRGBTest(test_util.TensorFlowTestCase): # tests if an exception is raised if a two dimensional # input is used, i.e. the images have shape [height, width] with self.cached_session(use_gpu=True): - # 2-D input without batch dimension. - x_np = np.array([[1, 2]], dtype=np.uint8).reshape([1, 2]) + # 1-D input without batch dimension. + x_np = np.array([[1, 2]], dtype=np.uint8).reshape([2]) x_tf = constant_op.constant(x_np, shape=x_np.shape) # this is the error message we expect the function to raise - err_msg = ""A grayscale image must be at least three-dimensional"" + err_msg = ""A grayscale image must be at least two-dimensional"" with self.assertRaisesRegexp(ValueError, err_msg): image_ops.grayscale_to_rgb(x_tf) @@ -4553,11 +4553,11 @@ class NonMaxSuppressionWithOverlapsTest(test_util.TensorFlowTestCase): [0.2, 0.0, 1.0], ] scores_np = [0.7, 0.9, 0.1] - max_ouput_size_np = 3 + max_output_size_np = 3 overlaps = constant_op.constant(overlaps_np) scores = constant_op.constant(scores_np) - max_output_size = constant_op.constant(max_ouput_size_np) + max_output_size = constant_op.constant(max_output_size_np) overlap_threshold = 0.6 score_threshold = 0.4 ",0,train 42b5d6692bf87c79efbd3fa688ed6c49bcdc6254,tensorflow/tensorflow,"Fix Keras API functional API op cloning inside TPUEstimator. The ops that are being cloned have the TPU attributes added. When the NodeDef is cloned, they continue to have the attribute, so the TPUReplicateContext complains about nesting. Add a private _cloned attribute to indicate that it was cloned by TensorFlowOpLayer. 
PiperOrigin-RevId: 276175985 Change-Id: I99cd09e74d2c573bc625fa284713858d82529a37",base_layer.py,"@@ -2500,6 +2500,9 @@ class TensorFlowOpLayer(Layer): def _make_node_def(self, graph): node_def = node_def_pb2.NodeDef() node_def.CopyFrom(self.node_def) + # Used in TPUReplicateContext to indicate whether this node has been cloned + # and to not add TPU attributes. + node_def.attr['_cloned'].b = True node_def.name = graph.unique_name(node_def.name) return node_def ",0,train 42b5d6692bf87c79efbd3fa688ed6c49bcdc6254,tensorflow/tensorflow,"Fix Keras API functional API op cloning inside TPUEstimator. The ops that are being cloned have the TPU attributes added. When the NodeDef is cloned, they continue to have the attribute, so the TPUReplicateContext complains about nesting. Add a private _cloned attribute to indicate that it was cloned by TensorFlowOpLayer. PiperOrigin-RevId: 276175985 Change-Id: I99cd09e74d2c573bc625fa284713858d82529a37",base_layer_utils.py,"@@ -28,6 +28,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.keras import backend from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_util from tensorflow.python.ops import control_flow_v2_func_graphs from tensorflow.python.ops import init_ops from tensorflow.python.ops import init_ops_v2 @@ -230,10 +231,15 @@ def _create_keras_history_helper(tensors, processed_ops, created_layers): else: # Treat any value not originating from a `keras.Input` as # a constant. Variables cannot be supported. - if (distribution_strategy_context.in_cross_replica_context() and - not ops.executing_eagerly_outside_functions()): + ds_with_session = ( + distribution_strategy_context.in_cross_replica_context() and + not ops.executing_eagerly_outside_functions()) + using_xla = control_flow_util.GraphOrParentsInXlaContext( + ops.get_default_graph()) + if ds_with_session or using_xla: # In Legacy Graph mode, evaluating here makes Session be - # configured improperly. + # configured improperly. The downside of this is that saving + # via `get_config` breaks, but SavedModel still works. constants[i] = op_input else: with ops.init_scope(): ",0,train 42b5d6692bf87c79efbd3fa688ed6c49bcdc6254,tensorflow/tensorflow,"Fix Keras API functional API op cloning inside TPUEstimator. The ops that are being cloned have the TPU attributes added. When the NodeDef is cloned, they continue to have the attribute, so the TPUReplicateContext complains about nesting. Add a private _cloned attribute to indicate that it was cloned by TensorFlowOpLayer. PiperOrigin-RevId: 276175985 Change-Id: I99cd09e74d2c573bc625fa284713858d82529a37",tpu.py,"@@ -486,8 +486,13 @@ class TPUReplicateContext(control_flow_ops.XLAControlFlowContext): raise NotImplementedError( ""Non-resource Variables are not supported inside TPU computations "" ""(operator name: %s)"" % op.name) - if _TPU_REPLICATE_ATTR in op.node_def.attr: - raise ValueError(""TPU computations cannot be nested"") + + # TensorFlowOpLayer may clone nodes that are in tpu.rewrite()s. It'll add + # the ""_cloned"" attribute and we should continue in that case. 
+ if (_TPU_REPLICATE_ATTR in op.node_def.attr and + ""_cloned"" not in op.node_def.attr): + raise ValueError(""TPU computations cannot be nested on op (%s)"" % + op) op._set_attr_with_buf( _TPU_REPLICATE_ATTR, self._tpu_relicate_attr_buf._buffer) if self._outside_compilation_cluster: ",0,train 9e78991b5c380b7fba0444685e5c6ef40e3c5b26,tensorflow/tensorflow,"Fix typo in Tensorflow control_flow_ops_py_test. The test would fall back to GPU:0 when unable to find a GPU. This should be CPU. PiperOrigin-RevId: 212649435",control_flow_ops_py_test.py,"@@ -1753,7 +1753,7 @@ class ControlFlowTest(test.TestCase): def _testWhileGrad_ColocateGradients(self, colocate): gpu_dev_name = test.gpu_device_name() if test.is_gpu_available( - ) else ""/device:GPU:0"" + ) else ""/device:CPU:0"" graph = ops.Graph() with graph.as_default(): ",0,train 0f5d0cfc2a738c5605cb23a1975ac4a1ceb11e24,tensorflow/tensorflow,"[tfdbg] Fix gRPC message length limit issue in source remote Fixes https://github.com/tensorflow/tensorboard/issues/1103 PiperOrigin-RevId: 257419107",grpc_debug_server.py,"@@ -346,7 +346,10 @@ class EventListenerBaseServicer(debug_service_pb2_grpc.EventListenerServicer): if self._server_started: raise ValueError(""Server has already started running"") - self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) + no_max_message_sizes = [(""grpc.max_receive_message_length"", -1), + (""grpc.max_send_message_length"", -1)] + self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=10), + options=no_max_message_sizes) debug_service_pb2_grpc.add_EventListenerServicer_to_server(self, self.server) self.server.add_insecure_port(""[::]:%d"" % self._server_port) ",0,test 0f5d0cfc2a738c5605cb23a1975ac4a1ceb11e24,tensorflow/tensorflow,"[tfdbg] Fix gRPC message length limit issue in source remote Fixes https://github.com/tensorflow/tensorboard/issues/1103 PiperOrigin-RevId: 257419107",source_remote.py,"@@ -28,7 +28,6 @@ from tensorflow.python.debug.lib import common from tensorflow.python.debug.lib import debug_service_pb2_grpc from tensorflow.python.debug.lib import source_utils from tensorflow.python.platform import gfile -from tensorflow.python.platform import tf_logging from tensorflow.python.profiler import tfprof_logger @@ -96,11 +95,6 @@ def _source_file_paths_outside_tensorflow_py_library(code_defs, id_to_string): return non_tf_files -def grpc_message_length_bytes(): - """"""Maximum gRPC message length in bytes."""""" - return 4 * 1024 * 1024 - - def _send_call_tracebacks(destinations, origin_stack, is_eager_execution=False, @@ -169,20 +163,14 @@ def _send_call_tracebacks(destinations, debugged_source_files.append(source_files) for destination in destinations: - channel = grpc.insecure_channel(destination) + no_max_message_sizes = [(""grpc.max_receive_message_length"", -1), + (""grpc.max_send_message_length"", -1)] + channel = grpc.insecure_channel(destination, options=no_max_message_sizes) stub = debug_service_pb2_grpc.EventListenerStub(channel) stub.SendTracebacks(call_traceback) if send_source: - for path, source_files in zip( - source_file_paths, debugged_source_files): - if source_files.ByteSize() < grpc_message_length_bytes(): - stub.SendSourceFiles(source_files) - else: - tf_logging.warn( - ""The content of the source file at %s is not sent to "" - ""gRPC debug server %s, because the message size exceeds "" - ""gRPC message length limit (%d bytes)."" % ( - path, destination, grpc_message_length_bytes())) + for source_files in debugged_source_files: + stub.SendSourceFiles(source_files) 
def send_graph_tracebacks(destinations, ",0,test 0f5d0cfc2a738c5605cb23a1975ac4a1ceb11e24,tensorflow/tensorflow,"[tfdbg] Fix gRPC message length limit issue in source remote Fixes https://github.com/tensorflow/tensorboard/issues/1103 PiperOrigin-RevId: 257419107",source_remote_test.py,"@@ -21,6 +21,8 @@ from __future__ import print_function import os import traceback +import grpc + from tensorflow.core.debug import debug_service_pb2 from tensorflow.python.client import session from tensorflow.python.debug.lib import grpc_debug_test_server @@ -129,9 +131,17 @@ class SendTracebacksTest(test_util.TensorFlowTestCase): send_traceback = traceback.extract_stack() send_lineno = line_number_above() - source_remote.send_graph_tracebacks( - [self._server_address, self._server_address_2], - ""dummy_run_key"", send_traceback, sess.graph) + + with test.mock.patch.object( + grpc, ""insecure_channel"", + wraps=grpc.insecure_channel) as mock_grpc_channel: + source_remote.send_graph_tracebacks( + [self._server_address, self._server_address_2], + ""dummy_run_key"", send_traceback, sess.graph) + mock_grpc_channel.assert_called_with( + test.mock.ANY, + options=[(""grpc.max_receive_message_length"", -1), + (""grpc.max_send_message_length"", -1)]) servers = [self._server, self._server_2] for server in servers: @@ -157,51 +167,6 @@ class SendTracebacksTest(test_util.TensorFlowTestCase): self.assertEqual([""dummy_run_key""], server.query_call_keys()) self.assertEqual([sess.graph.version], server.query_graph_versions()) - def testSourceFileSizeExceedsGrpcMessageLengthLimit(self): - """"""In case source file size exceeds the grpc message length limit. - - it ought not to have been sent to the server. - """""" - this_func_name = ""testSourceFileSizeExceedsGrpcMessageLengthLimit"" - - # Patch the method to simulate a very small message length limit. - with test.mock.patch.object( - source_remote, ""grpc_message_length_bytes"", return_value=2): - with session.Session() as sess: - a = variables.Variable(21.0, name=""two/a"") - a_lineno = line_number_above() - b = variables.Variable(2.0, name=""two/b"") - b_lineno = line_number_above() - x = math_ops.add(a, b, name=""two/x"") - x_lineno = line_number_above() - - send_traceback = traceback.extract_stack() - send_lineno = line_number_above() - source_remote.send_graph_tracebacks( - [self._server_address, self._server_address_2], - ""dummy_run_key"", send_traceback, sess.graph) - - servers = [self._server, self._server_2] - for server in servers: - # Even though the source file content is not sent, the traceback - # should have been sent. - tb = server.query_op_traceback(""two/a"") - self.assertIn((self._curr_file_path, a_lineno, this_func_name), tb) - tb = server.query_op_traceback(""two/b"") - self.assertIn((self._curr_file_path, b_lineno, this_func_name), tb) - tb = server.query_op_traceback(""two/x"") - self.assertIn((self._curr_file_path, x_lineno, this_func_name), tb) - - self.assertIn( - (self._curr_file_path, send_lineno, this_func_name), - server.query_origin_stack()[-1]) - - tf_trace_file_path = ( - self._findFirstTraceInsideTensorFlowPyLibrary(x.op)) - # Verify that the source content is not sent to the server. 
- with self.assertRaises(ValueError): - self._server.query_source_file_line(tf_trace_file_path, 0) - def testSendEagerTracebacksToSingleDebugServer(self): this_func_name = ""testSendEagerTracebacksToSingleDebugServer"" send_traceback = traceback.extract_stack() @@ -213,6 +178,20 @@ class SendTracebacksTest(test_util.TensorFlowTestCase): self.assertIn((self._curr_file_path, send_lineno, this_func_name), self._server.query_origin_stack()[-1]) + def testGRPCServerMessageSizeLimit(self): + """"""Assert gRPC debug server is started with unlimited message size."""""" + with test.mock.patch.object( + grpc, ""server"", wraps=grpc.server) as mock_grpc_server: + (_, _, _, server_thread, + server) = grpc_debug_test_server.start_server_on_separate_thread( + poll_server=True) + mock_grpc_server.assert_called_with( + test.mock.ANY, + options=[(""grpc.max_receive_message_length"", -1), + (""grpc.max_send_message_length"", -1)]) + server.stop_server().wait() + server_thread.join() + if __name__ == ""__main__"": googletest.main() ",0,test 47ea7eeb96bd9f46ab1f6a7bfaf0ab8f98ad2e42,tensorflow/tensorflow,"Minor updates to flatbuffer utilities PiperOrigin-RevId: 307732210 Change-Id: I6b97ccdff0323dbf0fd20fc20d6bc7e49d5e08ad",flatbuffer_utils_test.py,"@@ -31,7 +31,7 @@ class WriteReadModelTest(test_util.TensorFlowTestCase): def testWriteReadModel(self): # 1. SETUP # Define the initial model - initial_model = test_utils.build_mock_model_python_object() + initial_model = test_utils.build_mock_model() # Define temporary files tmp_dir = self.get_temp_dir() model_filename = os.path.join(tmp_dir, 'model.tflite') @@ -76,7 +76,7 @@ class StripStringsTest(test_util.TensorFlowTestCase): def testStripStrings(self): # 1. SETUP # Define the initial model - initial_model = test_utils.build_mock_model_python_object() + initial_model = test_utils.build_mock_model() final_model = copy.deepcopy(initial_model) # 2. INVOKE @@ -121,7 +121,7 @@ class RandomizeWeightsTest(test_util.TensorFlowTestCase): def testRandomizeWeights(self): # 1. SETUP # Define the initial model - initial_model = test_utils.build_mock_model_python_object() + initial_model = test_utils.build_mock_model() final_model = copy.deepcopy(initial_model) # 2. INVOKE ",0,train 47ea7eeb96bd9f46ab1f6a7bfaf0ab8f98ad2e42,tensorflow/tensorflow,"Minor updates to flatbuffer utilities PiperOrigin-RevId: 307732210 Change-Id: I6b97ccdff0323dbf0fd20fc20d6bc7e49d5e08ad",test_utils.py,"@@ -14,7 +14,7 @@ # ============================================================================== """"""Utility functions that support testing. -All functions that can be commonly used by various tests are in this file. +All functions that can be commonly used by various tests. 
"""""" from __future__ import absolute_import @@ -25,7 +25,7 @@ from flatbuffers.python import flatbuffers from tensorflow.lite.python import schema_py_generated as schema_fb -def build_mock_model(): +def build_mock_flatbuffer_model(): """"""Creates a flatbuffer containing an example model."""""" builder = flatbuffers.Builder(1024) @@ -205,10 +205,14 @@ def build_mock_model(): return model -def build_mock_model_python_object(): - """"""Creates a python flatbuffer object containing an example model."""""" - model_mock = build_mock_model() - model_obj = schema_fb.Model.GetRootAsModel(model_mock, 0) - model = schema_fb.ModelT.InitFromObj(model_obj) - +def load_model_from_flatbuffer(flatbuffer_model): + """"""Loads a model as a python object from a flatbuffer model."""""" + model = schema_fb.Model.GetRootAsModel(flatbuffer_model, 0) + model = schema_fb.ModelT.InitFromObj(model) return model + + +def build_mock_model(): + """"""Creates an object containing an example model."""""" + model = build_mock_flatbuffer_model() + return load_model_from_flatbuffer(model) ",0,train 47ea7eeb96bd9f46ab1f6a7bfaf0ab8f98ad2e42,tensorflow/tensorflow,"Minor updates to flatbuffer utilities PiperOrigin-RevId: 307732210 Change-Id: I6b97ccdff0323dbf0fd20fc20d6bc7e49d5e08ad",visualize_test.py,"@@ -35,8 +35,8 @@ class VisualizeTest(test_util.TensorFlowTestCase): self.assertEqual('HASHTABLE_LOOKUP', visualize.BuiltinCodeToName(10)) def testFlatbufferToDict(self): - model_data = test_utils.build_mock_model() - model_dict = visualize.CreateDictFromFlatbuffer(model_data) + model = test_utils.build_mock_flatbuffer_model() + model_dict = visualize.CreateDictFromFlatbuffer(model) self.assertEqual(0, model_dict['version']) self.assertEqual(1, len(model_dict['subgraphs'])) self.assertEqual(1, len(model_dict['operator_codes'])) @@ -45,12 +45,11 @@ class VisualizeTest(test_util.TensorFlowTestCase): self.assertEqual(0, model_dict['subgraphs'][0]['tensors'][0]['buffer']) def testVisualize(self): - model_data = test_utils.build_mock_model() - + model = test_utils.build_mock_flatbuffer_model() tmp_dir = self.get_temp_dir() model_filename = os.path.join(tmp_dir, 'model.tflite') with open(model_filename, 'wb') as model_file: - model_file.write(model_data) + model_file.write(model) html_filename = os.path.join(tmp_dir, 'visualization.html') visualize.CreateHtmlFile(model_filename, html_filename) ",0,train b08e6cd85aba13749accab67f9f94f00621ecb9c,tensorflow/tensorflow,"Remove use of gtl/cleanup.h and hash.h in cupti_tracer PiperOrigin-RevId: 310582340 Change-Id: I1c087a126947c1eced849deb3f8a5689ebe08d08",cupti_tracer.cc,"@@ -18,8 +18,6 @@ limitations under the License. #include ""absl/container/flat_hash_map.h"" #include ""absl/container/node_hash_map.h"" #include ""tensorflow/core/lib/core/errors.h"" -#include ""tensorflow/core/lib/gtl/cleanup.h"" -#include ""tensorflow/core/lib/hash/hash.h"" #include ""tensorflow/core/platform/env.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/macros.h"" @@ -286,19 +284,14 @@ void CUPTIAPI FreeCuptiActivityBuffer(CUcontext context, uint32_t stream_id, << reinterpret_cast(buffer) << std::dec << "" size: "" << size << "" valid_size: "" << valid_size; - // Ensure buffer is free when this function returns. 
- auto buffer_cleanup = - gtl::MakeCleanup([buffer] { port::AlignedFree(buffer); }); + if (valid_size > 0) { + VLOG(3) << ""Activity profile for stream "" << stream_id; - if (valid_size <= 0) { - return; + CuptiTracer *cupti_tracer = CuptiTracer::GetCuptiTracerSingleton(); + cupti_tracer->ProcessActivityBuffer(context, stream_id, buffer, valid_size) + .IgnoreError(); } - - VLOG(3) << ""Activity profile for stream "" << stream_id; - - CuptiTracer *cupti_tracer = CuptiTracer::GetCuptiTracerSingleton(); - cupti_tracer->ProcessActivityBuffer(context, stream_id, buffer, valid_size) - .IgnoreError(); + port::AlignedFree(buffer); } void AddKernelEventUponApiExit(CuptiTraceCollector *collector, uint32 device_id, @@ -984,7 +977,7 @@ class CudaEventRecorder { using StreamKey = std::pair; absl::node_hash_map context_infos_; - absl::flat_hash_map> stream_infos_; + absl::flat_hash_map stream_infos_; }; // This hook uses cuda events to measure device side activities. ",0,train 3492b4307ba62b54eca66a64edbcfdbade005d8d,tensorflow/tensorflow,"allow (different) sparse types in unary eltwise ops PiperOrigin-RevId: 436568879",hlo_ops.h,"@@ -84,6 +84,10 @@ class TokenType : public Type::TypeBase { using Base::Base; }; +// Returns the type, but without any sparsity encoding. Used to +// strip sparsity properties of tensor types before is-same tests. +Type getTypeWithoutSparseEncoding(Type tp); + // Shape derivation function that computes the shape of the result based on an // operand. For a 2-dimensional input tensor, this produces IR of the form // ",0,train 3492b4307ba62b54eca66a64edbcfdbade005d8d,tensorflow/tensorflow,"allow (different) sparse types in unary eltwise ops PiperOrigin-RevId: 436568879",hlo_ops.cc,"@@ -50,6 +50,7 @@ limitations under the License. #include ""mlir-hlo/utils/hlo_utils.h"" #include ""mlir/Dialect/Arithmetic/IR/Arithmetic.h"" #include ""mlir/Dialect/Shape/IR/Shape.h"" +#include ""mlir/Dialect/SparseTensor/IR/SparseTensor.h"" #include ""mlir/Dialect/Tensor/IR/Tensor.h"" #include ""mlir/IR/Attributes.h"" #include ""mlir/IR/Builders.h"" @@ -7501,6 +7502,18 @@ static LogicalResult VerifyArgResultAliasAttr(StringAttr attr_name, return success(); } +//===----------------------------------------------------------------------===// +// Type utilities for ignoring sparsity encoding +//===----------------------------------------------------------------------===// + +Type getTypeWithoutSparseEncoding(Type tp) { + if (sparse_tensor::getSparseTensorEncoding(tp)) { + auto rtp = tp.dyn_cast(); + tp = RankedTensorType::get(rtp.getShape(), rtp.getElementType()); + } + return tp; +} + //===----------------------------------------------------------------------===// // Shape inference //===----------------------------------------------------------------------===// ",0,train 924a8f24a9b8b8a3b1a561123f3e4cf9ebe91708,tensorflow/tensorflow,"Fix the version string in setup.py. The PR seemed to miss it. PiperOrigin-RevId: 205443195",setup.py,"@@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n') # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.9.0-rc0' +_VERSION = '1.9.0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', ",0,train b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF. 
inspect.getargspec raises errors if they are present but getfullargspec is perfectly happy to let functions with type annotations pass. PiperOrigin-RevId: 207127930",backprop.py,"@@ -276,7 +276,7 @@ def implicit_grad(f): def _get_arg_spec(f, params, param_args): """"""The positions of the parameters of f to be differentiated in param_args."""""" try: - args = tf_inspect.getargspec(f).args + args = tf_inspect.getfullargspec(f).args except TypeError as e: # TypeError can happen when f is a callable object. if params is None: ",0,train b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF. inspect.getargspec raises errors if they are present but getfullargspec is perfectly happy to let functions with type annotations pass. PiperOrigin-RevId: 207127930",graph_callable.py,"@@ -288,7 +288,7 @@ def _graph_callable_internal(func, shape_and_dtypes): with tmp_graph.as_default(): # Placeholders for the non-variable inputs. func_inputs = _get_graph_callable_inputs(shape_and_dtypes) - func_num_args = len(tf_inspect.getargspec(func).args) + func_num_args = len(tf_inspect.getfullargspec(func).args) if len(func_inputs) != func_num_args: raise TypeError(""The number of arguments accepted by the decorated "" ""function `%s` (%d) must match the number of "" ",0,train b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF. inspect.getargspec raises errors if they are present but getfullargspec is perfectly happy to let functions with type annotations pass. PiperOrigin-RevId: 207127930",backend.py,"@@ -2935,8 +2935,8 @@ def function(inputs, outputs, updates=None, **kwargs): """""" if kwargs: for key in kwargs: - if (key not in tf_inspect.getargspec(session_module.Session.run)[0] and - key not in tf_inspect.getargspec(Function.__init__)[0]): + if (key not in tf_inspect.getfullargspec(session_module.Session.run)[0] + and key not in tf_inspect.getfullargspec(Function.__init__)[0]): msg = ('Invalid argument ""%s"" passed to K.function with TensorFlow ' 'backend') % key raise ValueError(msg) ",0,train b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF. inspect.getargspec raises errors if they are present but getfullargspec is perfectly happy to let functions with type annotations pass. PiperOrigin-RevId: 207127930",base_layer.py,"@@ -175,7 +175,7 @@ class Layer(checkpointable.CheckpointableBase): self.supports_masking = False - call_argspec = tf_inspect.getargspec(self.call) + call_argspec = tf_inspect.getfullargspec(self.call) if 'training' in call_argspec.args: self._expects_training_arg = True else: @@ -904,7 +904,7 @@ class Layer(checkpointable.CheckpointableBase): assert len(call_args) == 1 # TypeError raised earlier in __call__. return call_args[0], call_kwargs else: - call_arg_spec = tf_inspect.getargspec(self.call) + call_arg_spec = tf_inspect.getfullargspec(self.call) # There is no explicit ""inputs"" argument expected or provided to # call(). Arguments which have default values are considered non-inputs, # and arguments without are considered inputs. 
@@ -924,8 +924,8 @@ class Layer(checkpointable.CheckpointableBase): _, unwrapped_call = tf_decorator.unwrap(self.call) bound_args = inspect.getcallargs( unwrapped_call, *call_args, **call_kwargs) - if call_arg_spec.keywords is not None: - var_kwargs = bound_args.pop(call_arg_spec.keywords) + if call_arg_spec.varkw is not None: + var_kwargs = bound_args.pop(call_arg_spec.varkw) bound_args.update(var_kwargs) keyword_arg_names = keyword_arg_names.union(var_kwargs.keys()) all_args = call_arg_spec.args ",0,train b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF. inspect.getargspec raises errors if they are present but getfullargspec is perfectly happy to let functions with type annotations pass. PiperOrigin-RevId: 207127930",network.py,"@@ -215,7 +215,7 @@ class Network(base_layer.Layer): self._base_init(name=name) self._compute_previous_mask = ( - 'mask' in tf_inspect.getargspec(self.call).args or + 'mask' in tf_inspect.getfullargspec(self.call).args or hasattr(self, 'compute_mask')) # A Network does not create weights of its own, thus it is already # built. @@ -309,7 +309,7 @@ class Network(base_layer.Layer): def _init_subclassed_network(self, name=None): self._base_init(name=name) self._is_graph_network = False - call_argspec = tf_inspect.getargspec(self.call) + call_argspec = tf_inspect.getfullargspec(self.call) if 'training' in call_argspec.args: self._expects_training_arg = True else: @@ -788,7 +788,7 @@ class Network(base_layer.Layer): x = base_layer.generate_placeholders_from_shape(input_shape) kwargs = {} - num_call_args = len(tf_inspect.getargspec(self.call).args) + num_call_args = len(tf_inspect.getfullargspec(self.call).args) if self._expects_training_arg and num_call_args == 3: # Has call signature of call(self, input, training) kwargs['training'] = False @@ -1035,9 +1035,9 @@ class Network(base_layer.Layer): if len(computed_data) == 1: computed_tensor, computed_mask = computed_data[0] # Ensure mask propagation if applicable. - if 'mask' in tf_inspect.getargspec(layer.call).args: + if 'mask' in tf_inspect.getfullargspec(layer.call).args: kwargs.setdefault('mask', computed_mask) - if 'training' in tf_inspect.getargspec(layer.call).args: + if 'training' in tf_inspect.getfullargspec(layer.call).args: kwargs.setdefault('training', training) output_tensors = nest.flatten( @@ -1055,9 +1055,9 @@ class Network(base_layer.Layer): else: computed_tensors = [x[0] for x in computed_data] computed_masks = [x[1] for x in computed_data] - if 'mask' in tf_inspect.getargspec(layer.call).args: + if 'mask' in tf_inspect.getfullargspec(layer.call).args: kwargs.setdefault('mask', computed_masks) - if 'training' in tf_inspect.getargspec(layer.call).args: + if 'training' in tf_inspect.getfullargspec(layer.call).args: kwargs.setdefault('training', training) output_tensors = nest.flatten( ",0,train b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF. inspect.getargspec raises errors if they are present but getfullargspec is perfectly happy to let functions with type annotations pass. 
PiperOrigin-RevId: 207127930",generic_utils.py,"@@ -162,7 +162,7 @@ def deserialize_keras_object(identifier, if cls is None: raise ValueError('Unknown ' + printable_module_name + ': ' + class_name) if hasattr(cls, 'from_config'): - arg_spec = tf_inspect.getargspec(cls.from_config) + arg_spec = tf_inspect.getfullargspec(cls.from_config) custom_objects = custom_objects or {} if 'custom_objects' in arg_spec.args: @@ -281,8 +281,8 @@ def has_arg(fn, name, accept_all=False): Returns: bool, whether `fn` accepts a `name` keyword argument. """""" - arg_spec = tf_inspect.getargspec(fn) - if accept_all and arg_spec.keywords is not None: + arg_spec = tf_inspect.getfullargspec(fn) + if accept_all and arg_spec.varkw is not None: return True return name in arg_spec.args ",0,train b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF. inspect.getargspec raises errors if they are present but getfullargspec is perfectly happy to let functions with type annotations pass. PiperOrigin-RevId: 207127930",custom_gradient.py,"@@ -142,9 +142,9 @@ def _graph_mode_decorator(f, *args, **kwargs): # The variables that grad_fn needs to return gradients for are the set of # variables used that are *not* part of the inputs. variables = list(set(tape.watched_variables()) - set(args)) - grad_argspec = tf_inspect.getargspec(grad_fn) + grad_argspec = tf_inspect.getfullargspec(grad_fn) variables_in_signature = (""variables"" in grad_argspec.args or - grad_argspec.keywords) + grad_argspec.varkw) if variables and not variables_in_signature: raise TypeError(""If using @custom_gradient with a function that "" ""uses variables, then grad_fn must accept a keyword "" @@ -194,9 +194,9 @@ def _eager_mode_decorator(f, *args, **kwargs): # The variables that grad_fn needs to return gradients for are the set of # variables used that are *not* part of the inputs. variables = [v for v in set(tape.watched_variables()) if v not in all_inputs] - grad_argspec = tf_inspect.getargspec(grad_fn) - if (variables and - not (""variables"" in grad_argspec.args or grad_argspec.keywords)): + grad_argspec = tf_inspect.getfullargspec(grad_fn) + if (variables and (""variables"" not in grad_argspec.args) and + not grad_argspec.varkw): raise TypeError(""If using @custom_gradient with a function that "" ""uses variables, then grad_fn must accept a keyword "" ""argument 'variables'."") ",0,train b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF. inspect.getargspec raises errors if they are present but getfullargspec is perfectly happy to let functions with type annotations pass. PiperOrigin-RevId: 207127930",deprecation.py,"@@ -388,7 +388,7 @@ def deprecated_args(date, instructions, *deprecated_arg_names_or_tuples, Args: names_to_ok_vals: dict from string arg_name to a list of values, possibly empty, which should not elicit a warning. - arg_spec: Output from tf_inspect.getargspec on the called function. + arg_spec: Output from tf_inspect.getfullargspec on the called function. Returns: Dictionary from arg_name to DeprecatedArgSpec. 
@@ -408,16 +408,16 @@ def deprecated_args(date, instructions, *deprecated_arg_names_or_tuples, decorator_utils.validate_callable(func, 'deprecated_args') deprecated_arg_names = _get_arg_names_to_ok_vals() - arg_spec = tf_inspect.getargspec(func) + arg_spec = tf_inspect.getfullargspec(func) deprecated_positions = _get_deprecated_positional_arguments( deprecated_arg_names, arg_spec) is_varargs_deprecated = arg_spec.varargs in deprecated_arg_names - is_kwargs_deprecated = arg_spec.keywords in deprecated_arg_names + is_kwargs_deprecated = arg_spec.varkw in deprecated_arg_names if (len(deprecated_positions) + is_varargs_deprecated + is_kwargs_deprecated != len(deprecated_arg_names_or_tuples)): - known_args = arg_spec.args + [arg_spec.varargs, arg_spec.keywords] + known_args = arg_spec.args + [arg_spec.varargs, arg_spec.varkw] missing_args = [arg_name for arg_name in deprecated_arg_names if arg_name not in known_args] raise ValueError('The following deprecated arguments are not present ' @@ -467,7 +467,7 @@ def deprecated_args(date, instructions, *deprecated_arg_names_or_tuples, if is_varargs_deprecated and len(args) > len(arg_spec.args): invalid_args.append(arg_spec.varargs) if is_kwargs_deprecated and kwargs: - invalid_args.append(arg_spec.keywords) + invalid_args.append(arg_spec.varkw) for arg_name in deprecated_arg_names: if (arg_name in kwargs and not (deprecated_positions[arg_name].has_ok_value and ",0,train b543d891faf7283b3a7342aa89ecb8ff9d44629a,tensorflow/tensorflow,"Make summary names for linear models unique Change: 130962334",models.py,"@@ -81,8 +81,9 @@ def linear_regression(x, y, init_mean=None, init_stddev=1.0): uniform_unit_scaling_initialzer will be used. """""" with vs.variable_scope('linear_regression'): - logging_ops.histogram_summary('linear_regression.x', x) - logging_ops.histogram_summary('linear_regression.y', y) + scope_name = vs.get_variable_scope().name + logging_ops.histogram_summary('%s.x' % scope_name, x) + logging_ops.histogram_summary('%s.y' % scope_name, y) dtype = x.dtype.base_dtype y_shape = y.get_shape() if len(y_shape) == 1: @@ -103,8 +104,8 @@ def linear_regression(x, y, init_mean=None, init_stddev=1.0): initializer=init_ops.random_normal_initializer( init_mean, init_stddev, dtype=dtype), dtype=dtype) - logging_ops.histogram_summary('linear_regression.weights', weights) - logging_ops.histogram_summary('linear_regression.bias', bias) + logging_ops.histogram_summary('%s.weights' % scope_name, weights) + logging_ops.histogram_summary('%s.bias' % scope_name, bias) return losses_ops.mean_squared_error_regressor(x, y, weights, bias) @@ -139,8 +140,9 @@ def logistic_regression(x, uniform_unit_scaling_initialzer will be used. """""" with vs.variable_scope('logistic_regression'): - logging_ops.histogram_summary('%s.x' % vs.get_variable_scope().name, x) - logging_ops.histogram_summary('%s.y' % vs.get_variable_scope().name, y) + scope_name = vs.get_variable_scope().name + logging_ops.histogram_summary('%s.x' % scope_name, x) + logging_ops.histogram_summary('%s.y' % scope_name, y) dtype = x.dtype.base_dtype # Set up the requested initialization. 
if init_mean is None: @@ -157,10 +159,8 @@ def logistic_regression(x, initializer=init_ops.random_normal_initializer( init_mean, init_stddev, dtype=dtype), dtype=dtype) - logging_ops.histogram_summary('%s.weights' % vs.get_variable_scope().name, - weights) - logging_ops.histogram_summary('%s.bias' % vs.get_variable_scope().name, - bias) + logging_ops.histogram_summary('%s.weights' % scope_name, weights) + logging_ops.histogram_summary('%s.bias' % scope_name, bias) # If no class weight provided, try to retrieve one from pre-defined # tensor name in the graph. if not class_weight: ",0,train 4c960c9c2c54a2f3130af4de46805cf27c616126,tensorflow/tensorflow,"[tf.data] Update benchmarks to reflect moving the `AUTOTUNE` constant from `optimization` to `dataset_ops`. PiperOrigin-RevId: 251289847",autotune_benchmark.py,"@@ -22,7 +22,6 @@ import time import numpy as np from tensorflow.python.client import session -from tensorflow.python.data.experimental.ops import optimization from tensorflow.python.data.ops import dataset_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -42,7 +41,7 @@ class AutotuneBenchmark(test.Benchmark): np.random.rand(4 * k, 1))).repeat() dataset = dataset.map( - math_ops.matmul, num_parallel_calls=optimization.AUTOTUNE) + math_ops.matmul, num_parallel_calls=dataset_ops.AUTOTUNE) options = dataset_ops.Options() options.experimental_optimization.apply_default_optimizations = False options.experimental_optimization.autotune = autotune @@ -78,7 +77,7 @@ class AutotuneBenchmark(test.Benchmark): np.random.rand(4 * k, 1))).repeat() dataset = dataset.map( - math_ops.matmul, num_parallel_calls=optimization.AUTOTUNE) + math_ops.matmul, num_parallel_calls=dataset_ops.AUTOTUNE) dataset = dataset.batch(batch_size=batch_size) options = dataset_ops.Options() options.experimental_optimization.apply_default_optimizations = False @@ -118,7 +117,7 @@ class AutotuneBenchmark(test.Benchmark): dataset = dataset_ops.Dataset.range(1).repeat().interleave( lambda _: dataset, cycle_length=10, - num_parallel_calls=optimization.AUTOTUNE) + num_parallel_calls=dataset_ops.AUTOTUNE) options = dataset_ops.Options() options.experimental_optimization.apply_default_optimizations = False options.experimental_optimization.autotune = autotune @@ -164,21 +163,21 @@ class AutotuneBenchmark(test.Benchmark): return a, math_ops.matmul(x, y) dataset = dataset_a - dataset = dataset.map(f1, num_parallel_calls=optimization.AUTOTUNE) + dataset = dataset.map(f1, num_parallel_calls=dataset_ops.AUTOTUNE) dataset = dataset_ops.Dataset.range(1).repeat().interleave( lambda _: dataset, - num_parallel_calls=optimization.AUTOTUNE, + num_parallel_calls=dataset_ops.AUTOTUNE, cycle_length=2) dataset = dataset_ops.Dataset.zip((dataset, dataset_b)) - dataset = dataset.map(f2, num_parallel_calls=optimization.AUTOTUNE) + dataset = dataset.map(f2, num_parallel_calls=dataset_ops.AUTOTUNE) dataset = dataset_ops.Dataset.range(1).repeat().interleave( lambda _: dataset, - num_parallel_calls=optimization.AUTOTUNE, + num_parallel_calls=dataset_ops.AUTOTUNE, cycle_length=2) dataset = dataset_ops.Dataset.zip((dataset, dataset_c)) - dataset = dataset.map(f2, num_parallel_calls=optimization.AUTOTUNE) + dataset = dataset.map(f2, num_parallel_calls=dataset_ops.AUTOTUNE) options = dataset_ops.Options() options.experimental_optimization.apply_default_optimizations = False options.experimental_optimization.autotune = autotune ",0,train 4c960c9c2c54a2f3130af4de46805cf27c616126,tensorflow/tensorflow,"[tf.data] 
Update benchmarks to reflect moving the `AUTOTUNE` constant from `optimization` to `dataset_ops`. PiperOrigin-RevId: 251289847",parallel_interleave_benchmark.py,"@@ -23,7 +23,6 @@ import numpy as np from tensorflow.python.client import session from tensorflow.python.data.experimental.ops import interleave_ops -from tensorflow.python.data.experimental.ops import optimization from tensorflow.python.data.experimental.ops import sleep from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import ops @@ -96,7 +95,7 @@ class ParallelInterleaveBenchmark(test.Benchmark): def dataset_fn(): return dataset_ops.Dataset.range(1).repeat().interleave( _make_fake_dataset_fn(), - cycle_length=10, num_parallel_calls=optimization.AUTOTUNE) + cycle_length=10, num_parallel_calls=dataset_ops.AUTOTUNE) self._benchmark(dataset_fn=dataset_fn, iters=100, num_elements=1000) ",0,train 6b031486f84e66f112231a75201d521829b389c3,tensorflow/tensorflow,Move header to comply with formatting standard,hlo_verifier_test.cc,"@@ -18,6 +18,7 @@ limitations under the License. #include #include +#include ""absl/strings/str_replace.h"" #include ""tensorflow/compiler/xla/service/hlo_computation.h"" #include ""tensorflow/compiler/xla/service/hlo_instruction.h"" #include ""tensorflow/compiler/xla/service/hlo_module_config.h"" @@ -32,8 +33,6 @@ limitations under the License. #include ""tensorflow/compiler/xla/xla_data.pb.h"" #include ""tensorflow/core/lib/core/status_test_util.h"" -#include ""absl/strings/str_replace.h"" - namespace xla { namespace { ",0,train d3d9dc68ec625ed853b6356757210b063302f396,tensorflow/tensorflow,"internal change PiperOrigin-RevId: 183333411",capture_tpu_profile.cc,"@@ -47,12 +47,14 @@ string GetCurrentTimeStampAsString() { return s; } -ProfileResponse Profile(const string& service_addr, int duration_ms) { +ProfileResponse Profile(const string& service_addr, int duration_ms, + const ProfileOptions& opts) { ProfileRequest request; request.set_duration_ms(duration_ms); request.set_max_events(kMaxEvents); request.add_tools(""input_pipeline""); request.add_tools(""overview_page""); + *request.mutable_opts() = opts; std::cout << ""Limiting the number of trace events to "" << kMaxEvents << std::endl; ::grpc::ClientContext context; @@ -76,6 +78,7 @@ int main(int argc, char** argv) { tensorflow::string FLAGS_service_addr; tensorflow::string FLAGS_logdir; int FLAGS_duration_ms = 2000; + bool FLAGS_include_dataset_ops = true; std::vector flag_list = { tensorflow::Flag(""service_addr"", &FLAGS_service_addr, ""Address of TPU profiler service e.g. localhost:8466""), @@ -83,6 +86,8 @@ int main(int argc, char** argv) { ""Path of TensorBoard log directory e.g. /tmp/tb_log""), tensorflow::Flag(""duration_ms"", &FLAGS_duration_ms, ""Duration of tracing in ms. Default is 2000ms.""), + tensorflow::Flag(""include_dataset_ops"", &FLAGS_include_dataset_ops, + ""Set to false to profile longer TPU device traces.""), }; std::cout << ""Welcome to the Cloud TPU Profiler v"" << TPU_PROFILER_VERSION @@ -97,8 +102,10 @@ int main(int argc, char** argv) { tensorflow::port::InitMain(argv[0], &argc, &argv); int duration_ms = FLAGS_duration_ms; + tensorflow::ProfileOptions opts; + opts.set_include_dataset_ops(FLAGS_include_dataset_ops); tensorflow::ProfileResponse response = - tensorflow::tpu::Profile(FLAGS_service_addr, duration_ms); + tensorflow::tpu::Profile(FLAGS_service_addr, duration_ms, opts); // Use the current timestamp as the run name. 
tensorflow::string run = tensorflow::tpu::GetCurrentTimeStampAsString(); TF_CHECK_OK(tensorflow::tpu::WriteTensorboardTPUProfile( ",0,train 34709045c9cabf6e88a069ecbb1046d054b9d0aa,tensorflow/tensorflow,"Propagate static shape information in tf.boolean_mask Change: 122184973",array_ops_test.py,"@@ -31,14 +31,12 @@ from tensorflow.python.platform import googletest class BooleanMaskTest(test_util.TensorFlowTestCase): - def CheckVersusNumpy(self, ndims_mask, arr_shape): + def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None): """"""Check equivalence between boolean_mask and numpy masking."""""" - arr_size = arr_shape.prod() - arr = np.random.rand(arr_size).reshape(arr_shape) - mask_shape = arr_shape[: ndims_mask] - mask_size = mask_shape.prod() - mask = np.random.randint( - 0, 2, size=mask_size).reshape(mask_shape).astype(bool) + if make_mask is None: + make_mask = lambda shape: np.random.randint(0, 2, size=shape).astype(bool) + arr = np.random.rand(*arr_shape) + mask = make_mask(arr_shape[: ndims_mask]) masked_arr = arr[mask] with self.test_session(): masked_tensor = array_ops.boolean_mask(arr, mask) @@ -47,6 +45,12 @@ class BooleanMaskTest(test_util.TensorFlowTestCase): masked_tensor.eval(), err_msg=""masked_arr:\n%s\n\nmasked_tensor:\n%s"" % ( masked_arr, masked_tensor.eval())) + masked_tensor.get_shape().assert_is_compatible_with(masked_arr.shape) + self.assertSequenceEqual( + masked_tensor.get_shape()[1:].as_list(), + masked_arr.shape[1:], + msg=""shape information lost %s -> %s"" % ( + masked_arr.shape, masked_tensor.get_shape())) def testOneDimensionalMask(self): # Do 1d separately because it's the only easy one to debug! @@ -63,6 +67,14 @@ class BooleanMaskTest(test_util.TensorFlowTestCase): arr_shape = np.random.randint(1, 5, size=ndims_arr) self.CheckVersusNumpy(ndims_mask, arr_shape) + def testEmptyOutput(self): + make_mask = lambda shape: np.zeros(shape, dtype=bool) + for ndims_mask in range(1, 4): + for ndims_arr in range(ndims_mask, ndims_mask + 3): + for _ in range(3): + arr_shape = np.random.randint(1, 5, size=ndims_arr) + self.CheckVersusNumpy(ndims_mask, arr_shape, make_mask=make_mask) + def testWorksWithDimensionsEqualToNoneDuringGraphBuild(self): # The rank of the mask tensor must be specified. This is explained # in the docstring as well. ",0,train 34709045c9cabf6e88a069ecbb1046d054b9d0aa,tensorflow/tensorflow,"Propagate static shape information in tf.boolean_mask Change: 122184973",array_ops.py,"@@ -453,6 +453,11 @@ def boolean_mask(tensor, mask, name=""boolean_mask""): shape_tensor[:ndims_mask].assert_is_compatible_with(shape_mask) tensor = reshape(tensor, concat(0, [[-1], shape(tensor)[ndims_mask:]])) + first_dim = shape_tensor[:ndims_mask].num_elements() + tensor.set_shape( + tensor_shape.as_shape([first_dim]) + .concatenate(shape_tensor[ndims_mask:])) + mask = reshape(mask, [-1]) return _apply_mask_1d(tensor, mask) ",0,train 1943c55f6c6b25f0eef5359914fc1285f828f05c,tensorflow/tensorflow,"Introduce GetFirstResultType() for a pattern This is to facilitate a change to happen in MLIR declarative rewrite rules: a captured operation will change from Operation* to Value* if it just has one result. PiperOrigin-RevId: 262220172",prepare_tf.cc,"@@ -65,6 +65,18 @@ namespace TFL { // pass. namespace { +// Returns the first result type of the given `op`. +Type GetFirstResultType(Operation *op) { return *op->result_type_begin(); } +// TODO(antiagainst): We need overload functions of the above to facilitate +// changes brought by declarative rewrite rules. 
Remove this post variadic +// operand support is improved. +// NOLINTNEXTLINE +Type GetFirstResultType(TF::TransposeOp op) { return op.getType(); } +// NOLINTNEXTLINE +Type GetFirstResultType(TF::ReshapeOp op) { return op.getType(); } +// NOLINTNEXTLINE +Type GetFirstResultType(Value *val) { return val->getType(); } + // Prepare TF operations in functions for subsequent legalization. struct PrepareTFPass : public FunctionPass { void runOnFunction() override; ",0,train 00979a1a952045eac6f19f42f87a003fe0a819c8,tensorflow/tensorflow,"iOS Metal delegate: squared diff operation tests added. PiperOrigin-RevId: 272055201",operations.cc,"@@ -163,6 +163,7 @@ OperationType OperationTypeFromString(const std::string& name) { {""softmax"", OperationType::SOFTMAX}, {""sqrt"", OperationType::SQRT}, {""square"", OperationType::SQUARE}, + {""squared_diff"", OperationType::SQUARED_DIFF}, {""subtract"", OperationType::SUB}, {""tanh"", OperationType::TANH}, {""upsample_2d"", OperationType::UPSAMPLE_2D}, ",0,train 2d0887780f7cac362b40936ff282ff0589edb791,tensorflow/tensorflow,"Add legacy init op to SavedModel py. Change: 136619012",builder.py,"@@ -1,4 +1,4 @@ -## Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the ""License""); # you may not use this file except in compliance with the License. @@ -141,7 +141,8 @@ class SavedModelBuilder(object): Args: assets_collection_to_add: The collection where the asset paths are setup. """""" - asset_source_filepath_list = self._save_assets(assets_collection_to_add) + asset_source_filepath_list = self._maybe_save_assets( + assets_collection_to_add) # Return if there are no assets to write. if len(asset_source_filepath_list) is 0: @@ -167,7 +168,22 @@ class SavedModelBuilder(object): tf_logging.info(""Assets written to: %s"", assets_destination_dir) - def _save_assets(self, assets_collection_to_add=None): + def _maybe_add_legacy_init_op(self, legacy_init_op=None): + """"""Add legacy init op to the SavedModel. + + Args: + legacy_init_op: Optional legacy init op to support backward compatibility. + + Raises: + TypeError if legacy init op is not of type `Operation`. + """""" + if legacy_init_op is not None: + if not isinstance(legacy_init_op, ops.Operation): + raise TypeError(""legacy_init_op needs to be an Operation: %r"" % + legacy_init_op) + ops.add_to_collection(constants.LEGACY_INIT_OP_KEY, legacy_init_op) + + def _maybe_save_assets(self, assets_collection_to_add=None): """"""Saves assets to the meta graph. Args: @@ -225,8 +241,11 @@ class SavedModelBuilder(object): proto_meta_graph_def = self._saved_model.meta_graphs.add() proto_meta_graph_def.CopyFrom(meta_graph_def) - def add_meta_graph(self, tags, signature_def_map=None, - assets_collection=None): + def add_meta_graph(self, + tags, + signature_def_map=None, + assets_collection=None, + legacy_init_op=None): """"""Adds the current meta graph to the SavedModel. Creates a Saver in the current scope and uses the Saver to export the meta @@ -240,6 +259,8 @@ class SavedModelBuilder(object): assets_collection: Assets collection to be saved with SavedModel. Note that this collection should be a subset of the assets saved as part of the first meta graph in the SavedModel. + legacy_init_op: Op or group of ops to execute after the restore op upon a + load. 
Raises: AssertionError: If the variables for the SavedModel have not been saved @@ -251,12 +272,16 @@ class SavedModelBuilder(object): ""Please invoke `add_meta_graph_and_variables()` first."") # Save asset files, if any. - self._save_assets(assets_collection) + self._maybe_save_assets(assets_collection) + + # Add legacy init op to the SavedModel. + self._maybe_add_legacy_init_op(legacy_init_op) saver = tf_saver.Saver( variables.all_variables(), sharded=True, write_version=saver_pb2.SaverDef.V2) + meta_graph_def = saver.export_meta_graph() # Tag the meta graph def and add it to the SavedModel. @@ -266,7 +291,8 @@ class SavedModelBuilder(object): sess, tags, signature_def_map=None, - assets_collection=None): + assets_collection=None, + legacy_init_op=None): """"""Adds the current meta graph to the SavedModel and saves variables. Creates a Saver to save the variables from the provided session. Exports the @@ -282,6 +308,8 @@ class SavedModelBuilder(object): signature_def_map: The map of signature def map to add to the meta graph def. assets_collection: Assets collection to be saved with SavedModel. + legacy_init_op: Op or group of ops to execute after the restore op upon a + load. """""" if self._has_saved_variables: raise AssertionError(""Variables and assets have already been saved. "" @@ -301,6 +329,9 @@ class SavedModelBuilder(object): compat.as_text(variables_dir), compat.as_text(constants.VARIABLES_FILENAME)) + # Add legacy init op to the SavedModel. + self._maybe_add_legacy_init_op(legacy_init_op) + # Save the variables and export meta graph def. saver = tf_saver.Saver( variables.all_variables(), ",0,train 2d0887780f7cac362b40936ff282ff0589edb791,tensorflow/tensorflow,"Add legacy init op to SavedModel py. Change: 136619012",constants.py,"@@ -22,6 +22,8 @@ from __future__ import print_function ASSETS_DIRECTORY = ""assets"" ASSETS_KEY = ""saved_model_assets"" +LEGACY_INIT_OP_KEY = ""legacy_init_op"" + SAVED_MODEL_SCHEMA_VERSION = 1 SAVED_MODEL_FILENAME_PB = ""saved_model.pb"" SAVED_MODEL_FILENAME_PBTXT = ""saved_model.pbtxt"" ",0,train 2d0887780f7cac362b40936ff282ff0589edb791,tensorflow/tensorflow,"Add legacy init op to SavedModel py. Change: 136619012",loader.py,"@@ -29,8 +29,6 @@ variables though will correspond to the saved values from the first meta graph added to the SavedModel using `add_meta_graph_and_variables(...)` in `builder.py`. -TODO(sukritiramesh): Add support for a single init or main op to run upon load. - Typical usage: ```python ... @@ -64,6 +62,7 @@ from __future__ import division from __future__ import print_function import os +import tensorflow as tf from google.protobuf import text_format from tensorflow.core.protobuf import meta_graph_pb2 @@ -150,6 +149,30 @@ def _get_asset_tensors(export_dir, meta_graph_def_to_load): return asset_tensor_dict +def _get_legacy_init_op_tensor(meta_graph_def_to_load): + """"""Gets the legacy init op tensor, if one exists. + + Args: + meta_graph_def_to_load: The meta graph def from the SavedModel to be loaded. + + Returns: + The legacy init op tensor, if it exists and `None` otherwise. + + Raises: + RuntimeError: If the collection def corresponding to the legacy init op key + has other than exactly one tensor. 
+ """""" + collection_def = meta_graph_def_to_load.collection_def + legacy_init_op_tensor = None + if constants.LEGACY_INIT_OP_KEY in collection_def: + legacy_init_ops = collection_def[ + constants.LEGACY_INIT_OP_KEY].node_list.value + if len(legacy_init_ops) != 1: + raise RuntimeError(""Expected exactly one legacy serving init op."") + legacy_init_op_tensor = tf.get_collection(constants.LEGACY_INIT_OP_KEY)[0] + return legacy_init_op_tensor + + def load(sess, tags, export_dir): """"""Loads the model from a SavedModel as specified by tags. @@ -194,7 +217,15 @@ def load(sess, tags, export_dir): saver.restore(sess, variables_path) # Get asset tensors, if any. - _get_asset_tensors(export_dir, meta_graph_def_to_load) + asset_tensors_dictionary = _get_asset_tensors(export_dir, + meta_graph_def_to_load) + + # TODO(sukritiramesh): Add support for a single main op to run upon load, + # which will supersede the legacy_init_op. + legacy_init_op_tensor = _get_legacy_init_op_tensor(meta_graph_def_to_load) + + if legacy_init_op_tensor is not None: + sess.run(fetches=[legacy_init_op_tensor], + feed_dict=asset_tensors_dictionary) - # Return the meta graph def that was loaded into the session. return meta_graph_def_to_load ",0,train 2d0887780f7cac362b40936ff282ff0589edb791,tensorflow/tensorflow,"Add legacy init op to SavedModel py. Change: 136619012",saved_model_test.py,"@@ -373,6 +373,40 @@ class SavedModelTest(tf.test.TestCase): compat.as_bytes(""ignored.txt"")) self.assertFalse(file_io.file_exists(ignored_asset_path)) + def testLegacyInitOp(self): + export_dir = os.path.join(tf.test.get_temp_dir(), ""test_legacy_init_op"") + builder = saved_model_builder.SavedModelBuilder(export_dir) + + with self.test_session(graph=tf.Graph()) as sess: + # Add `v1` and `v2` variables to the graph. + v1 = tf.Variable(1, name=""v1"") + tf.add_to_collection(""v"", v1) + v2 = tf.Variable(2, name=""v2"") + tf.add_to_collection(""v"", v2) + + # Initialize another variable `v3` to 42. + v3 = tf.Variable(42, name=""v3"", trainable=False, collections=[]) + tf.add_to_collection(""v"", v3) + + # Set up an assignment op to be run as part of the legacy_init_op. + assign_v3 = tf.assign(v3, tf.add(v1, v2)) + legacy_init_op = tf.group(assign_v3, name=""legacy_init_op"") + + sess.run(tf.initialize_all_variables()) + builder.add_meta_graph_and_variables( + sess, [""foo""], legacy_init_op=legacy_init_op) + + # Save the SavedModel to disk. + builder.save() + + with self.test_session(graph=tf.Graph()) as sess: + loader.load(sess, [""foo""], export_dir) + self.assertEqual(1, tf.get_collection(""v"")[0].eval()) + self.assertEqual(2, tf.get_collection(""v"")[1].eval()) + # Evaluates to the sum of the first two variables and assigned as part of + # the legacy_init_op, following a restore. + self.assertEqual(3, tf.get_collection(""v"")[2].eval()) + def testOp(self): export_dir = os.path.join(tf.test.get_temp_dir(), ""test_op"") builder = saved_model_builder.SavedModelBuilder(export_dir) ",0,train c96b33aaba891e6ce6aeb8f693e79c56959cbb5f,tensorflow/tensorflow,"Skip some tests for oss kokoro test as initializing tpu system is very slow. Also combine some tests to reduce the number of times when tpu system is re-initialized. 
PiperOrigin-RevId: 426245148 Change-Id: Ic0e7dbae68a6b4497a5638d76951a3da5ffd823a",tpu_embedding_v2_correctness_test.py,"@@ -54,6 +54,12 @@ flags.DEFINE_string('model_dir', os.environ.get('TEST_TMPDIR'), class TPUEmbeddingCorrectness(parameterized.TestCase, test.TestCase): + def skip_if_oss(self): + if FLAGS.project is not None or FLAGS.zone is not None: + self.skipTest( + 'Skipping tests for oss as it is slow to run every test in cloud tpu.' + ) + def setUp(self): super(TPUEmbeddingCorrectness, self).setUp() self.embedding_values = np.array(list(range(32)), dtype=np.float64) @@ -186,6 +192,8 @@ class TPUEmbeddingCorrectness(parameterized.TestCase, test.TestCase): [True, False], [True, False], [True, False])) def test_embedding(self, optimizer_name, training, sparse, is_high_dimensional): + if optimizer_name != 'sgd': + self.skip_if_oss() strategy, mid_level_api, optimizer = ( self._create_strategy_and_mid_level(optimizer_name)) ",0,train c96b33aaba891e6ce6aeb8f693e79c56959cbb5f,tensorflow/tensorflow,"Skip some tests for oss kokoro test as initializing tpu system is very slow. Also combine some tests to reduce the number of times when tpu system is re-initialized. PiperOrigin-RevId: 426245148 Change-Id: Ic0e7dbae68a6b4497a5638d76951a3da5ffd823a",tpu_embedding_v2_test.py,"@@ -67,6 +67,12 @@ flags.DEFINE_string('model_dir', os.environ.get('TEST_TMPDIR'), class TPUEmbeddingCheckpointTest(parameterized.TestCase, test.TestCase): + def skip_if_oss(self): + if FLAGS.project is not None or FLAGS.zone is not None: + self.skipTest( + 'Skipping tests for oss as it is slow to run every test in cloud tpu.' + ) + def setUp(self): super(TPUEmbeddingCheckpointTest, self).setUp() self.resolver = tpu_cluster_resolver.TPUClusterResolver( @@ -161,6 +167,7 @@ class TPUEmbeddingCheckpointTest(parameterized.TestCase, test.TestCase): # This test works right now because we only have one TPU host in the unit # environment. Initializing from checkpoint does not understand how to # pass the sharding info to the restore op right now. + self.skip_if_oss() class TestModule(module.Module): @@ -310,6 +317,8 @@ class TPUEmbeddingCheckpointTest(parameterized.TestCase, test.TestCase): optimizer): # Reinitialize the TPU so that we can re-initialize the embeddings with the # given optimizer. + if optimizer != tpu_embedding_v2_utils.SGD: + self.skip_if_oss() tpu_strategy_util.initialize_tpu_system(self.resolver) optimizer = optimizer(learning_rate=0.1) @@ -333,6 +342,12 @@ class TPUEmbeddingCheckpointTest(parameterized.TestCase, test.TestCase): class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): + def skip_if_oss(self): + if FLAGS.project is not None or FLAGS.zone is not None: + self.skipTest( + 'Skipping tests for oss as it is slow to run every test in cloud tpu.' + ) + def setUp(self): super(TPUEmbeddingTest, self).setUp() self.embedding_values = np.array(list(range(32)), dtype=np.float64) @@ -435,27 +450,23 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): # matter. mid_level_api.build(64) + # Test pass non tensor to apply_gradients. @def_function.function - def test_apply(): + def test_apply_1(): mid_level_api.apply_gradients((1, 2, 3)) with self.assertRaisesRegex(ValueError, 'found non-tensor type'): - strategy.run(test_apply) + strategy.run(test_apply_1) - def test_pass_different_structure_to_apply_gradients(self): - strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') - # We aren't going to actually run anything, so the batch_size here does not - # matter. 
- mid_level_api.build(64) + # Test pass different structure to apply_gradients. @def_function.function - def test_apply(): + def test_apply_2(): # This should be a tuple as feature_config is a tuple of 3 configs. mid_level_api.apply_gradients([1, 2, 3]) with self.assertRaisesRegex( - TypeError, - 'The two structures don\'t have the same nested structure.'): - strategy.run(test_apply) + TypeError, 'The two structures don\'t have the same nested structure.'): + strategy.run(test_apply_2) def test_pass_none_to_apply_gradients(self): strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') @@ -520,13 +531,12 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): self.num_replicas = strategy.num_replicas_in_sync return strategy - def test_dequeue_on_cpu(self): + def test_enqueue_dequeue_apply_gradients_on_cpu(self): + # Dequeue on CPU. mid_level_api = self._create_mid_level() with self.assertRaises(RuntimeError): mid_level_api.dequeue() - - def test_enqueue_on_cpu(self): - mid_level_api = self._create_mid_level() + # Enqueue on CPU. features = { 'watched': sparse_tensor.SparseTensor( indices=self.feature_watched_indices, @@ -534,11 +544,10 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): dense_shape=[2, 2])} with self.assertRaises(RuntimeError): mid_level_api.enqueue(features) - - def test_apply_gradients_on_cpu(self): + # Apply gradient on CPU. mid_level_api = self._create_mid_level() with self.assertRaises(RuntimeError): - mid_level_api.enqueue(None) + mid_level_api.apply_gradients(None) def test_get_embedding_tables_on_cpu(self): mid_level_api = self._create_mid_level() @@ -573,10 +582,10 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): with self.assertRaisesRegex(ValueError, 'Weight specified for dense input'): test_fn() - def test_enqueue_wrong_weight_type_for_sparse_tensor(self): + def test_enqueue_wrong_weight_type_for_sparse_and_ragged_tensor(self): strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') - sparse = self._create_sparse_dataset(strategy) + sparse = self._create_sparse_dataset(strategy, include_weights=True) ragged = self._create_ragged_dataset(strategy, include_weights=True) sparse_iter = iter( strategy.experimental_distribute_dataset( @@ -590,48 +599,32 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): experimental_fetch_to_device=False))) @def_function.function - def test_fn(): + def test_sparse_fn(): def step(): return mid_level_api.dequeue() - features = next(sparse_iter) + features, _ = next(sparse_iter) _, weights = next(ragged_iter) mid_level_api.enqueue(features, weights=weights, training=False) return strategy.run(step) with self.assertRaisesRegex( ValueError, 'which does not match type input which is SparseTensor.'): - test_fn() - - def test_enqueue_wrong_weight_type_for_ragged_tensor(self): - strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') - - sparse = self._create_sparse_dataset(strategy, include_weights=True) - ragged = self._create_ragged_dataset(strategy) - sparse_iter = iter( - strategy.experimental_distribute_dataset( - sparse, - options=distribute_lib.InputOptions( - experimental_fetch_to_device=False))) - ragged_iter = iter( - strategy.experimental_distribute_dataset( - ragged, - options=distribute_lib.InputOptions( - experimental_fetch_to_device=False))) + test_sparse_fn() @def_function.function - def test_fn(): + def test_ragged_fn(): def step(): return mid_level_api.dequeue() _, weights = next(sparse_iter) - features = next(ragged_iter) 
+ features, _ = next(ragged_iter) mid_level_api.enqueue(features, weights=weights, training=False) return strategy.run(step) with self.assertRaisesRegex( ValueError, 'which does not match type input which is RaggedTensor.'): - test_fn() + test_ragged_fn() def test_enqueue_sparse_and_ragged(self): strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') @@ -662,10 +655,10 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): test_fn() - def test_enqueue_incorrect_structure_for_features(self): + def test_enqueue_incorrect_structure_for_features_and_weights(self): strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') - sparse = self._create_sparse_dataset(strategy) + sparse = self._create_sparse_dataset(strategy, include_weights=True) sparse_iter = iter( strategy.experimental_distribute_dataset( sparse, @@ -673,7 +666,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): experimental_fetch_to_device=False))) @def_function.function - def test_fn(): + def test_features_fn(): def step(): return mid_level_api.dequeue() @@ -684,20 +677,10 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): # The error here is raised from nest.assert_same_structure with self.assertRaises(ValueError): - test_fn() - - def test_enqueue_incorrect_structure_for_weights(self): - strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') - - sparse = self._create_sparse_dataset(strategy, include_weights=True) - sparse_iter = iter( - strategy.experimental_distribute_dataset( - sparse, - options=distribute_lib.InputOptions( - experimental_fetch_to_device=False))) + test_features_fn() @def_function.function - def test_fn(): + def test_weights_fn(): def step(): return mid_level_api.dequeue() @@ -708,7 +691,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): # The error here is raised from nest.assert_same_structure with self.assertRaises(ValueError): - test_fn() + test_weights_fn() def test_enqueue_ragged_tensor(self): strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') @@ -812,6 +795,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): @parameterized.parameters([True, False]) def test_enqueue_cpu_tensor_with_outside_compilation(self, use_mlir): + if use_mlir: config.enable_mlir_bridge() @@ -834,6 +818,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): @parameterized.parameters(True, False) def test_enqueue_with_weights(self, ragged): + self.skip_if_oss() strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') weight = 0.5 if ragged: @@ -885,6 +870,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): @parameterized.parameters([True, False]) def test_enqueue_with_outside_compilation(self, use_mlir): + self.skip_if_oss() if use_mlir: config.enable_mlir_bridge() @@ -928,6 +914,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): @parameterized.parameters(True, False) def test_enqueue_with_outside_compilation_in_control_flow(self, use_mlir): + self.skip_if_oss() if use_mlir: config.enable_mlir_bridge() @@ -959,6 +946,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): enqueue_with_outside_compilation() def test_enqueue_with_outside_compilation_non_direct_input(self): + self.skip_if_oss() strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') mid_level_api.build([ TensorShape((self.batch_size, 2)), @@ -987,6 +975,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): 
enqueue_with_outside_compilation() def test_enqueue_with_outside_compilation_auto_mode(self): + self.skip_if_oss() strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') mid_level_api.build([ TensorShape((self.batch_size, 2)), @@ -1483,6 +1472,12 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): class TPUEmbeddingHighDimensionalTensorTest(parameterized.TestCase, test.TestCase): + def skip_if_oss(self): + if FLAGS.project is not None or FLAGS.zone is not None: + self.skipTest( + 'Skipping tests for oss as it is slow to run every test in cloud tpu.' + ) + def setUp(self): super(TPUEmbeddingHighDimensionalTensorTest, self).setUp() self.embedding_values = np.array(list(range(32)), dtype=np.float64) @@ -1815,6 +1810,7 @@ class TPUEmbeddingHighDimensionalTensorTest(parameterized.TestCase, test_fn() def test_not_fully_defined_output_shapes_in_feature_config(self): + self.skip_if_oss() _, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') # Feature config sets undefined output shapes @@ -1823,6 +1819,7 @@ class TPUEmbeddingHighDimensionalTensorTest(parameterized.TestCase, mid_level_api.build() def test_not_fully_defined_output_shapes_for_build(self): + self.skip_if_oss() _, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') # Build with undefined output shape ",0,train 0fccb5d8384d2fd6e0c8d57fc1ebfd094a5c19af,tensorflow/tensorflow,"Replace assert_called() with called to fix python3 test failures PiperOrigin-RevId: 169702185",training_test.py,"@@ -253,7 +253,7 @@ class _TrainingExecutorTrainingTest(object): config=test.mock.ANY, start=False) - mock_server_instance.start.assert_called() + self.assertTrue(mock_server_instance.start.called) mock_est.train.assert_called_with(input_fn=train_spec.input_fn, max_steps=train_spec.max_steps, @@ -365,7 +365,7 @@ class TrainingExecutorRunWorkerTest(_TrainingExecutorTrainingTest, with test.mock.patch.object(time, 'sleep') as mock_sleep: mock_sleep.side_effect = lambda s: self.assertEqual(expected_secs, s) self._run_task(executor) - mock_sleep.assert_called() + self.assertTrue(mock_sleep.called) class TrainingExecutorRunChiefTest(_TrainingExecutorTrainingTest, @@ -546,8 +546,8 @@ class TrainingExecutorRunPsTest(test.TestCase): config=test.mock.ANY, start=False) - mock_server_instance.start.assert_called() - mock_server_instance.join.assert_called() + self.assertTrue(mock_server_instance.start.called) + self.assertTrue(mock_server_instance.join.called) def test_fail_with_empty_cluster_spec(self): mock_est = test.mock.Mock(spec=estimator_lib.Estimator) ",0,train 8950c470bb11a9b94c0dd08d73156008dfac60c9,tensorflow/tensorflow,"Remove automatic control dep wrapping from layers in v2. PiperOrigin-RevId: 316638920 Change-Id: Iad14b1a4b0b14052f34784401b375a14b49a7641",base_layer.py,"@@ -40,7 +40,6 @@ from tensorflow.python.eager import context from tensorflow.python.eager import execute from tensorflow.python.eager import function from tensorflow.python.eager import monitoring -from tensorflow.python.framework import auto_control_deps from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -1105,17 +1104,7 @@ class Layer(module.Module, version_utils.LayerVersionSelector): try: with ops.enable_auto_cast_variables(self._compute_dtype_object): - # Add auto_control_deps in V2 when they are not already added by - # a `tf.function`. 
- if (ops.executing_eagerly_outside_functions() and - not base_layer_utils.is_in_eager_or_tf_function()): - with auto_control_deps.AutomaticControlDependencies() as acd: - outputs = call_fn(cast_inputs, *args, **kwargs) - # Wrap Tensors in `outputs` in `tf.identity` to avoid - # circular dependencies. - outputs = base_layer_utils.mark_as_return(outputs, acd) - else: - outputs = call_fn(cast_inputs, *args, **kwargs) + outputs = call_fn(cast_inputs, *args, **kwargs) except errors.OperatorNotAllowedInGraphError as e: raise TypeError('You are attempting to use Python control ' ",0,train 601e77f51f558724c7b71c2d9d362e724211f813,tensorflow/tensorflow,"[XLA:Python] Add an mlir_module_to_xla_computation Python helper. In passing, remove some unnecessary MLIR dependencies from the XLA Python client. PiperOrigin-RevId: 413926439 Change-Id: Iac9011d9dd446ab88ce6191537085e34991b094a",mlir.cc,"@@ -13,26 +13,32 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include + #include ""llvm/Support/raw_ostream.h"" #include ""mlir/Dialect/StandardOps/IR/Ops.h"" // from @llvm-project #include ""mlir/IR/BuiltinOps.h"" // from @llvm-project #include ""mlir/IR/MLIRContext.h"" // from @llvm-project +#include ""mlir/Parser.h"" // from @llvm-project #include ""pybind11/pybind11.h"" +#include ""tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h"" #include ""tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"" #include ""tensorflow/compiler/mlir/xla/hlo_to_mlir_hlo.h"" #include ""tensorflow/compiler/xla/client/xla_computation.h"" +#include ""tensorflow/compiler/xla/pjrt/mlir_to_hlo.h"" #include ""tensorflow/compiler/xla/python/types.h"" #include ""tensorflow/compiler/xla/status.h"" namespace py = pybind11; namespace xla { +namespace { // Converts an XlaComputation to an MHLO mlir::Module string. Exists for // backwards compatibility. // TODO(phawkins): port remaining users of XlaComputations to use mlir::Modules // instead and delete this function. 
-StatusOr XlaComputationToMlirModule( +StatusOr PyXlaComputationToMlirModule( const XlaComputation& computation) { mlir::MLIRContext context; mlir::OwningModuleRef module = @@ -47,11 +53,43 @@ StatusOr XlaComputationToMlirModule( return s; } +StatusOr PyMlirModuleToXlaComputation(std::string mlir_module, + bool use_tuple_args, + bool return_tuple) { + mlir::MLIRContext context; + mlir::OwningModuleRef module; + context.loadDialect(); + context.loadDialect(); + context.loadDialect(); + mlir::StatusScopedDiagnosticHandler diagnostic_handler(&context); + module = mlir::parseSourceString( + llvm::StringRef(mlir_module.data(), mlir_module.size()), &context); + if (!module) { + return diagnostic_handler.ConsumeStatus(); + } + if (failed(module->verify())) { + VLOG(1) << ""MLIR verification failed.""; + module->dump(); + return diagnostic_handler.ConsumeStatus(); + } + + XlaComputation computation; + TF_RETURN_IF_ERROR( + MlirToXlaComputation(*module, computation, use_tuple_args, return_tuple)); + return computation; +} + +} // namespace + void BuildMlirSubmodule(py::module& m) { py::module mlir_module = m.def_submodule(""mlir"", ""MLIR/XLA integration""); mlir_module.def(""xla_computation_to_mlir_module"", - &XlaComputationToMlirModule); + &PyXlaComputationToMlirModule); + mlir_module.def(""mlir_module_to_xla_computation"", + &PyMlirModuleToXlaComputation, py::arg(""mlir_module""), + py::arg(""use_tuple_args"") = false, + py::arg(""return_tuple"") = false); } } // namespace xla ",0,test 601e77f51f558724c7b71c2d9d362e724211f813,tensorflow/tensorflow,"[XLA:Python] Add an mlir_module_to_xla_computation Python helper. In passing, remove some unnecessary MLIR dependencies from the XLA Python client. PiperOrigin-RevId: 413926439 Change-Id: Iac9011d9dd446ab88ce6191537085e34991b094a",xla_client.py,"@@ -44,7 +44,7 @@ profiler = _xla.profiler # Just an internal arbitrary increasing number to help with backward-compatible # changes. -_version = 45 +_version = 46 xla_platform_names = { 'cpu': 'Host', ",0,test aaac4ac3e9d1d8c48db9e4010459a417a07553d2,tensorflow/tensorflow,"K-FAC: Example using tf.estimator and K-FAC. - Removes FisherEstimator.inv_updates_dict. Users should create directly from FisherEstimator.inv_update_ops. - Adds (cov|inv)_update_(thunks|ops) to KfacOptimizer. PiperOrigin-RevId: 182135826",convnet.py,"@@ -286,7 +286,7 @@ def minimize_loss_distributed(task_id, num_worker_tasks, num_ps_tasks, master, damping=0.001, layer_collection=layer_collection, momentum=0.9) - inv_update_queue = oq.OpQueue(optimizer.inv_updates_dict.values()) + inv_update_queue = oq.OpQueue(optimizer.inv_update_ops) sync_optimizer = tf.train.SyncReplicasOptimizer( opt=optimizer, replicas_to_aggregate=_num_gradient_tasks(num_worker_tasks)) ",0,train aaac4ac3e9d1d8c48db9e4010459a417a07553d2,tensorflow/tensorflow,"K-FAC: Example using tf.estimator and K-FAC. - Removes FisherEstimator.inv_updates_dict. Users should create directly from FisherEstimator.inv_update_ops. - Adds (cov|inv)_update_(thunks|ops) to KfacOptimizer. PiperOrigin-RevId: 182135826",mlp.py,"@@ -239,3 +239,85 @@ def train_mnist_multitower(data_dir, }) return minimize( loss, accuracy, layer_collection, session_config=session_config) + + +def train_mnist_estimator(data_dir, num_epochs, use_fake_data=False): + """"""Train an MLP on MNIST using tf.estimator. + + Args: + data_dir: string. Directory to read MNIST examples from. + num_epochs: int. Number of passes to make over the training set. + use_fake_data: bool. If True, generate a synthetic dataset. 
+ + Returns: + accuracy of model on the final minibatch of training data. + """""" + + # Load a dataset. + def input_fn(): + tf.logging.info(""Loading MNIST into memory."") + return mnist.load_mnist( + data_dir, + num_epochs=num_epochs, + batch_size=64, + flatten_images=True, + use_fake_data=use_fake_data) + + def model_fn(features, labels, mode, params): + """"""Model function for MLP trained with K-FAC. + + Args: + features: Tensor of shape [batch_size, input_size]. Input features. + labels: Tensor of shape [batch_size]. Target labels for training. + mode: tf.estimator.ModeKey. Must be TRAIN. + params: ignored. + + Returns: + EstimatorSpec for training. + + Raises: + ValueError: If 'mode' is anything other than TRAIN. + """""" + del params + + if mode != tf.estimator.ModeKeys.TRAIN: + raise ValueError(""Only training is supposed with this API."") + + # Build a ConvNet. + layer_collection = lc.LayerCollection() + loss, accuracy = build_model( + features, labels, num_labels=10, layer_collection=layer_collection) + + # Train with K-FAC. + global_step = tf.train.get_or_create_global_step() + optimizer = opt.KfacOptimizer( + learning_rate=tf.train.exponential_decay( + 0.00002, global_step, 10000, 0.5, staircase=True), + cov_ema_decay=0.95, + damping=0.0001, + layer_collection=layer_collection, + momentum=0.99) + + # Run cov_update_op every step. Run 1 inv_update_ops per step. + cov_update_op = optimizer.cov_update_op + inv_update_op = tf.group( + tf.contrib.kfac.utils.batch_execute( + global_step, optimizer.inv_update_thunks, batch_size=1)) + with tf.control_dependencies([cov_update_op, inv_update_op]): + train_op = optimizer.minimize(loss, global_step=global_step) + + # Print metrics every 5 sec. + hooks = [ + tf.train.LoggingTensorHook( + { + ""loss"": loss, + ""accuracy"": accuracy + }, every_n_secs=5), + ] + return tf.estimator.EstimatorSpec( + mode=mode, loss=loss, train_op=train_op, training_hooks=hooks) + + # Train until input_fn() is empty with Estimator. This is a prerequisite for + # TPU compatibility. + estimator = tf.estimator.Estimator(model_fn=model_fn) + estimator.train(input_fn=input_fn) ",0,train aaac4ac3e9d1d8c48db9e4010459a417a07553d2,tensorflow/tensorflow,"K-FAC: Example using tf.estimator and K-FAC. - Removes FisherEstimator.inv_updates_dict. Users should create directly from FisherEstimator.inv_update_ops. - Adds (cov|inv)_update_(thunks|ops) to KfacOptimizer. PiperOrigin-RevId: 182135826",mlp_mnist_main.py,"@@ -33,7 +33,11 @@ FLAGS = None def main(argv): _ = argv - if FLAGS.num_towers > 1: + if FLAGS.use_estimator: + if FLAGS.num_towers != 1: + raise ValueError(""Only 1 device supported in tf.estimator example."") + mlp.train_mnist_estimator(FLAGS.data_dir, num_epochs=200) + elif FLAGS.num_towers > 1: mlp.train_mnist_multitower( FLAGS.data_dir, num_epochs=200, num_towers=FLAGS.num_towers) else: @@ -52,5 +56,9 @@ if __name__ == ""__main__"": type=int, default=1, help=""Number of CPUs to split minibatch across."") + parser.add_argument( + ""--use_estimator"", + action=""store_true"", + help=""Use tf.estimator API to train."") FLAGS, unparsed = parser.parse_known_args() tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) ",0,train aaac4ac3e9d1d8c48db9e4010459a417a07553d2,tensorflow/tensorflow,"K-FAC: Example using tf.estimator and K-FAC. - Removes FisherEstimator.inv_updates_dict. Users should create directly from FisherEstimator.inv_update_ops. - Adds (cov|inv)_update_(thunks|ops) to KfacOptimizer. 
PiperOrigin-RevId: 182135826",mlp_test.py,"@@ -53,6 +53,11 @@ class MlpTest(tf.test.TestCase): mlp.train_mnist_multitower( data_dir=None, num_epochs=1, num_towers=2, use_fake_data=True) + def testTrainMnistEstimator(self): + with tf.Graph().as_default(): + # Ensure model training doesn't crash. + mlp.train_mnist_estimator(data_dir=None, num_epochs=1, use_fake_data=True) + if __name__ == ""__main__"": tf.test.main() ",0,train aaac4ac3e9d1d8c48db9e4010459a417a07553d2,tensorflow/tensorflow,"K-FAC: Example using tf.estimator and K-FAC. - Removes FisherEstimator.inv_updates_dict. Users should create directly from FisherEstimator.inv_update_ops. - Adds (cov|inv)_update_(thunks|ops) to KfacOptimizer. PiperOrigin-RevId: 182135826",estimator.py,"@@ -281,11 +281,6 @@ class FisherEstimator(object): return thunk - @property - def inv_updates_dict(self): - """"""Returns a dictionary mapping strings to inv_update_ops."""""" - return {op.name: op for op in self.inv_update_ops} - def _get_grads_lists_gradients(self, tensors): grads_flat = gradients_impl.gradients( self._layers.total_sampled_loss(), ",0,train aaac4ac3e9d1d8c48db9e4010459a417a07553d2,tensorflow/tensorflow,"K-FAC: Example using tf.estimator and K-FAC. - Removes FisherEstimator.inv_updates_dict. Users should create directly from FisherEstimator.inv_update_ops. - Adds (cov|inv)_update_(thunks|ops) to KfacOptimizer. PiperOrigin-RevId: 182135826",optimizer.py,"@@ -137,12 +137,32 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): self._batch_size = array_ops.shape(layer_collection.losses[0].inputs)[0] self._losses = layer_collection.losses - self.cov_update_op = self._fisher_est.cov_update_op - self.inv_update_op = self._fisher_est.inv_update_op - self.inv_updates_dict = self._fisher_est.inv_updates_dict - super(KfacOptimizer, self).__init__(learning_rate, name=name) + @property + def cov_update_thunks(self): + return self._fisher_est.cov_update_thunks + + @property + def cov_update_ops(self): + return self._fisher_est.cov_update_ops + + @property + def cov_update_op(self): + return self._fisher_est.cov_update_op + + @property + def inv_update_thunks(self): + return self._fisher_est.inv_update_thunks + + @property + def inv_update_ops(self): + return self._fisher_est.inv_update_ops + + @property + def inv_update_op(self): + return self._fisher_est.inv_update_op + @property def variables(self): return self._fisher_est.variables ",0,train eb6474b35cd1c5792c9e9034396ba6351c198915,tensorflow/tensorflow,"Fix documentation of ResourceApplyFtrlV2. The doc says that we add grad_with_shrinkage^2 to accum, but what's really added in the op kernel is just grad^2 (same for XLA). So adjust the documentation to reflect the implementation's behavior. Also, add/enable a test for this. 
PiperOrigin-RevId: 378933095 Change-Id: If0258ad3e79e87f2253cc0be878b8034e4f3249a",ftrl_ops_test.py,"@@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import unittest import numpy as np from tensorflow.compiler.tests import xla_test @@ -137,19 +136,16 @@ class ResourceApplyFtrlTest(xla_test.XLATestCase): lr=1, l1=1, l2=-1.25, lr_power=1) self.assertAllClose(0.25 * np.ones((1, 3, 2)), var) - @unittest.skip(""Needs cl/378772774"") def testL2Shrinkage(self): - """"""Test that 2 * l2_shrinkage * var is added to the gradient."""""" - # TODO(kramm): XLA adds grad^2 to accum, not grad_to_use^2 + """"""Test that 2 * l2_shrinkage * var is *not* added to the gradient."""""" _, accum, _ = self._eval( var=np.ones((1, 3, 2)), accum=np.zeros((1, 3, 2)), linear=np.zeros((1, 3, 2)), grad=np.zeros((1, 3, 2)), lr=7, l1=3, l2=7, lr_power=2, l2_shrinkage=0.5) - self.assertAllClose(np.ones((1, 3, 2)), accum) + self.assertAllClose(np.zeros((1, 3, 2)), accum) - @unittest.skip(""Needs cl/378772774"") def testL2ShrinkageOnLinear(self): """"""Test that 2 * l2_shrinkage * var is added to linear."""""" _, _, linear = self._eval( ",0,train f54c05b8375f7cdb1cb6300d7c55914bea3df520,tensorflow/tensorflow,"Return the new graph instead of using input/output param PiperOrigin-RevId: 400762263 Change-Id: I5cfe18d7c959d80a7b7f95fd35b7746048b2c2f0",tfrt_graph_execution_state.cc,"@@ -202,7 +202,14 @@ TfrtGraphExecutionState::CreateOptimizedGraph( result.functionalization_duration = grappler_start_time - functionalization_start_time; - TF_RETURN_IF_ERROR(OptimizeGraph(result.graph, build_graph_options)); + auto status_or_optimized_graph = + OptimizeGraph(*result.graph, build_graph_options); + if (status_or_optimized_graph.ok()) { + result.graph = std::move(status_or_optimized_graph.ValueOrDie()); + } else { + LOG(WARNING) << ""TFRT failed to optimize graph: "" + << status_or_optimized_graph.status(); + } if (VLOG_IS_ON(1)) { DumpGraphToFile(""after_grappler"", *result.graph); @@ -536,21 +543,17 @@ Status OptimizeFunctions(FunctionDefLibrary& flib_proto, } // namespace -Status TfrtGraphExecutionState::OptimizeGraph( - std::unique_ptr& graph, +StatusOr> +TfrtGraphExecutionState::OptimizeGraph( + const tensorflow::Graph& graph, const tensorflow::BuildGraphOptions& build_graph_options) { std::unique_ptr optimized_graph; std::unique_ptr optimized_flib; // Invoke Grappler to optimize the graph. 
- auto status = graph_execution_state_->OptimizeGraph( - build_graph_options, *graph, &graph->flib_def(), &optimized_graph, - &optimized_flib); - - if (!status.ok()) { - LOG(WARNING) << ""TFRT failed to optimize graph: "" << status; - return tensorflow::Status::OK(); - } + TF_RETURN_IF_ERROR(graph_execution_state_->OptimizeGraph( + build_graph_options, graph, &graph.flib_def(), &optimized_graph, + &optimized_flib)); FunctionDefLibrary optimized_flib_proto = optimized_flib->ToProto(); if (run_placer_grappler_on_functions_) { @@ -564,8 +567,7 @@ Status TfrtGraphExecutionState::OptimizeGraph( TF_RETURN_IF_ERROR(optimized_graph->AddFunctionLibrary(optimized_flib_proto)); - graph = std::move(optimized_graph); - return tensorflow::Status::OK(); + return optimized_graph; } } // namespace tfrt_stub ",0,train f54c05b8375f7cdb1cb6300d7c55914bea3df520,tensorflow/tensorflow,"Return the new graph instead of using input/output param PiperOrigin-RevId: 400762263 Change-Id: I5cfe18d7c959d80a7b7f95fd35b7746048b2c2f0",tfrt_graph_execution_state.h,"@@ -81,8 +81,8 @@ class TfrtGraphExecutionState { return graph_execution_state_->flib_def(); } - Status OptimizeGraph( - std::unique_ptr& graph, + StatusOr> OptimizeGraph( + const tensorflow::Graph& graph, const tensorflow::BuildGraphOptions& build_graph_options); std::unique_ptr graph_execution_state_; ",0,train 8a33966dbf9c190199dac4ca529bf70bce9c2a86,tensorflow/tensorflow,"Change PySeqToTensor to return TFE_TensorHandle PiperOrigin-RevId: 289108443 Change-Id: I2aac99acb068b0dae2f8aabf72e323d0d303ebb1",pywrap_tensor.cc,"@@ -252,25 +252,6 @@ TFE_TensorHandle* EagerCast(TFE_Context* ctx, TFE_TensorHandle* handle, #undef RETURN_ERROR } -TFE_TensorHandle* PySeqToTFE_TensorHandle(TFE_Context* ctx, PyObject* value, - DataType dtype) { - tensorflow::TensorHandle* handle = nullptr; - tensorflow::Tensor t; - // TODO(josh11b): Have PySeqToTensor set python errors instead of - // returning Status. - auto cppstatus = tensorflow::PySeqToTensor(value, dtype, &t); - if (cppstatus.ok()) { - cppstatus = tensorflow::TensorHandle::CreateLocalHandle( - t, /*d=*/nullptr, /*op_device=*/nullptr, ctx->context, &handle); - } - if (!cppstatus.ok()) { - PyErr_SetString(PyExc_ValueError, cppstatus.error_message().c_str()); - return nullptr; - } - CHECK_NE(handle, nullptr); - return new TFE_TensorHandle{tensorflow::TensorHandleInterface(handle)}; -} - TFE_TensorHandle* ConvertToEagerTensorUncached(TFE_Context* ctx, PyObject* value, tensorflow::DataType dtype, ",0,test 8a33966dbf9c190199dac4ca529bf70bce9c2a86,tensorflow/tensorflow,"Change PySeqToTensor to return TFE_TensorHandle PiperOrigin-RevId: 289108443 Change-Id: I2aac99acb068b0dae2f8aabf72e323d0d303ebb1",py_seq_tensor.cc,"@@ -15,6 +15,7 @@ limitations under the License. #include ""tensorflow/python/lib/core/py_seq_tensor.h"" +#include ""tensorflow/c/eager/c_api_internal.h"" #include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/tensor_shape.h"" #include ""tensorflow/core/framework/types.h"" @@ -67,7 +68,7 @@ bool IsPyFloat(PyObject* obj) { struct ConverterState { // The inferred tensor shape. - TensorShape inferred_shape; + gtl::InlinedVector inferred_shape; // The inferred tensor data type. 
DataType inferred_dtype; @@ -155,14 +156,14 @@ Status InferShapeAndType(PyObject* obj, ConverterState* state) { } else if (PySequence_Check(obj)) { auto length = PySequence_Length(obj); if (length > 0) { - state->inferred_shape.AddDim(length); + state->inferred_shape.push_back(length); PyObject* elem = nullptr; TF_RETURN_IF_ERROR(SampleElementFromSequence(obj, &elem)); obj = elem; refs_to_clean.push_back(make_safe(obj)); continue; } else if (length == 0) { - state->inferred_shape.AddDim(length); + state->inferred_shape.push_back(length); state->inferred_dtype = DT_INVALID; // Invalid dtype for empty tensors. } else { // The sequence does not have a valid length (PySequence_Length < 0). @@ -247,12 +248,12 @@ struct Converter { Safe_PyObjectPtr seq = make_safe(PySequence_Fast(obj, """")); if (TF_PREDICT_FALSE(seq == nullptr)) return ErrorRectangular; - const int64 s = state->inferred_shape.dim_size(depth); + const int64 s = state->inferred_shape[depth]; if (TF_PREDICT_FALSE(s != PySequence_Fast_GET_SIZE(seq.get()))) { return ErrorRectangular; } - if (state->inferred_shape.dims() - depth > 1) { + if (state->inferred_shape.size() - depth > 1) { /* Iterate over outer dim, and recursively convert each element. */ for (int64 i = 0; i < s; ++i) { const char* error = Helper(PySequence_Fast_GET_ITEM(seq.get(), i), @@ -272,24 +273,31 @@ struct Converter { return nullptr; } - static const char* Convert(PyObject* obj, ConverterState* state, - Tensor* dest) { + static Status Convert(TFE_Context* ctx, PyObject* obj, ConverterState* state, + TFE_TensorHandle** h, const char** error) { /* TODO(josh11b): Allocator & attributes? */ - Tensor result(ConverterTraits::kTypeEnum, state->inferred_shape); - if (state->inferred_shape.dims() == 0) { /* Scalar case */ + Tensor result(ConverterTraits::kTypeEnum, + TensorShape(state->inferred_shape)); + if (state->inferred_shape.empty()) { /* Scalar case */ T value; auto scalar = ZeroDimArrayToScalar(obj, state); - const char* error = ConverterTraits::ConvertScalar(scalar, &value); + *error = ConverterTraits::ConvertScalar(scalar, &value); Py_DECREF(scalar); - if (error != nullptr) return error; + if (*error != nullptr) return errors::InvalidArgument(*error); result.scalar()() = value; } else { T* buf = result.flat().data(); - const char* error = Helper(obj, 0, state, &buf); - if (error != nullptr) return error; + *error = Helper(obj, 0, state, &buf); + if (*error != nullptr) return errors::InvalidArgument(*error); } - *dest = result; - return nullptr; + tensorflow::TensorHandle* handle = nullptr; + auto status = tensorflow::TensorHandle::CreateLocalHandle( + result, /*d=*/nullptr, /*op_device=*/nullptr, ctx->context, &handle); + if (!status.ok()) { + return status; + } + *h = new TFE_TensorHandle{TensorHandleInterface(handle)}; + return Status::OK(); } }; @@ -592,16 +600,14 @@ typedef Converter BoolConverter; } // namespace -#define RETURN_STRING_AS_STATUS(...) 
\ - do { \ - const char* _error = (__VA_ARGS__); \ - if (TF_PREDICT_TRUE(_error == nullptr)) return Status::OK(); \ - return errors::InvalidArgument(_error); \ - } while (0) - -Status PySeqToTensor(PyObject* obj, DataType dtype, Tensor* ret) { +TFE_TensorHandle* PySeqToTFE_TensorHandle(TFE_Context* ctx, PyObject* obj, + DataType dtype) { ConverterState state; - TF_RETURN_IF_ERROR(InferShapeAndType(obj, &state)); + Status status = InferShapeAndType(obj, &state); + if (!status.ok()) { + PyErr_SetString(PyExc_ValueError, status.error_message().c_str()); + return nullptr; + } DataType requested_dtype = DT_INVALID; if (dtype != DT_INVALID) { requested_dtype = dtype; @@ -610,116 +616,131 @@ Status PySeqToTensor(PyObject* obj, DataType dtype, Tensor* ret) { // we just try instead to create a tensor of the inferred type and // let the caller convert it to the requested type using a cast // operation. + const char* error = nullptr; + TFE_TensorHandle* handle = nullptr; + status = errors::Unimplemented(""Missing Python -> Tensor conversion for "", + DataTypeString(state.inferred_dtype)); switch (requested_dtype) { case DT_FLOAT: - if (FloatConverter::Convert(obj, &state, ret) == nullptr) - return Status::OK(); + status = FloatConverter::Convert(ctx, obj, &state, &handle, &error); break; case DT_DOUBLE: - if (DoubleConverter::Convert(obj, &state, ret) == nullptr) - return Status::OK(); + status = DoubleConverter::Convert(ctx, obj, &state, &handle, &error); break; case DT_HALF: - if (NumpyHalfConverter::Convert(obj, &state, ret) == nullptr) - return Status::OK(); + status = NumpyHalfConverter::Convert(ctx, obj, &state, &handle, &error); break; case DT_INT64: - if (Int64Converter::Convert(obj, &state, ret) == nullptr) - return Status::OK(); + status = Int64Converter::Convert(ctx, obj, &state, &handle, &error); break; case DT_INT32: - if (Int32Converter::Convert(obj, &state, ret) == nullptr) - return Status::OK(); + status = Int32Converter::Convert(ctx, obj, &state, &handle, &error); break; case DT_UINT64: - if (UInt64Converter::Convert(obj, &state, ret) == nullptr) - return Status::OK(); + status = UInt64Converter::Convert(ctx, obj, &state, &handle, &error); break; case DT_COMPLEX128: - if (Complex128Converter::Convert(obj, &state, ret) == nullptr) - return Status::OK(); + status = Complex128Converter::Convert(ctx, obj, &state, &handle, &error); break; case DT_STRING: - if (StringConverter::Convert(obj, &state, ret) == nullptr) - return Status::OK(); + status = StringConverter::Convert(ctx, obj, &state, &handle, &error); break; case DT_BOOL: - if (BoolConverter::Convert(obj, &state, ret) == nullptr) - return Status::OK(); + status = BoolConverter::Convert(ctx, obj, &state, &handle, &error); break; default: break; } + if (status.ok()) return handle; + switch (state.inferred_dtype) { case DT_FLOAT: // TODO(josh11b): Handle mixed floats and complex numbers? if (requested_dtype == DT_INVALID) { // TensorFlow uses float32s to represent floating point numbers // by default (for space and speed over using doubles). - RETURN_STRING_AS_STATUS(FloatConverter::Convert(obj, &state, ret)); + status = FloatConverter::Convert(ctx, obj, &state, &handle, &error); } else { // We are going to do a cast to the user's requested dtype // after this. We use doubles for this intermediate result so // we don't lose precision that might be representable in the // final type. 
- RETURN_STRING_AS_STATUS(DoubleConverter::Convert(obj, &state, ret)); + status = DoubleConverter::Convert(ctx, obj, &state, &handle, &error); } + break; case DT_DOUBLE: - RETURN_STRING_AS_STATUS(DoubleConverter::Convert(obj, &state, ret)); + status = DoubleConverter::Convert(ctx, obj, &state, &handle, &error); + break; case DT_HALF: - RETURN_STRING_AS_STATUS(NumpyHalfConverter::Convert(obj, &state, ret)); + status = NumpyHalfConverter::Convert(ctx, obj, &state, &handle, &error); + break; case DT_INT64: if (requested_dtype == DT_INVALID) { - const char* error = Int32Converter::Convert(obj, &state, ret); + status = Int32Converter::Convert(ctx, obj, &state, &handle, &error); if (error == ErrorFoundInt64) { - error = Int64Converter::Convert(obj, &state, ret); + status = Int64Converter::Convert(ctx, obj, &state, &handle, &error); } if (error == ErrorFoundFloat) { - error = FloatConverter::Convert(obj, &state, ret); + status = FloatConverter::Convert(ctx, obj, &state, &handle, &error); } // TODO(josh11b): May also want to fall back to using doubles if // error == ErrorOutOfRange? - RETURN_STRING_AS_STATUS(error); } else { - const char* error = Int64Converter::Convert(obj, &state, ret); + status = Int64Converter::Convert(ctx, obj, &state, &handle, &error); if (error == ErrorFoundFloat) { - error = DoubleConverter::Convert(obj, &state, ret); + status = DoubleConverter::Convert(ctx, obj, &state, &handle, &error); } - RETURN_STRING_AS_STATUS(error); } + break; case DT_STRING: - RETURN_STRING_AS_STATUS(StringConverter::Convert(obj, &state, ret)); + status = StringConverter::Convert(ctx, obj, &state, &handle, &error); + break; case DT_COMPLEX128: - RETURN_STRING_AS_STATUS(Complex128Converter::Convert(obj, &state, ret)); + status = Complex128Converter::Convert(ctx, obj, &state, &handle, &error); + break; case DT_BOOL: - RETURN_STRING_AS_STATUS(BoolConverter::Convert(obj, &state, ret)); + status = BoolConverter::Convert(ctx, obj, &state, &handle, &error); + break; case DT_INVALID: // Only occurs for empty tensors. - *ret = Tensor(requested_dtype == DT_INVALID ? DT_FLOAT : requested_dtype, - state.inferred_shape); - return Status::OK(); + { + tensorflow::TensorHandle* h = nullptr; + Tensor tensor(requested_dtype == DT_INVALID ? DT_FLOAT : requested_dtype, + TensorShape(state.inferred_shape)); + status = tensorflow::TensorHandle::CreateLocalHandle( + tensor, /*d=*/nullptr, /*op_device=*/nullptr, ctx->context, &h); + if (!status.ok()) { + PyErr_SetString(PyExc_ValueError, status.error_message().c_str()); + return nullptr; + } + return new TFE_TensorHandle{TensorHandleInterface(h)}; + } default: - return errors::Unimplemented(""Missing Python -> Tensor conversion for "", - DataTypeString(state.inferred_dtype)); + break; } - return Status::OK(); + if (!status.ok()) { + PyErr_SetString(PyExc_ValueError, status.error_message().c_str()); + return nullptr; + } + + return handle; } } // namespace tensorflow ",0,test 8a33966dbf9c190199dac4ca529bf70bce9c2a86,tensorflow/tensorflow,"Change PySeqToTensor to return TFE_TensorHandle PiperOrigin-RevId: 289108443 Change-Id: I2aac99acb068b0dae2f8aabf72e323d0d303ebb1",py_seq_tensor.h,"@@ -18,6 +18,7 @@ limitations under the License. 
#include +#include ""tensorflow/c/eager/c_api_internal.h"" #include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/lib/core/status.h"" @@ -25,12 +26,16 @@ namespace tensorflow { // Converts Python object `obj` representing a rectangular array of // Python values (a scalar, a sequence of scalars, a sequence of -// sequences, etc.) into a C++ TensorFlow Tensor and stores it in -// *ret. If dtype is not None it should by a Python integer +// sequences, etc.) into a TFE_TensorHandle. +// If dtype is not None it should by a Python integer // representing the desired dtype of the resulting Tensor. // This is used only as a hint, *ret may not have that dtype on // success and may require a cast. -Status PySeqToTensor(PyObject* obj, DataType dtype, Tensor* ret); +// +// If an error occurs, this return nullptr and sets the python error indicator +// with PyErr_SetString. +TFE_TensorHandle* PySeqToTFE_TensorHandle(TFE_Context* ctx, PyObject* obj, + DataType dtype); } // namespace tensorflow ",0,test 969489871031946f438d0899f3a0815270863296,tensorflow/tensorflow,"[XLA:Python] Add TopK operation to Python API. PiperOrigin-RevId: 296327503 Change-Id: I345150c480b48ba97645376674faa6109f6631a7",xla.cc,"@@ -32,6 +32,7 @@ limitations under the License. #include ""tensorflow/compiler/xla/client/lib/math.h"" #include ""tensorflow/compiler/xla/client/lib/qr.h"" #include ""tensorflow/compiler/xla/client/lib/self_adjoint_eig.h"" +#include ""tensorflow/compiler/xla/client/lib/sorting.h"" #include ""tensorflow/compiler/xla/client/lib/svd.h"" #include ""tensorflow/compiler/xla/client/local_client.h"" #include ""tensorflow/compiler/xla/client/xla_builder.h"" @@ -454,6 +455,7 @@ void BuildOpsSubmodule(py::module* m) { }, py::arg(""builder""), py::arg(""operands""), py::arg(""dimension"") = -1, py::arg(""comparator"") = absl::nullopt); + ops.def(""TopK"", &TopK, py::arg(""input""), py::arg(""k"")); ops.def(""Transpose"", &Transpose); ops.def(""TriangularSolve"", &TriangularSolve); ops.def(""Tuple"", &Tuple); ",0,train 969489871031946f438d0899f3a0815270863296,tensorflow/tensorflow,"[XLA:Python] Add TopK operation to Python API. PiperOrigin-RevId: 296327503 Change-Id: I345150c480b48ba97645376674faa6109f6631a7",xla_client.py,"@@ -1725,6 +1725,7 @@ _OTHER_OPS = [ 'Rev', 'Select', 'SliceInDim', + 'TopK', ] ",0,train 63ef3b47e7b82bc92426814aff48ddcd31d36c82,tensorflow/tensorflow,"Add injection sites for customizing page construction. * For single-page changes, tag an object using `doc_controls.set_custom_page_builder_c lass(obj, cls) * For global changes pass a dict of `{ObjectType:Type[PageInfo]}` to the DocGenerator's `page_builder_classes` argument. * Switch generate2 to use the new customization pathways. 
PiperOrigin-RevId: 420801142 Change-Id: I57d02ffa4dd439ddd63578b76109ca5794d4d8da",generate2.py,"@@ -37,6 +37,8 @@ import tensorflow as tf from tensorflow_docs.api_generator import doc_controls from tensorflow_docs.api_generator import doc_generator_visitor from tensorflow_docs.api_generator import generate_lib +from tensorflow_docs.api_generator.pretty_docs import base_page +from tensorflow_docs.api_generator.pretty_docs import module_page from tensorflow.python.framework import ops from tensorflow.python.util import tf_export @@ -99,38 +101,51 @@ tf.__doc__ = """""" """""" -def generate_raw_ops_doc(): - """"""Generates docs for `tf.raw_ops`."""""" +class RawOpsPageInfo(module_page.ModulePageInfo): + """"""Generates a custom page for `tf.raw_ops`."""""" + DEFAULT_BUILDER_CLASS = base_page.TemplatePageBuilder - warning = textwrap.dedent(""""""\n - Note: `tf.raw_ops` provides direct/low level access to all TensorFlow ops. - See [the RFC](https://github.com/tensorflow/community/blob/master/rfcs/20181225-tf-raw-ops.md) - for details. Unless you are library writer, you likely do not need to use - these ops directly."""""") + def build(self): + # Skip the ModulePage implementation, which doesn't use a template. + content = base_page.PageInfo.build(self) - table_header = textwrap.dedent("""""" + raw_ops_doc = self.generate_raw_ops_doc() - | Op Name | Has Gradient | - |---------|:------------:|"""""") + return ""\n"".join([content, raw_ops_doc]) - parts = [warning, table_header] + def generate_raw_ops_doc(self): + """"""Generates docs for `tf.raw_ops`."""""" + del self - for op_name in sorted(dir(tf.raw_ops)): - try: - ops._gradient_registry.lookup(op_name) # pylint: disable=protected-access - has_gradient = ""\N{HEAVY CHECK MARK}\N{VARIATION SELECTOR-16}"" - except LookupError: - has_gradient = ""\N{CROSS MARK}"" + warning = textwrap.dedent(""""""\n + Note: `tf.raw_ops` provides direct/low level access to all TensorFlow ops. + See [the RFC](https://github.com/tensorflow/community/blob/master/rfcs/20181225-tf-raw-ops.md) + for details. Unless you are library writer, you likely do not need to use + these ops directly."""""") - if not op_name.startswith(""_""): - path = pathlib.Path(""/"") / FLAGS.site_path / ""tf/raw_ops"" / op_name - path = path.with_suffix("".md"") - link = ('{op_name}').format( - op_name=op_name, path=str(path)) - parts.append(""| {link} | {has_gradient} |"".format( - link=link, has_gradient=has_gradient)) + table_header = textwrap.dedent("""""" - return ""\n"".join(parts) + | Op Name | Has Gradient | + |---------|:------------:|"""""") + + parts = [warning, table_header] + + for op_name in sorted(dir(tf.raw_ops)): + try: + ops._gradient_registry.lookup(op_name) # pylint: disable=protected-access + has_gradient = ""\N{HEAVY CHECK MARK}\N{VARIATION SELECTOR-16}"" + except LookupError: + has_gradient = ""\N{CROSS MARK}"" + + if not op_name.startswith(""_""): + path = pathlib.Path(""/"") / FLAGS.site_path / ""tf/raw_ops"" / op_name + path = path.with_suffix("".md"") + link = ('{op_name}').format( + op_name=op_name, path=str(path)) + parts.append(""| {link} | {has_gradient} |"".format( + link=link, has_gradient=has_gradient)) + + return ""\n"".join(parts) # The doc generator isn't aware of tf_export. @@ -167,7 +182,7 @@ def build_docs(output_dir, code_url_prefix, search_hints): search_hints: Bool. Include meta-data search hints at the top of each file. """""" # The custom page will be used for raw_ops.md not the one generated above. 
- doc_controls.set_custom_page_content(tf.raw_ops, generate_raw_ops_doc()) + doc_controls.set_custom_page_builder_cls(tf.raw_ops, RawOpsPageInfo) # Hide raw_ops from search. for name, obj in tf_inspect.getmembers(tf.raw_ops): ",0,train b0ce57a4a7be3735c73e33a927117d78792d5cd6,tensorflow/tensorflow,Fix typo in irfft2d.py test,irfft2d.py,"@@ -25,7 +25,7 @@ from tensorflow.lite.testing.zip_test_utils import register_make_test_function @register_make_test_function() -def make_rfft2d_tests(options): +def make_irfft2d_tests(options): """"""Make a set of tests to do irfft2d."""""" test_parameters = [{ ",0,train 9a7288ccf576a25ab65e61246242dd9bb90345bd,tensorflow/tensorflow,"Add dot configuration as part of module configuration. PiperOrigin-RevId: 299879284 Change-Id: I64932896316470cda7556a3618b32ac3556b3dac",hlo_module_config.h,"@@ -183,6 +183,12 @@ class HloModuleConfig { return &fusion_config_; } + const std::vector>& dot_config() const { + return dot_config_; + } + + std::vector>* mutable_dot_config() { return &dot_config_; } + private: // If you add new members, be sure to update compilation_cache_key. @@ -214,6 +220,8 @@ class HloModuleConfig { FusionConfigCollection::kOff; std::vector> fusion_config_; + + std::vector> dot_config_; }; } // namespace xla ",0,train 29fdee8e85e750d04f6e9d378e85443ba5c7a239,tensorflow/tensorflow,"Fix for error_reporter. Change-Id: I58745cc97872af74b1ad5b0af3ad778b39f01555",quantization_utils.cc,"@@ -113,7 +113,8 @@ TfLiteStatus GetQuantizationParams(TensorT* tensor, TensorType activations_type, tensor->quantization->max[0], quantized_range, quantization_params); } else { - error_reporter->Report( + TF_LITE_REPORT_ERROR( + error_reporter, ""Unsupported activation type for quantize-activation: %s"", activations_type); return kTfLiteError; ",0,train 29fdee8e85e750d04f6e9d378e85443ba5c7a239,tensorflow/tensorflow,"Fix for error_reporter. 
Change-Id: I58745cc97872af74b1ad5b0af3ad778b39f01555",quantize_model.cc,"@@ -370,9 +370,9 @@ TfLiteStatus ApplyConstraints(ModelT* model, std::unique_ptr additional_tensor; const string requant_tensor_name = input_tensor->name + ""_requantized""; utils::MakeTensorWithQuantParam( - requant_tensor_name, input_tensor->shape, - input_tensor->shape_signature, activations_type, - output_scale, output_zp, &additional_tensor); + requant_tensor_name, input_tensor->shape, + input_tensor->shape_signature, activations_type, output_scale, + output_zp, &additional_tensor); const int32_t additional_tensor_idx = subgraph->tensors.size(); subgraph->tensors.push_back(std::move(additional_tensor)); @@ -869,13 +869,15 @@ TfLiteStatus QuantizeWeightsInputOutput( if (activations_type == TensorType_INT16 && !property.quantizable && !allow_float) { - error_reporter->Report( - ""Quantization to 16x8-bit not yet supported for op: %s"", + TF_LITE_REPORT_ERROR( + error_reporter, + ""Quantization to 16x8-bit not yet supported for op: %"", EnumNameBuiltinOperator(op_code)); return kTfLiteError; } else if (!property.quantizable && !allow_float) { - error_reporter->Report(""Quantization not yet supported for op: %s"", - EnumNameBuiltinOperator(op_code)); + TF_LITE_REPORT_ERROR(error_reporter, + ""Quantization not yet supported for op: %"", + EnumNameBuiltinOperator(op_code)); return kTfLiteError; } ",0,train 012a1167d2b3db1a79a823dee959e58c162b3843,tensorflow/tensorflow,"Forwardprop: Ensure that inner nested accumulators don't see outer accumulators' jvps Just for consistency; apparently this was a difference between function-wrapped and non-function-wrapped accumulation. PiperOrigin-RevId: 260979468",tape.h,"@@ -262,6 +262,12 @@ class ForwardAccumulator { const std::function& backward_function_getter, const std::function& backward_function_deleter); + // Returns true if `Accumulate` is active somewhere above on the stack. This + // is useful for ordering ForwardAccumulators, where more deeply nested + // accumulators should not see computations from less deeply nested + // accumulators. + bool BusyAccumulating() const { return this->accumulating_; } + // Fetches the current Jacobian-vector product associated with `tensor_id`, or // a nullptr if none is available. // ",0,train 012a1167d2b3db1a79a823dee959e58c162b3843,tensorflow/tensorflow,"Forwardprop: Ensure that inner nested accumulators don't see outer accumulators' jvps Just for consistency; apparently this was a difference between function-wrapped and non-function-wrapped accumulation. PiperOrigin-RevId: 260979468",forwardprop_test.py,"@@ -282,9 +282,12 @@ class ForwardpropTest(test.TestCase, parameterized.TestCase): f = _forwardgrad(f) self.assertAllClose(expected, f(primal)) - def testFunctionGradPureForward(self): + @parameterized.named_parameters( + [(""Function"", def_function.function), + (""NoFunction"", lambda f: f)]) + def testGradPureForward(self, decorator): - @def_function.function + @decorator def f(x): return x ** 3.5 ",0,train 012a1167d2b3db1a79a823dee959e58c162b3843,tensorflow/tensorflow,"Forwardprop: Ensure that inner nested accumulators don't see outer accumulators' jvps Just for consistency; apparently this was a difference between function-wrapped and non-function-wrapped accumulation. 
PiperOrigin-RevId: 260979468",pywrap_tfe_src.cc,"@@ -1905,6 +1905,12 @@ void TapeSetRecordOperation( if (MaybeRaiseExceptionFromStatus(status, nullptr)) { return; } + if (accumulator->accumulator->BusyAccumulating()) { + // Ensure inner accumulators don't see outer accumulators' jvps. This + // mostly happens on its own, with some potentially surprising + // exceptions, so the blanket policy is for consistency. + break; + } } } } ",0,train f2134cbd2ec4dd98f9f20ac41e4f46cdd0246af2,tensorflow/tensorflow,use get_item_tensor_string for string with rank 0,slices_test.py,"@@ -53,6 +53,12 @@ class SlicesTest(test.TestCase): with self.test_session() as sess: self.assertEqual(sess.run(t), b""b"") + initial_list_str = constant_op.constant([""abcd"", ""bcde""]) + t = slices.get_item(initial_list_str, 1, slices.GetItemOpts(element_dtype=initial_str.dtype)) + + with self.test_session() as sess: + self.assertEqual(sess.run(t), b""bcde"") + if __name__ == '__main__': test.main() ",0,train 7cd52d03c423c27e5daf4e981ec44a5c84362d2c,tensorflow/tensorflow,"[XLA] Initialize fields of RematerializationSizes by default PiperOrigin-RevId: 355954409 Change-Id: I0da0d3ce320c53321778ad66ab9703307bc231c7",hlo_rematerialization.h,"@@ -45,8 +45,8 @@ class HloRematerialization : public HloModulePass { // Helper struct that communicates the before / after sizes for the // rematerialization process. struct RematerializationSizes { - int64 before_bytes; - int64 after_bytes; + int64 before_bytes = -1; + int64 after_bytes = -1; }; // Mode in which the rematerialization algorithm should be run. ",0,train 4f8410553665507aa09763284e426e81a6084023,tensorflow/tensorflow,test: check statefulness with number of executions,script_ops_test.py,"@@ -40,8 +40,11 @@ class NumpyFunctionTest(test.TestCase): self.assertAllEqual(actual_result, expect_result) def test_stateless_flag(self): + call_count = 0 def plus(a, b): + global call_count + call_count += 1 return a + b @def_function.function @@ -53,21 +56,30 @@ class NumpyFunctionTest(test.TestCase): return numpy_function(plus, [a, b], dtypes.int32, stateful=False) @def_function.function(autograph=False) - def tensor_double_plus(a, b, c, d): - sum_stateful = tensor_plus_stateful(a, b) - assert sum_stateful.op.op_def.is_stateful + def tensor_double_plus_stateless(a, b): + sum1 = tensor_plus_stateless(a, b) + sum2 = tensor_plus_stateless(a, b) + return sum1 + sum2 - sum_stateless = tensor_plus_stateless(c, d) - assert not sum_stateless.op.op_def.is_stateful - - return sum_stateful, sum_stateless - - tensor_double_plus( + # different argument + tensor_double_plus_stateless( constant_op.constant(1, dtype=dtypes.int32), constant_op.constant(2, dtype=dtypes.int32), + ) + assert call_count == 1 # +1 as only the first one was executed + + @def_function.function(autograph=False) + def tensor_double_plus_stateful(a, b): + sum1 = tensor_plus_stateful(a, b) + sum2 = tensor_plus_stateful(a, b) + return sum1 + sum2 + + tensor_double_plus_stateful( constant_op.constant(3, dtype=dtypes.int32), constant_op.constant(4, dtype=dtypes.int32), - ) + ) + assert call_count == 3 # +2 as it is stateful, both were executed + if __name__ == ""__main__"": ",0,test d70a2cf2ab0495926dadd9d190def0d4a8522878,tensorflow/tensorflow,"Print a cycle if detected by DFS. 
Example output Directed cycle: fusion.48 get-tuple-element.32 fusion.62 get-tuple-element.67 fusion.44 get-tuple-element.65 fusion.48 PiperOrigin-RevId: 266934452",hlo_instruction.cc,"@@ -2209,11 +2209,52 @@ string PrintName(const string& name, bool print_ids) { namespace { +using DFSStack = absl::InlinedVector, 16>; + string PrintNameInternal(const string& name, const HloPrintOptions& options) { return StrCat(options.print_percent() ? ""%"" : """", PrintName(name, options.print_ids())); } +void PrintCycle(const HloInstruction* child, DFSStack* dfs_stack) { + // This set contains HloInstructions from the top of `DFSStack` that might + // belong to the cycle, i.e. if DFSStack :=[back,...,child,...,top], then + // `subgraph` := {child,...,top}. + absl::flat_hash_set subgraph; + while (!dfs_stack->empty() && dfs_stack->back().second != child) { + subgraph.insert(dfs_stack->back().second); + dfs_stack->pop_back(); + } + // Start dfs at `child` and find a cycle with all nodes in `subgraph`. + absl::flat_hash_set visited; + absl::InlinedVector dfs; + dfs.push_back(child); + while (!dfs.empty()) { + bool found_next_instr = false; + for (const auto& user : dfs.back()->users()) { + if (user == child) { + dfs.push_back(child); + LOG(INFO) << ""\n\nDirected cycle:\n "" + << absl::StrJoin( + dfs, ""\n "", + [](std::string* out, const HloInstruction* instr) { + out->append(instr->name()); + }); + return; + } + if (!subgraph.contains(user) || visited.contains(user)) { + continue; + } + visited.insert(user); + dfs.push_back(user); + found_next_instr = true; + } + if (!found_next_instr) { + dfs.pop_back(); + } + } +} + } // namespace string HloInstruction::ToString(const HloPrintOptions& options) const { @@ -2847,8 +2888,6 @@ Status HloInstruction::Visit(DfsHloVisitorBase* visitor) { template Status HloInstruction::Visit(DfsHloVisitor* visitor); template Status HloInstruction::Visit(ConstDfsHloVisitor* visitor); -using DFSStack = absl::InlinedVector, 16>; - // Push ""child"" onto the dfs_stack if not already visited. Returns false if a // cycle was detected, and true otherwise. template @@ -2926,6 +2965,7 @@ static Status PostOrderDFS(HloInstruction* root, Visitor* visitor, const size_t old_dfs_stack_size = dfs_stack.size(); for (HloInstruction* child : current_node->operands()) { if (!TF_PREDICT_TRUE(PushDFSChild(visitor, &dfs_stack, child))) { + PrintCycle(child, &dfs_stack); return FailedPrecondition( ""A cycle is detected while visiting instruction %s"", current_node->ToString()); @@ -2935,6 +2975,7 @@ static Status PostOrderDFS(HloInstruction* root, Visitor* visitor, if (!ignore_control_predecessors) { for (HloInstruction* child : current_node->control_predecessors()) { if (!TF_PREDICT_TRUE(PushDFSChild(visitor, &dfs_stack, child))) { + PrintCycle(child, &dfs_stack); return FailedPrecondition( ""A cycle is detected while visiting instruction %s"", current_node->ToString()); ",0,train 6f737e0dd60fc02138c6bf0dc34c6a7e64297c73,tensorflow/tensorflow,"Doc improvements to ReductionToOneDevice. PiperOrigin-RevId: 264908765",cross_device_ops.py,"@@ -404,15 +404,20 @@ class CrossDeviceOps(object): class ReductionToOneDevice(CrossDeviceOps): """"""Always do reduction to one device first and then do broadcasting. - Batch reduction is done by reduction on each element one by one. + Batch reduction is done by reduction on each element one by one. 
+ + ``` + mirrored_strategy = tf.distribute.MirroredStrategy( + cross_device_ops=tf.distribute.ReductionToOneDevice()) + ``` """""" def __init__(self, reduce_to_device=None, accumulation_fn=None): - """"""Initializes the instance of ReductionToOneDevice. + """"""Initializes with a device to reduce to and a way to accumulate. Args: reduce_to_device: the intermediate device to reduce to. If None, reduce - to the first device in `destinations` of the reduce() method. + to the first device in `destinations` of the `reduce()` method. accumulation_fn: a function that does accumulation. If None, then `tf.math.add_n` is used. """""" ",0,train 650172a574504223ec2bdb328ed7c985389313d7,tensorflow/tensorflow,"Update test case for complex support of squared difference Signed-off-by: Yong Tang ",math_ops_test.py,"@@ -217,7 +217,7 @@ class SquaredDifferenceTest(test_util.TensorFlowTestCase): for dtype in [np.complex64, np.complex128]: x = np.array([[1+3j, 2+2j, 3+1j], [4-1j, 5-2j, 6-3j]], dtype=dtype) y = np.array([-3+1j, -2+2j, -1+3j], dtype=dtype) - z = (x - y) * (x - y) + z = np.conj(x - y) * (x - y) with test_util.device(use_gpu=False): z_tf = self.evaluate(math_ops.squared_difference(x, y)) self.assertAllClose(z, z_tf) ",0,test e479a1690683a64cdabac0ca46ce6265c0b0dbec,tensorflow/tensorflow,"Refactor kernel thunk's launch dimension setting - part 5/8. Move SetThunkLaunchDimensions() to right after KernelThunk construction. Launch dimension will be passed to KernelThunk's constructor as a parameter. PiperOrigin-RevId: 386342276 Change-Id: Ie486ddd8f35b1e1377007e29b3b6bf026067684b",ir_emitter_unnested.cc,"@@ -1696,41 +1696,10 @@ Status IrEmitterUnnested::EmitLoopFusion(mlir::Operation* op) { MlirEmitterContext context; context.SetOperation(fusion); - std::vector ir_arrays; - Thunk* kernel_thunk; - { - TF_ASSIGN_OR_RETURN(std::unique_ptr kernel_thunk_ptr, - BuildKernelThunk(fusion, GetThunkInfo(op), &ir_arrays)); - kernel_thunk = kernel_thunk_ptr.get(); - thunk_sequence_.emplace_back(std::move(kernel_thunk_ptr)); - } - - auto operand_arrays = - absl::MakeSpan(ir_arrays).subspan(0, context.operand_shapes.size()); - auto output_element_arrays = absl::MakeSpan(ir_arrays).subspan( - context.operand_shapes.size(), context.output_shapes.size()); - TF_ASSIGN_OR_RETURN(const HloComputation* fused_computation, GetOrCreateSubComputationFromRegion(&fusion.region(), /*is_fusion=*/true)); - GpuElementalIrEmitter elemental_emitter(hlo_module_config_, module_, &b_, - GetNestedComputer()); - FusedIrEmitter fused_emitter(&elemental_emitter); - - for (int i = 0; i < context.operand_shapes.size(); i++) { - auto* builder = &b_; - auto ir_array = operand_arrays[i]; - fused_emitter.BindGenerator( - fused_computation->parameter_instruction(i), - [builder, ir_array](llvm_ir::IrArray::Index index) { - return ir_array.EmitReadArrayElement(index, builder); - }); - } - TF_ASSIGN_OR_RETURN( - auto element_generator, - fused_emitter.GetGenerator(fused_computation->root_instruction())); - int unroll_factor; if (!MayPreventVectorization(fusion)) { unroll_factor = ComputeMaxUnrollFactor(fusion, hlo_module_config_); @@ -1782,8 +1751,40 @@ Status IrEmitterUnnested::EmitLoopFusion(mlir::Operation* op) { CalculateLaunchDimensions( element_shape, ir_emitter_context_->gpu_device_info(), launch_config)); + + std::vector ir_arrays; + Thunk* kernel_thunk; + { + TF_ASSIGN_OR_RETURN(std::unique_ptr kernel_thunk_ptr, + BuildKernelThunk(fusion, GetThunkInfo(op), &ir_arrays)); + kernel_thunk = kernel_thunk_ptr.get(); + 
thunk_sequence_.emplace_back(std::move(kernel_thunk_ptr)); + } SetThunkLaunchDimensions(launch_dimensions, kernel_thunk, ir_emitter_context_->llvm_module()); + + auto operand_arrays = + absl::MakeSpan(ir_arrays).subspan(0, context.operand_shapes.size()); + auto output_element_arrays = absl::MakeSpan(ir_arrays).subspan( + context.operand_shapes.size(), context.output_shapes.size()); + + GpuElementalIrEmitter elemental_emitter(hlo_module_config_, module_, &b_, + GetNestedComputer()); + FusedIrEmitter fused_emitter(&elemental_emitter); + + for (int i = 0; i < context.operand_shapes.size(); i++) { + auto* builder = &b_; + auto ir_array = operand_arrays[i]; + fused_emitter.BindGenerator( + fused_computation->parameter_instruction(i), + [builder, ir_array](llvm_ir::IrArray::Index index) { + return ir_array.EmitReadArrayElement(index, builder); + }); + } + TF_ASSIGN_OR_RETURN( + auto element_generator, + fused_emitter.GetGenerator(fused_computation->root_instruction())); + llvm::Type* index_type = GetIndexTypeForKernel(fusion, launch_dimensions.launch_bound(), &b_); ",0,train 723c4048790d6f0636f6c1df5f4fb793ef7a4ae6,tensorflow/tensorflow,"FileSystem directory creation fixes: - Ensure that CreateDir returns error::ALREADY_EXISTS if the dirname exists. - Ensure that RecursivelyCreateDirectory ignores error::ALREADY_EXISTS when creating directories and subdirectories. Fixes #6974 Change: 145144720",file_system.cc,"@@ -229,7 +229,10 @@ Status FileSystem::RecursivelyCreateDir(const string& dirname) { string built_path = remaining_dir.ToString(); for (const StringPiece sub_dir : sub_dirs) { built_path = io::JoinPath(built_path, sub_dir); - TF_RETURN_IF_ERROR(CreateDir(io::CreateURI(scheme, host, built_path))); + Status status = CreateDir(io::CreateURI(scheme, host, built_path)); + if (!status.ok() && status.code() != tensorflow::error::ALREADY_EXISTS) { + return status; + } } return Status::OK(); } ",0,train 723c4048790d6f0636f6c1df5f4fb793ef7a4ae6,tensorflow/tensorflow,"FileSystem directory creation fixes: - Ensure that CreateDir returns error::ALREADY_EXISTS if the dirname exists. - Ensure that RecursivelyCreateDirectory ignores error::ALREADY_EXISTS when creating directories and subdirectories. Fixes #6974 Change: 145144720",file_system.h,"@@ -87,7 +87,7 @@ class FileSystem { // '\\' c: matches character c // lo '-' hi: matches character c for lo <= c <= hi // - // Typical return codes + // Typical return codes: // * OK - no errors // * UNIMPLEMENTED - Some underlying functions (like GetChildren) are not // implemented @@ -100,10 +100,16 @@ class FileSystem { virtual Status DeleteFile(const string& fname) = 0; + // \brief Creates the specified directory. + // Typical return codes: + // * OK - successfully created the directory. + // * ALREADY_EXISTS - directory with name dirname already exists. + // * PERMISSION_DENIED - dirname is not writable. virtual Status CreateDir(const string& dirname) = 0; // \brief Creates the specified directory and all the necessary - // subdirectories. Typical return codes. + // subdirectories. + // Typical return codes: // * OK - successfully created the directory and sub directories, even if // they were already created. // * PERMISSION_DENIED - dirname or some subdirectory is not writable. @@ -116,7 +122,7 @@ class FileSystem { // files and directories that weren't deleted (unspecified if the return // status is not OK). // REQUIRES: undeleted_files, undeleted_dirs to be not null. 
- // Typical return codes + // Typical return codes: // * OK - dirname exists and we were able to delete everything underneath. // * NOT_FOUND - dirname doesn't exist // * PERMISSION_DENIED - dirname or some descendant is not writable ",0,train 723c4048790d6f0636f6c1df5f4fb793ef7a4ae6,tensorflow/tensorflow,"FileSystem directory creation fixes: - Ensure that CreateDir returns error::ALREADY_EXISTS if the dirname exists. - Ensure that RecursivelyCreateDirectory ignores error::ALREADY_EXISTS when creating directories and subdirectories. Fixes #6974 Change: 145144720",file_system_test.cc,"@@ -44,9 +44,10 @@ class InterPlanetaryFileSystem : public NullFileSystem { Status CreateDir(const string& dirname) override { string parsed_path; ParsePath(dirname, &parsed_path); - // If the directory already exists then ignore. + // If the directory already exists, throw an error. if (celestial_bodies_.find(parsed_path) != celestial_bodies_.end()) { - return Status::OK(); + return Status(tensorflow::error::ALREADY_EXISTS, + ""dirname already exists.""); } std::vector split_path = str_util::Split(parsed_path, '/'); // If the path is too long then we don't support it. @@ -248,4 +249,14 @@ TEST(TestFileSystem, MatchMultipleWildcards) { ""match-00/abc/00,match-00/abc/01,match-01/abc/00,match-01/abc/04""); } +TEST(TestFileSystem, RecursivelyCreateAlreadyExistingDir) { + InterPlanetaryFileSystem ipfs; + const string dirname = io::JoinPath(kPrefix, ""match-00/abc/00""); + TF_EXPECT_OK(ipfs.RecursivelyCreateDir(dirname)); + // Ensure that CreateDir throws an error, to sanity check that this test + // actually tests the behavior of RecursivelyCreateDir. + EXPECT_EQ(ipfs.CreateDir(dirname).code(), tensorflow::error::ALREADY_EXISTS); + TF_EXPECT_OK(ipfs.RecursivelyCreateDir(dirname)); +} + } // namespace tensorflow ",0,train c41f4652b45bf70f20686e612b41574b4b8139d7,tensorflow/tensorflow,"Add an option to enable MLIR bridge for tpu_py_test rule If enable_mlir_bridge is True, a new test will be generated that runs with the MLIR bridge enabled. This option is off by default. PiperOrigin-RevId: 317173675 Change-Id: I332e1ae24cf82fceea20fd0aff2cec7c9b236a24",test_util.py,"@@ -1933,6 +1933,9 @@ class TensorFlowTestCase(googletest.TestCase): # disable it here. pywrap_tf_session.TF_SetXlaConstantFoldingDisabled(True) + if is_mlir_bridge_enabled(): + context.context().enable_mlir_bridge = True + self._threads = [] self._tempdir = None self._cached_session = None ",0,train d916f20e1f1897696a19158ac7f5bd8d83e1b857,tensorflow/tensorflow,"Merging of GpuModel moved to api neutral common/gpu_model from opencl inference context. 
PiperOrigin-RevId: 419787683 Change-Id: I860fd1866d2a046559fb44d350afb65713a18b2b",inference_context.cc,"@@ -55,15 +55,6 @@ namespace gpu { namespace cl { namespace { -bool IsReady(const absl::flat_hash_set& ready_tensors, - const GpuNode& node) { - for (const ValueId in_id : node.inputs) { - if (ready_tensors.find(in_id) == ready_tensors.end()) { - return false; - } - } - return true; -} std::vector> GetCLNodeTensors( const CLNode& node) { @@ -80,15 +71,6 @@ std::vector> GetCLNodeTensors( return result; } -absl::Status MergeGpuNodes(GpuNode* src, GpuNode* dst) { - for (int j = 1; j < src->inputs.size(); ++j) { - dst->inputs.push_back(src->inputs[j]); - } - dst->outputs[0] = src->outputs[0]; - dst->name += "" linked : "" + src->name; - return dst->gpu_operation->AddOperation(src->gpu_operation.get()); -} - void AddUsage(ValueId id, int task_index, std::map* usage_records) { auto it = usage_records->find(id); @@ -401,53 +383,6 @@ absl::Status ConvertOperations(const GpuInfo& gpu_info, return absl::OkStatus(); } -absl::Status Merge(GpuModel* gpu_model) { - absl::flat_hash_set ready_tensors; - for (const auto& input : gpu_model->input_ids_and_refs) { - ready_tensors.insert(input.first); - } - auto& nodes = gpu_model->nodes; - for (int i = 0; i < nodes.size(); ++i) { - auto& node = nodes[i]; - for (const auto& out_id : node.outputs) { - ready_tensors.insert(out_id); - } - if (node.outputs.size() != 1) { - continue; - } - std::vector next_nodes; - int link_index = 0; - for (int j = i + 1; j < nodes.size(); ++j) { - for (int k = 0; k < nodes[j].inputs.size(); ++k) { - if (nodes[j].inputs[k] == node.outputs[0]) { - next_nodes.push_back(j); - link_index = k; - } - } - } - if (next_nodes.size() != 1 || link_index != 0) { - continue; - } - auto& linkable_node = nodes[next_nodes[0]]; - if (!linkable_node.gpu_operation->IsLinkable() || - linkable_node.outputs.size() != 1 || - !IsReady(ready_tensors, linkable_node)) { - continue; - } - const auto& original_dst_def = - node.gpu_operation->GetDefinition().dst_tensors[0]; - const auto& link_dst_def = - linkable_node.gpu_operation->GetDefinition().dst_tensors[0]; - if (original_dst_def != link_dst_def) { - continue; - } - RETURN_IF_ERROR(MergeGpuNodes(&linkable_node, &node)); - nodes.erase(nodes.begin() + next_nodes[0]); - i -= 1; - } - return absl::OkStatus(); -} - void CopyExternals(const GraphFloat32& graph, GpuModel* gpu_model) { const auto inputs = graph.inputs(); for (const auto& value : inputs) { @@ -521,7 +456,7 @@ absl::Status GraphToGpuModel(const CreateGpuModelInfo& create_info, CopyExternals(graph, gpu_model); RETURN_IF_ERROR(ConvertOperations(gpu_info, graph, create_info, &tensor_reserver, gpu_model)); - RETURN_IF_ERROR(Merge(gpu_model)); + RETURN_IF_ERROR(MergeNodes(gpu_model)); gpu_model->tensors = std::move(tensor_reserver.reservations_); for (auto& node : gpu_model->nodes) { ",0,test d916f20e1f1897696a19158ac7f5bd8d83e1b857,tensorflow/tensorflow,"Merging of GpuModel moved to api neutral common/gpu_model from opencl inference context. PiperOrigin-RevId: 419787683 Change-Id: I860fd1866d2a046559fb44d350afb65713a18b2b",gpu_model.cc,"@@ -0,0 +1,93 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""tensorflow/lite/delegates/gpu/common/gpu_model.h"" + +#include ""absl/container/flat_hash_set.h"" + +namespace tflite { +namespace gpu { + +namespace { +bool IsReady(const absl::flat_hash_set& ready_tensors, + const GpuNode& node) { + for (const ValueId in_id : node.inputs) { + if (ready_tensors.find(in_id) == ready_tensors.end()) { + return false; + } + } + return true; +} + +absl::Status MergeGpuNodes(GpuNode* src, GpuNode* dst) { + for (int j = 1; j < src->inputs.size(); ++j) { + dst->inputs.push_back(src->inputs[j]); + } + dst->outputs[0] = src->outputs[0]; + dst->name += "" linked : "" + src->name; + return dst->gpu_operation->AddOperation(src->gpu_operation.get()); +} + +} // namespace + +absl::Status MergeNodes(GpuModel* gpu_model) { + absl::flat_hash_set ready_tensors; + for (const auto& input : gpu_model->input_ids_and_refs) { + ready_tensors.insert(input.first); + } + auto& nodes = gpu_model->nodes; + for (int i = 0; i < nodes.size(); ++i) { + auto& node = nodes[i]; + for (const auto& out_id : node.outputs) { + ready_tensors.insert(out_id); + } + if (node.outputs.size() != 1) { + continue; + } + std::vector next_nodes; + int link_index = 0; + for (int j = i + 1; j < nodes.size(); ++j) { + for (int k = 0; k < nodes[j].inputs.size(); ++k) { + if (nodes[j].inputs[k] == node.outputs[0]) { + next_nodes.push_back(j); + link_index = k; + } + } + } + if (next_nodes.size() != 1 || link_index != 0) { + continue; + } + auto& linkable_node = nodes[next_nodes[0]]; + if (!linkable_node.gpu_operation->IsLinkable() || + linkable_node.outputs.size() != 1 || + !IsReady(ready_tensors, linkable_node)) { + continue; + } + const auto& original_dst_def = + node.gpu_operation->GetDefinition().dst_tensors[0]; + const auto& link_dst_def = + linkable_node.gpu_operation->GetDefinition().dst_tensors[0]; + if (original_dst_def != link_dst_def) { + continue; + } + RETURN_IF_ERROR(MergeGpuNodes(&linkable_node, &node)); + nodes.erase(nodes.begin() + next_nodes[0]); + i -= 1; + } + return absl::OkStatus(); +} + +} // namespace gpu +} // namespace tflite ",0,test d916f20e1f1897696a19158ac7f5bd8d83e1b857,tensorflow/tensorflow,"Merging of GpuModel moved to api neutral common/gpu_model from opencl inference context. PiperOrigin-RevId: 419787683 Change-Id: I860fd1866d2a046559fb44d350afb65713a18b2b",gpu_model.h,"@@ -103,6 +103,8 @@ struct GpuModel { absl::flat_hash_map const_tensors; }; +absl::Status MergeNodes(GpuModel* gpu_model); + } // namespace gpu } // namespace tflite ",0,test 90c5838c5d8fc672b020e4baa3d5138f3940cd03,tensorflow/tensorflow,"Add the tensor shape to the Exception string when the image doesnt match. 
Change: 150251692",image_ops_impl.py,"@@ -128,9 +128,11 @@ def _Check3DImage(image, require_static=True): try: image_shape = image.get_shape().with_rank(3) except ValueError: - raise ValueError(""'image' must be three-dimensional."") + raise ValueError(""'image' (shape %s) must be three-dimensional."" % + image.shape) if require_static and not image_shape.is_fully_defined(): - raise ValueError(""'image' must be fully defined."") + raise ValueError(""'image' (shape %s) must be fully defined."" % + image_shape) if any(x == 0 for x in image_shape): raise ValueError(""all dims of 'image.shape' must be > 0: %s"" % image_shape) ",0,train 90c5838c5d8fc672b020e4baa3d5138f3940cd03,tensorflow/tensorflow,"Add the tensor shape to the Exception string when the image doesnt match. Change: 150251692",image_ops_test.py,"@@ -314,29 +314,29 @@ class AdjustHueTest(test_util.TensorFlowTestCase): [1000, 1, 3], ] test_styles = [ - 'all_random', - 'rg_same', - 'rb_same', - 'gb_same', - 'rgb_same', + ""all_random"", + ""rg_same"", + ""rb_same"", + ""gb_same"", + ""rgb_same"", ] for x_shape in x_shapes: for test_style in test_styles: x_np = np.random.rand(*x_shape) * 255. delta_h = np.random.rand() * 2.0 - 1.0 - if test_style == 'all_random': + if test_style == ""all_random"": pass - elif test_style == 'rg_same': + elif test_style == ""rg_same"": x_np[..., 1] = x_np[..., 0] - elif test_style == 'rb_same': + elif test_style == ""rb_same"": x_np[..., 2] = x_np[..., 0] - elif test_style == 'gb_same': + elif test_style == ""gb_same"": x_np[..., 2] = x_np[..., 1] - elif test_style == 'rgb_same': + elif test_style == ""rgb_same"": x_np[..., 1] = x_np[..., 0] x_np[..., 2] = x_np[..., 0] else: - raise AssertionError('Invalid test style: %s' % (test_style)) + raise AssertionError(""Invalid test style: %s"" % (test_style)) y_np = self._adjustHueNp(x_np, delta_h) y_tf = self._adjustHueTf(x_np, delta_h) self.assertAllClose(y_tf, y_np, rtol=2e-5, atol=1e-5) @@ -350,11 +350,11 @@ class AdjustHueTest(test_util.TensorFlowTestCase): x_np = np.random.rand(2, 3) * 255. delta_h = np.random.rand() * 2.0 - 1.0 fused = False - with self.assertRaisesRegexp(ValueError, 'Shape must be at least rank 3'): + with self.assertRaisesRegexp(ValueError, ""Shape must be at least rank 3""): self._adjustHueTf(x_np, delta_h) x_np = np.random.rand(4, 2, 4) * 255. 
delta_h = np.random.rand() * 2.0 - 1.0 - with self.assertRaisesOpError('input must have 3 channels'): + with self.assertRaisesOpError(""input must have 3 channels""): self._adjustHueTf(x_np, delta_h) @@ -368,7 +368,7 @@ class AdjustHueBenchmark(test.Benchmark): if cpu_count is not None: config.inter_op_parallelism_threads = 1 config.intra_op_parallelism_threads = cpu_count - with session.Session('', graph=ops.Graph(), config=config) as sess: + with session.Session("""", graph=ops.Graph(), config=config) as sess: with ops.device(device): inputs = variables.Variable( random_ops.random_uniform( @@ -385,19 +385,19 @@ class AdjustHueBenchmark(test.Benchmark): sess.run(run_op) end = time.time() step_time = (end - start) / benchmark_rounds - tag = '%s' % (cpu_count) if cpu_count is not None else '_all' - print('benchmarkAdjustHue_299_299_3_cpu%s step_time: %.2f us' % + tag = ""%s"" % (cpu_count) if cpu_count is not None else ""_all"" + print(""benchmarkAdjustHue_299_299_3_cpu%s step_time: %.2f us"" % (tag, step_time * 1e6)) self.report_benchmark( - name='benchmarkAdjustHue_299_299_3_cpu%s' % (tag), + name=""benchmarkAdjustHue_299_299_3_cpu%s"" % (tag), iters=benchmark_rounds, wall_time=step_time) def benchmarkAdjustHueCpu1(self): - self._benchmarkAdjustHue('/cpu:0', 1) + self._benchmarkAdjustHue(""/cpu:0"", 1) def benchmarkAdjustHueCpuAll(self): - self._benchmarkAdjustHue('/cpu:0', None) + self._benchmarkAdjustHue(""/cpu:0"", None) def benchmarkAdjustHueGpu(self): self._benchmarkAdjustHue(test.gpu_device_name(), None) @@ -413,7 +413,7 @@ class AdjustSaturationBenchmark(test.Benchmark): if cpu_count is not None: config.inter_op_parallelism_threads = 1 config.intra_op_parallelism_threads = cpu_count - with session.Session('', graph=ops.Graph(), config=config) as sess: + with session.Session("""", graph=ops.Graph(), config=config) as sess: with ops.device(device): inputs = variables.Variable( random_ops.random_uniform( @@ -431,19 +431,19 @@ class AdjustSaturationBenchmark(test.Benchmark): sess.run(run_op) end = time.time() step_time = (end - start) / benchmark_rounds - tag = '%s' % (cpu_count) if cpu_count is not None else '_all' - print('benchmarkAdjustSaturation_599_599_3_cpu%s step_time: %.2f us' % + tag = ""%s"" % (cpu_count) if cpu_count is not None else ""_all"" + print(""benchmarkAdjustSaturation_599_599_3_cpu%s step_time: %.2f us"" % (tag, step_time * 1e6)) self.report_benchmark( - name='benchmarkAdjustSaturation_599_599_3_cpu%s' % (tag), + name=""benchmarkAdjustSaturation_599_599_3_cpu%s"" % (tag), iters=benchmark_rounds, wall_time=step_time) def benchmarkAdjustSaturationCpu1(self): - self._benchmarkAdjustSaturation('/cpu:0', 1) + self._benchmarkAdjustSaturation(""/cpu:0"", 1) def benchmarkAdjustSaturationCpuAll(self): - self._benchmarkAdjustSaturation('/cpu:0', None) + self._benchmarkAdjustSaturation(""/cpu:0"", None) def benchmarkAdjustSaturationGpu(self): self._benchmarkAdjustSaturation(test.gpu_device_name(), None) @@ -457,7 +457,7 @@ class ResizeBilinearBenchmark(test.Benchmark): img = variables.Variable( random_ops.random_normal( [batch_size, image_size[0], image_size[1], num_channels]), - name='img') + name=""img"") deps = [] for _ in xrange(num_ops): @@ -472,9 +472,9 @@ class ResizeBilinearBenchmark(test.Benchmark): results = self.run_op_benchmark( sess, benchmark_op, - name=('resize_bilinear_%s_%s_%s' % + name=(""resize_bilinear_%s_%s_%s"" % (image_size[0], image_size[1], num_channels))) - print('%s : %.2f ms/img' % (results['name'], 1000 * results['wall_time'] + print(""%s : %.2f 
ms/img"" % (results[""name""], 1000 * results[""wall_time""] / (batch_size * num_ops))) def benchmarkSimilar3Channel(self): @@ -504,7 +504,7 @@ class ResizeBicubicBenchmark(test.Benchmark): img = variables.Variable( random_ops.random_normal( [batch_size, image_size[0], image_size[1], num_channels]), - name='img') + name=""img"") deps = [] for _ in xrange(num_ops): @@ -520,9 +520,9 @@ class ResizeBicubicBenchmark(test.Benchmark): sess, benchmark_op, min_iters=20, - name=('resize_bicubic_%s_%s_%s' % (image_size[0], image_size[1], + name=(""resize_bicubic_%s_%s_%s"" % (image_size[0], image_size[1], num_channels))) - print('%s : %.2f ms/img' % (results['name'], 1000 * results['wall_time'] + print(""%s : %.2f ms/img"" % (results[""name""], 1000 * results[""wall_time""] / (batch_size * num_ops))) def benchmarkSimilar3Channel(self): @@ -561,7 +561,7 @@ class ResizeAreaBenchmark(test.Benchmark): img = variables.Variable( random_ops.random_normal([batch_size, image_size[0], image_size[1], num_channels]), - name='img') + name=""img"") deps = [] for _ in xrange(num_ops): @@ -574,11 +574,11 @@ class ResizeAreaBenchmark(test.Benchmark): sess.run(variables.global_variables_initializer()) results = self.run_op_benchmark( sess, benchmark_op, - name=('resize_area_%s_%s_%s' % + name=(""resize_area_%s_%s_%s"" % (image_size[0], image_size[1], num_channels))) - print('%s : %.2f ms/img' % ( - results['name'], - 1000*results['wall_time'] / (batch_size * num_ops))) + print(""%s : %.2f ms/img"" % ( + results[""name""], + 1000*results[""wall_time""] / (batch_size * num_ops))) def benchmarkSimilar3Channel(self): self._benchmarkResize((183, 229), 3) @@ -632,7 +632,7 @@ class AdjustSaturationTest(test_util.TensorFlowTestCase): self.assertAllEqual(y_tf, y_np) def _adjust_saturation(self, image, saturation_factor): - image = ops.convert_to_tensor(image, name='image') + image = ops.convert_to_tensor(image, name=""image"") orig_dtype = image.dtype flt_image = image_ops.convert_image_dtype(image, dtypes.float32) saturation_adjusted_image = gen_image_ops.adjust_saturation( @@ -697,30 +697,30 @@ class AdjustSaturationTest(test_util.TensorFlowTestCase): [1000, 1, 3], ] test_styles = [ - 'all_random', - 'rg_same', - 'rb_same', - 'gb_same', - 'rgb_same', + ""all_random"", + ""rg_same"", + ""rb_same"", + ""gb_same"", + ""rgb_same"", ] with self.test_session(): for x_shape in x_shapes: for test_style in test_styles: x_np = np.random.rand(*x_shape) * 255. 
scale = np.random.rand() - if test_style == 'all_random': + if test_style == ""all_random"": pass - elif test_style == 'rg_same': + elif test_style == ""rg_same"": x_np[..., 1] = x_np[..., 0] - elif test_style == 'rb_same': + elif test_style == ""rb_same"": x_np[..., 2] = x_np[..., 0] - elif test_style == 'gb_same': + elif test_style == ""gb_same"": x_np[..., 2] = x_np[..., 1] - elif test_style == 'rgb_same': + elif test_style == ""rgb_same"": x_np[..., 1] = x_np[..., 0] x_np[..., 2] = x_np[..., 0] else: - raise AssertionError('Invalid test style: %s' % (test_style)) + raise AssertionError(""Invalid test style: %s"" % (test_style)) y_baseline = self._adjustSaturationNp(x_np, scale) y_fused = self._adjust_saturation(x_np, scale).eval() self.assertAllClose(y_fused, y_baseline, rtol=2e-5, atol=1e-5) @@ -846,9 +846,9 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): transformed_unknown_width = op(p_unknown_width) self.assertEqual(3, transformed_unknown_width.get_shape().ndims) - with self.assertRaisesRegexp(ValueError, 'must be three-dimensional'): + with self.assertRaisesRegexp(ValueError, ""must be three-dimensional""): op(p_wrong_rank) - with self.assertRaisesRegexp(ValueError, 'must be > 0'): + with self.assertRaisesRegexp(ValueError, ""must be > 0""): op(p_zero_dim) def testRot90GroupOrder(self): @@ -1130,7 +1130,7 @@ class CropToBoundingBoxTest(test_util.TensorFlowTestCase): if err_msg not in str(e): raise else: - raise AssertionError('Exception not raised: %s' % err_msg) + raise AssertionError(""Exception not raised: %s"" % err_msg) def _assertShapeInference(self, pre_shape, height, width, post_shape): image = array_ops.placeholder(dtypes.float32, shape=pre_shape) @@ -1187,7 +1187,7 @@ class CropToBoundingBoxTest(test_util.TensorFlowTestCase): for x_shape in ([1, 3, 5, 1], [3, 5]): self._assertRaises(x, x_shape, offset_height, offset_width, target_height, - target_width, ""'image' must be three-dimensional"") + target_width, ""must be three-dimensional"") def testZeroLengthInput(self): # Input image has 0-length dimension(s). 
@@ -1217,7 +1217,7 @@ class CropToBoundingBoxTest(test_util.TensorFlowTestCase): offset_width, target_height, target_width, - 'assertion failed:', + ""assertion failed:"", use_tensor_inputs_options=[True]) def testBadParams(self): @@ -1226,12 +1226,12 @@ class CropToBoundingBoxTest(test_util.TensorFlowTestCase): # Each line is a test configuration: # (offset_height, offset_width, target_height, target_width), err_msg - test_config = (([-1, 0, 3, 3], 'offset_height must be >= 0'), - ([0, -1, 3, 3], 'offset_width must be >= 0'), - ([0, 0, 0, 3], 'target_height must be > 0'), - ([0, 0, 3, 0], 'target_width must be > 0'), - ([2, 0, 3, 3], 'height must be >= target + offset'), - ([0, 2, 3, 3], 'width must be >= target + offset')) + test_config = (([-1, 0, 3, 3], ""offset_height must be >= 0""), + ([0, -1, 3, 3], ""offset_width must be >= 0""), + ([0, 0, 0, 3], ""target_height must be > 0""), + ([0, 0, 3, 0], ""target_width must be > 0""), + ([2, 0, 3, 3], ""height must be >= target + offset""), + ([0, 2, 3, 3], ""width must be >= target + offset"")) for params, err_msg in test_config: self._assertRaises(x, x_shape, *params, err_msg=err_msg) @@ -1362,7 +1362,7 @@ class PadToBoundingBoxTest(test_util.TensorFlowTestCase): if err_msg not in str(e): raise else: - raise AssertionError('Exception not raised: %s' % err_msg) + raise AssertionError(""Exception not raised: %s"" % err_msg) def _assertShapeInference(self, pre_shape, height, width, post_shape): image = array_ops.placeholder(dtypes.float32, shape=pre_shape) @@ -1432,7 +1432,7 @@ class PadToBoundingBoxTest(test_util.TensorFlowTestCase): for x_shape in ([1, 3, 5, 1], [3, 5]): self._assertRaises(x, x_shape, offset_height, offset_width, target_height, - target_width, ""'image' must be three-dimensional"") + target_width, ""must be three-dimensional"") def testZeroLengthInput(self): # Input image has 0-length dimension(s). @@ -1474,10 +1474,10 @@ class PadToBoundingBoxTest(test_util.TensorFlowTestCase): # Each line is a test configuration: # offset_height, offset_width, target_height, target_width, err_msg - test_config = ((-1, 0, 4, 4, 'offset_height must be >= 0'), - ( 0,-1, 4, 4, 'offset_width must be >= 0'), - ( 2, 0, 4, 4, 'height must be <= target - offset'), - ( 0, 2, 4, 4, 'width must be <= target - offset')) + test_config = ((-1, 0, 4, 4, ""offset_height must be >= 0""), + (0, -1, 4, 4, ""offset_width must be >= 0""), + (2, 0, 4, 4, ""height must be <= target - offset""), + (0, 2, 4, 4, ""width must be <= target - offset"")) for config_item in test_config: self._assertRaises(x, x_shape, *config_item) @@ -1554,7 +1554,7 @@ class SelectDistortedCropBoxTest(test_util.TensorFlowTestCase): # For reference, here is what the distribution of area ratios look like. area_ratio_hist, _ = np.histogram(area_ratios, bins=10, range=area_range) - print('area_ratio_hist ', area_ratio_hist) + print(""area_ratio_hist "", area_ratio_hist) # Ensure that fraction_object_covered is satisfied. # TODO(wicke, shlens, dga): Restore this test so that it is no longer flaky. 
@@ -2048,7 +2048,7 @@ class ResizeImageWithCropOrPadTest(test_util.TensorFlowTestCase): if err_msg not in str(e): raise else: - raise AssertionError('Exception not raised: %s' % err_msg) + raise AssertionError(""Exception not raised: %s"" % err_msg) def _assertShapeInference(self, pre_shape, height, width, post_shape): image = array_ops.placeholder(dtypes.float32, shape=pre_shape) @@ -2222,7 +2222,7 @@ class ResizeImageWithCropOrPadTest(test_util.TensorFlowTestCase): for x_shape in ([1, 3, 5, 1], [3, 5]): self._assertRaises(x, x_shape, target_height, target_width, - ""'image' must be three-dimensional"") + ""must be three-dimensional"") def testZeroLengthInput(self): # Input image has 0-length dimension(s). @@ -2256,12 +2256,12 @@ class ResizeImageWithCropOrPadTest(test_util.TensorFlowTestCase): # target_height <= 0 target_height, target_width = [0, 5] self._assertRaises(x, x_shape, target_height, target_width, - 'target_height must be > 0') + ""target_height must be > 0"") # target_width <= 0 target_height, target_width = [5, 0] self._assertRaises(x, x_shape, target_height, target_width, - 'target_width must be > 0') + ""target_width must be > 0"") def _SimpleColorRamp(): @@ -2286,8 +2286,8 @@ class JpegTest(test_util.TensorFlowTestCase): def testExisting(self): # Read a real jpeg and verify shape - path = ('tensorflow/core/lib/jpeg/testdata/' - 'jpeg_merge_test1.jpg') + path = (""tensorflow/core/lib/jpeg/testdata/"" + ""jpeg_merge_test1.jpg"") with self.test_session(use_gpu=True) as sess: jpeg0 = io_ops.read_file(path) image0 = image_ops.decode_jpeg(jpeg0) @@ -2299,9 +2299,9 @@ class JpegTest(test_util.TensorFlowTestCase): def testCmyk(self): # Confirm that CMYK reads in as RGB - base = 'tensorflow/core/lib/jpeg/testdata' - rgb_path = os.path.join(base, 'jpeg_merge_test1.jpg') - cmyk_path = os.path.join(base, 'jpeg_merge_test1_cmyk.jpg') + base = ""tensorflow/core/lib/jpeg/testdata"" + rgb_path = os.path.join(base, ""jpeg_merge_test1.jpg"") + cmyk_path = os.path.join(base, ""jpeg_merge_test1_cmyk.jpg"") shape = 256, 128, 3 for channels in 3, 0: with self.test_session(use_gpu=True) as sess: @@ -2320,9 +2320,9 @@ class JpegTest(test_util.TensorFlowTestCase): # Encode it, then decode it, then encode it image0 = constant_op.constant(_SimpleColorRamp()) jpeg0 = image_ops.encode_jpeg(image0) - image1 = image_ops.decode_jpeg(jpeg0, dct_method='INTEGER_ACCURATE') + image1 = image_ops.decode_jpeg(jpeg0, dct_method=""INTEGER_ACCURATE"") image2 = image_ops.decode_jpeg( - image_ops.encode_jpeg(image1), dct_method='INTEGER_ACCURATE') + image_ops.encode_jpeg(image1), dct_method=""INTEGER_ACCURATE"") jpeg0, image0, image1, image2 = sess.run([jpeg0, image0, image1, image2]) # The decoded-encoded image should be similar to the input @@ -2340,9 +2340,9 @@ class JpegTest(test_util.TensorFlowTestCase): # Encode it, then decode it, then encode it image0 = constant_op.constant(_SimpleColorRamp()) jpeg0 = image_ops.encode_jpeg(image0) - image1 = image_ops.decode_jpeg(jpeg0, dct_method='INTEGER_FAST') + image1 = image_ops.decode_jpeg(jpeg0, dct_method=""INTEGER_FAST"") image2 = image_ops.decode_jpeg( - image_ops.encode_jpeg(image1), dct_method='INTEGER_FAST') + image_ops.encode_jpeg(image1), dct_method=""INTEGER_FAST"") jpeg0, image0, image1, image2 = sess.run([jpeg0, image0, image1, image2]) # The decoded-encoded image should be similar to the input, but @@ -2364,7 +2364,7 @@ class JpegTest(test_util.TensorFlowTestCase): # default. They should be the same. 
image0 = constant_op.constant(_SimpleColorRamp()) jpeg0 = image_ops.encode_jpeg(image0) - image1 = image_ops.decode_jpeg(jpeg0, dct_method='INTEGER_FAST') + image1 = image_ops.decode_jpeg(jpeg0, dct_method=""INTEGER_FAST"") image2 = image_ops.decode_jpeg(jpeg0) image1, image2 = sess.run([image1, image2]) @@ -2373,7 +2373,7 @@ class JpegTest(test_util.TensorFlowTestCase): def testShape(self): with self.test_session(use_gpu=True) as sess: - jpeg = constant_op.constant('nonsense') + jpeg = constant_op.constant(""nonsense"") for channels in 0, 1, 3: image = image_ops.decode_jpeg(jpeg, channels=channels) self.assertEqual(image.get_shape().as_list(), @@ -2384,8 +2384,8 @@ class PngTest(test_util.TensorFlowTestCase): def testExisting(self): # Read some real PNGs, converting to different channel numbers - prefix = 'tensorflow/core/lib/png/testdata/' - inputs = (1, 'lena_gray.png'), (4, 'lena_rgba.png') + prefix = ""tensorflow/core/lib/png/testdata/"" + inputs = (1, ""lena_gray.png""), (4, ""lena_rgba.png"") for channels_in, filename in inputs: for channels in 0, 1, 3, 4: with self.test_session(use_gpu=True) as sess: @@ -2451,7 +2451,7 @@ class PngTest(test_util.TensorFlowTestCase): def testShape(self): with self.test_session(use_gpu=True): - png = constant_op.constant('nonsense') + png = constant_op.constant(""nonsense"") for channels in 0, 1, 3: image = image_ops.decode_png(png, channels=channels) self.assertEqual(image.get_shape().as_list(), @@ -2462,8 +2462,8 @@ class GifTest(test_util.TensorFlowTestCase): def testValid(self): # Read some real GIFs - prefix = 'tensorflow/core/lib/gif/testdata/' - filename = 'scan.gif' + prefix = ""tensorflow/core/lib/gif/testdata/"" + filename = ""scan.gif"" WIDTH = 20 HEIGHT = 40 STRIDE = 5 @@ -2492,8 +2492,8 @@ class GifTest(test_util.TensorFlowTestCase): def testInValid(self): # Read some real GIFs - prefix = 'tensorflow/core/lib/gif/testdata/' - filename = 'optimized.gif' + prefix = ""tensorflow/core/lib/gif/testdata/"" + filename = ""optimized.gif"" with self.test_session(use_gpu=True) as sess: gif0 = io_ops.read_file(prefix + filename) @@ -2503,7 +2503,7 @@ class GifTest(test_util.TensorFlowTestCase): def testShape(self): with self.test_session(use_gpu=True) as sess: - gif = constant_op.constant('nonsense') + gif = constant_op.constant(""nonsense"") image = image_ops.decode_gif(gif) self.assertEqual(image.get_shape().as_list(), [None, None, None, 3]) @@ -2526,7 +2526,7 @@ class ConvertImageTest(test_util.TensorFlowTestCase): image = constant_op.constant([1], dtype=dtypes.uint8) image_ops.convert_image_dtype(image, dtypes.uint8) y = image_ops.convert_image_dtype(image, dtypes.uint8) - self.assertEquals(y.op.type, 'Identity') + self.assertEquals(y.op.type, ""Identity"") self.assertEquals(y.op.inputs[0], image) def testConvertBetweenInteger(self): @@ -2751,5 +2751,5 @@ class TotalVariationTest(test_util.TensorFlowTestCase): self._test(multi, tot_var * np.array([1.0, 1.1, 1.2])) -if __name__ == '__main__': +if __name__ == ""__main__"": googletest.main() ",0,train 37e7693c78ef7f73192d95b439d3c3be0bee5271,tensorflow/tensorflow,"Re-enable contrib/quantize test when unfused mean and variance updates are used. Fix missed change from ""executing_eagerly"" to ""executing_eagerly_outside_functions"" that was accidentally dropped in cl/300392015. 
PiperOrigin-RevId: 300407526 Change-Id: Iaa88ac039a440b4e1081bc210a647de61cfad675",normalization.py,"@@ -20,7 +20,6 @@ from __future__ import print_function from tensorflow.python.compat import compat from tensorflow.python.distribute import distribution_strategy_context -from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -544,8 +543,9 @@ class BatchNormalizationBase(Layer): # TODO(rmlarsen): Support using fused avg updates for non-eager execution # after fixing graph pattern matching and enabling fused_batch_norm to # take exponential_avg_factor as a tensor input. - use_fused_avg_updates = (compat.forward_compatible(2020, 3, 6) and - context.executing_eagerly()) + use_fused_avg_updates = ( + compat.forward_compatible(2020, 3, 6) and + ops.executing_eagerly_outside_functions()) if use_fused_avg_updates: exponential_avg_factor = 1.0 - self.momentum else: ",0,train 580c7502f68dd84cf74c34b9d454a37def81d286,tensorflow/tensorflow,"internal change. Change: 149658231",learn_runner.py,"@@ -24,6 +24,28 @@ from tensorflow.contrib.learn.python.learn.experiment import Experiment from tensorflow.python.platform import tf_logging as logging +# TODO(xiejw): Refactor the learn_runner to make code reusable. +def _execute_schedule(experiment, schedule): + """"""Execute the method named `schedule` of `experiment`."""""" + if not hasattr(experiment, schedule): + logging.error('Schedule references non-existent task %s', schedule) + valid_tasks = [x for x in dir(experiment) + if not x.startswith('_') + and callable(getattr(experiment, x))] + logging.error('Allowed values for this experiment are: %s', valid_tasks) + raise ValueError('Schedule references non-existent task %s' % schedule) + + task = getattr(experiment, schedule) + if not callable(task): + logging.error('Schedule references non-callable member %s', schedule) + valid_tasks = [x for x in dir(experiment) + if not x.startswith('_') + and callable(getattr(experiment, x))] + logging.error('Allowed values for this experiment are: %s', valid_tasks) + raise TypeError('Schedule references non-callable member %s' % schedule) + return task() + + def run(experiment_fn, output_dir, schedule=None): """"""Make and run an experiment. @@ -86,25 +108,7 @@ def run(experiment_fn, output_dir, schedule=None): config = experiment.estimator.config schedule = schedule or _get_default_schedule(config) - # Execute the schedule - if not hasattr(experiment, schedule): - logging.error('Schedule references non-existent task %s', schedule) - valid_tasks = [x for x in dir(experiment) - if not x.startswith('_') - and callable(getattr(experiment, x))] - logging.error('Allowed values for this experiment are: %s', valid_tasks) - raise ValueError('Schedule references non-existent task %s' % schedule) - - task = getattr(experiment, schedule) - if not callable(task): - logging.error('Schedule references non-callable member %s', schedule) - valid_tasks = [x for x in dir(experiment) - if not x.startswith('_') - and callable(getattr(experiment, x))] - logging.error('Allowed values for this experiment are: %s', valid_tasks) - raise TypeError('Schedule references non-callable member %s' % schedule) - - return task() + return _execute_schedule(experiment, schedule) @experimental ",0,train 580c7502f68dd84cf74c34b9d454a37def81d286,tensorflow/tensorflow,"internal change. 
Change: 149658231",tuner.py,"@@ -24,7 +24,6 @@ import abc from tensorflow.contrib.framework.python.framework import experimental -@experimental class Tuner(object): """"""Tuner class is the interface for Experiment hyper-parameters tuning. @@ -45,6 +44,7 @@ class Tuner(object): __metaclass__ = abc.ABCMeta + @experimental @abc.abstractmethod def next_trial(self): """"""Switch to the next trial. @@ -59,6 +59,7 @@ class Tuner(object): """""" raise NotImplementedError(""Calling an abstract method."") + @experimental @abc.abstractmethod def run_experiment(self, experiment_fn): """"""Creates an Experiment by calling `experiment_fn` and executes it. ",0,train 398e65b283cdd213c3f2474bb0fedf3d3c10d848,tensorflow/tensorflow,"[TF:MLIR] Replace the used of saved model unused function removal pass with mark function visibility pass followed by symbol DCE pass. Fix the saved model unused function test. PiperOrigin-RevId: 293184416 Change-Id: Ifa8ca9da834ac384643c952edfd73b6d7fd00864",delete_unused_funcs.cc,"@@ -1,99 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the ""License""); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an ""AS IS"" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// This pass uses tf_saved_model dialect linkage information to delete -// unused func's. - -#include ""llvm/ADT/DenseMap.h"" -#include ""llvm/ADT/STLExtras.h"" -#include ""mlir/IR/Module.h"" // TF:llvm-project -#include ""mlir/Pass/Pass.h"" // TF:llvm-project -#include ""tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h"" - -namespace mlir { -namespace tf_saved_model { - -namespace { -struct DeleteUnusedFuncsPass : public ModulePass { - void runOnModule() override; -}; -} // namespace - -void DeleteUnusedFuncsPass::runOnModule() { - // If the model doesn't have tf_saved_model semantics, we can't do anything. - if (!HasTfSavedModelSemantics(getModule())) { - return; - } - - // TODO(silvasean): Use more generic MLIR functionality when available. - // This is just a basic call graph reachability pass (which in the case of TF - // functional control flow also implies handling tf.If/tf.While). - // The only thing specific to tf_saved_model is the set of roots. - - auto module = getModule(); - SymbolTable symbol_table(module); - - // Calculate func reachability with a DFS on the symbol reference graph. - SmallPtrSet dfs_visited_set; - SmallVector dfs_stack; - - // Initialize the roots of the DFS search. - for (auto func : module.getOps()) { - if (IsExported(func)) { - dfs_stack.push_back(func); - } - } - - // Do the DFS. - while (!dfs_stack.empty()) { - FuncOp func = dfs_stack.pop_back_val(); - if (!dfs_visited_set.insert(func).second) { - // If we already visited this node, skip it. 
- continue; - } - - SmallPtrSet callees; - auto uses = SymbolTable::getSymbolUses(func); - for (auto use : *uses) { - auto func = symbol_table.lookup( - use.getSymbolRef().cast().getValue()); - if (func) { - callees.insert(func); - } - } - - for (auto callee : callees) { - dfs_stack.push_back(callee); - } - } - - // Erase all unreachable func's. - for (auto func : llvm::make_early_inc_range(module.getOps())) { - if (dfs_visited_set.find(func) == dfs_visited_set.end()) { - func.erase(); - } - } -} - -std::unique_ptr> CreateDeleteUnusedFuncsPass() { - return std::make_unique(); -} - -static PassRegistration pass( - ""tf-saved-model-delete-unused-funcs"", - ""Use tf_saved_model linkage information to delete unused func's.""); - -} // namespace tf_saved_model -} // namespace mlir ",0,train 398e65b283cdd213c3f2474bb0fedf3d3c10d848,tensorflow/tensorflow,"[TF:MLIR] Replace the used of saved model unused function removal pass with mark function visibility pass followed by symbol DCE pass. Fix the saved model unused function test. PiperOrigin-RevId: 293184416 Change-Id: Ifa8ca9da834ac384643c952edfd73b6d7fd00864",passes.h,"@@ -182,10 +182,6 @@ void CreateTPUBridge(OpPassManager& pm); namespace tf_saved_model { -// Creates a pass that uses tf_saved_model dialect linkage information -// to delete unused func's. -std::unique_ptr> CreateDeleteUnusedFuncsPass(); - // Creates a pass that optimizes tf_saved_model.global_tensor ops. std::unique_ptr> CreateOptimizeGlobalTensorsPass(); ",0,train df2fbb89588065fca2c6e5fcfba7d8c2b4378591,tensorflow/tensorflow,"[XLA:Python] Plumb xla_gpu_enable_fast_min_max into the XLA:Python client. Disable it by default to get correct NaN semantics for min/max. Will fix https://github.com/google/jax/issues/1072 when deployed in jaxlib. PiperOrigin-RevId: 260567980",xla.cc,"@@ -425,7 +425,10 @@ PYBIND11_MODULE(xla_extension, m) { &DebugOptions::set_xla_cpu_fast_math_honor_nans) .def_property(""xla_cpu_fast_math_honor_division"", &DebugOptions::xla_cpu_fast_math_honor_division, - &DebugOptions::set_xla_cpu_fast_math_honor_division); + &DebugOptions::set_xla_cpu_fast_math_honor_division) + .def_property(""xla_gpu_enable_fast_min_max"", + &DebugOptions::xla_gpu_enable_fast_min_max, + &DebugOptions::set_xla_gpu_enable_fast_min_max); py::class_(m, ""ExecutableBuildOptions"") .def(py::init<>()) ",0,train df2fbb89588065fca2c6e5fcfba7d8c2b4378591,tensorflow/tensorflow,"[XLA:Python] Plumb xla_gpu_enable_fast_min_max into the XLA:Python client. Disable it by default to get correct NaN semantics for min/max. Will fix https://github.com/google/jax/issues/1072 when deployed in jaxlib. PiperOrigin-RevId: 260567980",xla_client.py,"@@ -109,6 +109,7 @@ class LocalBackend(Backend): options.debug_options.xla_cpu_fast_math_honor_infs = True options.debug_options.xla_cpu_fast_math_honor_nans = True options.debug_options.xla_cpu_fast_math_honor_division = True + options.debug_options.xla_gpu_enable_fast_min_max = False return _xla.LocalExecutable.Compile(c_computation, compile_options.argument_layouts, options, self.client, ",0,train c9cd1784bf287543d89593ca1432170cdbf694de,tensorflow/tensorflow,"Use a header to declare Register_AUDIO_MICROFRONTEND, instead of having to forward-declare. PiperOrigin-RevId: 275381415 Change-Id: Ib0abc4e0a8813532362ac70a95b9e68c344c4ca9",audio_microfrontend.h,"@@ -0,0 +1,29 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_AUDIO_MICROFRONTEND_H_ +#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_AUDIO_MICROFRONTEND_H_ + +#include ""tensorflow/lite/context.h"" + +namespace tflite { +namespace ops { +namespace custom { +TfLiteRegistration* Register_AUDIO_MICROFRONTEND(); +} // namespace custom +} // namespace ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_AUDIO_MICROFRONTEND_H_ ",0,train c9cd1784bf287543d89593ca1432170cdbf694de,tensorflow/tensorflow,"Use a header to declare Register_AUDIO_MICROFRONTEND, instead of having to forward-declare. PiperOrigin-RevId: 275381415 Change-Id: Ib0abc4e0a8813532362ac70a95b9e68c344c4ca9",audio_microfrontend_test.cc,"@@ -14,6 +14,8 @@ limitations under the License. ==============================================================================*/ // Unit test for TFLite Micro Frontend op. +#include ""tensorflow/lite/experimental/microfrontend/audio_microfrontend.h"" + #include #include @@ -27,9 +29,6 @@ limitations under the License. namespace tflite { namespace ops { namespace custom { - -TfLiteRegistration* Register_AUDIO_MICROFRONTEND(); - namespace { using ::testing::ElementsAreArray; ",0,train 9f3d53da8262cba49716ec85781fb88d80626b81,tensorflow/tensorflow,"Less strict error bound in conv_ops_test PiperOrigin-RevId: 243868038",conv_ops_test.cc,"@@ -794,7 +794,7 @@ class FusedConv2DOpTest : public OpsTestBase { if (image_width == filter_size && image_height == filter_size) { test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-4); } else { - test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-6); + test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-5); } } @@ -844,7 +844,7 @@ class FusedConv2DOpTest : public OpsTestBase { if (image_width == filter_size && image_height == filter_size) { test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-4); } else { - test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-6); + test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-5); } } ",0,train 002677f22fb686e34c464143f8ce2a71fbd03190,tensorflow/tensorflow,"Using TensorDescriptor instead of DummyTensor in OpenCL inference context. 
PiperOrigin-RevId: 401161076 Change-Id: I9750034906311eb9ac234e6fade6d09b2385b41c",inference_context.cc,"@@ -330,8 +330,9 @@ absl::Status InferenceContext::ReserveGraphTensors( } RETURN_IF_ERROR(SelectBestStorageType(gpu_info, shape, storage_type, data_type, layout, &storage_type)); - tensor_reserver_.Add( - t->id, {shape, TensorDescriptor{data_type, storage_type, layout}}); + TensorDescriptor tensor_desc{data_type, storage_type, layout}; + tensor_desc.shape = BHWDC(shape.b, shape.h, shape.w, 1, shape.c); + tensor_reserver_.Add(t->id, tensor_desc); max_id = std::max(max_id, t->id); } tensor_reserver_.SetNext(max_id + 1); @@ -344,7 +345,7 @@ absl::Status InferenceContext::ConvertOperations(const GpuInfo& gpu_info, std::map tensor_descriptors; const auto values = graph.values(); for (auto value : values) { - tensor_descriptors[value->id] = tensor_reserver_.Get(value->id).descriptor; + tensor_descriptors[value->id] = tensor_reserver_.Get(value->id); } std::set consumed_nodes; std::vector graph_nodes = graph.nodes(); @@ -365,7 +366,7 @@ absl::Status InferenceContext::ConvertOperations(const GpuInfo& gpu_info, absl::any_cast(node.operation.attributes); auto outputs = graph.FindOutputs(node.id); const_tensors_descs_[outputs[0]->id] = - tensor_reserver_.Get(outputs[0]->id).descriptor; + tensor_reserver_.Get(outputs[0]->id); const_tensors_descs_[outputs[0]->id].UploadData(attr.tensor); continue; } @@ -405,12 +406,10 @@ absl::Status InferenceContext::ConvertOperations(const GpuInfo& gpu_info, OperationDef op_def; op_def.precision = precision_; for (int j = 0; j < inputs.size(); ++j) { - op_def.src_tensors.push_back( - tensor_reserver_.Get(inputs[j]->id).descriptor); + op_def.src_tensors.push_back(tensor_reserver_.Get(inputs[j]->id)); } for (int j = 0; j < outputs.size(); ++j) { - op_def.dst_tensors.push_back( - tensor_reserver_.Get(outputs[j]->id).descriptor); + op_def.dst_tensors.push_back(tensor_reserver_.Get(outputs[j]->id)); } RETURN_IF_ERROR(GPUOperationFromNode(gpu_info, op_def, hints, inputs, outputs, node, &gpu_subgraph)); @@ -418,7 +417,9 @@ absl::Status InferenceContext::ConvertOperations(const GpuInfo& gpu_info, absl::flat_hash_map mapping_to_global_ids; for (int j = 0; j < gpu_subgraph.new_tensors.size(); ++j) { const auto& t = gpu_subgraph.new_tensors[j]; - auto global_id = tensor_reserver_.Add({t.first, t.second}); + TensorDescriptor td = t.second; + td.shape = BHWDC(t.first.b, t.first.h, t.first.w, 1, t.first.c); + auto global_id = tensor_reserver_.Add(td); mapping_to_global_ids[j] = global_id; } for (auto& gpu_op : gpu_subgraph.operations) { @@ -525,8 +526,7 @@ InferenceContext::TensorMemoryType InferenceContext::GetTensorMemoryType( return TensorMemoryType::kConst; } else if (variable_ids_and_refs_.find(id) != variable_ids_and_refs_.end()) { return TensorMemoryType::kVariable; - } else if (IsBufferBased(gpu_info, - tensor_reserver_.Get(id).descriptor.storage_type)) { + } else if (IsBufferBased(gpu_info, tensor_reserver_.Get(id).storage_type)) { return TensorMemoryType::kBuffer; } else { return TensorMemoryType::kStrongShape; @@ -560,7 +560,7 @@ absl::Status InferenceContext::AllocateMemoryForVariableTensors( ref_value_to_tensor_index.end()) { const auto& t = tensor_reserver_.Get(value_and_ref_value.first); const auto& shape = t.shape; - const auto& descriptor = t.descriptor; + const auto& descriptor = t; RETURN_IF_ERROR( CreateTensor(*context, shape, descriptor, @@ -583,7 +583,7 @@ absl::Status InferenceContext::AllocateMemoryForBuffers(const GpuInfo& gpu_info, for (auto& usage : 
buffer_usages) { const auto& t = tensor_reserver_.Get(usage.first); const auto& shape = t.shape; - const auto& descriptor = t.descriptor; + const auto& descriptor = t; const size_t element_size = descriptor.data_type == DataType::FLOAT32 ? 4 : 2; size_t buffer_size; @@ -664,7 +664,8 @@ absl::Status InferenceContext::AllocateMemoryForBuffers(const GpuInfo& gpu_info, continue; const int tensor_index = graph_ids_to_shared_buffer_tensors_[t.first]; if (created_tensors[tensor_index]) continue; - const auto& shape = tensor_reserver_.Get(t.first).shape; + const auto& shape_5d = tensor_reserver_.Get(t.first).shape; + const auto shape = BHWC(shape_5d.b, shape_5d.h, shape_5d.w, shape_5d.c); const int buffer_index = use_offset_assignment ? tensor_index : buffer_assignment.object_ids[tensor_index]; @@ -698,7 +699,7 @@ absl::Status InferenceContext::AllocateMemoryForStrongShapes( }, &usages); - std::vector> usage_records; + std::vector> usage_records; std::map remap_from_graph_ids; for (auto& usage : usages) { remap_from_graph_ids[usage.first] = usage_records.size(); @@ -707,7 +708,7 @@ absl::Status InferenceContext::AllocateMemoryForStrongShapes( static_cast(usage.second.y)}); } - ObjectsAssignment assignment; + ObjectsAssignment assignment; RETURN_IF_ERROR(AssignObjectsToTensors( usage_records, MemoryStrategy::EQUALITY, &assignment)); ",0,test 002677f22fb686e34c464143f8ce2a71fbd03190,tensorflow/tensorflow,"Using TensorDescriptor instead of DummyTensor in OpenCL inference context. PiperOrigin-RevId: 401161076 Change-Id: I9750034906311eb9ac234e6fade6d09b2385b41c",inference_context.h,"@@ -170,56 +170,32 @@ class InferenceContext { // anywhere. std::vector nodes_; - struct DummyTensor { - BHWC shape; - TensorDescriptor descriptor; - - bool operator==(const DummyTensor& b) const { - return shape == b.shape && descriptor == b.descriptor; - } - }; - class TensorReserver { public: TensorReserver() : next_(0) {} - ValueId Add(const DummyTensor& dummy) { + ValueId Add(const TensorDescriptor& dummy) { reservations_[next_] = dummy; return next_++; } - void Add(ValueId id, const DummyTensor& dummy) { + void Add(ValueId id, const TensorDescriptor& dummy) { reservations_[id] = dummy; } void SetNext(ValueId id) { next_ = id; } - DummyTensor Get(ValueId id) { return reservations_[id]; } + TensorDescriptor Get(ValueId id) { return reservations_[id]; } std::vector> GetTensorDescs() const { - std::vector> result; - for (auto& v : reservations_) { - TensorDescriptor desc = v.second.descriptor; - desc.shape.b = v.second.shape.b; - desc.shape.h = v.second.shape.h; - desc.shape.w = v.second.shape.w; - desc.shape.d = 1; - desc.shape.c = v.second.shape.c; - result.push_back({v.first, desc}); - } - return result; + return std::vector>( + reservations_.begin(), reservations_.end()); } void Add(const std::vector>& tensors) { for (auto& v : tensors) { - DummyTensor dummy; - dummy.descriptor = v.second; - dummy.shape.b = v.second.shape.b; - dummy.shape.h = v.second.shape.h; - dummy.shape.w = v.second.shape.w; - dummy.shape.c = v.second.shape.c; - Add(v.first, dummy); + Add(v.first, v.second); } } private: - absl::flat_hash_map reservations_; + absl::flat_hash_map reservations_; ValueId next_; }; TensorReserver tensor_reserver_; ",0,test 002677f22fb686e34c464143f8ce2a71fbd03190,tensorflow/tensorflow,"Using TensorDescriptor instead of DummyTensor in OpenCL inference context. 
PiperOrigin-RevId: 401161076 Change-Id: I9750034906311eb9ac234e6fade6d09b2385b41c",tensor.cc,"@@ -615,7 +615,17 @@ absl::Status CreateSharedImage2DBufferTensor(const CLContext& context, const TensorDescriptor& descriptor, int row_bytes_alignment, Tensor* result) { - const int width = shape.b * shape.w; + BHWDC shape5d(shape.b, shape.h, shape.w, 1, shape.c); + return CreateSharedImage2DBufferTensor(context, memory, shape5d, descriptor, + row_bytes_alignment, result); +} + +absl::Status CreateSharedImage2DBufferTensor(const CLContext& context, + cl_mem memory, const BHWDC& shape, + const TensorDescriptor& descriptor, + int row_bytes_alignment, + Tensor* result) { + const int width = shape.b * shape.w * shape.d; const int height = descriptor.storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? shape.h ",0,test 002677f22fb686e34c464143f8ce2a71fbd03190,tensorflow/tensorflow,"Using TensorDescriptor instead of DummyTensor in OpenCL inference context. PiperOrigin-RevId: 401161076 Change-Id: I9750034906311eb9ac234e6fade6d09b2385b41c",tensor.h,"@@ -158,6 +158,12 @@ absl::Status CreateSharedImage2DBufferTensor(const CLContext& context, int row_bytes_alignment, Tensor* result); +absl::Status CreateSharedImage2DBufferTensor(const CLContext& context, + cl_mem memory, const BHWDC& shape, + const TensorDescriptor& descriptor, + int row_bytes_alignment, + Tensor* result); + template absl::Status Tensor::WriteData(CLCommandQueue* queue, const tflite::gpu::Tensor& src) { ",0,test 28340a4b12e286fe14bb7ac08aebe325c3e150b4,tensorflow/tensorflow,"Fix cmake for MacOS (#17005) This change address cmake build issues for MacOS. Also fixes #14712",hexagon_controller.c,"@@ -19,7 +19,7 @@ limitations under the License. #include ""hexagon_controller.h"" -#include +#include #include #include ""adspmsgd.h"" ",0,train 613dad93cebadd573da162cad261318bcebe1416,tensorflow/tensorflow,"Fix colocation in function inlining PiperOrigin-RevId: 331814867 Change-Id: Idf8692894dd83ebd9de195b5b676ef681d20d11c",inline_function_utils.cc,"@@ -587,6 +587,10 @@ Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, Graph* g, // // If 'x' is a node in fbody->graph and its copy in 'g' is 'y', we // remember 'y' in node_map[x->id()]. + absl::flat_hash_set fn_nodes; + for (Node* n : fbody->graph->op_nodes()) { + fn_nodes.insert(n->name()); + } std::vector node_map(fbody->graph->num_node_ids()); for (Node* n : fbody->graph->op_nodes()) { NodeDef ndef = n->def(); @@ -605,6 +609,8 @@ Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, Graph* g, const string prefix = strings::StrCat(caller->name(), ""/""); TF_RETURN_IF_ERROR(AddPrefixAndSuffixToNode(prefix, /*suffix=*/"""", &ndef, options.uniquify_frame_names)); + TF_RETURN_IF_ERROR( + MaybeAddPrefixToColocationConstraints(fn_nodes, prefix, &ndef)); Status added_node; Node* clone = g->AddNode(ndef, &added_node); ",0,train 613dad93cebadd573da162cad261318bcebe1416,tensorflow/tensorflow,"Fix colocation in function inlining PiperOrigin-RevId: 331814867 Change-Id: Idf8692894dd83ebd9de195b5b676ef681d20d11c",node_def_util.cc,"@@ -795,6 +795,8 @@ bool IsValidControlInputName(StringPiece sp) { } } +const StringPiece kColocationGroupPrefixStringPiece(kColocationGroupPrefix); + } // namespace Status ValidateOpInput(const string& input_name, bool* is_control_input) { @@ -924,17 +926,27 @@ Status AddPrefixAndSuffixToNode(StringPiece prefix, StringPiece suffix, attr.set_s(frame_name); } - // Update colocation constraints. 
- constexpr char kClassAttr[] = ""_class""; - auto class_attr = node_def->mutable_attr()->find(kClassAttr); - if (class_attr != node_def->mutable_attr()->end()) { - AttrValue new_value; - new_value.mutable_list()->add_s( - strings::StrCat(prefix, class_attr->second.s())); - node_def->mutable_attr()->erase(kClassAttr); - node_def->mutable_attr()->insert({kClassAttr, new_value}); - } + return Status::OK(); +} +Status MaybeAddPrefixToColocationConstraints( + const absl::flat_hash_set& match, StringPiece prefix, + NodeDef* node_def) { + auto attr = node_def->mutable_attr()->find(kColocationAttrName); + if (attr == node_def->mutable_attr()->end()) { + return Status::OK(); + } + auto constraints_list = attr->second.mutable_list(); + auto constraints_size = constraints_list->s_size(); + for (size_t i = 0; i < constraints_size; ++i) { + StringPiece original(constraints_list->s(i)); + if (absl::ConsumePrefix(&original, kColocationGroupPrefixStringPiece)) { + if (match.contains(original)) { + (*constraints_list->mutable_s(i)) = + strings::StrCat(kColocationGroupPrefix, prefix, original); + } + } + } return Status::OK(); } ",0,train 613dad93cebadd573da162cad261318bcebe1416,tensorflow/tensorflow,"Fix colocation in function inlining PiperOrigin-RevId: 331814867 Change-Id: Idf8692894dd83ebd9de195b5b676ef681d20d11c",node_def_util.h,"@@ -19,6 +19,7 @@ limitations under the License. #include #include +#include ""absl/container/flat_hash_set.h"" #include ""tensorflow/core/framework/attr_value_util.h"" #include ""tensorflow/core/framework/node_def.pb.h"" #include ""tensorflow/core/framework/tensor.h"" @@ -391,6 +392,13 @@ Status AttachDef(const Status& status, const NodeDef& node_def, Status AddPrefixAndSuffixToNode(StringPiece prefix, StringPiece suffix, NodeDef* node_def, bool uniquify_frame_name = true); + +// Appends the given prefix to the colocation group name if the name exists +// in `to_match`. 
+Status MaybeAddPrefixToColocationConstraints( + const absl::flat_hash_set& match, StringPiece prefix, + NodeDef* node_def); + } // namespace tensorflow #endif // TENSORFLOW_CORE_FRAMEWORK_NODE_DEF_UTIL_H_ ",0,train 613dad93cebadd573da162cad261318bcebe1416,tensorflow/tensorflow,"Fix colocation in function inlining PiperOrigin-RevId: 331814867 Change-Id: Idf8692894dd83ebd9de195b5b676ef681d20d11c",node_def_util_test.cc,"@@ -615,6 +615,39 @@ TEST(AddPrefixAndSuffixToNode, Enter) { EXPECT_EQ(""prefix/test_frame/suffix"", frame_name); } +TEST(MaybeAddPrefixToColocationConstraints, Basic) { + NodeDef node_def; + node_def.set_name(""Identity""); + node_def.set_op(""Identity""); + AddNodeAttr(kColocationAttrName, + {strings::StrCat(kColocationGroupPrefix, ""Node1""), + strings::StrCat(kColocationGroupPrefix, ""Node2""), + strings::StrCat(kColocationGroupPrefix, ""Node3"")}, + &node_def); + + absl::flat_hash_set match; + match.insert(""Node1""); + match.insert(""Node3""); + TF_ASSERT_OK(MaybeAddPrefixToColocationConstraints(match, ""fn/"", &node_def)); + std::vector coloc_constraints; + TF_ASSERT_OK(GetNodeAttr(node_def, kColocationAttrName, &coloc_constraints)); + EXPECT_EQ( + coloc_constraints, + std::vector({""loc:@fn/Node1"", ""loc:@Node2"", ""loc:@fn/Node3""})); +} + +TEST(MaybeAddPrefixToColocationConstraints, NoConstraints) { + NodeDef node_def; + node_def.set_name(""Identity""); + node_def.set_op(""Identity""); + + absl::flat_hash_set match; + match.insert(""Node1""); + match.insert(""Node3""); + TF_ASSERT_OK(MaybeAddPrefixToColocationConstraints(match, ""fn/"", &node_def)); + EXPECT_FALSE(HasNodeAttr(node_def, kColocationAttrName)); +} + TEST(FormatNodeForErrorTest, Node) { Graph g(OpRegistry::Global()); Node* node; ",0,train 0557f9ef182290b28bb30076f9e4c52f67c6cc55,tensorflow/tensorflow,"Apply clang-tidy fixes for llvm-header-guard in test_passes.h (NFC) PiperOrigin-RevId: 434206359",test_passes.h,"@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_GML_ST_TRANSFORMS_TEST_PASSES_H_ -#define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_GML_ST_TRANSFORMS_TEST_PASSES_H_ +#ifndef MLIR_HLO_DIALECT_GML_ST_TRANSFORMS_TEST_PASSES_H +#define MLIR_HLO_DIALECT_GML_ST_TRANSFORMS_TEST_PASSES_H #include ""mlir/Pass/Pass.h"" @@ -31,4 +31,4 @@ std::unique_ptr> createTestGmlStLoopTilingPass(); } // namespace gml_st } // namespace mlir -#endif // TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_GML_ST_TRANSFORMS_TEST_PASSES_H_ +#endif // MLIR_HLO_DIALECT_GML_ST_TRANSFORMS_TEST_PASSES_H ",0,test 8714a150aae0cc62703e5b7070747443296903d3,tensorflow/tensorflow,"Fix bug to enable function conversion with main graph disable flag. Setting the `minimum_segment_size` to -1 will disable the main graph conversion, but it currently also disables the function conversions. This change disables only the main graph from conversion, and runs function conversions. 
PiperOrigin-RevId: 392517438 Change-Id: I32eb70f4016bd111391cef72d3fb81d34180b118",trt_optimization_pass.cc,"@@ -353,7 +353,9 @@ Status TRTOptimizationPass::Optimize(grappler::Cluster* cluster, VLOG(1) << ""Called TRTOptimization Pass "" << name_ << "" on a grappler item with id="" << item.id; TF_ASSIGN_OR_RETURN(bool do_function_conversion, ShouldConvertFunction(item)); - if (minimum_segment_size_ == -1 || + // Optimizing the main graph(identified with `item.id == ""tf_graph""`) with + // `minimim_segment_size == -1` indicates skipping main graph conversion. + if ((minimum_segment_size_ == -1 && item.id == ""tf_graph"") || (item.id != ""tf_graph"" && !do_function_conversion)) { VLOG(1) << ""Not optimizing this grappler item: "" << item.id; *optimized_graph = item.graph; @@ -410,6 +412,7 @@ Status TRTOptimizationPass::Optimize(grappler::Cluster* cluster, tensorflow::down_cast(item); TF_RETURN_IF_ERROR( UpdateFunctionSpecificConversionParams(cp, func_item.func_attr())); + assert(cp.minimum_segment_size > 0); } auto status = ConvertAfterShapes(cp); ",0,train 2ef925512189149d9374f20a02389382f75a15ce,tensorflow/tensorflow,"lite: Release cond and body subgraph of WHILE op It will save more runtime memory with very few latency overhead. PiperOrigin-RevId: 394379825 Change-Id: I32e9d81ccc727687f7f7e4bdaa2de6d612de7fa8",while.cc,"@@ -111,6 +111,7 @@ struct OpData { int body_subgraph_index; bool cond_has_dynamic_output_tensors; bool body_has_dynamic_output_tensors; + bool subgraphs_allocated; }; void* Init(TfLiteContext* context, const char* buffer, size_t length) { @@ -120,6 +121,7 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) { op_data->body_subgraph_index = params->body_subgraph_index; op_data->cond_has_dynamic_output_tensors = false; op_data->body_has_dynamic_output_tensors = false; + op_data->subgraphs_allocated = false; return op_data; } @@ -175,6 +177,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { context, this_subgraph, TfLiteIntArrayView(node->inputs), body_subgraph, body_subgraph->inputs(), true)); TF_LITE_ENSURE_OK(context, body_subgraph->AllocateTensors()); + op_data->subgraphs_allocated = true; if (body_subgraph->HasDynamicTensors()) { op_data->body_has_dynamic_output_tensors = true; } else { @@ -214,7 +217,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const OpData* op_data = reinterpret_cast(node->user_data); + OpData* op_data = reinterpret_cast(node->user_data); Subgraph* this_subgraph = reinterpret_cast(context->impl_); auto* subgraphs = this_subgraph->GetSubgraphs(); Subgraph* cond_subgraph = (*subgraphs)[op_data->cond_subgraph_index].get(); @@ -256,6 +259,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // isn't optimized yet and a lot of redundant copies are made. // TODO(b/120234921): Optimize and avoid copying tensors between subgraphs. + if (op_data->subgraphs_allocated == false) { + TF_LITE_ENSURE_OK(context, cond_subgraph->AllocateTensors()); + TF_LITE_ENSURE_OK(context, body_subgraph->AllocateTensors()); + } + if (op_data->body_has_dynamic_output_tensors) { // If body subgraph has dynamic outputs, the input of condition subgraph may // be changed in the last invocation and may need resizing. 
@@ -329,6 +337,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { context, CopyTensorsData(context, cond_subgraph, cond_subgraph->inputs(), this_subgraph, TfLiteIntArrayView(node->outputs))); + + TF_LITE_ENSURE_OK(context, cond_subgraph->ReleaseNonPersistentMemory()); + TF_LITE_ENSURE_OK(context, body_subgraph->ReleaseNonPersistentMemory()); + op_data->subgraphs_allocated = false; + return kTfLiteOk; } ",0,train 512f92db4e27a2871d94ffccaf9d01e7389b497c,tensorflow/tensorflow,"Set mlir-cpu-runner JIT codegen opt level correctly - the JIT codegen was being run at the default -O0 level; instead, propagate the opt level from the cmd line. Signed-off-by: Uday Bondhugula Closes #123 COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/mlir/pull/123 from bondhugula:jit-runner 3b055e47f94c9a48bf487f6400787478738cda02 PiperOrigin-RevId: 267778586",ExecutionEngine.h,"@@ -72,13 +72,15 @@ public: /// Creates an execution engine for the given module. If `transformer` is /// provided, it will be called on the LLVM module during JIT-compilation and - /// can be used, e.g., for reporting or optimization. - /// If `sharedLibPaths` are provided, the underlying JIT-compilation will open - /// and link the shared libraries for symbol resolution. - /// If `objectCache` is provided, JIT compiler will use it to store the object - /// generated for the given module. + /// can be used, e.g., for reporting or optimization. `jitCodeGenOptLevel`, + /// when provided, is used as the optimization level for target code + /// generation. If `sharedLibPaths` are provided, the underlying + /// JIT-compilation will open and link the shared libraries for symbol + /// resolution. If `objectCache` is provided, JIT compiler will use it to + /// store the object generated for the given module. static llvm::Expected> create( ModuleOp m, std::function transformer = {}, + Optional jitCodeGenOptLevel = llvm::None, ArrayRef sharedLibPaths = {}, bool enableObjectCache = false); /// Looks up a packed-argument function with the given name and returns a ",0,test f90484b9a4302ccae5168e3a06bd539071661fee,tensorflow/tensorflow,[ROCm] Adding ROCm support for the stateful_random ops,stateful_random_ops_cpu_gpu.h,"@@ -82,7 +82,7 @@ struct RngSkip_Philox; using CPUDevice = Eigen::ThreadPoolDevice; -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM using GPUDevice = Eigen::GpuDevice; @@ -100,7 +100,7 @@ struct RngSkip_Philox { void operator()(const GPUDevice& device, int64 delta, Tensor* state_tensor); }; -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // end namespace tensorflow ",0,train f90484b9a4302ccae5168e3a06bd539071661fee,tensorflow/tensorflow,[ROCm] Adding ROCm support for the stateful_random ops,stateful_random_ops_gpu.cu.cc,"@@ -13,13 +13,18 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM + +#if TENSORFLOW_USE_ROCM +#include ""rocm/include/hip/hip_runtime.h"" +#endif #define EIGEN_USE_GPU #include ""tensorflow/core/kernels/random_op_gpu.h"" #include ""tensorflow/core/kernels/stateful_random_ops_cpu_gpu.h"" #include ""tensorflow/core/util/gpu_launch_config.h"" +#include ""tensorflow/core/util/gpu_kernel_helper.h"" namespace tensorflow { @@ -74,7 +79,11 @@ void UpdateVariableAndFill_Philox::operator()( GetGpuLaunchConfig(work_element_count, d, FillKernel, 0, 0); int zero = 0; +#if GOOGLE_CUDA cudaMemcpyToSymbol(thread_counter, &zero, sizeof(int)); +#else // TENSORFLOW_USE_ROCM + hipMemcpyToSymbol(HIP_SYMBOL(thread_counter), &zero, sizeof(int)); +#endif TF_CHECK_OK(GpuLaunchKernel( FillKernel, cfg.block_count, cfg.thread_per_block, 0, d.stream(), dist, state_size, output_size, state_data, output_data)); @@ -88,8 +97,8 @@ __global__ void SkipKernel(int64 delta, StateElementType* state_data) { void RngSkip_Philox::operator()(const GPUDevice& d, int64 delta, Tensor* state_tensor) { - SkipKernel<<<1, 1, 0, d.stream()>>>( - delta, state_tensor->flat().data()); + TF_CHECK_OK(GpuLaunchKernel(SkipKernel, 1, 1, 0, d.stream(), + delta, state_tensor->flat().data())); } // Explicit instantiation of the GPU distributions functors. @@ -140,4 +149,4 @@ template struct UpdateVariableAndFill_Philox< } // end namespace tensorflow -#endif // GOOGLE_CUDA +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM ",0,train f90484b9a4302ccae5168e3a06bd539071661fee,tensorflow/tensorflow,[ROCm] Adding ROCm support for the stateful_random ops,philox_random.h,"@@ -25,7 +25,7 @@ limitations under the License. #include ""tensorflow/core/platform/types.h"" // Function qualifiers that need to work on both CPU and GPU. -#if defined(__CUDACC__) +#if defined(__CUDACC__) || defined(__HIPCC__) // For nvcc. #define PHILOX_DEVICE_FUNC __host__ __device__ #define PHILOX_INLINE __inline__ ",0,train cb6047d9f30754c8339721e0f21c2e17f32cdf3a,tensorflow/tensorflow,"[XLA:GPU] Re-enable tests that are passing at head. All of these were disabled due to zero-sized shapes. PiperOrigin-RevId: 202132109",reshape_test.cc,"@@ -125,10 +125,7 @@ XLA_TEST_P(ReshapeTest, ScalarToSingleElementArray) { zero_error_spec_); } -// TODO(b/29185393): Make this work with the GPU backend. The GPU backend -// does not handle zero-sized shapes correctly. Failed last on 2017-11-30 -// with an incorrect result rank. -XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3)) { +XLA_TEST_P(ReshapeTest, Trivial0x3) { XlaBuilder builder(TestName()); Array2D input_array(0, 3); auto input_literal = Literal::CreateR2FromArray2D(input_array); @@ -141,10 +138,7 @@ XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3)) { zero_error_spec_); } -// TODO(b/29185393): Make this work with the GPU backend. The GPU backend -// does not handle zero-sized shapes correctly. Failed last on 2017-05-15 -// with an incorrect result rank. -XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3WithParameter)) { +XLA_TEST_P(ReshapeTest, Trivial0x3WithParameter) { XlaBuilder builder(TestName()); std::unique_ptr param0_literal = @@ -158,10 +152,7 @@ XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3WithParameter)) { zero_error_spec_); } -// TODO(b/29185393): Make this work with the GPU backend. The GPU backend -// does not handle zero-sized shapes correctly. Failed last on 2017-11-30 -// with an incorrect result rank. 
-XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial3x0)) { +XLA_TEST_P(ReshapeTest, Trivial3x0) { XlaBuilder builder(TestName()); Array2D input_array(3, 0); auto input_literal = Literal::CreateR2FromArray2D(input_array); @@ -200,12 +191,8 @@ XLA_TEST_P(ReshapeTest, Trivial3x1) { zero_error_spec_); } -// TODO(b/29185393): Make this work with the GPU backend. The GPU backend -// does not handle zero-sized shapes correctly. Failed last on 2017-11-30 -// with an incorrect result rank. -// // Splits an empty vector into an empty matrix. -XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(R1ToR2_0_To_2x0)) { +XLA_TEST_P(ReshapeTest, R1ToR2_0_To_2x0) { XlaBuilder builder(TestName()); auto input_literal = Literal::CreateR1({}); XlaOp parameter; @@ -234,12 +221,8 @@ XLA_TEST_P(ReshapeTest, R1ToR2_6_To_2x3) { zero_error_spec_); } -// TODO(b/29185393): Make this work with the GPU backend. The GPU backend -// does not handle zero-sized shapes correctly. Failed last on 2017-11-30 -// with an incorrect result rank. -// // Transposes a 2x0 array to a 0x2 array. -XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Reshape0x2To2x0)) { +XLA_TEST_P(ReshapeTest, Reshape0x2To2x0) { XlaBuilder builder(TestName()); auto input_literal = Literal::CreateFromArray(Array2D(0, 2)); XlaOp parameter; @@ -286,12 +269,8 @@ XLA_TEST_P(ReshapeTest, TransposeAsReshape) { zero_error_spec_); } -// TODO(b/29185393): Make this work with the GPU backend. The GPU backend -// does not handle zero-sized shapes correctly. Failed last on 2017-11-30 -// with an incorrect result rank. -// // Transposes a 0x4 array with XlaBuilder::Transpose. -XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Transpose0x4)) { +XLA_TEST_P(ReshapeTest, Transpose0x4) { XlaBuilder builder(TestName()); auto input_literal = Literal::CreateFromArray(Array2D(0, 4)); XlaOp parameter; @@ -319,13 +298,9 @@ XLA_TEST_P(ReshapeTest, Transpose4x3) { zero_error_spec_); } -// TODO(b/29185393): Make this work with the GPU backend. The GPU backend -// does not handle zero-sized shapes correctly. Failed last on 2017-11-30 -// with an incorrect result rank. -// // Reshapes an empty 2-dimensional array with dimensions that are not just a // rearrangement of the originals (split), but no reordering (no shuffle). -XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeSplitNoShuffleZeroElements)) { +XLA_TEST_P(ReshapeTest, ReshapeSplitNoShuffleZeroElements) { XlaBuilder builder(TestName()); auto input_literal = Literal::CreateFromArray(Array2D(6, 0)); XlaOp parameter; @@ -338,10 +313,7 @@ XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeSplitNoShuffleZeroElements)) { zero_error_spec_); } -// TODO(b/29185393): Make this work with the GPU backend. The GPU backend -// does not handle zero-sized shapes correctly. Failed last on 2017-11-30 -// with an incorrect result rank. -XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeR4ToR2ZeroElements)) { +XLA_TEST_P(ReshapeTest, ReshapeR4ToR2ZeroElements) { XlaBuilder builder(TestName()); auto input_literal = Literal::CreateFromArray(Array4D(2, 3, 4, 0)); XlaOp parameter; @@ -372,11 +344,7 @@ XLA_TEST_P(ReshapeTest, ReshapeSplitNoShuffle) { zero_error_spec_); } -// TODO(b/29185393): Make this work with the GPU backend. The GPU backend -// does not handle zero-sized shapes correctly. Failed last on 2017-11-30 -// with an incorrect result rank. 
-// -XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeSplitAndShuffleZeroElements)) { +XLA_TEST_P(ReshapeTest, ReshapeSplitAndShuffleZeroElements) { XlaBuilder builder(TestName()); auto input_literal = Literal::CreateFromArray(Array2D(0, 6)); XlaOp parameter; ",0,test cb6047d9f30754c8339721e0f21c2e17f32cdf3a,tensorflow/tensorflow,"[XLA:GPU] Re-enable tests that are passing at head. All of these were disabled due to zero-sized shapes. PiperOrigin-RevId: 202132109",while_test.cc,"@@ -184,8 +184,7 @@ TEST_F(WhileTest, WhileWithPredicateResult) { // while (result.sum() < 15.5f) { // result = result + vector(0); // } -// TODO(b/29185393): does not terminate on CPU. -TEST_F(WhileTest, DISABLED_WhileWithEmptyVectorResult) { +TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithEmptyVectorResult)) { Shape result_shape = ShapeUtil::MakeShape(F32, {0}); // Create a computation for the reduction. ",0,test 396b58416e927a6cfbc7fba85265119e0c769168,tensorflow/tensorflow,Fix small typo in pooling_ops_test (#1953),pooling_ops_test.py,"@@ -870,9 +870,9 @@ class PoolingTest(tf.test.TestCase): def testShapeFunctionEdgeCases(self): # All shapes unknown. for pool_func in [tf.nn.max_pool, tf.nn.avg_pool]: - p = tf.nn.max_pool(tf.placeholder(tf.float32), - ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1], - padding=""SAME"") + p = pool_func(tf.placeholder(tf.float32), + ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1], + padding=""SAME"") self.assertEqual([None, None, None, None], p.get_shape().as_list()) p, am = tf.nn.max_pool_with_argmax( tf.placeholder(tf.float32), ",0,train b1c1547bcf3a38e9275e1c0a3c4ddee6cbf47ab7,tensorflow/tensorflow,Update example to use tf.function,check_ops.py,"@@ -2121,25 +2121,16 @@ def ensure_shape(x, shape, name=None): For example: - >>> # tf.placeholder() is not compatible with eager execution - ... - >>> tf.compat.v1.disable_eager_execution() - >>> x = tf.compat.v1.placeholder(tf.int32) - >>> print(x.shape) - TensorShape(None) - >>> y = x * 2 - >>> print(y.shape) - TensorShape(None) - >>> y = tf.ensure_shape(y, (None, 3, 3)) - >>> print(y.shape) - TensorShape([Dimension(None), Dimension(3), Dimension(3)]) - >>> with tf.compat.v1.Session() as sess: - >>> sess.run(y, feed_dict={x: [1, 2, 3]}) + >>> @tf.function(input_signature=[tf.TensorSpec(dtype=tf.float32, shape=None)]) + >>> def f(tensor): + >>> return tf.ensure_shape(x, [3, 3]) + >>> + >>> f(tf.zeros([3, 3])) # Passes + >>> f([1, 2, 3]) # fails Traceback (most recent call last): - ... - InvalidArgumentError: Shape of tensor mul [3] is not compatible with - expected shape [?,3,3]. - + ... + InvalidArgumentError: Shape of tensor x [3] is not compatible with expected shape [3,3]. + The above example raises `tf.errors.InvalidArgumentError`, because the shape (3,) is not compatible with the shape (None, 3, 3) ",0,train 1408e0342948d10ddc6e3ec9996777a9cbd5ac86,tensorflow/tensorflow,"Tpu driver changes. PiperOrigin-RevId: 289914023 Change-Id: Ie4a98a2c2b79f1647bbaac6da7040f350f352099",c_api_client.c,"@@ -23,12 +23,12 @@ limitations under the License. #include #include -#include ""c_api.h"" +#include ""libtpu.h"" void* LoadAndInitializeDriver(const char* shared_lib, struct TpuDriverFn* driver_fn) { void* handle; - handle = dlopen(""./c_api.so"", RTLD_NOW); + handle = dlopen(""libtpu.so"", RTLD_NOW); if (!handle) { fprintf(stderr, ""Error: %s\n"", dlerror()); exit(EXIT_FAILURE); ",0,train 1408e0342948d10ddc6e3ec9996777a9cbd5ac86,tensorflow/tensorflow,"Tpu driver changes. 
PiperOrigin-RevId: 289914023 Change-Id: Ie4a98a2c2b79f1647bbaac6da7040f350f352099",libtpu.h,"@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_CLIENT_C_API_H_ -#define TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_CLIENT_C_API_H_ +#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_CLIENT_LIBTPU_H_ +#define TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_CLIENT_LIBTPU_H_ #include @@ -255,4 +255,4 @@ struct TpuDriverFn { PrototypeTpuDriver_Version* TpuDriver_Version; // NOLINT }; -#endif // TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_CLIENT_C_API_H_ +#endif // TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_CLIENT_LIBTPU_H_ ",0,train 1408e0342948d10ddc6e3ec9996777a9cbd5ac86,tensorflow/tensorflow,"Tpu driver changes. PiperOrigin-RevId: 289914023 Change-Id: Ie4a98a2c2b79f1647bbaac6da7040f350f352099",external_tpu_driver.cc,"@@ -17,7 +17,7 @@ #include ""absl/strings/str_format.h"" #include ""absl/time/time.h"" -#include ""tensorflow/compiler/xla/python/tpu_driver/client/c_api.h"" +#include ""tensorflow/compiler/xla/python/tpu_driver/client/libtpu.h"" #include ""tensorflow/compiler/xla/python/tpu_driver/tpu_driver.h"" #include ""tensorflow/compiler/xla/python/tpu_driver/tpu_driver.pb.h"" #include ""tensorflow/compiler/xla/statusor.h"" ",0,train 3a63696e3b417603830131f989865a6f5b141482,tensorflow/tensorflow,Update math_ops.py,math_ops.py,"@@ -4229,7 +4229,7 @@ def polyval(coeffs, x, name=None): p(x) = coeffs[n-1] + x * (coeffs[n-2] + ... + x * (coeffs[1] + x * coeffs[0])) -Usage Example: + Usage Example: >>> coefficients = [1.0, 2.5, -4.2] >>> x = 5.0 >>> y = tf.math.polyval(coefficients, x) ",0,train 8af8ce384998e327687fbea3e4675e47b9d864e1,tensorflow/tensorflow,"Add more Resize Bilinear tests PiperOrigin-RevId: 251897573",image_ops_test.py,"@@ -561,6 +561,13 @@ class ResizeBilinearTest(parameterized.TestCase, xla_test.XLATestCase): (""86x86To456x456"", 86, 86, 456, 456), (""100x100To456x456"", 100, 100, 456, 456), (""64x64To224x224"", 64, 64, 224, 224), + (""128x128To224x224"", 128, 128, 224, 224), + (""256x256To224x224"", 256, 256, 224, 224), + (""512x512To224x224"", 512, 512, 224, 224), + (""64x64To299x299"", 64, 64, 299, 299), + (""128x128To299x299"", 128, 128, 299, 299), + (""256x256To299x299"", 256, 256, 299, 299), + (""512x512To299x299"", 512, 512, 299, 299), (""224x224To224x224"", 224, 224, 224, 224), # This test is disabled because it is very slow. It is slow because # 383 is prime, 383 and 2047 are coprime, and 2048 is large. ",0,test 6be738d758c5c60a9f8a04d48b24aff43c352efc,tensorflow/tensorflow,"Annotate data race on signgam. Change: 129369856",cwise_op_lgamma.cc,"@@ -16,8 +16,17 @@ limitations under the License. 
#include ""tensorflow/core/kernels/cwise_ops_common.h"" namespace tensorflow { -REGISTER3(UnaryOp, CPU, ""Lgamma"", functor::lgamma, float, Eigen::half, double); + +template +class LgammaOp : public UnaryOp { + public: + explicit LgammaOp(OpKernelConstruction* ctx) : UnaryOp(ctx) { + TF_ANNOTATE_BENIGN_RACE(&signgam, ""signgam output from lgamma is unused""); + } +}; + +REGISTER3(LgammaOp, CPU, ""Lgamma"", functor::lgamma, float, Eigen::half, double); #if GOOGLE_CUDA -REGISTER3(UnaryOp, GPU, ""Lgamma"", functor::lgamma, float, Eigen::half, double); +REGISTER3(LgammaOp, GPU, ""Lgamma"", functor::lgamma, float, Eigen::half, double); #endif } // namespace tensorflow ",0,train 6be738d758c5c60a9f8a04d48b24aff43c352efc,tensorflow/tensorflow,"Annotate data race on signgam. Change: 129369856",dynamic_annotations.h,"@@ -19,9 +19,14 @@ limitations under the License. // IWYU pragma: private, include ""third_party/tensorflow/core/platform/mem.h"" // IWYU pragma: friend third_party/tensorflow/core/platform/mem.h -// Do nothing for this platform +// Do nothing for this platform. + #define TF_ANNOTATE_MEMORY_IS_INITIALIZED(ptr, bytes) \ do { \ } while (0) +#define TF_ANNOTATE_BENIGN_RACE(ptr, description) \ + do { \ + } while (0) + #endif // TENSORFLOW_CORE_PLATFORM_DEFAULT_DYNAMIC_ANNOTATIONS_H_ ",0,train aa5956dc18f65027bc28c8be132505cf9859d328,tensorflow/tensorflow,"Depthwise convolution 3x3 per-channel int8 for dot-product ARM (16). Invoke new dot-product ASM path in normal per-channel flow. PiperOrigin-RevId: 295755806 Change-Id: Ief16e2acd78d2bbb9c5ced91f7a0312681d833fe",depthwiseconv_uint8_3x3_filter.h,"@@ -13405,6 +13405,20 @@ inline void DepthwiseConvDotProduct3x3( thread_dim); } +template +inline void DepthwiseConvDotProduct3x3PerChannel( + const DepthwiseParams& params, const RuntimeShape& input_shape, + const int8* input_data, const RuntimeShape& filter_shape, + const int8* filter_data, const RuntimeShape& bias_shape, + const int32* bias_data, const RuntimeShape& output_shape, int8* output_data, + int thread_start, int thread_end, int thread_dim) { + DepthwiseConvDotProduct3x3Impl< + implementation, depthwise_conv::QuantizationType::kPerChannelInt8>( + params, input_shape, input_data, filter_shape, filter_data, bias_shape, + bias_data, output_shape, output_data, thread_start, thread_end, + thread_dim); +} + #undef vst1_lane_8x4 #undef vst1q_lane_8x4 #undef vld1q_lane_s8x8 ",0,test aa5956dc18f65027bc28c8be132505cf9859d328,tensorflow/tensorflow,"Depthwise convolution 3x3 per-channel int8 for dot-product ARM (16). Invoke new dot-product ASM path in normal per-channel flow. PiperOrigin-RevId: 295755806 Change-Id: Ief16e2acd78d2bbb9c5ced91f7a0312681d833fe",depthwise_conv.h,"@@ -20,6 +20,7 @@ limitations under the License. 
#include ""tensorflow/lite/kernels/cpu_backend_threadpool.h"" #include ""tensorflow/lite/kernels/internal/optimized/cpu_check.h"" #include ""tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h"" +#include ""tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h"" #include ""tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_3x3_filter.h"" #include ""tensorflow/lite/kernels/internal/optimized/optimized_ops.h"" #include ""tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"" @@ -1789,7 +1790,8 @@ inline void DepthwiseConvWithRounding( const int8* input_data, const RuntimeShape& filter_shape, const int8* filter_data, const RuntimeShape& bias_shape, const int32* bias_data, const RuntimeShape& output_shape, int8* output_data, - int thread_start, int thread_end, int thread_dim) { + int thread_start, int thread_end, int thread_dim, + const CpuBackendContext& cpu_backend_context) { ruy::profiler::ScopeLabel label(""DepthwiseConvInt8/8bit""); const int depth_multiplier = params.depth_multiplier; const int dilation_width_factor = params.dilation_width_factor; @@ -1807,6 +1809,36 @@ inline void DepthwiseConvWithRounding( // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on // Jetson TX-2. This compiler does not support the offsetof() macro. #if defined(__aarch64__) && !defined(GOOGLE_L4T) +#if defined(__ANDROID__) && defined(__clang__) + ruy::Context* ruy_context = cpu_backend_context.ruy_context(); + const auto ruy_paths = ruy_context != nullptr + ? ruy_context->GetRuntimeEnabledPaths() + : ruy::Path::kNone; + const bool has_dot_product_instructions = + (ruy_paths & ruy::Path::kNeonDotprod) != ruy::Path::kNone; + + // Dispatch to dot-product 3x3 kernels when supported. + if (has_dot_product_instructions) { + using optimized_ops::depthwise_conv::DotProduct3x3KernelType; + DotProduct3x3KernelType kernel_type = + optimized_ops::depthwise_conv::CategorizeDotProductKernel< + optimized_ops::depthwise_conv::QuantizationType::kPerChannelInt8>( + input_shape, filter_shape, output_shape, params); + if (kernel_type != DotProduct3x3KernelType::kNone) { + ruy::profiler::ScopeLabel specialized_label( + ""DepthwiseConvInt8/8bit/3x3XDotProduct""); + optimized_ops::depthwise_conv::DepthwiseConvDotProduct3x3PerChannel< + DepthwiseConvImplementation::kUseNeon3x3DotProduct>( + params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, thread_start, + thread_end, thread_dim); + return; + } + } + +#endif + // Dispatch to non-dot-product 3x3 kernels when supported. 
+ const int stride_width = params.stride_width; const int stride_height = params.stride_height; const int pad_width = params.padding_values.width; @@ -1842,11 +1874,12 @@ inline void DepthwiseConvImpl( const int8* input_data, const RuntimeShape& filter_shape, const int8* filter_data, const RuntimeShape& bias_shape, const int32* bias_data, const RuntimeShape& output_shape, int8* output_data, - int thread_start, int thread_end, int thread_dim) { + int thread_start, int thread_end, int thread_dim, + const CpuBackendContext& cpu_backend_context) { return DepthwiseConvWithRounding( params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, output_shape, - output_data, thread_start, thread_end, thread_dim); + output_data, thread_start, thread_end, thread_dim, cpu_backend_context); } template @@ -1859,7 +1892,8 @@ struct DepthwiseConvWorkerTask : cpu_backend_threadpool::Task { const T* filter_data, const RuntimeShape& bias_shape, const TS* bias_data, const RuntimeShape& output_shape, T* output_data, int thread_start, int thread_end, - int thread_dim) + int thread_dim, + const CpuBackendContext& cpu_backend_context_x) : params_(params), output_multiplier_(output_multiplier), output_shift_(output_shift), @@ -1873,13 +1907,14 @@ struct DepthwiseConvWorkerTask : cpu_backend_threadpool::Task { output_data_(output_data), thread_start_(thread_start), thread_end_(thread_end), - thread_dim_(thread_dim) {} + thread_dim_(thread_dim), + cpu_backend_context(cpu_backend_context_x) {} void Run() override { DepthwiseConvImpl(params_, output_multiplier_, output_shift_, input_shape_, input_data_, filter_shape_, filter_data_, bias_shape_, bias_data_, output_shape_, output_data_, thread_start_, - thread_end_, thread_dim_); + thread_end_, thread_dim_, cpu_backend_context); } private: @@ -1897,6 +1932,7 @@ struct DepthwiseConvWorkerTask : cpu_backend_threadpool::Task { int thread_start_; int thread_end_; int thread_dim_; + const CpuBackendContext& cpu_backend_context; }; inline int HowManyConvThreads(const RuntimeShape& output_shape, @@ -1947,7 +1983,8 @@ inline void DepthwiseConvPerChannel( DepthwiseConvImpl(params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, output_shape, output_data, /*thread_start=*/0, - /*thread_end=*/output_rows, /*thread_dim=*/1); + /*thread_end=*/output_rows, /*thread_dim=*/1, + *cpu_backend_context); } else { std::vector> tasks; // TODO(b/131746020) don't create new heap allocations every time. @@ -1960,7 +1997,7 @@ inline void DepthwiseConvPerChannel( tasks.emplace_back(params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, output_shape, output_data, thread_start, - thread_end, thread_dim); + thread_end, thread_dim, *cpu_backend_context); thread_start = thread_end; } cpu_backend_threadpool::Execute(tasks.size(), tasks.data(), ",0,test aa5956dc18f65027bc28c8be132505cf9859d328,tensorflow/tensorflow,"Depthwise convolution 3x3 per-channel int8 for dot-product ARM (16). Invoke new dot-product ASM path in normal per-channel flow. 
PiperOrigin-RevId: 295755806 Change-Id: Ief16e2acd78d2bbb9c5ced91f7a0312681d833fe",legacy_optimized_ops.h,"@@ -512,10 +512,11 @@ struct LegacyPerChannelDepthwiseConvWorkerTask : public gemmlowp::Task { thread_dim_(thread_dim) {} void Run() override { + CpuBackendContext backend_context; optimized_integer_ops::DepthwiseConvImpl( params_, output_multiplier_, output_shift_, input_shape_, input_data_, filter_shape_, filter_data_, bias_shape_, bias_data_, output_shape_, - output_data_, thread_start_, thread_end_, thread_dim_); + output_data_, thread_start_, thread_end_, thread_dim_, backend_context); } private: @@ -568,11 +569,12 @@ inline void DepthwiseConvPerChannel( thread_count = std::max(1, std::min(thread_count, max_threads)); if (thread_count == 1) { + CpuBackendContext backend_context; optimized_integer_ops::DepthwiseConvImpl( params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, output_shape, output_data, /*thread_start=*/0, - /*thread_end=*/output_rows, /*thread_dim=*/1); + /*thread_end=*/output_rows, /*thread_dim=*/1, backend_context); } else { std::vector tasks(thread_count); int thread_start = 0; ",0,test 1be692cf1161539fdfa77257cd969a549da8cc97,tensorflow/tensorflow,"Fix assert_called error on Python3 by replacing it with assertTrue(....called)",training_test.py,"@@ -626,7 +626,7 @@ class _TrainingExecutorTrainingTest(object): self._run_task(training._TrainingExecutor(mock_est, mock_train_spec, mock_eval_spec)) - mock_est.train.assert_called() + self.assertTrue(mock_est.train.called) mock_server.assert_not_called() def test_fail_with_empty_task_type(self): @@ -836,7 +836,7 @@ class TrainingExecutorRunMasterTest(test.TestCase): executor.run_master() mock_server.assert_not_called() - mock_est.train.assert_called() + self.assertTrue(mock_est.train.called) def test_fail_with_empty_task_type(self): mock_est = test.mock.Mock(spec=estimator_lib.Estimator) ",0,train 87c2f2dc3b263f90b79c4d31b6d9dbc410d8145d,tensorflow/tensorflow,"Allowing a slice to move through a reverse (i.e., slice(reverse) is reverse(slice)). PiperOrigin-RevId: 322473168 Change-Id: Ia8c8563f121cfb3aac52464336a03642c7ae6b2a",algebraic_simplifier.cc,"@@ -509,6 +509,9 @@ class AlgebraicSimplifierVisitor : public DfsHloRewriteVisitor { // Tries to convert slice(reshape(X)) into reshape(slice(X)) StatusOr TryToReorderSliceAndReshape(HloInstruction* slice); + // Tries to convert slice(reverse(X)) into reverse(slice(X)) + StatusOr TryToReorderSliceAndReverse(HloInstruction* slice); + // Tries to simplify `(and (< a N) (< a K))` in cases where `N <= K` into // `(< a N)`. This is crucial for being able to figure out the loop trip // count. @@ -3574,6 +3577,52 @@ StatusOr AlgebraicSimplifierVisitor::TryToReorderSliceAndReshape( return false; } +// Allowing a slice to move through a reverse with any necessary updates to the +// slice config. 
+StatusOr AlgebraicSimplifierVisitor::TryToReorderSliceAndReverse( + HloInstruction* slice) { + VLOG(2) << ""Entered TryToReorderSliceAndReverse for slice:"" + << slice->ToString(); + if (Match(slice, m::Slice(m::Reverse()))) { + HloInstruction* reverse = slice->mutable_operand(0); + HloInstruction* reverse_operand = reverse->mutable_operand(0); + std::vector new_starts = slice->slice_starts(); + std::vector new_limits = slice->slice_limits(); + std::vector new_strides = slice->slice_strides(); + for (auto rdim : reverse->dimensions()) { + int64 start = slice->slice_starts(rdim); + int64 limit = slice->slice_limits(rdim); + int64 stride = slice->slice_strides(rdim); + // find_nth allows us to compute the appropriate index to begin + // with during reverse even in the presence of non-unit strides + int64 find_nth = (limit - start - 1) / stride; + find_nth = start + find_nth * stride; + limit = find_nth + 1; + new_starts[rdim] = + (reverse->shape().dimensions(rdim) - start) - (limit - start); + new_limits[rdim] = reverse->shape().dimensions(rdim) - start; + VLOG(2) << ""Analyzing dim:"" << rdim << "" (start,limit):"" << start << "","" + << limit << "" and new (start, limit):"" << new_starts[rdim] << "","" + << new_limits[rdim]; + } + // New slice formed from the reverse_operand, but strides and shape of the + // slice output remains the same. New slice's starts and limits are updated + // for ONLY the reversed dimensions as indicated above. + HloInstruction* new_slice = computation_->AddInstruction( + HloInstruction::CreateSlice(slice->shape(), reverse_operand, new_starts, + new_limits, new_strides)); + simplifier_->UpdateLayout(new_slice->mutable_shape()); + TF_RETURN_IF_ERROR(ReplaceWithNewInstruction( + slice, HloInstruction::CreateReverse(new_slice->shape(), new_slice, + reverse->dimensions()))); + // We do not delete the old reverse, since there might be another + // consumer of that reverse (i.e., full reverse output). DCE should take + // care of any deletion that is necessary if there was no use of reverse. + return true; + } + return false; +} + Status AlgebraicSimplifierVisitor::HandleSlice(HloInstruction* slice) { // Delete no-op slices, i.e. where shape = operand shape. if (ReplaceInstructionIfSameShape(slice, slice->mutable_operand(0))) { @@ -3728,6 +3777,15 @@ Status AlgebraicSimplifierVisitor::HandleSlice(HloInstruction* slice) { if (replaced) { return Status::OK(); } + + bool reversed = false; + if (Match(slice, m::Slice(m::Reverse(m::Op())))) { + TF_ASSIGN_OR_RETURN(reversed, TryToReorderSliceAndReverse(slice)); + } + if (reversed) { + return Status::OK(); + } + return Status::OK(); } ",0,train 87c2f2dc3b263f90b79c4d31b6d9dbc410d8145d,tensorflow/tensorflow,"Allowing a slice to move through a reverse (i.e., slice(reverse) is reverse(slice)). 
PiperOrigin-RevId: 322473168 Change-Id: Ia8c8563f121cfb3aac52464336a03642c7ae6b2a",algebraic_simplifier_test.cc,"@@ -2014,6 +2014,80 @@ TEST_F(AlgebraicSimplifierTest, RemoveUnaryConcatenate) { EXPECT_THAT(computation->root_instruction(), param0); } +TEST_F(AlgebraicSimplifierTest, SliceReverse) { + const char* const hlo_string = R""( +HloModule module + +ENTRY test { + param = f32[6,7,32] parameter(0) + constant = f32[] constant(0) + pad = f32[8,7,32] pad(param, constant), padding=1_1x0_0x0_0 + rev = f32[8,7,32] reverse(pad), dimensions={0,2} + slice = f32[1,7,32] slice(rev), slice={[2:3:1], [0:7:1], [0:32:1]} + ROOT tuple = (f32[1,7,32]) tuple(slice) +})""; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + AlgebraicSimplifier simplifier(default_options_); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + HloComputation* computation = module->entry_computation(); + EXPECT_THAT(computation->root_instruction(), + GmockMatch(m::Tuple(m::Reverse(m::Slice(m::Pad()))))); + const HloInstruction* slice = + computation->root_instruction()->operand(0)->operand(0); + EXPECT_TRUE( + ShapeUtil::Equal(slice->shape(), ShapeUtil::MakeShape(F32, {1, 7, 32}))); + // slice start,limit of 0th and 2nd dimensions are changed + // while 1st dimension's slice start, limit remains the same since + // it is not reversed. + EXPECT_EQ(slice->slice_starts(0), 5); + EXPECT_EQ(slice->slice_limits(0), 6); + EXPECT_EQ(slice->slice_starts(1), 0); + EXPECT_EQ(slice->slice_limits(1), 7); + EXPECT_EQ(slice->slice_starts(2), 0); + EXPECT_EQ(slice->slice_limits(2), 32); + EXPECT_EQ(slice->slice_strides(0), 1); + EXPECT_EQ(slice->slice_strides(1), 1); + EXPECT_EQ(slice->slice_strides(2), 1); +} + +TEST_F(AlgebraicSimplifierTest, SliceReverseNonUnitEvenOddStrides) { + const char* const hlo_string = R""( +HloModule module + +ENTRY test { + param = f32[6,7,32] parameter(0) + constant = f32[] constant(0) + pad = f32[8,7,32] pad(param, constant), padding=1_1x0_0x0_0 + rev = f32[8,7,32] reverse(pad), dimensions={0,1,2} + slice = f32[1,2,7] slice(rev), slice={[2:3:2], [0:7:4], [0:32:5]} + ROOT tuple = (f32[1,2,7]) tuple(slice) +})""; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + AlgebraicSimplifier simplifier(default_options_); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + HloComputation* computation = module->entry_computation(); + EXPECT_THAT(computation->root_instruction(), + GmockMatch(m::Tuple(m::Reverse(m::Slice(m::Pad()))))); + const HloInstruction* slice = + computation->root_instruction()->operand(0)->operand(0); + EXPECT_TRUE( + ShapeUtil::Equal(slice->shape(), ShapeUtil::MakeShape(F32, {1, 2, 7}))); + // slice start,limit of all dimensions are changed + EXPECT_EQ(slice->slice_starts(0), 5); + EXPECT_EQ(slice->slice_limits(0), 6); + EXPECT_EQ(slice->slice_starts(1), 2); + EXPECT_EQ(slice->slice_limits(1), 7); + EXPECT_EQ(slice->slice_starts(2), 1); + EXPECT_EQ(slice->slice_limits(2), 32); + EXPECT_EQ(slice->slice_strides(0), 2); + EXPECT_EQ(slice->slice_strides(1), 4); + EXPECT_EQ(slice->slice_strides(2), 5); +} + // Test that empty operands of concatenates are removed. TEST_F(AlgebraicSimplifierTest, RemoveEmptyConcatenateOperands) { auto m = CreateNewVerifiedModule(); ",0,train 9c57bb91998a34fdbc7b5e6dfe5a90bb8d5362aa,tensorflow/tensorflow,"Bug fixes in memory space assignment and buffer assignment. 
PiperOrigin-RevId: 260802228",buffer_assignment.cc,"@@ -1222,8 +1222,13 @@ Status BufferAssigner::AssignPresetBuffers( preset_allocations; for (auto& color_and_size : preset_assignments_->sizes()) { LogicalBuffer::Color color(color_and_size.first); - preset_allocations.emplace( + auto inserted = preset_allocations.emplace( color, assignment->NewEmptyAllocation(color_and_size.second, color)); + BufferAllocation* inserted_allocation = inserted.first->second; + VLOG(3) << ""Created preset buffer allocation "" + << inserted_allocation->index() + << "", color: "" << inserted_allocation->color() + << "", size: "" << inserted_allocation->size(); } const HloAliasAnalysis& alias_analysis = assignment->alias_analysis(); @@ -1234,8 +1239,12 @@ Status BufferAssigner::AssignPresetBuffers( alias_analysis.GetUniqueBufferAt(position.instruction, position.index); VLOG(3) << ""Preset allocation for buffer: "" << buffer; const HeapSimulator::Chunk& chunk = position_and_chunk.second; - preset_allocations[buffer.color()]->AddAssignment(buffer.GetUniqueValue(), - chunk.offset, chunk.size); + auto preset_allocations_iter = preset_allocations.find(buffer.color()); + CHECK(preset_allocations_iter != preset_allocations.end()) + << ""No preset buffer allocation for color "" << buffer.color() + << "" found.""; + preset_allocations_iter->second->AddAssignment(buffer.GetUniqueValue(), + chunk.offset, chunk.size); // Ensure that there is at most one preset allocation for each buffer. CHECK_EQ(assigned_buffers->count(&buffer), 0); assigned_buffers->emplace(&buffer); ",0,train 9c57bb91998a34fdbc7b5e6dfe5a90bb8d5362aa,tensorflow/tensorflow,"Bug fixes in memory space assignment and buffer assignment. PiperOrigin-RevId: 260802228",memory_space_assignment.cc,"@@ -402,7 +402,7 @@ Status MemorySpaceAssignment::Process() { } } - if (preset_assignments_->chunks().empty()) { + if (!preset_assignments_->chunks().empty()) { preset_assignments_->add_size(alternate_memory_space_, alternate_memory_size); } @@ -413,6 +413,10 @@ Status MemorySpaceAssignment::Process() { VLOG(3) << "" ["" << pair.second.offset << "", "" << pair.second.size << ""] : "" << pair.first.ToString(); } + VLOG(3) << ""Exported alternate memory sizes:""; + for (auto& pair : preset_assignments_->sizes()) { + VLOG(3) << "" space: "" << pair.first << "", size: "" << pair.second; + } } return Status::OK(); } @@ -427,7 +431,9 @@ void MemorySpaceAssignment::ScheduleAsynchronousCopy( Status MemorySpaceAssignment::FixSchedule() { CHECK(module_->has_schedule()); HloSchedule& schedule = module_->schedule(); - for (const HloComputation* computation : module_->computations()) { + for (const HloComputation* computation : + module_->MakeNonfusionComputations()) { + CHECK(schedule.is_computation_scheduled(computation)); const HloInstructionSequence& sequence = schedule.sequence(computation); HloInstructionSequence new_sequence; ",0,train 9c57bb91998a34fdbc7b5e6dfe5a90bb8d5362aa,tensorflow/tensorflow,"Bug fixes in memory space assignment and buffer assignment. 
PiperOrigin-RevId: 260802228",memory_space_assignment_test.cc,"@@ -31,7 +31,7 @@ class MemorySpaceAssignmentTest : public HloTestBase { const int64 kDefaultMemorySpace = 0; const int64 kAlternateMemorySpace = 1; - void AssignMemorySpace(HloModule* module) { + std::unique_ptr AssignMemorySpace(HloModule* module) { auto size_fn = [](const BufferValue& buffer) { return ShapeUtil::ByteSizeOf(buffer.shape(), /*pointer_size=*/8); }; @@ -49,13 +49,14 @@ class MemorySpaceAssignmentTest : public HloTestBase { return true; }; - ASSERT_IS_OK(MemorySpaceAssignment::Run( - module, kAlternateMemorySpace, /*max_size_in_bytes=*/128, - /*min_prefetch_interval=*/2, - /*max_prefetch_interval=*/10, - /*alternate_memory_space_alignment_in_bytes=*/8, size_fn, - is_allowed_in_alternate_mem) - .status()); + return std::move(MemorySpaceAssignment::Run( + module, kAlternateMemorySpace, + /*max_size_in_bytes=*/128, + /*min_prefetch_interval=*/2, + /*max_prefetch_interval=*/10, + /*alternate_memory_space_alignment_in_bytes=*/8, + size_fn, is_allowed_in_alternate_mem) + .ValueOrDie()); } }; @@ -103,7 +104,7 @@ TEST_F(MemorySpaceAssignmentTest, Simple) { schedule.set_sequence(computation, {p0, p1, add, sub, mul}); TF_CHECK_OK(module->set_schedule(schedule)); - AssignMemorySpace(module.get()); + auto preset_assignments = AssignMemorySpace(module.get()); // Inputs and outputs are currently placed in the default memory. Everything // else should be in the alternate memory. @@ -116,6 +117,10 @@ TEST_F(MemorySpaceAssignmentTest, Simple) { EXPECT_THAT(mul, op::ShapeWithLayout(shape)); EXPECT_THAT(add, op::ShapeWithLayout(shape_in_alternate_mem)); EXPECT_THAT(sub, op::ShapeWithLayout(shape_in_alternate_mem)); + + // Make sure the preset assignments is sane. + EXPECT_THAT(preset_assignments->chunks().size(), 2); + EXPECT_THAT(preset_assignments->sizes().size(), 1); } TEST_F(MemorySpaceAssignmentTest, NegateChain) { ",0,train f4150f34c3c56abd61d24b5dd226585e006c9488,tensorflow/tensorflow,"Internal Change PiperOrigin-RevId: 251500717",gru_ops_test.cc,"@@ -23,7 +23,7 @@ namespace tensorflow { class GruOpsTest : public ::testing::Test { public: - static void SetUpTestCase() { + static void SetUpTestSuite() { TF_Status* status = TF_NewStatus(); auto* lib = TF_LoadLibrary( ""tensorflow/contrib/rnn/python/ops/_gru_ops.so"", status); ",0,train f4150f34c3c56abd61d24b5dd226585e006c9488,tensorflow/tensorflow,"Internal Change PiperOrigin-RevId: 251500717",lstm_ops_test.cc,"@@ -25,7 +25,7 @@ namespace tensorflow { class LSTMOpsTest : public ::testing::Test { public: - static void SetUpTestCase() { + static void SetUpTestSuite() { TF_Status* status = TF_NewStatus(); auto* lib = TF_LoadLibrary( ""tensorflow/contrib/rnn/python/ops/_lstm_ops.so"", status); ",0,train c6fdeaca7dd32c6bec3ff2df14889c3f2c129f14,tensorflow/tensorflow,"adding ps_strategy to run_config to enable different placement strate… (#15640) * adding ps_strategy to run_config to enable different placement strategy in estimator * 1. Moved estimator._device_fn to RunConfig as @property 2. Made RunConfig.device_fn to return custom device function if one is specified, otherwise the result from `tf.train.replica_device_setter` call is used 3. Added some basic unit tests, may need further tests. * 1. Removing ps_strategy. 2. Modified estimator to take overriden device_fn from if set. 3. Removed ps_strategy related unit tests. * Adding manual initialization of _device_fn in legacy RunConfig class * Updated estimator golden API through 1. 
bazel build //tensorflow/tools/api/tests:api_compatibility_test 2. bazel-bin/tensorflow/tools/api/tests/api_compatibility_test --update_goldens True * fixing code styles",run_config.py,"@@ -299,6 +299,7 @@ class RunConfig(ClusterConfig, core_run_config.RunConfig): # so instead of breaking compatibility with that assumption, we # just manually initialize this field: self._train_distribute = None + self._device_fn = None gpu_options = config_pb2.GPUOptions( per_process_gpu_memory_fraction=gpu_memory_fraction) ",0,train c6fdeaca7dd32c6bec3ff2df14889c3f2c129f14,tensorflow/tensorflow,"adding ps_strategy to run_config to enable different placement strate… (#15640) * adding ps_strategy to run_config to enable different placement strategy in estimator * 1. Moved estimator._device_fn to RunConfig as @property 2. Made RunConfig.device_fn to return custom device function if one is specified, otherwise the result from `tf.train.replica_device_setter` call is used 3. Added some basic unit tests, may need further tests. * 1. Removing ps_strategy. 2. Modified estimator to take overriden device_fn from if set. 3. Removed ps_strategy related unit tests. * Adding manual initialization of _device_fn in legacy RunConfig class * Updated estimator golden API through 1. bazel build //tensorflow/tools/api/tests:api_compatibility_test 2. bazel-bin/tensorflow/tools/api/tests/api_compatibility_test --update_goldens True * fixing code styles",estimator.py,"@@ -216,7 +216,8 @@ class Estimator(object): else: self._session_config = self._config.session_config - self._device_fn = _get_replica_device_setter(self._config) + self._device_fn = self._config.device_fn or \ + _get_replica_device_setter(self._config) if model_fn is None: raise ValueError('model_fn must be provided to Estimator.') ",0,train c6fdeaca7dd32c6bec3ff2df14889c3f2c129f14,tensorflow/tensorflow,"adding ps_strategy to run_config to enable different placement strate… (#15640) * adding ps_strategy to run_config to enable different placement strategy in estimator * 1. Moved estimator._device_fn to RunConfig as @property 2. Made RunConfig.device_fn to return custom device function if one is specified, otherwise the result from `tf.train.replica_device_setter` call is used 3. Added some basic unit tests, may need further tests. * 1. Removing ps_strategy. 2. Modified estimator to take overriden device_fn from if set. 3. Removed ps_strategy related unit tests. * Adding manual initialization of _device_fn in legacy RunConfig class * Updated estimator golden API through 1. bazel build //tensorflow/tools/api/tests:api_compatibility_test 2. bazel-bin/tensorflow/tools/api/tests/api_compatibility_test --update_goldens True * fixing code styles",run_config.py,"@@ -27,11 +27,13 @@ import six from tensorflow.core.protobuf import config_pb2 from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib +from tensorflow.python.estimator import util from tensorflow.python.util import compat_internal from tensorflow.python.util.tf_export import tf_export _USE_DEFAULT = object() +_VALID_DEVICE_FN_ARGS = set(['op']) # A list of the property names in RunConfig that the user is allowed to change. 
_DEFAULT_REPLACEABLE_LIST = [ @@ -44,7 +46,8 @@ _DEFAULT_REPLACEABLE_LIST = [ 'keep_checkpoint_max', 'keep_checkpoint_every_n_hours', 'log_step_count_steps', - 'train_distribute' + 'train_distribute', + 'device_fn' ] _SAVE_CKPT_ERR = ( @@ -279,6 +282,11 @@ def _validate_properties(run_config): _validate('tf_random_seed', lambda seed: isinstance(seed, six.integer_types), message='tf_random_seed must be integer.') + _validate('device_fn', lambda device_fn: six.callable(device_fn) and + set(util.fn_args(device_fn)) == _VALID_DEVICE_FN_ARGS, + message='device_fn must be callable with exactly' + ' one argument ""op"".') + class TaskType(object): MASTER = 'master' @@ -302,7 +310,8 @@ class RunConfig(object): keep_checkpoint_max=5, keep_checkpoint_every_n_hours=10000, log_step_count_steps=100, - train_distribute=None): + train_distribute=None, + device_fn=None): """"""Constructs a RunConfig. All distributed training related properties `cluster_spec`, `is_chief`, @@ -430,6 +439,10 @@ class RunConfig(object): `tf.contrib.distribute.DistributionStrategy`. If specified, then Estimator will distribute the user's model during training, according to the policy specified by that strategy. + device_fn: A callable invoked for every `Operation` that takes the + `Operation` and returns the device string. If `None`, defaults to + the device function returned by `tf.train.replica_device_setter` + with round-robin strategy. Raises: ValueError: If both `save_checkpoints_steps` and `save_checkpoints_secs` @@ -466,7 +479,8 @@ class RunConfig(object): keep_checkpoint_max=keep_checkpoint_max, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, log_step_count_steps=log_step_count_steps, - train_distribute=train_distribute) + train_distribute=train_distribute, + device_fn=device_fn) self._init_distributed_setting_from_environment_var(tf_config) @@ -568,6 +582,16 @@ class RunConfig(object): def cluster_spec(self): return self._cluster_spec + @property + def device_fn(self): + """"""Returns the device_fn. + + If device_fn is not `None`, it overrides the default + device function used in `Estimator`. + Otherwise the default one is used. + """""" + return self._device_fn + @property def evaluation_master(self): return self._evaluation_master @@ -697,7 +721,8 @@ class RunConfig(object): - `keep_checkpoint_max`, - `keep_checkpoint_every_n_hours`, - `log_step_count_steps`, - - `train_distribute`. + - `train_distribute`, + - `device_fn`. In addition, either `save_checkpoints_steps` or `save_checkpoints_secs` can be set (should not be both). ",0,train c6fdeaca7dd32c6bec3ff2df14889c3f2c129f14,tensorflow/tensorflow,"adding ps_strategy to run_config to enable different placement strate… (#15640) * adding ps_strategy to run_config to enable different placement strategy in estimator * 1. Moved estimator._device_fn to RunConfig as @property 2. Made RunConfig.device_fn to return custom device function if one is specified, otherwise the result from `tf.train.replica_device_setter` call is used 3. Added some basic unit tests, may need further tests. * 1. Removing ps_strategy. 2. Modified estimator to take overriden device_fn from if set. 3. Removed ps_strategy related unit tests. * Adding manual initialization of _device_fn in legacy RunConfig class * Updated estimator golden API through 1. bazel build //tensorflow/tools/api/tests:api_compatibility_test 2. 
bazel-bin/tensorflow/tools/api/tests/api_compatibility_test --update_goldens True * fixing code styles",run_config_test.py,"@@ -42,6 +42,7 @@ _SESSION_CONFIG_ERR = 'session_config must be instance of ConfigProto' _KEEP_CKPT_MAX_ERR = 'keep_checkpoint_max should be >= 0' _KEEP_CKPT_HOURS_ERR = 'keep_checkpoint_every_n_hours should be > 0' _TF_RANDOM_SEED_ERR = 'tf_random_seed must be integer' +_DEVICE_FN_ERR = 'device_fn must be callable with exactly one argument ""op"".' _ONE_CHIEF_ERR = 'The ""cluster"" in TF_CONFIG must have only one ""chief"" node.' _ONE_MASTER_ERR = 'The ""cluster"" in TF_CONFIG must have only one ""master"" node.' _INVALID_TASK_TYPE_FOR_EVAL_MASTER = ( @@ -83,6 +84,7 @@ class RunConfigTest(test.TestCase): self.assertEqual(5, config.keep_checkpoint_max) self.assertEqual(10000, config.keep_checkpoint_every_n_hours) self.assertIsNone(config.service) + self.assertIsNone(config.device_fn) def test_model_dir(self): empty_config = run_config_lib.RunConfig() @@ -93,6 +95,7 @@ class RunConfigTest(test.TestCase): def test_replace_with_allowed_properties(self): session_config = config_pb2.ConfigProto(allow_soft_placement=True) + device_fn = lambda op: ""/cpu:0"" config = run_config_lib.RunConfig().replace( tf_random_seed=11, @@ -100,13 +103,15 @@ class RunConfigTest(test.TestCase): save_checkpoints_secs=14, session_config=session_config, keep_checkpoint_max=16, - keep_checkpoint_every_n_hours=17) + keep_checkpoint_every_n_hours=17, + device_fn=device_fn) self.assertEqual(11, config.tf_random_seed) self.assertEqual(12, config.save_summary_steps) self.assertEqual(14, config.save_checkpoints_secs) self.assertEqual(session_config, config.session_config) self.assertEqual(16, config.keep_checkpoint_max) self.assertEqual(17, config.keep_checkpoint_every_n_hours) + self.assertEqual(device_fn, config.device_fn) def test_replace_none_value(self): config = run_config_lib.RunConfig().replace( @@ -117,7 +122,8 @@ class RunConfigTest(test.TestCase): save_checkpoints_steps=None, session_config=None, keep_checkpoint_max=None, - keep_checkpoint_every_n_hours=None) + keep_checkpoint_every_n_hours=None, + device_fn=None) self.assertIsNone(config.tf_random_seed) self.assertIsNone(config.model_dir) self.assertIsNone(config.save_summary_steps) @@ -126,6 +132,7 @@ class RunConfigTest(test.TestCase): self.assertIsNone(config.session_config) self.assertIsNone(config.keep_checkpoint_max) self.assertIsNone(config.keep_checkpoint_every_n_hours) + self.assertIsNone(config.device_fn) def test_replace_with_disallowallowed_properties(self): config = run_config_lib.RunConfig() @@ -166,9 +173,12 @@ class RunConfigTest(test.TestCase): config.replace(keep_checkpoint_every_n_hours=0) with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR): config.replace(tf_random_seed=1.0) + with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR): + config.replace(device_fn=lambda x, y: 0) def test_init_with_allowed_properties(self): session_config = config_pb2.ConfigProto(allow_soft_placement=True) + device_fn = lambda op: ""/cpu:0"" config = run_config_lib.RunConfig( tf_random_seed=11, @@ -176,13 +186,15 @@ class RunConfigTest(test.TestCase): save_checkpoints_secs=14, session_config=session_config, keep_checkpoint_max=16, - keep_checkpoint_every_n_hours=17) + keep_checkpoint_every_n_hours=17, + device_fn=device_fn) self.assertEqual(11, config.tf_random_seed) self.assertEqual(12, config.save_summary_steps) self.assertEqual(14, config.save_checkpoints_secs) self.assertEqual(session_config, config.session_config) 
self.assertEqual(16, config.keep_checkpoint_max) self.assertEqual(17, config.keep_checkpoint_every_n_hours) + self.assertEqual(device_fn, config.device_fn) def test_init_none_value(self): config = run_config_lib.RunConfig( @@ -193,7 +205,8 @@ class RunConfigTest(test.TestCase): save_checkpoints_steps=None, session_config=None, keep_checkpoint_max=None, - keep_checkpoint_every_n_hours=None) + keep_checkpoint_every_n_hours=None, + device_fn=None) self.assertIsNone(config.tf_random_seed) self.assertIsNone(config.model_dir) self.assertIsNone(config.save_summary_steps) @@ -202,6 +215,7 @@ class RunConfigTest(test.TestCase): self.assertIsNone(config.session_config) self.assertIsNone(config.keep_checkpoint_max) self.assertIsNone(config.keep_checkpoint_every_n_hours) + self.assertIsNone(config.device_fn) def test_init_invalid_values(self): with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR): @@ -220,6 +234,8 @@ class RunConfigTest(test.TestCase): run_config_lib.RunConfig(keep_checkpoint_every_n_hours=0) with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR): run_config_lib.RunConfig(tf_random_seed=1.0) + with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR): + run_config_lib.RunConfig(device_fn=lambda x: ""/cpu:0"") class RunConfigDistributedSettingTest(test.TestCase): ",0,train f1406b4d064b56d1fc51b8ba88b91b8ddbed8b48,tensorflow/tensorflow,Switch from SimplePhilox to SingleSampleAdapter,sampling_dataset_op.cc,"@@ -138,8 +138,8 @@ class SamplingDatasetOp::Dataset : public DatasetBase { void ResetRngs() EXCLUSIVE_LOCKS_REQUIRED(mu_) { // Reset the generators based on the current iterator seeds. parent_generator_ = random::PhiloxRandom(seed_, seed2_); - generator_ = random::SimplePhilox(&parent_generator_); - + generator_ = + random::SingleSampleAdapter(&parent_generator_); generator_.Skip(num_random_samples_); } @@ -188,13 +188,17 @@ class SamplingDatasetOp::Dataset : public DatasetBase { float Random() { mutex_lock l(mu_); num_random_samples_++; - auto out = generator_.RandFloat(); - return out; + uint32 random_uint = generator_(); + + // PhiloxRandom returns 32-bit unsigned ints. Convert to float in [0,1) + // using the same method that the RandomUniform op uses. + return random::Uint32ToFloat(random_uint); } // random util random::PhiloxRandom parent_generator_ GUARDED_BY(mu_); - random::SimplePhilox generator_ GUARDED_BY(mu_); + random::SingleSampleAdapter generator_ + GUARDED_BY(mu_); int64 num_random_samples_ GUARDED_BY(mu_) = 0; }; ",0,test f1406b4d064b56d1fc51b8ba88b91b8ddbed8b48,tensorflow/tensorflow,Switch from SimplePhilox to SingleSampleAdapter,simple_philox.h,"@@ -66,9 +66,6 @@ class SimplePhilox { // range [0,2^max_log-1] with bias towards smaller numbers. 
uint32 Skewed(int max_log); - // Skip ahead `num_skips` entries in the stream of random numbers - void Skip(uint64 num_skips) { single_.Skip(num_skips); } - private: SingleSampleAdapter single_; }; ",0,test 5febf24c804c692b7444b12cbea8c63fe6d06f31,tensorflow/tensorflow,"[XLA:GPU] [NFC] Refactor the code to allocate buffers for a given BufferAssignment PiperOrigin-RevId: 314453117 Change-Id: I4be382808f1026d6136967de8954627f408db15b",gpu_executable.cc,"@@ -320,49 +320,64 @@ GpuExecutable::ResolveConstantGlobals(se::Stream* stream) { return &module_globals_.emplace(executor, std::move(globals)).first->second; } +StatusOr GpuExecutable::BufferForAllocation( + absl::Span arguments, + const GpuExecutable::BufferAllocToDeviceMemoryMap* globals, + const BufferAllocation& allocation, + se::DeviceMemoryAllocator* const memory_allocator, int device_ordinal, + int64 arg_idx) { + if (allocation.is_thread_local()) { + return se::DeviceMemoryBase{}; + } else if (allocation.is_entry_computation_parameter()) { + auto param_no = allocation.parameter_number(); + se::DeviceMemoryBase registered_buffer = + arguments[param_no] + .Buffer(allocation.param_shape_index()) + .AsDeviceMemoryBase(); + if (registered_buffer.is_null() && registered_buffer.size() > 0) { + return FailedPrecondition( + ""Cannot run XLA computation because pointer to (sub-)buffer at "" + ""index %s of parameter %d was null. All pointers to "" + ""(sub-)buffers must not be null, unless the (sub-)buffer has "" + ""zero elements."", + allocation.param_shape_index().ToString(), param_no); + } + return registered_buffer; + } else if (allocation.is_constant()) { + return FindOrDie(*globals, arg_idx); + } else { + // Allocate each allocation that might escape, or is the temp buffer. + CHECK(allocation.maybe_live_out() || allocation.IsPreallocatedTempBuffer()); + const int64 buffer_size = allocation.size(); + se::DeviceMemoryBase buffer_address; + if (buffer_size > 0) { + TF_ASSIGN_OR_RETURN( + se::OwningDeviceMemory buffer, + memory_allocator->Allocate(device_ordinal, buffer_size)); + buffer_address = buffer.Release(); + } + return buffer_address; + } +} + StatusOr GpuExecutable::GenerateBufferAllocations( absl::Span arguments, const GpuExecutable::BufferAllocToDeviceMemoryMap* globals, se::DeviceMemoryAllocator* const memory_allocator, se::StreamExecutor* executor) { - absl::flat_hash_map - registered_buffers; tensorflow::profiler::TraceMe hlo_module_activity( [&] { return std::string(""Build buffer allocations""); }, tensorflow::profiler::TraceMeLevel::kInfo); const int64 num_buffers = assignment_->Allocations().size(); - std::vector buffers(num_buffers); - for (BufferAllocation::Index i = 0; i < num_buffers; ++i) { - const BufferAllocation& allocation = assignment_->GetAllocation(i); - if (allocation.is_entry_computation_parameter()) { - auto param_no = allocation.parameter_number(); - se::DeviceMemoryBase buffer = arguments[param_no] - .Buffer(allocation.param_shape_index()) - .AsDeviceMemoryBase(); - - // All top-level buffers and sub-buffers must have an explicit, non-null - // pointer, except for zero-sized buffers, which may be null. - if (buffer.is_null() && buffer.size() > 0) { - return FailedPrecondition( - ""Cannot run XLA computation because pointer to (sub-)buffer at "" - ""index %s of parameter %d was null. 
All pointers to "" - ""(sub-)buffers must not be null, unless the (sub-)buffer has "" - ""zero elements."", - allocation.param_shape_index().ToString(), param_no); - } - - InsertOrDie(®istered_buffers, i, buffer); - } - - if (allocation.is_constant()) { - InsertOrDie(®istered_buffers, i, FindOrDie(*globals, i)); - } - } - - int device_ordinal = executor->device_ordinal(); - for (BufferAllocation::Index i = 0; i < num_buffers; ++i) { + std::vector buffers; + buffers.reserve(num_buffers); + for (int64 i = 0; i < num_buffers; ++i) { const BufferAllocation& allocation = assignment_->GetAllocation(i); + TF_ASSIGN_OR_RETURN( + se::DeviceMemoryBase buffer, + BufferForAllocation(arguments, globals, allocation, memory_allocator, + executor->device_ordinal(), i)); const int64 expected_alignment = [&] { if (allocation.is_entry_computation_parameter()) { return kEntryParameterAlignBytes; @@ -372,50 +387,17 @@ StatusOr GpuExecutable::GenerateBufferAllocations( return kXlaAllocatedBufferAlignBytes; } }(); - - // If buffer #i's address is already registered (e.g. external arguments or - // result buffers), use that registered buffer. - if (se::DeviceMemoryBase* address = - tensorflow::gtl::FindOrNull(registered_buffers, i)) { - if (reinterpret_cast(address->opaque()) % expected_alignment != - 0) { - return InternalError( - ""Address of registered buffer %d must be a multiple of %x, but "" - ""was %p"", - i, kEntryParameterAlignBytes, address->opaque()); - } - CHECK_LT(i, buffers.size()); - buffers[i] = *address; - continue; - } - - // Allocate each allocation that might escape, or is the temp buffer. - if (allocation.maybe_live_out() || allocation.IsPreallocatedTempBuffer()) { - const int64 buffer_size = allocation.size(); - se::DeviceMemoryBase buffer_address; - if (buffer_size > 0) { - TF_ASSIGN_OR_RETURN( - se::OwningDeviceMemory buffer, - memory_allocator->Allocate(device_ordinal, buffer_size)); - if (reinterpret_cast(buffer->opaque()) % - expected_alignment != + if (!buffer.is_null() && + reinterpret_cast(buffer.opaque()) % expected_alignment != 0) { - return InternalError( - ""Address returned by memory_allocator->Allocate must be a "" - ""multiple of 0x%x, but was %p"", - kXlaAllocatedBufferAlignBytes, buffer->opaque()); - } - // We do manual memory management within BufferAllocations. Be sure not - // to do a TF_RETURN_IF_ERROR between this line and the - // buffer_allocations.SetBuffer(buffer_address) call below! - buffer_address = buffer.Release(); - } - - CHECK_LT(i, buffers.size()); - buffers[i] = buffer_address; + return InternalError( + ""Address of buffer %d must be a multiple of %x, but "" + ""was %p"", + i, expected_alignment, buffer.opaque()); } + buffers.push_back(buffer); } - return {{buffers, device_ordinal, memory_allocator}}; + return {{buffers, executor->device_ordinal(), memory_allocator}}; } StatusOr GpuExecutable::ExecuteAsyncOnStream( @@ -457,13 +439,11 @@ StatusOr GpuExecutable::ExecuteAsyncOnStream( HloInstruction* root = hlo_module_->entry_computation()->root_instruction(); auto device_ordinal = executor->device_ordinal(); - ScopedShapedBuffer shaped_buffer(root->shape(), root->shape(), - memory_allocator, device_ordinal); + ExecutionOutput result(root->shape(), root->shape(), memory_allocator, + device_ordinal); - // Copy DeviceMemoryBase values which contain the array(s) of the result into - // the respective location in ShapedBuffer. 
std::set buffers_in_result; - for (auto& p : shaped_buffer.buffers()) { + for (auto& p : result.MutableResult()->buffers()) { const ShapeIndex& index = p.first; se::DeviceMemoryBase& device_memory = p.second; const auto& sources = GetRootValueSet().element(index); @@ -522,7 +502,7 @@ StatusOr GpuExecutable::ExecuteAsyncOnStream( } } } - return ExecutionOutput(std::move(shaped_buffer), std::move(buffers_to_free)); + return result; } const InstructionValueSet& GpuExecutable::GetRootValueSet() const { ",0,test 5febf24c804c692b7444b12cbea8c63fe6d06f31,tensorflow/tensorflow,"[XLA:GPU] [NFC] Refactor the code to allocate buffers for a given BufferAssignment PiperOrigin-RevId: 314453117 Change-Id: I4be382808f1026d6136967de8954627f408db15b",gpu_executable.h,"@@ -129,10 +129,17 @@ class GpuExecutable : public Executable { se::DeviceMemoryAllocator* const memory_allocator, se::StreamExecutor* executor); - // The LLVM IR, in string format, of the unoptimized module generated for this - // GpuExecutable. We save a string instead of an llvm::Module* because leaving - // llvm::Module* in a singleton can cause the heap checker to emit false - // positives. + StatusOr BufferForAllocation( + absl::Span arguments, + const GpuExecutable::BufferAllocToDeviceMemoryMap* globals, + const BufferAllocation& allocation, + se::DeviceMemoryAllocator* const memory_allocator, int device_ordinal, + int64 arg_idx); + + // The LLVM IR, in string format, of the unoptimized module generated for + // this GpuExecutable. We save a string instead of an llvm::Module* because + // leaving llvm::Module* in a singleton can cause the heap checker to emit + // false positives. // // This string should be modified only before ExecuteOnStream. string ir_module_string_; ",0,test 8e195d991452af81b467192e538fe8f459d7c9c0,tensorflow/tensorflow,"Update mnist.py clean and clear",mnist.py,"@@ -261,17 +261,13 @@ def read_data_sets(train_dir, train_images = train_images[validation_size:] train_labels = train_labels[validation_size:] - train = DataSet( - train_images, train_labels, dtype=dtype, reshape=reshape, seed=seed) - validation = DataSet( - validation_images, - validation_labels, - dtype=dtype, - reshape=reshape, - seed=seed) - test = DataSet( - test_images, test_labels, dtype=dtype, reshape=reshape, seed=seed) - + + options = dict(dtype=dtype, reshape=reshape, seed=seed) + + train = DataSet(train_images, train_labels, **options) + validation = DataSet(validation_images, validation_labels, **options) + test = DataSet(test_images, test_labels, **options) + return base.Datasets(train=train, validation=validation, test=test) ",0,train 133867e35f75360d5df83cfe03df70115a670264,tensorflow/tensorflow,"Add a tf_executor.graph pruning pass In a tf_executor.graph block, only the operations contributing to the fetch results need to be preserved regardless of side-effects. This ""dead-code elimination"" pass is made trivial by this property. PiperOrigin-RevId: 263066534",graph_pruning.cc,"@@ -0,0 +1,87 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include ""llvm/ADT/STLExtras.h"" +#include ""llvm/ADT/SmallVector.h"" +#include ""llvm/ADT/iterator_range.h"" +#include ""mlir/IR/Block.h"" // TF:local_config_mlir +#include ""mlir/IR/Builders.h"" // TF:local_config_mlir +#include ""mlir/IR/Location.h"" // TF:local_config_mlir +#include ""mlir/IR/Operation.h"" // TF:local_config_mlir +#include ""mlir/Pass/Pass.h"" // TF:local_config_mlir +#include ""mlir/Pass/PassRegistry.h"" // TF:local_config_mlir +#include ""tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h"" +#include ""tensorflow/compiler/mlir/tensorflow/transforms/passes.h"" + +namespace mlir { +namespace tf_executor { + +// Prunes a TF graph eliminating dead nodes. +void prune_graph(GraphOp graph) { + // A graph has a single block which forms a DAG: nodes that aren't reachable + // from the `fetch` operands can be eliminated. + + // Delete unreachable node from the graph. We traverse it in reverse order so + // that we just have to check that a node does not have any users to delete + // it. + for (Operation &op : llvm::make_early_inc_range( + llvm::drop_begin(llvm::reverse(graph.GetBody()), 1))) { + // NextIteration.Sink operation are handled specially: they are live if the + // source is live, and removed when the source is processed. + if (auto sinkOp = dyn_cast(op)) continue; + + // For NextIteration.Source, we just check that the source does not have any + // other user than the sink. + if (auto sourceOp = dyn_cast(op)) { + Operation *sink = sourceOp.GetSink().getOperation(); + if (llvm::any_of(sourceOp.getResults(), [sink](Value *result) { + return llvm::any_of(result->getUsers(), [sink](Operation *user) { + return user != sink; + }); + })) + continue; + + // No other users than the sink, erase the pair! + sink->erase(); + sourceOp.erase(); + continue; + } + + // General case. + if (op.use_empty()) op.erase(); + } +} + +namespace { + +// This transformation pass prunes a TF graph eliminating dead-nodes. +struct GraphPruning : public FunctionPass { + void runOnFunction() override { + getFunction().walk( + [](tf_executor::GraphOp graph) { prune_graph(graph); }); + } +}; + +} // namespace + +FunctionPassBase *CreateTFExecutorGraphPruningPass() { + return new GraphPruning(); +} + +static PassRegistration pass( + ""tf-executor-graph-pruning"", ""Prune a TensorFlow Graph from dead nodes.""); + +} // namespace tf_executor +} // namespace mlir ",0,train 133867e35f75360d5df83cfe03df70115a670264,tensorflow/tensorflow,"Add a tf_executor.graph pruning pass In a tf_executor.graph block, only the operations contributing to the fetch results need to be preserved regardless of side-effects. This ""dead-code elimination"" pass is made trivial by this property. PiperOrigin-RevId: 263066534",passes.h,"@@ -37,9 +37,17 @@ std::unique_ptr CreateRaiseTFControlFlowPass(); } // namespace TFControlFlow namespace tf_executor { +class GraphOp; + // Create a pass to merge IslandOps from TFExecutor dialect. std::unique_ptr CreateTFExecutorIslandCoarseningPass(); +// Create a pass to prune tf_executor.graph from dead nodes. +FunctionPassBase* CreateTFExecutorGraphPruningPass(); + +// Prune a tf_executor.graph operation from dead nodes. 
+void prune_graph(GraphOp graph); + } // namespace tf_executor namespace TFDevice { ",0,train 7f12947e4f31cdf9a0cca291a653980fa204d686,tensorflow/tensorflow,"Don't provide a padding op for strings: the code never did what could be reasonably expected (i.e. pad with spaces), and doesn't compile anymore. Change: 115936929",pad_op.cc,"@@ -136,7 +136,7 @@ class PadOp : public OpKernel { .HostMemory(""paddings""), \ PadOp) -TF_CALL_ALL_TYPES(REGISTER_KERNEL); +TF_CALL_POD_TYPES(REGISTER_KERNEL); #undef REGISTER_KERNEL #if GOOGLE_CUDA ",0,test 1bbca7cfb6f01eb3536577102e3e0088c57c6a31,tensorflow/tensorflow,"tf.max will return -inf if the input is empty, which results in exception in tf.range() below. PiperOrigin-RevId: 235773577",array_ops.py,"@@ -3091,6 +3091,7 @@ def sequence_mask(lengths, maxlen=None, dtype=dtypes.bool, name=None): if maxlen is None: maxlen = gen_math_ops._max(lengths, _all_dimensions(lengths)) + maxlen = gen_math_ops.maximum(constant(0, maxlen.dtype), maxlen) else: maxlen = ops.convert_to_tensor(maxlen) if maxlen.get_shape().ndims is not None and maxlen.get_shape().ndims != 0: ",0,train 5a29a86050ae0c093dbf98e9321bdbd766f13096,tensorflow/tensorflow,"[XLA] Simplify dynamic-slice(iota, index) => index. - This is a common pattern generated by SPMD partitioning for 1D sharding. PiperOrigin-RevId: 370084429 Change-Id: Ie924f1356b5ce7f9efeb389af31fafdd97eb05b1",algebraic_simplifier.cc,"@@ -2683,6 +2683,26 @@ Status AlgebraicSimplifierVisitor::HandleClamp(HloInstruction* clamp) { return Status::OK(); } + // Eliminate redundant clamping of replica-id or partition-id. + if ((Match(to_clamp, m::PartitionId()) || Match(to_clamp, m::ReplicaId())) && + Match(clamp_lower_bound, m::ConstantScalar(0U)) && + Match(clamp_upper_bound, m::ConstantScalar())) { + int64 upper_bound = Cast(clamp_upper_bound) + ->literal() + .GetFirstElement(); + const HloModuleConfig& config = clamp->GetModule()->config(); + int64 runtime_bound = Match(to_clamp, m::PartitionId()) + ? config.num_partitions() + : config.replica_count(); + + // If num_partitions or replica_count is 1, infer it as unknown. + // pid/rid < runtime_bound => The clamp(0, pid/rid, upper_bound) is + // redundant if the runtime_bound <= upper_bound + 1; + if (runtime_bound != 1 && runtime_bound <= upper_bound + 1) { + return ReplaceInstruction(clamp, to_clamp); + } + } + return Status::OK(); } @@ -4416,6 +4436,50 @@ Status AlgebraicSimplifierVisitor::HandleDynamicSlice( HloInstruction::CreateSlice(dynamic_slice->shape(), operand, slice_starts, slice_limits, slice_strides)); } + + // Convert the dynamic slice of an iota to just a reference to the index + // (possibly clamped). Index is always a scalar integer. Output should be a + // rank 1 array of size 1 with element type matching that of the scalar index + // (except the signedness). + const PrimitiveType element_type = dynamic_slice->shape().element_type(); + if (operand->opcode() == HloOpcode::kIota && operand->shape().rank() == 1 && + dynamic_slice->shape().rank() == 1 && + dynamic_slice->shape().dimensions(0) == 1 && + (element_type == S32 || element_type == U32)) { + // This dynamic_slice will have a single start_index operand (since its + // operand is rank 1). 
+ HloInstruction* index = dynamic_slice->mutable_operand(1); + const PrimitiveType index_type = index->shape().element_type(); + + auto create_constant = [&](int64 value) { + if (index_type == S32) { + return MakeScalarLike(index, value); + } else { + return MakeScalarLike(index, value); + } + }; + + if (index_type == S32 || index_type == U32) { + // Clamp the index to the range of the iota. + int64 iota_size = operand->shape().dimensions(0); + HloInstruction* low = create_constant(0); + HloInstruction* high = create_constant(iota_size - 1); + HloInstruction* clamped = + computation_->AddInstruction(HloInstruction::CreateTernary( + index->shape(), HloOpcode::kClamp, low, index, high)); + Shape reshape_shape = ShapeUtil::MakeShape(index_type, {1}); + HloInstruction* result = computation_->AddInstruction( + HloInstruction::CreateReshape(reshape_shape, clamped)); + + if (index_type != element_type) { + result = computation_->AddInstruction( + HloInstruction::CreateConvert(dynamic_slice->shape(), result)); + } + + return ReplaceInstruction(dynamic_slice, result); + } + } + return Status::OK(); } ",0,train 5a29a86050ae0c093dbf98e9321bdbd766f13096,tensorflow/tensorflow,"[XLA] Simplify dynamic-slice(iota, index) => index. - This is a common pattern generated by SPMD partitioning for 1D sharding. PiperOrigin-RevId: 370084429 Change-Id: Ie924f1356b5ce7f9efeb389af31fafdd97eb05b1",algebraic_simplifier_test.cc,"@@ -7373,5 +7373,53 @@ ENTRY f { EXPECT_EQ(pad->padding_config().dimensions(0).edge_padding_high(), 0); } +// Test folding of dynamic_slice(iota, index) -> clamp(index, 0, size-1) +TEST_F(AlgebraicSimplifierTest, DynamicSliceOfIota) { + const char* hlo_string = R""( +HloModule module + +ENTRY f { + %cst = s32[2]{0} constant({0, 1}) + %index = u32[] parameter(0) + ROOT %dynamic-slice = s32[1]{0} dynamic-slice(s32[2]{0} %cst, u32[] %index), + dynamic_slice_sizes={1} +} +)""; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + AlgebraicSimplifier simplifier(default_options_); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + VLOG(2) << ""After rewrite \n"" << module->ToString(); + + EXPECT_THAT(module->entry_computation()->root_instruction(), + GmockMatch(m::Convert(m::Reshape( + m::Clamp(m::Constant(), m::Parameter(0), m::Constant()))))); +} + +// Test folding of clamp(pid, 0, limit) -> pid +TEST_F(AlgebraicSimplifierTest, ClampOfPartitionId) { + const char* hlo_string = R""( +HloModule module + +ENTRY f { + %pid = u32[] partition-id() + %low = u32[] constant(0) + %high = u32[] constant(5) + ROOT %c = u32[] clamp(%low, %pid, %high) +} +)""; + + TF_ASSERT_OK_AND_ASSIGN( + auto module, ParseAndReturnVerifiedModule(hlo_string, /*replica_count=*/1, + /*num_partitions=*/6)); + AlgebraicSimplifier simplifier(default_options_); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + VLOG(2) << ""After rewrite \n"" << module->ToString(); + + EXPECT_THAT(module->entry_computation()->root_instruction(), + GmockMatch(m::PartitionId())); +} + } // namespace } // namespace xla ",0,train 5a29a86050ae0c093dbf98e9321bdbd766f13096,tensorflow/tensorflow,"[XLA] Simplify dynamic-slice(iota, index) => index. - This is a common pattern generated by SPMD partitioning for 1D sharding. 
PiperOrigin-RevId: 370084429 Change-Id: Ie924f1356b5ce7f9efeb389af31fafdd97eb05b1",hlo_matchers.h,"@@ -209,6 +209,7 @@ HLO_MATCHER(AllToAll); HLO_MATCHER(And); HLO_MATCHER(BatchNormGrad); HLO_MATCHER(Bitcast); +HLO_MATCHER(BitcastConvert); HLO_MATCHER(Broadcast); HLO_MATCHER(Call); HLO_MATCHER(Ceil); ",0,train 5a29a86050ae0c093dbf98e9321bdbd766f13096,tensorflow/tensorflow,"[XLA] Simplify dynamic-slice(iota, index) => index. - This is a common pattern generated by SPMD partitioning for 1D sharding. PiperOrigin-RevId: 370084429 Change-Id: Ie924f1356b5ce7f9efeb389af31fafdd97eb05b1",pattern_matcher.h,"@@ -1982,6 +1982,8 @@ XLA_NULLOP_PATTERN(Constant) XLA_NULLOP_PATTERN(Parameter) XLA_NULLOP_PATTERN(Iota) XLA_NULLOP_PATTERN(Rng) +XLA_NULLOP_PATTERN(PartitionId) +XLA_NULLOP_PATTERN(ReplicaId) #undef XLA_NULLOP_PATTERN // Helpers for unary instructions. ",0,train 659c981a3556c6424237eacd0bf4cdc86f228f16,tensorflow/tensorflow,"Fix error when trying to fit a model with a nested model that has been compiled with metrics. PiperOrigin-RevId: 254472839",training.py,"@@ -60,6 +60,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops.losses import util as tf_losses_utils from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training.tracking import base as trackable +from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils from tensorflow.python.util import nest from tensorflow.python.util import serialization from tensorflow.python.util.tf_export import keras_export @@ -381,7 +382,9 @@ class Model(network.Network): metrics = [] if self._is_compiled: metrics += self._compile_metric_functions - return metrics + super(Model, self).metrics + metrics.extend(self._metrics) + metrics.extend(_get_metrics_from_layers(self._layers)) + return metrics @property def metrics_names(self): @@ -3113,3 +3116,27 @@ def _convert_scipy_sparse_tensor(value, expected_input): return sparse_tensor.SparseTensor(indices, data, shape) else: return value + + +def _get_metrics_from_layers(layers): + """"""Returns list of metrics from the given layers. + + This will not include the `compile` metrics of a model layer. + + Arguments: + layers: List of layers. + + Returns: + List of metrics. + """""" + metrics = [] + layers = trackable_layer_utils.filter_empty_layer_containers(layers) + for layer in layers: + if isinstance(layer, Model): + # We cannot call 'metrics' on the model because we do not want to + # include the metrics that were added in compile API of a nested model. + metrics.extend(layer._metrics) # pylint: disable=protected-access + metrics.extend(_get_metrics_from_layers(layer.layers)) + else: + metrics.extend(layer.metrics) + return metrics ",0,train 659c981a3556c6424237eacd0bf4cdc86f228f16,tensorflow/tensorflow,"Fix error when trying to fit a model with a nested model that has been compiled with metrics. 
PiperOrigin-RevId: 254472839",training_test.py,"@@ -3123,6 +3123,51 @@ class TestTrainingWithMetrics(keras_parameterized.TestCase): for key in ['loss', 'mae_1', 'mae_2', 'mae_3']: self.assertAllClose(history.history[key], expected_val, 1e-3) + @keras_parameterized.run_all_keras_modes + def test_model_with_nested_compiled_model(self): + + class LayerWithAddMetric(keras.layers.Layer): + + def __init__(self): + super(LayerWithAddMetric, self).__init__() + self.dense = keras.layers.Dense(1, kernel_initializer='ones') + + def call(self, inputs): + outputs = self.dense(inputs) + self.add_metric( + math_ops.reduce_sum(outputs), name='mean', aggregation='mean') + return outputs + + x = keras.layers.Input(shape=(1,)) + y = LayerWithAddMetric()(x) + + inner_model = keras.models.Model(x, y) + inner_model.add_metric( + math_ops.reduce_sum(y), name='mean1', aggregation='mean') + + inner_model.compile( + 'sgd', + loss='mse', + metrics=[metrics_module.Accuracy('acc')], + run_eagerly=testing_utils.should_run_eagerly()) + + self.assertEqual([m.name for m in inner_model.metrics], + ['acc', 'mean', 'mean1']) + + x = keras.layers.Input(shape=[1]) + y = inner_model(x) + outer_model = keras.Model(x, y) + outer_model.add_metric( + math_ops.reduce_sum(y), name='mean2', aggregation='mean') + + outer_model.compile( + 'sgd', + loss='mse', + metrics=[metrics_module.Accuracy('acc2')], + run_eagerly=testing_utils.should_run_eagerly()) + self.assertEqual([m.name for m in outer_model.metrics], + ['acc2', 'mean', 'mean1', 'mean2']) + class BareUpdateLayer(keras.layers.Layer): ",0,train 6f968a3a59b2d11ac74e0c0d9921dc3d660e765c,tensorflow/tensorflow,"Remove all the save model related code in base_layer. They are not applicable for the v1 tf.layers case. PiperOrigin-RevId: 299962368 Change-Id: I7c6f4fb76ff3e5aa83d4fc0db817ed4e1b73a827",legacy_base_layer.py,"@@ -61,7 +61,6 @@ from tensorflow.python.frozen_keras.engine import node as node_module from tensorflow.python.frozen_keras.utils import generic_utils from tensorflow.python.frozen_keras.utils import layer_utils from tensorflow.python.frozen_keras.utils import tf_utils -from tensorflow.python.keras.saving.saved_model import layer_serialization # A module that only depends on `keras.layers` import these from here. from tensorflow.python.keras.utils.generic_utils import to_snake_case # pylint: disable=unused-import from tensorflow.python.keras.utils.tf_utils import is_tensor_or_tensor_list # pylint: disable=unused-import @@ -2575,26 +2574,6 @@ class LegacyBaseLayer(module.Module): # SavedModel properties. Please see keras/saving/saved_model for details. - @property - def _trackable_saved_model_saver(self): - return layer_serialization.LayerSavedModelSaver(self) - - @property - def _object_identifier(self): - return self._trackable_saved_model_saver.object_identifier - - @property - def _tracking_metadata(self): - return self._trackable_saved_model_saver.tracking_metadata - - def _list_extra_dependencies_for_serialization(self, serialization_cache): - return (self._trackable_saved_model_saver - .list_extra_dependencies_for_serialization(serialization_cache)) - - def _list_functions_for_serialization(self, serialization_cache): - return (self._trackable_saved_model_saver - .list_functions_for_serialization(serialization_cache)) - def __getstate__(self): # Override to support `copy.deepcopy` and pickling. # Thread-local objects cannot be copied in Python 3, so pop these. 
",0,train 3c98b456afb144832294df944aa01b80e6004a0f,tensorflow/tensorflow,"Switch `FastParseSingleExample()` to accept an `absl::string_view`. PiperOrigin-RevId: 261806721",example_proto_fast_parsing.cc,"@@ -1273,8 +1273,8 @@ Status FastParseExample(const Config& config, return Status::OK(); } -Status FastParseSingleExample(const Config& config, const string& serialized, - Result* result) { +Status FastParseSingleExample(const Config& config, + absl::string_view serialized, Result* result) { DCHECK(result != nullptr); // Check config so we can safely CHECK(false) in switches on config.*.dtype for (auto& c : config.sparse) { ",0,train 3c98b456afb144832294df944aa01b80e6004a0f,tensorflow/tensorflow,"Switch `FastParseSingleExample()` to accept an `absl::string_view`. PiperOrigin-RevId: 261806721",example_proto_fast_parsing.h,"@@ -107,7 +107,7 @@ Status FastParseExample(const FastParseExampleConfig& config, typedef FastParseExampleConfig FastParseSingleExampleConfig; Status FastParseSingleExample(const FastParseSingleExampleConfig& config, - const string& serialized, Result* result); + absl::string_view serialized, Result* result); // Parses a batch of serialized SequenceExample protos and converts them into // result according to given config. ",0,train 6b0ab72dfcd8dd608b2a056a156be960b1abe878,tensorflow/tensorflow,"Integrate LLVM at https://github.com/llvm/llvm-project/commit/c2171457e281 PiperOrigin-RevId: 306950518 Change-Id: I6da501ea9c226bfeb35f8854b6a12dc7a42938df",xla_legalize_to_linalg.cc,"@@ -134,7 +134,7 @@ class PointwiseToLinalgConverter : public OpConversionPattern { rewriter.getI64IntegerAttr(bodyResultTypes.size()), // args_out rewriter.getArrayAttr(indexingMaps), GetNParallelLoopsAttrs(nloops, &rewriter), - /*doc=*/nullptr, /*fun=*/nullptr, /*library_call=*/nullptr); + /*doc=*/nullptr, /*library_call=*/nullptr); // Add a block to the region. auto* region = &linalgOp.region(); @@ -218,7 +218,7 @@ class DataMovementOpConverter : public OpConversionPattern { loc, isLHLO ? ArrayRef{} : resultType, args, rewriter.getI64IntegerAttr(1), rewriter.getI64IntegerAttr(1), indexingMapsAttr, GetNParallelLoopsAttrs(nloops, &rewriter), - /*doc=*/nullptr, /*fun=*/nullptr, /*library_call=*/nullptr); + /*doc=*/nullptr, /*library_call=*/nullptr); auto* region = &linalgOp.region(); auto* block = rewriter.createBlock(region, region->end()); @@ -400,7 +400,7 @@ class IotaConverter : public OpConversionPattern { rewriter.getI64IntegerAttr(1), // args_out rewriter.getArrayAttr(indexingMaps), GetNParallelLoopsAttrs(nloops, &rewriter), - /*doc=*/nullptr, /*fun=*/nullptr, /*library_call=*/nullptr); + /*doc=*/nullptr, /*library_call=*/nullptr); // Add a block to the region. auto* region = &linalgOp.region(); ",0,train c487a28e5e84835ef5cd36daa09ddfad8e99c5cb,tensorflow/tensorflow,"Fix comment typos and formatting PiperOrigin-RevId: 218836217",Identifier.h,"@@ -52,14 +52,10 @@ public: const char *c_str() const { return pointer; } /// Return a pointer to the start of the string data. - const char *data() const { - return pointer; - } + const char *data() const { return pointer; } /// Return the number of bytes in this string. - unsigned size() const { - return ::strlen(pointer); - } + unsigned size() const { return ::strlen(pointer); } /// Return true if this identifier is the specified string. 
bool is(StringRef string) const { return strref().equals(string); } ",0,train c487a28e5e84835ef5cd36daa09ddfad8e99c5cb,tensorflow/tensorflow,"Fix comment typos and formatting PiperOrigin-RevId: 218836217",OpDefinition.h,"@@ -311,7 +311,7 @@ protected: } }; -/// This class provides the API for ops that are known to have exactly one +/// This class provides the API for ops that are known to have no /// SSA operand. template class ZeroOperands : public TraitBase { @@ -473,7 +473,7 @@ public: } }; -/// This class provides return value APIs for ops that are known to have a +/// This class provides return value APIs for ops that are known to have /// zero results. template class ZeroResult : public TraitBase { ",0,train c487a28e5e84835ef5cd36daa09ddfad8e99c5cb,tensorflow/tensorflow,"Fix comment typos and formatting PiperOrigin-RevId: 218836217",OperationSupport.h,"@@ -134,7 +134,7 @@ private: const OperationProperties opProperties; }; -/// NamedAttribute is a used for operation attribute lists, it holds an +/// NamedAttribute is used for operation attribute lists, it holds an /// identifier for the name and a value for the attribute. The attribute /// pointer should always be non-null. using NamedAttribute = std::pair; ",0,train c487a28e5e84835ef5cd36daa09ddfad8e99c5cb,tensorflow/tensorflow,"Fix comment typos and formatting PiperOrigin-RevId: 218836217",StmtVisitor.h,"@@ -212,7 +212,7 @@ public: // and try visiting the subtype. All of this should be inlined perfectly, // because there are no virtual functions to get in the way. - // When visiting a specific stmt directly during a walk, these methods get + // When visiting a specific stmt directly during a walk, these methods get // called. These are typically O(1) complexity and shouldn't be recursively // processing their descendants in some way. When using RetTy, all of these // need to be overridden. ",0,train 0d6939bd371b3558278720f06a03083c28c1b0b7,tensorflow/tensorflow,"[mhlo] Import tuple-return from mhlo::mapOp's reducer-block to flattened return-val. During import (from HLO to MHLO) we flatten the tuple return-type of region-blocks. MHLO mapOp::verifier ensures that the flattened return-type is comaptible with the op-specification. PiperOrigin-RevId: 433874864",hlo_function_importer.cc,"@@ -1190,8 +1190,9 @@ StatusOr HloFunctionImporter::ImportInstructionImpl( auto op = func_builder->create( loc, result_type, operands, ConvertDimensions(instruction->dimensions())); - TF_RETURN_IF_ERROR( - ImportAsRegion(*instruction->to_apply(), &op.computation())); + TF_RETURN_IF_ERROR(ImportAsRegion(*instruction->to_apply(), + &op.computation(), + /*flatten_region_arg_tuple=*/true)); return op.getOperation(); } case HloOpcode::kConvolution: { ",0,train 3bdccb536b4cc96e66ff0452e11c21bfff44376e,tensorflow/tensorflow,"Extracting hand-coded real number support out of generated C++ Conj op and into the only file (math_grad.cc) where it was used. This allows the current set of C++ ops to be fully auto-generated. PiperOrigin-RevId: 368748247 Change-Id: I5634d14022f2456bbd09194774c41916c2115f3d",math_grad.cc,"@@ -22,7 +22,6 @@ limitations under the License. 
using std::vector; using tensorflow::ops::AddV2; -using tensorflow::ops::Conj; using tensorflow::ops::Div; using tensorflow::ops::DivNoNan; using tensorflow::ops::MatMul; @@ -35,6 +34,20 @@ namespace tensorflow { namespace gradients { namespace { +static Status SafeConj(AbstractContext* ctx, AbstractTensorHandle* const input, + AbstractTensorHandle** output, const char* name) { + auto dtype = input->DataType(); + if (DataTypeIsFloating(BaseType(dtype)) || + DataTypeIsInteger(BaseType(dtype))) { + return tensorflow::ops::Identity(ctx, input, output, name); + } else if (!DataTypeIsComplex(BaseType(dtype)) && + BaseType(dtype) != DT_VARIANT) { + return errors::InvalidArgument( + ""Expected numeric or variant tensor, got dtype "", dtype); + } + return tensorflow::ops::Conj(ctx, input, output, name); +} + class AddGradientFunction : public GradientFunction { public: Status Compute(AbstractContext* ctx, @@ -63,7 +76,7 @@ class ExpGradientFunction : public GradientFunction { absl::Span grad_inputs) override { AbstractTensorHandle* conj_output; std::string name = ""Conj_Exp_Grad""; - TF_RETURN_IF_ERROR(Conj(ctx, exp_.get(), &conj_output, name.c_str())); + TF_RETURN_IF_ERROR(SafeConj(ctx, exp_.get(), &conj_output, name.c_str())); AbstractTensorHandlePtr conj_output_releaser(conj_output); name = ""Mul_Exp_Grad""; @@ -131,13 +144,13 @@ class MatMulGradientFunction : public GradientFunction { AbstractTensorHandle* conj_output; std::string name = ""Conj_A_MatMul_Grad""; TF_RETURN_IF_ERROR( - Conj(ctx, forward_inputs_[0], &conj_output, name.c_str())); + SafeConj(ctx, forward_inputs_[0], &conj_output, name.c_str())); AbstractTensorHandlePtr A(conj_output); name = ""Conj_B_MatMul_Grad""; TF_RETURN_IF_ERROR( - Conj(ctx, forward_inputs_[1], &conj_output, name.c_str())); + SafeConj(ctx, forward_inputs_[1], &conj_output, name.c_str())); AbstractTensorHandlePtr B(conj_output); @@ -332,7 +345,7 @@ class Log1pGradientFunction : public GradientFunction { // Calculate conjugate of X std::string name = ""Conj_Log1p_Grad_X""; - TF_RETURN_IF_ERROR(Conj(ctx, X, &temp_output, name.c_str())); + TF_RETURN_IF_ERROR(SafeConj(ctx, X, &temp_output, name.c_str())); AbstractTensorHandlePtr Conj_X(temp_output); ",0,train 3bdccb536b4cc96e66ff0452e11c21bfff44376e,tensorflow/tensorflow,"Extracting hand-coded real number support out of generated C++ Conj op and into the only file (math_grad.cc) where it was used. This allows the current set of C++ ops to be fully auto-generated. PiperOrigin-RevId: 368748247 Change-Id: I5634d14022f2456bbd09194774c41916c2115f3d",math_ops.cc,"@@ -17,7 +17,6 @@ limitations under the License. 
#include ""tensorflow/c/eager/abstract_context.h"" #include ""tensorflow/c/eager/abstract_tensor_handle.h"" #include ""tensorflow/c/eager/tracing_utils.h"" -#include ""tensorflow/c/experimental/ops/array_ops.h"" #include ""tensorflow/core/framework/types.h"" #include ""tensorflow/core/platform/errors.h"" @@ -64,16 +63,6 @@ Status Mul(AbstractContext* ctx, AbstractTensorHandle* const x, // ``` Status Conj(AbstractContext* ctx, AbstractTensorHandle* const input, AbstractTensorHandle** output, const char* name) { - // Hand-coded optimization: - auto dtype = input->DataType(); - if (DataTypeIsFloating(BaseType(dtype)) || - DataTypeIsInteger(BaseType(dtype))) { - return Identity(ctx, input, output, name); - } else if (!DataTypeIsComplex(BaseType(dtype)) && - BaseType(dtype) != DT_VARIANT) { - return errors::InvalidArgument( - ""Expected numeric or variant tensor, got dtype "", dtype); - } AbstractOperationPtr op_ptr(ctx->CreateOperation()); TF_RETURN_IF_ERROR(op_ptr->Reset(""Conj"", /*raw_device_name=*/nullptr)); TF_RETURN_IF_ERROR(MaybeSetOpName(op_ptr.get(), name)); ",0,train 3b4e53b0739804af7e8f51412bac366dd842a3f1,tensorflow/tensorflow,"Add an options argument to EqualGraphDef and EqualNodeDef. Currently the only option is controlling whether internal attributes (whose names start with ""_"") are tested for equality. Change: 145362690",equal_graph_def.cc,"@@ -25,7 +25,7 @@ limitations under the License. namespace tensorflow { bool EqualGraphDef(const GraphDef& actual, const GraphDef& expected, - string* diff) { + string* diff, const EqualGraphDefOptions& options) { // Intentionally do not check that versions match so that this routine can // be used for less brittle golden file tests. @@ -44,7 +44,9 @@ bool EqualGraphDef(const GraphDef& actual, const GraphDef& expected, return false; } - if (!EqualNodeDef(*actual_iter->second, expected_node, diff)) return false; + if (!EqualNodeDef(*actual_iter->second, expected_node, diff, options)) { + return false; + } actual_index.erase(actual_iter); } @@ -75,8 +77,8 @@ string JoinStringField(const protobuf::RepeatedPtrField& f) { } // namespace -bool EqualNodeDef(const NodeDef& actual, const NodeDef& expected, - string* diff) { +bool EqualNodeDef(const NodeDef& actual, const NodeDef& expected, string* diff, + const EqualGraphDefOptions& options) { if (actual.name() != expected.name()) { if (diff != nullptr) { *diff = strings::StrCat(""Actual node name '"", actual.name(), @@ -156,13 +158,15 @@ bool EqualNodeDef(const NodeDef& actual, const NodeDef& expected, std::unordered_set actual_attr; for (const auto& a : actual.attr()) { - if (!a.first.empty() && a.first[0] == '_') { + if (options.ignore_internal_attrs && !a.first.empty() && + a.first[0] == '_') { continue; } actual_attr.insert(a.first); } for (const auto& e : expected.attr()) { - if (!e.first.empty() && e.first[0] == '_') { + if (options.ignore_internal_attrs && !e.first.empty() && + e.first[0] == '_') { continue; } ",0,train 3b4e53b0739804af7e8f51412bac366dd842a3f1,tensorflow/tensorflow,"Add an options argument to EqualGraphDef and EqualNodeDef. Currently the only option is controlling whether internal attributes (whose names start with ""_"") are tested for equality. Change: 145362690",equal_graph_def.h,"@@ -22,20 +22,27 @@ limitations under the License. namespace tensorflow { +struct EqualGraphDefOptions { + // Should internal attributes (attribute names that start with '_') be + // ignored? 
+ bool ignore_internal_attrs = true; +}; + // Determines if actual and expected are equal, ignoring versions and ordering // of nodes, attrs, and control inputs. If the GraphDefs are different and // diff != nullptr, *diff is set to an explanation of the difference. Note that // we use node names to match up nodes between the graphs, and so the naming of // nodes must be consistent. bool EqualGraphDef(const GraphDef& actual, const GraphDef& expected, - string* diff); + string* diff, const EqualGraphDefOptions& options = {}); // Determines if actual and expected are equal, ignoring: ordering of -// attrs, internal attributes, and control inputs. +// attrs, internal attributes (if set in `options`), and control inputs. // // If the NodeDefs are different and // diff != nullptr, *diff is set to an explanation of the difference. -bool EqualNodeDef(const NodeDef& actual, const NodeDef& expected, string* diff); +bool EqualNodeDef(const NodeDef& actual, const NodeDef& expected, string* diff, + const EqualGraphDefOptions& options = {}); #define TF_EXPECT_GRAPH_EQ(expected, actual) \ do { \ ",0,train b265dd5fb3be0b7a860b5419c0a2285f9693ae6d,tensorflow/tensorflow,"Set the AssignedDevice of the lowered control flow nodes (Enter, Switch, Merge, NextIteration, Exit) to be the same as the assigned device of the input loop variable if that is available during lowering. PiperOrigin-RevId: 310578865 Change-Id: I7118c26054be2d8fb239c3ed03b9a3e5c4685ef6",lower_while_op.cc,"@@ -238,12 +238,14 @@ Status LowerWhileHelper::CreateEnterNodes() { TF_RETURN_IF_ERROR(while_op_->input_edges(&edges)); for (const Edge* edge : edges) { Node* enter_node; - NodeBuilder builder = NodeBuilder(NewName(""enter""), ""Enter"", - graph_->op_registry(), &debug_info_) - .Input(NodeOut(edge->src(), edge->src_output())) - .Attr(""frame_name"", name_) - .Attr(""parallel_iterations"", parallel_iterations_) - .Device(while_op_->requested_device()); + NodeBuilder builder = + NodeBuilder(NewName(""enter""), ""Enter"", graph_->op_registry(), + &debug_info_) + .Input(NodeOut(edge->src(), edge->src_output())) + .Attr(""frame_name"", name_) + .Attr(""parallel_iterations"", parallel_iterations_) + .Device(edge->src()->requested_device()) + .AssignedDevice(edge->src()->assigned_device_name()); if (IsResource(edge->dst_input())) { builder.Attr(""is_constant"", true); } @@ -282,7 +284,8 @@ Status LowerWhileHelper::CreateMergeNodes() { NodeBuilder(NewName(""merge""), ""Merge"", graph_->op_registry(), &debug_info_) .Input({NodeOut(enter_node, 0), NodeOut(enter_node, 0)}) - .Device(while_op_->requested_device()) + .Device(enter_node->requested_device()) + .AssignedDevice(enter_node->assigned_device_name()) .Finalize(graph_, &merge_node)); merge_nodes_.emplace_back(merge_node); } @@ -323,21 +326,19 @@ Status LowerWhileHelper::CreateSwitchNodes() { TF_RETURN_IF_ERROR(while_op_->input_node(i, &input_node)); op_name = strings::StrCat(input_node->name(), ""_switch""); } + Node* merge_node = merge_nodes_[op_input_output_to_lowered_node_[i]]; Node* switch_node; string op_type = ""Switch""; - if (IsRefType( - merge_nodes_[op_input_output_to_lowered_node_[i]]->output_type( - 0))) { + if (IsRefType(merge_node->output_type(0))) { op_type = ""RefSwitch""; } - TF_RETURN_IF_ERROR( - NodeBuilder(NewName(op_name), op_type, graph_->op_registry(), - &debug_info_) - .Input( - NodeOut(merge_nodes_[op_input_output_to_lowered_node_[i]], 0)) - .Input(NodeOut(loop_cond_node_, 0)) - .Device(while_op_->requested_device()) - .Finalize(graph_, &switch_node)); + 
TF_RETURN_IF_ERROR(NodeBuilder(NewName(op_name), op_type, + graph_->op_registry(), &debug_info_) + .Input(NodeOut(merge_node, 0)) + .Input(NodeOut(loop_cond_node_, 0)) + .Device(merge_node->requested_device()) + .AssignedDevice(merge_node->assigned_device_name()) + .Finalize(graph_, &switch_node)); switch_nodes_.emplace_back(switch_node); } return Status::OK(); @@ -392,7 +393,10 @@ Status LowerWhileHelper::CreateExitNodes() { &debug_info_) .Input(NodeOut(switch_nodes_[op_input_output_to_lowered_node_[i]], 0)) - .Device(while_op_->requested_device()) + .Device(switch_nodes_[op_input_output_to_lowered_node_[i]] + ->requested_device()) + .AssignedDevice(switch_nodes_[op_input_output_to_lowered_node_[i]] + ->assigned_device_name()) .Finalize(graph_, &exit_node)); exit_nodes_.emplace_back(exit_node); outputs.emplace_back(NodeOut(exit_node, 0)); @@ -440,12 +444,15 @@ Status LowerWhileHelper::CreateNextIterationNodes() { if (IsResource(i)) { continue; } - TF_RETURN_IF_ERROR(NodeBuilder(NewName(""next_iteration""), ""NextIteration"", - graph_->op_registry(), &debug_info_) - .Input(NodeOut(body_call_node_, i)) - .ControlInput(body_call_node_) - .Device(while_op_->requested_device()) - .Finalize(graph_, &next_iteration)); + TF_RETURN_IF_ERROR( + NodeBuilder(NewName(""next_iteration""), ""NextIteration"", + graph_->op_registry(), &debug_info_) + .Input(NodeOut(body_call_node_, i)) + .ControlInput(body_call_node_) + .Device(while_op_->requested_device()) + .AssignedDevice(merge_nodes_[op_input_output_to_lowered_node_[i]] + ->assigned_device_name()) + .Finalize(graph_, &next_iteration)); next_iterations_nodes_.emplace_back(next_iteration); } return Status::OK(); ",0,train b265dd5fb3be0b7a860b5419c0a2285f9693ae6d,tensorflow/tensorflow,"Set the AssignedDevice of the lowered control flow nodes (Enter, Switch, Merge, NextIteration, Exit) to be the same as the assigned device of the input loop variable if that is available during lowering. PiperOrigin-RevId: 310578865 Change-Id: I7118c26054be2d8fb239c3ed03b9a3e5c4685ef6",lower_while_op_test.cc,"@@ -169,6 +169,99 @@ TEST(LowerWhileOpTest, Simple) { } } +TEST(LowerWhileOpTest, ForwardAssignedInputDevice) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + + // Add test functions for cond and body. 
+ FunctionDefLibrary f_lib_proto; + *f_lib_proto.add_function() = test::function::XTimesTwo(); + *f_lib_proto.add_function() = test::function::LessThanOrEqualToN(8); + + TF_ASSERT_OK(graph->AddFunctionLibrary(f_lib_proto)); + auto type = DT_FLOAT; + Node* placeholder; + TF_CHECK_OK(NodeBuilder(""placed_node"", ""Placeholder"") + .Attr(""dtype"", type) + .Finalize(graph.get(), &placeholder)); + const string assigned_device_name = ""/job:localhost/replica:0/task:0/gpu:0""; + placeholder->set_assigned_device_name(assigned_device_name); + Node* while_node; + std::vector inputs({NodeBuilder::NodeOut(placeholder)}); + AttrValue cond_func; + cond_func.mutable_func()->set_name(""LessThanOrEqualToN""); + AttrValue body_func; + body_func.mutable_func()->set_name(""XTimesTwo""); + TF_ASSERT_OK( + NodeBuilder(""while"", ""While"", &graph->flib_def()) + .Input(inputs) + .Attr(""T"", {type}) + .Attr(""cond"", cond_func) + .Attr(""body"", body_func) + .Attr(""parallel_iterations"", 100) + .Attr(LowerFunctionalOpsPass::kLowerUsingSwitchMergeAttr, true) + .Finalize(graph.get(), &while_node)); + TF_ASSERT_OK(Rewrite(&graph)); + + const Node* placeholder_node = nullptr; + for (const auto* op : graph->op_nodes()) { + if (op->name() == ""placed_node"") { + placeholder_node = op; + } + } + ASSERT_NE(placeholder_node, nullptr); + // Verify the assigned device of the Enter node. + int enter_consumers = 0; + const Node* enter_node = nullptr; + for (const Node* consumer : placeholder_node->out_nodes()) { + if (consumer->type_string() == ""Enter"") { + enter_consumers += 1; + enter_node = consumer; + ASSERT_EQ(consumer->assigned_device_name(), assigned_device_name); + } + } + ASSERT_EQ(enter_consumers, 1); + // Verify the assigned device of the Merge node. + int merge_consumers = 0; + const Node* merge_node = nullptr; + for (const Node* consumer : enter_node->out_nodes()) { + if (consumer->type_string() == ""Merge"") { + merge_consumers += 1; + merge_node = consumer; + ASSERT_EQ(consumer->assigned_device_name(), assigned_device_name); + } + } + ASSERT_EQ(merge_consumers, 1); + // Verify the assigned device of the NextIteration node. + int next_iteration_consumers = 0; + for (const Node* consumer : merge_node->in_nodes()) { + if (consumer->type_string() == ""NextIteration"") { + next_iteration_consumers += 1; + ASSERT_EQ(consumer->assigned_device_name(), assigned_device_name); + } + } + ASSERT_EQ(next_iteration_consumers, 1); + // Verify the assigned device of the Switch node. + int switch_consumers = 0; + const Node* switch_node = nullptr; + for (const Node* consumer : merge_node->out_nodes()) { + if (consumer->type_string() == ""Switch"") { + switch_consumers += 1; + switch_node = consumer; + ASSERT_EQ(consumer->assigned_device_name(), assigned_device_name); + } + } + ASSERT_EQ(switch_consumers, 1); + // Verify the assigned device of the Exit node. 
+ int exit_consumers = 0; + for (const Node* consumer : switch_node->out_nodes()) { + if (consumer->type_string() == ""Exit"") { + exit_consumers += 1; + ASSERT_EQ(consumer->assigned_device_name(), assigned_device_name); + } + } + ASSERT_EQ(exit_consumers, 1); +} + TEST(LowerWhileOpTest, MultipleInputs) { std::unique_ptr graph(new Graph(OpRegistry::Global())); ",0,train 7a7b72855e7894b169ae78f4b46f247552bb62cb,tensorflow/tensorflow,"Pulls out variable initialization in tf.function().get_concrete_function PiperOrigin-RevId: 220548234",def_function.py,"@@ -51,6 +51,7 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable): name=None, dtype=None, constraint=None, + add_initializers_to=None, **unused_kwargs): """"""Creates a variable. @@ -81,6 +82,9 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable): variable and return the Tensor for the projected value (which must have the same shape). Constraints are not safe to use when doing asynchronous distributed training. + add_initializers_to: if not None and not in legacy graph mode, the + initializer tensor will be added to this map instead of adding the + assignment to the function. Raises: ValueError: If the initial value is not specified, or does not have a @@ -166,21 +170,24 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable): self._graph_element = value ops.add_to_collection(ops.GraphKeys.GLOBAL_VARIABLES, self) else: - def assign_fn(): - with ops.name_scope(""Assign"") as n, ops.colocate_with(self._handle): - resource_variable_ops.assign_variable_op( - self._handle, - initial_value, - name=n) - # Returning values to keep tf.cond happy. - return ops.convert_to_tensor(1) - def not_assign_fn(): - return ops.convert_to_tensor(0) - # Note: this cond is always guaranteed to run because we're inside a - # defun which will insert automatic control dependencies. - control_flow_ops.cond( - resource_variable_ops.var_is_initialized_op(self._handle), - not_assign_fn, assign_fn) + if add_initializers_to is not None: + add_initializers_to[self] = initial_value + else: + def assign_fn(): + with ops.name_scope(""Assign"") as n, ops.colocate_with(self._handle): + resource_variable_ops.assign_variable_op( + self._handle, + initial_value, + name=n) + # Returning values to keep tf.cond happy. + return ops.convert_to_tensor(1) + def not_assign_fn(): + return ops.convert_to_tensor(0) + # Note: this cond is always guaranteed to run because we're inside a + # defun which will insert automatic control dependencies. + control_flow_ops.cond( + resource_variable_ops.var_is_initialized_op(self._handle), + not_assign_fn, assign_fn) # After the handle has been created, set up a way to clean it up when # executing eagerly. 
We'll hold the only reference to the deleter, so that @@ -252,14 +259,15 @@ class PolymorphicFunction(object): input_signature=self._input_signature, experimental_autograph=self._autograph) - def _initialize(self, args, kwds): + def _initialize(self, args, kwds, add_initializers_to=None): """"""Initializes, on the first call."""""" self._created_variables = [] def variable_capturing_scope(unused_next_creator, **kwds): """"""Creates UnliftedInitializerVariables and saves references to them."""""" - v = UnliftedInitializerVariable(**kwds) + v = UnliftedInitializerVariable( + add_initializers_to=add_initializers_to, **kwds) self._created_variables.append(weakref.ref(v)) return v @@ -405,14 +413,22 @@ class PolymorphicFunction(object): Raises: ValueError: if this object has not yet been called on concrete values. """""" - # TODO(apassos) figure out how to handle this case (what should we return - # here?) + assert context.executing_eagerly() if self._stateful_fn is None: - raise ValueError( - ""Call this function with concrete values before asking for a"" - "" concrete function. Calling the function will ensure that, in"" - "" case this function creates variables, that those are properly"" - "" initialized."") + # Here we trace the function, collect the initializers, and attempt to + # extract them and run them eagerly. Fail only if we cannot do so. + initializer_map = {} + self._initialize(args, kwargs, add_initializers_to=initializer_map) + if not self._created_variables: + + @function + def initialize_variables(): + for v, init in initializer_map.items(): + v.assign(lift_to_graph.lift_to_graph( + init, ops.get_default_graph())[init]) + + initialize_variables() + if self._created_variables: # In this case we have created variables on the first call, so we run the # defunned version which is guaranteed to never create variables. 
",0,train 7a7b72855e7894b169ae78f4b46f247552bb62cb,tensorflow/tensorflow,"Pulls out variable initialization in tf.function().get_concrete_function PiperOrigin-RevId: 220548234",function_test.py,"@@ -190,7 +190,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): def testBasicGraphFunction(self): matmul = def_function.function(math_ops.matmul) - @function.defun + @def_function.function def sq(a): return matmul(a, a) @@ -204,7 +204,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): def testInputSpecGraphFunction(self): matmul = def_function.function(math_ops.matmul) - @function.defun + @def_function.function def sq(a): return matmul(a, a) @@ -223,7 +223,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): def testNestedInputSpecGraphFunction(self): matmul = def_function.function(math_ops.matmul) - @function.defun + @def_function.function def sq(mats): ((a, b),) = mats return matmul(a, b) @@ -347,7 +347,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): pair = collections.namedtuple('pair', ['a', 'b']) - @function.defun + @def_function.function def a_times_b(inputs): return matmul(inputs.a['a'], inputs.b['b']) @@ -362,7 +362,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): def testNestedOutputGraphFunction(self): matmul = def_function.function(math_ops.matmul) - @function.defun + @def_function.function def sq(a): return (matmul(a, a), {'b': constant_op.constant(1.0)}) @@ -381,7 +381,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): def testGraphFunctionWithGradients(self): v = resource_variable_ops.ResourceVariable(1.0, name='v') - @function.defun + @def_function.function def step(): def inner(): return v * v @@ -394,7 +394,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): self.assertAllEqual(step_op(), 2.0) def testGraphFunctionNoneOutput(self): - @function.defun + @def_function.function def fn(unused_a, unused_b): return None @@ -968,7 +968,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): v_gpu = resource_variable_ops.ResourceVariable( [0.0, 1.0, 2.0], name='gpu') - @function.defun + @def_function.function def resource_apply_adam(): training_ops.resource_apply_adam( v_cpu.handle, @@ -1040,11 +1040,11 @@ class FunctionTest(test.TestCase, parameterized.TestCase): @test_util.run_in_graph_and_eager_modes def testNestedDifferentiableFunction(self): - @function.defun + @def_function.function def inner_fn(a, b): return a * math_ops.add(a, b) - @function.defun + @def_function.function def outer_fn(x): return inner_fn(x, 1.0) @@ -1058,19 +1058,19 @@ class FunctionTest(test.TestCase, parameterized.TestCase): @test_util.run_in_graph_and_eager_modes def testDeeplyNestedDifferentiableFunction(self): - @function.defun + @def_function.function def inner_inner_fn(a, b): return math_ops.add(a, b) - @function.defun + @def_function.function def inner_fn(a, b): return inner_inner_fn(a, b) - @function.defun + @def_function.function def middle_fn(a, b): return a * inner_fn(a, b) - @function.defun + @def_function.function def outer_fn(x): return middle_fn(x, 1.0) @@ -1084,15 +1084,15 @@ class FunctionTest(test.TestCase, parameterized.TestCase): @test_util.run_in_graph_and_eager_modes def testDeeplyNestedDifferentiableFunctionWithMultipleGradCalls(self): - @function.defun + @def_function.function def inner_fn(a, b): return math_ops.add(a, b) - @function.defun + @def_function.function def middle_fn(a, b): return math_ops.mul(a, inner_fn(a, b)) - @function.defun + @def_function.function def outer_fn(x): return 
middle_fn(x, 3.0) @@ -1132,19 +1132,19 @@ class FunctionTest(test.TestCase, parameterized.TestCase): @test_util.run_in_graph_and_eager_modes def testDeeplyNestedDifferentiableFunctionGradientTapeInDefun(self): - @function.defun + @def_function.function def inner_inner_fn(a, b): return math_ops.add(a, b) - @function.defun + @def_function.function def inner_fn(a, b): return inner_inner_fn(a, b) - @function.defun + @def_function.function def middle_fn(a, b): return a * inner_fn(a, b) - @function.defun + @def_function.function def outer_fn(x): with backprop.GradientTape() as tp: tp.watch(x) @@ -1158,19 +1158,19 @@ class FunctionTest(test.TestCase, parameterized.TestCase): @test_util.run_in_graph_and_eager_modes def testDeeplyNestedDifferentiableFunctionGradientTapeInNestedDefun(self): - @function.defun + @def_function.function def inner_inner_fn(a, b): return math_ops.add(a, b) - @function.defun + @def_function.function def inner_fn(a, b): return inner_inner_fn(a, b) - @function.defun + @def_function.function def middle_fn(a, b): return a * inner_fn(a, b) - @function.defun + @def_function.function def almost_outer_fn(x): with backprop.GradientTape() as tp: tp.watch(x) @@ -1178,7 +1178,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): grad = tp.gradient(result, x) return grad - @function.defun + @def_function.function def outer_fn(x): return almost_outer_fn(x) @@ -1188,19 +1188,19 @@ class FunctionTest(test.TestCase, parameterized.TestCase): @test_util.run_in_graph_and_eager_modes def testDeeplyNestedDifferentiableFunctionGradientTapeInMultNestedDefun(self): - @function.defun + @def_function.function def inner_inner_fn(a, b): return math_ops.add(a, b) - @function.defun + @def_function.function def inner_fn(a, b): return inner_inner_fn(a, b) - @function.defun + @def_function.function def middle_fn(a, b): return a * inner_fn(a, b) - @function.defun + @def_function.function def almost_outer_fn(x): with backprop.GradientTape() as tp: tp.watch(x) @@ -1208,11 +1208,11 @@ class FunctionTest(test.TestCase, parameterized.TestCase): grad = tp.gradient(result, x) return grad - @function.defun + @def_function.function def outer_fn(x): return almost_outer_fn(x) - @function.defun + @def_function.function def outer_outer_fn(x): return outer_fn(x) @@ -1222,19 +1222,19 @@ class FunctionTest(test.TestCase, parameterized.TestCase): @test_util.run_in_graph_and_eager_modes def testDeeplyNestedDifferentiableFunctionTFGradientInDefun(self): - @function.defun + @def_function.function def inner_inner_fn(a, b): return math_ops.add(a, b) - @function.defun + @def_function.function def inner_fn(a, b): return inner_inner_fn(a, b) - @function.defun + @def_function.function def middle_fn(a, b): return a * inner_fn(a, b) - @function.defun + @def_function.function def outer_fn(x): result = middle_fn(x, 1.0) return gradients_impl.gradients(result, [x])[0] @@ -1245,24 +1245,24 @@ class FunctionTest(test.TestCase, parameterized.TestCase): @test_util.run_in_graph_and_eager_modes def testDeeplyNestedDifferentiableFunctionTFGradientInNestedDefun(self): - @function.defun + @def_function.function def inner_inner_fn(a, b): return math_ops.add(a, b) - @function.defun + @def_function.function def inner_fn(a, b): return inner_inner_fn(a, b) - @function.defun + @def_function.function def middle_fn(a, b): return a * inner_fn(a, b) - @function.defun + @def_function.function def almost_outer_fn(x): result = middle_fn(x, 1.0) return gradients_impl.gradients(result, [x])[0] - @function.defun + @def_function.function def 
outer_fn(x): return almost_outer_fn(x) @@ -1272,28 +1272,28 @@ class FunctionTest(test.TestCase, parameterized.TestCase): @test_util.run_in_graph_and_eager_modes def testDeeplyNestedDifferentiableFunctionTFGradientInMultNestedDefun(self): - @function.defun + @def_function.function def inner_inner_fn(a, b): return math_ops.add(a, b) - @function.defun + @def_function.function def inner_fn(a, b): return inner_inner_fn(a, b) - @function.defun + @def_function.function def middle_fn(a, b): return a * inner_fn(a, b) - @function.defun + @def_function.function def almost_outer_fn(x): result = middle_fn(x, 1.0) return gradients_impl.gradients(result, [x])[0] - @function.defun + @def_function.function def outer_fn(x): return almost_outer_fn(x) - @function.defun + @def_function.function def outer_outer_fn(x): return outer_fn(x) @@ -1461,7 +1461,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): def add(a, b): return math_ops.add(a, b) - @function.defun + @def_function.function def add_one(x): return add(x, 1) @@ -1675,7 +1675,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): with ops.device('gpu:0'): y = constant_op.constant(1.0) - @function.defun + @def_function.function def foo(): return test_ops.device_placement_op() ",0,train 7a7b72855e7894b169ae78f4b46f247552bb62cb,tensorflow/tensorflow,"Pulls out variable initialization in tf.function().get_concrete_function PiperOrigin-RevId: 220548234",lift_to_graph.py,"@@ -37,10 +37,8 @@ def lift_to_graph(init_tensor, graph, sources=None): visited_ops = set([x.op for x in sources]) ops_to_visit = [init_tensor.op] op_outputs = collections.defaultdict(set) - print(""ops_to_visit"", ops_to_visit) while ops_to_visit: op = ops_to_visit.pop() - print(""visiting"", op) if op in visited_ops: continue visited_ops.add(op) ",0,train 0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segratation,window_util.cc,"@@ -42,7 +42,7 @@ Window MakeWindow(absl::Span sizes, absl::Span strides) { Window window; CHECK_EQ(sizes.size(), strides.size()); - for (auto nb = 0; static_cast(nb) < sizes.size(); ++nb) { + for (auto nb = 0; nb < sizes.size(); ++nb) { auto* dimension = window.add_dimensions(); dimension->set_size(sizes[nb]); dimension->set_stride(strides[nb]); ",0,train 0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segratation,tensor_shape.cc,"@@ -182,7 +182,7 @@ void TensorShapeBase::InitDims(gtl::ArraySlice dim_sizes) { // Allow sizes that are under kint64max^0.25 so that 4-way multiplication // below cannot overflow. 
- static const int64 kMaxSmall = 0xd744; + static const uint64 kMaxSmall = 0xd744; static_assert(kMaxSmall * kMaxSmall * kMaxSmall * kMaxSmall <= kint64max, ""bad overflow check""); bool large_size = false; ",0,train 0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segratation,random_inputstream.cc,"@@ -92,7 +92,7 @@ Status RandomAccessInputStream::SkipNBytes(int64 bytes_to_skip) { } else { return s; } - if (data.size() < static_cast(bytes_to_read)) { + if (data.size() < bytes_to_read) { return errors::OutOfRange(""reached end of file""); } bytes_to_skip -= bytes_to_read; ",0,train 0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segratation,snappy_inputbuffer.cc,"@@ -134,7 +134,7 @@ Status SnappyInputBuffer::ReadCompressedBlockLength(uint32* length) { } size_t readable = std::min(bytes_to_read, avail_in_); - for (size_t i = 0; i < readable; i++) { + for (int i = 0; i < readable; i++) { // The ""unsigned char"" type cast is intentional to avoid implicit type // casting of the signed char to unsigned int during bitwise OR which // causes weird overflow errors. ",0,train 0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segratation,snappy_outputbuffer.cc,"@@ -76,7 +76,7 @@ Status SnappyOutputBuffer::Write(StringPiece data) { // If there is sufficient free space in input_buffer_ to fit data we // add it there and return. - if (static_cast(bytes_to_write) <= AvailableInputSpace()) { + if (bytes_to_write <= AvailableInputSpace()) { AddToInputBuffer(data); return Status::OK(); } @@ -87,7 +87,7 @@ Status SnappyOutputBuffer::Write(StringPiece data) { TF_RETURN_IF_ERROR(DeflateBuffered()); // input_buffer_ should be empty at this point. - if (static_cast(bytes_to_write) <= AvailableInputSpace()) { + if (bytes_to_write <= AvailableInputSpace()) { AddToInputBuffer(data); return Status::OK(); } @@ -144,7 +144,7 @@ void SnappyOutputBuffer::AddToInputBuffer(StringPiece data) { const int32 free_tail_bytes = input_buffer_capacity_ - (read_bytes + unread_bytes); - if (static_cast(bytes_to_write) > free_tail_bytes) { + if (bytes_to_write > free_tail_bytes) { memmove(input_buffer_.get(), next_in_, avail_in_); next_in_ = input_buffer_.get(); } ",0,train 0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segratation,zlib_outputbuffer.cc,"@@ -98,7 +98,7 @@ void ZlibOutputBuffer::AddToInputBuffer(StringPiece data) { int32 unread_bytes = z_stream_->avail_in; int32 free_tail_bytes = input_buffer_capacity_ - (read_bytes + unread_bytes); - if (static_cast(bytes_to_write) > free_tail_bytes) { + if (bytes_to_write > free_tail_bytes) { memmove(z_stream_input_.get(), z_stream_->next_in, z_stream_->avail_in); z_stream_->next_in = z_stream_input_.get(); } @@ -154,7 +154,7 @@ Status ZlibOutputBuffer::Append(StringPiece data) { size_t bytes_to_write = data.size(); - if (static_cast(bytes_to_write) <= AvailableInputSpace()) { + if (bytes_to_write <= AvailableInputSpace()) { AddToInputBuffer(data); return Status::OK(); } @@ -162,7 +162,7 @@ Status ZlibOutputBuffer::Append(StringPiece data) { TF_RETURN_IF_ERROR(DeflateBuffered(zlib_options_.flush_mode)); // At this point input stream should be empty. 
- if (static_cast(bytes_to_write) <= AvailableInputSpace()) { + if (bytes_to_write <= AvailableInputSpace()) { AddToInputBuffer(data); return Status::OK(); } ",0,train 0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segratation,env.cc,"@@ -214,7 +214,7 @@ bool Env::FilesExist(const std::vector& files, } if (fs_status) { result &= fs_result; - for (size_t i = 0; i < itr.second.size(); ++i) { + for (int i = 0; i < itr.second.size(); ++i) { per_file_status[itr.second[i]] = fs_status->at(i); } } else if (!fs_result) { ",0,train 0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segratation,file_system.cc,"@@ -308,7 +308,7 @@ StringPiece FileSystem::Basename(StringPiece path) const { StringPiece FileSystem::Extension(StringPiece path) const { StringPiece basename = this->Basename(path); - size_t pos = basename.rfind('.'); + int pos = basename.rfind('.'); if (pos == StringPiece::npos) { return StringPiece(path.data() + path.size(), 0); } else { ",0,train 0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segratation,file_system_helper.cc,"@@ -103,7 +103,7 @@ Status GetMatchingPaths(FileSystem* fs, Env* env, const string& pattern, children_dir_status[i] = fs->IsDirectory(child_path); } }); - for (size_t i = 0; i < children.size(); ++i) { + for (int i = 0; i < children.size(); ++i) { const string child_path = io::JoinPath(current_dir, children[i]); // If the IsDirectory call was cancelled we bail. if (children_dir_status[i].code() == tensorflow::error::CANCELLED) { ",0,train 0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segratation,status.cc,"@@ -74,9 +74,7 @@ class StatusLogSink : public TFLogSink { mutex_lock lock(mu_); messages_.emplace_back(entry.ToString()); - if (messages_.size() > static_cast(num_messages_)){ - messages_.pop_front(); - } + if (messages_.size() > num_messages_) messages_.pop_front(); } private: ",0,train 0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segratation,parse_annotation.cc,"@@ -50,7 +50,7 @@ std::vector SplitNameAndMetadata( std::vector SplitPairs(absl::string_view metadata) { std::vector key_value_pairs; std::stack quotes; - size_t start = 0, end = 0; + int start = 0, end = 0; for (; end < metadata.size(); ++end) { char ch = metadata[end]; switch (ch) { ",0,train 83839064dd8061089a7fdf69e1065655b432c4fd,tensorflow/tensorflow,"[tf.data] Optimize `tf.contrib.data.sample_from_datasets()` when the weights are not a dataset. Previously, we were recomputing the logits from the weights for each element, which is only necessary when the weights can differ for each element. PiperOrigin-RevId: 210128640",interleave_ops.py,"@@ -216,25 +216,46 @@ def sample_from_datasets(datasets, weights=None, seed=None): length of the `datasets` element. """""" num_datasets = len(datasets) - if weights is None: - weights = dataset_ops.Dataset.from_tensors([1.0] * num_datasets).repeat() - elif not isinstance(weights, dataset_ops.Dataset): - weights = ops.convert_to_tensor(weights, name=""weights"") - if weights.dtype not in (dtypes.float32, dtypes.float64): - raise TypeError(""`weights` must be convertible to a tensor of "" - ""`tf.float32` or `tf.float64` elements."") - if not weights.shape.is_compatible_with([num_datasets]): - raise ValueError(""`weights` must be a vector of length `len(datasets)`."") - weights = dataset_ops.Dataset.from_tensors(weights).repeat() - - # The `stateless_multinomial()` op expects log-probabilities, as opposed to - # weights. 
- logits_ds = weights.map(lambda *p: math_ops.log(p, name=""logits"")) - def select_dataset(logits, seed): - return array_ops.squeeze( - stateless.stateless_multinomial(logits, 1, seed=seed), axis=[0, 1]) - selector_input = dataset_ops.Dataset.zip( - (logits_ds, random_ops.RandomDataset(seed).batch(2))).map(select_dataset) + if not isinstance(weights, dataset_ops.Dataset): + if weights is None: + # Select inputs with uniform probability. + logits = [[1.0] * num_datasets] + else: + # Use the given `weights` as the probability of choosing the respective + # input. + weights = ops.convert_to_tensor(weights, name=""weights"") + if weights.dtype not in (dtypes.float32, dtypes.float64): + raise TypeError(""`weights` must be convertible to a tensor of "" + ""`tf.float32` or `tf.float64` elements."") + if not weights.shape.is_compatible_with([num_datasets]): + raise ValueError( + ""`weights` must be a vector of length `len(datasets)`."") + + # The `stateless_multinomial()` op expects log-probabilities, as opposed + # to weights. + logits = array_ops.expand_dims(math_ops.log(weights, name=""logits""), 0) + + def select_dataset_constant_logits(seed): + return array_ops.squeeze( + stateless.stateless_multinomial(logits, 1, seed=seed), axis=[0, 1]) + + selector_input = random_ops.RandomDataset(seed).batch(2).map( + select_dataset_constant_logits) + else: + # Use each element of the given `weights` dataset as the probability of + # choosing the respective input. + + # The `stateless_multinomial()` op expects log-probabilities, as opposed to + # weights. + logits_ds = weights.map(lambda *p: math_ops.log(p, name=""logits"")) + + def select_dataset_varying_logits(logits, seed): + return array_ops.squeeze( + stateless.stateless_multinomial(logits, 1, seed=seed), axis=[0, 1]) + + selector_input = dataset_ops.Dataset.zip( + (logits_ds, random_ops.RandomDataset(seed).batch(2) + )).map(select_dataset_varying_logits) return _DirectedInterleaveDataset(selector_input, datasets) ",0,train 2d8b5115ab308c8d934eb150c1015d102728013e,tensorflow/tensorflow,"Automated g4 rollback of changelist 193451839 PiperOrigin-RevId: 200275406",transpose_folding.cc,"@@ -178,7 +178,6 @@ bool FoldTransposeIntoConvolution(InstructionOperandsPair pair) { auto new_conv = HloInstruction::CreateConvolve( convolution.shape(), new_lhs, new_rhs, convolution.window(), new_dnums); - convolution.SetupDerivedInstruction(new_conv.get()); TF_CHECK_OK(convolution.parent()->ReplaceWithNewInstruction( &convolution, std::move(new_conv))); ",0,train 7036e0472258ad392c13fec50717f19e1670fa22,tensorflow/tensorflow,"Remove the stride-matching restriction on reduce-windows PiperOrigin-RevId: 371705477 Change-Id: I8154c6e79dc3ee8de3d6b0302758f7aefc8abe89",space_to_batch_converter.cc,"@@ -1397,19 +1397,19 @@ bool ConvolutionVisitor::SupportedOpForPropagation(HloInstruction* consumer, auto new_operand = old_to_new_instrs_[first_operand]; auto permute_dims = instr_to_dim_permute_map_[new_operand]; - const int64 new_space_dim = DimLookUp(permute_dims, old_space_dim); - - // Make sure that the stride lines up. - if (window.dimensions(old_space_dim).size() != 1) { - if (new_operand->shape().dimensions(new_space_dim) % - window.dimensions(old_space_dim).stride() != - 0) { - return false; - } - } // Select-and-scatter specific checks. if (consumer->opcode() == HloOpcode::kSelectAndScatter) { + const int64 new_space_dim = DimLookUp(permute_dims, old_space_dim); + // Make sure that the stride lines up. 
+ if (window.dimensions(old_space_dim).size() != 1) { + if (new_operand->shape().dimensions(new_space_dim) % + window.dimensions(old_space_dim).stride() != + 0) { + return false; + } + } + // Only support floating point datatypes. if (!ShapeUtil::ElementIsFloating(consumer->shape())) { return false; @@ -1657,6 +1657,14 @@ StatusOr ConvolutionVisitor::Propagate(HloInstruction* consumer, const int64 new_batch_dim = DimLookUp(permute_dims, old_batch_dim); const int64 new_space_dim = DimLookUp(permute_dims, old_space_dim); + // Calculate the required halo size + auto new_shape = first_operand->shape(); + auto old_shape = consumer->mutable_operand(0)->shape(); + + const int64 new_batch_size = new_shape.dimensions(new_batch_dim); + const int64 new_space_size = new_shape.dimensions(new_space_dim); + const int64 stride = consumer->window().dimensions(old_space_dim).stride(); + auto pad_val = is_select_and_scatter ? computation_->AddInstruction( @@ -1669,13 +1677,27 @@ StatusOr ConvolutionVisitor::Propagate(HloInstruction* consumer, new_batch_dim, new_space_dim, old_batch_dim, old_space_dim)); - // Calculate the required halo size - auto new_shape = first_operand->shape(); - auto old_shape = consumer->mutable_operand(0)->shape(); - - const int64 new_batch_size = new_shape.dimensions(new_batch_dim); - const int64 new_space_size = new_shape.dimensions(new_space_dim); - const int64 stride = consumer->window().dimensions(old_space_dim).stride(); + const int64 extra_space = new_space_size % stride; + if (extra_space) { + CHECK_EQ(consumer->opcode(), HloOpcode::kReduceWindow); + const int64 old_batch_size = old_shape.dimensions(old_batch_dim); + const int64 old_space_size = old_shape.dimensions(old_space_dim); + // If the shrunk space is still larger/equal than the original space, we + // reduce the space. + if ((new_space_size - extra_space) * new_batch_size >= + old_batch_size * old_space_size) { + TF_ASSIGN_OR_RETURN(first_operand, + DecreaseSpatialSizeOnSpaceToBatchedShape( + first_operand, new_batch_dim, old_batch_size, + new_space_dim, new_space_size - extra_space)); + } else { + TF_ASSIGN_OR_RETURN( + first_operand, + IncreaseSpatialSizeOnSpaceToBatchedShape( + first_operand, new_batch_dim, old_batch_size, new_space_dim, + new_space_size + stride - extra_space)); + } + } const int64 window_size = consumer->window().dimensions(old_space_dim).size(); const int64 last_overlap_point = ((new_space_size - 1) / stride) * stride; ",0,train f0ffba31ed278e2ada5537b54575ea05af1091a9,tensorflow/tensorflow,Update output_init_files_test.py,output_init_files_test.py,"@@ -45,7 +45,7 @@ def _get_modules(package, attr_name, constants_attr_name): API constant names. Returns: - Set of TensorFow API modules. + Set of TensorFlow API modules. """""" modules = set() # TODO(annarev): split up the logic in create_python_api.py so that ",0,train bfa7016612c0255edb6a02d7134f4babacfbf1ca,tensorflow/tensorflow,"[XLA:HLO] Prevent while buffer entry parameter buffer sharing if buffer is live out. PiperOrigin-RevId: 170099782",buffer_assignment.cc,"@@ -1121,6 +1121,7 @@ void BufferAssigner::AddWhileSetToColocatedBufferSets( // Scan 'colocated_buffer_sets' in reverse order for locality; colocated sets // are added in postorder over computations and instructions. 
const int64 init_buffer_size = buffer_size(*while_init_buffer); + const bool is_live_out = buffer_liveness.MaybeLiveOut(*while_result_buffer); for (int i = colocated_buffer_sets->size() - 1; i >= 0; --i) { const ColocatedBufferSet& predecessor_set = (*colocated_buffer_sets)[i]; @@ -1141,6 +1142,20 @@ void BufferAssigner::AddWhileSetToColocatedBufferSets( continue; } + // Skip predecessor sets with entry parameter if the while result is live + // out. + if (is_live_out && + std::any_of(predecessor_set.begin(), predecessor_set.end(), + [](const LogicalBuffer* buffer) { + auto* instruction = buffer->instruction(); + auto* computation = instruction->parent(); + auto* module = computation->parent(); + return instruction->opcode() == HloOpcode::kParameter && + computation == module->entry_computation(); + })) { + continue; + } + // Build vector of predecessor while result and init buffers, which are // checked for liveness interference below. We must check both the result // and init buffers because they're aliased together, but ",0,train bfa7016612c0255edb6a02d7134f4babacfbf1ca,tensorflow/tensorflow,"[XLA:HLO] Prevent while buffer entry parameter buffer sharing if buffer is live out. PiperOrigin-RevId: 170099782",buffer_assignment_test.cc,"@@ -1764,5 +1764,62 @@ TEST_F(WhileBufferAssignmentTest, DISABLED_TwoWhiles) { EXPECT_TRUE(BuffersDistinct({while0}, {while1}, *assignment)); } +TEST_F(WhileBufferAssignmentTest, WhilesDontShareEntryParamIfLiveOut) { + auto module = MakeUnique(TestName()); + auto builder = HloComputation::Builder(""entry""); + + auto input0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, data_shape_, ""input0"")); + auto weights0 = builder.AddInstruction( + HloInstruction::CreateParameter(1, data_shape_, ""weights0"")); + + auto zero = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(0.0))); + auto output0 = builder.AddInstruction( + HloInstruction::CreateBroadcast(data_shape_, zero, {1})); + auto output1 = builder.AddInstruction( + HloInstruction::CreateBroadcast(data_shape_, zero, {1})); + + auto cond0 = + module->AddEmbeddedComputation(BuildWhileConditionComputation(""cond"")); + auto body0 = + module->AddEmbeddedComputation(BuildWhileBodyComputation(""body"")); + + auto tuple0 = builder.AddInstruction( + HloInstruction::CreateTuple({input0, weights0, output0})); + auto while0 = builder.AddInstruction( + HloInstruction::CreateWhile(loop_state_shape_, cond0, body0, tuple0)); + + // Get output of 'while0' and feed as input to 'while1'. + auto while0_out = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(data_shape_, while0, 2)); + + auto cond1 = + module->AddEmbeddedComputation(BuildWhileConditionComputation(""cond"")); + auto body1 = + module->AddEmbeddedComputation(BuildWhileBodyComputation(""body"")); + + auto tuple1 = builder.AddInstruction( + HloInstruction::CreateTuple({while0_out, weights0, output1})); + auto while1 = builder.AddInstruction( + HloInstruction::CreateWhile(loop_state_shape_, cond1, body1, tuple1)); + + // Get output of 'while1' so that it is live out of computation. + auto while1_out = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(data_shape_, while1, 2)); + + module->AddEntryComputation(builder.Build()); + RunCopyInsertion(module.get()); + auto assignment = RunBufferAssignment(module.get()); + // Get BufferAllocation for root instruction. 
+ auto* root_alloc = assignment->GetUniqueTopLevelSlice(while1_out) + .ConsumeValueOrDie() + .allocation(); + // Test that root instruction allocation is live out. + EXPECT_TRUE(root_alloc->maybe_live_out()); + // Test that root instruction allocation is not an entry parameter. + EXPECT_FALSE(root_alloc->is_entry_computation_parameter()); +} + } // namespace } // namespace xla ",0,train 634888a82f46694e2747ffde745d269b6cdf7c80,tensorflow/tensorflow,TFLu: detection_postprocess: fix review comments and build issues,detection_postprocess.cc,"@@ -309,14 +309,12 @@ void DequantizeBoxEncodings(const TfLiteTensor* input_box_encodings, int idx, template T ReInterpretTensor(const TfLiteTensor* tensor) { - // TODO (chowdhery): check float const float* tensor_base = GetTensorData(tensor); return reinterpret_cast(tensor_base); } template T ReInterpretTensor(TfLiteTensor* tensor) { - // TODO (chowdhery): check float float* tensor_base = GetTensorData(tensor); return reinterpret_cast(tensor_base); } @@ -791,7 +789,6 @@ TfLiteStatus NonMaxSuppressionMultiClass(TfLiteContext* context, } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - // TODO(chowdhery): Generalize for any batch size TF_LITE_ENSURE(context, (kBatchSize == 1)); // Set up scratch buffers @@ -837,17 +834,20 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // highest scoring non-overlapping boxes. TF_LITE_ENSURE_STATUS(NonMaxSuppressionMultiClass(context, node, op_data)); - // TODO(chowdhery): Generalize for any batch size - return kTfLiteOk; } } // namespace detection_postprocess TfLiteRegistration* Register_DETECTION_POSTPROCESS() { - static TfLiteRegistration r = { - detection_postprocess::Init, detection_postprocess::Free, - detection_postprocess::Prepare, detection_postprocess::Eval}; + static TfLiteRegistration r = {/*init=*/detection_postprocess::Init, + /*free=*/detection_postprocess::Free, + /*prepare=*/detection_postprocess::Prepare, + /*invoke=*/detection_postprocess::Eval, + /*profiling_string=*/nullptr, + /*builtin_code=*/0, + /*custom_name=*/nullptr, + /*version=*/0}; return &r; } ",0,test 6b1371de9389f90ed93c1d5db2112a10877b410b,tensorflow/tensorflow,"Remove legacy EagerContext constructor PiperOrigin-RevId: 253598769",context.cc,"@@ -53,14 +53,6 @@ bool ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val) { } // namespace -EagerContext::EagerContext(const SessionOptions& opts, - ContextDevicePlacementPolicy default_policy, - bool async, - std::unique_ptr device_mgr, - Rendezvous* rendezvous) - : EagerContext(opts, default_policy, async, device_mgr.release(), - /*device_mgr_owned*/ true, rendezvous, nullptr) {} - EagerContext::EagerContext( const SessionOptions& opts, ContextDevicePlacementPolicy default_policy, bool async, const DeviceMgr* device_mgr, bool device_mgr_owned, ",0,train 6b1371de9389f90ed93c1d5db2112a10877b410b,tensorflow/tensorflow,"Remove legacy EagerContext constructor PiperOrigin-RevId: 253598769",context.h,"@@ -83,12 +83,6 @@ class RunMetadataListener { class EagerContext : public core::RefCounted { public: - // TODO: remove this constructor once we migrate all callers to the next one. 
- EagerContext(const SessionOptions& opts, - ContextDevicePlacementPolicy default_policy, bool async, - std::unique_ptr device_mgr, - Rendezvous* rendezvous); - EagerContext( const SessionOptions& opts, ContextDevicePlacementPolicy default_policy, bool async, const DeviceMgr* device_mgr, bool device_mgr_owned, @@ -97,7 +91,7 @@ class EagerContext : public core::RefCounted { std::function rendezvous_creator = nullptr, const DeviceMgr* remote_device_mgr = nullptr); - ~EagerContext(); + ~EagerContext() override; // Returns the function library runtime for the given device. FunctionLibraryRuntime* func_lib(const Device* d) const { ",0,train 6b1371de9389f90ed93c1d5db2112a10877b410b,tensorflow/tensorflow,"Remove legacy EagerContext constructor PiperOrigin-RevId: 253598769",delegate_data.cc,"@@ -37,7 +37,7 @@ tensorflow::Status DelegateData::Prepare( TF_RETURN_IF_ERROR(tensorflow::DeviceFactory::AddDevices( session_options, ""/job:localhost/replica:0/task:0"", &devices)); - std::unique_ptr device_mgr = + auto device_mgr = absl::make_unique(std::move(devices)); // Note that Rendezvous is ref-counted so it will be automatically deleted. tensorflow::Rendezvous* rendezvous = @@ -45,7 +45,8 @@ tensorflow::Status DelegateData::Prepare( eager_context_ = new tensorflow::EagerContext( session_options, tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, - /*async=*/false, std::move(device_mgr), rendezvous); + /*async=*/false, device_mgr.release(), /*device_mgr_owned*/ true, + rendezvous, nullptr); return tensorflow::Status(); } ",0,train 8eb015561b3a2f2e27f01617fd14d5a4f4b215bd,tensorflow/tensorflow,"[XLA] Sort vector of HloBuffer::Id values before uniqifying. Fix a latent bug in HLO dataflow analysis in the method Phi. When uniquifying a vector of HloBuffer:Id values, first std::sort it before calling std::erase and std::unique. PiperOrigin-RevId: 158888326",hlo_dataflow_analysis.cc,"@@ -409,6 +409,7 @@ InstructionValueSet HloDataflowAnalysis::Phi( input_value_ids.push_back(value_id); } } + std::sort(input_value_ids.begin(), input_value_ids.end()); input_value_ids.erase( std::unique(input_value_ids.begin(), input_value_ids.end()), input_value_ids.end()); ",0,train 4182ece77aae763f2acc07255c40279cbe3c587a,tensorflow/tensorflow,"improve compute high rank hessians (#15308) * fix possible compute high rank hessian fix possible compute high rank hessian * add high rank hessians unittest * fix retuning a shape of hessian \w test * fix use implicitly tensor shape * Space nearby operators. * fix to tensorflow style guide * fix to tensorflow style guide (Space nearby operators)",gradients_impl.py,"@@ -977,9 +977,7 @@ def hessians(ys, xs, name=""hessians"", colocate_gradients_with_ops=False, `hessians()` adds ops to the graph to output the Hessian matrix of `ys` with respect to `xs`. It returns a list of `Tensor` of length `len(xs)` - where each tensor is the Hessian of `sum(ys)`. This function currently - only supports evaluating the Hessian with respect to (a list of) one- - dimensional tensors. + where each tensor is the Hessian of `sum(ys)`. The Hessian is a matrix of second-order partial derivatives of a scalar tensor (see https://en.wikipedia.org/wiki/Hessian_matrix for more details). @@ -1005,31 +1003,32 @@ def hessians(ys, xs, name=""hessians"", colocate_gradients_with_ops=False, 'colocate_gradients_with_ops': colocate_gradients_with_ops, 'gate_gradients': gate_gradients, 'aggregation_method': aggregation_method - } + } # Compute first-order derivatives and iterate for each x in xs. 
hessians = [] _gradients = gradients(ys, xs, **kwargs) - for i, _gradient, x in zip(range(len(xs)), _gradients, xs): - # Ensure that x is a vector. - check_rank = check_ops.assert_rank( - x, 1, message='Cannot compute Hessian because element %d of `xs` does ' - 'not have rank one.' % i - ) - with ops.control_dependencies([check_rank]): - # Declare an iterator and tensor array loop variables for the gradients. - n = array_ops.size(x) - loop_vars = [ + for gradient, x in zip(_gradients, xs): + # change shape to one-dimension without graph branching + gradient = array_ops.reshape(gradient, [-1]) + + # Declare an iterator and tensor array loop variables for the gradients. + n = array_ops.size(x) + loop_vars = [ array_ops.constant(0, dtypes.int32), tensor_array_ops.TensorArray(x.dtype, n) - ] - # Iterate over all elements of the gradient and compute second order - # derivatives. - _, hessian = control_flow_ops.while_loop( - lambda j, _: j < n, - lambda j, result: (j + 1, - result.write(j, gradients(_gradient[j], x)[0])), - loop_vars - ) - - hessians.append(hessian.stack()) + ] + # Iterate over all elements of the gradient and compute second order + # derivatives. + _, hessian = control_flow_ops.while_loop( + lambda j, _: j < n, + lambda j, result: (j + 1, + result.write(j, gradients(gradient[j], x)[0])), + loop_vars + ) + + _shape = array_ops.shape(x) + _reshaped_hessian = array_ops.reshape( + hessian.stack(), array_ops.concat((_shape, _shape), 0) + ) + hessians.append(_reshaped_hessian) return hessians ",0,train 4182ece77aae763f2acc07255c40279cbe3c587a,tensorflow/tensorflow,"improve compute high rank hessians (#15308) * fix possible compute high rank hessian fix possible compute high rank hessian * add high rank hessians unittest * fix retuning a shape of hessian \w test * fix use implicitly tensor shape * Space nearby operators. * fix to tensorflow style guide * fix to tensorflow style guide (Space nearby operators)",gradients_test.py,"@@ -621,6 +621,45 @@ class HessianTest(test_util.TensorFlowTestCase): with self.assertRaises(ValueError): gradients.hessians(x, x) + def testHessian2D_square_matrix(self): + # Manually compute the Hessian explicitly for a low-dimensional problem + # and check that `hessian` matches. 
Specifically, the Hessian of + # f(x) = 1/2 * x^T * x is H = constant (block identity matrix) + m = 3 + rng = np.random.RandomState([1, 2, 3]) + x_value = rng.randn(m, m).astype(""float32"") + with self.test_session(use_gpu=True): + x = constant_op.constant(x_value) + x_square = math_ops.reduce_sum( + math_ops.matmul(array_ops.transpose(x), x) * 0.5 + ) + hess = gradients.hessians(x_square, x)[0] + hess_actual = hess.eval() + hess_value = np.bmat([ + [elem*np.ones((m, m)) for elem in vec] + for vec in np.eye(m) + ]).astype(""float32"") + self.assertAllEqual((m, m, m, m), hess_actual.shape) + self.assertAllClose(hess_value, hess_actual.reshape((m * m, m * m))) + + def testHessian2D_non_square_matrix(self): + m = 3 + n = 4 + rng = np.random.RandomState([1, 2, 3]) + x_value = rng.randn(m, n).astype(""float32"") + with self.test_session(use_gpu=True): + x = constant_op.constant(x_value) + x_square = math_ops.reduce_sum( + math_ops.matmul(array_ops.transpose(x), x) * 0.5 + ) + hess = gradients.hessians(x_square, x)[0] + hess_actual = hess.eval() + hess_value = np.bmat([ + [elem*np.ones((n, n)) for elem in vec] + for vec in np.eye(m) + ]).astype(""float32"") + self.assertAllEqual((m, n, m, n), hess_actual.shape) + self.assertAllClose(hess_value, hess_actual.reshape((m * n, m * n))) @test_util.with_c_api class IndexedSlicesToTensorTest(test_util.TensorFlowTestCase): ",0,train 9f42ebd5982688511ecc0ef7d23de02b64d8dd1e,tensorflow/tensorflow,"Improve error messages and doc strings for eager-mode tf.keras.Model.fit() + tf.data objects - Previously, when validation_steps was missing, the error message incorrectly says ""please provide either batch_size or steps_per_epoch"". Now it reads ""please provide either batch_size or validation_steps"". - Some whitespace-related fixes. PiperOrigin-RevId: 215503991",training.py,"@@ -1419,6 +1419,8 @@ class Model(Network): - tuple `(x_val, y_val)` of Numpy arrays or tensors - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays - dataset or a dataset iterator + For the first two cases, `batch_size` must be provided. + For the last case, `validation_steps` must be provided. shuffle: Boolean (whether to shuffle the training data before each epoch) or str (for 'batch'). 'batch' is a special option for dealing with the @@ -1454,9 +1456,10 @@ class Model(Network): TensorFlow data tensors, the default `None` is equal to the number of samples in your dataset divided by the batch size, or 1 if that cannot be determined. - validation_steps: Only relevant if `steps_per_epoch` - is specified. Total number of steps (batches of samples) - to validate before stopping. + validation_steps: Only relevant if `validation_data` is provided and + is a dataset or dataset iterator. Total number of steps (batches of + samples) to draw before stopping when performing validation + at the end of every epoch. max_queue_size: Integer. Used for generator or `keras.utils.Sequence` input only. Maximum size for the generator queue. If unspecified, `max_queue_size` will default to 10. ",0,test 9f42ebd5982688511ecc0ef7d23de02b64d8dd1e,tensorflow/tensorflow,"Improve error messages and doc strings for eager-mode tf.keras.Model.fit() + tf.data objects - Previously, when validation_steps was missing, the error message incorrectly says ""please provide either batch_size or steps_per_epoch"". Now it reads ""please provide either batch_size or validation_steps"". - Some whitespace-related fixes. 
PiperOrigin-RevId: 215503991",training_eager.py,"@@ -739,7 +739,8 @@ def test_loop(model, inputs, targets, y=targets, sample_weights=sample_weights, batch_size=batch_size, - steps_per_epoch=steps) + steps_per_epoch=steps, + is_validation=True) with backend.learning_phase_scope(0): return iterator_test_loop(model, inputs, steps, verbose=verbose) ",0,test 9f42ebd5982688511ecc0ef7d23de02b64d8dd1e,tensorflow/tensorflow,"Improve error messages and doc strings for eager-mode tf.keras.Model.fit() + tf.data objects - Previously, when validation_steps was missing, the error message incorrectly says ""please provide either batch_size or steps_per_epoch"". Now it reads ""please provide either batch_size or validation_steps"". - Some whitespace-related fixes. PiperOrigin-RevId: 215503991",training_eager_test.py,"@@ -125,6 +125,36 @@ class TrainingTest(test.TestCase): model.train_on_batch(inputs, targets) model.test_on_batch(inputs, targets) + def test_model_fit_and_validation_with_missing_arg_errors(self): + x = keras.layers.Input(shape=(3,), name='input') + y = keras.layers.Dense(4, name='dense')(x) + model = keras.Model(x, y) + model.compile(optimizer=RMSPropOptimizer(learning_rate=0.001), loss='mse') + + x = keras.backend.zeros(shape=(10, 3)) + y = keras.backend.zeros(shape=(10, 4)) + dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).repeat(10).batch(5) + iterator = dataset.make_one_shot_iterator() + validation_dataset = dataset_ops.Dataset.from_tensor_slices( + (x, y)).repeat(10).batch(5) + validation_iterator = validation_dataset.make_one_shot_iterator() + + with self.assertRaisesRegexp( + ValueError, r'specify .* `steps_per_epoch`'): + model.fit(iterator, epochs=1, verbose=0) + with self.assertRaisesRegexp( + ValueError, r'provide either `batch_size` or `validation_steps`'): + model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0, + validation_data=(x, y)) + with self.assertRaisesRegexp( + ValueError, r'provide either `batch_size` or `validation_steps`'): + model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0, + validation_data=validation_dataset) + with self.assertRaisesRegexp( + ValueError, r'provide either `batch_size` or `validation_steps`'): + model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0, + validation_data=validation_iterator) + def test_generator_methods(self): model = keras.Sequential() model.add(keras.layers.Dense(4, input_shape=(3,))) ",0,test 9f42ebd5982688511ecc0ef7d23de02b64d8dd1e,tensorflow/tensorflow,"Improve error messages and doc strings for eager-mode tf.keras.Model.fit() + tf.data objects - Previously, when validation_steps was missing, the error message incorrectly says ""please provide either batch_size or steps_per_epoch"". Now it reads ""please provide either batch_size or validation_steps"". - Some whitespace-related fixes. PiperOrigin-RevId: 215503991",training_utils.py,"@@ -106,7 +106,8 @@ def convert_to_iterator(x=None, batch_size=None, steps_per_epoch=None, epochs=1, - shuffle=False): + shuffle=False, + is_validation=False): """"""Converts NumPy arrays or EagerTensors to an EagerIterator. Combines all provided data into a single EagerIterator. @@ -124,6 +125,9 @@ def convert_to_iterator(x=None, epoch. epochs: Epochs to repeat iterator for. shuffle: Whether to shuffle data after each epoch. + is_validation: Whether this call is for validation during a training + (e.g., `fit()`) call. This info is used to construct error messages + (if any). 
Raises: ValueError: if steps_per_epoch cannot be calculated from the data @@ -151,9 +155,12 @@ def convert_to_iterator(x=None, steps_per_epoch = int(math.ceil(num_samples / batch_size)) if steps_per_epoch is None: - raise ValueError('Could not determine steps_per_epoch.' - 'Please provide either batch_size or' - 'steps_per_epoch.') + alternative_arg_name = ( + 'validation_steps' if is_validation else 'steps_per_epoch') + raise ValueError( + 'Could not determine how to convert EagerTensors into EagerIterator. ' + 'Please provide either `batch_size` or ' + '`%s`.' % alternative_arg_name) # TODO(omalleyt) for NumPy arrays in graph mode # placeholder ops should be used ",0,test a13b9d4d58e311729de3a967d8c780a95f6691ae,tensorflow/tensorflow,NFC following code review. comment typo and rename unroll to unrollInnerTileLoop.,ir_emitter_unnested.cc,"@@ -1897,7 +1897,7 @@ bool MayPreventVectorization(const HloInstruction& hlo) { return false; } } else if (hlo.opcode() == HloOpcode::kReduce) { - // TODO: check if the to_apply() attribute contain instruction + // TODO: check if the to_apply() attribute contains instruction // that break LLVM vectorization. return false; } @@ -1942,17 +1942,17 @@ static llvm::Value* GetStartOffsetX(const KernelMappingScheme& mapping_scheme, return b->CreateMul(thread_id_x, constant(x_num_steps)); } -// This function calls emit_elem_function() x_num_steps times. If -// vector_size==1, then each element index passed to -// emit_elem_function() will be separated by step_x. If vector_size>1, -// then it must be a multiple of x_num_steps. In that case, it +// Calls `emit_elem_function()` `x_num_steps` times. If +// `vector_size`==1, then each element index passed to +// `emit_elem_function()` will be separated by `step_x`. If `vector_size`>1, +// then it must be a multiple of `x_num_steps`. In that case, it // triggers a different indexing order that is vectorizable by -// LLVM. It generates many groups of calls to emit_elem_function. Each -// group is separated by step_x elements. Inside a group, elements -// are consecutive. If check_x_tile_bounds is true, then it will check -// if the element index is in bound compared to tile_width before -// calling emit_elem_function. -static void Unroll( +// LLVM. It generates many groups of calls to `emit_elem_function`. Each +// group is separated by `step_x` elements. Inside a group, elements +// are consecutive. If `check_x_tile_bounds` is true, then it will check +// if the element index is in bound compared to `tile_width` before +// calling `emit_elem_function`. +static void UnrollInnerTileLoop( bool check_x_tile_bounds, int64 x_num_steps, int64 step_x, int64 vector_size, const string& loop_name, KernelSupportLibrary* ksl, llvm::Value* start_offset_x, llvm::Value* y_loc, llvm::Value* tile_width, @@ -2035,38 +2035,39 @@ void IrEmitterUnnested::EmitTile( // // TODO(cheshire): Once ptxas is fixed and TF switches to it, remove the // workaround. 
- ksl->For(loop_name + ""_y_in_tile"", - /*start=*/constant(0), - /*end=*/ - ceil_of_ratio(b_.CreateSub(tile_height, thread_id_info.thread_id_y), - num_threads_y), - /*step=*/constant(1), [&](llvm::Value* y_indvar) { - llvm::Value* y_loc = - b_.CreateAdd(thread_id_info.thread_id_y, - b_.CreateMul(y_indvar, num_threads_y)); - auto unroll = [&](bool check_x_tile_bounds) { - return Unroll(check_x_tile_bounds, x_num_steps, step_x, - vector_size, loop_name, ksl, start_offset_x, y_loc, - tile_width, source_idx, b_, &emit_elem_function); - }; - - // Only take this path when we unroll in a way vectorizable by - // LLVM. Special case when the tile doesn't fit completely for even - // row size. For odd row size every other row isn't aligned to the - // vectorized size, so it can't be vectorized by LLVM. - if (!x_tile_fits && - mapping_scheme.GetIndexingOrder() == kLinearStridedIndexingX) { - ksl->If(loop_name + ""_is_full_tile"", - // For the last block, tile_width will be the number of - // elements left. - b_.CreateICmpEQ(constant(mapping_scheme.GetTileSizeX()), - tile_width), - [&] { unroll(/*check_x_tile_bounds=*/false); }, - [&] { unroll(/*check_x_tile_bounds=*/true); }); - } else { - unroll(/*check_x_tile_bounds=*/!x_tile_fits); - } - }); + ksl->For( + loop_name + ""_y_in_tile"", + /*start=*/constant(0), + /*end=*/ + ceil_of_ratio(b_.CreateSub(tile_height, thread_id_info.thread_id_y), + num_threads_y), + /*step=*/constant(1), [&](llvm::Value* y_indvar) { + llvm::Value* y_loc = b_.CreateAdd( + thread_id_info.thread_id_y, b_.CreateMul(y_indvar, num_threads_y)); + auto unrollInnerTileLoop = [&](bool check_x_tile_bounds) { + return UnrollInnerTileLoop(check_x_tile_bounds, x_num_steps, step_x, + vector_size, loop_name, ksl, + start_offset_x, y_loc, tile_width, + source_idx, b_, &emit_elem_function); + }; + + // Only take this path when we unroll in a way vectorizable by + // LLVM. Special case when the tile doesn't fit completely for even + // row size. For odd row size every other row isn't aligned to the + // vectorized size, so it can't be vectorized by LLVM. + if (!x_tile_fits && + mapping_scheme.GetIndexingOrder() == kLinearStridedIndexingX) { + ksl->If(loop_name + ""_is_full_tile"", + // For the last block, tile_width will be the number of + // elements left. + b_.CreateICmpEQ(constant(mapping_scheme.GetTileSizeX()), + tile_width), + [&] { unrollInnerTileLoop(/*check_x_tile_bounds=*/false); }, + [&] { unrollInnerTileLoop(/*check_x_tile_bounds=*/true); }); + } else { + unrollInnerTileLoop(/*check_x_tile_bounds=*/!x_tile_fits); + } + }); } // Emits code to process a tensor element in a tile for the given kCopy HLO that ",0,train 342f6b571f261da303969e0d2da275661d93955a,tensorflow/tensorflow,"0 Hz is now accepted as the lower frequency limit for the MFCC filterbank. PiperOrigin-RevId: 170594836",mfcc_mel_filterbank.cc,"@@ -62,8 +62,8 @@ bool MfccMelFilterbank::Initialize(int input_length, return false; } - if (lower_frequency_limit <= 0) { - LOG(ERROR) << ""Lower frequency limit must be positive.""; + if (lower_frequency_limit < 0) { + LOG(ERROR) << ""Lower frequency limit must be nonnegative.""; return false; } ",0,train 9810da1b87aae689cac42bae754e7e4cb5a99d57,tensorflow/tensorflow,"Adds XLA support for tf.nn.dynamic_rnn Changes tf.nn.dynamic_rnn to specify `maximum_iterations` argument for the while_loop. When `maximum_iterations` argument is supplied to tf.while_loop, use this to provide an upper bound on the size of Stacks used for gradient computation. 
By specifying the stack limit we can generate gradient code for while loops that uses fixed shape TensorArrays and hence can be compiled with XLA. PiperOrigin-RevId: 178802710",control_flow_ops.py,"@@ -748,22 +748,26 @@ class GradLoopState(object): outer_grad_ctxt = outer_grad_state.grad_context outer_grad_ctxt.Enter() - self._grad_context = WhileContext(forward_ctxt.parallel_iterations, - forward_ctxt.back_prop, - forward_ctxt.swap_memory, - forward_ctxt.name, - self) + self._grad_context = WhileContext( + maximum_iterations=forward_ctxt.maximum_iterations, + parallel_iterations=forward_ctxt.parallel_iterations, + back_prop=forward_ctxt.back_prop, + swap_memory=forward_ctxt.swap_memory, + name=forward_ctxt.name, + grad_state=self) real_cnt = outer_grad_state.AddBackpropAccumulatedValue(history_cnt, cnt) self._grad_index = self._grad_context.AddBackpropLoopCounter( real_cnt, outer_grad_state) outer_grad_ctxt.Exit() else: if outer_forward_ctxt: outer_forward_ctxt.Enter() - self._grad_context = WhileContext(forward_ctxt.parallel_iterations, - forward_ctxt.back_prop, - forward_ctxt.swap_memory, - forward_ctxt.name, - self) + self._grad_context = WhileContext( + maximum_iterations=forward_ctxt.maximum_iterations, + parallel_iterations=forward_ctxt.parallel_iterations, + back_prop=forward_ctxt.back_prop, + swap_memory=forward_ctxt.swap_memory, + name=forward_ctxt.name, + grad_state=self) self._grad_index = self._grad_context.AddBackpropLoopCounter( cnt, outer_grad_state) if outer_forward_ctxt: outer_forward_ctxt.Exit() @@ -893,9 +897,14 @@ class GradLoopState(object): with ops.control_dependencies(None): if curr_ctxt: curr_ctxt.Enter() with ops.colocate_with(value): + maximum_iterations = self.forward_context.maximum_iterations + if maximum_iterations is None: + maximum_iterations = constant_op.constant(-1, dtypes.int32) # pylint: disable=protected-access - acc = gen_data_flow_ops._stack_v2(-1, value.dtype.base_dtype, - name=""f_acc"") + acc = gen_data_flow_ops._stack_v2( + max_size=maximum_iterations, + elem_type=value.dtype.base_dtype, + name=""f_acc"") # pylint: enable=protected-access if curr_ctxt: curr_ctxt.Exit() @@ -1767,6 +1776,7 @@ def _UnpackIfSingleton(res): return res +# pylint: disable=redefined-outer-name # pylint: disable=g-doc-args @deprecation.deprecated_args( None, @@ -1943,6 +1953,7 @@ def cond(pred, true_fn=None, false_fn=None, strict=False, name=None, merges = _UnpackIfSingleton(merges) return merges # pylint: enable=g-doc-args +# pylint: enable=redefined-outer-name def _resource_safe_shape(t): @@ -1960,12 +1971,19 @@ def _resource_safe_shape(t): class WhileContext(ControlFlowContext): """"""The context for the loop construct."""""" - def __init__(self, parallel_iterations=10, back_prop=True, swap_memory=False, - name=""while_context"", grad_state=None, context_def=None, + def __init__(self, + maximum_iterations=None, + parallel_iterations=10, + back_prop=True, + swap_memory=False, + name=""while_context"", + grad_state=None, + context_def=None, import_scope=None): """"""""Creates a `WhileContext`. Args: + maximum_iterations: Optional upper bound on number of loop iterations. parallel_iterations: The number of iterations allowed to run in parallel. back_prop: Whether backprop is enabled for this while loop. swap_memory: Whether GPU-CPU memory swap is enabled for this loop. 
@@ -1980,16 +1998,17 @@ class WhileContext(ControlFlowContext): self._init_from_proto(context_def, import_scope=import_scope) else: ControlFlowContext.__init__(self) - self._init_from_args(parallel_iterations, back_prop, swap_memory, - name) + self._init_from_args(maximum_iterations, parallel_iterations, back_prop, + swap_memory, name) # The gradient loop state. self._grad_state = grad_state - def _init_from_args(self, parallel_iterations, back_prop, swap_memory, - name): + def _init_from_args(self, maximum_iterations, parallel_iterations, back_prop, + swap_memory, name): """"""Creates a new `WhileContext` from arguments. Args: + maximum_iterations: Optional upper bound on number of loop iterations. parallel_iterations: The number of iterations allowed to run in parallel. back_prop: Whether backprop is enabled for this while loop. swap_memory: Whether GPU-CPU memory swap is enabled for this loop. @@ -2002,6 +2021,7 @@ class WhileContext(ControlFlowContext): raise ValueError(""`parallel_iterations` must be a positive integer: "" ""%s"" % parallel_iterations) self._name = ops.get_default_graph().unique_name(name) + self._maximum_iterations = maximum_iterations self._parallel_iterations = parallel_iterations self._back_prop = back_prop self._swap_memory = swap_memory @@ -2029,6 +2049,12 @@ class WhileContext(ControlFlowContext): g = ops.get_default_graph() self._name = ops.prepend_name_scope( context_def.context_name, import_scope) + if context_def.maximum_iterations_name: + self._maximum_iterations = g.as_graph_element( + ops.prepend_name_scope(context_def.maximum_iterations_name, + import_scope)) + else: + self._maximum_iterations = None self._parallel_iterations = context_def.parallel_iterations self._back_prop = context_def.back_prop self._swap_memory = context_def.swap_memory @@ -2056,6 +2082,11 @@ class WhileContext(ControlFlowContext): def name(self): return self._name + @property + def maximum_iterations(self): + """"""The maximum number of iterations that will be executed."""""" + return self._maximum_iterations + @property def parallel_iterations(self): """"""The number of iterations allowed to run in parallel."""""" @@ -2106,6 +2137,9 @@ class WhileContext(ControlFlowContext): context_def.context_name = ops.strip_name_scope( self.name, export_scope) context_def.parallel_iterations = self._parallel_iterations + if self._maximum_iterations is not None: + context_def.maximum_iterations_name = ops.strip_name_scope( + self._maximum_iterations.name, export_scope) context_def.back_prop = self._back_prop context_def.swap_memory = self._swap_memory context_def.pivot_for_pred_name = ops.strip_name_scope( @@ -2724,6 +2758,7 @@ class WhileContext(ControlFlowContext): return True +# pylint: disable=redefined-outer-name def while_loop(cond, body, loop_vars, shape_invariants=None, parallel_iterations=10, back_prop=True, swap_memory=False, name=None, maximum_iterations=None): @@ -2889,13 +2924,18 @@ def while_loop(cond, body, loop_vars, shape_invariants=None, shape_invariants = (tensor_shape.TensorShape([]), shape_invariants) nest.assert_same_structure(loop_vars, shape_invariants) - loop_context = WhileContext(parallel_iterations, back_prop, swap_memory) # pylint: disable=redefined-outer-name + loop_context = WhileContext( + maximum_iterations=maximum_iterations, + parallel_iterations=parallel_iterations, + back_prop=back_prop, + swap_memory=swap_memory) ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context) result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants) if 
maximum_iterations is not None: return result[1] else: return result +# pylint: enable=redefined-outer-name def _AsTensorList(x, p): ",0,train 9810da1b87aae689cac42bae754e7e4cb5a99d57,tensorflow/tensorflow,"Adds XLA support for tf.nn.dynamic_rnn Changes tf.nn.dynamic_rnn to specify `maximum_iterations` argument for the while_loop. When `maximum_iterations` argument is supplied to tf.while_loop, use this to provide an upper bound on the size of Stacks used for gradient computation. By specifying the stack limit we can generate gradient code for while loops that uses fixed shape TensorArrays and hence can be compiled with XLA. PiperOrigin-RevId: 178802710",control_flow_ops_test.py,"@@ -452,18 +452,25 @@ class ContextTest(test_util.TensorFlowTestCase): c.to_proto(), control_flow_ops.CondContext.from_proto(c.to_proto()).to_proto()) - def testWhileContext(self): + def _testWhileContextHelper(self, maximum_iterations=None): with self.test_session() as sess: i = constant_op.constant(0) c = lambda i: math_ops.less(i, 10) b = lambda i: math_ops.add(i, 1) - control_flow_ops.while_loop(c, b, [i]) + control_flow_ops.while_loop( + c, b, [i], maximum_iterations=maximum_iterations) for op in sess.graph.get_operations(): - c = op._get_control_flow_context() - if c: - self.assertProtoEquals( - c.to_proto(), - control_flow_ops.WhileContext.from_proto(c.to_proto()).to_proto()) + context = op._get_control_flow_context() + if context: + self.assertProtoEquals(context.to_proto(), + control_flow_ops.WhileContext.from_proto( + context.to_proto()).to_proto()) + + def testWhileContext(self): + self._testWhileContextHelper() + + def testWhileContextWithMaximumIterations(self): + self._testWhileContextHelper(maximum_iterations=10) def testControlContextImportScope(self): with self.test_session(): ",0,train 9810da1b87aae689cac42bae754e7e4cb5a99d57,tensorflow/tensorflow,"Adds XLA support for tf.nn.dynamic_rnn Changes tf.nn.dynamic_rnn to specify `maximum_iterations` argument for the while_loop. When `maximum_iterations` argument is supplied to tf.while_loop, use this to provide an upper bound on the size of Stacks used for gradient computation. By specifying the stack limit we can generate gradient code for while loops that uses fixed shape TensorArrays and hence can be compiled with XLA. PiperOrigin-RevId: 178802710",rnn.py,"@@ -665,7 +665,7 @@ def _dynamic_rnn_loop(cell, final_outputs: A `Tensor` of shape `[time, batch_size, cell.output_size]`. If `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape` - objects, then this returns a (possibly nsted) tuple of Tensors matching + objects, then this returns a (possibly nested) tuple of Tensors matching the corresponding shapes. final_state: A `Tensor`, or possibly nested tuple of Tensors, matching in length @@ -806,11 +806,17 @@ def _dynamic_rnn_loop(cell, return (time + 1, output_ta_t, new_state) + # TODO(pbar) `loop_bound` can be reduced to `max_sequence_length` once + # TensorArray shape inference is working. When sequence lengths are highly + # variable, this will reduce the performance overheads of padding to a fixed + # maximum length. + loop_bound = time_steps _, output_final_ta, final_state = control_flow_ops.while_loop( - cond=lambda time, *_: time < time_steps, + cond=lambda time, *_: time < loop_bound, body=_time_step, loop_vars=(time, output_ta, state), parallel_iterations=parallel_iterations, + maximum_iterations=time_steps, swap_memory=swap_memory) # Unpack final output if not using output tuples. 
",0,train 9810da1b87aae689cac42bae754e7e4cb5a99d57,tensorflow/tensorflow,"Adds XLA support for tf.nn.dynamic_rnn Changes tf.nn.dynamic_rnn to specify `maximum_iterations` argument for the while_loop. When `maximum_iterations` argument is supplied to tf.while_loop, use this to provide an upper bound on the size of Stacks used for gradient computation. By specifying the stack limit we can generate gradient code for while loops that uses fixed shape TensorArrays and hence can be compiled with XLA. PiperOrigin-RevId: 178802710",model_analyzer_test.py,"@@ -230,12 +230,12 @@ class PrintModelAnalysisTest(test.TestCase): with gfile.Open(outfile, 'r') as f: lines = f.read().split('\n') result = '\n'.join([l[:min(len(l), 80)] for l in lines]) - self.assertEqual(compat.as_bytes('node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/168.85k flops)\n model_analyzer_testlib.py:63:BuildFullModel (0/1.80k params, 0/45.37k flops)\n model_analyzer_testlib.py:40:BuildSmallModel (0/0 params, 0/0 flops)\n model_analyzer_testlib.py:44:BuildSmallModel (0/4 params, 0/8 flops)\n model_analyzer_testlib.py:48:BuildSmallModel (0/648 params, 0/1.30k flops)\n model_analyzer_testlib.py:49:BuildSmallModel (0/0 params, 0/23.33k flops)\n model_analyzer_testlib.py:53:BuildSmallModel (0/1.15k params, 0/2.30k flops)\n model_analyzer_testlib.py:54:BuildSmallModel (0/0 params, 0/18.43k flops)\n model_analyzer_testlib.py:63:BuildFullModel (gradient) (0/0 params, 0/67.39k f\n model_analyzer_testlib.py:49:BuildSmallModel (gradient) (0/0 params, 0/46.66\n model_analyzer_testlib.py:54:BuildSmallModel (gradient) (0/0 params, 0/20.74\n model_analyzer_testlib.py:67:BuildFullModel (0/1.04k params, 0/18.57k flops)\n model_analyzer_testlib.py:67:BuildFullModel (gradient) (0/0 params, 0/37.00k f\n model_analyzer_testlib.py:69:BuildFullModel (0/0 params, 0/0 flops)\n model_analyzer_testlib.py:70:BuildFullModel (0/0 params, 0/258 flops)\n model_analyzer_testlib.py:70:BuildFullModel (gradient) (0/0 params, 0/129 flop\n model_analyzer_testlib.py:72:BuildFullModel (0/0 params, 0/141 flops)\n'), + self.assertEqual(compat.as_bytes('node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/168.86k flops)\n model_analyzer_testlib.py:63:BuildFullModel (0/1.80k params, 0/45.37k flops)\n model_analyzer_testlib.py:40:BuildSmallModel (0/0 params, 0/0 flops)\n model_analyzer_testlib.py:44:BuildSmallModel (0/4 params, 0/8 flops)\n model_analyzer_testlib.py:48:BuildSmallModel (0/648 params, 0/1.30k flops)\n model_analyzer_testlib.py:49:BuildSmallModel (0/0 params, 0/23.33k flops)\n model_analyzer_testlib.py:53:BuildSmallModel (0/1.15k params, 0/2.30k flops)\n model_analyzer_testlib.py:54:BuildSmallModel (0/0 params, 0/18.43k flops)\n model_analyzer_testlib.py:63:BuildFullModel (gradient) (0/0 params, 0/67.39k f\n model_analyzer_testlib.py:49:BuildSmallModel (gradient) (0/0 params, 0/46.66\n model_analyzer_testlib.py:54:BuildSmallModel (gradient) (0/0 params, 0/20.74\n model_analyzer_testlib.py:67:BuildFullModel (0/1.04k params, 0/18.58k flops)\n model_analyzer_testlib.py:67:BuildFullModel (gradient) (0/0 params, 0/37.00k f\n model_analyzer_testlib.py:69:BuildFullModel (0/0 params, 0/0 flops)\n model_analyzer_testlib.py:70:BuildFullModel (0/0 params, 0/258 flops)\n model_analyzer_testlib.py:70:BuildFullModel (gradient) (0/0 params, 0/129 flop\n model_analyzer_testlib.py:72:BuildFullModel (0/0 params, 0/141 flops)\n'), compat.as_bytes(result)) self.assertLess(0, tfprof_node.total_exec_micros) self.assertEqual(2844, 
tfprof_node.total_parameters) - self.assertEqual(168854, tfprof_node.total_float_ops) + self.assertEqual(168863, tfprof_node.total_float_ops) self.assertEqual(8, len(tfprof_node.children)) self.assertEqual('_TFProfRoot', tfprof_node.name) self.assertEqual( ",0,train 6c1737e6c8c9e5db405853178fb5e42abc080ba3,tensorflow/tensorflow,contrib/factorization: minor spelling tweaks (#17992),clustering_ops.cc,"@@ -353,7 +353,7 @@ class NearestNeighborsOp : public OpKernel { auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads()); const int64 num_threads = worker_threads.num_threads; // This kernel might be configured to use fewer than the total number of - // available CPUs on the host machine. To avoid descructive interference + // available CPUs on the host machine. To avoid destructive interference // with other jobs running on the host machine, we must only use a fraction // of total available L3 cache. Unfortunately, we cannot query the host // machine to get the number of physical CPUs. So, we use a fixed per-CPU ",0,train 6c1737e6c8c9e5db405853178fb5e42abc080ba3,tensorflow/tensorflow,contrib/factorization: minor spelling tweaks (#17992),factorization_ops.py,"@@ -106,7 +106,7 @@ class WALSModel(object): # the prep_gramian_op for row(column) can be run. worker_init_op = model.worker_init - # To be run once per interation sweep before the row(column) update + # To be run once per integration sweep before the row(column) update # initialize ops can be run. Note that in the distributed training # situations, this should only be run by the chief trainer. All other # trainers need to block until this is done. @@ -118,9 +118,9 @@ class WALSModel(object): init_row_update_op = model.initialize_row_update_op init_col_update_op = model.initialize_col_update_op - # Ops to upate row(column). This can either take the entire sparse tensor - # or slices of sparse tensor. For distributed trainer, each trainer - # handles just part of the matrix. + # Ops to update row(column). This can either take the entire sparse + # tensor or slices of sparse tensor. For distributed trainer, each + # trainer handles just part of the matrix. _, row_update_op, unreg_row_loss, row_reg, _ = model.update_row_factors( sp_input=matrix_slices_from_queue_for_worker_shard) row_loss = unreg_row_loss + row_reg @@ -220,7 +220,7 @@ class WALSModel(object): in the form of [[w_0, w_1, ...], [w_k, ... ], [...]], with the number of inner lists matching the number of row factor shards and the elements in each inner list are the weights for the rows of the corresponding row - factor shard. In this case, w_ij = unonbserved_weight + + factor shard. In this case, w_ij = unobserved_weight + row_weights[i] * col_weights[j]. - If this is a single non-negative real number, this value is used for all row weights and w_ij = unobserved_weight + row_weights * @@ -435,7 +435,7 @@ class WALSModel(object): gramian: Variable storing the gramian calculated from the factors. Returns: - A op that updates the gramian with the calcuated value from the factors. + A op that updates the gramian with the calculated value from the factors. """""" partial_gramians = [] for f in factors: @@ -564,7 +564,7 @@ class WALSModel(object): Note that specifically this initializes the cache of the row and column weights on workers when `use_factors_weights_cache` is True. 
In this case, - if these weights are being calcualted and reset after the object is created, + if these weights are being calculated and reset after the object is created, it is important to ensure this ops is run afterwards so the cache reflects the correct values. """""" ",0,train 6c1737e6c8c9e5db405853178fb5e42abc080ba3,tensorflow/tensorflow,contrib/factorization: minor spelling tweaks (#17992),factorization_ops_test.py,"@@ -210,7 +210,7 @@ class WalsModelTest(test.TestCase): # Test row projection. # Using the specified projection weights for the 2 row feature vectors. - # This is expected to reprodue the same row factors in the model as the + # This is expected to reproduce the same row factors in the model as the # weights and feature vectors are identical to that used in model # training. projected_rows = wals_model.project_row_factors( @@ -283,7 +283,7 @@ class WalsModelTest(test.TestCase): # Test column projection. # Using the specified projection weights for the 3 column feature vectors. - # This is expected to reprodue the same column factors in the model as the + # This is expected to reproduce the same column factors in the model as the # weights and feature vectors are identical to that used in model # training. projected_cols = wals_model.project_col_factors( @@ -385,7 +385,7 @@ class WalsModelTest(test.TestCase): # Test row projection. # Using the specified projection weights for the 2 row feature vectors. - # This is expected to reprodue the same row factors in the model as the + # This is expected to reproduce the same row factors in the model as the # weights and feature vectors are identical to that used in model # training. projected_rows = wals_model.project_row_factors( @@ -462,7 +462,7 @@ class WalsModelTest(test.TestCase): # Test column projection. # Using the specified projection weights for the 2 column feature vectors. - # This is expected to reprodue the same column factors in the model as the + # This is expected to reproduce the same column factors in the model as the # weights and feature vectors are identical to that used in model # training. projected_cols = wals_model.project_col_factors( ",0,train 6c1737e6c8c9e5db405853178fb5e42abc080ba3,tensorflow/tensorflow,contrib/factorization: minor spelling tweaks (#17992),gmm_ops.py,"@@ -280,7 +280,7 @@ class GmmAlgorithm(object): self._define_score_samples() def _define_full_covariance_probs(self, shard_id, shard): - """"""Defines the full covariance probabilties per example in a class. + """"""Defines the full covariance probabilities per example in a class. Updates a matrix with dimension num_examples X num_classes. @@ -344,7 +344,7 @@ class GmmAlgorithm(object): def _define_prior_log_prob_operation(self, shard_id): """"""Computes the prior probability of all samples. - Updates a vector where each item is the prior probabibility of an + Updates a vector where each item is the prior probability of an input example. Args: ",0,train 6c1737e6c8c9e5db405853178fb5e42abc080ba3,tensorflow/tensorflow,contrib/factorization: minor spelling tweaks (#17992),gmm_test.py,"@@ -210,7 +210,7 @@ class GMMTestQueues(test.TestCase): return _fn # This test makes sure that there are no deadlocks when using a QueueRunner. - # Note that since cluster initialization is dependendent on inputs, if input + # Note that since cluster initialization is dependent on inputs, if input # is generated using a QueueRunner, one has to make sure that these runners # are started before the initialization. 
def test_queues(self): ",0,train 6c1737e6c8c9e5db405853178fb5e42abc080ba3,tensorflow/tensorflow,contrib/factorization: minor spelling tweaks (#17992),kmeans_test.py,"@@ -413,7 +413,7 @@ class KMeansCosineDistanceTest(KMeansTestBase): self.assertAllClose(score, self.true_score, atol=1e-2) def test_predict_kmeans_plus_plus(self): - # Most points are concetrated near one center. KMeans++ is likely to find + # Most points are concentrated near one center. KMeans++ is likely to find # the less populated centers. points = np.array( [[2.5, 3.5], [2.5, 3.5], [-2, 3], [-2, 3], [-3, -3], [-3.1, -3.2], @@ -604,7 +604,7 @@ class KMeansTestQueues(test.TestCase): return _fn # This test makes sure that there are no deadlocks when using a QueueRunner. - # Note that since cluster initialization is dependendent on inputs, if input + # Note that since cluster initialization is dependent on inputs, if input # is generated using a QueueRunner, one has to make sure that these runners # are started before the initialization. def test_queues(self): ",0,train 6c1737e6c8c9e5db405853178fb5e42abc080ba3,tensorflow/tensorflow,contrib/factorization: minor spelling tweaks (#17992),wals.py,"@@ -235,7 +235,7 @@ def _wals_factorization_model_function(features, labels, mode, params): num_items: An integer, the total number of items of this axis. update_fn: A function that takes one argument (`sp_input`), and that returns a tuple of - * new_factors: A flot Tensor of the factor values after update. + * new_factors: A float Tensor of the factor values after update. * update_op: a TensorFlow op which updates the factors. * loss: A float Tensor, the unregularized loss. * reg_loss: A float Tensor, the regularization loss. ",0,train fd73284e085ab9b969775b7a71ac9b7dd8d8d2b6,tensorflow/tensorflow,"Use eagertensor dtype when inferring dtypes. Also pass tensorflow dtype to PySeqToTensor. When inferring types from other tensors for converttotensor in the eager fastpath, I was seeing calls to the python EagerTensor method dtype(). This change makes it get the dtype from the EagerTensor in C when possible instead of calling back into python. PiperOrigin-RevId: 243681547",benchmarks_test.py,"@@ -917,6 +917,14 @@ class MicroBenchmarks(test.Benchmark): self._run(scan, 100) + def benchmark_fastpath_conversion_type_inference(self): + c = constant_op.constant(1., dtype=dtypes.float32) + + def fn(): + return gen_math_ops.add(c, 1) + + self._run(fn, 10000) + def _benchmarkFunctionWithResourceInputs(self, num_resources, num_iters): @def_function.function def add_all(*args): ",0,test fd73284e085ab9b969775b7a71ac9b7dd8d8d2b6,tensorflow/tensorflow,"Use eagertensor dtype when inferring dtypes. Also pass tensorflow dtype to PySeqToTensor. When inferring types from other tensors for converttotensor in the eager fastpath, I was seeing calls to the python EagerTensor method dtype(). This change makes it get the dtype from the EagerTensor in C when possible instead of calling back into python. PiperOrigin-RevId: 243681547",pywrap_tensor.cc,"@@ -13,24 +13,22 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include ""tensorflow/python/eager/pywrap_tensor.h"" + #include +#include ""structmember.h"" // NOLINT // For PyMemberDef +#include ""tensorflow/c/c_api.h"" +#include ""tensorflow/core/framework/types.h"" +#include ""tensorflow/core/framework/types.pb.h"" +#include ""tensorflow/core/lib/strings/strcat.h"" +#include ""tensorflow/python/eager/pywrap_tfe.h"" +#include ""tensorflow/python/lib/core/ndarray_tensor.h"" #include ""tensorflow/python/lib/core/ndarray_tensor_bridge.h"" #include ""tensorflow/python/lib/core/numpy.h"" #include ""tensorflow/python/lib/core/py_seq_tensor.h"" #include ""tensorflow/python/lib/core/safe_ptr.h"" -#include ""tensorflow/python/eager/pywrap_tensor.h"" -#include ""tensorflow/python/eager/pywrap_tfe.h"" - -#include ""tensorflow/c/c_api.h"" -#include ""tensorflow/core/lib/strings/strcat.h"" -#include ""tensorflow/python/lib/core/ndarray_tensor.h"" - -#include ""tensorflow/core/framework/types.h"" - -#include ""structmember.h"" // NOLINT // For PyMemberDef - // forward declare struct EagerTensor; @@ -106,19 +104,19 @@ TFE_TensorHandle* CopyToDevice(TFE_TensorHandle* handle, PyObject* ctx, return new_handle; } -// Helper function to convert `v` to an int and store it in `*out`. Returns true -// on success, false otherwise. +// Helper function to convert `v` to a tensorflow::DataType and store it in +// `*out`. Returns true on success, false otherwise. // Note that we assume that v is a python int (not long) representing a -// TF_DataType value. -bool PyIntToDataType(PyObject* v, int* out) { +// TF_DataType/tensorflow::DataType value. +bool PyIntToDataType(PyObject* v, tensorflow::DataType* out) { #if PY_MAJOR_VERSION < 3 if (PyInt_Check(v)) { - *out = PyInt_AS_LONG(v); + *out = static_cast(PyInt_AS_LONG(v)); return true; } #else if (PyLong_Check(v)) { - *out = PyLong_AsLong(v); + *out = static_cast(PyLong_AsLong(v)); return true; } #endif @@ -208,18 +206,8 @@ TFE_TensorHandle* EagerCast(TFE_Context* ctx, TFE_TensorHandle* handle, #undef RETURN_ERROR } -TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, PyObject* dtype) { - int desired_dtype = -1; - if (dtype != Py_None) { - if (!PyIntToDataType(dtype, &desired_dtype)) { - PyErr_SetString(PyExc_TypeError, - tensorflow::strings::StrCat( - ""Expecting a DataType value for dtype. Got "", - Py_TYPE(dtype)->tp_name) - .c_str()); - return nullptr; - } - } +TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, + tensorflow::DataType dtype) { tensorflow::Safe_PyObjectPtr value_decrefer; if (PyArray_IsScalar(value, Generic)) { // Convert numpy scalars to numpy arrays. 
@@ -230,14 +218,14 @@ TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, PyObject* dtype) { } if (PyArray_Check(value)) { int desired_np_dtype = -1; - if (desired_dtype >= 0) { + if (dtype != tensorflow::DT_INVALID) { if (!tensorflow::TF_DataType_to_PyArray_TYPE( - static_cast(desired_dtype), &desired_np_dtype) + static_cast(dtype), &desired_np_dtype) .ok()) { - PyErr_SetString(PyExc_TypeError, - tensorflow::strings::StrCat( - ""Invalid dtype argument value "", desired_dtype) - .c_str()); + PyErr_SetString( + PyExc_TypeError, + tensorflow::strings::StrCat(""Invalid dtype argument value "", dtype) + .c_str()); return nullptr; } } @@ -402,7 +390,7 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) { } // Extract dtype - int desired_dtype = -1; + tensorflow::DataType desired_dtype = tensorflow::DT_INVALID; if (dtype != Py_None) { if (!PyIntToDataType(dtype, &desired_dtype)) { PyErr_SetString(PyExc_TypeError, @@ -416,10 +404,11 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) { PyErr_Clear(); tensorflow::Safe_TFE_TensorHandlePtr handle = tensorflow::make_safe(static_cast( - tensorflow::ConvertToEagerTensor(value, dtype))); + tensorflow::ConvertToEagerTensor(value, desired_dtype))); if (handle == nullptr) return -1; TF_DataType handle_dtype = TFE_TensorHandleDataType(handle.get()); - if (desired_dtype >= 0 && desired_dtype != handle_dtype) { + if (desired_dtype != tensorflow::DT_INVALID && + static_cast(desired_dtype) != handle_dtype) { // Check type compatibility. if (tensorflow::IsCompatible(desired_dtype, handle_dtype)) { handle = tensorflow::make_safe(tensorflow::EagerCast( ",0,test fd73284e085ab9b969775b7a71ac9b7dd8d8d2b6,tensorflow/tensorflow,"Use eagertensor dtype when inferring dtypes. Also pass tensorflow dtype to PySeqToTensor. When inferring types from other tensors for converttotensor in the eager fastpath, I was seeing calls to the python EagerTensor method dtype(). This change makes it get the dtype from the EagerTensor in C when possible instead of calling back into python. PiperOrigin-RevId: 243681547",pywrap_tensor.h,"@@ -27,7 +27,7 @@ tensorflow::int64 PyEagerTensor_NumElements(const PyObject* tensor); namespace tensorflow { bool IsCompatible(int desired_dtype, TF_DataType returned_dtype); -TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, PyObject* dtype); +TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, DataType dtype); // TODO(nareshmodi): Move EagerCast and ReadVariableOp (which use the C API to // execute TFE Ops) to a separate common library. ",0,test fd73284e085ab9b969775b7a71ac9b7dd8d8d2b6,tensorflow/tensorflow,"Use eagertensor dtype when inferring dtypes. Also pass tensorflow dtype to PySeqToTensor. When inferring types from other tensors for converttotensor in the eager fastpath, I was seeing calls to the python EagerTensor method dtype(). This change makes it get the dtype from the EagerTensor in C when possible instead of calling back into python. PiperOrigin-RevId: 243681547",pywrap_tfe_src.cc,"@@ -16,8 +16,6 @@ limitations under the License. #include #include -#include ""tensorflow/python/eager/pywrap_tfe.h"" - #include ""absl/strings/str_cat.h"" #include ""absl/types/variant.h"" #include ""tensorflow/c/c_api.h"" @@ -35,6 +33,7 @@ limitations under the License. 
#include ""tensorflow/core/platform/protobuf.h"" #include ""tensorflow/core/platform/types.h"" #include ""tensorflow/python/eager/pywrap_tensor.h"" +#include ""tensorflow/python/eager/pywrap_tfe.h"" #include ""tensorflow/python/lib/core/safe_ptr.h"" #include ""tensorflow/python/util/util.h"" @@ -1872,33 +1871,25 @@ bool CheckInputsOk(PyObject* seq, int start_index, return true; } -PyObject* MaybeGetDType(PyObject* item) { - if (EagerTensor_CheckExact(item)) { - tensorflow::Safe_PyObjectPtr py_dtype( - PyObject_GetAttrString(item, ""dtype"")); - return PyObject_GetAttrString(py_dtype.get(), ""_type_enum""); +tensorflow::DataType MaybeGetDType(PyObject* item) { + if (EagerTensor_CheckExact(item) || CheckResourceVariable(item)) { + return FastTensorDtype(item); } - if (CheckResourceVariable(item)) { - tensorflow::Safe_PyObjectPtr py_dtype( - PyObject_GetAttrString(item, ""_dtype"")); - return PyObject_GetAttrString(py_dtype.get(), ""_type_enum""); - } - - return nullptr; + return tensorflow::DT_INVALID; } -PyObject* MaybeGetDTypeForAttr(const string& attr, - FastPathOpExecInfo* op_exec_info) { +tensorflow::DataType MaybeGetDTypeForAttr(const string& attr, + FastPathOpExecInfo* op_exec_info) { auto cached_it = op_exec_info->cached_dtypes.find(attr); if (cached_it != op_exec_info->cached_dtypes.end()) { - return GetPythonObjectFromInt(cached_it->second); + return cached_it->second; } auto it = op_exec_info->attr_to_inputs_map->find(attr); if (it == op_exec_info->attr_to_inputs_map->end()) { // No other inputs - this should never happen. - Py_RETURN_NONE; + return tensorflow::DT_INVALID; } for (const auto& input_info : it->second) { @@ -1908,17 +1899,17 @@ PyObject* MaybeGetDTypeForAttr(const string& attr, tensorflow::Safe_PyObjectPtr fast_item( PySequence_Fast(item, ""Unable to allocate"")); for (int i = 0; i < PySequence_Fast_GET_SIZE(fast_item.get()); i++) { - auto* dtype = + auto dtype = MaybeGetDType(PySequence_Fast_GET_ITEM(fast_item.get(), i)); - if (dtype != nullptr) return dtype; + if (dtype != tensorflow::DT_INVALID) return dtype; } } else { - auto* dtype = MaybeGetDType(item); - if (dtype != nullptr) return dtype; + auto dtype = MaybeGetDType(item); + if (dtype != tensorflow::DT_INVALID) return dtype; } } - Py_RETURN_NONE; + return tensorflow::DT_INVALID; } // TODO(agarwal): use an automatic mechanism for handling None arguments to @@ -2310,9 +2301,9 @@ bool ConvertToTensor( const FastPathOpExecInfo& op_exec_info, PyObject* input, tensorflow::Safe_PyObjectPtr* output_handle, // This gets a hint for this particular input. - const std::function& dtype_hint_getter, + const std::function& dtype_hint_getter, // This sets the dtype after conversion is complete. - const std::function& dtype_setter, + const std::function& dtype_setter, TF_Status* status) { if (EagerTensor_CheckExact(input)) { Py_INCREF(input); @@ -2323,28 +2314,18 @@ bool ConvertToTensor( } // The hint comes from a supposedly similarly typed tensor. 
- tensorflow::Safe_PyObjectPtr dtype_hint(dtype_hint_getter()); - if (PyErr_Occurred()) { - return false; - } + tensorflow::DataType dtype_hint = dtype_hint_getter(); tensorflow::Safe_TFE_TensorHandlePtr handle = tensorflow::make_safe(static_cast( - tensorflow::ConvertToEagerTensor(input, dtype_hint.get()))); + tensorflow::ConvertToEagerTensor(input, dtype_hint))); if (handle == nullptr) { return MaybeRaiseExceptionFromTFStatus(status, nullptr); } int desired_dtype = -1; - if (dtype_hint.get() != Py_None) { - if (!ParseTypeValue("""", dtype_hint.get(), status, &desired_dtype)) { - PyErr_SetString(PyExc_TypeError, - tensorflow::strings::StrCat( - ""Expecting a DataType value for dtype. Got "", - Py_TYPE(dtype_hint.get())->tp_name) - .c_str()); - return false; - } + if (dtype_hint != tensorflow::DT_INVALID) { + desired_dtype = static_cast(dtype_hint); } // Maybe cast to the desired type. This is intended to match python @@ -2372,7 +2353,7 @@ bool ConvertToTensor( } output_handle->reset(EagerTensorFromHandle(handle.release())); - dtype_setter(output_dtype); + dtype_setter(static_cast(output_dtype)); return true; } @@ -2394,13 +2375,12 @@ bool AddInputToOp(FastPathOpExecInfo* op_exec_info, PyObject* input, *op_exec_info, input, &py_eager_tensor, [&]() { if (input_arg.type() != tensorflow::DataType::DT_INVALID) { - return GetPythonObjectFromInt(input_arg.type()); + return input_arg.type(); } return MaybeGetDTypeForAttr(input_arg.type_attr(), op_exec_info); }, - [&](const TF_DataType dtype) { - op_exec_info->cached_dtypes[input_arg.type_attr()] = - static_cast(dtype); + [&](const tensorflow::DataType dtype) { + op_exec_info->cached_dtypes[input_arg.type_attr()] = dtype; }, status)) { return false; @@ -2737,8 +2717,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { tensorflow::Safe_PyObjectPtr py_eager_tensor; if (!ConvertToTensor( op_exec_info, py_input, &py_eager_tensor, - []() { Py_RETURN_NONE; }, [](const TF_DataType& dtype) {}, - status)) { + []() { return tensorflow::DT_INVALID; }, + [](const tensorflow::DataType dtype) {}, status)) { return nullptr; } ",0,test fd73284e085ab9b969775b7a71ac9b7dd8d8d2b6,tensorflow/tensorflow,"Use eagertensor dtype when inferring dtypes. Also pass tensorflow dtype to PySeqToTensor. When inferring types from other tensors for converttotensor in the eager fastpath, I was seeing calls to the python EagerTensor method dtype(). This change makes it get the dtype from the EagerTensor in C when possible instead of calling back into python. PiperOrigin-RevId: 243681547",py_seq_tensor.cc,"@@ -490,16 +490,13 @@ DEFINE_HELPER(ConvertBool, bool, DT_BOOL, ConvertOneBool); return errors::InvalidArgument(_error); \ } while (0) -Status PySeqToTensor(PyObject* obj, PyObject* dtype, Tensor* ret) { +Status PySeqToTensor(PyObject* obj, DataType dtype, Tensor* ret) { DataType infer_dtype; TensorShape shape; TF_RETURN_IF_ERROR(InferShapeAndType(obj, &shape, &infer_dtype)); DataType requested_dtype = DT_INVALID; - if (dtype != Py_None) { - int32 dtype_as_int = -1; - if (ConvertOneInt32(dtype, &dtype_as_int) == nullptr) { - requested_dtype = static_cast(dtype_as_int); - } + if (dtype != DT_INVALID) { + requested_dtype = dtype; } // NOTE(josh11b): If don't successfully convert to the requested type, // we just try instead to create a tensor of the inferred type and ",0,test fd73284e085ab9b969775b7a71ac9b7dd8d8d2b6,tensorflow/tensorflow,"Use eagertensor dtype when inferring dtypes. Also pass tensorflow dtype to PySeqToTensor. 
When inferring types from other tensors for converttotensor in the eager fastpath, I was seeing calls to the python EagerTensor method dtype(). This change makes it get the dtype from the EagerTensor in C when possible instead of calling back into python. PiperOrigin-RevId: 243681547",py_seq_tensor.h,"@@ -30,7 +30,7 @@ namespace tensorflow { // representing the desired dtype of the resulting Tensor. // This is used only as a hint, *ret may not have that dtype on // success and may require a cast. -Status PySeqToTensor(PyObject* obj, PyObject* dtype, Tensor* ret); +Status PySeqToTensor(PyObject* obj, DataType dtype, Tensor* ret); } // namespace tensorflow ",0,test 3b167c42039dfbc70cec16ae2ac75c11870e1729,tensorflow/tensorflow,"Fix typo in docstring PiperOrigin-RevId: 251339076",loss_scale.py,"@@ -289,7 +289,7 @@ class DynamicLossScale(LossScale): initial_loss_scale: A Python float. The loss scale to use at the beginning. It's better to start this at a very high number, because a loss scale that is too high gets lowered far more quickly than a loss - scale that is to low gets raised. The default is 2 ** 15, which is + scale that is too low gets raised. The default is 2 ** 15, which is approximately half the maximum float16 value. increment_period: Increases loss scale every `increment_period` consecutive steps that finite gradients are encountered. If a nonfinite ",0,train 87f5182a5644993e747c2f42dfe6da75b7431e66,tensorflow/tensorflow,"Fix broken test: tensorflow/contrib/eager/python:datasets_test PiperOrigin-RevId: 168914742",function.py,"@@ -348,7 +348,11 @@ class _DefinedFunction(object): def _create_definition_if_needed(self): """"""Creates the function definition if it's not created yet."""""" + with context.graph_mode(): + self._create_definition_if_needed_impl() + def _create_definition_if_needed_impl(self): + """"""This is not what you want, see _create_definition_if_needed."""""" if self._definition is not None: return ",0,train 5e6153293b73fdded18657efacb33440a5cef91b,tensorflow/tensorflow,"[tf:tfrt] Verify returned tensors alignment in TFRT/JIT python tests PiperOrigin-RevId: 401732079 Change-Id: I0c39292b21622d0fe7e5abbf00c19c8aa83d42d6",tf_cpurt_executor.cc,"@@ -22,6 +22,7 @@ limitations under the License. #include ""mlir/ExecutionEngine/CRunnerUtils.h"" #include ""mlir/Transforms/Bufferize.h"" +#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" #include ""tensorflow/compiler/mlir/tensorflow/dialect_registration.h"" #include ""tensorflow/compiler/mlir/tfrt/jit/tf_cpurt_pipeline.h"" #include ""tensorflow/core/platform/dynamic_annotations.h"" @@ -220,6 +221,15 @@ using PyBindingReturnValueConverter = ReturnValueConverter; } // namespace +template +static bool IsAligned(const T* ptr) { +#if EIGEN_MAX_ALIGN_BYTES == 0 + return true; +#else + return reinterpret_cast(ptr) % EIGEN_MAX_ALIGN_BYTES == 0; +#endif +} + // Converts StridedMemrefType to the Python array. This struct satisfies // ReturnStridedMemref's concept (see cpurt.h). // @@ -234,6 +244,7 @@ struct MemrefToPyArray { template static py::array Convert(const ConversionContext&, void* memref_ptr) { auto* memref = static_cast*>(memref_ptr); + assert(IsAligned(memref->data) && ""returned memref must be aligned""); auto memref_sizes = Sizes(memref); auto memref_strides = Strides(memref); ",0,train 05f8ea8e9522a3027d4f3f7a54d716bfafed427a,tensorflow/tensorflow,"[XLA:GPU] Do not fuse loop fusions with different output shapes. 
PiperOrigin-RevId: 209724594",multi_output_fusion.cc,"@@ -187,6 +187,19 @@ bool GpuMultiOutputFusion::LegalToFuse(HloInstruction* instr1, return false; } + // Multi-output loop fusions must have equal output shapes to be lowered. + if (instr1->fusion_kind() == HloInstruction::FusionKind::kLoop) { + Shape shape1 = instr1->IsMultiOutputFusion() + ? instr1->shape().tuple_shapes(0) + : instr1->shape(); + Shape shape2 = instr2->IsMultiOutputFusion() + ? instr2->shape().tuple_shapes(0) + : instr2->shape(); + if (!ShapeUtil::Equal(shape1, shape2)) { + return false; + } + } + // Do this check last, as it may be expensive. return !GpuInstructionFusion::FusionWouldBeTooLarge(instr1, instr2); } ",0,train 05f8ea8e9522a3027d4f3f7a54d716bfafed427a,tensorflow/tensorflow,"[XLA:GPU] Do not fuse loop fusions with different output shapes. PiperOrigin-RevId: 209724594",multi_output_fusion_test.cc,"@@ -256,6 +256,90 @@ TEST_F(MultiOutputFusionTest, MultiOutputFusionTwoLoops) { op::Tuple(op::Multiply(), op::Divide())); } +TEST_F(MultiOutputFusionTest, MultiOutputFusionSiblingLoopsDifferentShapes) { + auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R""( + fused_computation_1 { + p0.1 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0) + ROOT mul = f32[8,1,5,16,1,1]{5,4,3,2,1,0} multiply(p0.1, p0.1) + } + + fused_computation_2 { + p0.2 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0) + const.2 = f32[] constant(0) + ROOT reduce = f32[8,1,5,1,1]{4,3,2,1,0} reduce(p0.2, const.2), dimensions={3}, to_apply=scalar_add_computation + } + + ENTRY entry { + p0 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0) + fusion.1 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} fusion(p0), kind=kLoop, calls=fused_computation_1 + fusion.2 = f32[8,1,5,1,1]{4,3,2,1,0} fusion(p0), kind=kLoop, calls=fused_computation_2 + ROOT root = (f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,1,1]{4,3,2,1,0}) tuple(fusion.1, fusion.2) + })"")) + .ValueOrDie(); + ASSERT_FALSE(GpuMultiOutputFusion().Run(module.get()).ValueOrDie()); +} + +TEST_F(MultiOutputFusionTest, MultiOutputFusionSiblingLoopAndMultiOutputLoop) { + auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R""( + fused_computation_1 { + p0.1 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0) + mul = f32[8,1,5,16,1,1]{5,4,3,2,1,0} multiply(p0.1, p0.1) + exp = f32[8,1,5,16,1,1]{5,4,3,2,1,0} exponential(p0.1) + ROOT tuple = (f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,16,1,1]{5,4,3,2,1,0}) tuple(mul, exp) + } + + fused_computation_2 { + p0.2 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0) + const.2 = f32[] constant(0) + ROOT add = f32[8,1,5,16,1,1]{5,4,3,2,1,0} add(p0.2, const.2) + } + + ENTRY entry { + p0 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0) + fusion.1 = (f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,16,1,1]{5,4,3,2,1,0}) fusion(p0), kind=kLoop, calls=fused_computation_1 + fusion.2 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} fusion(p0), kind=kLoop, calls=fused_computation_2 + gte0 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} get-tuple-element(fusion.1), index=0 + gte1 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} get-tuple-element(fusion.1), index=1 + ROOT root = (f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,16,1,1]{5,4,3,2,1,0}) tuple(gte0, gte1, fusion.2) + })"")) + .ValueOrDie(); + ASSERT_TRUE(GpuMultiOutputFusion().Run(module.get()).ValueOrDie()); + SCOPED_TRACE(module->ToString()); + const HloInstruction* fusion = + module->entry_computation()->root_instruction()->operand(0)->operand(0); + ASSERT_TRUE(fusion->IsMultiOutputFusion()); + EXPECT_THAT(fusion->fused_expression_root(), + 
op::Tuple(op::Multiply(), op::Exp(), op::Add())); +} + +TEST_F(MultiOutputFusionTest, + MultiOutputFusionSiblingLoopAndMultiOutputLoopDifferentShapes) { + auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R""( + fused_computation_1 { + p0.1 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0) + mul = f32[8,1,5,16,1,1]{5,4,3,2,1,0} multiply(p0.1, p0.1) + exp = f32[8,1,5,16,1,1]{5,4,3,2,1,0} exponential(p0.1) + ROOT tuple = (f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,16,1,1]{5,4,3,2,1,0}) tuple(mul, exp) + } + + fused_computation_2 { + p0.2 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0) + const.2 = f32[] constant(0) + ROOT reduce = f32[8,1,5,1,1]{4,3,2,1,0} reduce(p0.2, const.2), dimensions={3}, to_apply=scalar_add_computation + } + + ENTRY entry { + p0 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0) + fusion.1 = (f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,16,1,1]{5,4,3,2,1,0}) fusion(p0), kind=kLoop, calls=fused_computation_1 + fusion.2 = f32[8,1,5,1,1]{4,3,2,1,0} fusion(p0), kind=kLoop, calls=fused_computation_2 + gte0 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} get-tuple-element(fusion.1), index=0 + gte1 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} get-tuple-element(fusion.1), index=1 + ROOT root = (f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,1,1]{4,3,2,1,0}) tuple(gte0, gte1, fusion.2) + })"")) + .ValueOrDie(); + ASSERT_FALSE(GpuMultiOutputFusion().Run(module.get()).ValueOrDie()); +} + TEST_F(MultiOutputFusionTest, ProducerConsumerFusionElementwiseAndReduce) { auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R""( ENTRY reduce { ",0,train 9aedc576ae2499d337b9e5ceaa78bd6f123bc77d,tensorflow/tensorflow,"[tf.data service] Extend round robin unit test to cover multiple replicas per host. PiperOrigin-RevId: 351900031 Change-Id: I2d1eb18cc24da64581f324f66ff244a277cc37ce",data_service_ops_test.py,"@@ -334,14 +334,18 @@ class DataServiceOpsTest(data_service_test_base.TestBase, # Round robin reads can cause slow cluster shutdown. GLOBAL_CLUSTERS.add(cluster) num_elements = 100 - ds = dataset_ops.Dataset.range(num_elements, output_type=dtypes.int32) - ds = ds.shuffle(num_elements) low_bucket_max = 30 mid_bucket_max = 60 bucket_boundaries = [low_bucket_max, mid_bucket_max] batch_size = 10 - num_consumers = 3 + num_consumer_hosts = 3 + replicas_per_consumer_host = 5 + num_consumers = num_consumer_hosts * replicas_per_consumer_host bucket_batch_sizes = [batch_size] * (len(bucket_boundaries) + 1) + # Set up the dataset that will run on the tf.data workers. + ds = dataset_ops.Dataset.range(num_elements, output_type=dtypes.int32) + ds = ds.shuffle(num_elements) + ds = ds.repeat() ds = ds.apply( grouping.bucket_by_sequence_length( lambda x: x, @@ -354,28 +358,43 @@ class DataServiceOpsTest(data_service_test_base.TestBase, lambda _, x: dataset_ops.Dataset.from_tensors(x), window_size=num_consumers)) ds = ds.flat_map(lambda x: x) - ds = ds.repeat() - consumers = [] - for consumer_index in range(num_consumers): - consumers.append( - self.make_distributed_dataset( - ds, - cluster, - job_name=""test"", - consumer_index=consumer_index, - num_consumers=num_consumers)) - # Use parallel interleave to read from consumers in parallel. - ds = dataset_ops.Dataset.from_tensor_slices(consumers) + # Set up the per-consumer-host datasets. During each global step, we pull + # `replicas_per_consumer_host` batches from each of these datasets. 
+ host_datasets = [] + for host_index in range(num_consumer_hosts): + per_replica_datasets = [] + for i in range(replicas_per_consumer_host): + consumer_index = host_index * replicas_per_consumer_host + i + per_replica_datasets.append( + self.make_distributed_dataset( + ds, + cluster, + job_name=""test"", + consumer_index=consumer_index, + num_consumers=num_consumers)) + host_dataset = dataset_ops.Dataset.from_tensor_slices( + per_replica_datasets) + host_dataset = host_dataset.interleave( + lambda x: x, + cycle_length=len(per_replica_datasets), + num_parallel_calls=len(per_replica_datasets), + deterministic=True) + host_datasets.append(host_dataset) + + # Use parallel interleave to read from host datasets in parallel. + ds = dataset_ops.Dataset.from_tensor_slices(host_datasets) ds = ds.interleave( - lambda x: x.prefetch(num_elements), - cycle_length=num_consumers, - num_parallel_calls=num_consumers) + lambda x: x, + block_length=replicas_per_consumer_host, + cycle_length=len(host_datasets), + num_parallel_calls=len(host_datasets), + deterministic=True) num_rounds = 10 get_next = self.getNext(ds, requires_initialization=True) results = [] - for _ in range(num_rounds): + for _ in range(num_rounds * num_consumers): results.append(self.evaluate(get_next())) def get_bucket(elem): @@ -385,8 +404,10 @@ class DataServiceOpsTest(data_service_test_base.TestBase, bucket_ind += 1 return bucket_ind + # Check that the batches for each step contain elements from the same + # bucket. for i in range(0, len(results), num_consumers): - batches = results[num_consumers * i:num_consumers * i + num_consumers] + batches = results[num_consumers * i:num_consumers * (i + 1)] bucket_inds = [get_bucket(batch[0]) for batch in batches] for bucket_ind in bucket_inds[1:]: self.assertEqual(bucket_inds[0], bucket_ind) ",0,train af62b5ccb9d06381096d18418920d06390d90be9,tensorflow/tensorflow,"Fixed the order of arguments for softmax_loss_function in two places, including a semantic code change. Change: 150685391",loss.py,"@@ -48,7 +48,7 @@ def sequence_loss(logits, targets, weights, timesteps. average_across_batch: If set, sum the cost across the batch dimension and divide the returned cost by the batch size. - softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch + softmax_loss_function: Function (labels-batch, inputs-batch) -> loss-batch to be used instead of the standard softmax (the default if this is None). name: Optional name for this operation, defaults to ""sequence_loss"". @@ -76,7 +76,7 @@ def sequence_loss(logits, targets, weights, crossent = nn_ops.sparse_softmax_cross_entropy_with_logits( labels=targets, logits=probs_flat) else: - crossent = softmax_loss_function(probs_flat, targets) + crossent = softmax_loss_function(targets, probs_flat) crossent = crossent * array_ops.reshape(weights, [-1]) if average_across_timesteps and average_across_batch: crossent = math_ops.reduce_sum(crossent) ",0,test e2b5397f126ba9cbc76a840ea0a46331e0f10897,tensorflow/tensorflow,"Update GraphDef version to 434. PiperOrigin-RevId: 316639748 Change-Id: I2f62575a1ffdf72dbbafd5a2d6a10ae2a64d4b7c",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 433 // Updated: 2020/6/15 +#define TF_GRAPH_DEF_VERSION 434 // Updated: 2020/6/16 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). 
// ",0,train 7282528b715696147a927cb3bf595d957b2f4c42,tensorflow/tensorflow,readd some types,lookup_table_op.cc,"@@ -820,6 +820,7 @@ REGISTER_KERNEL(int64, double); REGISTER_KERNEL(int64, float); REGISTER_KERNEL(int64, int32); REGISTER_KERNEL(int64, int64); +REGISTER_KERNEL(int64, string); REGISTER_KERNEL(string, bool); REGISTER_KERNEL(string, double); REGISTER_KERNEL(string, float); ",0,train 41523b32072d3fd8d87bc246234e0ec29d9146f6,tensorflow/tensorflow,"Adding examples for tf.image.random_flip_left_right and tf.image.random_flip_up_down usage. PiperOrigin-RevId: 270365884",image_ops_impl.py,"@@ -326,6 +326,26 @@ def random_flip_up_down(image, seed=None): With a 1 in 2 chance, outputs the contents of `image` flipped along the first dimension, which is `height`. Otherwise output the image as-is. + When passing a batch of images, each image will be randomly flipped + independent of other images. + + Example usage: + + Randomly flip a single image. + >>> import numpy as np + + >>> image = np.array([[[1], [2]], [[3], [4]]]) + >>> tf.image.random_flip_up_down(image, 3).numpy().tolist() + [[[3], [4]], [[1], [2]]] + + Randomly flip multiple images. + >>> images = np.array( + ... [ + ... [[[1], [2]], [[3], [4]]], + ... [[[5], [6]], [[7], [8]]] + ... ]) + >>> tf.image.random_flip_up_down(images, 4).numpy().tolist() + [[[[3], [4]], [[1], [2]]], [[[5], [6]], [[7], [8]]]] Args: image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor @@ -347,6 +367,25 @@ def random_flip_left_right(image, seed=None): With a 1 in 2 chance, outputs the contents of `image` flipped along the second dimension, which is `width`. Otherwise output the image as-is. + When passing a batch of images, each image will be randomly flipped + independent of other images. + + Example usage: + Randomly flip a single image. + >>> import numpy as np + + >>> image = np.array([[[1], [2]], [[3], [4]]]) + >>> tf.image.random_flip_left_right(image, 5).numpy().tolist() + [[[2], [1]], [[4], [3]]] + + Randomly flip multiple images. + >>> images = np.array( + ... [ + ... [[[1], [2]], [[3], [4]]], + ... [[[5], [6]], [[7], [8]]] + ... ]) + >>> tf.image.random_flip_left_right(images, 6).numpy().tolist() + [[[[2], [1]], [[4], [3]]], [[[5], [6]], [[7], [8]]]] Args: image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor ",0,train f5de0a77b3301fa1990eda1047f77c1236324b58,tensorflow/tensorflow,"Add the quantization specs for the inputs and outputs If the value is annotated by the fake quant ops, the quantization spec is extracted from the fake quant and put in the quantization attributes. PiperOrigin-RevId: 302558753 Change-Id: I26b79ee1eab32f71e4be356bd58f6d815bc19243",cpu_kernel_fusion.cc,"@@ -32,6 +32,7 @@ limitations under the License. #include ""mlir/IR/Attributes.h"" // from @llvm-project #include ""mlir/IR/BlockAndValueMapping.h"" // from @llvm-project #include ""mlir/IR/MLIRContext.h"" // from @llvm-project +#include ""mlir/IR/Matchers.h"" // from @llvm-project #include ""mlir/IR/PatternMatch.h"" // from @llvm-project #include ""mlir/IR/StandardTypes.h"" // from @llvm-project #include ""mlir/IR/Value.h"" // from @llvm-project @@ -47,21 +48,56 @@ limitations under the License. 
#define DEBUG_TYPE ""quant-kernel-fusion"" +constexpr int kFakeQuantOperandsNum = 5; +constexpr int kFakeQuantPerChannelOperandsNum = 6; + namespace mlir { namespace xla_hlo { namespace { +TypeAttr GetQuantSpec(Operation* op) { + auto fake_quant = llvm::dyn_cast_or_null(op); + if (!fake_quant || fake_quant.getNumOperands() < kFakeQuantOperandsNum || + fake_quant.getNumOperands() > kFakeQuantPerChannelOperandsNum || + fake_quant.call_target_name() != ""fake_quant_with_min_max_vars"") + return {}; + + DenseFPElementsAttr min, max; + DenseIntElementsAttr bit_width, narrow_range, quant_dim; + if (!matchPattern(fake_quant.getOperand(1), m_Constant(&min)) || + !matchPattern(fake_quant.getOperand(2), m_Constant(&max)) || + !matchPattern(fake_quant.getOperand(3), m_Constant(&bit_width)) || + !matchPattern(fake_quant.getOperand(4), m_Constant(&narrow_range))) + return {}; + + auto bit_width_val = (*bit_width.attr_value_begin()).cast(); + auto narrow_range_val = (*narrow_range.int_value_begin()).getSExtValue(); + int quant_dim_val = -1; + if (fake_quant.getNumOperands() == kFakeQuantPerChannelOperandsNum && + matchPattern(fake_quant.getOperand(kFakeQuantPerChannelOperandsNum - 1), + m_Constant(&quant_dim))) { + quant_dim_val = (*quant_dim.int_value_begin()).getSExtValue(); + } + + OpBuilder builder(op); + Type input_type = + fake_quant.getOperand(0).getType().cast().getElementType(); + return quant::GetQuantizedTypeAttr( + builder, input_type, min, max, quant_dim_val, bit_width_val, + builder.getBoolAttr(narrow_range_val), /*is_signed=*/true); +} + // Collects input values from outside for 'ops'. void CollectInputs(llvm::ArrayRef ops, llvm::SmallVectorImpl* inputs, llvm::SmallVectorImpl* input_specs) { - for (auto* op : ops) { - for (auto operand : op->getOperands()) { + for (Operation* op : ops) { + for (Value operand : op->getOperands()) { if (std::find(inputs->begin(), inputs->end(), operand) != inputs->end()) { continue; } - if (auto* def_op = operand.getDefiningOp()) { + if (Operation* def_op = operand.getDefiningOp()) { if (std::find(ops.begin(), ops.end(), def_op) == ops.end()) { inputs->push_back(operand); } @@ -71,10 +107,13 @@ void CollectInputs(llvm::ArrayRef ops, } } - for (auto input : *inputs) { + for (Value input : *inputs) { ShapedType input_type = input.getType().cast(); - // TODO(fengliuai): detect whether it is from fake quant. - input_specs->push_back(TypeAttr::get(input_type.getElementType())); + if (TypeAttr spec = GetQuantSpec(input.getDefiningOp())) { + input_specs->push_back(spec); + } else { + input_specs->push_back(TypeAttr::get(input_type.getElementType())); + } } } @@ -84,16 +123,19 @@ void CollectRets(llvm::ArrayRef ops, llvm::SmallVectorImpl* rets, llvm::SmallVectorImpl* ret_types, llvm::SmallVectorImpl* ret_specs) { - for (auto* op : ops) { - for (auto result : op->getResults()) { - for (auto* user : result.getUsers()) { + for (Operation* op : ops) { + for (Value result : op->getResults()) { + for (Operation* user : result.getUsers()) { // If there are any user outside of 'ops' if (std::find(ops.begin(), ops.end(), user) == ops.end()) { ShapedType ret_type = result.getType().cast(); rets->push_back(result); ret_types->push_back(ret_type); - // TODO(fengliuai): detect whether it is used by fake quant. 
- ret_specs->push_back(TypeAttr::get(ret_type.getElementType())); + if (TypeAttr spec = GetQuantSpec(user)) { + ret_specs->push_back(spec); + } else { + ret_specs->push_back(TypeAttr::get(ret_type.getElementType())); + } break; } } ",0,train 0655342040635311ba9221da6ab2b8e6a8ec7f32,tensorflow/tensorflow,Ref #40: Simple example for text classification saving/restoring,text_classification_save_restore.py,"@@ -0,0 +1,101 @@ +# Copyright 2015 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import csv +import numpy as np +from sklearn import metrics + +import tensorflow as tf +from tensorflow.models.rnn import rnn, rnn_cell +import skflow + +### Training data + +# Download dbpedia_csv.tar.gz from +# https://drive.google.com/folderview?id=0Bz8a_Dbh9Qhbfll6bVpmNUtUcFdjYmF2SEpmZUZUcVNiMUw1TWN6RDV3a0JHT3kxLVhVR2M +# Unpack: tar -xvf dbpedia_csv.tar.gz + +def load_dataset(filename): + target = [] + data = [] + reader = csv.reader(open(filename), delimiter=',') + for line in reader: + target.append(int(line[0])) + data.append(line[2]) + return data, np.array(target, np.float32) + +X_train, y_train = load_dataset('dbpedia_csv/train.csv') +X_test, y_test = load_dataset('dbpedia_csv/test.csv') + +### Process vocabulary + +MAX_DOCUMENT_LENGTH = 10 + +vocab_processor = skflow.preprocessing.VocabularyProcessor(MAX_DOCUMENT_LENGTH) +X_train = np.array(list(vocab_processor.fit_transform(X_train))) +X_test = np.array(list(vocab_processor.transform(X_test))) + +n_words = len(vocab_processor.vocabulary_) +print('Total words: %d' % n_words) + +### Models + +EMBEDDING_SIZE = 50 + +def average_model(X, y): + word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words, + embedding_size=EMBEDDING_SIZE, name='words') + features = tf.reduce_max(word_vectors, reduction_indices=1) + return skflow.models.logistic_regression(features, y) + +def rnn_model(X, y): + """"""Recurrent neural network model to predict from sequence of words + to a class."""""" + # Convert indexes of words into embeddings. + # This creates embeddings matrix of [n_words, EMBEDDING_SIZE] and then + # maps word indexes of the sequence into [batch_size, sequence_length, + # EMBEDDING_SIZE]. + word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words, + embedding_size=EMBEDDING_SIZE, name='words') + # Split into list of embedding per word, while removing doc length dim. + # word_list results to be a list of tensors [batch_size, EMBEDDING_SIZE]. + word_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, word_vectors) + # Create a Gated Recurrent Unit cell with hidden size of EMBEDDING_SIZE. + cell = rnn_cell.GRUCell(EMBEDDING_SIZE) + # Create an unrolled Recurrent Neural Networks to length of + # MAX_DOCUMENT_LENGTH and passes word_list as inputs for each unit. 
+ _, encoding = rnn.rnn(cell, word_list, dtype=tf.float32) + # Given encoding of RNN, take encoding of last step (e.g hidden size of the + # neural network of last step) and pass it as features for logistic + # regression over output classes. + return skflow.models.logistic_regression(encoding[-1], y) + +model_path = '/tmp/skflow_examples/text_classification' +if os.path.exists(model_path): + classifier = skflow.TensorFlowEstimator.restore(model_path) + score = metrics.accuracy_score(classifier.predict(X_test), y_test) + print('Accuracy: {0:f}'.format(score)) +else: + classifier = skflow.TensorFlowEstimator(model_fn=rnn_model, n_classes=15, + steps=100, optimizer='Adam', learning_rate=0.01, continue_training=True) + + # Continuesly train for 1000 steps & predict on test set. + while True: + try: + classifier.fit(X_train, y_train) + except KeyboardInterrupt: + classifier.save(model_path) + break + ",0,train 032daa478ef18007cf8214dbd4db0a83daebb62f,tensorflow/tensorflow,Add a comment,ir_emitter_unnested.cc,"@@ -5377,6 +5377,9 @@ ReductionCodegenInfo IrEmitterUnnested::ComputeReductionCodegenInfo( if (auto fusion = mlir::dyn_cast(unnested_hlo)) { fan_out = fusion.getFusionResults().size(); } + + // 64 is the general advice as the smallest block sizes. + // Moreover, XLA:GPU emitters need at least 32 threads at some places. int64 max_block_size = std::max(64LL, 512LL / NearestPowerOfTwo(fan_out)); return std::min( max_block_size, ",0,train d4ddccd3cca4fc837c66ae1dfa190739420ad122,tensorflow/tensorflow,"SVDF Hybrid op INT8 support. PiperOrigin-RevId: 226440102",register.cc,"@@ -173,7 +173,9 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D(), /* min_version */ 1, /* max_version */ 3); - AddBuiltin(BuiltinOperator_SVDF, Register_SVDF()); + AddBuiltin(BuiltinOperator_SVDF, Register_SVDF(), + /* min_version */ 1, + /* max_version */ 2); AddBuiltin(BuiltinOperator_RNN, Register_RNN(), /* min_version */ 1, /* max_version */ 2); ",0,train d4ddccd3cca4fc837c66ae1dfa190739420ad122,tensorflow/tensorflow,"SVDF Hybrid op INT8 support. PiperOrigin-RevId: 226440102",svdf.cc,"@@ -176,8 +176,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { context->ResizeTensor(context, output, output_size_array)); // The weights are of consistent type, so it suffices to check one. - const bool is_hybrid_op = - (input->type == kTfLiteFloat32 && weights_feature->type == kTfLiteUInt8); + const bool is_hybrid_op = (input->type == kTfLiteFloat32 && + (weights_feature->type == kTfLiteUInt8 || + weights_feature->type == kTfLiteInt8)); // Resize scratch. TfLiteIntArrayFree(node->temporaries); @@ -203,7 +204,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // of input tensors. node->temporaries->data[1] = scratch_tensor_index + 1; TfLiteTensor* input_quantized = GetTemporary(context, node, /*index=*/1); - input_quantized->type = kTfLiteUInt8; + input_quantized->type = weights_feature->type; input_quantized->allocation_type = kTfLiteArenaRw; if (!TfLiteIntArrayEqual(input_quantized->dims, input->dims)) { TfLiteIntArray* input_quantized_size = TfLiteIntArrayCopy(input->dims); @@ -297,16 +298,24 @@ TfLiteStatus EvalHybrid( // Initialize the pointer to input. const float* input_ptr_batch = input->data.f; - // Initialize the pointer to storage for quantized values and - // scaling factors. 
- int8_t* quantized_input_ptr_batch = - reinterpret_cast(input_quantized->data.uint8); + // Initialize the pointer to storage for quantized values and the weights + // feature. + int8_t* quantized_input_ptr_batch; + const int8_t* weights_feature_ptr; + if (weights_feature->type == kTfLiteUInt8) { + quantized_input_ptr_batch = + reinterpret_cast(input_quantized->data.uint8); + weights_feature_ptr = + reinterpret_cast(weights_feature->data.uint8); + } else { + quantized_input_ptr_batch = input_quantized->data.int8; + weights_feature_ptr = weights_feature->data.int8; + } + // Initialize the pointer to storage for scaling factors. float* scaling_factors_ptr = scaling_factors->data.f; - // Other initializations. - const int8_t* weights_feature_ptr = - reinterpret_cast(weights_feature->data.uint8); + // Initialize the weights scale. const float weights_feature_scale = weights_feature->params.scale; // Clear the activation (state left most column). @@ -374,7 +383,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { bias, params, scratch, activation_state, output); break; } - case kTfLiteUInt8: { + case kTfLiteUInt8: + case kTfLiteInt8: { TfLiteTensor* input_quantized = GetTemporary(context, node, /*index=*/1); TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/2); TfLiteTensor* float_weights_time = @@ -388,8 +398,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // TODO(alanchiao): refactor logic out into dequantize function. if (!op_data->float_weights_time_initialized) { const float dequantization_scale = weights_time->params.scale; - const int8_t* weights_time_ptr = - reinterpret_cast(weights_time->data.uint8); + const int8_t* weights_time_ptr; + if (weights_feature->type == kTfLiteUInt8) { + weights_time_ptr = + reinterpret_cast(weights_time->data.uint8); + } else { + weights_time_ptr = weights_time->data.int8; + } for (int i = 0; i < NumElements(float_weights_time); ++i) { float_weights_time->data.f[i] = weights_time_ptr[i] * dequantization_scale; ",0,train d4ddccd3cca4fc837c66ae1dfa190739420ad122,tensorflow/tensorflow,"SVDF Hybrid op INT8 support. 
PiperOrigin-RevId: 226440102",svdf_test.cc,"@@ -203,17 +203,30 @@ class SVDFOpModel : public BaseSVDFOpModel { class HybridSVDFOpModel : public BaseSVDFOpModel { public: HybridSVDFOpModel(int batches, int units, int input_size, int memory_size, - int rank) + int rank, TensorType tensor_type) : BaseSVDFOpModel(batches, units, input_size, memory_size, rank, - TensorType_UINT8, TensorType_UINT8) {} + tensor_type, tensor_type) { + tensor_type_ = tensor_type; + } + + void SetWeights(int weights_idx, std::vector f) { + if (tensor_type_ == TensorType_UINT8) { + SymmetricQuantizeAndPopulate(weights_idx, f); + } else { + SignedSymmetricQuantizeAndPopulate(weights_idx, f); + } + } void SetWeightsFeature(std::initializer_list f) { - SymmetricQuantizeAndPopulate(weights_feature_, f); + SetWeights(weights_feature_, f); } void SetWeightsTime(std::initializer_list f) { - SymmetricQuantizeAndPopulate(weights_time_, f); + SetWeights(weights_time_, f); } + + protected: + TensorType tensor_type_; }; class SVDFOpTest : public ::testing::Test { @@ -312,9 +325,74 @@ TEST_F(SVDFOpTest, BlackBoxTestRank2) { &svdf); } -TEST_F(SVDFOpTest, BlackBoxTestHybridRank1) { +TEST_F(SVDFOpTest, BlackBoxTestHybridRank1Uint8) { + HybridSVDFOpModel svdf(/*batches=*/2, /*units=*/4, /*input_size=*/3, + /*memory_size=*/10, /*rank=*/1, TensorType_UINT8); + svdf.SetWeightsFeature({-0.31930989, -0.36118156, 0.0079667, 0.37613347, + 0.22197971, 0.12416199, 0.27901134, 0.27557442, + 0.3905206, -0.36137494, -0.06634006, -0.10640851}); + + svdf.SetWeightsTime( + {-0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657}); + + VerifyGoldens(svdf_input, svdf_golden_output_rank_1, sizeof(svdf_input), + &svdf, + /*tolerance=*/0.002945); +} + +TEST_F(SVDFOpTest, BlackBoxTestHybridRank2Uint8) { + HybridSVDFOpModel svdf(/*batches=*/2, /*units=*/4, /*input_size=*/3, + /*memory_size=*/10, /*rank=*/2, TensorType_UINT8); + svdf.SetWeightsFeature({-0.31930989, 0.0079667, 0.39296314, 0.37613347, + 0.12416199, 0.15785322, 0.27901134, 0.3905206, + 0.21931258, -0.36137494, -0.10640851, 0.31053296, + -0.36118156, -0.0976817, -0.36916667, 0.22197971, + 0.15294972, 0.38031587, 0.27557442, 0.39635518, + -0.21580373, -0.06634006, -0.02702999, 0.27072677}); + + svdf.SetWeightsTime( + {-0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657, + + -0.14884081, 0.19931212, -0.36002168, 0.34663299, -0.11405486, + 0.12672701, 0.39463779, -0.07886535, -0.06384811, 0.08249187, + + -0.26816407, -0.19905911, 0.29211238, 0.31264046, -0.28664589, + 0.05698794, 0.11613581, 0.14078894, 0.02187902, -0.21781836, + + -0.15567942, 0.08693647, -0.38256618, 0.36580828, -0.22922277, + 
-0.0226903, 0.12878349, -0.28122205, -0.10850525, -0.11955214, + + 0.27179423, -0.04710215, 0.31069002, 0.22672787, 0.09580326, + 0.08682203, 0.1258215, 0.1851041, 0.29228821, 0.12366763}); + + VerifyGoldens(svdf_input, svdf_golden_output_rank_2, sizeof(svdf_input), + &svdf, + /*tolerance=*/0.00625109); +} + +TEST_F(SVDFOpTest, BlackBoxTestHybridRank1Int8) { HybridSVDFOpModel svdf(/*batches=*/2, /*units=*/4, /*input_size=*/3, - /*memory_size=*/10, /*rank=*/1); + /*memory_size=*/10, /*rank=*/1, TensorType_INT8); svdf.SetWeightsFeature({-0.31930989, -0.36118156, 0.0079667, 0.37613347, 0.22197971, 0.12416199, 0.27901134, 0.27557442, 0.3905206, -0.36137494, -0.06634006, -0.10640851}); @@ -337,9 +415,9 @@ TEST_F(SVDFOpTest, BlackBoxTestHybridRank1) { /*tolerance=*/0.002945); } -TEST_F(SVDFOpTest, BlackBoxTestHybridRank2) { +TEST_F(SVDFOpTest, BlackBoxTestHybridRank2Int8) { HybridSVDFOpModel svdf(/*batches=*/2, /*units=*/4, /*input_size=*/3, - /*memory_size=*/10, /*rank=*/2); + /*memory_size=*/10, /*rank=*/2, TensorType_INT8); svdf.SetWeightsFeature({-0.31930989, 0.0079667, 0.39296314, 0.37613347, 0.12416199, 0.15785322, 0.27901134, 0.3905206, 0.21931258, -0.36137494, -0.10640851, 0.31053296, ",0,train d4ddccd3cca4fc837c66ae1dfa190739420ad122,tensorflow/tensorflow,"SVDF Hybrid op INT8 support. PiperOrigin-RevId: 226440102",operator.cc,"@@ -472,6 +472,20 @@ class Svdf : public BuiltinOperatorinputs[0]; + const string& weights_feature_name = op_signature.op->inputs[1]; + const string& output_name = op_signature.op->outputs[0]; + const Array& input_array = op_signature.model->GetArray(input_name); + const Array& weights_feature_array = + op_signature.model->GetArray(weights_feature_name); + const Array& output_array = op_signature.model->GetArray(output_name); + // If the op is a signed int8 hybrid operation, we need to return + // version 2. + if (input_array.data_type == ArrayDataType::kFloat && + weights_feature_array.data_type == ArrayDataType::kInt8 && + output_array.data_type == ArrayDataType::kFloat) { + return 2; + } return 1; } }; ",0,train a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change. PiperOrigin-RevId: 213770000",xla_device.cc,"@@ -434,6 +434,16 @@ Status XlaDevice::MakeTensorFromProto(const TensorProto& tensor_proto, return status; } +void XlaDevice::SetRequiresSyncOnCompletion(bool sync_on_completion) { + mutex_lock lock(mu_); + sync_on_completion_ = sync_on_completion; +} + +bool XlaDevice::RequiresSyncOnCompletion() const { + mutex_lock lock(mu_); + return sync_on_completion_; +} + XlaDeviceOpRegistrations* RegisterXlaDeviceKernels(const char* device, const char* jit_device) { // Any op assigned to the device that isn't rewritten by the graph rewriter ",0,train a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change. PiperOrigin-RevId: 213770000",xla_device.h,"@@ -151,6 +151,12 @@ class XlaDevice : public LocalDevice { // information for GPU and TPU devices. Status UseGpuDeviceInfo() LOCKS_EXCLUDED(mu_); + // Instructs this XlaDevice to return 'sync_on_completion' for + // RequiresSyncOnCompletion(). 
+ void SetRequiresSyncOnCompletion(bool sync_on_completion) LOCKS_EXCLUDED(mu_); + + bool RequiresSyncOnCompletion() const override LOCKS_EXCLUDED(mu_); + private: xla::LocalClient* client() const; Allocator* GetAllocatorLocked(AllocatorAttributes attr) @@ -165,7 +171,7 @@ class XlaDevice : public LocalDevice { static Status GetMetadataFromDevice(DeviceBase* device, const XlaDevice::Metadata** metadata); - mutex mu_; + mutable mutex mu_; // The metadata of this XlaDevice. const Metadata xla_metadata_; // Which hardware device in the client's platform this XlaDevice controls. @@ -207,6 +213,10 @@ class XlaDevice : public LocalDevice { // Thread pool used for running closures std::unique_ptr thread_pool_; + + // True if the device requires XlaDevice::Sync to be called on completion + // regardless of status. + bool sync_on_completion_ GUARDED_BY(mu_) = false; }; // Builds OpKernel registrations on 'device' for the JIT operators ",0,train a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change. PiperOrigin-RevId: 213770000",stream_pool.cc,"@@ -28,8 +28,14 @@ StreamPool::Ptr StreamPool::BorrowStream(se::StreamExecutor* executor) { // Re-use an existing stream from the pool. stream = std::move(streams_.back()); streams_.pop_back(); - VLOG(1) << stream->DebugStreamPointers() - << "" StreamPool reusing existing stream""; + if (stream->ok()) { + VLOG(1) << stream->DebugStreamPointers() + << "" StreamPool reusing existing stream""; + } else { + VLOG(1) << stream->DebugStreamPointers() + << "" stream was not ok, StreamPool deleting""; + stream = nullptr; + } } } ",0,train a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change. PiperOrigin-RevId: 213770000",stream_pool_test.cc,"@@ -132,5 +132,39 @@ TEST_F(StreamPoolTest, BadStreamDiscarded) { EXPECT_EQ(stream2_ptr, stream3_ptr); } +TEST_F(StreamPoolTest, BadStreamAfterReturnDiscarded) { + std::unique_ptr executor = NewStreamExecutor(); + StreamPool pool; + + // Borrow a stream. + StreamPool::Ptr stream1 = pool.BorrowStream(executor.get()); + EXPECT_TRUE(stream1->ok()); + + // Return the stream, but hold a handle to it. + se::Stream* stream1_ptr = stream1.get(); + stream1 = nullptr; + + // Now stream1 is back in the pool, force an error on the stream. Here we call + // a method that requires DNN support, which we know the Host platform doesn't + // support. + stream1_ptr->ThenDepthConcatenate({}, {}, nullptr); + EXPECT_FALSE(stream1_ptr->ok()); + + // Borrow stream2. + StreamPool::Ptr stream2 = pool.BorrowStream(executor.get()); + EXPECT_TRUE(stream2->ok()); + + // The underlying streams should be different. They would have been + // the same, but since we forced an error on stream1, it cannot be + // put back into the pool. Sadly we can't just check: + // EXPECT_NE(stream1_ptr, stream2_ptr); + // + // The above should hold logically, but it may fail if the new + // stream instance allocated for stream2 happens to reside in the + // same memory address as stream1, which has been deleted. + // + // The check that stream2->ok() serves as a good-enough check. +} + } // namespace } // namespace xla ",0,train a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change. PiperOrigin-RevId: 213770000",device.h,"@@ -106,6 +106,10 @@ class Device : public DeviceBase { // at completion. virtual Status Sync() = 0; + // Override this to return true for devices that require a Sync() call before + // session completion. 
+ virtual bool RequiresSyncOnCompletion() const { return false; } + // Optionally modify the device's GraphDef before execution. // // This method should be considered experimental and is supplied to enable ",0,train a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change. PiperOrigin-RevId: 213770000",executor.cc,"@@ -2301,13 +2301,15 @@ void ExecutorState::Finish() { auto done_cb = std::move(done_cb_); auto runner = std::move(runner_); mu_.unlock(); - if (sync_on_finish_ && status.ok()) { + Device* device = impl_->params_.device; + if ((sync_on_finish_ && status.ok()) || device->RequiresSyncOnCompletion()) { // Block until the device has finished all queued operations. For // devices like GPUs that continue to execute Ops after their Compute // methods have completed, this ensures that control is not returned to // the user until the step (and its side-effects) has actually completed. - status = impl_->params_.device->Sync(); + status.Update(device->Sync()); } + delete this; CHECK(done_cb != nullptr); runner([=]() { done_cb(status); }); ",0,train a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change. PiperOrigin-RevId: 213770000",cancellation.cc,"@@ -89,6 +89,16 @@ bool CancellationManager::DeregisterCallback(CancellationToken token) { } } +bool CancellationManager::TryDeregisterCallback(CancellationToken token) { + mutex_lock lock(mu_); + if (is_cancelled_ || is_cancelling_) { + return false; + } else { + callbacks_.erase(token); + return true; + } +} + CancellationManager::~CancellationManager() { if (!callbacks_.empty()) { StartCancel(); ",0,train a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change. PiperOrigin-RevId: 213770000",cancellation.h,"@@ -122,6 +122,15 @@ class CancellationManager { // cancellation manager. bool DeregisterCallback(CancellationToken token); + // Deregister the callback that, when registered, was associated + // with the given cancellation token. Returns true iff the callback + // was deregistered and will not be invoked; otherwise returns false + // immediately, with no guarantee that the callback has completed. + // + // This method is guaranteed to return true if StartCancel has not been + // called. + bool TryDeregisterCallback(CancellationToken token); + private: bool is_cancelling_; std::atomic_bool is_cancelled_; ",0,train a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change. 
PiperOrigin-RevId: 213770000",cancellation_test.cc,"@@ -115,4 +115,56 @@ TEST(Cancellation, IsCancelled) { delete cm; } +TEST(Cancellation, TryDeregisterWithoutCancel) { + bool is_cancelled = false; + CancellationManager* manager = new CancellationManager(); + auto token = manager->get_cancellation_token(); + bool registered = manager->RegisterCallback( + token, [&is_cancelled]() { is_cancelled = true; }); + EXPECT_TRUE(registered); + bool deregistered = manager->TryDeregisterCallback(token); + EXPECT_TRUE(deregistered); + delete manager; + EXPECT_FALSE(is_cancelled); +} + +TEST(Cancellation, TryDeregisterAfterCancel) { + bool is_cancelled = false; + CancellationManager* manager = new CancellationManager(); + auto token = manager->get_cancellation_token(); + bool registered = manager->RegisterCallback( + token, [&is_cancelled]() { is_cancelled = true; }); + EXPECT_TRUE(registered); + manager->StartCancel(); + EXPECT_TRUE(is_cancelled); + bool deregistered = manager->TryDeregisterCallback(token); + EXPECT_FALSE(deregistered); + delete manager; +} + +TEST(Cancellation, TryDeregisterDuringCancel) { + Notification cancel_started, finish_callback, cancel_complete; + CancellationManager* manager = new CancellationManager(); + auto token = manager->get_cancellation_token(); + bool registered = manager->RegisterCallback(token, [&]() { + cancel_started.Notify(); + finish_callback.WaitForNotification(); + }); + EXPECT_TRUE(registered); + + thread::ThreadPool w(Env::Default(), ""test"", 1); + w.Schedule([&]() { + manager->StartCancel(); + cancel_complete.Notify(); + }); + cancel_started.WaitForNotification(); + + bool deregistered = manager->TryDeregisterCallback(token); + EXPECT_FALSE(deregistered); + + finish_callback.Notify(); + cancel_complete.WaitForNotification(); + delete manager; +} + } // namespace tensorflow ",0,train 811a9ef9974d61bf1a351aaeb3895e95909aece1,tensorflow/tensorflow,Fix for fsanitize=undefined.,common.h,"@@ -156,7 +156,7 @@ inline int32 MultiplyByQuantizedMultiplier(int32 x, int32 quantized_multiplier, right_shift); } -inline int32 MultiplyByQuantizedMultiplier(std::int64_t x, +inline int32 MultiplyByQuantizedMultiplier(int64_t x, int32 quantized_multiplier, int shift) { // Inputs: @@ -172,7 +172,7 @@ inline int32 MultiplyByQuantizedMultiplier(std::int64_t x, int32_t reduced_multiplier = (quantized_multiplier + (1 << 15)) >> 16; int total_shift = 15 - shift; - x = (x * (int64_t)reduced_multiplier) + (1 << (total_shift - 1)); + x = (x * (int64_t)reduced_multiplier) + ((int64_t)1 << (total_shift - 1)); int32_t result = x >> total_shift; return result; } ",0,train 3a86edf9a298a33f08936daffa97119e545fd706,tensorflow/tensorflow,review changes.,mkl_cpu_allocator.cc,"@@ -21,7 +21,6 @@ namespace tensorflow { constexpr const char* MklCPUAllocator::kMaxLimitStr; constexpr const size_t MklCPUAllocator::kDefaultMaxLimit; - } // namespace tensorflow #endif // INTEL_MKL ",0,train 3a86edf9a298a33f08936daffa97119e545fd706,tensorflow/tensorflow,review changes.,mkl_cpu_allocator.h,"@@ -30,6 +30,7 @@ limitations under the License. #include ""tensorflow/core/platform/mem.h"" #include ""tensorflow/core/platform/numa.h"" #include ""tensorflow/core/util/env_var.h"" +#include ""tensorflow/core/util/onednn_env_vars.h"" #ifdef _WIN32 typedef unsigned int uint; #endif @@ -217,7 +218,7 @@ class MklCPUAllocator : public Allocator { // otherwise call large-size allocator (BFC). 
We found that BFC allocator // does not deliver good performance for small allocations when // inter_op_parallelism_threads is high. - if (always_use_system_allocator_ || + if (UseSystemAlloc() || num_bytes < kSmallAllocationsThreshold) { return small_size_allocator_->AllocateRaw(alignment, num_bytes); } else { @@ -230,7 +231,7 @@ class MklCPUAllocator : public Allocator { inline void DeallocateRaw(void* ptr) override { // Check if ptr is for ""small"" allocation. If it is, then call Free // directly. Otherwise, call BFC to handle free. - if (always_use_system_allocator_ || IsSmallSizeAllocation(ptr)) { + if (UseSystemAlloc() || IsSmallSizeAllocation(ptr)) { small_size_allocator_->DeallocateRaw(ptr); } else { mutex_lock l(mutex_); @@ -265,11 +266,6 @@ class MklCPUAllocator : public Allocator { private: // Hooks provided by this allocator for memory allocation routines from MKL - bool always_use_system_allocator_ = [] { - bool value = false; - TF_CHECK_OK(ReadBoolFromEnvVar(""TF_USE_SYSTEM_ALLOCATOR"", false, &value)); - return value; - }(); static inline void* MallocHook(size_t size) { VLOG(3) << ""MklCPUAllocator: In MallocHook""; return cpu_allocator()->AllocateRaw(kAlignment, size); ",0,train 3a86edf9a298a33f08936daffa97119e545fd706,tensorflow/tensorflow,review changes.,mkl_conv_ops.h,"@@ -38,6 +38,7 @@ limitations under the License. #include ""tensorflow/core/lib/strings/str_util.h"" #include ""tensorflow/core/platform/macros.h"" #include ""tensorflow/core/util/mkl_util.h"" +#include ""tensorflow/core/util/onednn_env_vars.h"" #include ""tensorflow/core/util/padding.h"" #include ""tensorflow/core/util/tensor_format.h"" ",0,train 3a86edf9a298a33f08936daffa97119e545fd706,tensorflow/tensorflow,review changes.,mkl_matmul_ops_common.h,"@@ -25,6 +25,7 @@ limitations under the License. #include ""tensorflow/core/framework/op.h"" #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/util/mkl_util.h"" +#include ""tensorflow/core/util/onednn_env_vars.h"" using dnnl::inner_product_forward; using dnnl::primitive_attr; ",0,train 3a86edf9a298a33f08936daffa97119e545fd706,tensorflow/tensorflow,review changes.,mkl_util.h,"@@ -135,8 +135,6 @@ inline void execute_primitives( } } -bool AreWeightsFrozen(); - // In oneDNN v1.x, the format (ex. NCHW) used to initialize a memory descriptor // (md) structure will no longer be recorded in its `format` field. Instead, it // will be set to a canonical `blocked` format for every fully described md. ",0,train 3a86edf9a298a33f08936daffa97119e545fd706,tensorflow/tensorflow,review changes.,onednn_env_vars.cc,"@@ -1,4 +1,4 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the ""License""); you may not use this file except in compliance with the License. @@ -15,7 +15,9 @@ limitations under the License. 
#ifdef INTEL_MKL -#include ""tensorflow/core/util/mkl_util.h"" +#include ""absl/base/call_once.h"" +#include ""tensorflow/core/util/onednn_env_vars.h"" +#include ""tensorflow/core/util/env_var.h"" namespace tensorflow { @@ -28,5 +30,16 @@ bool AreWeightsFrozen() { }); return weights_const; } + +bool UseSystemAlloc() { + static bool use_sys_alloc = false; + static absl::once_flag once; + absl::call_once(once, [&] { + TF_CHECK_OK(ReadBoolFromEnvVar(""TF_ONEDNN_USE_SYSTEM_ALLOCATOR"", + /*default_value*/ false, &use_sys_alloc)); + }); + return use_sys_alloc; +} + } // namespace tensorflow #endif // INTEL_MKL ",0,train 3a86edf9a298a33f08936daffa97119e545fd706,tensorflow/tensorflow,review changes.,onednn_env_vars.h,"@@ -0,0 +1,25 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_UTIL_ONEDNN_ENV_VARS_H_ +#define TENSORFLOW_CORE_UTIL_ONEDNN_ENV_VARS_H_ +#ifdef INTEL_MKL + +namespace tensorflow { + bool AreWeightsFrozen(); + bool UseSystemAlloc(); +} // namespace tensorflow +#endif // INTEL_MKL +#endif // TENSORFLOW_CORE_UTIL_ONEDNN_ENV_VARS_H_ ",0,train 29a587e4aac990c1529b3c4a3331e3945cfff0ff,tensorflow/tensorflow,"Improved support for variables PiperOrigin-RevId: 156646899",op_types.cc,"@@ -43,7 +43,7 @@ bool IsTranspose(const NodeDef& node) { bool IsVariable(const NodeDef& node) { const auto op = node.op(); return op == ""Variable"" || op == ""VariableV2"" || op == ""AutoReloadVariable"" || - op == ""VarHandleOp""; + op == ""VarHandleOp"" || op == ""TemporaryVariable""; } bool IsMerge(const NodeDef& node) { ",0,train 689f7a0b8468c8feec4d2a6db54bb6bc3759fbe2,tensorflow/tensorflow,"Fix #5946. Fix error in documentation about global_variables_initializer.",variables.py,"@@ -82,12 +82,12 @@ class Variable(object): ``` The most common initialization pattern is to use the convenience function - `global_variable_initializers()` to add an Op to the graph that initializes + `global_variables_initializer()` to add an Op to the graph that initializes all the variables. You then run that Op after launching the graph. ```python # Add an Op to initialize global variables. - init_op = tf.global_variable_initializers() + init_op = tf.global_variables_initializer() # Launch the graph in a session. 
with tf.Session() as sess: @@ -494,7 +494,7 @@ class Variable(object): ```python v = tf.Variable([1, 2]) - init = tf.global_variable_initializers() + init = tf.global_variables_initializer() with tf.Session() as sess: sess.run(init) ",0,train 6ae13f689ff2c65691e790cd50a3c1c867ad00ac,tensorflow/tensorflow,"Migrate experimental_relax_shapes to reduce_retracing PiperOrigin-RevId: 438578345",benchmarks_test.py,"@@ -470,7 +470,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): num_iters, execution_mode=None): - @def_function.function(experimental_relax_shapes=True) + @def_function.function(reduce_retracing=True) def defun_matmul(m): return math_ops.matmul(m, m) ",0,train 188a2a87248c2d96140080542dd402ac517df59b,tensorflow/tensorflow,"Fix ShapeOp result type for scalar input ShapeOp returns empty 1-d tensor if the input is scalar. Also, allow folder to fold scalar inputs. TESTED with unit test PiperOrigin-RevId: 256571257",tf_ops.cc,"@@ -573,9 +573,7 @@ static LogicalResult Verify(ShapeOp op) { // The operand is a ranked tensor. if (resultType.hasStaticShape()) { if ((!rankedTensorType.getShape().empty() && - resultType.getDimSize(0) != rankedTensorType.getShape().size()) || - (rankedTensorType.getShape().empty() && - resultType.getDimSize(0) != 1)) + resultType.getDimSize(0) != rankedTensorType.getShape().size())) return op.emitOpError( ""requires dimension size of result to match rank of operand""); } @@ -597,7 +595,6 @@ OpFoldResult ShapeOp::fold(ArrayRef operands) { auto shape = rankedTensorType.getShape(); int rank = shape.size(); - if (rank == 0) return {}; Builder b(getContext()); auto elementType = getType().cast().getElementType(); ",0,train 8703a34f9d998ff33eae3124bd634e663aed252c,tensorflow/tensorflow,"Add a debug option to disable xla dumping. Rename xla_detailed_logging to disable both logging and dumping. PiperOrigin-RevId: 364414688 Change-Id: I6d1d4c394d13653e94be74005c45f744c1edceab",xla_compilation_cache.cc,"@@ -181,7 +181,7 @@ Status XlaCompilationCache::BuildExecutable( build_options.set_result_layout(result.xla_output_shape); build_options.set_device_allocator(options.device_allocator.get()); build_options.set_alias_passthrough_params(options.alias_passthrough_params); - build_options.mutable_debug_options()->set_xla_detailed_logging( + build_options.mutable_debug_options()->set_xla_detailed_logging_and_dumping( options.detailed_logging); TF_ASSIGN_OR_RETURN( auto executables, ",0,train 8703a34f9d998ff33eae3124bd634e663aed252c,tensorflow/tensorflow,"Add a debug option to disable xla dumping. Rename xla_detailed_logging to disable both logging and dumping. PiperOrigin-RevId: 364414688 Change-Id: I6d1d4c394d13653e94be74005c45f744c1edceab",debug_options_flags.cc,"@@ -76,7 +76,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() { opts.set_xla_cpu_enable_xprof_traceme(false); opts.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(false); opts.set_xla_multiheap_size_constraint_per_heap(-1); - opts.set_xla_detailed_logging(true); + opts.set_xla_detailed_logging_and_dumping(true); return opts; } ",0,train 8703a34f9d998ff33eae3124bd634e663aed252c,tensorflow/tensorflow,"Add a debug option to disable xla dumping. Rename xla_detailed_logging to disable both logging and dumping. PiperOrigin-RevId: 364414688 Change-Id: I6d1d4c394d13653e94be74005c45f744c1edceab",dump.cc,"@@ -67,6 +67,11 @@ struct CanonicalDebugOptions { dump_as_text = true; } + // Disable dumping if specified by the user. 
+ if (!opts.xla_detailed_logging_and_dumping()) { + dump_to = """"; + } + // If dump_to is empty, default to dumping to stdout, so long as some dump // format other than dump-as-url was specified. If the user only specified // --xla_dump_hlo_as_url, then don't dump to stdout, that is likely noise @@ -110,7 +115,7 @@ struct CanonicalDebugOptions { // Output dirs ""sponge"" and ""test_undeclared_outputs_dir"" (case-insensitive) // have a special meaning: Dump into the directory specified by the // environment variable TEST_UNDECLARED_OUTPUTS_DIR. - string dump_to_lower = absl::AsciiStrToLower(opts.xla_dump_to()); + string dump_to_lower = absl::AsciiStrToLower(dump_to); if (dump_to_lower == ""sponge"" || dump_to_lower == ""test_undeclared_outputs_dir"") { if (!tensorflow::io::GetTestUndeclaredOutputsDir(&dump_to)) { ",0,train 055855171b52a3d284ded6a549ee5c6471d9a4c9,tensorflow/tensorflow,"Fix space_to_batch_converter on windows. PiperOrigin-RevId: 333398025 Change-Id: I4d713db2c910d462f1c70a1cd4979a9a3cfe0905",space_to_batch_converter.cc,"@@ -371,8 +371,8 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) { new_dim_numbers.set_output_feature_dimension(dim_count); int p = 0; - for (auto [k, v] : dim_map) { - transpose_dims[p] = v; + for (const auto& entry : dim_map) { + transpose_dims[p] = entry.second; p++; } ",0,train 0c2565de1108fc2063ec04b335ef7356298b7ad6,tensorflow/tensorflow,"Don't lower nested control flow if we're compiling to XLA. PiperOrigin-RevId: 227579512",cond_v2_test.py,"@@ -145,6 +145,22 @@ class CondV2Test(test.TestCase): self.assertEqual(cond_op.type, ""If"") return output, cond_op + def _createNestedCond(self, name): + """"""Like _createCond but creates a nested cond_v2 call as well."""""" + pred = constant_op.constant(True, name=""pred"") + x = constant_op.constant(1.0, name=""x"") + + def true_fn(): + return cond_v2.cond_v2(pred, lambda: x, lambda: x + 1) + + def false_fn(): + return x + 2 + + output = cond_v2.cond_v2(pred, true_fn, false_fn, name=name) + cond_op = output.op.inputs[0].op + self.assertEqual(cond_op.type, ""If"") + return output, cond_op + def testDefaultName(self): with ops.Graph().as_default(): _, cond_op = self._createCond(None) @@ -645,9 +661,14 @@ class CondV2Test(test.TestCase): # Build the cond_v2 in an XLA context xla_context = control_flow_ops.XLAControlFlowContext() xla_context.Enter() - cond_output, _ = self._createCond(""cond"") + cond_output, cond_op = self._createCond(""cond"") xla_context.Exit() + # Check lowering attr is not set. + with self.assertRaises(ValueError): + cond_op.get_attr(""_lower_using_switch_merge"") + + # Check the actual graph that is run. run_options = config_pb2.RunOptions(output_partition_graphs=True) run_metadata = config_pb2.RunMetadata() sess.run(cond_output, options=run_options, run_metadata=run_metadata) @@ -672,6 +693,29 @@ class CondV2Test(test.TestCase): if_found, ""An `If` op was not found, but the graph should not be lowered."") + @test_util.run_deprecated_v1 + def testNestedLoweringDisabledInXLA(self): + # Build the cond_v2 in an XLA context + xla_context = control_flow_ops.XLAControlFlowContext() + xla_context.Enter() + _, cond_op = self._createNestedCond(""cond"") + xla_context.Exit() + + # Check lowering attr is not set for either If node. 
+ with self.assertRaises(ValueError): + cond_op.get_attr(""_lower_using_switch_merge"") + + nested_if_ops = [] + for func in ops.get_default_graph()._functions.values(): + nested_if_ops.extend(op for op in func._graph.get_operations() + if op.type == ""If"") + self.assertEqual(len(nested_if_ops), 1) + with self.assertRaises(ValueError): + nested_if_ops[0].get_attr(""_lower_using_switch_merge"") + + # TODO(skyewm): check the actual graphs that are run once we have a way to + # programmatically access those graphs. + @test_util.run_deprecated_v1 def testLoweringDisabledWithSingleThreadedExecutorContext(self): with self.session(graph=ops.Graph()) as sess: ",0,train 0c2565de1108fc2063ec04b335ef7356298b7ad6,tensorflow/tensorflow,"Don't lower nested control flow if we're compiling to XLA. PiperOrigin-RevId: 227579512",control_flow_util.py,"@@ -57,6 +57,15 @@ def InXlaContext(graph): return GetContainingXLAContext(ctxt) is not None +def GraphOrParentsInXlaContext(graph): + while True: + if InXlaContext(graph): return True + try: + graph = graph.outer_graph + except AttributeError: + return False + + def IsInWhileLoop(op): ctxt = op._get_control_flow_context() # pylint: disable=protected-access return GetContainingWhileContext(ctxt) is not None ",0,train 0c2565de1108fc2063ec04b335ef7356298b7ad6,tensorflow/tensorflow,"Don't lower nested control flow if we're compiling to XLA. PiperOrigin-RevId: 227579512",control_flow_util_v2.py,"@@ -114,7 +114,7 @@ def maybe_set_lowering_attr(op): Args: op: An `If` or `While` Operation. """""" - if (not control_flow_util.IsInXLAContext(op) and + if (not control_flow_util.GraphOrParentsInXlaContext(op.graph) and context.context().get_function_call_options().executor_type != ""SINGLE_THREADED_EXECUTOR""): # pylint: disable=protected-access ",0,train c412c22d19e1a198b9e0b2409f026ce742b15df6,tensorflow/tensorflow,"[MLIR][HLO] Hold symbolic and concrete factors together in `SymbolicProduct` Introduce struct `SymbolicProduct` to hold concrete and symbolic factors together. This is in preparation to merge shape collapsing and expanding, which requires in-depth analysis of symbolic products. 
PiperOrigin-RevId: 436787990",symbolic_shape_optimization.cc,"@@ -205,16 +205,13 @@ struct RemoveComputeReshapeShape final } }; -bool IsSimpleProduct( - AffineExpr expr, - llvm::function_ref cbkConstantFactor, - llvm::function_ref cbkSymbolicFactor) { +bool IsProduct(AffineExpr expr, + llvm::function_ref cbkConstantFactor, + llvm::function_ref cbkSymbolicFactor) { auto binExpr = expr.dyn_cast(); if (binExpr && binExpr.getKind() == AffineExprKind::Mul) { - return IsSimpleProduct(binExpr.getLHS(), cbkConstantFactor, - cbkSymbolicFactor) && - IsSimpleProduct(binExpr.getRHS(), cbkConstantFactor, - cbkSymbolicFactor); + return IsProduct(binExpr.getLHS(), cbkConstantFactor, cbkSymbolicFactor) && + IsProduct(binExpr.getRHS(), cbkConstantFactor, cbkSymbolicFactor); } if (auto symExpr = expr.dyn_cast()) { cbkSymbolicFactor(symExpr); @@ -227,10 +224,10 @@ bool IsSimpleProduct( return false; } -bool IsSimpleProduct(const SymbolicExpr &symbolicExpr, - llvm::function_ref cbkConstantFactor, - llvm::function_ref cbkSymbolicFactor) { - return IsSimpleProduct( +bool IsSymbolicProduct(const SymbolicExpr &symbolicExpr, + llvm::function_ref cbkConstantFactor, + llvm::function_ref cbkSymbolicFactor) { + return IsProduct( symbolicExpr.expr, [&](AffineConstantExpr cexpr) { cbkConstantFactor(cexpr.getValue()); }, [&](AffineSymbolExpr sexpr) { @@ -238,11 +235,21 @@ bool IsSimpleProduct(const SymbolicExpr &symbolicExpr, }); } -bool IsSimpleProduct(const SymbolicExpr &symbolicExpr, int64_t *concreteProduct, - SmallVectorImpl *symbolicFactors) { - return IsSimpleProduct( - symbolicExpr, [&](int64_t c) { *concreteProduct *= c; }, - [&](Symbol s) { symbolicFactors->push_back(s); }); +// Represents a product of symbolic and concrete factors. This will allow us to +// prove product equalities symbolically. +struct SymbolicProduct { + // Product of all concrete factors. + int64_t concrete = 1; + // List all symbolic factors as they can not be aggregated. + llvm::SmallVector symbolic; + bool empty() { return concrete == 1 && symbolic.empty(); } +}; + +bool IsSymbolicProduct(const SymbolicExpr &symbolicExpr, + SymbolicProduct *product) { + return IsSymbolicProduct( + symbolicExpr, [&](int64_t c) { product->concrete *= c; }, + [&](Symbol s) { product->symbolic.push_back(s); }); } struct RemoveRedundantCstrReshapable final @@ -278,13 +285,11 @@ struct RemoveRedundantCstrReshapable final // We can only handle simple products with constants and symbols. Find all // the factors based on the number of elements. - int64_t concreteProductNumElems = 1; - SmallVector remainingSymbolicFactorsNumElems; - if (!IsSimpleProduct(numElements, &concreteProductNumElems, - &remainingSymbolicFactorsNumElems)) { + SymbolicProduct numElementsRemainingFactors; + if (!IsSymbolicProduct(numElements, &numElementsRemainingFactors)) { return failure(); } - assert(concreteProductNumElems >= 1 && + assert(numElementsRemainingFactors.concrete >= 1 && ""number of elements cannot entail negative or zero factors""); // Find all factors based on the dynamic shape. 
@@ -296,7 +301,7 @@ struct RemoveRedundantCstrReshapable final int64_t concreteProductDynShape = 1; for (const auto &dim : *dynShapeDims) { SmallVector partialSymbolicFactorsDynShape; - if (!IsSimpleProduct( + if (!IsSymbolicProduct( dim, [&](int64_t c) { if (c != -1) concreteProductDynShape *= c; @@ -305,9 +310,10 @@ struct RemoveRedundantCstrReshapable final return failure(); } for (const Symbol &symDynShape : partialSymbolicFactorsDynShape) { - auto *it = llvm::find(remainingSymbolicFactorsNumElems, symDynShape); - if (it == remainingSymbolicFactorsNumElems.end()) return failure(); - remainingSymbolicFactorsNumElems.erase(it); + auto *it = + llvm::find(numElementsRemainingFactors.symbolic, symDynShape); + if (it == numElementsRemainingFactors.symbolic.end()) return failure(); + numElementsRemainingFactors.symbolic.erase(it); } } assert(concreteProductDynShape >= 1 && @@ -316,15 +322,16 @@ struct RemoveRedundantCstrReshapable final // A wildcard dimension can subsume the remaining symbolic factors and // potentially also a concrete factor. if (unique_wildcard_dimension) { - if (concreteProductNumElems % concreteProductDynShape != 0) + if (numElementsRemainingFactors.concrete % concreteProductDynShape != 0) return failure(); rewriter.replaceOpWithNewOp(op, true); return success(); } // W/o a wildcard, the symbolic and concrete products must be equal. - bool isReshapable = remainingSymbolicFactorsNumElems.empty() && - concreteProductNumElems == concreteProductDynShape; + bool isReshapable = + numElementsRemainingFactors.symbolic.empty() && + numElementsRemainingFactors.concrete == concreteProductDynShape; rewriter.replaceOpWithNewOp(op, isReshapable); return success(); } @@ -359,42 +366,42 @@ struct TurnDynamicReshapeIntoCollapseShape final // Find the concrete/symbolic factors for the current dimension of the // target shape. - int64_t remainingConcreteProductShapeDim = 1; - SmallVector remainingSymbolicFactorsShapeDim; - if (!IsSimpleProduct(shapeDim, &remainingConcreteProductShapeDim, - &remainingSymbolicFactorsShapeDim)) { + SymbolicProduct remainingFactorsShapeDim; + if (!IsSymbolicProduct(shapeDim, &remainingFactorsShapeDim)) { return failure(); } // Consume (and collapse) as many of the operand dimensions as needed to // match the target dimension. This is monotonic. - while (remainingConcreteProductShapeDim != 1 || - !remainingSymbolicFactorsShapeDim.empty()) { + while (!remainingFactorsShapeDim.empty()) { // Fail if there are no more operand dimensions to consume. if (i >= argShapeInfo->size()) return failure(); // Find the concrete/symbolic factors for the next dimension of the // operand shape. - int64_t concreteProductArgShapeDim = 1; - SmallVector symbolicFactorsArgShapeDim; - if (!IsSimpleProduct((*argShapeInfo)[i], &concreteProductArgShapeDim, - &symbolicFactorsArgShapeDim)) { + SymbolicProduct remainingFactorsArgShapeDim; + if (!IsSymbolicProduct((*argShapeInfo)[i], + &remainingFactorsArgShapeDim)) { return failure(); } // Eliminate the common concrete factors. Fail if we cannot consume a // concrete factor of the operand shape. - if (remainingConcreteProductShapeDim % concreteProductArgShapeDim != 0) + if (remainingFactorsShapeDim.concrete % + remainingFactorsArgShapeDim.concrete != + 0) return failure(); - remainingConcreteProductShapeDim /= concreteProductArgShapeDim; + remainingFactorsShapeDim.concrete /= + remainingFactorsArgShapeDim.concrete; // Eliminate the common symbolic factors. Fail if we cannot consume a // symbolic factor of the operand shape. 
- for (const Symbol &symArgShapeDim : symbolicFactorsArgShapeDim) { + for (const Symbol &symArgShapeDim : + remainingFactorsArgShapeDim.symbolic) { auto *it = - llvm::find(remainingSymbolicFactorsShapeDim, symArgShapeDim); - if (it == remainingSymbolicFactorsShapeDim.end()) return failure(); - remainingSymbolicFactorsShapeDim.erase(it); + llvm::find(remainingFactorsShapeDim.symbolic, symArgShapeDim); + if (it == remainingFactorsShapeDim.symbolic.end()) return failure(); + remainingFactorsShapeDim.symbolic.erase(it); } // If all the concrete/symbolic factors were consumable, collapse this ",0,train ea95f4a740725e9b7f864ef5766772c64dbbaf0d,tensorflow/tensorflow,"TPUEstimator: Fix shutdown behavior after preemption. PiperOrigin-RevId: 299005210 Change-Id: I925e5308c8470dce0e071361441266856278a391",session_support.py,"@@ -418,7 +418,7 @@ class ResetComputation(object): def __call__(self, run_context, all_workers, lame_workers): del run_context, lame_workers - all_workers.shutdown() + all_workers.shutdown(exit_code=42) logging.info('Resetting coordinator.') raise CoordinatorResetError() @@ -435,7 +435,7 @@ class ShutdownLameWorkers(object): pass def __call__(self, run_context, all_workers, lame_workers): - lame_workers.shutdown() + lame_workers.shutdown(exit_code=42) class ShutdownAllWorkers(object): @@ -449,4 +449,4 @@ class ShutdownAllWorkers(object): pass def __call__(self, run_context, all_workers, lame_workers): - all_workers.shutdown() + all_workers.shutdown(exit_code=42) ",0,test 9aa32a6eacd0e8f507d1c57f0658d6c3ecaecaba,tensorflow/tensorflow,"Enable mixing value tensors (eager tensors or numpy arrays) and Keras symbolic tensors when building Keras graphs-of-layers in an eager scope. In these cases, the value tensors are treated as symbolic constants. This enables the following pattern to work in the same way in both V1 and V2: ``` lstm = LSTM(2) inputs = keras.Input((None, 3)) outputs = lstm(inputs, initial_state=tf.ones(shape)) ``` (without this change, the above code works in V1 but fails in V2 with an artificial exception). Known issue: in case a random tensor is used, there is a (usually harmless) behavior discrepancy remaining between V1 and V2, which is that in V2 we'd be using the same random value every time, whereas in V1 we'd be drawing new random values (since the tensor would be treated as a random op and not as a constant). We think this is not a problem because in V2 users should have the mental model ""tensors are values"" and thus would be expecting a random tensor to behave like a constant value and not like a random generator. PiperOrigin-RevId: 224915621",execute.py,"@@ -66,12 +66,6 @@ def quick_execute(op_name, num_outputs, inputs, attrs, ctx, name=None): six.raise_from(core._status_to_exception(e.code, message), None) except TypeError as e: if any(ops._is_keras_symbolic_tensor(x) for x in inputs): - if any(isinstance(x, ops.EagerTensor) for x in inputs): - raise TypeError(""You are attempting to mix computation of symbolic "" - ""Tensors (computation rooted at tf.keras.Input()) "" - ""and concrete values. This is not supported. "" - ""If you need this support, file an issue on the "" - ""TensorFlow GitHub repository."") raise core._SymbolicException raise e # pylint: enable=protected-access ",0,train 9aa32a6eacd0e8f507d1c57f0658d6c3ecaecaba,tensorflow/tensorflow,"Enable mixing value tensors (eager tensors or numpy arrays) and Keras symbolic tensors when building Keras graphs-of-layers in an eager scope. 
In these cases, the value tensors are treated as symbolic constants. This enables the following pattern to work in the same way in both V1 and V2: ``` lstm = LSTM(2) inputs = keras.Input((None, 3)) outputs = lstm(inputs, initial_state=tf.ones(shape)) ``` (without this change, the above code works in V1 but fails in V2 with an artificial exception). Known issue: in case a random tensor is used, there is a (usually harmless) behavior discrepancy remaining between V1 and V2, which is that in V2 we'd be using the same random value every time, whereas in V1 we'd be drawing new random values (since the tensor would be treated as a random op and not as a constant). We think this is not a problem because in V2 users should have the mental model ""tensors are values"" and thus would be expecting a random tensor to behave like a constant value and not like a random generator. PiperOrigin-RevId: 224915621",base_layer_test.py,"@@ -167,19 +167,26 @@ class BaseLayerTest(test.TestCase): def test_mixing_keras_symbolic_tensors_and_eager_tensors(self): x1 = keras.Input((3,)) x2 = array_ops.ones((3, 3)) - with self.assertRaisesRegexp( - TypeError, - 'mix computation of symbolic Tensors'): - math_ops.matmul(x1, x2) + y = math_ops.matmul(x1, x2) + self.assertEqual(y.graph, keras.backend.get_graph()) + fn = keras.backend.function(inputs=[x1], outputs=[y]) + x_val = np.random.random((3, 3)) + y_val = np.ones((3, 3)) + self.assertAllClose(fn([x_val])[0], + np.matmul(x_val, y_val), + atol=1e-5) def test_mixing_keras_symbolic_tensors_and_numpy_arrays(self): - # For the time being we treat Numpy arrays as EagerTensors when mixing both. x1 = keras.Input((3,)) x2 = np.ones((3, 3), dtype='float32') - with self.assertRaisesRegexp( - TypeError, - 'mix computation of symbolic Tensors'): - math_ops.matmul(x1, x2) + y = math_ops.matmul(x1, x2) + self.assertEqual(y.graph, keras.backend.get_graph()) + fn = keras.backend.function(inputs=[x1], outputs=[y]) + x_val = np.random.random((3, 3)) + y_val = np.ones((3, 3)) + self.assertAllClose(fn([x_val])[0], + np.matmul(x_val, y_val), + atol=1e-5) if __name__ == '__main__': ",0,train aef53d7fe63191dc3adb3efd417c2054e3addc3e,tensorflow/tensorflow,deleted extraneous comment,c_api_unified_experimental_test.cc,"@@ -117,7 +117,7 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { float vals [] = {0.0f,0.0f,0.0f,0.0f}; TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx,status.get()); - TFE_TensorHandle* t = TestMatrixTensorHandleWithInput(eager_ctx, vals, dims,num_dims); //, dims[0],dims[1]); + TFE_TensorHandle* t = TestMatrixTensorHandleWithInput(eager_ctx, vals, dims,num_dims); TF_AbstractTensor* at = TF_CreateAbstractTensorFromEagerTensor(t, status.get()); // get abstract tensor ",0,train cdb6e80b21997c2b24336eb524134198fa6754d8,tensorflow/tensorflow,"Qualify uses of std::string PiperOrigin-RevId: 316789814 Change-Id: Ice83a74e70122008e090af3b818b9920abf7f5bc",cluster.h,"@@ -47,7 +47,7 @@ class Cluster { // 2- All the nodes in GraphDef which belong to this cluster. 
void SetGraphDefInfo(const tensorflow::GraphDef* graph_def); - const string& GetName() const { return name_; } + const std::string& GetName() const { return name_; } const std::vector>& GetNewNodes() const { return new_nodes_; @@ -55,18 +55,18 @@ class Cluster { const std::vector& GetNodes() { return nodes_; } - void SetName(const string& name) { name_ = name; } + void SetName(const std::string& name) { name_ = name; } - void SetDevice(const string& device) { device_ = device; } + void SetDevice(const std::string& device) { device_ = device; } // Find the input(s) and output(s) of this Cluster. bool FindClusterInputsAndOutputs(); protected: - string name_; - string device_; - std::vector inputs_; - std::vector outputs_; + std::string name_; + std::string device_; + std::vector inputs_; + std::vector outputs_; // Used to hold the pointers to nodes which are in this cluster. These nodes // are pointing to the nodes in graph_def_. ",0,train cdb6e80b21997c2b24336eb524134198fa6754d8,tensorflow/tensorflow,"Qualify uses of std::string PiperOrigin-RevId: 316789814 Change-Id: Ice83a74e70122008e090af3b818b9920abf7f5bc",cluster_utils.cc,"@@ -16,8 +16,8 @@ limitations under the License. #include ""tensorflow/lite/toco/toco_types.h"" namespace toco { -bool StrContains(const string& x, const string& search_pattern) { - return x.find(search_pattern) != string::npos; +bool StrContains(const std::string& x, const std::string& search_pattern) { + return x.find(search_pattern) != std::string::npos; } void Transpose2DTensor(const float* tensor, int row, int col, ",0,train cdb6e80b21997c2b24336eb524134198fa6754d8,tensorflow/tensorflow,"Qualify uses of std::string PiperOrigin-RevId: 316789814 Change-Id: Ice83a74e70122008e090af3b818b9920abf7f5bc",resolve_cluster.cc,"@@ -33,7 +33,8 @@ using tensorflow::GraphDef; using tensorflow::NodeDef; void AddNodeToGraph(const NodeDef& node, - const std::vector& cluster_names, GraphDef* graph) { + const std::vector& cluster_names, + GraphDef* graph) { NodeDef* new_node = graph->add_node(); new_node->set_op(node.op()); new_node->set_name(node.name()); @@ -41,9 +42,9 @@ void AddNodeToGraph(const NodeDef& node, // If the inputs are coming from a node which belongs to another cluster, then // those inputs are renamed to the source cluster name. Otherwise the original // input name is used. 
- for (const string& node_input : node.input()) { + for (const std::string& node_input : node.input()) { bool input_from_cluster = false; - for (const string& cluster_name : cluster_names) { + for (const std::string& cluster_name : cluster_names) { if (StrContains(node_input, cluster_name) && !StrContains(node.name(), cluster_name)) { new_node->add_input(cluster_name); @@ -62,7 +63,7 @@ void AddNodeToGraph(const NodeDef& node, bool FindCluster(const ClusterFactoryInterface& cluster_factory, const GraphDef& graph_def, - std::unordered_map* is_node_in_cluster, + std::unordered_map* is_node_in_cluster, std::vector>* clusters) { for (const NodeDef& node : graph_def.node()) { // If the node is not assigned to any cluster, then we check if it belong to @@ -90,12 +91,12 @@ std::unique_ptr MaybeResolveClusters( std::unique_ptr pruned_graph(new GraphDef); // The structure to keep track of which cluster each node is assigned to, and // to initialize them to all un-assigned, - std::unordered_map is_node_in_cluster; + std::unordered_map is_node_in_cluster; for (const NodeDef& node : graph_def.node()) { is_node_in_cluster[node.name()] = false; } - std::vector cluster_names; + std::vector cluster_names; std::vector> all_clusters; // Find the clusters for all available cluster factories. for (const ClusterFactoryInterface* cluster_factory : cluster_factories) { ",0,train cdb6e80b21997c2b24336eb524134198fa6754d8,tensorflow/tensorflow,"Qualify uses of std::string PiperOrigin-RevId: 316789814 Change-Id: Ice83a74e70122008e090af3b818b9920abf7f5bc",resolve_cluster.h,"@@ -40,7 +40,7 @@ std::unique_ptr MaybeResolveClusters( // belongs to another cluster, then those inputs are renamed to the source // cluster name. void AddNodeToGraph(const tensorflow::NodeDef& node, - const std::vector& cluster_names, + const std::vector& cluster_names, tensorflow::GraphDef* graph); // Given a graph and a cluster class, it finds all the nodes which belong to a @@ -49,7 +49,7 @@ void AddNodeToGraph(const tensorflow::NodeDef& node, // they belong to the generated clusters. bool FindCluster(const ClusterFactoryInterface& cluster_factory, const tensorflow::GraphDef& graph_def, - std::unordered_map* is_node_in_cluster, + std::unordered_map* is_node_in_cluster, std::vector>* clusters); // Receives a graph and generates another graph by replacing the cluster of ",0,train cdb6e80b21997c2b24336eb524134198fa6754d8,tensorflow/tensorflow,"Qualify uses of std::string PiperOrigin-RevId: 316789814 Change-Id: Ice83a74e70122008e090af3b818b9920abf7f5bc",resolve_svdf.cc,"@@ -47,11 +47,11 @@ namespace { // Since these nodes are connected to a Concatenate node, it makes sure the // axis value input of the Concatenate operator is 0. 
void FilterPartitionedConstNodes( - const string& const_pattern, + const std::string& const_pattern, const std::vector& cluster_nodes, std::vector* const_node_parts) { for (const NodeDef* node : cluster_nodes) { - string node_name_to_upper = node->name(); + std::string node_name_to_upper = node->name(); std::transform(node_name_to_upper.begin(), node_name_to_upper.end(), node_name_to_upper.begin(), ::toupper); if (StrContains(node->name(), const_pattern) && node->op() == ""Const"") { @@ -97,7 +97,7 @@ int SvdfCluster::InferFilterRank() { } void SvdfCluster::CreateNodes() { - for (const string& const_pattern : const_node_patterns_) { + for (const std::string& const_pattern : const_node_patterns_) { CreateConstNode(const_pattern); } std::unique_ptr svdf_node(new NodeDef); @@ -110,14 +110,14 @@ void SvdfCluster::CreateNodes() { // Add the rest of the inputs to Svdf cell: weights and bias. CHECK(new_nodes_.size() == 3 || new_nodes_.size() == 2); - string* weights_feature_input = svdf_node->add_input(); - string* weights_time_input = svdf_node->add_input(); - string* bias_input; + std::string* weights_feature_input = svdf_node->add_input(); + std::string* weights_time_input = svdf_node->add_input(); + std::string* bias_input; if (new_nodes_.size() == 3) { bias_input = svdf_node->add_input(); } for (const std::unique_ptr& node : new_nodes_) { - const string node_name = node->name(); + const std::string node_name = node->name(); if (StrContains(node_name, ""SVDF_weights_feature"")) { *weights_feature_input = node_name; } else if (StrContains(node_name, ""SVDF_weights_time"")) { @@ -136,7 +136,7 @@ void SvdfCluster::CreateNodes() { CHECK_GT(rank, 0); // Add Svdf activation and rank. - string activation_function = + std::string activation_function = StrContains(outputs_[0], ""Relu"") ? ""Relu"" : ""None""; (*svdf_node->mutable_attr())[""ActivationFunction""].set_s(activation_function); (*svdf_node->mutable_attr())[""Rank""].set_i(rank); @@ -145,7 +145,7 @@ void SvdfCluster::CreateNodes() { new_nodes_.push_back(std::move(svdf_node)); } -void SvdfCluster::CreateConstNode(const string& const_pattern) { +void SvdfCluster::CreateConstNode(const std::string& const_pattern) { // Find the nodes with pattern like: ""const_pattern""/part_xxx of type Const. std::vector const_node_parts; FilterPartitionedConstNodes(const_pattern, nodes_, &const_node_parts); @@ -236,15 +236,15 @@ void SvdfCluster::MaybeMergeConstNodes( // Set the tensor attributes. allocated_tensor->set_tensor_content( - string(reinterpret_cast(transposed_tensor.get()), - allocated_content_flat_size)); + std::string(reinterpret_cast(transposed_tensor.get()), + allocated_content_flat_size)); } else { tensor_shape_dim0->set_size(dim0_size); // Set the tensor attributes. 
allocated_tensor->set_tensor_content( - string(reinterpret_cast(allocated_content.get()), - allocated_content_flat_size)); + std::string(reinterpret_cast(allocated_content.get()), + allocated_content_flat_size)); } } @@ -252,21 +252,21 @@ void SvdfCluster::MaybeMergeConstNodes( std::unique_ptr SvdfClusterFactory::CreateCluster( const NodeDef& node, const GraphDef& graph_def) const { - std::vector node_patterns = {""SVDF_weights_feature"", - ""SVDF_weights_time"", ""SVDF_bias""}; + std::vector node_patterns = {""SVDF_weights_feature"", + ""SVDF_weights_time"", ""SVDF_bias""}; - string node_name_to_upper = node.name(); + std::string node_name_to_upper = node.name(); std::transform(node_name_to_upper.begin(), node_name_to_upper.end(), node_name_to_upper.begin(), ::toupper); std::unique_ptr cluster = nullptr; - if (node_name_to_upper.find(""SVDF"", 0) != string::npos) { + if (node_name_to_upper.find(""SVDF"", 0) != std::string::npos) { size_t weights_pos = node.name().find(node_patterns[0]); - if (weights_pos != string::npos) { + if (weights_pos != std::string::npos) { // Assuming the node name has a pattern like: // ""SOMESTRING1/CELLNAME/SEARCH_PATTERN/SOMESTRING2"", we use // CELLNAME as the cluster name. size_t cell_pos = node.name().rfind(""/"", weights_pos - 2) + 1; - string cell_name = + std::string cell_name = node.name().substr(cell_pos, weights_pos - cell_pos - 1); cluster = std::unique_ptr(new SvdfCluster); cluster->SetName(cell_name); @@ -274,7 +274,7 @@ std::unique_ptr SvdfClusterFactory::CreateCluster( cluster->SetGraphDefInfo(&graph_def); CHECK(cluster->FindClusterInputsAndOutputs()); - for (const string& const_pattern : node_patterns) { + for (const std::string& const_pattern : node_patterns) { cluster->AddConstNodePattern(const_pattern); } } ",0,train cdb6e80b21997c2b24336eb524134198fa6754d8,tensorflow/tensorflow,"Qualify uses of std::string PiperOrigin-RevId: 316789814 Change-Id: Ice83a74e70122008e090af3b818b9920abf7f5bc",resolve_svdf.h,"@@ -36,7 +36,7 @@ class SvdfCluster : public Cluster { // A helper function to set the pattern of Const nodes which CreateNodes() // should handle specially. - void AddConstNodePattern(const string& const_pattern) { + void AddConstNodePattern(const std::string& const_pattern) { const_node_patterns_.push_back(const_pattern); } @@ -46,7 +46,7 @@ class SvdfCluster : public Cluster { // The main function which is used to create Const nodes for this cluster. // These Const nodes are the inputs to the composite op generated for this // cluster. - void CreateConstNode(const string& const_pattern); + void CreateConstNode(const std::string& const_pattern); // Receives a vector of Const nodes, merge them (if necessary) and returns // only one Const node holding all the arrays contents. It transposes it if @@ -61,7 +61,7 @@ class SvdfCluster : public Cluster { // shape to [num_units, rank, batch] shape. The 2nd shape element is rank. 
int InferFilterRank(); - std::vector const_node_patterns_; + std::vector const_node_patterns_; }; class SvdfClusterFactory : public ClusterFactoryInterface { ",0,train cdb6e80b21997c2b24336eb524134198fa6754d8,tensorflow/tensorflow,"Qualify uses of std::string PiperOrigin-RevId: 316789814 Change-Id: Ice83a74e70122008e090af3b818b9920abf7f5bc",resolve_svdf_test.cc,"@@ -77,8 +77,8 @@ class ResolveSvdfTest : public ::testing::Test { ~ResolveSvdfTest() override {} protected: - void AddNewNode(const string& name, const string& op, - const std::vector& inputs) { + void AddNewNode(const std::string& name, const std::string& op, + const std::vector& inputs) { NodeDef* node = graph_.add_node(); node->set_name(name); node->set_op(op); @@ -89,8 +89,8 @@ class ResolveSvdfTest : public ::testing::Test { } } - void AddNewNode(const string& name, const string& op, - const std::vector& inputs, + void AddNewNode(const std::string& name, const std::string& op, + const std::vector& inputs, const std::vector& values) { NodeDef* node = graph_.add_node(); node->set_name(name); @@ -109,12 +109,12 @@ class ResolveSvdfTest : public ::testing::Test { tensor_shape_dim0->set_size(values.size()); allocated_tensor->set_allocated_tensor_shape(allocated_tensor_shape); allocated_tensor->set_tensor_content( - string(reinterpret_cast(values.data()), - values.size() * sizeof(float))); + std::string(reinterpret_cast(values.data()), + values.size() * sizeof(float))); (*node->mutable_attr())[""value""].set_allocated_tensor(allocated_tensor); } - void AddShapeNode(const string& name, const std::vector& values) { + void AddShapeNode(const std::string& name, const std::vector& values) { NodeDef* node = graph_.add_node(); node->set_name(name); node->set_op(""Const""); @@ -128,8 +128,8 @@ class ResolveSvdfTest : public ::testing::Test { tensor_shape_dim0->set_size(values.size()); allocated_tensor->set_allocated_tensor_shape(allocated_tensor_shape); allocated_tensor->set_tensor_content( - string(reinterpret_cast(values.data()), - values.size() * sizeof(int))); + std::string(reinterpret_cast(values.data()), + values.size() * sizeof(int))); (*node->mutable_attr())[""value""].set_allocated_tensor(allocated_tensor); } @@ -157,12 +157,12 @@ TEST_F(ResolveSvdfTest, TestTranspose2DTensor) { } TEST_F(ResolveSvdfTest, TestResolveSvdfFlow) { - std::unordered_map is_node_in_cluster; + std::unordered_map is_node_in_cluster; for (const NodeDef& node : graph_.node()) { is_node_in_cluster[node.name()] = false; } - std::vector cluster_names; + std::vector cluster_names; CHECK(FindCluster(svdf_cluster_factory_, graph_, &is_node_in_cluster, &clusters_)); @@ -174,7 +174,7 @@ TEST_F(ResolveSvdfTest, TestResolveSvdfFlow) { EXPECT_THAT(cluster_names, testing::UnorderedElementsAreArray({""Svdf1"", ""Svdf2""})); - std::vector new_node_names; + std::vector new_node_names; std::vector content_array(3); for (const std::unique_ptr& cluster : clusters_) { // After CreateNodes in each cluster we have three nodes: Svdf, ",0,train 9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs(). This is a preparation step to remove getAttrs() from OpState. 
PiperOrigin-RevId: 360159716 Change-Id: I185103ded7c111c19f9a3177514221230469e22d",hlo_legalize_to_lhlo.cc,"@@ -424,7 +424,7 @@ struct HloToLhloReduceOpConverter : public BaseOpConversion { buffer_args.push_back(InsertAlloc(loc, result, &rewriter)); } auto new_op = rewriter.create(loc, llvm::None, buffer_args, - op.getAttrs()); + op->getAttrs()); // Copy over the operations inside the region. rewriter.inlineRegionBefore(op.body(), new_op.body(), new_op.body().end()); ",0,train 9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs(). This is a preparation step to remove getAttrs() from OpState. PiperOrigin-RevId: 360159716 Change-Id: I185103ded7c111c19f9a3177514221230469e22d",lower_static_tensor_list.cc,"@@ -816,7 +816,7 @@ struct ConvertIdentity : public OpConversionPattern { ConversionPatternRewriter &rewriter) const override { Value input = operands[0]; rewriter.replaceOpWithNewOp(op, input.getType(), operands, - op.getAttrs()); + op->getAttrs()); return success(); } }; @@ -948,7 +948,7 @@ struct ConvertWhile : public OpConversionPattern { // Create a new while op with new operands and updated result types. auto converted = rewriter.create(op.getLoc(), result_types, - operands, op.getAttrs()); + operands, op->getAttrs()); converted.removeAttr(""T""); (void)UpdateFunctionTypes(rewriter, converted, tensor_list_args); @@ -972,7 +972,7 @@ struct ConvertWhileRegion : public OpConversionPattern { // Create a new while op with new operands and updated result types. auto converted = rewriter.create( - op.getLoc(), result_types, operands, op.getAttrs()); + op.getLoc(), result_types, operands, op->getAttrs()); // Inline the regions from the old while into the new one, and apply // signature conversion to inlined region. ",0,train 9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs(). This is a preparation step to remove getAttrs() from OpState. PiperOrigin-RevId: 360159716 Change-Id: I185103ded7c111c19f9a3177514221230469e22d",while_loop_outline.cc,"@@ -254,7 +254,7 @@ void WhileOutlinePass::OutlineWhile(WhileOp while_op) { new_types.push_back(extra_operand.getType()); auto new_while_op = OpBuilder(while_op).create( - while_op.getLoc(), new_types, operands, while_op.getAttrs()); + while_op.getLoc(), new_types, operands, while_op->getAttrs()); new_while_op.cond().takeBody(while_op.cond()); new_while_op.body().takeBody(while_op.body()); while_op.replaceAllUsesWith( ",0,train 9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs(). This is a preparation step to remove getAttrs() from OpState. PiperOrigin-RevId: 360159716 Change-Id: I185103ded7c111c19f9a3177514221230469e22d",tf_executor.cc,"@@ -213,7 +213,7 @@ LogicalResult Verify(GraphOp graph) { void Print(GraphOp graph, OpAsmPrinter &p) { p << graph.getOperationName(); p.printRegion(graph.getOperation()->getRegion(0)); - p.printOptionalAttrDict(graph.getAttrs()); + p.printOptionalAttrDict(graph->getAttrs()); } ParseResult ParseGraphOp(OpAsmParser &parser, OperationState &result) { @@ -321,7 +321,7 @@ void Print(IslandOp op, OpAsmPrinter &p) { // Check if we can print the short ""wraps"" form: that is if the island // contains a single operation and the result of this operation are perfectly // forwarded to the yield. 
- if (op.getAttrs().empty() && op.WrapsSingleOp()) { + if (op->getAttrs().empty() && op.WrapsSingleOp()) { Operation &wrapped_op = op.GetBody().front(); YieldOp yield_op = op.GetYield(); // The ""wraps"" syntax only encodes a single location. @@ -335,7 +335,7 @@ void Print(IslandOp op, OpAsmPrinter &p) { } } p.printRegion(op.getOperation()->getRegion(0)); - p.printOptionalAttrDict(op.getAttrs()); + p.printOptionalAttrDict(op->getAttrs()); } ParseResult ParseIslandOp(OpAsmParser &parser, OperationState &result) { @@ -449,7 +449,7 @@ void Print(SwitchOp switch_op, OpAsmPrinter &p) { } else { p << switch_op.getType(0); } - p.printOptionalAttrDict(switch_op.getAttrs()); + p.printOptionalAttrDict(switch_op->getAttrs()); } } // anonymous namespace @@ -525,7 +525,7 @@ void Print(SwitchNOp switchn, OpAsmPrinter &p) { p << "")""; } p << "" : "" << switchn.getType(0); - p.printOptionalAttrDict(switchn.getAttrs(), {""num_outs""}); + p.printOptionalAttrDict(switchn->getAttrs(), {""num_outs""}); } ParseResult ParseSwitchNOp(OpAsmParser &parser, OperationState &result) { @@ -655,7 +655,7 @@ void Print(MergeOp merge, OpAsmPrinter &p) { p << output_type; } - p.printOptionalAttrDict(merge.getAttrs()); + p.printOptionalAttrDict(merge->getAttrs()); } ParseResult ParseMergeOp(OpAsmParser &parser, OperationState &result) { @@ -723,7 +723,7 @@ void Print(EnterOp enter, OpAsmPrinter &p) { p << enter.getType(0); } - p.printOptionalAttrDict(enter.getAttrs(), + p.printOptionalAttrDict(enter->getAttrs(), {""frame_name"", ""parallel_iterations"", ""is_constant""}); } @@ -843,7 +843,7 @@ void Print(ExitOp exit, OpAsmPrinter &p) { p << exit.getOperationName() << ' '; p.printOperands(exit.getOperands()); p << "" : "" << exit.getType(0); - p.printOptionalAttrDict(exit.getAttrs()); + p.printOptionalAttrDict(exit->getAttrs()); } ParseResult ParseExitOp(OpAsmParser &parser, OperationState &result) { @@ -887,7 +887,7 @@ void Print(LoopCondOp loop_cond, OpAsmPrinter &p) { p << "" : "" << loop_cond.input().getType(); } - p.printOptionalAttrDict(loop_cond.getAttrs()); + p.printOptionalAttrDict(loop_cond->getAttrs()); } ParseResult ParseLoopCondOp(OpAsmParser &parser, OperationState &result) { ",0,train 9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs(). This is a preparation step to remove getAttrs() from OpState. PiperOrigin-RevId: 360159716 Change-Id: I185103ded7c111c19f9a3177514221230469e22d",fused_kernel_matcher.cc,"@@ -156,7 +156,7 @@ class FuseContractionWithBiasAdd : public OpRewritePattern { // The fused contraction has the same attributes as the original // contraction, with two additions: the list of ops which have been fused // together; epsilon (only with FusedBatchNorm). - std::vector attrs = contraction.getAttrs(); + std::vector attrs = contraction->getAttrs(); ArrayAttr fused_ops_attr = ArrayAttr::get(context, fused_ops); attrs.push_back( NamedAttribute(Identifier::get(""fused_ops"", context), fused_ops_attr)); ",0,train 9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs(). This is a preparation step to remove getAttrs() from OpState. 
PiperOrigin-RevId: 360159716 Change-Id: I185103ded7c111c19f9a3177514221230469e22d",gpu_fusion.cc,"@@ -96,7 +96,7 @@ struct ReluToFusedBatchNorm : public OpRewritePattern { state.addOperands(batch_norm.getOperands()); if (side_input) state.operands.push_back(side_input); state.addTypes(batch_norm.getResultTypes()); - state.addAttributes(batch_norm.getAttrs()); + state.addAttributes(batch_norm->getAttrs()); Operation *op = rewriter.createOperation(state); rewriter.replaceOp(batch_norm, op->getResults()); ",0,train 9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs(). This is a preparation step to remove getAttrs() from OpState. PiperOrigin-RevId: 360159716 Change-Id: I185103ded7c111c19f9a3177514221230469e22d",resource_op_lifting.cc,"@@ -931,7 +931,7 @@ LogicalResult HandleWhileLoop(TF::WhileOp while_op, FuncOp body, FuncOp cond) { while_op.getLoc(), body.getType().getResults(), FilterRange(while_op.getOperands(), resource_arg_uses), - while_op.getAttrs()); + while_op->getAttrs()); // Prepare for AddLoadsStoresOutsideControlFlowOp(). llvm::SmallDenseMap> arg_data_type_and_updated_output_index; @@ -1035,7 +1035,7 @@ LogicalResult HandleCaseOrIfOp(CaseOrIfOp op, ArrayRef branches) { FuncOp first_func = branches.front(); auto new_op = builder.create(op.getLoc(), first_func.getType().getResults(), - new_operands, op.getAttrs()); + new_operands, op->getAttrs()); // Prepare for AddLoadsStoresOutsideControlFlowOp() llvm::SmallDenseMap> arg_data_type_and_updated_output_index; @@ -1179,7 +1179,7 @@ void UpdatePartitionedCallOpWithNewCallee( FilterRange(call_op.args(), lifting_info.use_info); auto new_call = builder.create( call_op.getLoc(), lifting_info.lifted_callee.getType().getResults(), - new_operands, call_op.getAttrs()); + new_operands, call_op->getAttrs()); new_call->setAttr( ""f"", builder.getSymbolRefAttr(lifting_info.lifted_callee.getName())); AddLoadsStoresOutsideControlFlowOp( ",0,train 9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs(). This is a preparation step to remove getAttrs() from OpState. PiperOrigin-RevId: 360159716 Change-Id: I185103ded7c111c19f9a3177514221230469e22d",stack_ops_decomposition.cc,"@@ -204,7 +204,7 @@ LogicalResult HandleWhileOp( } auto new_while = builder.create(while_op.getLoc(), body.getType().getInputs(), - new_while_operands, while_op.getAttrs()); + new_while_operands, while_op->getAttrs()); for (int64_t i = 0; i < while_op.getNumResults(); ++i) { if (!getElementTypeOrSelf(while_op.getOperand(i).getType()) .isa()) { @@ -257,7 +257,7 @@ LogicalResult HandleIfOp( } auto new_if = OpBuilder(if_op).create( if_op.getLoc(), then_func.getType().getResults(), new_if_operands, - if_op.getAttrs()); + if_op->getAttrs()); for (auto result : if_op.getResults()) { if (!getElementTypeOrSelf(result.getType()).isa()) { continue; @@ -306,7 +306,7 @@ LogicalResult HandlePartitionedCallOp( OpBuilder builder(call); auto new_call = builder.create( call.getLoc(), info.decomposed_callee.getType().getResults(), - new_operands, call.getAttrs()); + new_operands, call->getAttrs()); new_call->setAttr( ""f"", builder.getSymbolRefAttr( const_cast(info.decomposed_callee).getName())); ",0,train 9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs(). This is a preparation step to remove getAttrs() from OpState. 
PiperOrigin-RevId: 360159716 Change-Id: I185103ded7c111c19f9a3177514221230469e22d",tensor_array_ops_decomposition.cc,"@@ -625,7 +625,7 @@ LogicalResult HandleWhileOp(TF::WhileOp while_op, ModuleOp module, OpBuilder builder(while_op); auto new_while = builder.create(while_op.getLoc(), body.getType().getInputs(), - operands, while_op.getAttrs()); + operands, while_op->getAttrs()); for (int64_t i = 0; i < while_op.getNumOperands(); ++i) { if (ta_arg_buffer_type(i)) { while_op.getResult(i).replaceAllUsesWith(while_op.getOperand(i)); @@ -692,7 +692,7 @@ LogicalResult HandleIfOp(TF::IfOp if_op, ModuleOp module, OpBuilder builder(if_op); auto new_if = builder.create(if_op.getLoc(), then_branch.getType().getResults(), - operands, if_op.getAttrs()); + operands, if_op->getAttrs()); auto ret_forwards_input = [](FuncOp f, int64_t ret_ind) -> int64_t { auto retval = f.front().getTerminator()->getOperand(ret_ind); auto arg = retval.dyn_cast(); @@ -751,7 +751,7 @@ LogicalResult HandlePartitionedCallOp( OpBuilder builder(call); auto new_call = builder.create( call.getLoc(), info.decomposed_callee.getType().getResults(), - new_operands, call.getAttrs()); + new_operands, call->getAttrs()); new_call->setAttr( ""f"", builder.getSymbolRefAttr( const_cast(info.decomposed_callee).getName())); ",0,train 9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs(). This is a preparation step to remove getAttrs() from OpState. PiperOrigin-RevId: 360159716 Change-Id: I185103ded7c111c19f9a3177514221230469e22d",tpu_extract_head_tail_outside_compilation.cc,"@@ -326,7 +326,7 @@ tf_device::ClusterOp UpdateClusterResults( auto new_cluster = builder->create( cluster.getLoc(), new_cluster_result_types, - /*operands=*/llvm::ArrayRef{}, cluster.getAttrs()); + /*operands=*/llvm::ArrayRef{}, cluster->getAttrs()); new_cluster.body().takeBody(cluster.body()); auto operand_not_in_cluster = [&](OpOperand& operand) { @@ -400,7 +400,7 @@ void RemoveClusterAliasedOutputs(OpBuilder* builder, builder->setInsertionPoint(cluster); auto new_cluster = builder->create( cluster.getLoc(), new_cluster_result_types, - /*operands=*/llvm::ArrayRef{}, cluster.getAttrs()); + /*operands=*/llvm::ArrayRef{}, cluster->getAttrs()); new_cluster.body().takeBody(cluster.body()); new_cluster.GetBody().getTerminator()->setOperands(new_cluster_results); ",0,train 9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs(). This is a preparation step to remove getAttrs() from OpState. PiperOrigin-RevId: 360159716 Change-Id: I185103ded7c111c19f9a3177514221230469e22d",tpu_reorder_replicate_and_partitioned_inputs.cc,"@@ -94,13 +94,13 @@ LogicalResult ReorderReplicateAndPartitionedInputs( for (const auto& operands_per_replica : operands_per_replica_per_core) { auto replicate_op = builder.create( replicated_input.getLoc(), replicated_input.getType(), - operands_per_replica, replicated_input.getAttrs()); + operands_per_replica, replicated_input->getAttrs()); operands_per_core.push_back(replicate_op); } auto pi = builder.create( first_partitioned_input.getLoc(), replicated_input.getType(), - operands_per_core, first_partitioned_input.getAttrs()); + operands_per_core, first_partitioned_input->getAttrs()); replicated_input.replaceAllUsesWith(pi.output()); return success(); } ",0,train a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL. 
PiperOrigin-RevId: 335916814 Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",depthwise_conv.cc,"@@ -67,17 +67,18 @@ std::string GetSrcValue(int channel_multiplier, const std::string coords) { return c; } -std::string GenerateDepthwiseConvolutionCode(const OperationDef& op_def, - bool stride_correction, - int channel_multiplier, - bool weights_are_buffer, - GPUOperation* op) { +std::string GenerateDepthwiseConvolutionCode( + const OperationDef& op_def, bool stride_correction, int channel_multiplier, + bool weights_are_buffer, bool dynamic_weights, GPUOperation* op) { auto src_desc = op_def.src_tensors[0]; src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); if (op_def.IsBatchSupported()) { src_desc.SetStateVar(""BatchedWidth"", ""true""); } op->AddSrcTensor(""src_tensor"", src_desc); + if (dynamic_weights) { + op->AddSrcTensor(""weights"", op_def.src_tensors[1]); + } auto dst_desc = op_def.dst_tensors[0]; if (op_def.IsBatchSupported()) { @@ -122,16 +123,24 @@ std::string GenerateDepthwiseConvolutionCode(const OperationDef& op_def, } } c += "" int y_offseted = Y * args.stride_y + args.padding_y;\n""; - std::string weights_offset = ""args.kernel_size_x * args.kernel_size_y""; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { - c += "" int z_offseted = Z * args.stride_z + args.padding_z;\n""; - weights_offset += "" * args.kernel_size_z""; - } - if (weights_are_buffer) { - c += "" int fx_c = S * "" + weights_offset + "";\n""; - } else { - c += "" int fx_c = 0;\n""; + if (!dynamic_weights) { + std::string weights_offset = ""args.kernel_size_x * args.kernel_size_y""; + if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { + c += "" int z_offseted = Z * args.stride_z + args.padding_z;\n""; + weights_offset += "" * args.kernel_size_z""; + } + if (weights_are_buffer) { + c += "" int fx_c = S * "" + weights_offset + "";\n""; + } else { + c += "" int fx_c = 0;\n""; + } } + std::string kernel_size_x = + dynamic_weights ? ""args.weights.Width()"" : ""args.kernel_size_x""; + std::string kernel_size_y = + dynamic_weights ? ""args.weights.Height()"" : ""args.kernel_size_y""; + std::string kernel_size_z = + dynamic_weights ? ""args.weights.Depth()"" : ""args.kernel_size_z""; std::string flat_coords = ""x_c, y_c""; if (manual_clamp) { @@ -139,29 +148,35 @@ std::string GenerateDepthwiseConvolutionCode(const OperationDef& op_def, if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { check += "" && !outside_z""; flat_coords += "", z_c""; - c += "" for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n""; + c += "" for (int kz = 0; kz < "" + kernel_size_z + ""; ++kz) {\n""; c += "" int z_c = z_offseted + kz * args.dilation_z;\n""; c += "" bool outside_z = z_c < 0 || z_c >= args.src_tensor.Depth();\n""; } - c += "" for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n""; + c += "" for (int ky = 0; ky < "" + kernel_size_y + ""; ++ky) {\n""; c += "" int y_c = y_offseted + ky * args.dilation_y;\n""; c += "" bool outside_y = y_c < 0 || y_c >= args.src_tensor.Height();\n""; - c += "" for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n""; + c += "" for (int kx = 0; kx < "" + kernel_size_x + ""; ++kx) {\n""; const std::string dilation_x = op_def.IsBatchSupported() ? 
""args.dilation_x * args.src_tensor.Batch()"" : ""args.dilation_x""; c += "" int x_c = x_offseted + kx * "" + dilation_x + "";\n""; c += "" bool outside_x = x_c < 0 || x_c >= args.src_tensor.Width();\n""; c += "" if ("" + check + "") {\n""; - if (weights_are_buffer) { - c += "" FLT4 f = args.weights.Read(fx_c);\n""; + if (dynamic_weights) { + c += "" FLT4 f = args.weights.Read(kx, ky, S);\n""; } else { - c += "" FLT4 f = args.weights.Read(fx_c, S);\n""; + if (weights_are_buffer) { + c += "" FLT4 f = args.weights.Read(fx_c);\n""; + } else { + c += "" FLT4 f = args.weights.Read(fx_c, S);\n""; + } } c += GetSrcValue(channel_multiplier, flat_coords); c += "" r += TO_ACCUM_TYPE(src_final * f);\n""; c += "" };\n""; - c += "" fx_c++;\n""; + if (!dynamic_weights) { + c += "" fx_c++;\n""; + } c += "" }\n""; c += "" }\n""; if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { @@ -170,7 +185,7 @@ std::string GenerateDepthwiseConvolutionCode(const OperationDef& op_def, } else { // Texture types with ZERO clamping if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { flat_coords += "", z_c""; - c += "" for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n""; + c += "" for (int kz = 0; kz < "" + kernel_size_z + ""; ++kz) {\n""; c += "" int z_c = z_offseted + kz * args.dilation_z;\n""; if (src_tensor_type != TensorStorageType::TEXTURE_3D) { // Only TEXTURE_3D supports clamping @@ -181,20 +196,24 @@ std::string GenerateDepthwiseConvolutionCode(const OperationDef& op_def, c += "" }\n""; } } - c += "" for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n""; + c += "" for (int ky = 0; ky < "" + kernel_size_y + ""; ++ky) {\n""; c += "" int y_c = y_offseted + ky * args.dilation_y;\n""; - c += "" for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n""; + c += "" for (int kx = 0; kx < "" + kernel_size_x + ""; ++kx) {\n""; const std::string dilation_x = op_def.IsBatchSupported() ? 
""args.dilation_x * args.src_tensor.Batch()"" : ""args.dilation_x""; c += "" int x_c = x_offseted + kx * "" + dilation_x + "";\n""; c += GetSrcValue(channel_multiplier, flat_coords); - if (weights_are_buffer) { - c += "" FLT4 f = args.weights.Read(fx_c);\n""; + if (dynamic_weights) { + c += "" FLT4 f = args.weights.Read(kx, ky, S);\n""; } else { - c += "" FLT4 f = args.weights.Read(fx_c, S);\n""; + if (weights_are_buffer) { + c += "" FLT4 f = args.weights.Read(fx_c);\n""; + } else { + c += "" FLT4 f = args.weights.Read(fx_c, S);\n""; + } + c += "" fx_c++;\n""; } - c += "" fx_c++;\n""; c += "" r += TO_ACCUM_TYPE(src_final * f);\n""; c += "" }\n""; c += "" }\n""; @@ -234,7 +253,7 @@ GPUOperation CreateDepthwiseConvolution2D( definition.IsBatchSupported() && attr.strides.w != 1; op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, attr.weights.shape.o, - weights_are_buffer, &op); + weights_are_buffer, false, &op); UploadWeightsForDWConv2D(attr.weights, weights_are_buffer, definition.precision, &op); op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; @@ -249,6 +268,32 @@ GPUOperation CreateDepthwiseConvolution2D( return op; } +GPUOperation CreateDepthwiseConvolution2DDynamicWeights( + const DeviceInfo& device_info, const OperationDef& definition, + const DepthwiseConvolution2DAttributes& attr) { + GPUOperation op(definition); + op.args_.AddInt(""stride_x"", attr.strides.w); + op.args_.AddInt(""padding_x"", -attr.padding.prepended.w); + op.args_.AddInt(""dilation_x"", attr.dilations.w); + op.args_.AddInt(""stride_y"", attr.strides.h); + op.args_.AddInt(""padding_y"", -attr.padding.prepended.h); + op.args_.AddInt(""dilation_y"", attr.dilations.h); + const bool stride_correction = + definition.IsBatchSupported() && attr.strides.w != 1; + op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, 1, + false, true, &op); + op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; + + TensorLinearDescriptor desc; + desc.storage_type = device_info.IsMali() ? LinearStorageType::BUFFER + : LinearStorageType::TEXTURE_2D; + desc.element_type = definition.GetDataType(); + desc.UploadLinearData(attr.bias); + op.args_.AddObject( + ""biases"", absl::make_unique(std::move(desc))); + return op; +} + GPUOperation CreateDepthwiseConvolution3D( const DeviceInfo& device_info, const OperationDef& definition, const DepthwiseConvolution3DAttributes& attr) { @@ -273,7 +318,7 @@ GPUOperation CreateDepthwiseConvolution3D( definition.IsBatchSupported() && attr.strides.w != 1; op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, attr.weights.shape.o, - weights_are_buffer, &op); + weights_are_buffer, false, &op); UploadWeightsForDWConv3D(attr.weights, weights_are_buffer, definition.precision, &op); op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; ",0,train a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL. 
PiperOrigin-RevId: 335916814 Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",depthwise_conv.h,"@@ -186,6 +186,10 @@ GPUOperation CreateDepthwiseConvolution2D( const DeviceInfo& device_info, const OperationDef& definition, const DepthwiseConvolution2DAttributes& attr); +GPUOperation CreateDepthwiseConvolution2DDynamicWeights( + const DeviceInfo& device_info, const OperationDef& definition, + const DepthwiseConvolution2DAttributes& attr); + GPUOperation CreateDepthwiseConvolution3D( const DeviceInfo& device_info, const OperationDef& definition, const DepthwiseConvolution3DAttributes& attr); ",0,train a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL. PiperOrigin-RevId: 335916814 Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",operation_selector.cc,"@@ -315,7 +315,16 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info, case OperationType::DEPTHWISE_CONVOLUTION: { auto attr = absl::any_cast( node.operation.attributes); - *gpu_op = SelectDWConvolution(attr, device_info, op_def); + if (inputs.size() == 1) { + *gpu_op = SelectDWConvolution(attr, device_info, op_def); + } else { + if (inputs[1]->tensor.shape.b != 1) { + return absl::UnimplementedError( + ""No support of depthwise runtime weights with channel multiplier "" + ""!= 1""); + } + *gpu_op = SelectDWConvolutionDynamicWeights(attr, device_info, op_def); + } return absl::OkStatus(); } case OperationType::FULLY_CONNECTED: { ",0,train a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL. PiperOrigin-RevId: 335916814 Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",simple_selectors.cc,"@@ -22,6 +22,7 @@ limitations under the License. #include ""tensorflow/lite/delegates/gpu/cl/kernels/add.h"" #include ""tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h"" #include ""tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h"" +#include ""tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h"" #include ""tensorflow/lite/delegates/gpu/cl/kernels/lstm.h"" #include ""tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h"" #include ""tensorflow/lite/delegates/gpu/cl/kernels/mean.h"" @@ -110,6 +111,13 @@ absl::Status SelectConcat(const ConcatAttributes& attr, } } +std::unique_ptr SelectDWConvolutionDynamicWeights( + const DepthwiseConvolution2DAttributes& attr, const DeviceInfo& device_info, + const OperationDef& op_def) { + return absl::make_unique( + CreateDepthwiseConvolution2DDynamicWeights(device_info, op_def, attr)); +} + void SelectReshape(int src_channels, int dst_channels, const OperationDef& op_def, std::unique_ptr* ptr) { ",0,train a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL. PiperOrigin-RevId: 335916814 Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",simple_selectors.h,"@@ -57,6 +57,10 @@ absl::Status SelectConcat(const ConcatAttributes& attr, const DeviceInfo& device_info, std::unique_ptr* ptr); +std::unique_ptr SelectDWConvolutionDynamicWeights( + const DepthwiseConvolution2DAttributes& attr, const DeviceInfo& device_info, + const OperationDef& op_def); + void SelectReshape(int src_channels, int dst_channels, const OperationDef& op_def, std::unique_ptr* ptr); ",0,train a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL. 
PiperOrigin-RevId: 335916814 Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",special_selector.cc,"@@ -40,6 +40,10 @@ absl::Status TryDepthwiseConvPlus1x1Conv( OperationType::DEPTHWISE_CONVOLUTION) { return absl::NotFoundError(""DepthwiseConvPlus1x1Conv not suitable.""); } + auto dw_inputs = graph.FindInputs(dw_node->id); + if (dw_inputs.size() != 1) { + return absl::NotFoundError(""DepthwiseConvPlus1x1Conv not suitable.""); + } auto dw_outputs = graph.FindOutputs(dw_node->id); auto consumers = graph.FindConsumers(dw_outputs[0]->id); if (consumers.size() != 1) { @@ -60,7 +64,6 @@ absl::Status TryDepthwiseConvPlus1x1Conv( dw_node->operation.attributes); auto conv_attr = absl::any_cast(conv_node->operation.attributes); - auto dw_inputs = graph.FindInputs(dw_node->id); auto conv_outputs = graph.FindOutputs(conv_node->id); OperationDef op_def; op_def.precision = precision; ",0,train a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL. PiperOrigin-RevId: 335916814 Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",model_builder.cc,"@@ -511,9 +511,22 @@ class DepthwiseConvolutionOperationParser : public TFLiteOperationParser { const TfLiteNode* tflite_node, const TfLiteRegistration* registration) final { RETURN_IF_ERROR(CheckMaxSupportedOpVersion(registration, 6)); - RETURN_IF_ERROR(CheckInputsOutputs(context, tflite_node, - /*runtime_inputs=*/1, /*outputs=*/1)); - RETURN_IF_ERROR(CheckTensorIsAvailable(context, tflite_node, 1)); + const int runtime_inputs = + GetNumberOfRuntimeInputsForNode(context, tflite_node); + if (runtime_inputs > 2) { + return absl::InternalError( + absl::StrCat(""Expected 1 or 2 input tensor(s), but node has "", + runtime_inputs, "" runtime inputs."")); + } + const int runtime_outputs = NumOutputs(tflite_node); + if (runtime_outputs != 1) { + return absl::InternalError( + absl::StrCat(""Expected 1 output tensor(s), but node has "", + runtime_outputs, "" runtime outputs."")); + } + if (runtime_inputs == 1) { + RETURN_IF_ERROR(CheckTensorIsAvailable(context, tflite_node, 1)); + } const TfLiteDepthwiseConvParams* tf_options; RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options)); RETURN_IF_ERROR(CheckStridesAndDilation( @@ -567,7 +580,12 @@ class DepthwiseConvolutionOperationParser : public TFLiteOperationParser { RETURN_IF_ERROR(reader->AddOutputs(node)); DepthwiseConvolution2DAttributes attr; - RETURN_IF_ERROR(reader->ReadTensor(1, &attr.weights)); + const int runtime_inputs = reader->GetNumberOfRuntimeInputs(); + if (runtime_inputs == 2) { + RETURN_IF_ERROR(reader->AddInput(node, 1)); + } else { // runtime_inputs == 1; + RETURN_IF_ERROR(reader->ReadTensor(1, &attr.weights)); + } reader->ReadTensor(2, &attr.bias).IgnoreError(); // bias is optional const TfLiteDepthwiseConvParams* tf_options; RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options)); ",0,train a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL. 
PiperOrigin-RevId: 335916814 Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",add_bias.cc,"@@ -70,6 +70,12 @@ class AddBias : public NodeTransformation { } if (node->operation.type == ToString(OperationType::DEPTHWISE_CONVOLUTION)) { + if (graph->FindInputs(node->id).size() != 1) { + return {TransformStatus::DECLINED, + ""This transformation is only applicable to depth wise conv "" + ""with one "" + ""runtime input.""}; + } auto& attr = absl::any_cast( node->operation.attributes); return FillBias(attr.weights.shape.o * attr.weights.shape.i, &attr.bias); ",0,train a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL. PiperOrigin-RevId: 335916814 Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",fuse_add_to_conv.cc,"@@ -54,6 +54,10 @@ class MergeConvolutionWithAdd : public SequenceTransformation { TransformResult ApplyToNodesSequence(const std::vector& sequence, GraphFloat32* graph) final { auto& conv_node = *sequence[0]; + if (graph->FindInputs(conv_node.id).size() != 1) { + return {TransformStatus::DECLINED, + ""This fusion is only applicable to ops with one runtime input.""}; + } auto& add_node = *sequence[1]; if (add_node.operation.type != ToString(OperationType::ADD)) { return {TransformStatus::SKIPPED, """"}; ",0,train a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL. PiperOrigin-RevId: 335916814 Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",depthwise_conv.cc,"@@ -38,6 +38,10 @@ class DepthwiseConvolution : public NodeShader { public: absl::Status GenerateCode(const GenerationContext& ctx, GeneratedCode* generated_code) const final { + if (ctx.input_shapes.size() != 1) { + return absl::UnimplementedError( + ""DepthWise Convolution does not support more than 1 runtime tensor""); + } const auto& attr = absl::any_cast(ctx.op_attr); auto weights = attr.weights.shape; ",0,train a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL. PiperOrigin-RevId: 335916814 Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",api.cc,"@@ -267,6 +267,11 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, device_info, options); break; case OperationType::DEPTHWISE_CONVOLUTION: + if (graph.FindInputs(node->id).size() != 1) { + return absl::UnimplementedError( + ""DepthWise Convolution does not support more than 1 runtime "" + ""tensor""); + } *tasks = SelectDepthWiseConv(node_id, inputs[0], outputs[0], absl::any_cast( ",0,train 1e1beefce2f40dc13f3374fdff4a83b63196d070,tensorflow/tensorflow,"250% GPU speed up of the convolution gradient computation wrt the weights for Eigen. Change: 118408303",eigen_backward_spatial_convolutions.h,"@@ -239,15 +239,43 @@ SpatialConvolutionBackwardInput( * It is possible to swap the order of the width and height dimensions provided that the same order is used in the input, the kernel, and the output. * */ -// TODO(gpapan): Resolve a bug in TensorContractionInputMapper at SpatialConvolutions.h that yangke circumvented by using .reshape().reshape(). -// This can significantly accelerate SpatialConvolutionBackwardKernel. 
template -EIGEN_ALWAYS_INLINE -static const typename internal::conditional< +EIGEN_ALWAYS_INLINE static const typename internal::conditional< internal::traits::Layout == ColMajor, - const TensorShufflingOp::Index, 4>, const TensorReverseOp, const TensorReshapingOp::Index, 4>, const TensorContractionOp::Index>, 2>, const TensorReshapingOp::Index, 3>, const Input>, const TensorReshapingOp::Index, 4>, const TensorReshapingOp::Index, 4>, const TensorImagePatchOp > > > > > >, - const TensorShufflingOp::Index, 4>, const TensorReverseOp, const TensorReshapingOp::Index, 4>, const TensorContractionOp::Index>, 2>, const TensorReshapingOp::Index, 4>, const TensorReshapingOp::Index, 4>, const TensorImagePatchOp > >, const TensorReshapingOp::Index, 3>, const Input> > > > > >::type + TensorReshapingOp< + const DSizes::Index, 4>, + const TensorContractionOp< + const array::Index>, 1>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const OutputBackward>, + const TensorShufflingOp< + const array::Index, 2>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorImagePatchOp + > + > + > + >, + TensorReshapingOp< + const DSizes::Index, 4>, + const TensorContractionOp< + const array::Index>, 1>, + const TensorShufflingOp< + const array::Index, 2>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorImagePatchOp + > + >, + const TensorReshapingOp< + const DSizes::Index, 2>, + const OutputBackward> + > + > + >::type SpatialConvolutionBackwardKernel(const Input& input, const OutputBackward& output_backward, typename internal::traits::Index kernelRows, typename internal::traits::Index kernelCols, const DenseIndex stride = 1, const DenseIndex in_stride = 1) { typedef typename internal::traits::Index TensorIndex; @@ -283,127 +311,93 @@ SpatialConvolutionBackwardKernel(const Input& input, const OutputBackward& outpu const TensorIndex kernelColsEff = kernelCols + (kernelCols - 1) * (in_stride - 1); // Computing the forward padding - const TensorIndex forward_pad_top = ((outputRows - 1) * stride + kernelRowsEff - inputRows) / 2; - const TensorIndex forward_pad_left = ((outputCols - 1) * stride + kernelColsEff - inputCols) / 2; - - // TODO: factor out the padding computation. 
- const TensorIndex padding_top = kernelRowsEff - 1 - forward_pad_top; - const TensorIndex padding_left = kernelColsEff - 1 - forward_pad_left; - const TensorIndex padding_bottom = inputRows + kernelRowsEff - 1 - (outputRows - 1) * stride - 1 - padding_top; - const TensorIndex padding_right = inputCols + kernelColsEff - 1 - (outputCols - 1) * stride - 1 - padding_left; - - eigen_assert(padding_top >= 0); - eigen_assert(padding_left >= 0); - eigen_assert(padding_bottom >= 0); - eigen_assert(padding_right >= 0); - - // The output_backward has dimensions out_depth X out_rows X out_cols X OTHERS - // When we extract the image patches from output_backward (with input as the - // kernel), it will have dimensions - // (out_depth) X (input_rows * input_cols) X (kernel_rows * kernel_cols) X OTHERS - DSizes pre_contract_dims; + const TensorIndex padRows = numext::maxi( + 0, (outputRows - 1) * stride + kernelRowsEff - inputRows); + const TensorIndex padCols = numext::maxi( + 0, (outputCols - 1) * stride + kernelColsEff - inputCols); + const TensorIndex padding_top = padRows / 2; + const TensorIndex padding_bottom = padRows - padding_top; + const TensorIndex padding_left = padCols / 2; + const TensorIndex padding_right = padCols - padding_left; + + // Reshaped out + DSizes output_dims; if (isColMajor) { - pre_contract_dims[0] = kernelFilters; - pre_contract_dims[1] = inputRows * inputCols; - pre_contract_dims[2] = kernelRows * kernelCols; - pre_contract_dims[3] = 1; + output_dims[0] = kernelFilters; + output_dims[1] = outputRows * outputCols; for (int i = 3; i < NumDims; ++i) { - pre_contract_dims[3] *= out.dimension(i); + output_dims[1] *= out.dimension(i); } } else { - pre_contract_dims[3] = kernelFilters; - pre_contract_dims[2] = inputRows * inputCols; - pre_contract_dims[1] = kernelRows * kernelCols; - pre_contract_dims[0] = 1; + output_dims[1] = kernelFilters; + output_dims[0] = outputCols * outputRows; for (int i = 0; i < NumDims - 3; ++i) { - pre_contract_dims[0] *= out.dimension(i); + output_dims[0] *= out.dimension(i); } } - // The input has dimensions in_depth X (input_rows * input_cols) X OTHERS - DSizes input_dims; + // Reshaped extract_image_patches(in) + DSizes pre_contract_dims; if (isColMajor) { - input_dims[0] = kernelChannels; - input_dims[1] = inputRows * inputCols; - input_dims[2] = 1; + pre_contract_dims[0] = kernelChannels * kernelRows * kernelCols; + pre_contract_dims[1] = outputRows * outputCols; for (int i = 3; i < NumDims; ++i) { - input_dims[2] *= in.dimension(i); + pre_contract_dims[1] *= in.dimension(i); } - eigen_assert(input_dims[2] == pre_contract_dims[3]); + eigen_assert(output_dims[1] == pre_contract_dims[1]); } else { - input_dims[2] = kernelChannels; - input_dims[1] = inputRows * inputCols; - input_dims[0] = 1; + pre_contract_dims[1] = kernelCols * kernelRows * kernelChannels; + pre_contract_dims[0] = outputRows * outputCols; for (int i = 0; i < NumDims - 3; ++i) { - input_dims[0] *= in.dimension(i); + pre_contract_dims[0] *= in.dimension(i); } - eigen_assert(input_dims[0] == pre_contract_dims[0]); + eigen_assert(output_dims[0] == pre_contract_dims[0]); } - // We will contract along dimensions (1, 2) in in and (1, 3) in out, if - // this is col-major. - // For row-major, it's dimensions (0, 1) in in and (0, 2) in out. 
- array, 2> contract_dims; - if (isColMajor) { - // col-major: in.contract(output.patches) - contract_dims[0] = IndexPair(1, 1); - contract_dims[1] = IndexPair(2, 3); - } else { - // row-major: output.patches.contract(in) - contract_dims[0] = IndexPair(0, 0); - contract_dims[1] = IndexPair(2, 1); - } + array shuffle_dims; + shuffle_dims[0] = 1; + shuffle_dims[1] = 0; - // After the contraction, the kernel will have dimension - // in_depth X out_depth X kernel_rows X kernel_cols - // We will need to shuffle the first two dimensions and reverse the latter - // two dimensions. - // The end shape is - // out_depth X in_shape X kernel_rows X kernel_cols + array, 1> contract_dims; + contract_dims[0] = IndexPair(1, 0); - // This is the shape of the kernel *before* the shuffling. + // After the contraction, the kernel will have the desired shape + // out_depth X in_shape X kernel_rows X kernel_cols DSizes kernel_dims; if (isColMajor) { - kernel_dims[0] = kernelChannels; - kernel_dims[1] = kernelFilters; + kernel_dims[0] = kernelFilters; + kernel_dims[1] = kernelChannels; kernel_dims[2] = kernelRows; kernel_dims[3] = kernelCols; } else { - kernel_dims[0] = kernelCols; + kernel_dims[3] = kernelFilters; + kernel_dims[2] = kernelChannels; kernel_dims[1] = kernelRows; - kernel_dims[2] = kernelFilters; - kernel_dims[3] = kernelChannels; - } - - array kernel_shuffle; - if (isColMajor) { - kernel_shuffle[0] = 1; - kernel_shuffle[1] = 0; - kernel_shuffle[2] = 2; - kernel_shuffle[3] = 3; - } else { - kernel_shuffle[0] = 0; - kernel_shuffle[1] = 1; - kernel_shuffle[2] = 3; - kernel_shuffle[3] = 2; - } - - array kernel_reverse; - if (isColMajor) { - kernel_reverse[0] = false; - kernel_reverse[1] = false; - kernel_reverse[2] = true; - kernel_reverse[3] = true; - } else { - kernel_reverse[0] = true; - kernel_reverse[1] = true; - kernel_reverse[2] = false; - kernel_reverse[3] = false; + kernel_dims[0] = kernelCols; } - return choose(Cond::Layout == ColMajor>(), - input.reshape(input_dims).contract(output_backward.extract_image_patches(inputRows, inputCols, in_stride, in_stride, 1, 1, stride, stride, padding_top, padding_bottom, padding_left, padding_right, OutScalar(0)).reshape(pre_contract_dims).reshape(pre_contract_dims), contract_dims).reshape(kernel_dims).reverse(kernel_reverse).shuffle(kernel_shuffle), - output_backward.extract_image_patches(inputRows, inputCols, in_stride, in_stride, 1, 1, stride, stride, padding_top, padding_bottom, padding_left, padding_right, OutScalar(0)).reshape(pre_contract_dims).reshape(pre_contract_dims).contract(input.reshape(input_dims), contract_dims).reshape(kernel_dims).reverse(kernel_reverse).shuffle(kernel_shuffle)); + return choose( + Cond::Layout == ColMajor>(), + output_backward.reshape(output_dims) + .contract( + input.extract_image_patches( + kernelRows, kernelCols, stride, stride, + in_stride, in_stride, 1, 1, padding_top, padding_bottom, + padding_left, padding_right, OutScalar(0)) + .reshape(pre_contract_dims) + .shuffle(shuffle_dims), + contract_dims) + .reshape(kernel_dims), + input.extract_image_patches( + kernelRows, kernelCols, stride, stride, + in_stride, in_stride, 1, 1, padding_top, padding_bottom, + padding_left, padding_right, OutScalar(0)) + .reshape(pre_contract_dims) + .shuffle(shuffle_dims) + .contract( + output_backward.reshape(output_dims), + contract_dims) + .reshape(kernel_dims)); } } // end namespace Eigen ",0,train cd964065fb03e0b74f338dd7d9a499d1e7544ffb,tensorflow/tensorflow,"Fix a subtle bug where we unsafely modify the list while iterating it. 
PiperOrigin-RevId: 293933459 Change-Id: I0230df64b5dbfd03e941a0d19bd5d339b414cfff",cluster_formation.cc,"@@ -100,7 +100,8 @@ void ReplaceLiveOutExternalUses(llvm::ArrayRef live_outs, Region* launch_op_region = &launch_op.body(); for (const auto& p : llvm::zip(live_outs, launch_op.getResults())) { Value from = std::get<0>(p); - for (auto& use : from.getUses()) { + // TODO(jingpu): move this to RegionUtils.h in MLIR core. + for (auto& use : llvm::make_early_inc_range(from.getUses())) { if (launch_op_region->isAncestor(use.getOwner()->getParentRegion())) continue; use.set(std::get<1>(p)); ",0,test e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",process_function_library_runtime_test.cc,"@@ -764,8 +764,8 @@ Tensor GetResourceHandle(const string& var_name, const string& container, handle.set_device(device_name); handle.set_container(container); handle.set_name(var_name); - handle.set_hash_code(MakeTypeIndex().hash_code()); - handle.set_maybe_type_name(MakeTypeIndex().name()); + handle.set_hash_code(TypeIndex::Make().hash_code()); + handle.set_maybe_type_name(TypeIndex::Make().name()); Tensor tensor(DT_RESOURCE, TensorShape({})); tensor.scalar()() = handle; return tensor; ",0,train e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",resource_mgr.h,"@@ -301,7 +301,7 @@ ResourceHandle MakeResourceHandle( return MakeResourceHandle( container.empty() ? ctx->resource_manager()->default_container() : container, - name, *ctx->device(), MakeTypeIndex(), dtypes_and_shapes); + name, *ctx->device(), TypeIndex::Make(), dtypes_and_shapes); } template @@ -311,7 +311,7 @@ ResourceHandle MakeResourceHandle( return MakeResourceHandle( container.empty() ? ctx->resource_manager()->default_container() : container, - name, *ctx->device(), MakeTypeIndex(), dtypes_and_shapes); + name, *ctx->device(), TypeIndex::Make(), dtypes_and_shapes); } Status MakeResourceHandleToOutput(OpKernelContext* context, int output_index, @@ -589,7 +589,7 @@ Status ResourceMgr::Create(const string& container, const string& name, CheckDeriveFromResourceBase(); CHECK(resource != nullptr); mutex_lock l(mu_); - return DoCreate(container, MakeTypeIndex(), name, resource); + return DoCreate(container, TypeIndex::Make(), name, resource); } template @@ -635,7 +635,7 @@ template Status ResourceMgr::LookupInternal(const string& container, const string& name, T** resource) const { ResourceBase* found = nullptr; - Status s = DoLookup(container, MakeTypeIndex(), name, &found); + Status s = DoLookup(container, TypeIndex::Make(), name, &found); if (s.ok()) { // It's safe to down cast 'found' to T* since // typeid(T).hash_code() is part of the map key. 
@@ -660,7 +660,7 @@ Status ResourceMgr::LookupOrCreate(const string& container, const string& name, s = LookupInternal(container, name, resource); if (s.ok()) return s; TF_RETURN_IF_ERROR(creator(resource)); - s = DoCreate(container, MakeTypeIndex(), name, *resource); + s = DoCreate(container, TypeIndex::Make(), name, *resource); if (!s.ok()) { return errors::Internal(""LookupOrCreate failed unexpectedly""); } @@ -671,7 +671,7 @@ Status ResourceMgr::LookupOrCreate(const string& container, const string& name, template Status ResourceMgr::Delete(const string& container, const string& name) { CheckDeriveFromResourceBase(); - return DoDelete(container, MakeTypeIndex(), name); + return DoDelete(container, TypeIndex::Make(), name); } template @@ -710,7 +710,7 @@ Status ValidateDevice(OpKernelContext* ctx, const ResourceHandle& p); template Status ValidateDeviceAndType(OpKernelContext* ctx, const ResourceHandle& p) { TF_RETURN_IF_ERROR(internal::ValidateDevice(ctx, p)); - auto type_index = MakeTypeIndex(); + auto type_index = TypeIndex::Make(); if (type_index.hash_code() != p.hash_code()) { return errors::InvalidArgument( ""Trying to access resource using the wrong type. Expected "", @@ -883,7 +883,7 @@ ResourceHandle ScopedStepContainer::MakeResourceHandle( mutex_lock ml(mu_); dirty_ = true; return tensorflow::MakeResourceHandle(container_, name, device, - MakeTypeIndex(), {}); + TypeIndex::Make(), {}); } template ",0,train e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",resource_op_kernel.h,"@@ -105,7 +105,7 @@ class ResourceOpKernel : public OpKernel { if (has_resource_type_) { OP_REQUIRES_OK(context, MakeResourceHandleToOutput( context, 0, cinfo_.container(), cinfo_.name(), - MakeTypeIndex())); + TypeIndex::Make())); } else { context->set_output_ref(0, &mu_, handle_.AccessTensor(context)); } ",0,train e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",variant.h,"@@ -144,7 +144,7 @@ void EncodeVariant(const T& value, string* buf); // Variant y_type_unknown = serialized_proto_f; // Store serialized Variant. // // EXPECT_EQ(x.TypeName(), y_type_unknown.TypeName()); // Looks like Foo. -// EXPECT_EQ(MakeTypeIndex(), +// EXPECT_EQ(TypeIndex::Make(), // y_type_unknown.TypeId()); // class Variant { @@ -227,7 +227,7 @@ class Variant { // of the original type when a TensorValueDataProto is stored as the // value. In this case, it returns the TypeIndex of TensorValueDataProto. TypeIndex TypeId() const { - const TypeIndex VoidTypeIndex = MakeTypeIndex(); + const TypeIndex VoidTypeIndex = TypeIndex::Make(); if (is_empty()) { return VoidTypeIndex; } @@ -244,7 +244,7 @@ class Variant { // otherwise. template T* get() { - const TypeIndex TTypeIndex = MakeTypeIndex(); + const TypeIndex TTypeIndex = TypeIndex::Make(); if (is_empty() || (TTypeIndex != TypeId())) return nullptr; return std::addressof(static_cast*>(GetValue())->value); } @@ -253,7 +253,7 @@ class Variant { // otherwise. 
template const T* get() const { - const TypeIndex TTypeIndex = MakeTypeIndex(); + const TypeIndex TTypeIndex = TypeIndex::Make(); if (is_empty() || (TTypeIndex != TypeId())) return nullptr; return std::addressof( static_cast*>(GetValue())->value); @@ -333,7 +333,7 @@ class Variant { TypeIndex TypeId() const final { const TypeIndex value_type_index = - MakeTypeIndex::type>(); + TypeIndex::Make::type>(); return value_type_index; } ",0,train e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",variant_encode_decode.h,"@@ -160,7 +160,7 @@ string TypeNameVariantImpl( const T& value, TypeNameResolver) { - return port::MaybeAbiDemangle(MakeTypeIndex().name()); + return port::MaybeAbiDemangle(TypeIndex::Make().name()); } template ",0,train e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",variant_op_registry.h,"@@ -521,7 +521,7 @@ class UnaryVariantBinaryOpRegistration { #define INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION(T, direction, \ device_copy_fn) \ INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION_UNIQ_HELPER( \ - __COUNTER__, T, direction, MakeTypeIndex(), device_copy_fn) + __COUNTER__, T, direction, TypeIndex::Make(), device_copy_fn) #define INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION_UNIQ_HELPER( \ ctr, T, direction, type_index, device_copy_fn) \ @@ -542,7 +542,7 @@ class UnaryVariantBinaryOpRegistration { #define REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION(op, device, T, \ unary_op_function) \ REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION_UNIQ_HELPER( \ - __COUNTER__, op, device, T, MakeTypeIndex(), unary_op_function) + __COUNTER__, op, device, T, TypeIndex::Make(), unary_op_function) #define REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION_UNIQ_HELPER( \ ctr, op, device, T, type_index, unary_op_function) \ @@ -563,7 +563,7 @@ class UnaryVariantBinaryOpRegistration { #define REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION(op, device, T, \ binary_op_function) \ REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION_UNIQ_HELPER( \ - __COUNTER__, op, device, T, MakeTypeIndex(), binary_op_function) + __COUNTER__, op, device, T, TypeIndex::Make(), binary_op_function) #define REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION_UNIQ_HELPER( \ ctr, op, device, T, type_index, binary_op_function) \ ",0,train e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",variant_op_registry_test.cc,"@@ -155,12 +155,12 @@ TEST(VariantOpCopyToGPURegistryTest, TestBasic) { // No registered copy fn for GPU<->GPU. 
EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetDeviceCopyFn( VariantDeviceCopyDirection::DEVICE_TO_DEVICE, - MakeTypeIndex()), + TypeIndex::Make()), nullptr); auto* copy_to_gpu_fn = UnaryVariantOpRegistry::Global()->GetDeviceCopyFn( VariantDeviceCopyDirection::HOST_TO_DEVICE, - MakeTypeIndex()); + TypeIndex::Make()); EXPECT_NE(copy_to_gpu_fn, nullptr); VariantValue vv{true /* early_exit */}; @@ -183,7 +183,7 @@ TEST(VariantOpCopyToGPURegistryTest, TestDuplicate) { UnaryVariantOpRegistry registry; UnaryVariantOpRegistry::AsyncVariantDeviceCopyFn f; class FjFjFj {}; - const auto kTypeIndex = MakeTypeIndex(); + const auto kTypeIndex = TypeIndex::Make(); registry.RegisterDeviceCopyFn(VariantDeviceCopyDirection::HOST_TO_DEVICE, kTypeIndex, f); EXPECT_DEATH(registry.RegisterDeviceCopyFn( @@ -193,9 +193,10 @@ TEST(VariantOpCopyToGPURegistryTest, TestDuplicate) { TEST(VariantOpZerosLikeRegistryTest, TestBasicCPU) { class Blah {}; - EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetUnaryOpFn( - ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_CPU, MakeTypeIndex()), - nullptr); + EXPECT_EQ( + UnaryVariantOpRegistry::Global()->GetUnaryOpFn( + ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_CPU, TypeIndex::Make()), + nullptr); VariantValue vv_early_exit{true /* early_exit */, 0 /* value */}; Variant v = vv_early_exit; @@ -218,9 +219,10 @@ TEST(VariantOpZerosLikeRegistryTest, TestBasicCPU) { #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TEST(VariantOpUnaryOpRegistryTest, TestBasicGPU) { class Blah {}; - EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetUnaryOpFn( - ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_GPU, MakeTypeIndex()), - nullptr); + EXPECT_EQ( + UnaryVariantOpRegistry::Global()->GetUnaryOpFn( + ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_GPU, TypeIndex::Make()), + nullptr); VariantValue vv_early_exit{true /* early_exit */, 0 /* value */}; Variant v = vv_early_exit; @@ -245,7 +247,7 @@ TEST(VariantOpUnaryOpRegistryTest, TestDuplicate) { UnaryVariantOpRegistry registry; UnaryVariantOpRegistry::VariantUnaryOpFn f; class FjFjFj {}; - const auto kTypeIndex = MakeTypeIndex(); + const auto kTypeIndex = TypeIndex::Make(); registry.RegisterUnaryOpFn(ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_CPU, kTypeIndex, f); @@ -263,7 +265,7 @@ TEST(VariantOpUnaryOpRegistryTest, TestDuplicate) { TEST(VariantOpAddRegistryTest, TestBasicCPU) { class Blah {}; EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetBinaryOpFn( - ADD_VARIANT_BINARY_OP, DEVICE_CPU, MakeTypeIndex()), + ADD_VARIANT_BINARY_OP, DEVICE_CPU, TypeIndex::Make()), nullptr); VariantValue vv_early_exit{true /* early_exit */, 3 /* value */}; @@ -290,7 +292,7 @@ TEST(VariantOpAddRegistryTest, TestBasicCPU) { TEST(VariantOpAddRegistryTest, TestBasicGPU) { class Blah {}; EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetBinaryOpFn( - ADD_VARIANT_BINARY_OP, DEVICE_GPU, MakeTypeIndex()), + ADD_VARIANT_BINARY_OP, DEVICE_GPU, TypeIndex::Make()), nullptr); VariantValue vv_early_exit{true /* early_exit */, 3 /* value */}; @@ -318,7 +320,7 @@ TEST(VariantOpAddRegistryTest, TestDuplicate) { UnaryVariantOpRegistry registry; UnaryVariantOpRegistry::VariantBinaryOpFn f; class FjFjFj {}; - const auto kTypeIndex = MakeTypeIndex(); + const auto kTypeIndex = TypeIndex::Make(); registry.RegisterBinaryOpFn(ADD_VARIANT_BINARY_OP, DEVICE_CPU, kTypeIndex, f); EXPECT_DEATH(registry.RegisterBinaryOpFn(ADD_VARIANT_BINARY_OP, DEVICE_CPU, ",0,train e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. 
PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",variant_test.cc,"@@ -589,7 +589,7 @@ TEST(VariantTest, TensorListTest) { serialized.ToProto(&data); const Variant y_unknown = data; EXPECT_EQ(y_unknown.TypeName(), ""TensorList""); - EXPECT_EQ(y_unknown.TypeId(), MakeTypeIndex()); + EXPECT_EQ(y_unknown.TypeId(), TypeIndex::Make()); EXPECT_EQ(y_unknown.DebugString(), strings::StrCat( ""Variant"")); ",0,train e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",conditional_accumulator_op.cc,"@@ -90,7 +90,7 @@ class ResourceConditionalAccumulatorOp : public ConditionalAccumulatorBaseOp { h(1) = cinfo_.name(); OP_REQUIRES_OK(ctx, MakeResourceHandleToOutput( ctx, 0, cinfo_.container(), cinfo_.name(), - MakeTypeIndex())); + TypeIndex::Make())); } TF_DISALLOW_COPY_AND_ASSIGN(ResourceConditionalAccumulatorOp); ",0,train e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",dataset_utils.h,"@@ -35,7 +35,7 @@ Status CreateHandle(OpKernelContext* ctx, T* resource, TF_RETURN_IF_ERROR(mgr->Create(container_name, unique_name, resource)); *handle = MakeResourceHandle(container_name, unique_name, *ctx->device(), - MakeTypeIndex()); + TypeIndex::Make()); return Status::OK(); } ",0,train e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",threadpool_dataset_op.cc,"@@ -111,7 +111,7 @@ class ThreadPoolHandleOp : public OpKernel { } OP_REQUIRES_OK(ctx, MakeResourceHandleToOutput( ctx, 0, cinfo_.container(), cinfo_.name(), - MakeTypeIndex())); + TypeIndex::Make())); } private: ",0,train e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",iterator_ops.cc,"@@ -443,7 +443,7 @@ void IteratorHandleOp::Compute(OpKernelContext* context) } OP_REQUIRES_OK(context, MakeResourceHandleToOutput( context, 0, cinfo_.container(), cinfo_.name(), - MakeTypeIndex())); + TypeIndex::Make())); } Status IteratorHandleOp::VerifyResource(IteratorResource* resource) { ",0,train e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",multi_device_iterator_ops.cc,"@@ -475,7 +475,7 @@ class MultiDeviceIteratorHandleOp : public OpKernel { } OP_REQUIRES_OK(context, MakeResourceHandleToOutput( context, 0, container_name, unique_name, - MakeTypeIndex())); + TypeIndex::Make())); } private: ",0,train e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",ops_testutil.h,"@@ -126,7 +126,7 @@ class OpsTestBase : public ::testing::Test { std::string container_name = container.empty() ? 
rm->default_container() : container; EXPECT_TRUE(rm->Create(container_name, name, resource).ok()); - AddResourceInputInternal(container_name, name, MakeTypeIndex()); + AddResourceInputInternal(container_name, name, TypeIndex::Make()); } // Runs an operation producing 'num_outputs' outputs. ",0,train e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",tile_ops.cc,"@@ -554,7 +554,7 @@ inline void TileGradientOp::HandleCase( OpKernelContext* context, const std::vector& input_dims, const gtl::ArraySlice& multiples_array, Tensor* result) { LOG(FATAL) << ""TileGradientOp: Invalid combination of Device, DT and NDIM: "" - << MakeTypeIndex().name() << "", "" << DataTypeString(DT) + << TypeIndex::Make().name() << "", "" << DataTypeString(DT) << "", "" << NDIM; } ",0,train e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`. PiperOrigin-RevId: 317920618 Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",abi_test.cc,"@@ -23,14 +23,14 @@ namespace tensorflow { struct MyRandomPODType {}; TEST(AbiTest, AbiDemangleTest) { - EXPECT_EQ(port::MaybeAbiDemangle(MakeTypeIndex().name()), ""int""); + EXPECT_EQ(port::MaybeAbiDemangle(TypeIndex::Make().name()), ""int""); #ifdef PLATFORM_WINDOWS const char pod_type_name[] = ""struct tensorflow::MyRandomPODType""; #else const char pod_type_name[] = ""tensorflow::MyRandomPODType""; #endif - EXPECT_EQ(port::MaybeAbiDemangle(MakeTypeIndex().name()), + EXPECT_EQ(port::MaybeAbiDemangle(TypeIndex::Make().name()), pod_type_name); EXPECT_EQ( ",0,train c07b18684c3b20dd91911a31bbd6169ad9cc1617,tensorflow/tensorflow,Fix set_difference doc,sets_impl.py,"@@ -247,7 +247,7 @@ def set_difference(a, b, aminusb=True, validate_indices=True): # # collections.OrderedDict([ # ((0, 0, 0), 2), - # ((0, 0, 1), 3), + # ((0, 1, 0), 3), # ]) ``` ",0,train 7b4389140094231ecf8c7491e3bb490a86ef1dd7,tensorflow/tensorflow,"Let log_every_steps <=0 disable logging hook. Change: 134278810",basic_session_run_hooks.py,"@@ -51,7 +51,12 @@ class LoggingTensorHook(session_run_hook.SessionRunHook): tensors: `dict` of tag to tensors/names or `iterable` of tensors/names. every_n_iter: `int`, print every N iteration. + + Raises: + ValueError: if `every_n_iter` is non-positive. """""" + if every_n_iter <= 0: + raise ValueError(""Invalid every_n_iter=%s."" % every_n_iter) if not isinstance(tensors, dict): tensors = {item: item for item in tensors} self._tensors = tensors @@ -147,7 +152,7 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): Raises: ValueError: One of `save_steps` or `save_secs` should be set. """""" - logging.info(""Create CheckpointSaverHook"") + logging.info(""Create CheckpointSaverHook."") self._saver = saver self._checkpoint_dir = checkpoint_dir self._summary_writer = SummaryWriterCache.get(checkpoint_dir) @@ -173,7 +178,7 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): def before_run(self, run_context): # pylint: disable=unused-argument if self._last_saved_time is None: - # Write graph in the first call + # Write graph in the first call. training_util.write_graph( ops.get_default_graph().as_graph_def(add_shapes=True), self._checkpoint_dir, ",0,train 7b4389140094231ecf8c7491e3bb490a86ef1dd7,tensorflow/tensorflow,"Let log_every_steps <=0 disable logging hook. 
Change: 134278810",graph_actions.py,"@@ -162,7 +162,7 @@ def _monitored_train(graph, This feed dictionary will be used when `init_op` is evaluated. init_fn: Optional callable passed to Supervisor to initialize the model. log_every_steps: Output logs regularly. The logs contain timing data and the - current loss. + current loss. A `0` or negative value disables logging. supervisor_is_chief: Whether the current process is the chief supervisor in charge of restoring the model and running standard services. supervisor_master: The master string to use when preparing the session. @@ -231,14 +231,13 @@ def _monitored_train(graph, # (such as ExportMonitor). Appending them after the basic_session_run_hooks. all_hooks = [] with graph.as_default(): - all_hooks.extend([ - basic_session_run_hooks.NanTensorHook( - loss_op, fail_on_nan_loss=fail_on_nan_loss), - basic_session_run_hooks.LoggingTensorHook({ - 'loss': loss_op.name, - 'step': global_step_tensor.name - }, every_n_iter=log_every_steps), - ]) + all_hooks.append(basic_session_run_hooks.NanTensorHook( + loss_op, fail_on_nan_loss=fail_on_nan_loss)) + if log_every_steps > 0: + all_hooks.append(basic_session_run_hooks.LoggingTensorHook({ + 'loss': loss_op.name, + 'step': global_step_tensor.name + }, every_n_iter=log_every_steps)) scaffold = monitored_session.Scaffold( init_op=init_op, ",0,train 7b4389140094231ecf8c7491e3bb490a86ef1dd7,tensorflow/tensorflow,"Let log_every_steps <=0 disable logging hook. Change: 134278810",basic_session_run_hooks_test.py,"@@ -100,6 +100,12 @@ class LoggingTensorHookTest(tf.test.TestCase): def tearDown(self): tf.logging.info = self._actual_log + def test_illegal_args(self): + with self.assertRaisesRegexp(ValueError, 'nvalid every_n_iter'): + basic_session_run_hooks.LoggingTensorHook(tensors=['t'], every_n_iter=0) + with self.assertRaisesRegexp(ValueError, 'nvalid every_n_iter'): + basic_session_run_hooks.LoggingTensorHook(tensors=['t'], every_n_iter=-10) + def test_print(self): with tf.Graph().as_default(), tf.Session() as sess: t = tf.constant(42.0, name='foo') ",0,train 088bd27daba96e3905c00dc075a7b42e055345a6,tensorflow/tensorflow,"Add a note that the replica_id_in_sync_group might not correspond to XLA replica ID. PiperOrigin-RevId: 292609147 Change-Id: I6052a297a50e213471ee8d3a62a4d0964affd9e1",distribute_lib.py,"@@ -2030,6 +2030,9 @@ class ReplicaContext(object): This identifies the replica that is part of a sync group. Currently we assume that all sync groups contain the same number of replicas. The value of the replica id can range from 0 to `num_replica_in_sync` - 1. + + NOTE: This is not guaranteed to be the same ID as the XLA replica ID use + for low-level operations such as collective_permute. """""" require_replica_context(self) return self._replica_id_in_sync_group ",0,test ec0e105c6fe537969a736ddb546c277ae18b9282,tensorflow/tensorflow,"Fix build failure of list_flex_ops_main in OSS The cc_binary required --config=monolithic which can't be passed into a native.genrule. Using tf_cc_binary solves the build failure. PiperOrigin-RevId: 316631689 Change-Id: Ia706d532578ccbf5bc8f172f6344f166d05531fb",list_flex_ops_test.cc,"@@ -22,6 +22,7 @@ limitations under the License. 
#include ""flatbuffers/flexbuffers.h"" // from @flatbuffers #include ""tensorflow/core/framework/node_def.pb.h"" #include ""tensorflow/core/platform/protobuf.h"" +#include ""tensorflow/core/platform/resource_loader.h"" #include ""tensorflow/lite/kernels/test_util.h"" namespace tflite { @@ -31,8 +32,9 @@ class FlexOpsListTest : public ::testing::Test { protected: FlexOpsListTest() {} - void ReadOps(const string& model_path) { - auto model = FlatBufferModel::BuildFromFile(model_path.data()); + void ReadOps(const string& path) { + std::string full_path = tensorflow::GetDataDependencyFilepath(path); + auto model = FlatBufferModel::BuildFromFile(full_path.data()); AddFlexOpsFromModel(model->GetModel(), &flex_ops_); output_text_ = OpListToJSONString(flex_ops_); } @@ -84,30 +86,29 @@ class FlexOpModel : public SingleOpModel { }; TEST_F(FlexOpsListTest, TestModelsNoFlex) { - ReadOps(""third_party/tensorflow/lite/testdata/test_model.bin""); + ReadOps(""tensorflow/lite/testdata/test_model.bin""); EXPECT_EQ(output_text_, ""[]""); } TEST_F(FlexOpsListTest, TestBrokenModel) { EXPECT_DEATH_IF_SUPPORTED( - ReadOps(""third_party/tensorflow/lite/testdata/test_model_broken.bin""), - """"); + ReadOps(""tensorflow/lite/testdata/test_model_broken.bin""), """"); } TEST_F(FlexOpsListTest, TestZeroSubgraphs) { - ReadOps(""third_party/tensorflow/lite/testdata/0_subgraphs.bin""); + ReadOps(""tensorflow/lite/testdata/0_subgraphs.bin""); EXPECT_EQ(output_text_, ""[]""); } TEST_F(FlexOpsListTest, TestFlexAdd) { - ReadOps(""third_party/tensorflow/lite/testdata/multi_add_flex.bin""); + ReadOps(""tensorflow/lite/testdata/multi_add_flex.bin""); EXPECT_EQ(output_text_, ""[[\""Add\"", \""BinaryOp>\""]]""); } TEST_F(FlexOpsListTest, TestTwoModel) { - ReadOps(""third_party/tensorflow/lite/testdata/multi_add_flex.bin""); - ReadOps(""third_party/tensorflow/lite/testdata/softplus_flex.bin""); + ReadOps(""tensorflow/lite/testdata/multi_add_flex.bin""); + ReadOps(""tensorflow/lite/testdata/softplus_flex.bin""); EXPECT_EQ(output_text_, ""[[\""Add\"", \""BinaryOp>\""],\n[\""Softplus\"", \""SoftplusOp>\""]]""); } ",0,train dd934175ecaa6d52d8a297144215acfa650360ac,tensorflow/tensorflow,"Avoid compiler crash on aggregate initialization of flexible array member PiperOrigin-RevId: 335754239 Change-Id: Ibc812c55e7e64739a030a6f03976c9c73d799ad2",micro_allocator.cc,"@@ -59,7 +59,7 @@ struct AllocationInfo { // requirement for SIMD extensions. constexpr int kBufferAlignment = 16; constexpr char kOfflineMemAllocMetadata[] = ""OfflineMemoryAllocation""; -const TfLiteIntArray kZeroLengthIntArray = {0, {}}; +const TfLiteIntArray kZeroLengthIntArray = {}; class MicroBuiltinDataAllocator : public BuiltinDataAllocator { public: ",0,train 105174318a9e152a50f26bff60e29b1217371d93,tensorflow/tensorflow,Example code uses `tf.image` for image ops instead of `tf`.,image_ops.py,"@@ -109,9 +109,9 @@ Example: ```python # Decode an image and convert it to HSV. -rgb_image = tf.decode_png(..., channels=3) -rgb_image_float = tf.convert_image_dtype(rgb_image, tf.float32) -hsv_image = tf.rgb_to_hsv(rgb_image) +rgb_image = tf.image.decode_png(..., channels=3) +rgb_image_float = tf.image.convert_image_dtype(rgb_image, tf.float32) +hsv_image = tf.image.rgb_to_hsv(rgb_image) ``` @@rgb_to_grayscale @@ -776,7 +776,7 @@ def adjust_contrast(images, contrast_factor): contrast_factor: A float multiplier for adjusting contrast. Returns: - The constrast-adjusted image or images. + The contrast-adjusted image or images. 
"""""" with ops.op_scope([images, contrast_factor], None, 'adjust_contrast') as name: # Remember original dtype to so we can convert back if needed ",0,train ae9ff37386c0c5cf40d8877cc911394e90cbd7bd,tensorflow/tensorflow,"Enable NNAPI tests PiperOrigin-RevId: 305160089 Change-Id: I446eb5481ca6adc76e258b25d41dd8406421d74b",acceleration_test_list.cc,"@@ -349,6 +349,7 @@ SVDFOpTest/BlackBoxTestRank2 # tile_test -TileTest/TileTest/Int64.+/.+ -TileTest/TileTest/Boolean.+/.+ +-TileTest/TileTest/String.+/.+ # Const tensor only TileTest/TileTest/.+/0,29 ",0,train ae9ff37386c0c5cf40d8877cc911394e90cbd7bd,tensorflow/tensorflow,"Enable NNAPI tests PiperOrigin-RevId: 305160089 Change-Id: I446eb5481ca6adc76e258b25d41dd8406421d74b",mul_test.cc,"@@ -291,12 +291,6 @@ void NoActivation() { template void NoActivationLargeMultiplier() { - // TODO(b/138722124): Remove this after setting the appropriate op version (3) - // for dependent tests. - if (SingleOpModel::GetForceUseNnapi()) { - // NNAPI doesn't currently support Mul with multiplier>1. - return; - } // Intentionally pathological output range much narrower than needed // to represent input values to exercise the multiplier>1 case. QuantizedMulOpModel m({tensor_type, {1, 2, 2, 1}, -100, 100}, ",0,train ae9ff37386c0c5cf40d8877cc911394e90cbd7bd,tensorflow/tensorflow,"Enable NNAPI tests PiperOrigin-RevId: 305160089 Change-Id: I446eb5481ca6adc76e258b25d41dd8406421d74b",tile_test.cc,"@@ -203,10 +203,6 @@ TEST_P(TileTest, Int64Matrix64Multipliers) { } TEST_P(TileTest, StringMatrix) { - // TODO(b/138722124): Enable these tests on NNAPI. - if (SingleOpModel::GetForceUseNnapi()) { - return; - } Check( /*input_shape=*/{2, 3}, /*input_data=*/{""AA"", ""AB"", ""AC"", ""BA"", ""BB"", ""BC""}, @@ -218,10 +214,6 @@ TEST_P(TileTest, StringMatrix) { } TEST_P(TileTest, StringMatrix64Multipliers) { - // TODO(b/138722124): Enable these tests on NNAPI. - if (SingleOpModel::GetForceUseNnapi()) { - return; - } Check( /*input_shape=*/{2, 3}, /*input_data=*/{""AA"", ""AB"", ""AC"", ""BA"", ""BB"", ""BC""}, @@ -233,10 +225,6 @@ TEST_P(TileTest, StringMatrix64Multipliers) { } TEST_P(TileTest, StringMatrix2) { - // TODO(b/138722124): Enable these tests on NNAPI. - if (SingleOpModel::GetForceUseNnapi()) { - return; - } Check( /*input_shape=*/{3, 2, 1}, /*input_data=*/{""AA"", ""AB"", ""AC"", ""BA"", ""BB"", ""BC""}, ",0,train 8ccb3cf1b88a2c5d3431b333dd5d6b2215de4bed,tensorflow/tensorflow,"Add unit tests to assert that the strings ""true""/""True""/""false""/""0""/""1"" are rejected by set_hparam() on boolean hyperparameters. PiperOrigin-RevId: 242492963",hparam_test.py,"@@ -491,6 +491,26 @@ class HParamsTest(test.TestCase): with self.assertRaises(ValueError): hparams.set_hparam('bool_', 1) + # Unfortunately there is no automagic conversion of bool-like strings to + # bool. + with self.assertRaises(ValueError): + hparams.set_hparam('bool_', 'true') + + with self.assertRaises(ValueError): + hparams.set_hparam('bool_', 'True') + + with self.assertRaises(ValueError): + hparams.set_hparam('bool_', 'false') + + with self.assertRaises(ValueError): + hparams.set_hparam('bool_', 'False') + + with self.assertRaises(ValueError): + hparams.set_hparam('bool_', '0') + + with self.assertRaises(ValueError): + hparams.set_hparam('bool_', '1') + with self.assertRaises(ValueError): hparams.set_hparam('int_', 2.2) ",0,test 9b6fd34a0850939fca054098b97b097c1039405a,tensorflow/tensorflow,"ConcatZ reverted and rewritten to support batch implicitly. 
PiperOrigin-RevId: 272790908",concat_z.cc,"@@ -36,22 +36,27 @@ bool IsAllChannelsX4(const std::vector& channels) { return true; } +std::string GetSrcDepthSizeVar(int src_index) { + return ""src_size_"" + std::to_string(src_index) + ""_depth""; +} + std::string GetConcatKernelCode( const OperationDef& op_def, const std::vector& channels, const std::vector& linked_operations) { std::vector srcs(channels.size()); for (int i = 0; i < channels.size(); ++i) { const std::string tensor_name = ""src_data_"" + std::to_string(i); - const std::string uniform_name = ""src_size_"" + std::to_string(i); - srcs[i] = - TensorCodeGenerator(tensor_name, uniform_name, op_def.src_tensors[i]); + srcs[i] = TensorCodeGenerator( + tensor_name, {""dst_size.x"", ""dst_size.y"", GetSrcDepthSizeVar(i)}, + op_def.src_tensors[i]); } - TensorCodeGenerator dst(""dst_data"", ""dst_size"", op_def.dst_tensors[0]); + TensorCodeGenerator dst(""dst_data"", + {""dst_size.x"", ""dst_size.y"", ""dst_size.z""}, + op_def.dst_tensors[0]); std::string c = GetCommonDefines(op_def.precision); const std::string postfix[] = {"".x"", "".y"", "".z"", "".w""}; - const std::string batch_id = op_def.batch_support ? ""batch_id"" : """"; c += ""__kernel void main_function(\n""; for (const auto& src : srcs) { c += src.GetDeclaration(AccessType::READ) + "",\n""; @@ -59,21 +64,13 @@ std::string GetConcatKernelCode( c += dst.GetDeclaration(AccessType::WRITE); c += GetArgsDeclaration(linked_operations); for (int i = 0; i < channels.size(); ++i) { - const std::string uniform_name = ""src_size_"" + std::to_string(i); - c += "" int4 "" + uniform_name + "",\n""; - } - if (op_def.batch_support) { - c += "" int BATCH_SIZE, \n""; + c += "" int "" + GetSrcDepthSizeVar(i) + "",\n""; } c += "" int4 dst_size\n""; c += "") {\n""; c += "" int X = get_global_id(0);\n""; c += "" int Y = get_global_id(1);\n""; - c += "" if (X >= dst_size.x || Y >= dst_size.y) return;\n""; - if (op_def.batch_support) { - c += "" int batch_id = get_global_id(2);\n""; - c += "" if (batch_id >= BATCH_SIZE) return;\n""; - } + c += "" if (X >= dst_size.x || Y >= dst_size.y) return; \n""; if (IsAllChannelsX4(channels)) { // When all channels % 4 == 0 we can read/assign/write FLT4 elements easily. @@ -81,37 +78,35 @@ std::string GetConcatKernelCode( // generation. 
c += "" int Z = 0;\n""; for (int i = 0; i < channels.size(); ++i) { - const std::string uniform_name = ""src_size_"" + std::to_string(i); const int depth = IntegralDivideRoundUp(channels[i], 4); if (depth % 2 == 0) { // We can read more at once inside of loop in case depth % 2 == 0 // it should be better for reading latency hiding - c += "" for (int i = 0; i < "" + uniform_name + "".w; i += 2) {\n""; + c += "" for (int i = 0; i < "" + GetSrcDepthSizeVar(i) + ""; i += 2) {\n""; c += "" FLT4 result0 = "" + - srcs[i].Read4D(""X"", ""Y"", ""i"", batch_id, - TextureAddressMode::DONT_CARE) + + srcs[i].Read3D(""X"", ""Y"", ""i"", TextureAddressMode::DONT_CARE) + "";\n""; c += "" FLT4 result1 = "" + - srcs[i].Read4D(""X"", ""Y"", ""i + 1"", batch_id, - TextureAddressMode::DONT_CARE) + + srcs[i].Read3D(""X"", ""Y"", ""i + 1"", TextureAddressMode::DONT_CARE) + "";\n""; + c += "" "" + dst.GetAddress(""dst_adr0"", ""X"", ""Y"", ""Z"") + ""\n""; + c += "" "" + dst.GetAddress(""dst_adr1"", ""X"", ""Y"", ""Z + 1"") + ""\n""; const LinkingContext context_0{""result0"", ""X"", ""Y"", ""Z""}; const LinkingContext context_1{""result1"", ""X"", ""Y"", ""Z + 1""}; c += PostProcess(linked_operations, context_0); c += PostProcess(linked_operations, context_1); - c += "" "" + dst.Write4D(""result0"", ""X"", ""Y"", ""Z"", batch_id); - c += "" "" + dst.Write4D(""result1"", ""X"", ""Y"", ""Z + 1"", batch_id); + c += "" "" + dst.Write3D(""result0"", ""X"", ""Y"", ""Z""); + c += "" "" + dst.Write3D(""result1"", ""X"", ""Y"", ""Z + 1""); c += "" Z += 2;\n""; c += "" }\n""; } else { - c += "" for (int i = 0; i < "" + uniform_name + "".w; ++i) {\n""; + c += "" for (int i = 0; i < "" + GetSrcDepthSizeVar(i) + ""; ++i) {\n""; c += "" FLT4 result = "" + - srcs[i].Read4D(""X"", ""Y"", ""i"", batch_id, - TextureAddressMode::DONT_CARE) + + srcs[i].Read3D(""X"", ""Y"", ""i"", TextureAddressMode::DONT_CARE) + "";\n""; const LinkingContext context{""result"", ""X"", ""Y"", ""Z""}; c += PostProcess(linked_operations, context); - c += "" "" + dst.Write4D(""result"", ""X"", ""Y"", ""Z"", batch_id); + c += "" "" + dst.Write3D(""result"", ""X"", ""Y"", ""Z""); c += "" Z++;\n""; c += "" }\n""; } @@ -126,8 +121,8 @@ std::string GetConcatKernelCode( for (int d = 0; d < depth; ++d) { const int channels_in_group = std::min(4, channels[i] - d * 4); const std::string temp_name = ""t"" + std::to_string(read_index); - c += "" FLT4 "" + temp_name + "" = "" + - srcs[i].Read4D(""X"", ""Y"", std::to_string(d), batch_id, + c += "" FLT4 "" + temp_name + "" = ""; + c += srcs[i].Read3D(""X"", ""Y"", std::to_string(d), TextureAddressMode::DONT_CARE) + "";\n""; for (int ch = 0; ch < channels_in_group; ++ch) { @@ -139,8 +134,7 @@ std::string GetConcatKernelCode( c += "" {\n""; const LinkingContext context{""result"", ""X"", ""Y"", std::to_string(z)}; c += PostProcess(linked_operations, context); - c += "" "" + - dst.Write4D(""result"", ""X"", ""Y"", std::to_string(z), batch_id); + c += "" "" + dst.Write3D(""result"", ""X"", ""Y"", std::to_string(z)); c += "" }\n""; z++; } @@ -152,7 +146,7 @@ std::string GetConcatKernelCode( c += "" {\n""; const LinkingContext context{""result"", ""X"", ""Y"", std::to_string(z)}; c += PostProcess(linked_operations, context); - c += "" "" + dst.Write4D(""result"", ""X"", ""Y"", ""Z"", std::to_string(z)); + c += "" "" + dst.Write3D(""result"", ""X"", ""Y"", std::to_string(z)); c += "" }\n""; } } @@ -199,21 +193,16 @@ Status ConcatZ::BindArguments() { RETURN_IF_ERROR(kernel_.SetMemoryAuto(dst_[0]->GetMemoryPtrForWriting())); 
RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_)); for (int i = 0; i < channels_.size(); ++i) { - int4 size(src_[i]->Width(), src_[i]->Height(), channels_[i], - IntegralDivideRoundUp(channels_[i], 4)); - RETURN_IF_ERROR(kernel_.SetBytesAuto(size)); - } - if (definition_.batch_support) { - RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->Batch())); + RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[i]->Depth())); } - RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetSizeWithDepth())); + RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWBatchedHDB())); return OkStatus(); } int3 ConcatZ::GetGridSize() const { - const int grid_x = dst_[0]->Width(); + const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); const int grid_y = dst_[0]->Height(); - const int grid_z = dst_[0]->Batch(); + const int grid_z = 1; return int3(grid_x, grid_y, grid_z); } ",0,train 6a8e5328c68b037a741b40bc538fecfb72980953,tensorflow/tensorflow,"Update tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc Co-authored-by: Mihai Maruseac ",graph_analyzer.cc,"@@ -92,8 +92,8 @@ void GraphAnalyzer::FindSubgraphs() { } void GraphAnalyzer::ExtendSubgraph(Subgraph* parent) { - const int parent_id_size_plus_one = parent->id().size() + 1; - bool will_complete = (parent_id_size_plus_one == subgraph_size_); + const int next_parent_id = parent->id().size() + 1; + bool will_complete = (next_parent_id == subgraph_size_); SubgraphPtrSet& sg_set = will_complete ? result_ : partial_; const GenNode* last_all_or_none_node = nullptr; ",0,train 14f00bd6d9f7e6c1df6b14f3d2553121ae515e74,tensorflow/tensorflow,"[NNAPI] Check for optional tensor when handling FP16 weights. PiperOrigin-RevId: 427808358 Change-Id: I4a7e429c61c315a93825a596a1c8f81c97e1dd49",nnapi_delegate.cc,"@@ -5451,7 +5451,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors( } // Map inputs to NN API tensor indices. for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) { - if (context->tensors[node->inputs->data[input_pos]].type == + if (node->inputs->data[input_pos] != kTfLiteOptionalTensor && + context->tensors[node->inputs->data[input_pos]].type == kTfLiteFloat16 && IsConstantTensor(&context->tensors[node->inputs->data[input_pos]])) { input_tensor_flags |= NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION; ",0,train 025277a1598fa227b53ddc4e316a7a953b2006c8,tensorflow/tensorflow,"Small improvements to handling of Datasets in Keras. * Allow sparse labels to work with Datasets. * Allow sample_weights to be passed as the third output of a Dataset (like how generator input is treated). PiperOrigin-RevId: 211834259",keras_test.py,"@@ -446,8 +446,7 @@ class TestWithDistributionStrategy(test.TestCase): dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat(100) - with self.assertRaisesRegexp(ValueError, - 'expected input to have 2 dimensions'): + with self.assertRaisesRegexp(ValueError, 'expected input to have shape'): model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0) # Wrong input shape ",0,train 025277a1598fa227b53ddc4e316a7a953b2006c8,tensorflow/tensorflow,"Small improvements to handling of Datasets in Keras. * Allow sparse labels to work with Datasets. * Allow sample_weights to be passed as the third output of a Dataset (like how generator input is treated). 
PiperOrigin-RevId: 211834259",training.py,"@@ -928,11 +928,16 @@ class Model(Network): 'Make sure that your dataset can generate ' 'required number of samples.') - if not isinstance(next_element, (list, tuple)) or len(next_element) != 2: - raise ValueError('Please provide model inputs as a list or tuple of 2 ' - 'elements: input and target pair. ' - 'Received %s' % next_element) - x, y = next_element + if (not isinstance(next_element, (list, tuple)) or + len(next_element) not in [2, 3]): + raise ValueError( + 'Please provide model inputs as a list or tuple of 2 or 3' + 'elements: (input, target) or (input, target, sample_weights)' + 'Received %s' % next_element) + if len(next_element) == 2: + x, y = next_element + else: + x, y, sample_weight = next_element x, y, sample_weights = self._standardize_weights(x, y, sample_weight, class_weight, batch_size) return x, y, sample_weights @@ -1331,7 +1336,8 @@ class Model(Network): (in case the model has multiple inputs). - A dict mapping input names to the corresponding array/tensors, if the model has named inputs. - - A `tf.data` dataset or a dataset iterator. + - A `tf.data` dataset or a dataset iterator. Should return a tuple + of either (inputs, targets) or (inputs, targets, sample_weights). y: Target data. Like the input data `x`, it could be either Numpy array(s) or TensorFlow tensor(s). It should be consistent with `x` (you cannot have Numpy inputs and @@ -1396,7 +1402,8 @@ class Model(Network): to apply a different weight to every timestep of every sample. In this case you should make sure to specify `sample_weight_mode=""temporal""` in `compile()`. This argument is not - supported when `x` is a dataset or a dataset iterator. + supported when `x` is a dataset or a dataset iterator, instead + provide the sample_weights as the third element of `x`. initial_epoch: Integer. Epoch at which to start training (useful for resuming a previous training run). ",0,train 025277a1598fa227b53ddc4e316a7a953b2006c8,tensorflow/tensorflow,"Small improvements to handling of Datasets in Keras. * Allow sparse labels to work with Datasets. * Allow sample_weights to be passed as the third output of a Dataset (like how generator input is treated). PiperOrigin-RevId: 211834259",training_eager.py,"@@ -417,11 +417,12 @@ def iterator_predict_loop(model, inputs, steps, verbose=0): """""" assert isinstance(inputs, iterator_ops.EagerIterator) if not isinstance(inputs.output_shapes, - (list, tuple)) or len(inputs.output_shapes) > 2: + (list, tuple)) or len(inputs.output_shapes) > 3: raise ValueError( - 'Please provide data as a list or tuple of 1 or 2 elements ' - ' - input or input and target pair. Received %s. We do not use the ' - '`target` value here.' % inputs.output_shapes) + 'Please provide data as a list or tuple of 1, 2, or 3 elements ' + ' - `(input)`, or `(input, target)`, or `(input, target,' + 'sample_weights)`. Received %s. We do not use the `target` or' + '`sample_weights` value here.' % inputs.output_shapes) outs = [] if verbose == 1: progbar = generic_utils.Progbar(target=steps) ",0,train 025277a1598fa227b53ddc4e316a7a953b2006c8,tensorflow/tensorflow,"Small improvements to handling of Datasets in Keras. * Allow sparse labels to work with Datasets. * Allow sample_weights to be passed as the third output of a Dataset (like how generator input is treated). 
PiperOrigin-RevId: 211834259",training_test.py,"@@ -2097,6 +2097,43 @@ class TestTrainingWithDataset(test.TestCase): 'you should specify the `steps` argument'): model.predict(dataset, verbose=0) + @tf_test_util.run_in_graph_and_eager_modes + def test_dataset_with_sample_weights(self): + model = testing_utils.get_small_functional_mlp(1, 4, input_dim=3) + optimizer = RMSPropOptimizer(learning_rate=0.001) + loss = 'mse' + metrics = ['mae', metrics_module.CategoricalAccuracy()] + model.compile(optimizer, loss, metrics=metrics) + + inputs = np.zeros((10, 3), np.float32) + targets = np.zeros((10, 4), np.float32) + sample_weights = np.ones((10), np.float32) + dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets, + sample_weights)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) + model.evaluate(dataset, steps=2, verbose=1) + model.predict(dataset, steps=2) + model.train_on_batch(dataset) + model.predict_on_batch(dataset) + + @tf_test_util.run_in_graph_and_eager_modes + def test_dataset_with_sparse_labels(self): + model = testing_utils.get_small_functional_mlp(1, 4, input_dim=3) + optimizer = RMSPropOptimizer(learning_rate=0.001) + loss = 'sparse_categorical_crossentropy' + model.compile(optimizer, loss) + + inputs = np.zeros((10, 3)) + targets = np.random.randint(0, 4, size=10, dtype=np.int32) + dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) + def test_dataset_input_shape_validation(self): with self.test_session(): model = testing_utils.get_small_functional_mlp(1, 4, input_dim=3) @@ -2108,8 +2145,10 @@ class TestTrainingWithDataset(test.TestCase): dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) dataset = dataset.repeat(100) - with self.assertRaisesRegexp(ValueError, - r'expected (.*?) to have 2 dimensions'): + with self.assertRaisesRegexp( + ValueError, + r'expected (.*?) to have shape \(3,\) but got array with shape \(1,\)' + ): model.train_on_batch(dataset) # Wrong input shape ",0,train 025277a1598fa227b53ddc4e316a7a953b2006c8,tensorflow/tensorflow,"Small improvements to handling of Datasets in Keras. * Allow sparse labels to work with Datasets. * Allow sample_weights to be passed as the third output of a Dataset (like how generator input is treated). PiperOrigin-RevId: 211834259",training_utils.py,"@@ -210,10 +210,11 @@ def check_num_samples(ins, def standardize_single_array(x): if x is None: return None - elif tensor_util.is_tensor(x): - return x - elif x.ndim == 1: - x = np.expand_dims(x, 1) + if x.shape is not None and len(x.shape) == 1: + if tensor_util.is_tensor(x): + return array_ops.expand_dims(x, axis=1) + else: + return np.expand_dims(x, 1) return x @@ -341,7 +342,7 @@ def standardize_sample_or_class_weights(x_weight, output_names, weight_type): Raises: ValueError: In case of invalid user-provided argument. 
"""""" - if x_weight is None or len(x_weight) == 0: # pylint: disable=g-explicit-length-test + if x_weight is None or (isinstance(x_weight, list) and len(x_weight) == 0): # pylint: disable=g-explicit-length-test return [None for _ in output_names] if len(output_names) == 1: if isinstance(x_weight, list) and len(x_weight) == 1: @@ -675,7 +676,8 @@ def standardize_weights(y, 'Expected sample_weight with rank ' 'less than or equal to ' + str(len(y.shape))) - if y.shape[:sample_weight.ndim] != sample_weight.shape: + if (not tensor_util.is_tensor(sample_weight) and + y.shape[:sample_weight.ndim] != sample_weight.shape): raise ValueError( 'Found a sample_weight array with shape ' + str(sample_weight.shape) + ' for an input with shape ' + str(y.shape) + '. ' @@ -777,7 +779,9 @@ def validate_iterator_input(x, y, sample_weight, validation_split=None): 'Received: %s' % (x, y)) if sample_weight is not None: raise ValueError('`sample_weight` argument is not supported when input ' - '`x` is a dataset or a dataset iterator. ' + '`x` is a dataset or a dataset iterator. Instead, you' + 'can provide sample_weight as the third element of your' + 'dataset, i.e. (inputs, targets, sample_weight). ' 'Received: x=%s, sample_weight=%s' % (x, sample_weight)) if validation_split is not None and validation_split != 0.0: raise ValueError( ",0,train 8bb742049234d72c28ea22ed86f67f40b288aae8,tensorflow/tensorflow,"Use Env::LocalTempFilename for a temp filename. This function works both in and outside of tests. Additionally, LocalTempFilename works well on Windows where as TmpDir is a little problematic because of bazel oddities. PiperOrigin-RevId: 296250888 Change-Id: I2a8bc52ad784eda4d00f63c91eec681cc91e16e7",inputbuffer_test.cc,"@@ -16,7 +16,6 @@ limitations under the License. #include ""tensorflow/core/lib/io/inputbuffer.h"" #include -#include ""tensorflow/core/platform/env.h"" #include ""tensorflow/core/lib/core/coding.h"" #include ""tensorflow/core/lib/core/errors.h"" @@ -24,6 +23,7 @@ limitations under the License. 
#include ""tensorflow/core/lib/core/status_test_util.h"" #include ""tensorflow/core/lib/strings/str_util.h"" #include ""tensorflow/core/lib/strings/strcat.h"" +#include ""tensorflow/core/platform/env.h"" #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/core/platform/test.h"" @@ -37,7 +37,8 @@ static std::vector BufferSizes() { TEST(InputBuffer, ReadLine_Empty) { Env* env = Env::Default(); - string fname = testing::TmpDir() + ""/inputbuffer_test""; + string fname; + ASSERT_TRUE(env->LocalTempFilename(&fname)); TF_ASSERT_OK(WriteStringToFile(env, fname, """")); for (auto buf_size : BufferSizes()) { @@ -51,7 +52,8 @@ TEST(InputBuffer, ReadLine_Empty) { TEST(InputBuffer, ReadLine1) { Env* env = Env::Default(); - string fname = testing::TmpDir() + ""/inputbuffer_test""; + string fname; + ASSERT_TRUE(env->LocalTempFilename(&fname)); TF_CHECK_OK( WriteStringToFile(env, fname, ""line one\nline two\nline three\n"")); @@ -74,7 +76,8 @@ TEST(InputBuffer, ReadLine1) { TEST(InputBuffer, ReadLine_NoTrailingNewLine) { Env* env = Env::Default(); - string fname = testing::TmpDir() + ""/inputbuffer_test""; + string fname; + ASSERT_TRUE(env->LocalTempFilename(&fname)); TF_ASSERT_OK(WriteStringToFile(env, fname, ""line one\nline two\nline three"")); for (auto buf_size : BufferSizes()) { @@ -96,7 +99,8 @@ TEST(InputBuffer, ReadLine_NoTrailingNewLine) { TEST(InputBuffer, ReadLine_EmptyLines) { Env* env = Env::Default(); - string fname = testing::TmpDir() + ""/inputbuffer_test""; + string fname; + ASSERT_TRUE(env->LocalTempFilename(&fname)); TF_CHECK_OK( WriteStringToFile(env, fname, ""line one\n\n\nline two\nline three"")); @@ -123,7 +127,8 @@ TEST(InputBuffer, ReadLine_EmptyLines) { TEST(InputBuffer, ReadLine_CRLF) { Env* env = Env::Default(); - string fname = testing::TmpDir() + ""/inputbuffer_test""; + string fname; + ASSERT_TRUE(env->LocalTempFilename(&fname)); TF_ASSERT_OK(WriteStringToFile(env, fname, ""line one\r\n\r\n\r\nline two\r\nline three"")); @@ -150,7 +155,8 @@ TEST(InputBuffer, ReadLine_CRLF) { TEST(InputBuffer, ReadNBytes) { Env* env = Env::Default(); - string fname = testing::TmpDir() + ""/inputbuffer_test""; + string fname; + ASSERT_TRUE(env->LocalTempFilename(&fname)); TF_ASSERT_OK(WriteStringToFile(env, fname, ""0123456789"")); // ReadNBytes(int64, string*). @@ -223,7 +229,8 @@ TEST(InputBuffer, ReadNBytes) { TEST(InputBuffer, SkipNBytes) { Env* env = Env::Default(); - string fname = testing::TmpDir() + ""/inputbuffer_test""; + string fname; + ASSERT_TRUE(env->LocalTempFilename(&fname)); TF_ASSERT_OK(WriteStringToFile(env, fname, ""0123456789"")); for (auto buf_size : BufferSizes()) { @@ -258,7 +265,8 @@ TEST(InputBuffer, SkipNBytes) { TEST(InputBuffer, Seek) { Env* env = Env::Default(); - string fname = testing::TmpDir() + ""/inputbuffer_test""; + string fname; + ASSERT_TRUE(env->LocalTempFilename(&fname)); TF_ASSERT_OK(WriteStringToFile(env, fname, ""0123456789"")); for (auto buf_size : BufferSizes()) { @@ -293,7 +301,8 @@ TEST(InputBuffer, Seek) { TEST(InputBuffer, ReadVarint32) { Env* env = Env::Default(); - string fname = testing::TmpDir() + ""/inputbuffer_test""; + string fname; + ASSERT_TRUE(env->LocalTempFilename(&fname)); // Generates data. std::vector data; @@ -331,7 +340,8 @@ TEST(InputBuffer, ReadVarint32) { TEST(InputBuffer, ReadVarint64) { Env* env = Env::Default(); - string fname = testing::TmpDir() + ""/inputbuffer_test""; + string fname; + ASSERT_TRUE(env->LocalTempFilename(&fname)); // Generates data. 
std::vector data; ",0,train fea9d07d1e34d5330a13024cb42d9bc460869905,tensorflow/tensorflow,"Remove references to std::string in MKL-related code. tensorflow::string is sometimes ::string and sometimes std::string, which makes code that uses both subtly dangerous. For example, FactoryKeyCreator::AddAsKey() has an overload for tensorflow::string but had many callsites passing a std::string, causing incorrect behavior on the google platform. PiperOrigin-RevId: 208244169",mkl_fused_batch_norm_op.cc,"@@ -899,8 +899,8 @@ class MklFusedBatchNormFwdPrimitiveFactory : public MklPrimitiveFactory { MklFusedBatchNormFwdPrimitiveFactory() {} ~MklFusedBatchNormFwdPrimitiveFactory() {} - static std::string CreateKey(const MklBatchNormFwdParams& fwdParams) { - std::string prefix = ""bn_fwd""; + static string CreateKey(const MklBatchNormFwdParams& fwdParams) { + string prefix = ""bn_fwd""; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); key_creator.AddAsKey(fwdParams.src_dims); @@ -911,13 +911,13 @@ class MklFusedBatchNormFwdPrimitiveFactory : public MklPrimitiveFactory { } MklPrimitive* GetBatchNormFwd(const MklBatchNormFwdParams& fwdParams) { - std::string key = CreateKey(fwdParams); + string key = CreateKey(fwdParams); return this->GetOp(key); } void SetBatchNormFwd(const MklBatchNormFwdParams& fwdParams, MklPrimitive* op) { - std::string key = CreateKey(fwdParams); + string key = CreateKey(fwdParams); this->SetOp(key, op); } }; @@ -1122,8 +1122,8 @@ class MklFusedBatchNormBwdPrimitiveFactory : public MklPrimitiveFactory { MklFusedBatchNormBwdPrimitiveFactory() {} ~MklFusedBatchNormBwdPrimitiveFactory() {} - static std::string CreateKey(const MklBatchNormBwdParams& bwdParams) { - std::string prefix = ""bn_bwd""; + static string CreateKey(const MklBatchNormBwdParams& bwdParams) { + string prefix = ""bn_bwd""; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); key_creator.AddAsKey(bwdParams.src_dims); @@ -1135,13 +1135,13 @@ class MklFusedBatchNormBwdPrimitiveFactory : public MklPrimitiveFactory { } MklPrimitive* GetBatchNormBwd(const MklBatchNormBwdParams& bwdParams) { - std::string key = CreateKey(bwdParams); + string key = CreateKey(bwdParams); return this->GetOp(key); } void SetBatchNormBwd(const MklBatchNormBwdParams& bwdParams, MklPrimitive* op) { - std::string key = CreateKey(bwdParams); + string key = CreateKey(bwdParams); this->SetOp(key, op); } }; ",0,train fea9d07d1e34d5330a13024cb42d9bc460869905,tensorflow/tensorflow,"Remove references to std::string in MKL-related code. tensorflow::string is sometimes ::string and sometimes std::string, which makes code that uses both subtly dangerous. For example, FactoryKeyCreator::AddAsKey() has an overload for tensorflow::string but had many callsites passing a std::string, causing incorrect behavior on the google platform. PiperOrigin-RevId: 208244169",mkl_pooling_ops_common.h,"@@ -175,8 +175,8 @@ class MklPoolingFwdPrimitiveFactory : public MklPrimitiveFactory { // primitive op from reuse perspective. // A pooling key is a string which concates key parameters // as well as algorithm kind (max versus avg). 
- static std::string CreateKey(const MklPoolingParams& fwdParams) { - std::string prefix = ""pooling_fwd""; + static string CreateKey(const MklPoolingParams& fwdParams) { + string prefix = ""pooling_fwd""; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); key_creator.AddAsKey(fwdParams.src_dims); @@ -190,12 +190,12 @@ class MklPoolingFwdPrimitiveFactory : public MklPrimitiveFactory { } MklPrimitive* GetPoolingFwd(const MklPoolingParams& fwdParams) { - std::string key = CreateKey(fwdParams); + string key = CreateKey(fwdParams); return this->GetOp(key); } void SetPoolingFwd(const MklPoolingParams& fwdParams, MklPrimitive* op) { - std::string key = CreateKey(fwdParams); + string key = CreateKey(fwdParams); this->SetOp(key, op); } }; @@ -326,8 +326,8 @@ class MklPoolingBwdPrimitiveFactory : public MklPrimitiveFactory { // primitive op from reuse perspective. // A pooling key is a string which concates key parameters // as well as algorithm kind (max versus avg). - static std::string CreateKey(const MklPoolingParams& bwdParams) { - std::string prefix = ""pooling_bwd""; + static string CreateKey(const MklPoolingParams& bwdParams) { + string prefix = ""pooling_bwd""; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); key_creator.AddAsKey(bwdParams.src_dims); @@ -341,12 +341,12 @@ class MklPoolingBwdPrimitiveFactory : public MklPrimitiveFactory { } MklPrimitive* GetPoolingBwd(const MklPoolingParams& bwdParams) { - std::string key = CreateKey(bwdParams); + string key = CreateKey(bwdParams); return this->GetOp(key); } void SetPoolingBwd(const MklPoolingParams& bwdParams, MklPrimitive* op) { - std::string key = CreateKey(bwdParams); + string key = CreateKey(bwdParams); this->SetOp(key, op); } }; ",0,train fea9d07d1e34d5330a13024cb42d9bc460869905,tensorflow/tensorflow,"Remove references to std::string in MKL-related code. tensorflow::string is sometimes ::string and sometimes std::string, which makes code that uses both subtly dangerous. For example, FactoryKeyCreator::AddAsKey() has an overload for tensorflow::string but had many callsites passing a std::string, causing incorrect behavior on the google platform. PiperOrigin-RevId: 208244169",mkl_tfconv_op.h,"@@ -118,12 +118,11 @@ class MklToTfOp : public OpKernel { CHECK(output_tensor->CopyFrom(input_tensor, output_shape)); } } catch (mkldnn::error& e) { - string error_msg = ""Status: "" + std::to_string(e.status) + - "", message: "" + std::string(e.message) + "", in file "" + - std::string(__FILE__) + "":"" + std::to_string(__LINE__); OP_REQUIRES_OK( context, - errors::Aborted(""Operation received an exception:"", error_msg)); + errors::Aborted(""Operation received an exception: Status: "", e.status, + "", message: "", StringPiece(e.message), "", in file "", + __FILE__, "":"", __LINE__)); } } #else ",0,train d8840c4872df6f452e0c358cd26352ac4ddb6245,tensorflow/tensorflow,"Internal experimental C++ API change. PiperOrigin-RevId: 377532643 Change-Id: Ifd167c09366547924153cc24b1e01e3c3d0be548",saved_model_api.h,"@@ -24,6 +24,7 @@ limitations under the License. 
#include ""absl/container/flat_hash_map.h"" #include ""tensorflow/c/experimental/saved_model/core/concrete_function.h"" #include ""tensorflow/c/experimental/saved_model/core/signature_def_function.h"" +#include ""tensorflow/cc/saved_model/bundle_v2.h"" #include ""tensorflow/core/platform/status.h"" namespace tensorflow { @@ -54,6 +55,8 @@ class SavedModelAPI { virtual Status GetSignatureDefFunction(const std::string& signature_def_key, SignatureDefFunction** function) = 0; + virtual SavedModelV2Bundle* GetBundle() = 0; + virtual ~SavedModelAPI() = default; }; ",0,train d8840c4872df6f452e0c358cd26352ac4ddb6245,tensorflow/tensorflow,"Internal experimental C++ API change. PiperOrigin-RevId: 377532643 Change-Id: Ifd167c09366547924153cc24b1e01e3c3d0be548",tf_saved_model_api.cc,"@@ -247,6 +247,8 @@ Status TFSavedModelAPI::GetVariable(const std::string& variable_path, return Status(); } +SavedModelV2Bundle* TFSavedModelAPI::GetBundle() { return &this->bundle_; } + TFSavedModelAPI::TFSavedModelAPI(const std::string& directory, SavedModelV2Bundle bundle, RevivedObjects revived_objects) ",0,train d8840c4872df6f452e0c358cd26352ac4ddb6245,tensorflow/tensorflow,"Internal experimental C++ API change. PiperOrigin-RevId: 377532643 Change-Id: Ifd167c09366547924153cc24b1e01e3c3d0be548",tf_saved_model_api.h,"@@ -75,6 +75,8 @@ class TFSavedModelAPI : public SavedModelAPI { Status GetVariable(const std::string& variable_path, Variable** variable); + SavedModelV2Bundle* GetBundle() override; + private: TFSavedModelAPI(const std::string& directory, SavedModelV2Bundle bundle, RevivedObjects revived_objects); ",0,train b3701aac80622dde6529486ad118008c626eed65,tensorflow/tensorflow,"Update GraphDef version to 411. PiperOrigin-RevId: 312963337 Change-Id: I9b9db44aa0010e1dea95442a4e5ff0ae88aef128",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 410 // Updated: 2020/5/23 +#define TF_GRAPH_DEF_VERSION 411 // Updated: 2020/5/24 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // ",0,train 339fdd82490968b3314b8d58ad23cd4808b4e24b,tensorflow/tensorflow,"C++ API changes - Marked control flow ops as hidden Change: 146300232",mark_for_compilation_pass_test.cc,"@@ -16,6 +16,7 @@ limitations under the License. #include ""tensorflow/compiler/jit/mark_for_compilation_pass.h"" #include ""tensorflow/cc/framework/ops.h"" +#include ""tensorflow/cc/ops/control_flow_ops_internal.h"" #include ""tensorflow/cc/ops/standard_ops.h"" #include ""tensorflow/core/framework/node_def_util.h"" #include ""tensorflow/core/framework/op.h"" @@ -337,9 +338,9 @@ TEST(XlaCompilationTest, Loops) { auto a = ops::Placeholder(root.WithOpName(""A""), DT_FLOAT); auto b = ops::Placeholder(root.WithOpName(""B""), DT_FLOAT); auto c = ops::Add(root.WithOpName(""C""), a, b); - auto enter = ops::Enter(root, c, ""aframe""); + auto enter = ops::internal::Enter(root, c, ""aframe""); auto next_iter = ops::NextIteration(root, enter); - auto exit = ops::Exit(root, next_iter); + auto exit = ops::internal::Exit(root, next_iter); auto d = ops::Add(root.WithOpName(""D""), c, exit); std::unique_ptr graph(new Graph(OpRegistry::Global())); ",0,train 339fdd82490968b3314b8d58ad23cd4808b4e24b,tensorflow/tensorflow,"C++ API changes - Marked control flow ops as hidden Change: 146300232",graph_partition_test.cc,"@@ -20,6 +20,7 @@ limitations under the License. 
#include ""tensorflow/cc/ops/array_ops.h"" #include ""tensorflow/cc/ops/const_op.h"" #include ""tensorflow/cc/ops/control_flow_ops.h"" +#include ""tensorflow/cc/ops/control_flow_ops_internal.h"" #include ""tensorflow/cc/ops/random_ops.h"" #include ""tensorflow/cc/ops/sendrecv_ops.h"" #include ""tensorflow/core/framework/op.h"" @@ -337,8 +338,10 @@ TEST_F(GraphPartitionTest, CrossDevice_DataControl) { TEST_F(GraphPartitionTest, CrossDeviceLoop) { using namespace ::tensorflow::ops; // NOLINT(build/namespaces) auto a1 = BoolInput(in_.WithOpName(""A1"")); - auto a2 = Enter(in_.WithOpName(""A2""), a1, ""foo""); - auto a3 = Merge(in_.WithOpName(""A3""), {a2, Input(""A5"", 0, DT_BOOL)}).output; + auto a2 = ::tensorflow::ops::internal::Enter(in_.WithOpName(""A2""), a1, ""foo""); + auto a3 = ::tensorflow::ops::internal::Merge(in_.WithOpName(""A3""), + {a2, Input(""A5"", 0, DT_BOOL)}) + .output; LoopCond(in_.WithOpName(""A4""), a3); auto b1 = Identity(in_.WithOpName(""B1""), a3); NextIteration(in_.WithOpName(""A5""), b1); @@ -349,8 +352,10 @@ TEST_F(GraphPartitionTest, CrossDeviceLoop) { TEST_F(GraphPartitionTest, CrossDeviceLoop1) { using namespace ::tensorflow::ops; // NOLINT(build/namespaces) auto a1 = BoolInput(in_.WithOpName(""A1"")); - auto a2 = Enter(in_.WithOpName(""B2""), a1, ""foo""); - auto a3 = Merge(in_.WithOpName(""A3""), {a2, Input(""B5"", 0, DT_BOOL)}).output; + auto a2 = ::tensorflow::ops::internal::Enter(in_.WithOpName(""B2""), a1, ""foo""); + auto a3 = ::tensorflow::ops::internal::Merge(in_.WithOpName(""A3""), + {a2, Input(""B5"", 0, DT_BOOL)}) + .output; LoopCond(in_.WithOpName(""A4""), a3); auto b1 = Identity(in_.WithOpName(""B1""), a3); NextIteration(in_.WithOpName(""B5""), b1); ",0,train 1171258036e73c911d0487a3c2db8272fd9dc6be,tensorflow/tensorflow,"Automated rollback of commit d9e313d10790ae17d0eabbf6e63463510388e182 PiperOrigin-RevId: 264970919",api_template.__init__.py,"@@ -78,7 +78,7 @@ except ImportError: pass try: - from tensorflow.python.keras.api._v2 import keras + from .python.keras.api._v2 import keras _current_module.__path__ = ( [_module_util.get_parent_dir(keras)] + _current_module.__path__) setattr(_current_module, ""keras"", keras) ",0,train 1171258036e73c911d0487a3c2db8272fd9dc6be,tensorflow/tensorflow,"Automated rollback of commit d9e313d10790ae17d0eabbf6e63463510388e182 PiperOrigin-RevId: 264970919",api_template_v1.__init__.py,"@@ -69,7 +69,7 @@ except ImportError: pass try: - from tensorflow.python.keras.api._v1 import keras + from .python.keras.api._v1 import keras _current_module.__path__ = ( [_module_util.get_parent_dir(keras)] + _current_module.__path__) setattr(_current_module, ""keras"", keras) ",0,train 1171258036e73c911d0487a3c2db8272fd9dc6be,tensorflow/tensorflow,"Automated rollback of commit d9e313d10790ae17d0eabbf6e63463510388e182 PiperOrigin-RevId: 264970919",create_python_api.py,"@@ -195,8 +195,7 @@ class _ModuleInitCodeBuilder(object): dest_module_name=parent_module, dest_name=module_split[submodule_index]) else: - if submodule_index > 0: - import_from += '.' + '.'.join(module_split[:submodule_index]) + import_from = '.' 
self.add_import( symbol=None, source_module_name=import_from, ",0,train 1171258036e73c911d0487a3c2db8272fd9dc6be,tensorflow/tensorflow,"Automated rollback of commit d9e313d10790ae17d0eabbf6e63463510388e182 PiperOrigin-RevId: 264970919",module_test.py,"@@ -23,6 +23,7 @@ import pkgutil import tensorflow as tf +from tensorflow.python import tf2 from tensorflow.python.platform import test @@ -50,6 +51,18 @@ class ModuleTest(test.TestCase): def testName(self): self.assertEqual('tensorflow', tf.__name__) + def testBuiltInName(self): + # range is a built-in name in Python. Just checking that + # tf.range works fine. + if tf2.enabled(): + self.assertEqual( + 'tf.Tensor([1 2 3 4 5 6 7 8 9], shape=(9,), dtype=int32)', + str(tf.range(1, 10))) + else: + self.assertEqual( + 'Tensor(""range:0"", shape=(9,), dtype=int32)', + str(tf.range(1, 10))) + if __name__ == '__main__': test.main() ",0,train 859cd49b628bb430a721ba89883c3a0efbbbdbbc,tensorflow/tensorflow,"Fix breakage: conversion of tf.data was allowed too soon and broke the autograph notebook. PiperOrigin-RevId: 250764059",config.py,"@@ -28,8 +28,6 @@ DoNotConvert = config_lib.DoNotConvert # This list is evaluated in order and stops at the first rule that tests True # for a definitely_convert of definitely_bypass call. CONVERSION_RULES = ( - Convert('tensorflow.python.data.ops'), - DoNotConvert('tensorflow'), # TODO(b/133417201): Remove. ",0,test cd8ced7a2d48574908d2c9b7127960078cf41690,tensorflow/tensorflow,"Enable deduping of Assert nodes. Add unit test for deduping Assert and CheckNumerics. PiperOrigin-RevId: 176680534",op_types.cc,"@@ -24,64 +24,40 @@ limitations under the License. namespace tensorflow { namespace grappler { -bool IsAdd(const NodeDef& node) { - const auto op = node.op(); - return op == ""Add""; -} +bool IsAdd(const NodeDef& node) { return node.op() == ""Add""; } -bool IsAddN(const NodeDef& node) { - const auto op = node.op(); - return op == ""AddN""; -} +bool IsAddN(const NodeDef& node) { return node.op() == ""AddN""; } -bool IsAvgPoolGrad(const NodeDef& node) { - const auto op = node.op(); - return op == ""AvgPoolGrad""; -} +bool IsAvgPoolGrad(const NodeDef& node) { return node.op() == ""AvgPoolGrad""; } -bool IsBiasAddGrad(const NodeDef& node) { - const auto op = node.op(); - return op == ""BiasAddGrad""; -} +bool IsAssert(const NodeDef& node) { return node.op() == ""Assert""; } -bool IsConcatOffset(const NodeDef& node) { - const auto op = node.op(); - return op == ""ConcatOffset""; -} +bool IsBiasAddGrad(const NodeDef& node) { return node.op() == ""BiasAddGrad""; } -bool IsConstant(const NodeDef& node) { - const auto op = node.op(); - return op == ""Const""; -} +bool IsConcatOffset(const NodeDef& node) { return node.op() == ""ConcatOffset""; } -bool IsConv2D(const NodeDef& node) { - const auto op = node.op(); - return op == ""Conv2D""; -} +bool IsConstant(const NodeDef& node) { return node.op() == ""Const""; } + +bool IsConv2D(const NodeDef& node) { return node.op() == ""Conv2D""; } bool IsConv2DBackpropFilter(const NodeDef& node) { - const auto op = node.op(); - return op == ""Conv2DBackpropFilter""; + return node.op() == ""Conv2DBackpropFilter""; } bool IsConv2DBackpropInput(const NodeDef& node) { - const auto op = node.op(); - return op == ""Conv2DBackpropInput""; + return node.op() == ""Conv2DBackpropInput""; } bool IsDepthwiseConv2dNative(const NodeDef& node) { - const auto op = node.op(); - return op == ""DepthwiseConv2dNative""; + return node.op() == ""DepthwiseConv2dNative""; } bool 
IsDepthwiseConv2dNativeBackpropFilter(const NodeDef& node) { - const auto op = node.op(); - return op == ""DepthwiseConv2dNativeBackpropFilter""; + return node.op() == ""DepthwiseConv2dNativeBackpropFilter""; } bool IsDepthwiseConv2dNativeBackpropInput(const NodeDef& node) { - const auto op = node.op(); - return op == ""DepthwiseConv2dNativeBackpropInput""; + return node.op() == ""DepthwiseConv2dNativeBackpropInput""; } bool IsDequeueOp(const NodeDef& node) { @@ -101,14 +77,10 @@ bool IsExit(const NodeDef& node) { return op == ""Exit"" || op == ""RefExit""; } -bool IsFloorMod(const NodeDef& node) { - const auto& op = node.op(); - return op == ""FloorMod""; -} +bool IsFloorMod(const NodeDef& node) { return node.op() == ""FloorMod""; } bool IsFusedBatchNormGradV1(const NodeDef& node) { - const auto& op = node.op(); - return op == ""FusedBatchNormGrad""; + return node.op() == ""FusedBatchNormGrad""; } bool IsIdentity(const NodeDef& node) { @@ -121,25 +93,16 @@ bool IsMerge(const NodeDef& node) { return op == ""Merge"" || op == ""RefMerge""; } -bool IsMul(const NodeDef& node) { - const auto op = node.op(); - return op == ""Mul""; -} +bool IsMul(const NodeDef& node) { return node.op() == ""Mul""; } -bool IsNoOp(const NodeDef& node) { - const auto op = node.op(); - return op == ""NoOp""; -} +bool IsNoOp(const NodeDef& node) { return node.op() == ""NoOp""; } bool IsNextIteration(const NodeDef& node) { const auto& op = node.op(); return op == ""NextIteration"" || op == ""RefNextIteration""; } -bool IsPad(const NodeDef& node) { - const auto op = node.op(); - return op == ""Pad""; -} +bool IsPad(const NodeDef& node) { return node.op() == ""Pad""; } bool IsPlaceholder(const NodeDef& node) { const auto op = node.op(); @@ -147,20 +110,11 @@ bool IsPlaceholder(const NodeDef& node) { op == ""PlaceholderWithDefault""; } -bool IsRealDiv(const NodeDef& node) { - const auto op = node.op(); - return op == ""RealDiv""; -} +bool IsRealDiv(const NodeDef& node) { return node.op() == ""RealDiv""; } -bool IsReluGrad(const NodeDef& node) { - const auto op = node.op(); - return op == ""ReluGrad""; -} +bool IsReluGrad(const NodeDef& node) { return node.op() == ""ReluGrad""; } -bool IsRecv(const NodeDef& node) { - const auto op = node.op(); - return op == ""_Recv""; -} +bool IsRecv(const NodeDef& node) { return node.op() == ""_Recv""; } bool IsReduction(const NodeDef& node) { const auto& op = node.op(); @@ -175,53 +129,34 @@ bool IsRestore(const NodeDef& node) { node.op() == ""RestoreSlice""); } -bool IsSend(const NodeDef& node) { - const auto op = node.op(); - return op == ""_Send""; -} +bool IsSend(const NodeDef& node) { return node.op() == ""_Send""; } -bool IsSlice(const NodeDef& node) { - const auto op = node.op(); - return op == ""Slice""; -} +bool IsSlice(const NodeDef& node) { return node.op() == ""Slice""; } bool IsSquaredDifference(const NodeDef& node) { - const auto op = node.op(); - return op == ""SquaredDifference""; + return node.op() == ""SquaredDifference""; } -bool IsSqueeze(const NodeDef& node) { - const auto op = node.op(); - return op == ""Squeeze""; -} +bool IsSqueeze(const NodeDef& node) { return node.op() == ""Squeeze""; } bool IsStopGradient(const NodeDef& node) { const auto& op = node.op(); return op == ""StopGradient"" || op == ""PreventGradient""; } -bool IsSub(const NodeDef& node) { - const auto op = node.op(); - return op == ""Sub""; -} +bool IsSub(const NodeDef& node) { return node.op() == ""Sub""; } -bool IsSum(const NodeDef& node) { - const auto op = node.op(); - return op == ""Sum""; -} 
+bool IsSum(const NodeDef& node) { return node.op() == ""Sum""; } bool IsSwitch(const NodeDef& node) { const auto& op = node.op(); return op == ""Switch"" || op == ""RefSwitch""; } -bool IsTranspose(const NodeDef& node) { - const auto op = node.op(); - return op == ""Transpose""; -} +bool IsTranspose(const NodeDef& node) { return node.op() == ""Transpose""; } bool IsVariable(const NodeDef& node) { - const auto op = node.op(); + const auto& op = node.op(); return op == ""Variable"" || op == ""VariableV2"" || op == ""AutoReloadVariable"" || op == ""VarHandleOp"" || op == ""ReadVariableOp""; } ",0,train cd8ced7a2d48574908d2c9b7127960078cf41690,tensorflow/tensorflow,"Enable deduping of Assert nodes. Add unit test for deduping Assert and CheckNumerics. PiperOrigin-RevId: 176680534",op_types.h,"@@ -25,6 +25,7 @@ namespace grappler { bool IsAdd(const NodeDef& node); bool IsAddN(const NodeDef& node); bool IsAvgPoolGrad(const NodeDef& node); +bool IsAssert(const NodeDef& node); bool IsBiasAddGrad(const NodeDef& node); bool IsConcatOffset(const NodeDef& node); bool IsConstant(const NodeDef& node); ",0,train cd8ced7a2d48574908d2c9b7127960078cf41690,tensorflow/tensorflow,"Enable deduping of Assert nodes. Add unit test for deduping Assert and CheckNumerics. PiperOrigin-RevId: 176680534",arithmetic_optimizer.cc,"@@ -449,6 +449,10 @@ bool ArithmeticOptimizer::CanDedup(const NodeDef& node) const { if (node.device().find(""SPU"") != string::npos) { return false; } + // Workaround for Assert mistakenly being labeled as stateful. + if (IsAssert(node)) { + return true; + } return IsFreeOfSideEffect(node); } ",0,train cd8ced7a2d48574908d2c9b7127960078cf41690,tensorflow/tensorflow,"Enable deduping of Assert nodes. Add unit test for deduping Assert and CheckNumerics. PiperOrigin-RevId: 176680534",arithmetic_optimizer_test.cc,"@@ -81,6 +81,38 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) { EXPECT_EQ(""c1"", new_mul.input(1)); } +TEST_F(ArithmeticOptimizerTest, OpDeduppingAssertAndCheckNumerics) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output p = ops::Placeholder(s, DT_BOOL, ops::Placeholder::Shape({})); + Output c = ops::Const(s.WithOpName(""c""), {3.14, 2.7}, {1, 2}); + auto check1 = ops::CheckNumerics(s.WithOpName(""check1""), c, ""foo""); + auto check2 = ops::CheckNumerics(s.WithOpName(""check2""), c, ""foo""); + auto assert1 = ops::Assert(s.WithOpName(""assert1""), p, {c}); + auto assert2 = ops::Assert(s.WithOpName(""assert2""), p, {c}); + Output mul = ops::Multiply(s.WithOpName(""mul"").WithControlDependencies( + {assert1.operation, assert2.operation}), + check1, check2); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + ArithmeticOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. 
+ item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(5, output.node_size()); + const NodeDef& new_mul = output.node(3); + EXPECT_EQ(4, new_mul.input_size()); + EXPECT_EQ(""check1"", new_mul.input(0)); + EXPECT_EQ(""check1"", new_mul.input(1)); + EXPECT_EQ(""^assert1"", new_mul.input(2)); + EXPECT_EQ(""^assert1"", new_mul.input(3)); +} + TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output c1 = ops::Const(s.WithOpName(""c1""), {1.0f, 2.0f}, {1, 2}); ",0,train 8c51ff3258eb89dfe02f5aec8c4705033b30684f,tensorflow/tensorflow,"Add an explanation when cublas fails to initilaize that this may be due to OOM. I hit this recently with JAX. JAX allocates 90% of your GPU's total memory by default, and it turned out that after doing this, I didn't have enough memory free to initialize cublas! But unfortunately, cublas didn't give a useful error message. PiperOrigin-RevId: 414144717 Change-Id: I05ecaa512bfd49211d26ecc4f09ee11386d12ec9",cuda_blas.cc,"@@ -219,11 +219,20 @@ class ScopedCublasMathMode { }; #endif // CUDA_VERSION >= 9000 +static const char *const kCublasNotInitializedExplanation = + ""Failure to initialize cublas may be due to OOM (cublas needs some free "" + ""memory when you initialize it, and your deep-learning framework may have "" + ""preallocated more than its fair share), or may be because this binary was "" + ""not built with support for the GPU in your machine.""; + bool CUDABlas::Init() { gpu::ScopedActivateExecutorContext sac{parent_}; cublasStatus_t ret = cublasCreate(&blas_); if (ret != CUBLAS_STATUS_SUCCESS) { LOG(ERROR) << ""failed to create cublas handle: "" << ToString(ret); + if (ret == CUBLAS_STATUS_NOT_INITIALIZED) { + LOG(ERROR) << kCublasNotInitializedExplanation; + } return false; } @@ -231,6 +240,9 @@ bool CUDABlas::Init() { ret = cublasLtCreate(&blasLt_); if (ret != CUBLAS_STATUS_SUCCESS) { LOG(ERROR) << ""failed to create cublasLt handle: "" << ToString(ret); + if (ret == CUBLAS_STATUS_NOT_INITIALIZED) { + LOG(ERROR) << kCublasNotInitializedExplanation; + } return false; } #endif // CUDA_VERSION >= 11000 ",0,train a0991e859fe45ddb04d8b618e6b602684726b2e5,tensorflow/tensorflow,"Update GraphDef version to 789. PiperOrigin-RevId: 377755989 Change-Id: I38fae40ca0dc9cebe45233c59769500674d7a966",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 788 // Updated: 2021/6/5 +#define TF_GRAPH_DEF_VERSION 789 // Updated: 2021/6/6 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // ",0,train e2d0c2cb30f7f1472935350fd392ef473d07d089,tensorflow/tensorflow,Link to tf.ConfigProto,test_util.py,"@@ -2717,7 +2717,7 @@ def create_local_cluster(num_workers, TODO: image from https://www.tensorflow.org/images/diag1.svg - Figure 2 illustrates the interaction of these components. + Figure illustrates the interaction of these components. ""/job:worker/task:0"" and ""/job:ps/task:0"" are both tasks with worker services. @@ -2744,9 +2744,9 @@ def create_local_cluster(num_workers, num_ps: Number of PS servers to start. protocol: Communication protocol. Allowed values are documented in the documentation of `tf.train.Server`. - worker_config: (optional) ConfigProto to initialize workers. Can be used to + worker_config: (optional) `tf.ConfigProto` to initialize workers. 
Can be used to instantiate multiple devices etc. - ps_config: (optional) ConfigProto to initialize PS servers. + ps_config: (optional) `tf.ConfigProto` to initialize PS servers. Returns: A tuple `(worker_servers, ps_servers)`. `worker_servers` is a list ",0,train 42579858f9cda701c7c69d4a1f89035f0a68b258,tensorflow/tensorflow,[MLIR][XLA] Add GatherOp to HLO to LHLO converters,hlo_legalize_to_lhlo.cc,"@@ -453,6 +453,7 @@ void populateHLOToLHLOConversionPattern( HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, + HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, ",0,train 42579858f9cda701c7c69d4a1f89035f0a68b258,tensorflow/tensorflow,[MLIR][XLA] Add GatherOp to HLO to LHLO converters,map_hlo_to_lhlo_op.h,"@@ -52,6 +52,7 @@ MAP_HLO_TO_LHLO(CosOp); MAP_HLO_TO_LHLO(DivOp); MAP_HLO_TO_LHLO(DotOp); MAP_HLO_TO_LHLO(ExpOp); +MAP_HLO_TO_LHLO(GatherOp); MAP_HLO_TO_LHLO(ImagOp); MAP_HLO_TO_LHLO(IotaOp); MAP_HLO_TO_LHLO(LogOp); ",0,train 76dca033bc9e17ba5f74c22a301d513afa4d5790,tensorflow/tensorflow,Update nn_ops.py,nn_ops.py,"@@ -1853,9 +1853,9 @@ def conv2d_v2(input, # pylint: disable=redefined-builtin Usage Example: >>> kernel_in = np.array([ - ... [ [[2, 0.1]],[[3, 0.2]] ], + ... [ [[2, 0.1]], [[3, 0.2]] ], ... [ [[0, 0.3]],[[1, 0.4]] ], ]) - >>> x = tf.placeholder(tf.float32, shape=[1, 5, 5, 1]) + >>> x = tf.Variable(shape=tf.TensorShape(1, 5, 5, 1)) >>> kernel = tf.constant(kernel_in, dtype=tf.float32) ",0,train edbb66a83fd6070bfc9509caae15b46b7c7a2261,tensorflow/tensorflow,"Remove usage of internal composite_tensor_utils.get_shape from Keras. (By forking the one usage) PiperOrigin-RevId: 342130856 Change-Id: I8e67dd09a1c4e7326dd50573b58091c0d1338dc9",training_utils_v1.py,"@@ -425,6 +425,15 @@ def standardize_single_array(x, expected_shape=None): return x +def get_composite_shape(tensor): + """"""Returns the shape of the passed composite tensor."""""" + if isinstance(tensor, sparse_tensor.SparseTensorValue): + # SparseTensorValues use a 'dense_shape' attribute + return tensor.dense_shape + else: + return tensor.shape + + def standardize_input_data(data, names, shapes=None, @@ -528,7 +537,7 @@ def standardize_input_data(data, continue data_shape = tuple(tensorshape.as_list()) elif is_composite_or_composite_value(data[i]): - tensorshape = composite_tensor_utils.get_shape(data[i]) + tensorshape = get_composite_shape(data[i]) data_shape = tuple(tensorshape.as_list()) else: data_shape = data[i].shape ",0,train 8edf5c9e4cd68b81e7660ed44c7852485aebae14,tensorflow/tensorflow,"Reverted a change PiperOrigin-RevId: 366510368 Change-Id: I87c5bc3734986ab815d48bb244f90d54bda95df9",sparse_tensor.py,"@@ -343,11 +343,7 @@ class SparseTensorSpec(type_spec.BatchableTypeSpec): not tf2.enabled()): return SparseTensorValue(*tensor_list) else: - result = SparseTensor(*tensor_list) - # Augment the static dense shape with the shape carried by the spec. 
- result._dense_shape_default = result._dense_shape_default.merge_with( # pylint: disable=protected-access - self._shape) - return result + return SparseTensor(*tensor_list) # The SparseTensorSpec tensor_list encoding uses (de)serialize_sparse ops # to (un)box the component tensors in a way that allows for batching & ",0,train 8edf5c9e4cd68b81e7660ed44c7852485aebae14,tensorflow/tensorflow,"Reverted a change PiperOrigin-RevId: 366510368 Change-Id: I87c5bc3734986ab815d48bb244f90d54bda95df9",sparse_tensor_test.py,"@@ -290,16 +290,6 @@ class SparseTensorSpecTest(test_util.TensorFlowTestCase, self.assertAllEqual(st.values, st_reconstructed.values) self.assertAllEqual(st.dense_shape, st_reconstructed.dense_shape) - def testFromComponentsDynamicDenseShapeTensor(self): - @def_function.function(input_signature=[ - sparse_tensor.SparseTensorSpec([None, 10, 100])]) - def sparse_fun(st): - self.assertEqual(st.get_shape().as_list(), [None, 10, 100]) - return st.dense_shape - - # Force tracing the TF function. - _ = sparse_fun.get_concrete_function() - @test_util.run_v1_only(""SparseTensorValue is deprecated in v2"") def testFromNumpyComponents(self): indices = np.array([[0], [8]]) ",0,train 8edf5c9e4cd68b81e7660ed44c7852485aebae14,tensorflow/tensorflow,"Reverted a change PiperOrigin-RevId: 366510368 Change-Id: I87c5bc3734986ab815d48bb244f90d54bda95df9",control_flow_ops_test.py,"@@ -762,7 +762,7 @@ class DataTypesTest(test_util.TensorFlowTestCase): @test_util.run_deprecated_v1 def test_sparse_tensors(self): - shape = tensor_shape.TensorShape([3, 4]) + shape = tensor_shape.TensorShape([None, None]) def true_fn(): return [ ",0,train dd410bc164d4026a2feb5baae26e6df7a2005d89,tensorflow/tensorflow,"Backported some changes to the reduction code from upstream Eigen to keep the code in sync. Change: 127477364",eigen_pooling.h,"@@ -376,6 +376,24 @@ struct AvgPoolMeanReducer { Packet packetCount_; }; +template +struct reducer_traits, Device> { + enum { + Cost = 1, +#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) + // We only support packet access for floats. + PacketAccess = true +#else + PacketAccess = false +#endif + }; +}; + +template <> +struct reducer_traits, GpuDevice> { + enum { Cost = 1, PacketAccess = false }; +}; + } // namespace internal #if !defined(EIGEN_HAS_INDEX_LIST) ",0,train de336139770c6e9e035c0e402375efda48d75301,tensorflow/tensorflow,"[rollforward]Guess test binary path from TEST_TARGET env var PiperOrigin-RevId: 343132917 Change-Id: I6af62b595875070dac9e47f22564852dd4976252",multi_process_lib.py,"@@ -23,6 +23,7 @@ import platform import sys import unittest from absl import app +from absl import logging from tensorflow.python.eager import test @@ -97,31 +98,28 @@ def _set_spawn_exe_path(): """""" # TODO(b/150264776): This does not work with Windows. Find a solution. if sys.argv[0].endswith('.py'): + path = None # If all we have is a python module path, we'll need to make a guess for - # the actual executable path. Since the binary path may correspond to the - # parent's path of the python module, we are making guesses by reducing - # directories one at a time. 
E.g., - # tensorflow/python/some/path/my_test.py - # -> tensorflow/python/some/path/my_test - # -> tensorflow/python/some/my_test - # -> tensorflow/python/my_test - path_to_use = None - guess_path = sys.argv[0][:-3] - guess_path = guess_path.split(os.sep) - for path_reduction in range(-1, -len(guess_path), -1): - possible_path = os.sep.join(guess_path[:path_reduction] + - [guess_path[-1]]) + # the actual executable path. + if 'bazel-out' in sys.argv[0]: + # Guess the binary path under bazel. For target + # //tensorflow/python/distribute:input_lib_test_multiworker_gpu, the + # argv[0] is in the form of + # /.../tensorflow/python/distribute/input_lib_test.py + # and the binary is + # /.../tensorflow/python/distribute/input_lib_test_multiworker_gpu + org_tensorflow_path = sys.argv[0][:sys.argv[0].rfind('/tensorflow')] + binary = os.environ['TEST_TARGET'][2:].replace(':', '/', 1) + possible_path = os.path.join(org_tensorflow_path, binary) + logging.info('Guessed test binary path: %s', possible_path) if os.access(possible_path, os.X_OK): - path_to_use = possible_path - break - # The binary can possibly have _gpu suffix. - possible_path += '_gpu' - if os.access(possible_path, os.X_OK): - path_to_use = possible_path - break - if path_to_use is None: + path = possible_path + if path is None: + logging.error( + 'Cannot determine binary path. sys.argv[0]=%s os.environ=%s', + sys.argv[0], os.environ) raise RuntimeError('Cannot determine binary path') - sys.argv[0] = path_to_use + sys.argv[0] = path # Note that this sets the executable for *all* contexts. multiprocessing.get_context().set_executable(sys.argv[0]) ",0,train 573c6f40a90ace2bc921738937fea32fdf724f7b,tensorflow/tensorflow,Bump the required numpy version in r1.6,setup.py,"@@ -36,7 +36,7 @@ REQUIRED_PACKAGES = [ 'astor >= 0.6.0', 'gast >= 0.2.0', 'grpcio >= 1.8.6', - 'numpy >= 1.12.1', + 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', 'tensorflow-tensorboard >= 1.5.0, < 1.6.0', ",0,test ff83809afa9062e77809d2b65ffbaee3c0045241,tensorflow/tensorflow,"Refactoring ArgMax implementation in preparation for ArgMin. PiperOrigin-RevId: 203698572",arg_min_max.cc,"@@ -23,7 +23,7 @@ limitations under the License. namespace tflite { namespace ops { namespace builtin { -namespace arg_max { +namespace arg_min_max { constexpr int kInputTensor = 0; constexpr int kAxis = 1; @@ -80,30 +80,39 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { return context->ResizeTensor(context, output, output_size); } +template +std::function GetComparefunction(bool is_arg_max) { + if (is_arg_max) { + return std::greater(); + } else { + return std::less(); + } +} + // The current impl actually ignores the axis argument. // Only determine the index of the maximum value in the last dimension. 
-TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); const TfLiteTensor* axis = GetInput(context, node, kAxis); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); -#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \ - optimized_ops::ArgMax(GetTensorData(axis), \ - GetTensorData(input), GetTensorDims(input), \ - GetTensorData(output), \ - GetTensorDims(output)) +#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \ + optimized_ops::ArgMinMax( \ + GetTensorData(axis), GetTensorData(input), \ + GetTensorDims(input), GetTensorData(output), \ + GetTensorDims(output), GetComparefunction(is_arg_max)) if (axis->type == kTfLiteInt32) { switch (output->type) { case kTfLiteInt32: { switch (input->type) { case kTfLiteFloat32: - TF_LITE_ARG_MAX(float, int32_t, int32_t); + TF_LITE_ARG_MIN_MAX(float, int32_t, int32_t); break; case kTfLiteUInt8: - TF_LITE_ARG_MAX(uint8_t, int32_t, int32_t); + TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t); break; case kTfLiteInt32: - TF_LITE_ARG_MAX(int32_t, int32_t, int32_t); + TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int32_t); break; default: return kTfLiteError; @@ -112,13 +121,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { case kTfLiteInt64: { switch (input->type) { case kTfLiteFloat32: - TF_LITE_ARG_MAX(float, int32_t, int64_t); + TF_LITE_ARG_MIN_MAX(float, int32_t, int64_t); break; case kTfLiteUInt8: - TF_LITE_ARG_MAX(uint8_t, int32_t, int64_t); + TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t); break; case kTfLiteInt32: - TF_LITE_ARG_MAX(int32_t, int32_t, int64_t); + TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int64_t); break; default: return kTfLiteError; @@ -132,13 +141,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { case kTfLiteInt32: { switch (input->type) { case kTfLiteFloat32: - TF_LITE_ARG_MAX(float, int64_t, int32_t); + TF_LITE_ARG_MIN_MAX(float, int64_t, int32_t); break; case kTfLiteUInt8: - TF_LITE_ARG_MAX(uint8_t, int64_t, int32_t); + TF_LITE_ARG_MIN_MAX(uint8_t, int64_t, int32_t); break; case kTfLiteInt32: - TF_LITE_ARG_MAX(int32_t, int64_t, int32_t); + TF_LITE_ARG_MIN_MAX(int32_t, int64_t, int32_t); break; default: return kTfLiteError; @@ -147,13 +156,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { case kTfLiteInt64: { switch (input->type) { case kTfLiteFloat32: - TF_LITE_ARG_MAX(float, int64_t, int64_t); + TF_LITE_ARG_MIN_MAX(float, int64_t, int64_t); break; case kTfLiteUInt8: - TF_LITE_ARG_MAX(uint8_t, int64_t, int64_t); + TF_LITE_ARG_MIN_MAX(uint8_t, int64_t, int64_t); break; case kTfLiteInt32: - TF_LITE_ARG_MAX(int32_t, int64_t, int64_t); + TF_LITE_ARG_MIN_MAX(int32_t, int64_t, int64_t); break; default: return kTfLiteError; @@ -163,16 +172,20 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteError; } } -#undef TF_LITE_ARG_MAX +#undef TF_LITE_ARG_MIN_MAX return kTfLiteOk; } -} // namespace arg_max +TfLiteStatus ArgMaxEval(TfLiteContext* context, TfLiteNode* node) { + return Eval(context, node, true); +} + +} // namespace arg_min_max TfLiteRegistration* Register_ARG_MAX() { - static TfLiteRegistration r = {nullptr, nullptr, arg_max::Prepare, - arg_max::Eval}; + static TfLiteRegistration r = {nullptr, nullptr, arg_min_max::Prepare, + arg_min_max::ArgMaxEval}; return &r; } ",0,train ff83809afa9062e77809d2b65ffbaee3c0045241,tensorflow/tensorflow,"Refactoring ArgMax implementation in 
preparation for ArgMin. PiperOrigin-RevId: 203698572",arg_min_max_test.cc,,0,train ff83809afa9062e77809d2b65ffbaee3c0045241,tensorflow/tensorflow,"Refactoring ArgMax implementation in preparation for ArgMin. PiperOrigin-RevId: 203698572",optimized_ops.h,"@@ -41,6 +41,7 @@ namespace optimized_ops { // Unoptimized reference ops: using reference_ops::ArgMax; +using reference_ops::ArgMinMax; using reference_ops::BroadcastGreater; using reference_ops::BroadcastGreaterEqual; using reference_ops::BroadcastLess; ",0,train ff83809afa9062e77809d2b65ffbaee3c0045241,tensorflow/tensorflow,"Refactoring ArgMax implementation in preparation for ArgMin. PiperOrigin-RevId: 203698572",reference_ops.h,"@@ -3717,9 +3717,9 @@ void TensorFlowMaximumMinimum(const T* input1_data, const Dims<4>& input1_dims, } } -template -void ArgMax(const T3* axis, const T1* input_data, const Dims<4>& input_dims, - T2* output_data, const Dims<4>& output_dims) { +template +void ArgMinMax(const T3* axis, const T1* input_data, const Dims<4>& input_dims, + T2* output_data, const Dims<4>& output_dims, const Cmp& cmp) { // The current ArgMax implemention can only determine the index of the maximum // value in the last dimension. So the axis argument is ignored. @@ -3732,19 +3732,28 @@ void ArgMax(const T3* axis, const T1* input_data, const Dims<4>& input_dims, const int depth = ArraySize(input_dims, 0); for (int i = 0; i < outer_size; ++i) { - auto max_value = input_data[i * depth]; - int max_index = 0; + auto min_max_value = input_data[i * depth]; + int min_max_index = 0; for (int d = 1; d < depth; ++d) { const auto& curr_value = input_data[i * depth + d]; - if (curr_value > max_value) { - max_value = curr_value; - max_index = d; + if (cmp(curr_value, min_max_value)) { + min_max_value = curr_value; + min_max_index = d; } } - output_data[i] = max_index; + output_data[i] = min_max_index; } } +// TODO(renjieliu): Remove this one. +template +void ArgMax(const T3* axis, const T1* input_data, + const tflite::Dims<4>& input_dims, T2* output_data, + const tflite::Dims<4>& output_dims) { + ArgMinMax(axis, input_data, input_dims, output_data, output_dims, + std::greater()); +} + template void Transpose(const T* input, const Dims<4>& input_dims, T* output, const Dims<4>& output_dims, const int* permuted_axes) { ",0,train 107416b863c7b362ec8c04006ce1aab5ab1699fd,tensorflow/tensorflow,"Fix `ResourceWarning: unclosed file` warnings in reader_ops_test (#3827) Some file handles created in these test cases are were not being closed. This causes some warnings as well along with leaking the handles. 
./source/tensorflow/tensorflow/python/kernel_tests/reader_ops_test.py:245: ResourceWarning: unclosed file <_io.BufferedWriter name='/var/folders/sq/vmncyd7506q_ch43llrwr8sn6zfknl/T/reader_ops_test/text_line.0.txt'> f = open(fn, ""wb"") /source/tensorflow/tensorflow/python/kernel_tests/reader_ops_test.py:276: ResourceWarning: unclosed file <_io.BufferedWriter name='/var/folders/sq/vmncyd7506q_ch43llrwr8sn6zfknl/T/reader_ops_test/text_line.1.txt'> self._testOneEpoch(self._CreateFiles(crlf=True)) ./source/tensorflow/tensorflow/python/kernel_tests/reader_ops_test.py:273: ResourceWarning: unclosed file <_io.BufferedWriter name='/var/folders/sq/vmncyd7506q_ch43llrwr8sn6zfknl/T/reader_ops_test/text_line.1.txt'> self._testOneEpoch(self._CreateFiles(crlf=False)) ./source/tensorflow/tensorflow/python/kernel_tests/reader_ops_test.py:279: ResourceWarning: unclosed file <_io.BufferedWriter name='/var/folders/sq/vmncyd7506q_ch43llrwr8sn6zfknl/T/reader_ops_test/text_line.1.txt'> files = self._CreateFiles() ../source/tensorflow/tensorflow/python/kernel_tests/reader_ops_test.py:183: ResourceWarning: unclosed file <_io.BufferedWriter name='/var/folders/sq/vmncyd7506q_ch43llrwr8sn6zfknl/T/reader_ops_test/whole_file.0.txt'> open(fn, ""wb"").write(c) /source/tensorflow/tensorflow/python/kernel_tests/reader_ops_test.py:183: ResourceWarning: unclosed file <_io.BufferedWriter name='/var/folders/sq/vmncyd7506q_ch43llrwr8sn6zfknl/T/reader_ops_test/whole_file.1.txt'> open(fn, ""wb"").write(c) /source/tensorflow/tensorflow/python/kernel_tests/reader_ops_test.py:183: ResourceWarning: unclosed file <_io.BufferedWriter name='/var/folders/sq/vmncyd7506q_ch43llrwr8sn6zfknl/T/reader_ops_test/whole_file.2.txt'> open(fn, ""wb"").write(c) ...",reader_ops_test.py,"@@ -178,7 +178,8 @@ class WholeFileReaderTest(tf.test.TestCase): for i in range(3)] self._content = [b""One\na\nb\n"", b""Two\nC\nD"", b""Three x, y, z""] for fn, c in zip(self._filenames, self._content): - open(fn, ""wb"").write(c) + with open(fn, ""wb"") as h: + h.write(c) def tearDown(self): super(WholeFileReaderTest, self).tearDown() @@ -240,13 +241,13 @@ class TextLineReaderTest(tf.test.TestCase): for i in range(self._num_files): fn = os.path.join(self.get_temp_dir(), ""text_line.%d.txt"" % i) filenames.append(fn) - f = open(fn, ""wb"") - for j in range(self._num_lines): - f.write(self._LineText(i, j)) - # Always include a newline after the record unless it is - # at the end of the file, in which case we include it sometimes. - if j + 1 != self._num_lines or i == 0: - f.write(b""\r\n"" if crlf else b""\n"") + with open(fn, ""wb"") as f: + for j in range(self._num_lines): + f.write(self._LineText(i, j)) + # Always include a newline after the record unless it is + # at the end of the file, in which case we include it sometimes. 
+ if j + 1 != self._num_lines or i == 0: + f.write(b""\r\n"" if crlf else b""\n"") return filenames def _testOneEpoch(self, files): @@ -311,11 +312,11 @@ class FixedLengthRecordReaderTest(tf.test.TestCase): for i in range(self._num_files): fn = os.path.join(self.get_temp_dir(), ""fixed_length_record.%d.txt"" % i) filenames.append(fn) - f = open(fn, ""wb"") - f.write(b""H"" * self._header_bytes) - for j in range(self._num_records): - f.write(self._Record(i, j)) - f.write(b""F"" * self._footer_bytes) + with open(fn, ""wb"") as f: + f.write(b""H"" * self._header_bytes) + for j in range(self._num_records): + f.write(self._Record(i, j)) + f.write(b""F"" * self._footer_bytes) return filenames def testOneEpoch(self): ",0,test fc0b63edc0116f2df9847e3083247a4613bc0f26,tensorflow/tensorflow,"Clean up RemoveTrivialPassthroughOp and fix an issue in an edge case where we were not erasing the correct arrays. PiperOrigin-RevId: 176784020",remove_trivial_passthrough.cc,"@@ -63,19 +63,28 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation, main_input_array_index = i; } } - CHECK_LE(count_nonconstant_input_arrays, 1); const string main_input_name = passthru_op->inputs[main_input_array_index]; const string output_name = passthru_op->outputs[0]; + + // Build the list of all input and output arrays of the passthrough node + // that we are considering removing. Any of these arrays is a candidate + // for being removed as well, if nothing else references it. Doing that + // arrays-removal together with the passthrough-node-removal proved too + // error-prone. + std::vector removal_candidates; + for (const string& input : passthru_op->inputs) { + removal_candidates.push_back(input); + } + removal_candidates.push_back(output_name); + if (IsDiscardableArray(*model, output_name)) { transformation->AddMessageF( ""Removing %s, keeping its non-constant input array"", LogName(*passthru_op)); - model->arrays.erase(output_name); for (const string& input : passthru_op->inputs) { if (IsDiscardableArray(*model, input) && input != main_input_name && CountOpsWithInput(*model, input) == 1) { - model->arrays.erase(input); } } RerouteEdges(output_name, main_input_name, model); @@ -85,13 +94,12 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation, for (const string& input : passthru_op->inputs) { if (IsDiscardableArray(*model, input) && (input == main_input_name || CountOpsWithInput(*model, input) == 1)) { - model->arrays.erase(input); } } RerouteEdges(main_input_name, output_name, model); } else { transformation->AddMessageF( - ""Cannot remove %s, neither its nonconstant input nor its output may be "" + ""Cannot remove %s, neither its main input nor its output may be "" ""discarded"", LogName(*passthru_op)); return false; @@ -100,6 +108,26 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation, // Remove the pass-through node. model->operators.erase(passthru_it); + // Remove any array that is no longer used. 
+ for (const string& removal_candidate : removal_candidates) { + bool is_referenced = false; + for (const auto& op : model->operators) { + for (const string& input : op->inputs) { + if (input == removal_candidate) { + is_referenced = true; + } + } + for (const string& output : op->outputs) { + if (output == removal_candidate) { + is_referenced = true; + } + } + } + if (!is_referenced) { + model->arrays.erase(removal_candidate); + } + } + return true; } ",0,train fc0b63edc0116f2df9847e3083247a4613bc0f26,tensorflow/tensorflow,"Clean up RemoveTrivialPassthroughOp and fix an issue in an edge case where we were not erasing the correct arrays. PiperOrigin-RevId: 176784020",remove_trivial_passthrough.h,"@@ -21,10 +21,12 @@ limitations under the License. namespace toco { // A ""passthrough op"" is an op that satisfies the following conditions: -// 1. It has at most one non-constant input (it may have other constant -// inputs). +// 1. One of its inputs is (per the semantics of that op) its ""main input"" +// for some notion of ""main input"" that is operator-specific; for example, +// for a Reshape op, the main input is the array being reshaped, not the +// other input which gives the new shape. // 2. It has exactly one output. -// 3. It forwards exactly its single non-constant input to its single output. +// 3. It forwards exactly its main input to its single output. // // Examples include: // 1. TensorFlow Identity ops. (Have one input). @@ -34,7 +36,7 @@ namespace toco { // where one of its inputs is a constant array filled with zeros. // // A passthrough op is ""trivial"" and can be removed when it is possible to -// discard either its single non-constant input or output array, rerouting any +// discard either its main input or output array, rerouting any // edge involving it to the other of these two arrays. // // It is only possible to discard such an array if it is not explicitly ",0,train 33febd68b5b5a198ff613f72581ce20293ed07f3,tensorflow/tensorflow,"Convert input shape to TensorShape before building SeparableConv. PiperOrigin-RevId: 261724310",convolutional.py,"@@ -1785,6 +1785,7 @@ class DepthwiseConv2D(Conv2D): if len(input_shape) < 4: raise ValueError('Inputs to `DepthwiseConv2D` should have rank 4. ' 'Received input shape:', str(input_shape)) + input_shape = tensor_shape.TensorShape(input_shape) if self.data_format == 'channels_first': channel_axis = 1 else: ",0,train d71be4d5febada6af32f3286ad2f4ec61cefb1b3,tensorflow/tensorflow,"[XLA:GPU] s/llvm_ir::IrArray/IrArray/ in ir_emitter_unnested. Less visual noise. PiperOrigin-RevId: 204139183",ir_emitter_unnested.cc,"@@ -595,7 +595,7 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { BuildKernelThunk(fusion, /*implements_whole_instruction=*/false)); thunk_sequence_->emplace_back( MakeUnique(std::move(thunks), fusion)); - std::vector parameter_arrays; + std::vector parameter_arrays; for (HloInstruction* operand : fusion->operands()) { parameter_arrays.push_back(GetIrArray(*operand, *fusion)); } @@ -668,7 +668,7 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { // Set up kernel thunk and fused ir emitter. thunk_sequence_->emplace_back( BuildKernelThunk(fusion, /*implements_whole_instruction=*/true)); - std::vector operand_arrays; + std::vector operand_arrays; for (HloInstruction* operand : fusion->operands()) { operand_arrays.push_back(GetIrArray(*operand, *fusion)); } @@ -681,7 +681,7 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { // Array to write into. 
Because this is an in-place operation, this is the // same as operand 0's array. - llvm_ir::IrArray output_array = GetIrArray(*fusion, *fusion); + IrArray output_array = GetIrArray(*fusion, *fusion); LaunchDimensions launch_dimensions = CalculateLaunchDimensions( update_shape, ir_emitter_context_->device_description()); @@ -732,7 +732,7 @@ Status IrEmitterUnnested::HandleCopy(HloInstruction* copy) { } Status IrEmitterUnnested::EmitExtraOutputsForReduce( - const HloInstruction* reduce, const llvm_ir::IrArray::Index& index, + const HloInstruction* reduce, const IrArray::Index& index, tensorflow::gtl::ArraySlice< std::pair> extra_output_gens) { @@ -819,8 +819,7 @@ Status IrEmitterUnnested::EmitReductionToScalar( // // and threads_per_block is a multiple of warpSize. // reduce_kernel<<>>(); // - auto loop_body_emitter = - [=](const llvm_ir::IrArray::Index& tile_index) -> Status { + auto loop_body_emitter = [=](const IrArray::Index& tile_index) -> Status { const int num_reduces = reducers.size(); llvm::Type* element_ir_type = llvm_ir::PrimitiveTypeToIrType(input_shape.element_type(), module_); @@ -829,9 +828,8 @@ Status IrEmitterUnnested::EmitReductionToScalar( llvm::Value* partial_reduction_result_address = ir_builder_.CreateAlloca( element_ir_type, /*ArraySize=*/nullptr, ""partial_reduction_result."" + llvm::Twine(i)); - TF_ASSIGN_OR_RETURN( - llvm::Value* const init_ir_value, - init_value_gens[i](llvm_ir::IrArray::Index(index_ty))); + TF_ASSIGN_OR_RETURN(llvm::Value* const init_ir_value, + init_value_gens[i](IrArray::Index(index_ty))); ir_builder_.CreateStore(init_ir_value, partial_reduction_result_address); partial_reduction_result_addresses.push_back( partial_reduction_result_address); @@ -866,7 +864,7 @@ Status IrEmitterUnnested::EmitReductionToScalar( llvm_ir::SetToFirstInsertPoint(if_data.true_block, &ir_builder_); } - llvm_ir::IrArray::Index input_index( + IrArray::Index input_index( /*linear=*/x, input_shape, &ir_builder_); llvm::Value* input_address = ir_builder_.CreateAlloca(element_ir_type); for (int i = 0; i != num_reduces; ++i) { @@ -951,7 +949,7 @@ Status IrEmitterUnnested::EmitReductionToScalar( llvm::Value* output_address = GetIrArray(*output, *output, reduce_output_shapes[i]) .EmitArrayElementAddress( - llvm_ir::IrArray::Index( + IrArray::Index( /*linear=*/ir_builder_.getInt64(0), ShapeUtil::GetSubshape(output->shape(), reduce_output_shapes[i]), @@ -1037,8 +1035,7 @@ Status IrEmitterUnnested::EmitColumnReduction( // } // AtomicReducer(&output[x], partial_result); // } - auto loop_body_emitter = - [=](const llvm_ir::IrArray::Index& tile_index) -> Status { + auto loop_body_emitter = [=](const IrArray::Index& tile_index) -> Status { const int num_reduces = reducers.size(); // Emit the loop body that reduces one tile. 
llvm::Type* element_ir_type = @@ -1048,9 +1045,8 @@ Status IrEmitterUnnested::EmitColumnReduction( llvm::Value* partial_reduction_result_address = ir_builder_.CreateAlloca( element_ir_type, /*ArraySize=*/nullptr, ""partial_reduction_result."" + llvm::Twine(i)); - TF_ASSIGN_OR_RETURN( - llvm::Value* const init_ir_value, - init_value_gens[i](llvm_ir::IrArray::Index(index_ty))); + TF_ASSIGN_OR_RETURN(llvm::Value* const init_ir_value, + init_value_gens[i](IrArray::Index(index_ty))); ir_builder_.CreateStore(init_ir_value, partial_reduction_result_address); partial_reduction_result_addresses.push_back( partial_reduction_result_address); @@ -1106,9 +1102,9 @@ Status IrEmitterUnnested::EmitColumnReduction( const Shape input_matrix_shape = ShapeUtil::MakeShapeWithDescendingLayout(input_shape.element_type(), {height, width}); - const llvm_ir::IrArray::Index input_matrix_index( - {y, x}, input_matrix_shape, &ir_builder_); - const llvm_ir::IrArray::Index input_index = + const IrArray::Index input_matrix_index({y, x}, input_matrix_shape, + &ir_builder_); + const IrArray::Index input_index = input_matrix_index .SourceIndexOfReshape(input_matrix_shape, normalized_input_shape, &ir_builder_) @@ -1159,11 +1155,10 @@ Status IrEmitterUnnested::EmitColumnReduction( llvm::Value* output_address = GetIrArray(*output, *output, reduce_output_shapes[i]) .EmitArrayElementAddress( - llvm_ir::IrArray::Index( - x, - ShapeUtil::GetSubshape(output->shape(), - reduce_output_shapes[i]), - &ir_builder_), + IrArray::Index(x, + ShapeUtil::GetSubshape( + output->shape(), reduce_output_shapes[i]), + &ir_builder_), &ir_builder_, ""output_element_address""); TF_RETURN_IF_ERROR(EmitAtomicOperationForNestedComputation( *reducers[i], output_address, partial_reduction_result_addresses[i])); @@ -1335,7 +1330,7 @@ Status IrEmitterUnnested::EmitRowReduction( return llvm::ConstantInt::get(index_ty, c); }; - auto loop_body_emitter = [=](const llvm_ir::IrArray::Index& tile_index) { + auto loop_body_emitter = [=](const IrArray::Index& tile_index) { const int num_reduces = reducers.size(); llvm::Type* element_ir_type = llvm_ir::PrimitiveTypeToIrType( input_shape.element_type(), ir_emitter_context_->llvm_module()); @@ -1344,9 +1339,8 @@ Status IrEmitterUnnested::EmitRowReduction( llvm::Value* partial_reduction_result_address = ir_builder_.CreateAlloca( element_ir_type, /*ArraySize=*/nullptr, ""partial_reduction_result."" + llvm::Twine(i)); - TF_ASSIGN_OR_RETURN( - llvm::Value* const init_ir_value, - init_value_gens[i](llvm_ir::IrArray::Index(index_ty))); + TF_ASSIGN_OR_RETURN(llvm::Value* const init_ir_value, + init_value_gens[i](IrArray::Index(index_ty))); ir_builder_.CreateStore(init_ir_value, partial_reduction_result_address); partial_reduction_result_addresses.push_back( partial_reduction_result_address); @@ -1435,9 +1429,9 @@ Status IrEmitterUnnested::EmitRowReduction( const Shape input_3d_tensor_shape = ShapeUtil::MakeShapeWithDescendingLayout( input_shape.element_type(), {depth, height, width}); - const llvm_ir::IrArray::Index input_3d_tensor_index( + const IrArray::Index input_3d_tensor_index( {z, y, x}, input_3d_tensor_shape, &ir_builder_); - const llvm_ir::IrArray::Index input_index = + const IrArray::Index input_index = input_3d_tensor_index .SourceIndexOfReshape(input_3d_tensor_shape, normalized_input_shape, @@ -1532,11 +1526,10 @@ Status IrEmitterUnnested::EmitRowReduction( llvm::Value* output_address = GetIrArray(*output, *output, reduce_output_shapes[i]) .EmitArrayElementAddress( - llvm_ir::IrArray::Index( - y, - 
ShapeUtil::GetSubshape(output->shape(), - reduce_output_shapes[i]), - &ir_builder_), + IrArray::Index(y, + ShapeUtil::GetSubshape( + output->shape(), reduce_output_shapes[i]), + &ir_builder_), &ir_builder_, ""output_element_address""); // We don't need to emit atomic operations if there is only one tile of // results. 'depth' is the z dimension, 'width' is the x dimension. @@ -1686,11 +1679,11 @@ Status IrEmitterUnnested::HandleReduce(HloInstruction* reduce) { MakeUnique(std::move(thunks), reduce)); return EmitReductionToVector( - reduce, input->shape(), {[&](const llvm_ir::IrArray::Index& index) { + reduce, input->shape(), {[&](const IrArray::Index& index) { return GetIrArray(*input, *reduce) .EmitReadArrayElement(index, &ir_builder_); }}, - {[&](const llvm_ir::IrArray::Index& index) { + {[&](const IrArray::Index& index) { return GetIrArray(*init_value, *reduce) .EmitReadArrayElement(index, &ir_builder_); }}, @@ -1791,8 +1784,7 @@ Status IrEmitterUnnested::HandleSelectAndScatter( // selected_index = I // initialized_flag = true // output(selected_index) = scatter(output(selected_index), source(S)) - auto loop_body_emitter = - [=](const llvm_ir::IrArray::Index& source_index) -> Status { + auto loop_body_emitter = [=](const IrArray::Index& source_index) -> Status { // Allocate space to keep the currently selected value, its index, and a // boolean flag if the value is initialized. The initialized_flag is set // false. @@ -1817,7 +1809,7 @@ Status IrEmitterUnnested::HandleSelectAndScatter( window_size.push_back(dim.size()); CHECK_GT(dim.size(), 0); } - const llvm_ir::IrArray::Index window_index = window_loops.AddLoopsForShape( + const IrArray::Index window_index = window_loops.AddLoopsForShape( ShapeUtil::MakeShape(operand_element_type, window_size), ""window""); llvm_ir::SetToFirstInsertPoint(window_loops.GetInnerLoopBodyBasicBlock(), &ir_builder_); @@ -1825,7 +1817,7 @@ Status IrEmitterUnnested::HandleSelectAndScatter( // Compute the operand index to visit and evaluate the condition whether the // operand index is within the bounds. The unsigned comparison includes // checking whether the operand index >= 0. - llvm_ir::IrArray::Index operand_index(index_type, source_index.size()); + IrArray::Index operand_index(index_type, source_index.size()); llvm::Value* in_bounds_condition = ir_builder_.getInt1(true); for (int64 i = 0; i < rank; ++i) { llvm::Value* strided_index = ir_builder_.CreateNSWMul( @@ -1853,8 +1845,7 @@ Status IrEmitterUnnested::HandleSelectAndScatter( // If the initialized_flag is false, initialize the selected value and index // with the currently visiting operand. 
llvm_ir::SetToFirstInsertPoint(if_initialized.false_block, &ir_builder_); - const auto save_operand_index = [&]( - const llvm_ir::IrArray::Index& operand_index) { + const auto save_operand_index = [&](const IrArray::Index& operand_index) { for (int64 i = 0; i < rank; ++i) { llvm::Value* selected_index_address_slot = ir_builder_.CreateInBoundsGEP(selected_index_address, @@ -1862,7 +1853,7 @@ Status IrEmitterUnnested::HandleSelectAndScatter( ir_builder_.CreateStore(operand_index[i], selected_index_address_slot); } }; - llvm_ir::IrArray operand_array = GetIrArray(*operand, *select_and_scatter); + IrArray operand_array = GetIrArray(*operand, *select_and_scatter); llvm::Value* operand_data = operand_array.EmitReadArrayElement(operand_index, &ir_builder_); ir_builder_.CreateStore(operand_data, selected_value_address); @@ -1907,7 +1898,7 @@ Status IrEmitterUnnested::HandleSelectAndScatter( // value and the current output value. llvm_ir::SetToFirstInsertPoint(window_loops.GetOuterLoopExitBasicBlock(), &ir_builder_); - llvm_ir::IrArray::Index selected_index(operand_index.GetType()); + IrArray::Index selected_index(operand_index.GetType()); for (int64 i = 0; i < rank; ++i) { llvm::Value* selected_index_address_slot = ir_builder_.CreateInBoundsGEP( selected_index_address, {ir_builder_.getInt32(i)}); @@ -2492,7 +2483,7 @@ StatusOr> IrEmitterUnnested::BuildInitializerThunk( TF_RETURN_IF_ERROR(HandleConstant(const_cast(init_value))); } TF_RETURN_IF_ERROR(ParallelLoopEmitter( - [=](const llvm_ir::IrArray::Index& index) { + [=](const IrArray::Index& index) { return GetIrArray(*init_value, *hlo) .EmitReadArrayElement(index, &ir_builder_); }, @@ -2688,7 +2679,7 @@ Status IrEmitterUnnested::EmitTargetElementLoopInThunk( } // For multioutput fusion, we need to emit each operand and the root. 
- std::vector output_arrays; + std::vector output_arrays; for (int64 i = 0; i < ShapeUtil::TupleElementCount(hlo.shape()); ++i) { output_arrays.push_back(GetIrArray(hlo, hlo, {i})); } @@ -2718,7 +2709,7 @@ Status IrEmitterUnnested::EmitTargetElementLoop( } int IrEmitterUnnested::ConstructIrArrayForOutputs( - const HloInstruction& hlo, std::vector* output_arrays) { + const HloInstruction& hlo, std::vector* output_arrays) { int64 num_outputs = 1; if (hlo.IsMultiOutputFusion()) { num_outputs = ShapeUtil::TupleElementCount(hlo.shape()); @@ -2733,7 +2724,7 @@ int IrEmitterUnnested::ConstructIrArrayForOutputs( } int IrEmitterUnnested::ConstructIrArrayForInputs( - const HloInstruction& hlo, std::vector* param_arrays) { + const HloInstruction& hlo, std::vector* param_arrays) { int64 num_params = hlo.operands().size(); param_arrays->reserve(num_params); for (const HloInstruction* param : hlo.operands()) { @@ -2743,11 +2734,10 @@ int IrEmitterUnnested::ConstructIrArrayForInputs( } int IrEmitterUnnested::ConstructOutputReducedShapeAndCastOutputIrArrayToShape( - const HloInstruction& hlo, - const std::vector& output_arrays, + const HloInstruction& hlo, const std::vector& output_arrays, tensorflow::gtl::ArraySlice reduced_output_dims, std::vector* output_reduced_shapes, - std::vector* output_in_reduced_shape_arrays) { + std::vector* output_in_reduced_shape_arrays) { int64 num_outputs = 1; if (hlo.IsMultiOutputFusion()) { num_outputs = ShapeUtil::TupleElementCount(hlo.shape()); @@ -2770,19 +2760,18 @@ int IrEmitterUnnested::ConstructOutputReducedShapeAndCastOutputIrArrayToShape( } int IrEmitterUnnested::ConstructInputReducedShapeAndCastInputIrArrayToShape( - const HloInstruction& hlo, - const std::vector& param_arrays, + const HloInstruction& hlo, const std::vector& param_arrays, const std::vector& param_buffers, tensorflow::gtl::ArraySlice reduced_output_dims, std::vector* param_reduced_shapes, - std::vector* param_in_reduced_shape_arrays) { + std::vector* param_in_reduced_shape_arrays) { int64 num_params = hlo.operands().size(); param_in_reduced_shape_arrays->reserve(num_params); param_reduced_shapes->reserve(num_params); for (int64 id = 0; id < num_params; ++id) { if (param_buffers[id] == nullptr) { param_reduced_shapes->push_back(Shape()); - param_in_reduced_shape_arrays->push_back(llvm_ir::IrArray()); + param_in_reduced_shape_arrays->push_back(IrArray()); continue; } const HloInstruction* param = hlo.operand(id); @@ -2835,11 +2824,11 @@ llvm::Value* GetBlockIdx(llvm::IRBuilder<>* builder, llvm::Type* index_ty, // processed element is within the boundary defined by `tile_width` and // `tile_height`. void EmitTiledElementalCodeWithBoundsCheck( - int64 tile_size, int64 num_rows, const llvm_ir::IrArray::Index& index, + int64 tile_size, int64 num_rows, const IrArray::Index& index, const string& loop_name, KernelSupportLibrary* ksl, llvm::IRBuilder<>* builder, llvm::Value* y, llvm::Value* x, llvm::Value* tile_width, llvm::Value* tile_height, - const std::function& + const std::function& emit_elem_function) { llvm::Type* index_ty = tile_width->getType(); // Emits a constant value with index type. @@ -2847,8 +2836,7 @@ void EmitTiledElementalCodeWithBoundsCheck( return llvm::ConstantInt::get(index_ty, c); }; // Adds `addend` to the given `dim` of `index`. 
- auto offset_dim = [&](llvm_ir::IrArray::Index index, llvm::Value* addend, - int64 dim) { + auto offset_dim = [&](IrArray::Index index, llvm::Value* addend, int64 dim) { index[dim] = builder->CreateAdd(index[dim], addend); return index; }; @@ -3037,8 +3025,8 @@ LaunchDimensions IrEmitterUnnested::EmitHlo021Tile( auto emit_tiled_elemental_code_with_bounds_check = [&](const IrArray::Index& index, const string& loop_name, llvm::Value* tile_width, llvm::Value* tile_height, - const std::function& emit_elem_function) { + const std::function& + emit_elem_function) { EmitTiledElementalCodeWithBoundsCheck( kTileSize, kNumRows, index, loop_name, &ksl, &ir_builder_, y, x, tile_width, tile_height, emit_elem_function); ",0,train 260cf9c59a104a252ffe4a299666a9efe437789a,tensorflow/tensorflow,"[lite] Add new NoValue op to tflite which is used to represent none values, which is used as optional tensors when the value is not present. PiperOrigin-RevId: 425971937 Change-Id: I62bcffa6514cb08927e87bf013589328fdf49237",flatbuffer_export.cc,"@@ -201,7 +201,7 @@ static StatusOr GetTFLiteType(Type type, static bool IsConst(Operation* op) { return isa(op); + tfl::SparseQConstOp, mlir::TFL::NoValueOp>(op); } static bool IsTFResourceOp(Operation* op) { ",0,test 260cf9c59a104a252ffe4a299666a9efe437789a,tensorflow/tensorflow,"[lite] Add new NoValue op to tflite which is used to represent none values, which is used as optional tensors when the value is not present. PiperOrigin-RevId: 425971937 Change-Id: I62bcffa6514cb08927e87bf013589328fdf49237",flatbuffer_import.cc,"@@ -853,8 +853,8 @@ StatusOr ConvertOp( // with `none` value, llvm::SmallVector none_operands( input_max_num - op_input_num, - builder.create(loc, builder.getNoneType(), - builder.getUnitAttr())); + builder.create(loc, builder.getNoneType(), + builder.getUnitAttr())); op_state.addOperands(ArrayRef(none_operands)); } @@ -1305,8 +1305,8 @@ StatusOr ConvertSubgraph( if (maybe_optional_arg_marker == nullptr) { maybe_optional_arg_marker = op_builder - .create(base_loc, builder.getNoneType(), - builder.getUnitAttr()) + .create(base_loc, builder.getNoneType(), + builder.getUnitAttr()) .getResult(); } } else if (!vals_map.at(input_num)) { ",0,test 260cf9c59a104a252ffe4a299666a9efe437789a,tensorflow/tensorflow,"[lite] Add new NoValue op to tflite which is used to represent none values, which is used as optional tensors when the value is not present. 
PiperOrigin-RevId: 425971937 Change-Id: I62bcffa6514cb08927e87bf013589328fdf49237",tfl_ops.cc,"@@ -227,8 +227,9 @@ struct RemoveOptionalZeroBias : public OpRewritePattern { LogicalResult matchAndRewrite(ConcreteOpType op, PatternRewriter &rewriter) const override { if (EqualsZero(op.bias())) { - auto none_value = rewriter.create( - rewriter.getUnknownLoc(), rewriter.getUnitAttr()); + auto none_value = rewriter.create( + rewriter.getUnknownLoc(), rewriter.getNoneType(), + rewriter.getUnitAttr()); op.biasMutable().assign(none_value); } @@ -2431,14 +2432,16 @@ struct RemoveLSTMOpZeroBias : public OpRewritePattern { LogicalResult matchAndRewrite(LSTMOp op, PatternRewriter &rewriter) const override { if (EqualsZero(op.input_gate_bias())) { - auto none_value = rewriter.create( - rewriter.getUnknownLoc(), rewriter.getUnitAttr()); + auto none_value = rewriter.create( + rewriter.getUnknownLoc(), rewriter.getNoneType(), + rewriter.getUnitAttr()); op.input_gate_biasMutable().assign(none_value); } if (EqualsZero(op.projection_bias())) { - auto none_value = rewriter.create( - rewriter.getUnknownLoc(), rewriter.getUnitAttr()); + auto none_value = rewriter.create( + rewriter.getUnknownLoc(), rewriter.getNoneType(), + rewriter.getUnitAttr()); op.projection_biasMutable().assign(none_value); } @@ -2778,9 +2781,10 @@ struct FoldPseudoConstOp : public OpRewritePattern { rewriter.replaceOpWithNewOp(const_op, const_op.value()); return success(); - } else if (ConstantOp::isBuildableWith(const_op.value(), - const_op.getType())) { - rewriter.replaceOpWithNewOp(const_op, const_op.value()); + } else if (TFL::NoValueOp::isBuildableWith(const_op.value(), + const_op.getType())) { + rewriter.replaceOpWithNewOp(const_op, rewriter.getNoneType(), + const_op.value().cast()); return success(); } return failure(); @@ -3685,6 +3689,18 @@ OpFoldResult PadV2Op::fold(ArrayRef operands) { return {}; } +//===----------------------------------------------------------------------===// +// NoValueOp +//===----------------------------------------------------------------------===// + +OpFoldResult NoValueOp::fold(ArrayRef operands) { + return valueAttr(); +} + +bool NoValueOp::isBuildableWith(Attribute value, Type type) { + return value.isa() && type.isa(); +} + //===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// @@ -3712,8 +3728,8 @@ Operation *TensorFlowLiteDialect::materializeConstant(OpBuilder &builder, return builder.create(loc, type, value.cast()); if (arith::ConstantOp::isBuildableWith(value, type)) return builder.create(loc, type, value); - if (ConstantOp::isBuildableWith(value, type)) - return builder.create(loc, type, value); + if (NoValueOp::isBuildableWith(value, type)) + return builder.create(loc, type, value.cast()); return nullptr; } ",0,test 260cf9c59a104a252ffe4a299666a9efe437789a,tensorflow/tensorflow,"[lite] Add new NoValue op to tflite which is used to represent none values, which is used as optional tensors when the value is not present. PiperOrigin-RevId: 425971937 Change-Id: I62bcffa6514cb08927e87bf013589328fdf49237",tfl_to_std.cc,"@@ -17,6 +17,7 @@ limitations under the License. 
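The importer changes in this commit pad an op's operand list with a single shared none-value marker when trailing optional tensors are absent, so every op sees its full operand arity. A hedged sketch of that padding step; NO_VALUE and the expected arity are illustrative stand-ins for the MLIR none value and the op signature:

```python
# Illustrative only: pad an operand list with a shared "no value" sentinel.
NO_VALUE = object()   # single shared marker for "tensor not provided"

def pad_optional_operands(operands, expected_arity):
    missing = expected_arity - len(operands)
    if missing < 0:
        raise ValueError("more operands than the op signature allows")
    return list(operands) + [NO_VALUE] * missing

ops = pad_optional_operands(["input", "filter"], 3)   # optional bias omitted
assert ops[2] is NO_VALUE
```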
#include ""llvm/Support/Casting.h"" #include ""mlir/Dialect/Quant/QuantOps.h"" // from @llvm-project #include ""mlir/Dialect/StandardOps/IR/Ops.h"" // from @llvm-project +#include ""mlir/IR/BuiltinAttributes.h"" // from @llvm-project #include ""tensorflow/compiler/mlir/lite/ir/tfl_ops.h"" #include ""tensorflow/compiler/mlir/lite/quantization/quantization_utils.h"" @@ -44,8 +45,8 @@ void ConvertTFLQuantOpsToMlirQuantOps(FuncOp func) { auto c = b.create(q.getLoc(), q.value()); q.output().replaceAllUsesWith(c); q.erase(); - } else if (ConstantOp::isBuildableWith(value, type)) { - auto c = b.create(q.getLoc(), q.value()); + } else if (TFL::NoValueOp::isBuildableWith(value, type)) { + auto c = b.create(q.getLoc(), type, mlir::UnitAttr()); q.output().replaceAllUsesWith(c); q.erase(); } ",0,test 260cf9c59a104a252ffe4a299666a9efe437789a,tensorflow/tensorflow,"[lite] Add new NoValue op to tflite which is used to represent none values, which is used as optional tensors when the value is not present. PiperOrigin-RevId: 425971937 Change-Id: I62bcffa6514cb08927e87bf013589328fdf49237",legalize_tf.cc,"@@ -31,6 +31,7 @@ limitations under the License. #include ""llvm/ADT/Hashing.h"" #include ""llvm/ADT/StringSwitch.h"" #include ""llvm/Support/Threading.h"" +#include ""llvm/Support/raw_ostream.h"" #include ""mlir/Dialect/Quant/FakeQuantSupport.h"" // from @llvm-project #include ""mlir/Dialect/Quant/QuantOps.h"" // from @llvm-project #include ""mlir/Dialect/Quant/UniformSupport.h"" // from @llvm-project @@ -264,7 +265,7 @@ LogicalResult ConvertTFMatMulOp::matchAndRewrite( } Type output_type = tf_matmul_op.getResult().getType(); - auto no_input = rewriter.create( + auto no_input = rewriter.create( op->getLoc(), rewriter.getNoneType(), rewriter.getUnitAttr()); auto fc_op = rewriter.create( op->getLoc(), ArrayRef{output_type}, @@ -359,7 +360,7 @@ LogicalResult ConvertTFConv3DOp::matchAndRewrite( // TensorFlow Conv3D has no bias, optimization patterns will fuse Conv3D // with other ops can fill the bias. - Value none = rewriter.create( + Value none = rewriter.create( op->getLoc(), rewriter.getNoneType(), rewriter.getUnitAttr()); rewriter.replaceOpWithNewOp( @@ -399,7 +400,7 @@ LogicalResult ConvertTFConv3DBackpropInputV2Op::matchAndRewrite( // TensorFlow Conv3D has no bias, optimization patterns will fuse Conv3D // with other ops can fill the bias. - Value none = rewriter.create( + Value none = rewriter.create( op->getLoc(), rewriter.getNoneType(), rewriter.getUnitAttr()); Value output_shape = @@ -518,7 +519,7 @@ struct LegalizeUnidirectionalSequenceLstm : public RewritePattern { } // Optional input placeholder. - Value none = rewriter.create( + Value none = rewriter.create( op->getLoc(), rewriter.getNoneType(), rewriter.getUnitAttr()); // Populate inputs. @@ -943,6 +944,7 @@ void LegalizeTF::runOnOperation() { // graph. target.addLegalOp(); target.addLegalOp(); + target.addLegalOp(); target.addLegalOp(); target.addLegalOp(); target.addLegalOp(); ",0,test 260cf9c59a104a252ffe4a299666a9efe437789a,tensorflow/tensorflow,"[lite] Add new NoValue op to tflite which is used to represent none values, which is used as optional tensors when the value is not present. 
PiperOrigin-RevId: 425971937 Change-Id: I62bcffa6514cb08927e87bf013589328fdf49237",lstm_utils.cc,"@@ -84,8 +84,8 @@ Value CreateI32DenseConst(OpBuilder* builder, ArrayRef values, } Value CreateNoneValue(OpBuilder* builder, mlir::Location location) { - return builder->create(location, builder->getNoneType(), - builder->getUnitAttr()); + return builder->create(location, builder->getNoneType(), + builder->getUnitAttr()); } Value Transpose(OpBuilder* builder, Value value_to_transpose, @@ -719,8 +719,7 @@ LogicalResult ConvertKerasLSTMLayer(mlir::FuncOp func_op, OpBuilder* builder) { output_shape, final_inputs.getType().cast().getElementType()); - Value none = builder->create( - func_op.getLoc(), builder->getNoneType(), builder->getUnitAttr()); + Value none = CreateNoneValue(builder, func_op.getLoc()); auto lstm = builder->create( func_op.getLoc(), result_type, /*input=*/final_inputs, /*input_to_input_weights=*/weights_array->getResult(0), ",0,test 97757f34d2124c2341111344bef469501e789c04,tensorflow/tensorflow,"make get_config from config public for feature column PiperOrigin-RevId: 272227486",feature_column_v2.py,"@@ -2265,8 +2265,7 @@ class FeatureColumn(object): """""" pass - @abc.abstractmethod - def _get_config(self): + def get_config(self): """"""Returns the config of the feature column. A FeatureColumn config is a Python dictionary (serializable) containing the @@ -2283,7 +2282,7 @@ class FeatureColumn(object): 'SerializationExampleFeatureColumn', ('dimension', 'parent', 'dtype', 'normalizer_fn'))): - def _get_config(self): + def get_config(self): # Create a dict from the namedtuple. # Python attribute literals can be directly copied from / to the config. # For example 'dimension', assuming it is an integer literal. @@ -2304,8 +2303,8 @@ class FeatureColumn(object): return config @classmethod - def _from_config(cls, config, custom_objects=None, columns_by_name=None): - # This should do the inverse transform from `_get_config` and construct + def from_config(cls, config, custom_objects=None, columns_by_name=None): + # This should do the inverse transform from `get_config` and construct # the namedtuple. kwargs = config.copy() kwargs['parent'] = deserialize_feature_column( @@ -2320,21 +2319,24 @@ class FeatureColumn(object): A serializable Dict that can be used to deserialize the object with from_config. """""" - pass + return self._get_config() + + def _get_config(self): + raise NotImplementedError('Must be implemented in subclasses.') @classmethod - def _from_config(cls, config, custom_objects=None, columns_by_name=None): + def from_config(cls, config, custom_objects=None, columns_by_name=None): """"""Creates a FeatureColumn from its config. - This method should be the reverse of `_get_config`, capable of instantiating - the same FeatureColumn from the config dictionary. See `_get_config` for an + This method should be the reverse of `get_config`, capable of instantiating + the same FeatureColumn from the config dictionary. See `get_config` for an example of common (de)serialization practices followed in this file. TODO(b/118939620): This is a private method until consensus is reached on supporting object deserialization deduping within Keras. Args: - config: A Dict config acquired with `_get_config`. + config: A Dict config acquired with `get_config`. custom_objects: Optional dictionary mapping names (strings) to custom classes or functions to be considered during deserialization. 
columns_by_name: A Dict[String, FeatureColumn] of existing columns in @@ -2344,7 +2346,11 @@ class FeatureColumn(object): Returns: A FeatureColumn for the input config. """""" - pass + return cls._from_config(config, custom_objects, columns_by_name) + + @classmethod + def _from_config(cls, config, custom_objects=None, columns_by_name=None): + raise NotImplementedError('Must be implemented in subclasses.') class DenseColumn(FeatureColumn): @@ -2857,7 +2863,7 @@ class NumericColumn( """"""See 'FeatureColumn` base class."""""" return [self.key] - def _get_config(self): + def get_config(self): """"""See 'FeatureColumn` base class."""""" config = dict(zip(self._fields, self)) config['normalizer_fn'] = generic_utils.serialize_keras_object( @@ -2866,7 +2872,7 @@ class NumericColumn( return config @classmethod - def _from_config(cls, config, custom_objects=None, columns_by_name=None): + def from_config(cls, config, custom_objects=None, columns_by_name=None): """"""See 'FeatureColumn` base class."""""" _check_config_keys(config, cls._fields) kwargs = _standardize_and_copy_config(config) @@ -3014,7 +3020,7 @@ class BucketizedColumn( """"""See 'FeatureColumn` base class."""""" return [self.source_column] - def _get_config(self): + def get_config(self): """"""See 'FeatureColumn` base class."""""" from tensorflow.python.feature_column.serialization import serialize_feature_column # pylint: disable=g-import-not-at-top config = dict(zip(self._fields, self)) @@ -3022,7 +3028,7 @@ class BucketizedColumn( return config @classmethod - def _from_config(cls, config, custom_objects=None, columns_by_name=None): + def from_config(cls, config, custom_objects=None, columns_by_name=None): """"""See 'FeatureColumn` base class."""""" from tensorflow.python.feature_column.serialization import deserialize_feature_column # pylint: disable=g-import-not-at-top _check_config_keys(config, cls._fields) @@ -3247,7 +3253,7 @@ class EmbeddingColumn( """"""See 'FeatureColumn` base class."""""" return [self.categorical_column] - def _get_config(self): + def get_config(self): """"""See 'FeatureColumn` base class."""""" from tensorflow.python.feature_column.serialization import serialize_feature_column # pylint: disable=g-import-not-at-top config = dict(zip(self._fields, self)) @@ -3257,7 +3263,7 @@ class EmbeddingColumn( return config @classmethod - def _from_config(cls, config, custom_objects=None, columns_by_name=None): + def from_config(cls, config, custom_objects=None, columns_by_name=None): """"""See 'FeatureColumn` base class."""""" from tensorflow.python.feature_column.serialization import deserialize_feature_column # pylint: disable=g-import-not-at-top _check_config_keys(config, cls._fields) @@ -3440,15 +3446,6 @@ class SharedEmbeddingColumn( """"""See 'FeatureColumn` base class."""""" return [self.categorical_column] - def _get_config(self): - """"""See 'FeatureColumn` base class."""""" - raise NotImplementedError() - - @classmethod - def _from_config(cls, config, custom_objects=None, columns_by_name=None): - """"""See 'FeatureColumn` base class."""""" - raise NotImplementedError() - def _check_shape(shape, key): """"""Returns shape if it's valid, raises error otherwise."""""" @@ -3559,14 +3556,14 @@ class HashedCategoricalColumn( """"""See 'FeatureColumn` base class."""""" return [self.key] - def _get_config(self): + def get_config(self): """"""See 'FeatureColumn` base class."""""" config = dict(zip(self._fields, self)) config['dtype'] = self.dtype.name return config @classmethod - def _from_config(cls, config, 
custom_objects=None, columns_by_name=None): + def from_config(cls, config, custom_objects=None, columns_by_name=None): """"""See 'FeatureColumn` base class."""""" _check_config_keys(config, cls._fields) kwargs = _standardize_and_copy_config(config) @@ -3673,14 +3670,14 @@ class VocabularyFileCategoricalColumn( """"""See 'FeatureColumn` base class."""""" return [self.key] - def _get_config(self): + def get_config(self): """"""See 'FeatureColumn` base class."""""" config = dict(zip(self._fields, self)) config['dtype'] = self.dtype.name return config @classmethod - def _from_config(cls, config, custom_objects=None, columns_by_name=None): + def from_config(cls, config, custom_objects=None, columns_by_name=None): """"""See 'FeatureColumn` base class."""""" _check_config_keys(config, cls._fields) kwargs = _standardize_and_copy_config(config) @@ -3787,14 +3784,14 @@ class VocabularyListCategoricalColumn( """"""See 'FeatureColumn` base class."""""" return [self.key] - def _get_config(self): + def get_config(self): """"""See 'FeatureColumn` base class."""""" config = dict(zip(self._fields, self)) config['dtype'] = self.dtype.name return config @classmethod - def _from_config(cls, config, custom_objects=None, columns_by_name=None): + def from_config(cls, config, custom_objects=None, columns_by_name=None): """"""See 'FeatureColumn` base class."""""" _check_config_keys(config, cls._fields) kwargs = _standardize_and_copy_config(config) @@ -3899,12 +3896,12 @@ class IdentityCategoricalColumn( """"""See 'FeatureColumn` base class."""""" return [self.key] - def _get_config(self): + def get_config(self): """"""See 'FeatureColumn` base class."""""" return dict(zip(self._fields, self)) @classmethod - def _from_config(cls, config, custom_objects=None, columns_by_name=None): + def from_config(cls, config, custom_objects=None, columns_by_name=None): """"""See 'FeatureColumn` base class."""""" _check_config_keys(config, cls._fields) kwargs = _standardize_and_copy_config(config) @@ -4013,7 +4010,7 @@ class WeightedCategoricalColumn( """"""See 'FeatureColumn` base class."""""" return [self.categorical_column, self.weight_feature_key] - def _get_config(self): + def get_config(self): """"""See 'FeatureColumn` base class."""""" from tensorflow.python.feature_column.serialization import serialize_feature_column # pylint: disable=g-import-not-at-top config = dict(zip(self._fields, self)) @@ -4023,7 +4020,7 @@ class WeightedCategoricalColumn( return config @classmethod - def _from_config(cls, config, custom_objects=None, columns_by_name=None): + def from_config(cls, config, custom_objects=None, columns_by_name=None): """"""See 'FeatureColumn` base class."""""" from tensorflow.python.feature_column.serialization import deserialize_feature_column # pylint: disable=g-import-not-at-top _check_config_keys(config, cls._fields) @@ -4157,7 +4154,7 @@ class CrossedColumn( """"""See 'FeatureColumn` base class."""""" return list(self.keys) - def _get_config(self): + def get_config(self): """"""See 'FeatureColumn` base class."""""" from tensorflow.python.feature_column.serialization import serialize_feature_column # pylint: disable=g-import-not-at-top config = dict(zip(self._fields, self)) @@ -4165,7 +4162,7 @@ class CrossedColumn( return config @classmethod - def _from_config(cls, config, custom_objects=None, columns_by_name=None): + def from_config(cls, config, custom_objects=None, columns_by_name=None): """"""See 'FeatureColumn` base class."""""" from tensorflow.python.feature_column.serialization import 
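The from_config docstring above frames deserialization as the exact inverse of get_config. A minimal sketch of that round trip for a namedtuple-style column; DemoColumn is an invented example, not a real feature column:

```python
# Sketch of the get_config / from_config contract: from_config(get_config())
# should rebuild an equal column. Literal attributes copy straight through.
import collections

class DemoColumn(collections.namedtuple("DemoColumn", ["key", "dimension"])):

    def get_config(self):
        return dict(zip(self._fields, self))

    @classmethod
    def from_config(cls, config):
        return cls(**config)

col = DemoColumn(key="price", dimension=4)
assert DemoColumn.from_config(col.get_config()) == col
```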
deserialize_feature_column # pylint: disable=g-import-not-at-top _check_config_keys(config, cls._fields) @@ -4427,7 +4424,7 @@ class IndicatorColumn( """"""See 'FeatureColumn` base class."""""" return [self.categorical_column] - def _get_config(self): + def get_config(self): """"""See 'FeatureColumn` base class."""""" from tensorflow.python.feature_column.serialization import serialize_feature_column # pylint: disable=g-import-not-at-top config = dict(zip(self._fields, self)) @@ -4436,7 +4433,7 @@ class IndicatorColumn( return config @classmethod - def _from_config(cls, config, custom_objects=None, columns_by_name=None): + def from_config(cls, config, custom_objects=None, columns_by_name=None): """"""See 'FeatureColumn` base class."""""" from tensorflow.python.feature_column.serialization import deserialize_feature_column # pylint: disable=g-import-not-at-top _check_config_keys(config, cls._fields) @@ -4573,7 +4570,7 @@ class SequenceCategoricalColumn( """"""See 'FeatureColumn` base class."""""" return [self.categorical_column] - def _get_config(self): + def get_config(self): """"""See 'FeatureColumn` base class."""""" from tensorflow.python.feature_column.serialization import serialize_feature_column # pylint: disable=g-import-not-at-top config = dict(zip(self._fields, self)) @@ -4582,7 +4579,7 @@ class SequenceCategoricalColumn( return config @classmethod - def _from_config(cls, config, custom_objects=None, columns_by_name=None): + def from_config(cls, config, custom_objects=None, columns_by_name=None): """"""See 'FeatureColumn` base class."""""" from tensorflow.python.feature_column.serialization import deserialize_feature_column # pylint: disable=g-import-not-at-top _check_config_keys(config, cls._fields) ",0,train 97757f34d2124c2341111344bef469501e789c04,tensorflow/tensorflow,"make get_config from config public for feature column PiperOrigin-RevId: 272227486",feature_column_v2_test.py,"@@ -81,10 +81,10 @@ class BaseFeatureColumnForTests(fc.FeatureColumn): raise ValueError('Should not use this method.') @classmethod - def _from_config(cls, config, custom_objects=None, columns_by_name=None): + def from_config(cls, config, custom_objects=None, columns_by_name=None): raise ValueError('Should not use this method.') - def _get_config(self): + def get_config(self): raise ValueError('Should not use this method.') @@ -478,7 +478,7 @@ class NumericColumnTest(test.TestCase): price = fc.numeric_column('price', normalizer_fn=_increment_two) self.assertEqual(['price'], price.parents) - config = price._get_config() + config = price.get_config() self.assertEqual({ 'key': 'price', 'shape': (1,), @@ -487,7 +487,7 @@ class NumericColumnTest(test.TestCase): 'normalizer_fn': '_increment_two' }, config) - new_col = fc.NumericColumn._from_config( + new_col = fc.NumericColumn.from_config( config, custom_objects={'_increment_two': _increment_two}) self.assertEqual(price, new_col) self.assertEqual(new_col.shape, (1,)) @@ -833,7 +833,7 @@ class BucketizedColumnTest(test.TestCase): bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) self.assertEqual([price], bucketized_price.parents) - config = bucketized_price._get_config() + config = bucketized_price.get_config() self.assertEqual({ 'source_column': { 'class_name': 'NumericColumn', @@ -848,11 +848,11 @@ class BucketizedColumnTest(test.TestCase): 'boundaries': (0, 2, 4, 6) }, config) - new_bucketized_price = fc.BucketizedColumn._from_config(config) + new_bucketized_price = fc.BucketizedColumn.from_config(config) 
self.assertEqual(bucketized_price, new_bucketized_price) self.assertIsNot(price, new_bucketized_price.source_column) - new_bucketized_price = fc.BucketizedColumn._from_config( + new_bucketized_price = fc.BucketizedColumn.from_config( config, columns_by_name={ serialization._column_name_with_class_name(price): price @@ -1106,7 +1106,7 @@ class HashedCategoricalColumnTest(test.TestCase): wire_column = fc.categorical_column_with_hash_bucket('wire', 4) self.assertEqual(['wire'], wire_column.parents) - config = wire_column._get_config() + config = wire_column.get_config() self.assertEqual({ 'key': 'wire', 'hash_bucket_size': 4, @@ -1114,7 +1114,7 @@ class HashedCategoricalColumnTest(test.TestCase): }, config) self.assertEqual(wire_column, - fc.HashedCategoricalColumn._from_config(config)) + fc.HashedCategoricalColumn.from_config(config)) class CrossedColumnTest(test.TestCase): @@ -1588,7 +1588,7 @@ class CrossedColumnTest(test.TestCase): self.assertEqual([b, 'c'], crossed.parents) - config = crossed._get_config() + config = crossed.get_config() self.assertEqual({ 'hash_bucket_size': 5, @@ -1612,11 +1612,11 @@ class CrossedColumnTest(test.TestCase): }, 'c') }, config) - new_crossed = fc.CrossedColumn._from_config(config) + new_crossed = fc.CrossedColumn.from_config(config) self.assertEqual(crossed, new_crossed) self.assertIsNot(b, new_crossed.keys[0]) - new_crossed = fc.CrossedColumn._from_config( + new_crossed = fc.CrossedColumn.from_config( config, columns_by_name={serialization._column_name_with_class_name(b): b}) self.assertEqual(crossed, new_crossed) @@ -4396,7 +4396,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): self.assertEqual(['wire'], wire_column.parents) - config = wire_column._get_config() + config = wire_column.get_config() self.assertEqual({ 'default_value': -1, 'dtype': 'string', @@ -4407,7 +4407,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): }, config) self.assertEqual(wire_column, - fc.VocabularyFileCategoricalColumn._from_config(config)) + fc.VocabularyFileCategoricalColumn.from_config(config)) class VocabularyListCategoricalColumnTest(test.TestCase): @@ -4859,7 +4859,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase): self.assertEqual(['aaa'], wire_column.parents) - config = wire_column._get_config() + config = wire_column.get_config() self.assertEqual({ 'default_value': -1, 'dtype': 'string', @@ -4869,7 +4869,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase): }, config) self.assertEqual(wire_column, - fc.VocabularyListCategoricalColumn._from_config(config)) + fc.VocabularyListCategoricalColumn.from_config(config)) class IdentityCategoricalColumnTest(test.TestCase): @@ -5218,14 +5218,14 @@ class IdentityCategoricalColumnTest(test.TestCase): self.assertEqual(['aaa'], column.parents) - config = column._get_config() + config = column.get_config() self.assertEqual({ 'default_value': None, 'key': 'aaa', 'number_buckets': 3 }, config) - self.assertEqual(column, fc.IdentityCategoricalColumn._from_config(config)) + self.assertEqual(column, fc.IdentityCategoricalColumn.from_config(config)) class TransformFeaturesTest(test.TestCase): @@ -5600,7 +5600,7 @@ class IndicatorColumnTest(test.TestCase): self.assertEqual([parent], animal.parents) - config = animal._get_config() + config = animal.get_config() self.assertEqual({ 'categorical_column': { 'class_name': 'IdentityCategoricalColumn', @@ -5612,11 +5612,11 @@ class IndicatorColumnTest(test.TestCase): } }, config) - new_animal = fc.IndicatorColumn._from_config(config) + new_animal = 
fc.IndicatorColumn.from_config(config) self.assertEqual(animal, new_animal) self.assertIsNot(parent, new_animal.categorical_column) - new_animal = fc.IndicatorColumn._from_config( + new_animal = fc.IndicatorColumn.from_config( config, columns_by_name={ serialization._column_name_with_class_name(parent): parent @@ -6605,7 +6605,7 @@ class EmbeddingColumnTest(test.TestCase): self.assertEqual([categorical_column], embedding_column.parents) - config = embedding_column._get_config() + config = embedding_column.get_config() self.assertEqual({ 'categorical_column': { 'class_name': 'IdentityCategoricalColumn', @@ -6633,22 +6633,22 @@ class EmbeddingColumnTest(test.TestCase): }, config) custom_objects = {'TruncatedNormal': init_ops.TruncatedNormal} - new_embedding_column = fc.EmbeddingColumn._from_config( + new_embedding_column = fc.EmbeddingColumn.from_config( config, custom_objects=custom_objects) - self.assertEqual(embedding_column._get_config(), - new_embedding_column._get_config()) + self.assertEqual(embedding_column.get_config(), + new_embedding_column.get_config()) self.assertIsNot(categorical_column, new_embedding_column.categorical_column) - new_embedding_column = fc.EmbeddingColumn._from_config( + new_embedding_column = fc.EmbeddingColumn.from_config( config, custom_objects=custom_objects, columns_by_name={ serialization._column_name_with_class_name(categorical_column): categorical_column }) - self.assertEqual(embedding_column._get_config(), - new_embedding_column._get_config()) + self.assertEqual(embedding_column.get_config(), + new_embedding_column.get_config()) self.assertIs(categorical_column, new_embedding_column.categorical_column) @test_util.run_deprecated_v1 @@ -6666,7 +6666,7 @@ class EmbeddingColumnTest(test.TestCase): self.assertEqual([categorical_column], embedding_column.parents) - config = embedding_column._get_config() + config = embedding_column.get_config() self.assertEqual({ 'categorical_column': { 'class_name': 'IdentityCategoricalColumn', @@ -6689,13 +6689,13 @@ class EmbeddingColumnTest(test.TestCase): '_initializer': _initializer, } - new_embedding_column = fc.EmbeddingColumn._from_config( + new_embedding_column = fc.EmbeddingColumn.from_config( config, custom_objects=custom_objects) self.assertEqual(embedding_column, new_embedding_column) self.assertIsNot(categorical_column, new_embedding_column.categorical_column) - new_embedding_column = fc.EmbeddingColumn._from_config( + new_embedding_column = fc.EmbeddingColumn.from_config( config, custom_objects=custom_objects, columns_by_name={ @@ -7763,7 +7763,7 @@ class WeightedCategoricalColumnTest(test.TestCase): self.assertEqual([categorical_column, 'weight'], column.parents) - config = column._get_config() + config = column.get_config() self.assertEqual({ 'categorical_column': { 'config': { @@ -7777,9 +7777,9 @@ class WeightedCategoricalColumnTest(test.TestCase): 'weight_feature_key': 'weight' }, config) - self.assertEqual(column, fc.WeightedCategoricalColumn._from_config(config)) + self.assertEqual(column, fc.WeightedCategoricalColumn.from_config(config)) - new_column = fc.WeightedCategoricalColumn._from_config( + new_column = fc.WeightedCategoricalColumn.from_config( config, columns_by_name={ serialization._column_name_with_class_name(categorical_column): ",0,train 97757f34d2124c2341111344bef469501e789c04,tensorflow/tensorflow,"make get_config from config public for feature column PiperOrigin-RevId: 272227486",sequence_feature_column.py,"@@ -582,7 +582,7 @@ class SequenceNumericColumn( """"""See 'FeatureColumn` base 
class."""""" return [self.key] - def _get_config(self): + def get_config(self): """"""See 'FeatureColumn` base class."""""" config = dict(zip(self._fields, self)) config['normalizer_fn'] = utils.serialize_keras_object(self.normalizer_fn) @@ -590,7 +590,7 @@ class SequenceNumericColumn( return config @classmethod - def _from_config(cls, config, custom_objects=None, columns_by_name=None): + def from_config(cls, config, custom_objects=None, columns_by_name=None): """"""See 'FeatureColumn` base class."""""" fc._check_config_keys(config, cls._fields) kwargs = fc._standardize_and_copy_config(config) ",0,train 97757f34d2124c2341111344bef469501e789c04,tensorflow/tensorflow,"make get_config from config public for feature column PiperOrigin-RevId: 272227486",sequence_feature_column_test.py,"@@ -765,7 +765,7 @@ class SequenceCategoricalColumnWithIdentityTest( 'animal', num_buckets=4) animal = fc.indicator_column(parent) - config = animal._get_config() + config = animal.get_config() self.assertEqual( { 'categorical_column': { @@ -783,11 +783,11 @@ class SequenceCategoricalColumnWithIdentityTest( } }, config) - new_animal = fc.IndicatorColumn._from_config(config) + new_animal = fc.IndicatorColumn.from_config(config) self.assertEqual(animal, new_animal) self.assertIsNot(parent, new_animal.categorical_column) - new_animal = fc.IndicatorColumn._from_config( + new_animal = fc.IndicatorColumn.from_config( config, columns_by_name={ serialization._column_name_with_class_name(parent): parent ",0,train 97757f34d2124c2341111344bef469501e789c04,tensorflow/tensorflow,"make get_config from config public for feature column PiperOrigin-RevId: 272227486",serialization.py,"@@ -45,14 +45,14 @@ def serialize_feature_column(fc): """"""Serializes a FeatureColumn or a raw string key. This method should only be used to serialize parent FeatureColumns when - implementing FeatureColumn._get_config(), else serialize_feature_columns() + implementing FeatureColumn.get_config(), else serialize_feature_columns() is preferable. This serialization also keeps information of the FeatureColumn class, so deserialization is possible without knowing the class type. For example: a = numeric_column('x') - a._get_config() gives: + a.get_config() gives: { 'key': 'price', 'shape': (1,), @@ -85,7 +85,7 @@ def serialize_feature_column(fc): return fc elif isinstance(fc, fc_lib.FeatureColumn): return generic_utils.serialize_keras_class_and_config( - fc.__class__.__name__, fc._get_config()) # pylint: disable=protected-access + fc.__class__.__name__, fc.get_config()) # pylint: disable=protected-access else: raise ValueError('Instance: {} is not a FeatureColumn'.format(fc)) @@ -96,7 +96,7 @@ def deserialize_feature_column(config, """"""Deserializes a `config` generated with `serialize_feature_column`. This method should only be used to deserialize parent FeatureColumns when - implementing FeatureColumn._from_config(), else deserialize_feature_columns() + implementing FeatureColumn.from_config(), else deserialize_feature_columns() is preferable. Returns a FeatureColumn for this config. TODO(b/118939620): Simplify code if Keras utils support object deduping. @@ -136,7 +136,7 @@ def deserialize_feature_column(config, 'Expected FeatureColumn class, instead found: {}'.format(cls)) # Always deserialize the FeatureColumn, in order to get the name. 
- new_instance = cls._from_config( # pylint: disable=protected-access + new_instance = cls.from_config( # pylint: disable=protected-access cls_config, custom_objects=custom_objects, columns_by_name=columns_by_name) ",0,train d9ae69f04ba944384d117662b888c43cb7e0bf72,tensorflow/tensorflow,Only trigger reduction columns indexing for columns reductions.,ir_emitter_unnested.cc,"@@ -3207,7 +3207,8 @@ ReductionCodegenInfo IrEmitterUnnested::ComputeReductionCodegenInfo( // dtypes. ((cc_major == 6 && smallest_input_dtype_bits <= 16) || cc_major >= 7)) { return kLinearStridedIndexingX; - } else if (IsUnrollingColumnReductionBeneficial( + } else if (!reduction_dimensions.is_row_reduction && + IsUnrollingColumnReductionBeneficial( unnested_hlo, input_shape, reduction_dimensions.dimensions[2])) { return kLinearIndexingX; ",0,test 13038b78f24d50a14f132806816cae99f630d78c,tensorflow/tensorflow,"Speed up transitive reduction in dependency optimizer by sorting inputs and breaking out of the inner loop when topo_order(input) < topo_order(source). This gives a ~7% speedup on the transformer graph (166ms -> 155ms). PiperOrigin-RevId: 277762493 Change-Id: I9c80cb68f6695000b9511d6651302772b308a25d",dependency_optimizer.cc,"@@ -500,6 +500,8 @@ Status DependencyOptimizer::TransitiveReduction() { control_outputs[input_node_idx].emplace_back(node_idx, input_slot); } } + std::sort(inputs[node_idx].begin(), inputs[node_idx].end(), + std::greater()); } // Run the longest path in DAG algorithm for each source node that has control @@ -528,13 +530,15 @@ Status DependencyOptimizer::TransitiveReduction() { std::fill(longest_distance.begin() + source, longest_distance.begin() + highest_control_target + 1, 0); for (int target = source + 1; target <= highest_control_target; ++target) { - for (int input : inputs[target]) { + const auto& target_inputs = inputs[target]; + for (int input_idx = 0; input_idx < target_inputs.size(); ++input_idx) { + const int input = target_inputs[input_idx]; + if (input < source) break; // If the input node is before source in the topo order, no path // source -> input -> target can exits and we can skip it. // Also only extend a path from the source itself or from nodes that // have a path from source, indicated by longest_distance[input] > 0. - if (input == source || - (input > source && longest_distance[input] > 0)) { + if (input == source || longest_distance[input] > 0) { // If source -> input -> target is longer than the longest // path so far from source -> target, update the longest_distance. int candidate_longest_distance = longest_distance[input] + 1; ",0,test ab5ba2aa0c3817f472a8336bba4cbb18fdeda258,tensorflow/tensorflow,"Allow empty GCS tokens to be cached. PiperOrigin-RevId: 217159671",google_auth_provider.cc,"@@ -135,8 +135,7 @@ Status GoogleAuthProvider::GetToken(string* t) { mutex_lock lock(mu_); const uint64 now_sec = env_->NowSeconds(); - if (!current_token_.empty() && - now_sec + kExpirationTimeMarginSec < expiration_timestamp_sec_) { + if (now_sec + kExpirationTimeMarginSec < expiration_timestamp_sec_) { *t = current_token_; return Status::OK(); } ",0,train 8db2e909e59c11b302715a9aec215cfc349892f7,tensorflow/tensorflow,update version information file. 
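The dependency-optimizer change in this section keeps each node's inputs sorted in descending topological order so the longest-path scan can stop at the first input that precedes the source node. A Python sketch of that early exit, assuming integer node ids equal to their topological position:

```python
# Sketch of the early-exit idea: inputs are sorted descending, so once an
# input comes before `source` no later input can lie on a path
# source -> input -> target.
def longest_paths_from(source, inputs_desc, num_nodes):
    dist = [0] * num_nodes            # dist[n] = longest path length source -> n
    for target in range(source + 1, num_nodes):
        for inp in inputs_desc[target]:   # descending topological order
            if inp < source:
                break                     # every remaining input is earlier still
            if inp == source or dist[inp] > 0:
                dist[target] = max(dist[target], dist[inp] + 1)
    return dist

# Tiny DAG: 0 -> 1 -> 2 and a redundant edge 0 -> 2 (longest path to 2 is 2).
inputs = {1: [0], 2: [1, 0]}          # already sorted descending
assert longest_paths_from(0, inputs, 3) == [0, 1, 2]
```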
Also upadate tensorrt bazel configuration file,convert_nodes_test.cc,"@@ -280,6 +280,14 @@ class FakeITensor : public nvinfer1::ITensor { float getDynamicRangeMax() const override { return 0.f; } #endif +#if IS_TRT_VERSION_GE(6, 0, 0, 0) + void setAllowedFormats(nvinfer1::TensorFormats formats) override {} + + nvinfer1::TensorFormats getAllowedFormats() const override { return 1; } + + bool isShape() const override { return false; } +#endif + private: string name_; nvinfer1::Dims dims_; ",0,train 8db2e909e59c11b302715a9aec215cfc349892f7,tensorflow/tensorflow,update version information file. Also upadate tensorrt bazel configuration file,find_cuda_config.py,"@@ -390,7 +390,8 @@ def _find_tensorrt_config(base_paths, required_version): get_header_version) if "".."" in header_version: - header_path, header_version = _find_header(base_paths, ""NvInferRTSafe.h"", + # From TRT 6.0 onwards, version information has been moved to NvInferVersion.h. + header_path, header_version = _find_header(base_paths, ""NvInferVersion.h"", required_version, get_header_version) ",0,train c041b5de75463f76bf8d9461e0f79ea9ecec498f,tensorflow/tensorflow,"Replace ARCH_K8 with __x86_64__. PiperOrigin-RevId: 317689006 Change-Id: I7e47b17ef53b3cc223b64ff179fcdc3777c61eb7",manual_constructor_test.cc,"@@ -92,7 +92,7 @@ TEST(ManualConstructorTest, Alignment) { EXPECT_EQ(reinterpret_cast(test2.b.get()) - &test2.a, reinterpret_cast(&control2.b) - &control2.a); -#ifdef ARCH_K8 +#ifdef __x86_64__ EXPECT_EQ(reinterpret_cast(test2.b.get()) % 16, 0); #endif } ",0,train a4e401da71458d253b05e41f28637b65baf64be4,tensorflow/tensorflow,"Prevent segfault in `embedding_lookup_sparse.cc` Previous fixes missed one additional case. PiperOrigin-RevId: 417676944 Change-Id: I8ab412155cf9b1e897448a6611d209eaa7ca9e66",embedding_lookup_sparse.cc,"@@ -159,6 +159,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 3, &weights)); const TfLiteTensor* value; TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 4, &value)); + const size_t values_size = NumElements(value); const int lookup_rank = SizeOfDimension(indices, 1); const int embedding_rank = NumDimensions(value); @@ -253,6 +254,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { current_squares_weight += w * w; current_total_weight += w; for (int k = 0; k < embedding_size; k++) { + // only index if indices are valid + if (current_output_offset + k < 0) continue; + if (current_output_offset + k >= output_size) continue; + if (example_embedding_offset + k < 0) continue; + if (example_embedding_offset + k >= values_size) continue; output_ptr[current_output_offset + k] += value_ptr[example_embedding_offset + k] * w; } ",0,train 5d2a37a1ca528d454fc33400cad1d3163f1672b2,tensorflow/tensorflow,"[tf:tfrt] Check the returned memref alignment Failures in downstream kernels are hard to debug and impossible to find the original source of misaligned tensor. PiperOrigin-RevId: 420368719 Change-Id: I4b6f73e26ffbb37e49dafe77f1b798487311f744",tf_cpurt.h,"@@ -143,9 +143,9 @@ struct ConvertTensor { // Incorrect alignment will lead to a segfault in the downstream Tensorflow // kernels, check it before returning to the runtime. 
if (internal::IsStaticStorageDuration(memref)) { - DCHECK(tensor.IsAligned()) << ""global memref is not aligned""; + CHECK(tensor.IsAligned()) << ""global memref is not aligned""; } else { - DCHECK(tensor.IsAligned()) << ""allocated memref is not aligned""; + CHECK(tensor.IsAligned()) << ""allocated memref is not aligned""; } return tensor; ",0,train 351fd5e844343348bd6ba1535c908fe0ef0b196b,tensorflow/tensorflow,"Emit an error if there is an uncompilable op in tpu cluster and soft_device_placement option is not true. This emits an error early rather than a potentially misleading error later in compilation. PiperOrigin-RevId: 358291279 Change-Id: I227b8303a6b6245c49243e37b0ee9e2e68c20e35",mark_ops_for_outside_compilation.cc,"@@ -299,6 +299,30 @@ LogicalResult MarkUncompilableOps( return success(); } +// Check for uncompilable ops that are in `tf_dialect` and are not already +// marked for outside compilation. +bool ContainsUncompilableOps(const Dialect* tf_dialect, Block* block, + llvm::DenseSet& supported_ops) { + int uncompilable_op_count = 0; + // Check if op or any parent is already marked for outside compilation. + block->walk([&](Operation* op) { + Operation* iter_op = op; + while (iter_op && !llvm::isa(iter_op)) { + if (iter_op->hasAttrOfType(kXlaOutsideCompilationAttr)) { + return; + } + iter_op = iter_op->getParentOp(); + } + + if (!IsSupportedOp(*op, supported_ops, tf_dialect)) { + op->emitOpError() << ""isn't compilable for TPU device. enable "" + ""soft_device_placement option to run on CPU""; + ++uncompilable_op_count; + } + }); + return uncompilable_op_count > 0; +} + // Unmarks outside compilation for any op that has parents already // marked for outside compilation since the child will be extracted // anyways. @@ -354,6 +378,10 @@ void MarkOpsForOutsideCompilation::runOnOperation() { if (failed(MarkUncompilableOps(tf_dialect, &cluster.GetBody(), supported_ops))) return WalkResult::interrupt(); + } else { + if (ContainsUncompilableOps(tf_dialect, &cluster.GetBody(), + supported_ops)) + return WalkResult::interrupt(); } MarkVariantInputsOutputs(cluster); ",0,train 081758b0e5efc1a1591cda068a4866099bf8a3c5,tensorflow/tensorflow,"Do not reuse allocations which hold tuple logical buffers. This works around a GPU codegen issue which extended the live range of tuple buffers. This also addresses a potential latent bug where thread-local or custom call buffer might have been reused. Change: 145561773",buffer_assignment.cc,"@@ -170,20 +170,26 @@ BufferAssignment::GetUniqueTopLevelOutputAllocation() const { BufferAllocation* BufferAssignment::NewAllocation(const LogicalBuffer& buffer, int64 size, - bool is_thread_local) { + bool is_thread_local, + bool is_reusable) { BufferAllocation::Index index = allocations_.size(); - allocations_.emplace_back(index, size, is_thread_local); + allocations_.emplace_back(index, size, is_thread_local, is_reusable); BufferAllocation* allocation = &allocations_.back(); - AddAssignment(buffer, allocation); + AddAssignment(buffer, allocation, /*colocated_buffer=*/false); allocation_index_for_buffer_[&buffer] = index; return allocation; } // Adds an instruction to the set assigned to the given buffer. 
void BufferAssignment::AddAssignment(const LogicalBuffer& buffer, - BufferAllocation* allocation) { + BufferAllocation* allocation, + bool colocated_buffer) { CHECK_EQ(0, allocation_index_for_buffer_.count(&buffer)) << ""LogicalBuffer "" << buffer << "" already has an allocation.""; + CHECK(allocation->is_reusable() || allocation->assigned_buffers().empty() || + colocated_buffer) + << ""Non-reusable allocation already assigned a buffer""; + TF_CHECK_OK(points_to_analysis().VerifyBuffer(buffer)); allocation->AddAssignment(buffer); @@ -351,6 +357,11 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation, return false; } + if (!allocation->is_reusable()) { + VLOG(4) << ""Can't assign: allocation is not reusable""; + return false; + } + for (const LogicalBuffer* assigned_buffer : allocation->assigned_buffers()) { if (assignment->liveness().MayInterfere(*assigned_buffer, buffer)) { VLOG(4) << ""Can't assign: assignee "" << assigned_buffer->ToString() @@ -369,7 +380,7 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation, return false; } - assignment->AddAssignment(buffer, allocation); + assignment->AddAssignment(buffer, allocation, /*colocated_buffer=*/false); return true; } @@ -455,7 +466,8 @@ tensorflow::Status BufferAssigner::AssignBuffersForComputation( // callers. BufferAllocation* allocation = assignment->NewAllocation(*buffer, buffer_size_(*buffer), - /*is_thread_local=*/false); + /*is_thread_local=*/false, + /*is_reusable=*/false); allocation->set_entry_computation_parameter( buffer->instruction()->parameter_number()); VLOG(3) << ""New allocation for entry computation parameter: "" @@ -470,8 +482,8 @@ tensorflow::Status BufferAssigner::AssignBuffersForComputation( // Custom call operations never have reusable buffers. Also we do not // reuse thread-local buffers for now, because they are dynamically // allocated and their lifetimes are hard to compute. - assignment->NewAllocation(*buffer, buffer_size_(*buffer), - is_thread_local); + assignment->NewAllocation(*buffer, buffer_size_(*buffer), is_thread_local, + /*is_reusable=*/false); continue; } @@ -503,7 +515,16 @@ tensorflow::Status BufferAssigner::AssignBuffersForComputation( // Can't use MaybeAssignBuffer here because buffer liveness conservatively // assumes buffers in different computations always interfere. CHECK_GE(root_allocation->size(), buffer_size_(*buffer)); - assignment->AddAssignment(*buffer, root_allocation); + assignment->AddAssignment(*buffer, root_allocation, + /*colocated_buffer=*/true); + continue; + } + + if (ShapeUtil::IsTuple(buffer->shape())) { + // TODO(b/34669761): Don't reuse tuple buffers because the GPU backend + // assumes longer buffer liveness than indicated by the analysis. + assignment->NewAllocation(*buffer, buffer_size_(*buffer), is_thread_local, + /*is_reusable=*/false); continue; } @@ -567,8 +588,9 @@ tensorflow::Status BufferAssigner::AssignBuffersForComputation( } } if (!assignment->HasAllocation(*buffer)) { - auto* allocation = assignment->NewAllocation( - *buffer, buffer_size_(*buffer), is_thread_local); + auto* allocation = + assignment->NewAllocation(*buffer, buffer_size_(*buffer), + is_thread_local, /*is_reusable=*/true); VLOG(3) << ""New allocation for: "" << buffer->ToString(); allocation_indices.push_back(allocation->index()); } @@ -651,10 +673,12 @@ void BufferAssigner::AssignColocatedBufferSets( // module-level scope, we can allow buffers to be shared across // computations (in some cases). 
allocation = assignment->NewAllocation(*buffer, buffer_size_(*buffer), - /*is_thread_local=*/false); + /*is_thread_local=*/false, + /*is_reusable=*/true); colocated_buffer_allocations_.insert(allocation->index()); } else { - assignment->AddAssignment(*buffer, allocation); + assignment->AddAssignment(*buffer, allocation, + /*colocated_buffer=*/true); } colocated_buffers_.insert(buffer); } ",0,test 081758b0e5efc1a1591cda068a4866099bf8a3c5,tensorflow/tensorflow,"Do not reuse allocations which hold tuple logical buffers. This works around a GPU codegen issue which extended the live range of tuple buffers. This also addresses a potential latent bug where thread-local or custom call buffer might have been reused. Change: 145561773",buffer_assignment.h,"@@ -52,8 +52,12 @@ class BufferAllocation { // contiguously and can be used as array indexes. using Index = int64; - BufferAllocation(Index index, int64 size, bool is_thread_local) - : index_(index), size_(size), is_thread_local_(is_thread_local) {} + BufferAllocation(Index index, int64 size, bool is_thread_local, + bool is_reusable) + : index_(index), + size_(size), + is_thread_local_(is_thread_local), + is_reusable_(is_reusable) {} ~BufferAllocation() {} // Adds a LogicalBuffer to the set assigned to this buffer. @@ -64,6 +68,9 @@ class BufferAllocation { // local. bool is_thread_local() const { return is_thread_local_; } + // Whether this allocation can be used by more than one logical buffer. + bool is_reusable() const { return is_reusable_; } + // Whether this allocation holds a LogicalBuffer from a parameter of the entry // computation. These buffers have lifetimes which may be longer than the // XLA computation. @@ -138,6 +145,9 @@ class BufferAllocation { // Whether this buffer needs to be thread-local. bool is_thread_local_; + // Whether this buffer is usable by more than one logical buffer. + bool is_reusable_; + // Whether this allocation holds an entry computation parameter. Entry // computation parameters are special be cause they have lifetimes which may // outlast the computation. @@ -232,10 +242,13 @@ class BufferAssignment { // assigned to it. `is_thread_local` indicates whether this buffer needs to be // thread-local. BufferAllocation* NewAllocation(const LogicalBuffer& buffer, int64 size, - bool is_thread_local); + bool is_thread_local, bool is_reusable); - // Adds a LogicalBuffer to the set assigned to the given allocation. - void AddAssignment(const LogicalBuffer& buffer, BufferAllocation* allocation); + // Adds a LogicalBuffer to the set assigned to the given allocation. If + // colocated_buffer is true, then the logical buffer is an alias of another + // buffer assigned to this allocation. + void AddAssignment(const LogicalBuffer& buffer, BufferAllocation* allocation, + bool colocated_buffer); // Returns the BufferLiveness object used to construct this assignment. const BufferLiveness& liveness() { return *liveness_; } @@ -314,6 +327,10 @@ class BufferAssigner { const LogicalBuffer& buffer, BufferAssignment* assignment); + // Colocated buffers are logical buffers from different computations which + // alias. Explicitly handling these colocated buffers is necessary because + // points-to analysis is computation level scope and does not recognize + // aliasing across computations (b/32491382). using ColocatedBufferSet = std::vector; // Returns a vector of ColocatedBufferSet objects, where each ",0,test 081758b0e5efc1a1591cda068a4866099bf8a3c5,tensorflow/tensorflow,"Do not reuse allocations which hold tuple logical buffers. 
This works around a GPU codegen issue which extended the live range of tuple buffers. This also addresses a potential latent bug where thread-local or custom call buffer might have been reused. Change: 145561773",buffer_assignment_test.cc,"@@ -1046,6 +1046,31 @@ TEST_F(BufferAssignmentTest, AmbiguousBufferAsOutput) { .ConsumeValueOrDie())); } +// TODO(b/34669761): Remove this test when buffers are allowed to share +// allocations. +TEST_F(BufferAssignmentTest, TupleBufferNotReused) { + // Test a computation that returns a tuple parameter. + auto builder = HloComputation::Builder(TestName()); + auto scalar_shape = ShapeUtil::MakeShape(F32, {}); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, ""param0"")); + auto tuple = builder.AddInstruction(HloInstruction::CreateTuple({param})); + auto tuple_element = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape, tuple, 0)); + auto copy = builder.AddInstruction(HloInstruction::CreateUnary( + scalar_shape, HloOpcode::kCopy, tuple_element)); + + auto module = MakeUnique(TestName()); + module->AddEntryComputation(builder.Build()); + auto assignment = RunBufferAssignment(module.get()); + + // There should be no buffer reuse. The copy should not reuse the tuple + // buffer. + EXPECT_EQ(3, assignment->Allocations().size()); + EXPECT_NE(GetTopLevelAllocation(*assignment, tuple), + GetTopLevelAllocation(*assignment, copy)); +} + } // namespace } // namespace xla ",0,test 6dd43ec8cb299459b835e50faa4f3ffad044098c,tensorflow/tensorflow,PiperOrigin-RevId: 170347520,broadcast_simple_test.cc,"@@ -96,7 +96,7 @@ class BroadcastSimpleTest : public ClientLibraryTestBase { } default: { // Default to Add - CHECK(false); + LOG(FATAL); } } } ",0,train e69f08a6e5af596c0e0613980a958d587f440db0,tensorflow/tensorflow,"Fix and de-flake estimators_test. 
Change: 133218112",estimators_test.py,"@@ -68,7 +68,11 @@ class FeatureEngineeringFunctionTest(tf.test.TestCase): def feature_engineering_fn(features, targets): _, _ = features, targets - return {""x"": tf.constant([9.])}, {""y"": tf.constant([99.])} + return { + ""transformed_x"": tf.constant([9.]) + }, { + ""transformed_y"": tf.constant([99.]) + } def model_fn(features, targets): # dummy variable: @@ -83,8 +87,8 @@ class FeatureEngineeringFunctionTest(tf.test.TestCase): feature_engineering_fn=feature_engineering_fn) estimator.fit(input_fn=input_fn, steps=1) prediction = next(estimator.predict(input_fn=input_fn, as_iterable=True)) - # predictions = transformed_x (99) - self.assertEqual(99., prediction) + # predictions = transformed_x (9) + self.assertEqual(9., prediction) def testNoneFeatureEngineeringFn(self): ",0,train 1bd1c27e0db046ef97a1e39817956c05bc3f44d3,tensorflow/tensorflow,"Automated rollback of change 128378997 Change: 128393251",tensor_shape.cc,"@@ -33,14 +33,13 @@ static void AppendTo(const TensorShape& s, gtl::InlinedVector* vals) { } void TensorShape::CheckDimsEqual(int NDIMS) const { - CHECK_EQ(NDIMS, dims()) << ""Asking for tensor of "" << NDIMS << ""dimensions"" - << "" from a tensor of "" << dims() << "" dimensions""; + CHECK_EQ(NDIMS, dims()) << ""Asking for tensor of "" << NDIMS + << "" for a tensor of "" << dims() << "" dimensions""; } void TensorShape::CheckDimsAtLeast(int NDIMS) const { CHECK_GE(NDIMS, dims()) << ""Asking for tensor of at least "" << NDIMS - << "" dimensions from a tensor of "" << dims() - << "" dimensions""; + << "" for a tensor of "" << dims() << "" dimensions""; } bool TensorShape::IsValid(const TensorShapeProto& proto) { ",0,train 1bd1c27e0db046ef97a1e39817956c05bc3f44d3,tensorflow/tensorflow,"Automated rollback of change 128378997 Change: 128393251",array_ops_test.py,"@@ -273,10 +273,12 @@ class StridedSliceChecker(object): self.x_np = np.array(x) def __getitem__(self, spec): - op = self.x.__getitem__(spec) + # TODO(aselle): When NewSliceHelper is installed, we can switch this back + # op = self.x[spec] + op = array_ops._NewSliceHelper(self.x, spec) tensor = op.eval() - self.test.assertAllEqual(self.x_np.__getitem__(spec), tensor) + self.test.assertAllEqual(self.x_np[spec], tensor) self.test.assertAllEqual(tensor.shape, op.get_shape()) return tensor @@ -393,7 +395,9 @@ class StridedSliceShapeChecker(object): self.x = x def __getitem__(self, spec): - op = self.x.__getitem__(spec) + # TODO(aselle): When NewSliceHelper is installed, we can switch this back + # op = self.x[spec] + op = array_ops._NewSliceHelper(self.x, spec) return op.get_shape() @@ -447,28 +451,22 @@ class GradSliceChecker(object): self.varnp = varnp def __getitem__(self, spec): - slice_var = self.var[spec] - slice_val = self.val[spec] - - # compute analytic 2nd derivative - analytic_grad2 = 2 * slice_val - - dy = tf.Variable(tf.ones(shape=slice_var.get_shape(), dtype=tf.int32)) - assign = dy.assign(slice_var) - slice_val_grad, = tf.gradients(slice_val, self.var, grad_ys=dy) - slice_val_grad2, = tf.gradients(slice_val_grad, dy, grad_ys=self.var) - self.sess.run(assign) - slice_val_grad_evaled, slice_val_grad2_evaled = ( - self.sess.run([slice_val_grad, slice_val_grad2])) - analytic_grad2_evaled = analytic_grad2.eval() - self.test.assertAllEqual(slice_val_grad2_evaled, analytic_grad2_evaled) - - # compute analytic gradient for slice - np_val_grad = (2 * self.varnp * self.varnp) + val_grad_op = tf.gradients(self.val, self.var) + sliceval_grad_op = tf.gradients( + 
array_ops._NewSliceHelper(self.val, spec), self.var) + slice1_op = array_ops._NewSliceHelper(val_grad_op, spec) + slice2_op = array_ops._NewSliceHelper(sliceval_grad_op, spec) + val_grad, sliceval_grad, slice1, slice2 = self.sess.run( + [val_grad_op, sliceval_grad_op, slice1_op, slice2_op]) + np_val_grad = (2 * self.varnp) np_sliceval_grad = np.zeros(self.var.get_shape()) - np_sliceval_grad[spec] = np_val_grad[spec] - # verify gradient - self.test.assertAllEqual(slice_val_grad_evaled, np_sliceval_grad) + np_sliceval_grad[spec] = np.array(val_grad[0])[spec] + # make sure np val grad is correct + self.test.assertAllEqual(np_val_grad, val_grad[0]) + # make sure slice gradient is correct + self.test.assertAllEqual(np_sliceval_grad, sliceval_grad[0]) + # make sure val grad and sliceval grad are the same in sliced area + self.test.assertAllEqual(slice1, slice2) class StridedSliceGradTest(test_util.TensorFlowTestCase): @@ -495,7 +493,7 @@ class BenchmarkSlice(object): self.tensor = tensor def __getitem__(self, x): - return self.tensor[x] + return array_ops._NewSliceHelper(self.tensor, x) class StridedSliceBenchmark(tf.test.Benchmark): ",0,train 1bd1c27e0db046ef97a1e39817956c05bc3f44d3,tensorflow/tensorflow,"Automated rollback of change 128378997 Change: 128393251",array_grad.py,"@@ -151,7 +151,7 @@ def _SliceGrad(op, grad): @ops.RegisterGradient(""StridedSlice"") def _StridedSliceGrad(op, grad): - """"""Gradient for StridedSlice op."""""" + """"""Gradient for unpack op."""""" x = array_ops.shape(op.inputs[0]) begin = op.inputs[1] end = op.inputs[2] @@ -170,25 +170,6 @@ def _StridedSliceGrad(op, grad): shrink_axis_mask=op.get_attr(""shrink_axis_mask"")), None, None, None -@ops.RegisterGradient(""StridedSliceGrad"") -def _StridedSliceGradGrad(op, grad): - """"""Gradient for StridedSliceGrad op."""""" - begin = op.inputs[1] - end = op.inputs[2] - strides = op.inputs[3] - - return None, None, None, None, array_ops.strided_slice( - grad, - begin, - end, - strides, - begin_mask=op.get_attr(""begin_mask""), - end_mask=op.get_attr(""end_mask""), - ellipsis_mask=op.get_attr(""ellipsis_mask""), - new_axis_mask=op.get_attr(""new_axis_mask""), - shrink_axis_mask=op.get_attr(""shrink_axis_mask"")) - - @ops.RegisterGradient(""Split"") def _SplitGrad(op, *grads): return None, array_ops.concat(op.inputs[0], list(grads)) ",0,train 1bd1c27e0db046ef97a1e39817956c05bc3f44d3,tensorflow/tensorflow,"Automated rollback of change 128378997 Change: 128393251",array_ops.py,"@@ -196,7 +196,7 @@ def zeros_initializer(shape, dtype=dtypes.float32): return zeros(shape, dtype) -def _SliceHelper(tensor, slice_spec): +def _NewSliceHelper(tensor, slice_spec): """"""Overload for Tensor.__getitem__. This operation extracts the specified region from the tensor. @@ -275,6 +275,73 @@ def _SliceHelper(tensor, slice_spec): # pylint: disable=undefined-variable,protected-access +def _SliceHelper(tensor, slice_spec): + """"""Overload for Tensor.__getitem__. + + Currently the size of the slice must be statically known in each dimension, + i.e. the ""stop"" of the slice must not be omitted. + + TODO(mrry): Support slices where the sizes are not specified. + TODO(mrry): Support negative indices in slices with numpy/Python semantics. + + Args: + tensor: An ops.Tensor object. + slice_spec: The arguments to Tensor.__getitem__. + + Returns: + The appropriate slice of ""tensor"", based on ""slice_spec"". + + Raises: + ValueError: If a slice range is negative size. + TypeError: If the slice indices aren't int, slice, or Ellipsis. 
+ """""" + if not isinstance(slice_spec, (list, tuple)): + slice_spec = [slice_spec] + indices = [] + sizes = [] + squeeze_dims = [] + for dim, s in enumerate(slice_spec): + if isinstance(s, _baseslice): + if s.step not in (None, 1): + raise NotImplementedError( + ""Steps other than 1 are not currently supported"") + start = s.start if s.start is not None else 0 + if start < 0: + raise NotImplementedError( + ""Negative start indices are not currently supported"") + indices.append(start) + if s.stop is not None and s.stop < 0: + raise NotImplementedError( + ""Negative stop indices are not currently supported"") + # NOTE(mrry): If the stop is not specified, Python substitutes + # sys.maxsize, which is typically (2 ** 63) - 1. Since Slice currently + # supports signed DT_INT32 arguments, we use -1 to specify that all + # elements should be captured. + if s.stop is None or s.stop == sys.maxsize: + sizes.append(-1) + else: + if start > s.stop: + raise ValueError(""Stop must be at least start"") + sizes.append(s.stop - start) + elif s is Ellipsis: + raise NotImplementedError(""Ellipsis is not currently supported"") + else: + try: + s = int(s) + except TypeError: + raise TypeError(""Bad slice index %s of type %s"" % (s, type(s))) + if s < 0: + raise NotImplementedError(""Negative indices are currently unsupported"") + indices.append(s) + sizes.append(1) + squeeze_dims.append(dim) + sliced = slice(tensor, indices, sizes) + if squeeze_dims: + return squeeze(sliced, squeeze_dims=squeeze_dims) + else: + return sliced + + def slice(input_, begin, size, name=None): """"""Extracts a slice from a tensor. @@ -423,6 +490,8 @@ def strided_slice(input_, new_axis_mask=new_axis_mask, shrink_axis_mask=shrink_axis_mask) +# TODO(aselle): When gradient is added and performance verified switch +# ops.Tensor._override_operator(""__getitem__"", _NewSliceHelper) ops.Tensor._override_operator(""__getitem__"", _SliceHelper) @@ -1526,9 +1595,8 @@ def _StridedSliceShape(op): sparse_dims = begin_shape.merge_with(end_shape).merge_with(strides_shape)[ 0].value - if (sparse_dims is None or begin_value is None or end_value is None or - strides_value is None): - return [tensor_shape.unknown_shape()] + if sparse_dims is None: + return [input_shape.unknown_shape()] begin_mask = op.get_attr(""begin_mask"") end_mask = op.get_attr(""end_mask"") ",0,train 69f52845ed618350aafd6fa8c2d369a636d205b5,tensorflow/tensorflow,"Update __init__.py Add python formatting to correct website formatting away newlines",__init__.py,"@@ -18,17 +18,14 @@ For TensorFlow 1.0, we have reorganized the TensorFlow summary ops into a submodule, and made some semantic tweaks. The first thing to note is that we moved the APIs around as follows: +```python tf.scalar_summary -> tf.summary.scalar - tf.histogram_summary -> tf.summary.histogram - tf.audio_summary -> tf.summary.audio - tf.image_summary -> tf.summary.image - tf.merge_summary -> tf.summary.merge - tf.merge_all_summaries -> tf.summary.merge_all +``` We think this API is cleaner and will improve long-term discoverability and clarity of the TensorFlow API. We however, also took the opportunity to make an @@ -46,7 +43,7 @@ collision. The new summary APIs under tf.summary throw away the ""tag"" as an independent concept; instead, the first argument is the node name. So summary tags now -automatically inherit the surrounding TF name scope, and automatically +automatically inherit the surrounding TF namescope, and automatically are deduplicated if there is a conflict. 
Now however, the only allowed characters are alphanumerics, underscores, and forward slashes. To make migration easier, the new APIs automatically convert illegal characters to @@ -67,7 +64,7 @@ def add_activation_summaries(v): ``` Now, so long as the add_activation_summaries function is called from within the -right name scope, the behavior is the same. +right namescope, the behavior is the same. Because this change does modify the behavior and could break tests, we can't automatically migrate usage to the new APIs. That is why we are making the old @@ -82,9 +79,9 @@ to the new summary ops: tf.summary.scalar requires a single scalar name and scalar value. In most cases, you can create tf.summary.scalars in a loop to get the same behavior -As before, TensorBoard groups charts by the top-level name scope. This may +As before, TensorBoard groups charts by the top-level namescope. This may be inconvenient, since in the new summary ops the summary will inherit that -name scope without user control. We plan to add more grouping mechanisms to +namescope without user control. We plan to add more grouping mechanisms to TensorBoard, so it will be possible to specify the TensorBoard group for each summary via the summary API. ",0,test ac18e7069cf865783e9ed75a1a036d69084d9a7f,tensorflow/tensorflow,"Version of convolution that uses weights broadcasting at simd level supports apis without pointers support in kernel languages. PiperOrigin-RevId: 419768517 Change-Id: I7824e4d6c618a30ed8b6636e0e9dbad1d627b7be",conv_powervr.cc,"@@ -940,16 +940,24 @@ std::string ConvPowerVR::GenerateConv(const GpuInfo& gpu_info, } else if (use_simd_broadcast) { int parts = local_mem_size / simd_size; int reminder = local_mem_size % simd_size; + const std::string read_start = gpu_info.SupportsPointersInKernels() + ? ""filters_loc["" + : ""args.weights.Read(filters_offset + ""; + const std::string read_end = + gpu_info.SupportsPointersInKernels() ? ""]"" : "")""; for (int i = 0; i < parts; ++i) { - c += "" FLT4 simd_w"" + std::to_string(i) + "" = filters_loc[simd_id + "" + - std::to_string(i * simd_size) + ""];\n""; + const std::string weights_index = + ""simd_id + "" + std::to_string(i * simd_size); + c += "" FLT4 simd_w"" + std::to_string(i) + "" = "" + read_start + + weights_index + read_end + "";\n""; } if (reminder) { + const std::string weights_index = + ""simd_id + "" + std::to_string(parts * simd_size); c += "" FLT4 simd_w"" + std::to_string(parts) + "";\n""; c += "" if (simd_id < "" + std::to_string(reminder) + "") {\n""; - c += "" simd_w"" + std::to_string(parts) + - "" = filters_loc[simd_id + "" + std::to_string(parts * simd_size) + - ""];\n""; + c += "" simd_w"" + std::to_string(parts) + "" = "" + read_start + + weights_index + read_end + "";\n""; c += "" }\n""; } } else if (conv_params.AreWeightsBuffer()) { // GLOBAL_MEM/CONSTANT_MEM ",0,train 0cc35b3d409ddf9d3baeb54edd88b1addd186b17,tensorflow/tensorflow,"Fix behavior of `stack()` when passed a single ragged tensor, to be consistent with handling of multiple tensors. PiperOrigin-RevId: 226355632",ragged_array_ops.py,"@@ -655,7 +655,7 @@ def _ragged_stack_concat_helper(rt_inputs, axis, stack_values): # Special case: if there's only one input, then return it as-is. 
if len(rt_inputs) == 1: if stack_values: - return expand_dims(rt_inputs[0], axis=0) + return expand_dims(rt_inputs[0], axis=axis) else: return rt_inputs[0] ",0,train 0cc35b3d409ddf9d3baeb54edd88b1addd186b17,tensorflow/tensorflow,"Fix behavior of `stack()` when passed a single ragged tensor, to be consistent with handling of multiple tensors. PiperOrigin-RevId: 226355632",ragged_stack_op_test.py,"@@ -33,6 +33,52 @@ class RaggedStackOpTest(ragged_test_util.RaggedTensorTestCase, parameterized.TestCase): @parameterized.parameters( + dict( + descr='One rank-2 input (ragged_rank=1), axis=0', + rt_inputs=( + [['a00', 'a01'], [], ['a20', 'a21']],), # shape=(3, None) + axis=0, + expected=[[[b'a00', b'a01'], [], [b'a20', b'a21']]]), + dict( + descr='One rank-2 input (ragged_rank=1), axis=1', + rt_inputs=( + [['a00', 'a01'], [], ['a20', 'a21', 'a22']],), # shape=(3, None) + axis=1, + expected=[ + [[b'a00', b'a01']], + [[]], + [[b'a20', b'a21', b'a22']]]), + dict( + descr='One rank-2 input (ragged_rank=1), axis=2', + rt_inputs=( + [['a00', 'a01'], [], ['a20', 'a21', 'a22']],), # shape=(3, None) + axis=2, + expected=[ + [[b'a00'], [b'a01']], [], + [[b'a20'], [b'a21'], [b'a22']]]), + dict( + descr='One rank-2 input (ragged_rank=1), axis=-3', + rt_inputs=( + [['a00', 'a01'], [], ['a20', 'a21']],), # shape=(3, None) + axis=-3, + expected=[[[b'a00', b'a01'], [], [b'a20', b'a21']]]), + dict( + descr='One rank-2 input (ragged_rank=1), axis=-2', + rt_inputs=( + [['a00', 'a01'], [], ['a20', 'a21', 'a22']],), # shape=(3, None) + axis=-2, + expected=[ + [[b'a00', b'a01']], + [[]], + [[b'a20', b'a21', b'a22']]]), + dict( + descr='One rank-2 input (ragged_rank=1), axis=-1', + rt_inputs=( + [['a00', 'a01'], [], ['a20', 'a21', 'a22']],), # shape=(3, None) + axis=-1, + expected=[ + [[b'a00'], [b'a01']], [], + [[b'a20'], [b'a21'], [b'a22']]]), dict( descr='Two rank-2 inputs (ragged_rank=1), axis=0', rt_inputs=( ",0,train 59b464fc9d2379c5da0bb6b3b4b5c6b0d36d65ce,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-07-27 PiperOrigin-RevId: 387057905 Change-Id: I1577296734d90794ccac1a409ad447dc302d4031",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 7, 26) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 7, 27) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train f4850641530017a3b2b294974298ae13028b8583,tensorflow/tensorflow,CLN: code style,training_ops.cc,"@@ -342,7 +342,7 @@ struct ApplyAdaMaxNonCuda { m.device(d) += (grad - m) * (T(1) - beta1()); // Here v is u in section 7.1 v.device(d) = (beta2() * v).cwiseMax(grad.abs()); - // var is θ in section 7.1 + // var is θ in section 7.1 var.device(d) -= lr() / (T(1) - beta1_power()) * (m / (v + epsilon())); } }; ",0,train 61fde9e1e3c5d995aa20f7bf2781ba60db5bf246,tensorflow/tensorflow,Single quote to backtick for consistency,mirrored_strategy.py,"@@ -49,7 +49,7 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): There are several important concepts for distributed TensorFlow, e.g. 
`client`, `job`, `task`, `cluster`, `in-graph replication` and - 'synchronous training' and they have already been defined in the + `synchronous training` and they have already been defined in the [TensorFlow's documentation](https://www.tensorflow.org/deploy/distributed). The distribution strategy inherits these concepts as well and in addition to that we also clarify several more concepts: ",0,test e656213afff2e9491ae1c53e2e76a427a3be20c9,tensorflow/tensorflow,"Rename hlo_algorithm_blacklist to hlo_algorithm_denylist PiperOrigin-RevId: 322821382 Change-Id: Iea0de84b3c82562b5649fc0a6092cd2fb473c83a",gpu_conv_algorithm_picker.cc,"@@ -24,7 +24,7 @@ limitations under the License. #include ""tensorflow/compiler/xla/service/gpu/backend_configs.pb.h"" #include ""tensorflow/compiler/xla/service/gpu/convolution_thunk.h"" #include ""tensorflow/compiler/xla/service/gpu/gpu_autotuning.pb.h"" -#include ""tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.h"" +#include ""tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist.h"" #include ""tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"" #include ""tensorflow/compiler/xla/service/gpu/stream_executor_util.h"" #include ""tensorflow/compiler/xla/service/hlo_casting_utils.h"" ",0,train e656213afff2e9491ae1c53e2e76a427a3be20c9,tensorflow/tensorflow,"Rename hlo_algorithm_blacklist to hlo_algorithm_denylist PiperOrigin-RevId: 322821382 Change-Id: Iea0de84b3c82562b5649fc0a6092cd2fb473c83a",hlo_algorithm_denylist.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include ""tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.h"" +#include ""tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist.h"" #include ",0,train e656213afff2e9491ae1c53e2e76a427a3be20c9,tensorflow/tensorflow,"Rename hlo_algorithm_blacklist to hlo_algorithm_denylist PiperOrigin-RevId: 322821382 Change-Id: Iea0de84b3c82562b5649fc0a6092cd2fb473c83a",hlo_algorithm_denylist.h,,0,train e656213afff2e9491ae1c53e2e76a427a3be20c9,tensorflow/tensorflow,"Rename hlo_algorithm_blacklist to hlo_algorithm_denylist PiperOrigin-RevId: 322821382 Change-Id: Iea0de84b3c82562b5649fc0a6092cd2fb473c83a",hlo_algorithm_denylist_test.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include ""tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.h"" +#include ""tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist.h"" #include ""tensorflow/core/lib/io/path.h"" #include ""tensorflow/core/platform/env.h"" ",0,train 7e346134472913c743047cd49e17649f34ec75ca,tensorflow/tensorflow,"Add TensorListStack op to the list of ops not to be marked for outside compilation. PiperOrigin-RevId: 342746704 Change-Id: I343580637ace980fdbbab8801eedc65be94f4b77",mark_ops_for_outside_compilation.cc,"@@ -117,6 +117,7 @@ void AddRewrittenCompositeOps(MLIRContext* context, GET_OPERATION_NAME(TF::TensorListElementShapeOp), GET_OPERATION_NAME(TF::TensorListGatherOp), GET_OPERATION_NAME(TF::TensorListScatterIntoExistingListOp), + GET_OPERATION_NAME(TF::TensorListStackOp), }; #undef GET_OPERATION_NAME ",0,train d84bfa45172da3af2b487593fb0cac1756f4fc0d,tensorflow/tensorflow,"Removed #includes of protobuf full headers when TENSORFLOW_LITE_PROTOS is defined. 
This requires #ifdef'ing out the code to use JSON parsing/serialization. PiperOrigin-RevId: 217003132",human_readable_json.cc,"@@ -22,6 +22,10 @@ namespace tensorflow { Status ProtoToHumanReadableJson(const ::google::protobuf::Message& proto, string* result) { +#ifdef TENSORFLOW_LITE_PROTOS + *result = ""[human readable output not available on Android]""; + return Status::OK(); +#else result->clear(); auto status = google::protobuf::util::MessageToJsonString(proto, result); @@ -34,10 +38,14 @@ Status ProtoToHumanReadableJson(const ::google::protobuf::Message& proto, StringPiece(error_msg.data(), error_msg.length()))); } return Status::OK(); +#endif } Status HumanReadableJsonToProto(const string& str, ::google::protobuf::Message* proto) { +#ifdef TENSORFLOW_LITE_PROTOS + return errors::Internal(""Cannot parse JSON protos on Android""); +#else proto->Clear(); auto status = google::protobuf::util::JsonStringToMessage(str, proto); if (!status.ok()) { @@ -49,6 +57,7 @@ Status HumanReadableJsonToProto(const string& str, StringPiece(error_msg.data(), error_msg.length()))); } return Status::OK(); +#endif } } // namespace tensorflow ",0,train d84bfa45172da3af2b487593fb0cac1756f4fc0d,tensorflow/tensorflow,"Removed #includes of protobuf full headers when TENSORFLOW_LITE_PROTOS is defined. This requires #ifdef'ing out the code to use JSON parsing/serialization. PiperOrigin-RevId: 217003132",protobuf.h,"@@ -19,18 +19,21 @@ limitations under the License. // IWYU pragma: private, include ""third_party/tensorflow/core/platform/protobuf.h"" // IWYU pragma: friend third_party/tensorflow/core/platform/protobuf.h -#include ""google/protobuf/arena.h"" +#ifndef TENSORFLOW_LITE_PROTOS #include ""google/protobuf/descriptor.h"" #include ""google/protobuf/descriptor.pb.h"" #include ""google/protobuf/dynamic_message.h"" +#include ""google/protobuf/text_format.h"" +#include ""google/protobuf/util/json_util.h"" +#include ""google/protobuf/util/type_resolver_util.h"" +#endif + +#include ""google/protobuf/arena.h"" #include ""google/protobuf/io/coded_stream.h"" #include ""google/protobuf/io/zero_copy_stream.h"" #include ""google/protobuf/io/zero_copy_stream_impl_lite.h"" #include ""google/protobuf/map.h"" #include ""google/protobuf/repeated_field.h"" -#include ""google/protobuf/text_format.h"" -#include ""google/protobuf/util/json_util.h"" -#include ""google/protobuf/util/type_resolver_util.h"" namespace tensorflow { namespace protobuf = ::google::protobuf; ",0,train d600c3c4fa9c8ea9581ea9ff52a30b87655ebb71,tensorflow/tensorflow,"Convert XEventMetadata XStats to trace viewer events PiperOrigin-RevId: 407940486 Change-Id: I2a4d378a7fa1013e75bf069184082715951a450f",xplane_to_trace_events.cc,"@@ -85,14 +85,17 @@ void ConvertXPlaneToTraceEvents(uint32 device_id, const XPlaneVisitor& xplane, event->set_timestamp_ps(xevent.TimestampPs()); event->set_duration_ps(xevent.DurationPs()); - xevent.ForEachStat([&](const XStatVisitor& stat) { + auto for_each_stat = [&](const XStatVisitor& stat) { if (stat.ValueCase() == XStat::VALUE_NOT_SET) return; if (IsInternalStat(stat.Type())) return; if (stat.Type() == StatType::kStepName) { event->set_name(stat.ToString()); } args[std::string(stat.Name())] = stat.ToString(); - }); + }; + // The metadata stats should appear before the per-occurrence stats. + xevent.Metadata().ForEachStat(for_each_stat); + xevent.ForEachStat(for_each_stat); }); }); } ",0,train 8624a703ebd914e9d91bb7992570b52946fad970,tensorflow/tensorflow,"Try to make resize bilinear test more deterministic. 
PiperOrigin-RevId: 228301953",resize_bilinear_test.cc,"@@ -76,6 +76,7 @@ void TestOneResizeBilinear(int batch, int depth, int input_width, } TEST(ResizeBilinear, TestResizeBilinear8Bit) { + RandomEngine().seed(38291); const int kTestsToRun = 100 * 1000; for (int i = 0; i < kTestsToRun; i++) { const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); @@ -91,6 +92,7 @@ TEST(ResizeBilinear, TestResizeBilinear8Bit) { } TEST(ResizeBilinear2x2, TestResizeBilinear8Bit) { + RandomEngine().seed(38291); const int kTestsToRun = 100 * 1000; for (int i = 0; i < kTestsToRun; i++) { const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); @@ -106,6 +108,7 @@ TEST(ResizeBilinear2x2, TestResizeBilinear8Bit) { } TEST(ResizeBilinear, TestResizeBilinear) { + RandomEngine().seed(38291); const int kTestsToRun = 100 * 1000; for (int i = 0; i < kTestsToRun; i++) { const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); @@ -121,6 +124,7 @@ TEST(ResizeBilinear, TestResizeBilinear) { } TEST(ResizeBilinear2x2, TestResizeBilinear) { + RandomEngine().seed(38291); const int kTestsToRun = 100 * 1000; for (int i = 0; i < kTestsToRun; i++) { const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); ",0,test 8fdf7b0e3b94154dd268acbc1db569c6c7ca3ce3,tensorflow/tensorflow,"Disable pinning functions to the CPU. PiperOrigin-RevId: 231327139",execute.cc,"@@ -744,7 +744,7 @@ bool IsPinnableOp(const string& op_type) { static const gtl::FlatSet* unpinnable_ops = new gtl::FlatSet({ ""RandomUniform"", ""RandomUniformInt"", - ""RandomNormal"", + ""RandomStandardNormal"", ""StatelessRandomUniform"", ""StatelessRandomUniformInt"", ""StatelessRandomNormal"", @@ -764,7 +764,7 @@ bool IsPinnableOp(const string& op_type) { Status MaybeUpdateOpDevice(EagerOperation* op) { EagerContext* ctx = op->EagerContext(); bool all_inputs_eligible_for_cpu_pinning = - ctx->PinSmallOpsToCPU() && IsPinnableOp(op->Name()); + ctx->PinSmallOpsToCPU() && !op->is_function() && IsPinnableOp(op->Name()); Device* op_device = op->Device() == nullptr ? ctx->HostCPU() : op->Device(); for (int i = 0; i < op->Inputs().size(); ++i) { TensorHandle* tensor_handle = op->Inputs()[i]; ",0,test 8fdf7b0e3b94154dd268acbc1db569c6c7ca3ce3,tensorflow/tensorflow,"Disable pinning functions to the CPU. 
PiperOrigin-RevId: 231327139",function_test.py,"@@ -49,6 +49,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_random_ops from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import list_ops @@ -2120,6 +2121,24 @@ class FunctionTest(test.TestCase, parameterized.TestCase): self.assertIn('node assert_equal/Assert/Assert (defined at', e.message) self.assertNotIn('fn3', e.message) + def testFunctionIsNotPinned(self): + """"""Tests that functions aren't pinned to the CPU by the eager runtime."""""" + if not context.context().num_gpus(): + self.skipTest('No GPUs found.') + seed1, seed2 = 79, 25 + shape = constant_op.constant([4, 7]) + dtype = dtypes.float32 + + @def_function.function + def func(): + with ops.device('GPU:0'): + return gen_random_ops.random_standard_normal( + shape, dtype=dtype, seed=seed1, seed2=seed2) + + with ops.device('GPU:0'): + x = func() + self.assertRegexpMatches(x.device, 'GPU') + class MultiDeviceTest(test.TestCase, parameterized.TestCase): ",0,test dcb9053d23034e1d16b8787a0ab3239a10d74f4b,tensorflow/tensorflow,"SparseXentOp now returns NaNs for loss & grad rows where the label value is OOB. Change: 128485714",bounds_check.h,"@@ -42,7 +42,7 @@ namespace internal { // This function may only be used on primitive integral types (int32, int64, // etc). It does not guarantee any atomicity or barriers. template -const T SubtleMustCopy(const T &x) { +EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC const T SubtleMustCopy(const T &x) { static_assert(std::is_integral::value, ""SubtleMustCopy can only be used on integer types.""); auto *to_x = reinterpret_cast(&x); ",0,train dcb9053d23034e1d16b8787a0ab3239a10d74f4b,tensorflow/tensorflow,"SparseXentOp now returns NaNs for loss & grad rows where the label value is OOB. Change: 128485714",sparse_xent_op.h,"@@ -19,6 +19,8 @@ limitations under the License. #include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" #include ""tensorflow/core/framework/tensor_types.h"" +#include ""tensorflow/core/kernels/bounds_check.h"" +#include ""tensorflow/core/platform/macros.h"" #include ""tensorflow/core/platform/types.h"" namespace tensorflow { @@ -56,14 +58,22 @@ class SparseXentLossGenerator { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SparseXentLossGenerator( typename TTypes::Tensor32Bit logits, typename TTypes::Tensor32Bit sum_exp_logits, - typename TTypes::Tensor32Bit labels) - : logits_(logits), sum_exp_logits_(sum_exp_logits), labels_(labels) {} + typename TTypes::Tensor32Bit labels, + const Index max_depth) + : logits_(logits), + sum_exp_logits_(sum_exp_logits), + labels_(labels), + max_depth_(max_depth) {} EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T operator()(const Eigen::array& coords) const { - int batch = coords[0]; - int depth = coords[1]; - return (labels_(batch) == depth) + const int batch = coords[0]; + const int depth = coords[1]; + const Index label = tensorflow::internal::SubtleMustCopy(labels_(batch)); + if (!FastBoundsCheck(label, max_depth_)) { + return Eigen::NumTraits::quiet_NaN(); + } + return TF_PREDICT_FALSE(label == depth) ? 
(Eigen::numext::log(sum_exp_logits_(batch)) - logits_(coords)) : T(0.0); }; @@ -72,6 +82,7 @@ class SparseXentLossGenerator { typename TTypes::Tensor32Bit logits_; typename TTypes::Tensor32Bit sum_exp_logits_; typename TTypes::Tensor32Bit labels_; + const Index max_depth_; }; // Generator for calculation of the sparse Xent gradient. @@ -87,16 +98,22 @@ class SparseXentGradGenerator { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SparseXentGradGenerator( typename TTypes::Tensor32Bit exp_logits, typename TTypes::Tensor32Bit sum_exp_logits, - typename TTypes::Tensor32Bit labels) + typename TTypes::Tensor32Bit labels, + const Index max_depth) : exp_logits_(exp_logits), sum_exp_logits_(sum_exp_logits), - labels_(labels) {} + labels_(labels), + max_depth_(max_depth) {} EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T operator()(const Eigen::array& coords) const { - int batch = coords[0]; - int depth = coords[1]; - T subtract = (depth == labels_(batch)) ? T(1.0) : T(0.0); + const int batch = coords[0]; + const int depth = coords[1]; + const Index label = tensorflow::internal::SubtleMustCopy(labels_(batch)); + if (!FastBoundsCheck(label, max_depth_)) { + return Eigen::NumTraits::quiet_NaN(); + } + T subtract = TF_PREDICT_FALSE(depth == label) ? T(1.0) : T(0.0); return exp_logits_(coords) / sum_exp_logits_(batch) - subtract; }; @@ -104,6 +121,7 @@ class SparseXentGradGenerator { typename TTypes::Tensor32Bit exp_logits_; typename TTypes::Tensor32Bit sum_exp_logits_; typename TTypes::Tensor32Bit labels_; + const Index max_depth_; }; } // namespace generator @@ -185,7 +203,8 @@ struct SparseXentEigenImpl { // along classes generator::SparseXentLossGenerator sparse_xent_loss_gen( sparse_xent_helpers::To32BitConst(backprop), - sparse_xent_helpers::To32BitConst(scratch), To32Bit(labels)); + sparse_xent_helpers::To32BitConst(scratch), To32Bit(labels), + backprop.dimension(1) /* max_depth */); To32Bit(loss).device(d) = To32Bit(backprop).generate(sparse_xent_loss_gen).sum(along_class); @@ -194,7 +213,8 @@ struct SparseXentEigenImpl { To32Bit(backprop).device(d) = To32Bit(backprop).exp(); generator::SparseXentGradGenerator sparse_xent_grad_gen( sparse_xent_helpers::To32BitConst(backprop), - sparse_xent_helpers::To32BitConst(scratch), To32Bit(labels)); + sparse_xent_helpers::To32BitConst(scratch), To32Bit(labels), + backprop.dimension(1) /* max_depth */); To32Bit(backprop).device(d) = To32Bit(backprop).generate(sparse_xent_grad_gen); } ",0,train dcb9053d23034e1d16b8787a0ab3239a10d74f4b,tensorflow/tensorflow,"SparseXentOp now returns NaNs for loss & grad rows where the label value is OOB. 
Change: 128485714",sparse_xent_op_test.py,"@@ -73,6 +73,30 @@ class SparseXentTest(tf.test.TestCase): self._testSingleClass(use_gpu=True) self._testSingleClass(use_gpu=False) + def _testInvalidLabel(self, use_gpu): + features = [ + [1., 1., 1., 1.], + [1., 1., 1., 1.], + [1., 2., 3., 4.], + [1., 2., 3., 4.]] + labels = [4, 3, 0, -1] + with self.test_session(use_gpu=use_gpu) as sess: + loss, backprop = gen_nn_ops._sparse_softmax_cross_entropy_with_logits( + features, labels) + tf_loss, tf_backprop = sess.run([loss, backprop]) + self.assertAllClose( + [[np.nan] * 4, + [0.25, 0.25, 0.25, -0.75], + [-0.968, 0.087, 0.237, 0.6439], + [np.nan] * 4], + tf_backprop, rtol=1e-3, atol=1e-3) + self.assertAllClose( + [np.nan, 1.3862, 3.4420, np.nan], tf_loss, rtol=1e-3, atol=1e-3) + + def testInvalidLabel(self): + self._testInvalidLabel(use_gpu=True) + self._testInvalidLabel(use_gpu=False) + def testNpXent(self): # We create 2 batches of logits for testing. # batch 0 is the boring uniform distribution: 1, 1, 1, 1, with target 3. ",0,train 92415c09b8d00f200429e994b08e302f4ca85e67,tensorflow/tensorflow,"Update README.md for tf.contrib.kfac and add deprecation warning. PiperOrigin-RevId: 199119904",optimizer.py,"@@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import warnings + # pylint disable=long-line from tensorflow.contrib.kfac.python.ops import curvature_matrix_vector_products as cmvp from tensorflow.contrib.kfac.python.ops import estimator as est @@ -107,6 +109,10 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): ValueError: If momentum is non-zero and momentum_type is not 'regular' or 'adam'. """""" + warnings.warn( + ""third_party.tensorflow.contrib.kfac is deprecated."" + ""This will be removed on 15-07-2018. Check README for further details."", + DeprecationWarning) # Parameters to be passed to the Fisher estimator: self._variables = var_list or tf_variables.trainable_variables self._cov_ema_decay = cov_ema_decay ",0,train 69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever. This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true. PiperOrigin-RevId: 265705492",collective_param_resolver_distributed_test.cc,"@@ -51,7 +51,7 @@ class FakeWorker : public TestWorkerInterface { : name_(name), device_mgr_(dev_mgr), param_resolver_(cpres) {} void GetStatusAsync(const GetStatusRequest* request, - GetStatusResponse* response, + GetStatusResponse* response, bool fail_fast, StatusCallback done) override { std::vector dev_attr; device_mgr_->ListDeviceAttributes(&dev_attr); ",0,train 69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever. This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. 
This is achieved by setting fail_fast = true. PiperOrigin-RevId: 265705492",collective_rma_distributed_test.cc,"@@ -74,7 +74,7 @@ class FakeWorker : public TestWorkerInterface { BufRendezvous* buf_rendezvous() { return &buf_rendezvous_; } void GetStatusAsync(const GetStatusRequest* request, - GetStatusResponse* response, + GetStatusResponse* response, bool fail_fast, StatusCallback done) override { std::vector dev_attr; device_mgr_->ListDeviceAttributes(&dev_attr); ",0,train 69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever. This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true. PiperOrigin-RevId: 265705492",device_resolver_distributed.cc,"@@ -98,7 +98,8 @@ void DeviceResolverDistributed::RefreshRemoteAttributes( WorkerInterface* worker = worker_cache_->GetOrCreateWorker(task); CHECK(worker) << ""Failed to get worker for "" << task; worker->GetStatusAsync( - req, resp, [this, device, task, req, resp, worker, done](Status s) { + req, resp, /*fail_fast=*/true, + [this, device, task, req, resp, worker, done](Status s) { if (s.ok()) { mutex_lock l(mu_); for (const DeviceAttributes& da : resp->device_attributes()) { ",0,train 69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever. This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true. PiperOrigin-RevId: 265705492",device_resolver_distributed_test.cc,"@@ -69,7 +69,7 @@ class FakeWorker : public TestWorkerInterface { : name_(name), device_mgr_(dev_mgr), device_resolver_(dres) {} void GetStatusAsync(const GetStatusRequest* request, - GetStatusResponse* response, + GetStatusResponse* response, bool fail_fast, StatusCallback done) override { std::vector dev_attr; device_mgr_->ListDeviceAttributes(&dev_attr); ",0,train 69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever. This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true. PiperOrigin-RevId: 265705492",remote_device.cc,"@@ -129,7 +129,7 @@ void NewRemoteDevices(Env* env, WorkerCacheInterface* worker_cache, } } }; - wi->GetStatusAsync(&call->req, &call->resp, cb); + wi->GetStatusAsync(&call->req, &call->resp, /*fail_fast=*/false, cb); } } // namespace tensorflow ",0,train 69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. 
Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever. This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true. PiperOrigin-RevId: 265705492",grpc_eager_client.cc,"@@ -49,8 +49,7 @@ class GrpcEagerClient : public EagerClient { override { \ new RPCState( \ &stub_, cq_, ""/tensorflow.eager.EagerService/"" #method, *request, \ - response, std::move(done), nullptr, nullptr, /*max_retries=*/10, \ - /*fail_fast=*/true); \ + response, std::move(done), nullptr, nullptr, /*max_retries=*/0); \ } CLIENT_METHOD(CreateContext); ",0,train 69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever. This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true. PiperOrigin-RevId: 265705492",grpc_remote_worker.cc,"@@ -72,9 +72,10 @@ class GrpcRemoteWorker : public WorkerInterface { ~GrpcRemoteWorker() override {} void GetStatusAsync(const GetStatusRequest* request, - GetStatusResponse* response, + GetStatusResponse* response, bool fail_fast, StatusCallback done) override { - IssueRequest(request, response, getstatus_, std::move(done)); + IssueRequest(request, response, getstatus_, std::move(done), nullptr, + fail_fast); } void CreateWorkerSessionAsync(const CreateWorkerSessionRequest* request, @@ -269,18 +270,18 @@ class GrpcRemoteWorker : public WorkerInterface { void IssueRequest(const protobuf::Message* request, protobuf::Message* response, const ::grpc::string& method, StatusCallback done, CallOptions* call_opts = nullptr, - int max_retries = kMaxWorkerRpcRetries) { - new RPCState(&stub_, cq_, method, *request, response, - std::move(done), call_opts, - callback_threadpool_, max_retries); + bool fail_fast = true) { + new RPCState( + &stub_, cq_, method, *request, response, std::move(done), call_opts, + callback_threadpool_, /*max_retries=*/0, fail_fast); } + void IssueRequest(const protobuf::Message* request, TensorResponse* response, const ::grpc::string& method, StatusCallback done, - CallOptions* call_opts = nullptr, - int max_retries = kMaxWorkerRpcRetries) { + CallOptions* call_opts = nullptr) { new RPCState(&stub_, cq_, method, *request, response, std::move(done), call_opts, - callback_threadpool_, max_retries); + callback_threadpool_); } void IssueMarkRecvFinishedRequest(int64 request_id) { ",0,train 69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever. This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true. PiperOrigin-RevId: 265705492",grpc_state.h,"@@ -36,17 +36,15 @@ namespace tensorflow { // Object allocated per active RPC. 
// Manage the state of a single asynchronous RPC request. If `max_retries` -// is greater than 0, the request will be retried for any transient failures -// as long as the overall deadline has not elapsed. +// is greater than 0, the request will be retried for any transient failures. template class RPCState : public GrpcClientCQTag { public: - // Default behavior is to set fail_fast = False and handle timeouts manually. RPCState(::grpc::GenericStub* stub, ::grpc::CompletionQueue* cq, const ::grpc::string& method, const protobuf::Message& request, Response* response, StatusCallback done, CallOptions* call_opts, thread::ThreadPool* threadpool, int32 max_retries = 0, - bool fail_fast = false) + bool fail_fast = true) : RPCState(stub, cq, method, request, response, std::move(done), call_opts, threadpool, fail_fast, /*timeout_in_ms=*/0, max_retries) {} @@ -133,6 +131,7 @@ class RPCState : public GrpcClientCQTag { response_buf_.Clear(); VLOG(1) << ""Retrying call for "" << method_ << ""Retry: "" << num_retries_ << "" of "" << max_retries_; + // TODO(b/139945426) Allow user to configure the retry backoff time. StartCall(); } else { // Attach additional GRPC error information if any to the final status ",0,train 69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever. This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true. PiperOrigin-RevId: 265705492",test_utils.h,"@@ -31,7 +31,7 @@ namespace tensorflow { class TestWorkerInterface : public WorkerInterface { public: void GetStatusAsync(const GetStatusRequest* request, - GetStatusResponse* response, + GetStatusResponse* response, bool fail_fast, StatusCallback done) override { done(errors::Unimplemented(""GetStatusAsync"")); } ",0,train 69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever. This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true. PiperOrigin-RevId: 265705492",worker.cc,"@@ -36,7 +36,8 @@ Worker::Worker(WorkerEnv* env) : env_(env), recent_request_ids_(100000) { } void Worker::GetStatusAsync(const GetStatusRequest* request, - GetStatusResponse* response, StatusCallback done) { + GetStatusResponse* response, bool fail_fast, + StatusCallback done) { DeviceMgr* dm = env_->device_mgr; std::vector devices; dm->ListDeviceAttributes(&devices); ",0,train 69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever. 
This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true. PiperOrigin-RevId: 265705492",worker.h,"@@ -46,7 +46,7 @@ class Worker : public WorkerInterface { virtual ~Worker() {} void GetStatusAsync(const GetStatusRequest* request, - GetStatusResponse* response, + GetStatusResponse* response, bool fail_fast, StatusCallback done) override; void CreateWorkerSessionAsync(const CreateWorkerSessionRequest* request, ",0,train 69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever. This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true. PiperOrigin-RevId: 265705492",worker_interface.h,"@@ -37,7 +37,7 @@ class TensorResponse; class WorkerInterface { public: virtual void GetStatusAsync(const GetStatusRequest* request, - GetStatusResponse* response, + GetStatusResponse* response, bool fail_fast, StatusCallback done) = 0; virtual void CreateWorkerSessionAsync( @@ -131,7 +131,15 @@ class WorkerInterface { Status GetStatus(const GetStatusRequest* request, GetStatusResponse* response) { - return CallAndWait(&ME::GetStatusAsync, request, response); + Status ret; + Notification n; + GetStatusAsync(request, response, /*fail_fast=*/true, + [&ret, &n](const Status& s) { + ret = s; + n.Notify(); + }); + n.WaitForNotification(); + return ret; } Status CreateWorkerSession(const CreateWorkerSessionRequest* request, ",0,train 20db88eec824259764b2eafba377f93ea11776b0,tensorflow/tensorflow,"Ignore nodes that are going to be swapped when computing max memory usage PiperOrigin-RevId: 181248577",graph_memory.cc,"@@ -16,6 +16,7 @@ limitations under the License. #include ""tensorflow/core/grappler/costs/graph_memory.h"" #include #include ""tensorflow/core/framework/allocation_description.pb.h"" +#include ""tensorflow/core/framework/attr_value.pb.h"" #include ""tensorflow/core/framework/node_def.pb.h"" #include ""tensorflow/core/framework/step_stats.pb.h"" #include ""tensorflow/core/framework/tensor_description.pb.h"" @@ -163,6 +164,8 @@ void GraphMemory::InferFromTrace(const StepStats& timeline) { NodeMap node_map(&item_.graph); for (const auto& dev_stats : timeline.dev_stats()) { + const string& device_name = dev_stats.device(); + const bool is_gpu = (device_name.find(""GPU:"") || device_name.find(""gpu:"")); std::list& device_tensors = live_tensors_per_device[dev_stats.device()]; for (const auto& node_stats : dev_stats.node_stats()) { @@ -194,7 +197,24 @@ void GraphMemory::InferFromTrace(const StepStats& timeline) { // graph (e.g _Send/_Recv nodes). 
continue; } - for (const string& input : node->input()) { + std::unordered_set swapped_inputs; + if (is_gpu) { + auto it = node->attr().find(""_swap_to_host""); + if (it != node->attr().end()) { + const AttrValue& val = it->second; + for (int port_id : val.list().i()) { + swapped_inputs.insert(port_id); + } + } + } + for (int i = 0; i < node->input_size(); ++i) { + if (swapped_inputs.find(i) != swapped_inputs.end()) { + // The memory of swapped inputs will be released as early as possible: + // therefore ignore this input when determining the deallocation time + // of the tensor. + continue; + } + const string& input = node->input(i); int position; string input_node = ParseNodeName(input, &position); if (position < 0) { ",0,test 20db88eec824259764b2eafba377f93ea11776b0,tensorflow/tensorflow,"Ignore nodes that are going to be swapped when computing max memory usage PiperOrigin-RevId: 181248577",graph_memory_test.cc,"@@ -134,6 +134,62 @@ TEST_F(GraphMemoryTest, MultiDevice) { EXPECT_EQ(gpu_expected, gpu_tensors); } +TEST_F(GraphMemoryTest, GpuSwapping) { + TrivialTestGraphInputYielder fake_input(4, 2, 1024 * 1024, false, {""/GPU:0""}); + GrapplerItem item; + CHECK(fake_input.NextItem(&item)); + item.feed.clear(); + + { + // Estimate the max memory usage for the graph. + GraphMemory memory(item); + Status s = memory.InferStatically(devices_); + TF_CHECK_OK(s); + + const GraphMemory::MemoryUsage& gpu_mem = + memory.GetPeakMemoryUsage(""/GPU:0""); + EXPECT_EQ(20971520, gpu_mem.used_memory); + std::set gpu_tensors; + for (const auto& t : gpu_mem.live_tensors) { + gpu_tensors.insert(strings::StrCat(t.node, "":"", t.output_id)); + } + std::set gpu_expected; + gpu_expected.insert(""Square:0""); + gpu_expected.insert(""Square_1:0""); + gpu_expected.insert(""AddN:0""); + gpu_expected.insert(""AddN_1:0""); + gpu_expected.insert(""AddN_2:0""); + EXPECT_EQ(gpu_expected, gpu_tensors); + } + + { + // Swap the first input to node AddN_1: its fanin (the square nodes) should + // not appear in the max cut anymore. + for (auto& node : *item.graph.mutable_node()) { + if (node.name() == ""AddN_1"") { + (*node.mutable_attr())[""_swap_to_host""].mutable_list()->add_i(0); + } + } + GraphMemory memory(item); + Status s = memory.InferStatically(devices_); + TF_CHECK_OK(s); + const GraphMemory::MemoryUsage& new_gpu_mem = + memory.GetPeakMemoryUsage(""/GPU:0""); + EXPECT_EQ(20971520, new_gpu_mem.used_memory); + std::set new_gpu_tensors; + for (const auto& t : new_gpu_mem.live_tensors) { + new_gpu_tensors.insert(strings::StrCat(t.node, "":"", t.output_id)); + } + std::set new_gpu_expected; + new_gpu_expected.insert(""AddN:0""); + new_gpu_expected.insert(""AddN_1:0""); + new_gpu_expected.insert(""AddN_2:0""); + new_gpu_expected.insert(""AddN_3:0""); + new_gpu_expected.insert(""AddN_4:0""); + EXPECT_EQ(new_gpu_expected, new_gpu_tensors); + } +} + TEST_F(GraphMemoryTest, CtrlDependencies) { // Build a simple graph with a control dependency. 
Scope s = Scope::NewRootScope(); ",0,test 20db88eec824259764b2eafba377f93ea11776b0,tensorflow/tensorflow,"Ignore nodes that are going to be swapped when computing max memory usage PiperOrigin-RevId: 181248577",trivial_test_graph_input_yielder.cc,"@@ -31,8 +31,6 @@ namespace { GraphDef CreateGraphDef(int num_stages, int width, int tensor_size, bool use_multiple_devices, bool insert_queue, const std::vector& device_names) { - CHECK_GE(device_names.size(), width); - using namespace ::tensorflow::ops; // NOLINT(build/namespaces) tensorflow::Scope s = tensorflow::Scope::NewRootScope(); @@ -49,13 +47,17 @@ GraphDef CreateGraphDef(int num_stages, int width, int tensor_size, std::vector this_stage; for (int j = 0; j < width; j++) { if (last_stage.size() == 1) { - Output unary_op = - Square(s.WithDevice(device_names[use_multiple_devices ? j : 0]), - last_stage[0]); + Output unary_op = Square( + s.WithDevice( + device_names[use_multiple_devices ? j % device_names.size() + : 0]), + last_stage[0]); this_stage.push_back(unary_op); } else { Output combine = - AddN(s.WithDevice(device_names[use_multiple_devices ? j : 0]), + AddN(s.WithDevice( + device_names[use_multiple_devices ? j % device_names.size() + : 0]), last_stage); this_stage.push_back(combine); } ",0,test 49b666dbbd58958a7499fa3961c1c8c75757ad7c,tensorflow/tensorflow,"Bring in `isbuiltin`. PiperOrigin-RevId: 187049824",tf_inspect.py,"@@ -149,6 +149,11 @@ def getsource(object): # pylint: disable=redefined-builtin return _inspect.getsource(tf_decorator.unwrap(object)[1]) +def isbuiltin(object): # pylint: disable=redefined-builtin + """"""TFDecorator-aware replacement for inspect.isbuiltin."""""" + return _inspect.isbuiltin(tf_decorator.unwrap(object)[1]) + + def isclass(object): # pylint: disable=redefined-builtin """"""TFDecorator-aware replacement for inspect.isclass."""""" return _inspect.isclass(tf_decorator.unwrap(object)[1]) ",0,train 49b666dbbd58958a7499fa3961c1c8c75757ad7c,tensorflow/tensorflow,"Bring in `isbuiltin`. PiperOrigin-RevId: 187049824",tf_inspect_test.py,"@@ -144,6 +144,19 @@ def test_decorated_function_with_defaults(a, b=2, c='Hello'): self.assertEqual( expected, tf_inspect.getsource(test_decorated_function_with_defaults)) + def testIsBuiltin(self): + self.assertEqual( + tf_inspect.isbuiltin(TestDecoratedClass), + inspect.isbuiltin(TestDecoratedClass)) + self.assertEqual( + tf_inspect.isbuiltin(test_decorated_function), + inspect.isbuiltin(test_decorated_function)) + self.assertEqual( + tf_inspect.isbuiltin(test_undecorated_function), + inspect.isbuiltin(test_undecorated_function)) + self.assertEqual(tf_inspect.isbuiltin(range), inspect.isbuiltin(range)) + self.assertEqual(tf_inspect.isbuiltin(max), inspect.isbuiltin(max)) + def testIsClass(self): self.assertTrue(tf_inspect.isclass(TestDecoratedClass)) self.assertFalse(tf_inspect.isclass(test_decorated_function)) ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",c_api_internal.c,"@@ -172,6 +172,8 @@ const char* TfLiteTypeGetName(TfLiteType type) { return ""COMPLEX64""; case kTfLiteString: return ""STRING""; + case kTfLiteFloat16: + return ""FLOAT16""; } return ""Unknown type""; } ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",c_api_internal.h,"@@ -195,6 +195,11 @@ typedef struct { float re, im; // real and imaginary parts, respectively. 
} TfLiteComplex64; +// Half precision data type compatible with the C99 definition. +typedef struct { + uint16_t data; +} TfLiteFloat16; + // Types supported by tensor typedef enum { kTfLiteNoType = 0, @@ -207,6 +212,7 @@ typedef enum { kTfLiteInt16 = 7, kTfLiteComplex64 = 8, kTfLiteInt8 = 9, + kTfLiteFloat16 = 10, } TfLiteType; // Return the name of a given type, for error reporting purposes. @@ -259,6 +265,8 @@ typedef union { int32_t* i32; int64_t* i64; float* f; + // Placeholder for 16b float type. Use uint16* in the pointer union for now. + TfLiteFloat16* f16; char* raw; const char* raw_const; uint8_t* uint8; ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",c_api_internal_test.cc,"@@ -78,6 +78,7 @@ TEST(Types, TestTypeNames) { }; EXPECT_EQ(type_name(kTfLiteNoType), ""NOTYPE""); EXPECT_EQ(type_name(kTfLiteFloat32), ""FLOAT32""); + EXPECT_EQ(type_name(kTfLiteFloat16), ""FLOAT16""); EXPECT_EQ(type_name(kTfLiteInt16), ""INT16""); EXPECT_EQ(type_name(kTfLiteInt32), ""INT32""); EXPECT_EQ(type_name(kTfLiteUInt8), ""UINT8""); ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",flatbuffer_conversions.cc,"@@ -61,9 +61,8 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type, *type = kTfLiteFloat32; break; case TensorType_FLOAT16: - error_reporter->Report(""Unimplemented data type float16 in tensor\n"", - tensor_type); - return kTfLiteError; + *type = kTfLiteFloat16; + break; case TensorType_INT16: *type = kTfLiteInt16; break; ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",flatbuffer_conversions_test.cc,"@@ -141,6 +141,13 @@ TEST_F(FlatbufferConversionsTest, TestConvertTensorType) { EXPECT_EQ(kTfLiteFloat32, type); } +TEST_F(FlatbufferConversionsTest, TestConvertTensorTypeFloat16) { + TfLiteType type; + EXPECT_EQ(kTfLiteOk, + ConvertTensorType(TensorType_FLOAT16, &type, &mock_reporter_)); + EXPECT_EQ(kTfLiteFloat16, type); +} + } // namespace tflite int main(int argc, char** argv) { ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",subgraph.cc,"@@ -469,6 +469,9 @@ TfLiteStatus Subgraph::BytesRequired(TfLiteType type, const int* dims, case kTfLiteInt8: *bytes = sizeof(int8_t) * count; break; + case kTfLiteFloat16: + *bytes = sizeof(TfLiteFloat16) * count; + break; default: ReportError( ""Only float32, int8, int16, int32, int64, uint8, bool, complex64 "" ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",util.cc,"@@ -60,6 +60,8 @@ TF_DataType GetTensorFlowDataType(TfLiteType type) { return TF_FLOAT; case kTfLiteFloat32: return TF_FLOAT; + case kTfLiteFloat16: + return TF_HALF; case kTfLiteInt16: return TF_INT16; case kTfLiteInt32: @@ -83,6 +85,8 @@ TfLiteType GetTensorFlowLiteType(TF_DataType type) { switch (type) { case TF_FLOAT: return kTfLiteFloat32; + case TF_HALF: + return kTfLiteFloat16; case TF_INT16: return kTfLiteInt16; case TF_INT32: ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",util_test.cc,"@@ -101,9 +101,9 @@ TEST(UtilTest, CopyShapeAndType) { EXPECT_EQ( 
CopyShapeAndType(&context, Tensor(tensorflow::DT_HALF, {1, 2}), &dst), - kTfLiteError); - EXPECT_EQ(context.error, - ""TF Lite does not support TensorFlow data type: half""); + kTfLiteOk); + EXPECT_THAT(context.new_size, ElementsAre(1, 2)); + EXPECT_EQ(dst.type, kTfLiteFloat16); } TEST(UtilTest, TypeConversionsFromTFLite) { ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",TFLTensor.h,"@@ -29,6 +29,9 @@ typedef NS_ENUM(NSUInteger, TFLTensorDataType) { /** 32-bit single precision floating point. */ TFLTensorDataTypeFloat32, + /** 16-bit half precision floating point. */ + TFLTensorDataTypeFloat16, + /** 32-bit signed integer. */ TFLTensorDataTypeInt32, ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",enum_mapping.h,"@@ -62,6 +62,8 @@ inline TensorType TfLiteTypeToSchemaType(TfLiteType type) { return TensorType_FLOAT32; // TODO(aselle): Consider an error. case kTfLiteFloat32: return TensorType_FLOAT32; + case kTfLiteFloat16: + return TensorType_FLOAT16; case kTfLiteInt32: return TensorType_INT32; case kTfLiteUInt8: ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",interpreter.cc,"@@ -30,6 +30,11 @@ limitations under the License. #include ""tensorflow/lite/schema/schema_generated.h"" #include ""tensorflow/lite/util.h"" +// TODO(b/132087118): move static_assert to c_api_internal when compiled with +// C++. +static_assert(sizeof(TfLiteFloat16) == sizeof(uint16_t), + ""Float 16 type must be 16 bits.""); + namespace tflite { namespace { ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",interpreter.h,"@@ -74,6 +74,10 @@ constexpr TfLiteType typeToTfLiteType() { return kTfLiteString; } +template <> +constexpr TfLiteType typeToTfLiteType() { + return kTfLiteFloat16; +} // An interpreter for a graph of nodes that input and output from tensors. // Each node of the graph processes a set of input tensors and produces a // set of output Tensors. All inputs/output tensors are referenced by index. ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",interpreter_test.cc,"@@ -17,6 +17,7 @@ limitations under the License. 
#include #include +#include ""third_party/eigen3/Eigen/Core"" #include ""tensorflow/lite/core/api/error_reporter.h"" #include ""tensorflow/lite/kernels/internal/compatibility.h"" #include ""tensorflow/lite/kernels/kernel_util.h"" @@ -165,7 +166,7 @@ TEST(BasicInterpreter, CheckAllocate) { } cases[] = { {kTfLiteFloat32, sizeof(float)}, {kTfLiteInt32, sizeof(int32_t)}, {kTfLiteUInt8, sizeof(uint8_t)}, {kTfLiteInt64, sizeof(int64_t)}, - {kTfLiteInt16, sizeof(int16_t)}, + {kTfLiteInt16, sizeof(int16_t)}, {kTfLiteFloat16, sizeof(TfLiteFloat16)}, }; for (auto test : cases) { @@ -238,6 +239,8 @@ TEST(BasicInterpreter, CheckResize) { const uint8_t uint8s[] = {3, 4}; const int64_t int64s[] = {6, -7}; const int16_t int16s[] = {8, -9}; + const Eigen::half float16s[] = {Eigen::half_impl::float_to_half_rtne(-3.f), + Eigen::half_impl::float_to_half_rtne(-4.f)}; struct { TfLiteType type; @@ -249,6 +252,8 @@ TEST(BasicInterpreter, CheckResize) { {kTfLiteUInt8, sizeof(uint8_t), reinterpret_cast(uint8s)}, {kTfLiteInt64, sizeof(int64_t), reinterpret_cast(int64s)}, {kTfLiteInt16, sizeof(int16_t), reinterpret_cast(int16s)}, + {kTfLiteFloat16, sizeof(TfLiteFloat16), + reinterpret_cast(float16s)}, }; for (auto test : cases) { @@ -283,10 +288,8 @@ TEST(BasicInterpreter, CheckResize) { TEST(BasicInterpreter, CheckAlignment) { struct { TfLiteType type; - } cases[] = { - {kTfLiteFloat32}, {kTfLiteInt32}, {kTfLiteUInt8}, - {kTfLiteInt64}, {kTfLiteInt16}, - }; + } cases[] = {{kTfLiteFloat32}, {kTfLiteInt32}, {kTfLiteUInt8}, + {kTfLiteInt64}, {kTfLiteInt16}, {kTfLiteFloat16}}; for (auto test : cases) { Interpreter interpreter; ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",tensor_ctypes.h,"@@ -66,6 +66,11 @@ inline const float* GetTensorData(const TfLiteTensor* tensor) { return tensor != nullptr ? tensor->data.f : nullptr; } +template <> +inline const TfLiteFloat16* GetTensorData(const TfLiteTensor* tensor) { + return tensor != nullptr ? tensor->data.f16 : nullptr; +} + template <> inline const uint8_t* GetTensorData(const TfLiteTensor* tensor) { return tensor != nullptr ? tensor->data.uint8 : nullptr; ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",test_util.h,"@@ -20,7 +20,6 @@ limitations under the License. 
#include #include - #include ""tensorflow/core/platform/logging.h"" #include ""tensorflow/lite/interpreter.h"" #include ""tensorflow/lite/kernels/internal/tensor_utils.h"" @@ -568,6 +567,7 @@ class SingleOpTest : public ::testing::TestWithParam { template TensorType GetTensorType() { if (std::is_same::value) return TensorType_FLOAT32; + if (std::is_same::value) return TensorType_FLOAT16; if (std::is_same::value) return TensorType_INT32; if (std::is_same::value) return TensorType_UINT8; if (std::is_same::value) return TensorType_STRING; ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",optional_debug_tools.cc,"@@ -56,6 +56,8 @@ const char* TensorTypeName(TfLiteType type) { return ""kTfLiteInt16""; case kTfLiteComplex64: return ""kTfLiteComplex64""; + case kTfLiteFloat16: + return ""kTfLiteFloat16""; } return ""(invalid)""; } ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",python_utils.cc,"@@ -32,6 +32,8 @@ int TfLiteTypeToPyArrayType(TfLiteType tf_lite_type) { switch (tf_lite_type) { case kTfLiteFloat32: return NPY_FLOAT32; + case kTfLiteFloat16: + return NPY_FLOAT16; case kTfLiteInt32: return NPY_INT32; case kTfLiteInt16: ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",calibration_wrapper.cc,"@@ -61,6 +61,8 @@ inline TensorType TfLiteTypeToSchemaType(TfLiteType type) { return TensorType_FLOAT32; // TODO(b/129336260): No schema type for none. case kTfLiteFloat32: return TensorType_FLOAT32; + case kTfLiteFloat16: + return TensorType_FLOAT16; case kTfLiteInt32: return TensorType_INT32; case kTfLiteUInt8: ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",util.py,"@@ -31,6 +31,7 @@ from tensorflow.python.training.saver import export_meta_graph as _export_meta_g # Map of tf.dtypes to TFLite types_flag_pb2. _MAP_TF_TO_TFLITE_TYPES = { dtypes.float32: _types_pb2.FLOAT, + dtypes.float16: _types_pb2.FLOAT16, dtypes.int32: _types_pb2.INT32, dtypes.int64: _types_pb2.INT64, dtypes.string: _types_pb2.STRING, ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",util_test.py,"@@ -50,6 +50,8 @@ class UtilTest(test_util.TensorFlowTestCase): self.assertEqual( util.convert_dtype_to_tflite_type(dtypes.complex64), _types_pb2.COMPLEX64) + self.assertEqual( + util.convert_dtype_to_tflite_type(dtypes.half), _types_pb2.FLOAT16) with self.assertRaises(ValueError): util.convert_dtype_to_tflite_type(dtypes.bool) ",0,train c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite PiperOrigin-RevId: 247082214",model.h,"@@ -223,6 +223,7 @@ enum class ArrayDataType : uint8 { kUint64, // 10 kString, kComplex64, + kFloat16, }; // Compile-time logic to map ArrayDataType to the corresponding C++ scalar type ",0,train e74a115bcf5cd27f476b46161a639e9ec599491d,tensorflow/tensorflow,"[TF-numpy] Adds __rmatmul__ method to ndarray. 
PiperOrigin-RevId: 317771125 Change-Id: I719c46d97ae1c68ac59dcd1cf8f65d067ddc7658",np_math_ops.py,"@@ -950,11 +950,12 @@ setattr(np_arrays.ndarray, '__sub__', _wrap(subtract)) setattr(np_arrays.ndarray, '__rsub__', _wrap(subtract, True)) setattr(np_arrays.ndarray, '__mul__', _wrap(multiply)) setattr(np_arrays.ndarray, '__rmul__', _wrap(multiply, True)) +setattr(np_arrays.ndarray, '__matmul__', _wrap(matmul)) +setattr(np_arrays.ndarray, '__rmatmul__', _wrap(matmul, True)) setattr(np_arrays.ndarray, '__pow__', _wrap(power)) setattr(np_arrays.ndarray, '__rpow__', _wrap(power, True)) setattr(np_arrays.ndarray, '__truediv__', _wrap(true_divide)) setattr(np_arrays.ndarray, '__rtruediv__', _wrap(true_divide, True)) -setattr(np_arrays.ndarray, '__matmul__', _wrap(matmul)) def _comparison(tf_fun, x1, x2, cast_bool_to_int=False): ",0,train 9fda694598631b6207f64fa5c39e6f76dca313a9,tensorflow/tensorflow,"Release the GIL when waiting for pending async ops Prevents deadlocks with PyFunc ops PiperOrigin-RevId: 361689648 Change-Id: I2e20ddae99e8ca5dad324ac9a13941cf93cf4c6f",core_test.py,"@@ -598,6 +598,12 @@ class TFETest(test_util.TensorFlowTestCase): self.assertAllEqual(test_fn(test_var), 3.0) async_executor.wait() + with context.executor_scope(async_executor): + test_var = variables.Variable(2.) + result = test_fn(test_var) + context.async_wait() + self.assertAllEqual(result, 3.0) + @test_util.run_gpu_only def testNumpyForceCPU(self): cpu = constant_op.constant([[1., 2.], [3., 4.]]) ",0,train 9fda694598631b6207f64fa5c39e6f76dca313a9,tensorflow/tensorflow,"Release the GIL when waiting for pending async ops Prevents deadlocks with PyFunc ops PiperOrigin-RevId: 361689648 Change-Id: I2e20ddae99e8ca5dad324ac9a13941cf93cf4c6f",tfe_wrapper.cc,"@@ -753,13 +753,19 @@ PYBIND11_MODULE(_pywrap_tfe, m) { m.def(""TFE_ContextSyncExecutors"", [](py::handle& ctx) { tensorflow::Safe_TF_StatusPtr status = tensorflow::make_safe(TF_NewStatus()); + // NOTE: release Python GIL for pending PyFunc ops to be executed properly. + Py_BEGIN_ALLOW_THREADS; TFE_ContextAsyncWait(tensorflow::InputTFE_Context(ctx), status.get()); + Py_END_ALLOW_THREADS; tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); }); m.def(""TFE_ContextClearExecutors"", [](py::handle& ctx) { tensorflow::Safe_TF_StatusPtr status = tensorflow::make_safe(TF_NewStatus()); + // NOTE: release Python GIL for pending PyFunc ops to be executed properly. + Py_BEGIN_ALLOW_THREADS; TFE_ContextAsyncWait(tensorflow::InputTFE_Context(ctx), status.get()); + Py_END_ALLOW_THREADS; // NOTE: different from TFE_ContextSyncExecutors that raises potential // errors, deliberately ignore executor statuses in cleanup. }); ",0,train bd464260b8eab307a1672fe16d5c26bc0d681ad5,tensorflow/tensorflow,Format comments,filesystem_interface.h,"@@ -836,27 +836,38 @@ typedef struct TF_FilesystemOps { /// `num_options`. Ownership of the array is transferred to caller and the /// caller is responsible of freeing the buffers using respective file systems /// allocation API. + /// Plugins: + /// * Must set `status` to `TF_OK` if `options` and `num_options` set. + /// If there is no configurable option, `num_options` should be 0. + /// * Might use any other error value for `status` to signal other errors. + /// DEFAULT IMPLEMENTATION: return 0 options and `TF_OK`. void (*get_filesystem_configuration)(const TF_Filesystem* filesystem, TF_Filesystem_Option** options, int* num_options, TF_Status* status); /// Updates filesystem configuration with options passed in `options`. 
It can /// contain full set of options supported by the filesystem or just a subset - /// of them. Ownership of options and buffers therein belongs to the caller and - /// any buffers need to be allocated through filesystem allocation API. On - /// success should return TF_OK in `status`. On failure, it should return a relevant error - /// code. Filesystems may choose to ignore configuration errors but should at - /// least display a warning or error message to warn the users. + /// of them. Ownership of options and buffers therein belongs to the caller + /// and any buffers need to be allocated through filesystem allocation API. + /// Filesystems may choose to ignore configuration errors but should at least + /// display a warning or error message to warn the users. Plugins: + /// * Must set `status` to `TF_OK` if options are updated. + /// * Might use any other error value for `status` to signal other errors. + /// DEFAULT IMPLEMENTATION: return `TF_NOT_FOUND`. void (*set_filesystem_configuration)(const TF_Filesystem* filesystem, const TF_Filesystem_Option** options, int num_options, TF_Status* status); /// Returns the value of the filesystem option given in `key` in `option`. /// Valid values of the `key` are returned by - /// `get_file_system_configuration_keys` call. This method should return TF_OK - /// on success, TF_NOT_FOUND if the key does not exist. Ownership of the + /// `get_file_system_configuration_keys` call. Ownership of the /// `option` is transferred to caller. Buffers therein should be allocated and /// freed by the relevant filesystems allocation API. + /// Plugins: + /// * Must set `status` to `TF_OK` if `option` is set + /// * Must set `status` to `TF_NOT_FOUND` if the key is invalid + /// * Might use any other error value for `status` to signal other errors. + /// DEFAULT IMPLEMENTATION: return `TF_NOT_FOUND`. void (*get_filesystem_configuration_option)(const TF_Filesystem* filesystem, const char* key, TF_Filesystem_Option** option, @@ -864,10 +875,13 @@ typedef struct TF_FilesystemOps { /// Sets the value of the filesystem option given in `key` to value in /// `option`. Valid values of the `key` are returned by - /// `get_file_system_configuration_keys` call. This method should return TF_OK - /// on success, TF_NOT_FOUND if the key does not exist or other relevant error - /// codes. `option` and the `key` are owned by the caller. Buffers therein - /// should be allocated and freed by the filesystems allocation API. + /// `get_file_system_configuration_keys` call. Ownership of the `option` and + /// the `key` belogs to the caller. Buffers therein should be allocated and + /// freed by the filesystems allocation API. Plugins: + /// * Must set `status` to `TF_OK` if `option` is set/updated + /// * Must set `status` to `TF_NOT_FOUND` if the key is invalid + /// * Might use any other error value for `status` to signal other errors. + /// DEFAULT IMPLEMENTATION: return `TF_NOT_FOUND`. void (*set_filesystem_configuration_option)( const TF_Filesystem* filesystem, const TF_Filesystem_Option* option, TF_Status* status); @@ -875,6 +889,11 @@ typedef struct TF_FilesystemOps { /// Returns a list of valid configuration keys in `keys` array and number of /// keys in `num_keys`. Ownership of the buffers in `keys` are transferred to /// caller and needs to be freed using relevant filesystem allocation API. + /// Plugins: + /// * Must set `status` to `TF_OK` on success. 
If there are no configurable + /// keys, `num_keys` should be set to 0 + /// * Might use any other error value for `status` to signal other errors. + /// DEFAULT IMPLEMENTATION: return `TF_OK` and `num_keys`=0. void (*get_filesystem_configuration_keys)(const TF_Filesystem* filesystem, char** keys, int* num_keys, TF_Status* status); ",0,train 2773c6370d9afb529f4aba4fe852a6ad38823da4,tensorflow/tensorflow,"Internal change PiperOrigin-RevId: 252093638",subgraph.cc,"@@ -288,11 +288,6 @@ TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels( return kTfLiteOk; } - TFLITE_LOG(tflite::TFLITE_LOG_INFO, - ""Replacing %d node(s) with delegate (%s) node."", - nodes_to_replace->size, - registration.custom_name ? registration.custom_name : ""unknown""); - // Annotate the registration as DELEGATE op. registration.builtin_code = BuiltinOperator_DELEGATE; @@ -303,6 +298,13 @@ TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels( PartitionGraphIntoIndependentNodeSubsets(&info, nodes_to_replace, &node_subsets); + TFLITE_LOG( + tflite::TFLITE_LOG_INFO, + ""Replacing %d node(s) with delegate (%s) node, yielding %zu partitions."", + nodes_to_replace->size, + registration.custom_name ? registration.custom_name : ""unknown"", + node_subsets.size()); + execution_plan_.clear(); for (auto& node_subset : node_subsets) { ",0,train ebd59a6298faea4a590e4eafedd06c91fbe3995e,tensorflow/tensorflow,add & for runner,dataset_test_base.cc,"@@ -333,7 +333,7 @@ Status DatasetOpsTestBase::InitFunctionLibraryRuntime( nullptr /* cluster_flr */); flr_ = pflr_->GetFLR(""/job:localhost/replica:0/task:0/cpu:0""); if (thread_pool_ == nullptr) { - runner_ = [](const std::function fn) { fn(); }; + runner_ = [](const std::function& fn) { fn(); }; } else { runner_ = [this](std::function fn) { thread_pool_->Schedule(std::move(fn)); ",0,test ebd59a6298faea4a590e4eafedd06c91fbe3995e,tensorflow/tensorflow,add & for runner,single_threaded_executor_test.cc,"@@ -68,7 +68,7 @@ class ExecutorTest : public ::testing::Test { }; delete exec_; TF_CHECK_OK(NewSingleThreadedExecutor(params, *graph, &exec_)); - runner_ = [](const std::function fn) { fn(); }; + runner_ = [](const std::function& fn) { fn(); }; rendez_ = NewLocalRendezvous(); } ",0,test 37eed82574c622bf91d72000b7b6ebdc92c9317c,tensorflow/tensorflow,"Removing unused options from batch_ops.batch_function(). PiperOrigin-RevId: 203489357",batch_ops.py,"@@ -58,8 +58,6 @@ def batch_function(num_batch_threads, max_batch_size, batch_timeout_micros, allowed_batch_sizes=None, - grad_timeout_micros=60 * 1000 * 1000, - unbatch_timeout_micros=60 * 1000 * 1000, max_enqueued_batches=10): """"""Batches the computation done by the decorated function. @@ -94,10 +92,6 @@ def batch_function(num_batch_threads, does nothing. Otherwise, supplies a list of batch sizes, causing the op to pad batches up to one of those sizes. The entries must increase monotonically, and the final entry must equal max_batch_size. - grad_timeout_micros: The timeout to use for the gradient. See the - documentation of the unbatch op for more details. Defaults to 60s. - unbatch_timeout_micros: The timeout to use for unbatching. See the - documentation of the unbatch op for more details. Defaults to 60s. max_enqueued_batches: The maximum depth of the batch queue. Defaults to 10. Returns: ",0,train 9d703eecbfca400f2a1d4786050e171a94696117,tensorflow/tensorflow,"Add `DistributionStrategy.experimental_run_v2`. 
PiperOrigin-RevId: 237071002",distribute_lib.py,"@@ -437,7 +437,7 @@ class DistributionStrategy(object): """"""Runs ops in `fn` on each replica, with inputs from `input_iterator`. When eager execution is enabled, executes ops specified by `fn` on each - replica. Otherwise, builds a graph to execute the ops on each replica. + replica. Otherwise, builds a graph to execute the ops on each replica. Each replica will take a single, different input from the inputs provided by one `get_next` call on the input iterator. @@ -445,13 +445,13 @@ class DistributionStrategy(object): `fn` may call `tf.distribute.get_replica_context()` to access members such as `replica_id_in_sync_group`. - IMPORTANT: Depending on the `DistributionStrategy` being used, and whether - eager execution is enabled, `fn` may be called one or more times (once for - each replica). + IMPORTANT: Depending on the `tf.distribute.Strategy` implementation being + used, and whether eager execution is enabled, `fn` may be called one or more + times (once for each replica). Args: - fn: function to run. The inputs to the function must match the outputs of - `input_iterator.get_next()`. The output must be a `tf.nest` of + fn: The function to run. The inputs to the function must match the outputs + of `input_iterator.get_next()`. The output must be a `tf.nest` of `Tensor`s. input_iterator: (Optional) input iterator from which the inputs are taken. @@ -463,11 +463,36 @@ class DistributionStrategy(object): single replica). """""" with self.scope(): - if input_iterator is None: - return self._extended.call_for_each_replica(fn) - else: - inputs = input_iterator.get_next() - return self._extended.call_for_each_replica(fn, args=(inputs,)) + args = (input_iterator.get_next(),) if input_iterator is not None else () + return self.experimental_run_v2(fn, args=args) + + def experimental_run_v2(self, fn, args=(), kwargs=None): + """"""Runs ops in `fn` on each replica, with the given arguments. + + When eager execution is enabled, executes ops specified by `fn` on each + replica. Otherwise, builds a graph to execute the ops on each replica. + + `fn` may call `tf.distribute.get_replica_context()` to access members such + as `replica_id_in_sync_group`. + + IMPORTANT: Depending on the `tf.distribute.Strategy` implementation being + used, and whether eager execution is enabled, `fn` may be called one or more + times (once for each replica). + + Args: + fn: The function to run. The output must be a `tf.nest` of `Tensor`s. + args: (Optional) Positional arguments to `fn`. + kwargs: (Optional) Keyword arguments to `fn`. + + Returns: + Merged return value of `fn` across replicas. The structure of the return + value is the same as the return value from `fn`. Each element in the + structure can either be `PerReplica` (if the values are unsynchronized), + `Mirrored` (if the values are kept in sync), or `Tensor` (if running on a + single replica). + """""" + with self.scope(): + return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs) def reduce(self, reduce_op, value): """"""Reduce `value` across replicas. ",0,test 9d703eecbfca400f2a1d4786050e171a94696117,tensorflow/tensorflow,"Add `DistributionStrategy.experimental_run_v2`. PiperOrigin-RevId: 237071002",tpu_strategy.py,"@@ -154,32 +154,28 @@ class TPUStrategy(distribute_lib.DistributionStrategy): # TODO(cjfj): Modify `_call_for_each_replica` in `TPUExtended` such that this # can use the default implementation. # This implementation runs a single step. It does not use infeed or outfeed. 
- def experimental_run(self, fn, input_iterator=None): + def experimental_run_v2(self, fn, args=(), kwargs=None): """"""See base class."""""" if context.executing_eagerly() and not ops.inside_function(): raise NotImplementedError( ""Eager mode not supported in TPUStrategy outside TF functions."") - if input_iterator is None: - inputs = [] - else: - inputs = input_iterator.get_next() + if kwargs is None: + kwargs = {} result = [None] - def replicated_fn(replica_id, replica_input): + def replicated_fn(replica_id, replica_args, replica_kwargs): """"""Wraps user function to provide replica ID and `Tensor` inputs."""""" with _TPUReplicaContext(self, replica_id_in_sync_group=replica_id): - if input_iterator is None: - result[0] = fn() - else: - result[0] = fn(replica_input) + result[0] = fn(*replica_args, **replica_kwargs) return result[0] replicate_inputs = [] # By replica. for i in range(self.num_replicas_in_sync): replicate_inputs.append( [constant_op.constant(i, dtype=dtypes.int32), - values.select_replica(i, inputs)]) + values.select_replica(i, args), + values.select_replica(i, kwargs)]) with self.scope(): replicate_outputs = tpu.replicate(replicated_fn, replicate_inputs) ",0,test 95fae75e4d15c59a43d8eaf150b8c32c7c6d1495,tensorflow/tensorflow,Fixing MklCPUAllocator error introduced by commit #1baac7862739525351d25202800dc04e8ec3868b,mkl_cpu_allocator.h,"@@ -25,7 +25,7 @@ limitations under the License. #include #include #include ""tensorflow/core/common_runtime/bfc_allocator.h"" -#include ""tensorflow/core/framework/allocator.h"" +#include ""tensorflow/core/common_runtime/visitable_allocator.h"" #include ""tensorflow/core/lib/strings/numbers.h"" #include ""tensorflow/core/lib/strings/str_util.h"" #include ""tensorflow/core/platform/mem.h"" @@ -161,7 +161,7 @@ class MklCPUAllocator : public VisitableAllocator { /// The alignment that we need for the allocations static const size_t kAlignment = 64; - Allocator* allocator_; // owned by this class + VisitableAllocator* allocator_; // owned by this class }; } // namespace tensorflow ",0,train 7047ceec37a3f004386621e8e56b825ab0d648a3,tensorflow/tensorflow,"Update sparse input documentation. PiperOrigin-RevId: 312789707 Change-Id: I09410e9adc25cfe6099cf1fd1a77edc3680a3a59",input_layer.py,"@@ -218,7 +218,9 @@ def Input( # pylint: disable=invalid-name dtype: The data type expected by the input, as a string (`float32`, `float64`, `int32`...) sparse: A boolean specifying whether the placeholder to be created is - sparse. Only one of 'ragged' and 'sparse' can be True. + sparse. Only one of 'ragged' and 'sparse' can be True. Note that, + if `sparse` is False, sparse tensors can still be passed into the + input - they will be densified with a default value of 0. tensor: Optional existing tensor to wrap into the `Input` layer. If set, the layer will not create a placeholder tensor. ragged: A boolean specifying whether the placeholder to be created is ",0,test b2e53b91019f9ab00fe133fe10b2d29bc7e5886c,tensorflow/tensorflow,"Making GetOptionalInput from kernel_util.h return a pointer to const data. 
PiperOrigin-RevId: 196932028",bidirectional_sequence_lstm.cc,"@@ -135,7 +135,7 @@ TfLiteStatus CheckLstmTensorDimensions( TF_LITE_ENSURE(context, params->cell_clip >= 0); TF_LITE_ENSURE(context, params->proj_clip >= 0); - TfLiteTensor* input_to_input_weights = + const TfLiteTensor* input_to_input_weights = GetOptionalInputTensor(context, node, input_to_input_weights_tensor); if (input_to_input_weights) { TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2); @@ -155,7 +155,7 @@ TfLiteStatus CheckLstmTensorDimensions( TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[0], n_cell); TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[1], n_input); - TfLiteTensor* recurrent_to_input_weights = + const TfLiteTensor* recurrent_to_input_weights = GetOptionalInputTensor(context, node, recurrent_to_input_weights_tensor); if (recurrent_to_input_weights) { TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->size, 2); @@ -189,21 +189,21 @@ TfLiteStatus CheckLstmTensorDimensions( (recurrent_to_input_weights == nullptr)); TF_LITE_ENSURE(context, cifg_weights_all_or_none == true); - TfLiteTensor* cell_to_input_weights = + const TfLiteTensor* cell_to_input_weights = GetOptionalInputTensor(context, node, cell_to_input_weights_tensor); if (cell_to_input_weights) { TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1); TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell); } - TfLiteTensor* cell_to_forget_weights = + const TfLiteTensor* cell_to_forget_weights = GetOptionalInputTensor(context, node, cell_to_forget_weights_tensor); if (cell_to_forget_weights) { TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1); TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell); } - TfLiteTensor* cell_to_output_weights = + const TfLiteTensor* cell_to_output_weights = GetOptionalInputTensor(context, node, cell_to_output_weights_tensor); if (cell_to_output_weights) { TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1); @@ -222,7 +222,7 @@ TfLiteStatus CheckLstmTensorDimensions( TF_LITE_ENSURE(context, peephole_weights_all_or_none == true); // Make sure the input gate bias is present only when not a CIFG-LSTM. 
- TfLiteTensor* input_gate_bias = + const TfLiteTensor* input_gate_bias = GetOptionalInputTensor(context, node, input_gate_bias_tensor); if (use_cifg) { TF_LITE_ENSURE_EQ(context, input_gate_bias, nullptr); @@ -246,7 +246,7 @@ TfLiteStatus CheckLstmTensorDimensions( TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->size, 1); TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->data[0], n_cell); - TfLiteTensor* projection_weights = + const TfLiteTensor* projection_weights = GetOptionalInputTensor(context, node, projection_weights_tensor); if (projection_weights) { TF_LITE_ENSURE_EQ(context, projection_weights->dims->size, 2); @@ -254,7 +254,7 @@ TfLiteStatus CheckLstmTensorDimensions( TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell); } - TfLiteTensor* projection_bias = + const TfLiteTensor* projection_bias = GetOptionalInputTensor(context, node, projection_bias_tensor); if (projection_bias) { TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1); @@ -374,7 +374,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { fw_output_state->allocation_type = kTfLiteArenaRwPersistent; fw_cell_state->allocation_type = kTfLiteArenaRwPersistent; - TfLiteTensor* fw_input_to_input_weights = + const TfLiteTensor* fw_input_to_input_weights = GetOptionalInputTensor(context, node, kFwInputToInputWeightsTensor); const bool fw_use_cifg = (fw_input_to_input_weights == nullptr); TfLiteIntArray* fw_scratch_buffer_size = TfLiteIntArrayCreate(2); @@ -442,7 +442,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { bw_output_state->allocation_type = kTfLiteArenaRwPersistent; bw_cell_state->allocation_type = kTfLiteArenaRwPersistent; - TfLiteTensor* bw_input_to_input_weights = + const TfLiteTensor* bw_input_to_input_weights = GetOptionalInputTensor(context, node, kBwInputToInputWeightsTensor); const bool bw_use_cifg = (bw_input_to_input_weights == nullptr); TfLiteIntArray* bw_scratch_buffer_size = TfLiteIntArrayCreate(2); @@ -470,7 +470,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const int n_input = input->dims->data[2]; // Tensors for the forward cell. 
- TfLiteTensor* fw_input_to_input_weights = + const TfLiteTensor* fw_input_to_input_weights = GetOptionalInputTensor(context, node, kFwInputToInputWeightsTensor); const TfLiteTensor* fw_input_to_forget_weights = GetInput(context, node, kFwInputToForgetWeightsTensor); @@ -479,7 +479,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* fw_input_to_output_weights = GetInput(context, node, kFwInputToOutputWeightsTensor); - TfLiteTensor* fw_recurrent_to_input_weights = + const TfLiteTensor* fw_recurrent_to_input_weights = GetOptionalInputTensor(context, node, kFwRecurrentToInputWeightsTensor); const TfLiteTensor* fw_recurrent_to_forget_weights = GetInput(context, node, kFwRecurrentToForgetWeightsTensor); @@ -488,14 +488,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* fw_recurrent_to_output_weights = GetInput(context, node, kFwRecurrentToOutputWeightsTensor); - TfLiteTensor* fw_cell_to_input_weights = + const TfLiteTensor* fw_cell_to_input_weights = GetOptionalInputTensor(context, node, kFwCellToInputWeightsTensor); - TfLiteTensor* fw_cell_to_forget_weights = + const TfLiteTensor* fw_cell_to_forget_weights = GetOptionalInputTensor(context, node, kFwCellToForgetWeightsTensor); - TfLiteTensor* fw_cell_to_output_weights = + const TfLiteTensor* fw_cell_to_output_weights = GetOptionalInputTensor(context, node, kFwCellToOutputWeightsTensor); - TfLiteTensor* fw_input_gate_bias = + const TfLiteTensor* fw_input_gate_bias = GetOptionalInputTensor(context, node, kFwInputGateBiasTensor); const TfLiteTensor* fw_forget_gate_bias = GetInput(context, node, kFwForgetGateBiasTensor); @@ -504,9 +504,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* fw_output_gate_bias = GetInput(context, node, kFwOutputGateBiasTensor); - TfLiteTensor* fw_projection_weights = + const TfLiteTensor* fw_projection_weights = GetOptionalInputTensor(context, node, kFwProjectionWeightsTensor); - TfLiteTensor* fw_projection_bias = + const TfLiteTensor* fw_projection_bias = GetOptionalInputTensor(context, node, kFwProjectionBiasTensor); TfLiteTensor* fw_output_state = @@ -515,7 +515,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* fw_output = GetOutput(context, node, kFwOutputTensor); // Tensors for the backward cell. 
- TfLiteTensor* bw_input_to_input_weights = + const TfLiteTensor* bw_input_to_input_weights = GetOptionalInputTensor(context, node, kBwInputToInputWeightsTensor); const TfLiteTensor* bw_input_to_forget_weights = GetInput(context, node, kBwInputToForgetWeightsTensor); @@ -524,7 +524,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* bw_input_to_output_weights = GetInput(context, node, kBwInputToOutputWeightsTensor); - TfLiteTensor* bw_recurrent_to_input_weights = + const TfLiteTensor* bw_recurrent_to_input_weights = GetOptionalInputTensor(context, node, kBwRecurrentToInputWeightsTensor); const TfLiteTensor* bw_recurrent_to_forget_weights = GetInput(context, node, kBwRecurrentToForgetWeightsTensor); @@ -533,14 +533,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* bw_recurrent_to_output_weights = GetInput(context, node, kBwRecurrentToOutputWeightsTensor); - TfLiteTensor* bw_cell_to_input_weights = + const TfLiteTensor* bw_cell_to_input_weights = GetOptionalInputTensor(context, node, kBwCellToInputWeightsTensor); - TfLiteTensor* bw_cell_to_forget_weights = + const TfLiteTensor* bw_cell_to_forget_weights = GetOptionalInputTensor(context, node, kBwCellToForgetWeightsTensor); - TfLiteTensor* bw_cell_to_output_weights = + const TfLiteTensor* bw_cell_to_output_weights = GetOptionalInputTensor(context, node, kBwCellToOutputWeightsTensor); - TfLiteTensor* bw_input_gate_bias = + const TfLiteTensor* bw_input_gate_bias = GetOptionalInputTensor(context, node, kBwInputGateBiasTensor); const TfLiteTensor* bw_forget_gate_bias = GetInput(context, node, kBwForgetGateBiasTensor); @@ -549,9 +549,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* bw_output_gate_bias = GetInput(context, node, kBwOutputGateBiasTensor); - TfLiteTensor* bw_projection_weights = + const TfLiteTensor* bw_projection_weights = GetOptionalInputTensor(context, node, kBwProjectionWeightsTensor); - TfLiteTensor* bw_projection_bias = + const TfLiteTensor* bw_projection_bias = GetOptionalInputTensor(context, node, kBwProjectionBiasTensor); TfLiteTensor* bw_output_state = ",0,train b2e53b91019f9ab00fe133fe10b2d29bc7e5886c,tensorflow/tensorflow,"Making GetOptionalInput from kernel_util.h return a pointer to const data. PiperOrigin-RevId: 196932028",fully_connected.cc,"@@ -91,7 +91,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor); - TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); // Check all the parameters of tensor match within themselves and match the @@ -347,7 +347,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor); - TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); switch (filter->type) { // Already know in/out types are same. ",0,train b2e53b91019f9ab00fe133fe10b2d29bc7e5886c,tensorflow/tensorflow,"Making GetOptionalInput from kernel_util.h return a pointer to const data. 
PiperOrigin-RevId: 196932028",kernel_util.h,"@@ -47,8 +47,9 @@ inline int64_t NumElements(const TfLiteTensor* t) { return count; } -inline TfLiteTensor* GetOptionalInputTensor(TfLiteContext* context, - const TfLiteNode* node, int index) { +inline const TfLiteTensor* GetOptionalInputTensor(TfLiteContext* context, + const TfLiteNode* node, + int index) { const bool use_tensor = node->inputs->data[index] != kOptionalTensor; if (use_tensor) { return &context->tensors[node->inputs->data[index]]; ",0,train b2e53b91019f9ab00fe133fe10b2d29bc7e5886c,tensorflow/tensorflow,"Making GetOptionalInput from kernel_util.h return a pointer to const data. PiperOrigin-RevId: 196932028",lstm.cc,"@@ -92,7 +92,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, TF_LITE_ENSURE(context, params->cell_clip >= 0); TF_LITE_ENSURE(context, params->proj_clip >= 0); - TfLiteTensor* input_to_input_weights = + const TfLiteTensor* input_to_input_weights = GetOptionalInputTensor(context, node, kInputToInputWeightsTensor); if (input_to_input_weights) { TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2); @@ -112,7 +112,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[0], n_cell); TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[1], n_input); - TfLiteTensor* recurrent_to_input_weights = + const TfLiteTensor* recurrent_to_input_weights = GetOptionalInputTensor(context, node, kRecurrentToInputWeightsTensor); if (recurrent_to_input_weights) { TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->size, 2); @@ -146,21 +146,21 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, (recurrent_to_input_weights == nullptr)); TF_LITE_ENSURE(context, cifg_weights_all_or_none == true); - TfLiteTensor* cell_to_input_weights = + const TfLiteTensor* cell_to_input_weights = GetOptionalInputTensor(context, node, kCellToInputWeightsTensor); if (cell_to_input_weights) { TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1); TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell); } - TfLiteTensor* cell_to_forget_weights = + const TfLiteTensor* cell_to_forget_weights = GetOptionalInputTensor(context, node, kCellToForgetWeightsTensor); if (cell_to_forget_weights) { TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1); TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell); } - TfLiteTensor* cell_to_output_weights = + const TfLiteTensor* cell_to_output_weights = GetOptionalInputTensor(context, node, kCellToOutputWeightsTensor); if (cell_to_output_weights) { TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1); @@ -179,7 +179,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, TF_LITE_ENSURE(context, peephole_weights_all_or_none == true); // Make sure the input gate bias is present only when not a CIFG-LSTM. 
- TfLiteTensor* input_gate_bias = + const TfLiteTensor* input_gate_bias = GetOptionalInputTensor(context, node, kInputGateBiasTensor); if (use_cifg) { TF_LITE_ENSURE_EQ(context, input_gate_bias, nullptr); @@ -202,7 +202,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->size, 1); TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->data[0], n_cell); - TfLiteTensor* projection_weights = + const TfLiteTensor* projection_weights = GetOptionalInputTensor(context, node, kProjectionWeightsTensor); if (projection_weights) { TF_LITE_ENSURE_EQ(context, projection_weights->dims->size, 2); @@ -210,7 +210,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell); } - TfLiteTensor* projection_bias = + const TfLiteTensor* projection_bias = GetOptionalInputTensor(context, node, kProjectionBiasTensor); if (projection_bias) { TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1); @@ -298,7 +298,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_state->allocation_type = kTfLiteArenaRwPersistent; cell_state->allocation_type = kTfLiteArenaRwPersistent; - TfLiteTensor* input_to_input_weights = + const TfLiteTensor* input_to_input_weights = GetOptionalInputTensor(context, node, kInputToInputWeightsTensor); const bool use_cifg = (input_to_input_weights == nullptr); if (use_cifg) { @@ -324,7 +324,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* input_to_input_weights = + const TfLiteTensor* input_to_input_weights = GetOptionalInputTensor(context, node, kInputToInputWeightsTensor); const TfLiteTensor* input_to_forget_weights = GetInput(context, node, kInputToForgetWeightsTensor); @@ -333,7 +333,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input_to_output_weights = GetInput(context, node, kInputToOutputWeightsTensor); - TfLiteTensor* recurrent_to_input_weights = + const TfLiteTensor* recurrent_to_input_weights = GetOptionalInputTensor(context, node, kRecurrentToInputWeightsTensor); const TfLiteTensor* recurrent_to_forget_weights = GetInput(context, node, kRecurrentToForgetWeightsTensor); @@ -342,14 +342,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* recurrent_to_output_weights = GetInput(context, node, kRecurrentToOutputWeightsTensor); - TfLiteTensor* cell_to_input_weights = + const TfLiteTensor* cell_to_input_weights = GetOptionalInputTensor(context, node, kCellToInputWeightsTensor); - TfLiteTensor* cell_to_forget_weights = + const TfLiteTensor* cell_to_forget_weights = GetOptionalInputTensor(context, node, kCellToForgetWeightsTensor); - TfLiteTensor* cell_to_output_weights = + const TfLiteTensor* cell_to_output_weights = GetOptionalInputTensor(context, node, kCellToOutputWeightsTensor); - TfLiteTensor* input_gate_bias = + const TfLiteTensor* input_gate_bias = GetOptionalInputTensor(context, node, kInputGateBiasTensor); const TfLiteTensor* forget_gate_bias = GetInput(context, node, kForgetGateBiasTensor); @@ -357,9 +357,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* output_gate_bias = GetInput(context, node, kOutputGateBiasTensor); - TfLiteTensor* projection_weights = + const TfLiteTensor* projection_weights = GetOptionalInputTensor(context, node, 
kProjectionWeightsTensor); - TfLiteTensor* projection_bias = + const TfLiteTensor* projection_bias = GetOptionalInputTensor(context, node, kProjectionBiasTensor); TfLiteTensor* output_state = GetOutput(context, node, kOutputStateTensor); ",0,train b2e53b91019f9ab00fe133fe10b2d29bc7e5886c,tensorflow/tensorflow,"Making GetOptionalInput from kernel_util.h return a pointer to const data. PiperOrigin-RevId: 196932028",pad.cc,"@@ -45,7 +45,7 @@ struct PadContext { output = GetOutput(context, node, 0); dims = NumDimensions(input); } - TfLiteTensor* constant_values; + const TfLiteTensor* constant_values; const TfLiteTensor* input; const TfLiteTensor* paddings; TfLiteTensor* output; ",0,train b2e53b91019f9ab00fe133fe10b2d29bc7e5886c,tensorflow/tensorflow,"Making GetOptionalInput from kernel_util.h return a pointer to const data. PiperOrigin-RevId: 196932028",svdf.cc,"@@ -74,7 +74,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ASSERT_EQ(input->dims->data[1], weights_feature->dims->data[1]); TF_LITE_ASSERT_EQ(weights_time->dims->data[0], num_filters); - TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); if (bias) { TF_LITE_ASSERT_EQ(bias->dims->data[0], num_units); } @@ -134,7 +134,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* output = GetOutput(context, node, kOutputTensor); TfLiteTensor* scratch = GetTemporary(context, node, /*index=*/0); - TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); const int rank = params->rank; const int batch_size = input->dims->data[0]; ",0,train b2e53b91019f9ab00fe133fe10b2d29bc7e5886c,tensorflow/tensorflow,"Making GetOptionalInput from kernel_util.h return a pointer to const data. 
PiperOrigin-RevId: 196932028",unidirectional_sequence_lstm.cc,"@@ -92,7 +92,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, TF_LITE_ENSURE(context, params->cell_clip >= 0); TF_LITE_ENSURE(context, params->proj_clip >= 0); - TfLiteTensor* input_to_input_weights = + const TfLiteTensor* input_to_input_weights = GetOptionalInputTensor(context, node, kInputToInputWeightsTensor); if (input_to_input_weights) { TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2); @@ -112,7 +112,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[0], n_cell); TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[1], n_input); - TfLiteTensor* recurrent_to_input_weights = + const TfLiteTensor* recurrent_to_input_weights = GetOptionalInputTensor(context, node, kRecurrentToInputWeightsTensor); if (recurrent_to_input_weights) { TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->size, 2); @@ -146,21 +146,21 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, (recurrent_to_input_weights == nullptr)); TF_LITE_ENSURE(context, cifg_weights_all_or_none == true); - TfLiteTensor* cell_to_input_weights = + const TfLiteTensor* cell_to_input_weights = GetOptionalInputTensor(context, node, kCellToInputWeightsTensor); if (cell_to_input_weights) { TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1); TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell); } - TfLiteTensor* cell_to_forget_weights = + const TfLiteTensor* cell_to_forget_weights = GetOptionalInputTensor(context, node, kCellToForgetWeightsTensor); if (cell_to_forget_weights) { TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1); TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell); } - TfLiteTensor* cell_to_output_weights = + const TfLiteTensor* cell_to_output_weights = GetOptionalInputTensor(context, node, kCellToOutputWeightsTensor); if (cell_to_output_weights) { TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1); @@ -179,7 +179,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, TF_LITE_ENSURE(context, peephole_weights_all_or_none == true); // Make sure the input gate bias is present only when not a CIFG-LSTM. 
- TfLiteTensor* input_gate_bias = + const TfLiteTensor* input_gate_bias = GetOptionalInputTensor(context, node, kInputGateBiasTensor); if (use_cifg) { TF_LITE_ENSURE_EQ(context, input_gate_bias, nullptr); @@ -202,7 +202,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->size, 1); TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->data[0], n_cell); - TfLiteTensor* projection_weights = + const TfLiteTensor* projection_weights = GetOptionalInputTensor(context, node, kProjectionWeightsTensor); if (projection_weights) { TF_LITE_ENSURE_EQ(context, projection_weights->dims->size, 2); @@ -210,7 +210,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell); } - TfLiteTensor* projection_bias = + const TfLiteTensor* projection_bias = GetOptionalInputTensor(context, node, kProjectionBiasTensor); if (projection_bias) { TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1); @@ -300,7 +300,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_state->allocation_type = kTfLiteArenaRwPersistent; cell_state->allocation_type = kTfLiteArenaRwPersistent; - TfLiteTensor* input_to_input_weights = + const TfLiteTensor* input_to_input_weights = GetOptionalInputTensor(context, node, kInputToInputWeightsTensor); const bool use_cifg = (input_to_input_weights == nullptr); if (use_cifg) { @@ -326,7 +326,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* input_to_input_weights = + const TfLiteTensor* input_to_input_weights = GetOptionalInputTensor(context, node, kInputToInputWeightsTensor); const TfLiteTensor* input_to_forget_weights = GetInput(context, node, kInputToForgetWeightsTensor); @@ -335,7 +335,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input_to_output_weights = GetInput(context, node, kInputToOutputWeightsTensor); - TfLiteTensor* recurrent_to_input_weights = + const TfLiteTensor* recurrent_to_input_weights = GetOptionalInputTensor(context, node, kRecurrentToInputWeightsTensor); const TfLiteTensor* recurrent_to_forget_weights = GetInput(context, node, kRecurrentToForgetWeightsTensor); @@ -344,14 +344,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* recurrent_to_output_weights = GetInput(context, node, kRecurrentToOutputWeightsTensor); - TfLiteTensor* cell_to_input_weights = + const TfLiteTensor* cell_to_input_weights = GetOptionalInputTensor(context, node, kCellToInputWeightsTensor); - TfLiteTensor* cell_to_forget_weights = + const TfLiteTensor* cell_to_forget_weights = GetOptionalInputTensor(context, node, kCellToForgetWeightsTensor); - TfLiteTensor* cell_to_output_weights = + const TfLiteTensor* cell_to_output_weights = GetOptionalInputTensor(context, node, kCellToOutputWeightsTensor); - TfLiteTensor* input_gate_bias = + const TfLiteTensor* input_gate_bias = GetOptionalInputTensor(context, node, kInputGateBiasTensor); const TfLiteTensor* forget_gate_bias = GetInput(context, node, kForgetGateBiasTensor); @@ -359,9 +359,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* output_gate_bias = GetInput(context, node, kOutputGateBiasTensor); - TfLiteTensor* projection_weights = + const TfLiteTensor* projection_weights = GetOptionalInputTensor(context, node, 
kProjectionWeightsTensor); - TfLiteTensor* projection_bias = + const TfLiteTensor* projection_bias = GetOptionalInputTensor(context, node, kProjectionBiasTensor); TfLiteTensor* output_state = GetOutput(context, node, kOutputStateTensor); ",0,train 42b0014216ab04f704967b722f7062df8a4180e1,tensorflow/tensorflow,"Add test cases of uint16, uint32, uint64 support for tf.math.[equal|not_equal] Signed-off-by: Yong Tang ",cwise_ops_binary_test.py,"@@ -948,6 +948,31 @@ class ComparisonOpTest(test.TestCase): ""Incompatible shapes|Dimensions must be equal""): f(x.astype(t), y.astype(t)) + def testEqualDType(self): + dtypes = [np.float16, np.float32, np.float64, np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64] + x = np.asarray([0, 1, 2, 3, 4]) + y = np.asarray([0, 1, 2, 3, 4]) + for dtype in dtypes: + xt = x.astype(dtype) + yt = y.astype(dtype) + cmp_eq = math_ops.equal(xt, yt) + cmp_ne = math_ops.not_equal(xt, yt) + values = self.evaluate([cmp_eq, cmp_ne]) + self.assertAllEqual( + [[True, True, True, True, True], + [False, False, False, False, False]], values) + for dtype in [np.complex64, np.complex128]: + xt = x.astype(dtype) + xt -= 1j * xt + yt = y.astype(dtype) + yt -= 1j * yt + cmp_eq = math_ops.equal(xt, yt) + cmp_ne = math_ops.not_equal(xt, yt) + values = self.evaluate([cmp_eq, cmp_ne]) + self.assertAllEqual( + [[True, True, True, True, True], + [False, False, False, False, False]], values) + if __name__ == ""__main__"": test.main() ",0,train d08667249fd064ddf41777eea6debf4474e6622a,tensorflow/tensorflow,"Internal change PiperOrigin-RevId: 251371868",strip_unused_lib.py,"@@ -75,6 +75,8 @@ def strip_unused(input_graph_def, input_node_names, output_node_names, if ""_output_shapes"" in node.attr: placeholder_node.attr[""_output_shapes""].CopyFrom(node.attr[ ""_output_shapes""]) + if ""shape"" in node.attr: + placeholder_node.attr[""shape""].CopyFrom(node.attr[""shape""]) inputs_replaced_graph_def.node.extend([placeholder_node]) else: inputs_replaced_graph_def.node.extend([copy.deepcopy(node)]) ",0,train da9706d518adf45b5d2dff480d80e78be12575ca,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-08-25 PiperOrigin-RevId: 328291621 Change-Id: If04388ad4e881890383fa7e83b49c272ff216949",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 24) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 25) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,test 6850dafeeaaa48efa748134688844bd079ef3949,tensorflow/tensorflow,"collective_param_resolver_local.cc: delete DCHECK(!ir->out_mu.try_lock()); in a lambda UNLOCK_FUNCTION(ir->out_mu) annotates that the lock is held on entry. try_lock() should not be called. 
PiperOrigin-RevId: 215769341",collective_param_resolver_local.cc,"@@ -522,7 +522,6 @@ void CollectiveParamResolverLocal::CallInitInstanceSharedParams( InitInstanceSharedParams( gr, cp, ir, [this, ir, done](const Status& s) UNLOCK_FUNCTION(ir->out_mu) { - DCHECK(!ir->out_mu.try_lock()); DCHECK(ir->out_mu_available); ir->status.Update(s); ir->out_mu.unlock(); ",0,train 2e7cc48e5cce0ff5429b2d9d0ac313ce70035605,tensorflow/tensorflow,Change for internal compatibility.,single_machine.cc,"@@ -41,7 +41,7 @@ SingleMachine::SingleMachine(int timeout_s, int num_cpu_cores, int num_gpus) } SingleMachine::~SingleMachine() { - CloseSession(false /*use_timeout*/); + CloseSession(false /*use_timeout*/).IgnoreError(); // Prevent the destructor from deleting mu_ until CloseSession() is done. mutex_lock l(mu_); @@ -164,18 +164,18 @@ Status SingleMachine::CloseSession(bool use_timeout) { thread_pool_->Schedule([this] { if (this->coordinator_) { - this->coordinator_->RequestStop(); + this->coordinator_->RequestStop().IgnoreError(); // Wait for all the runners to have closed their queues. while (!this->coordinator_->AllRunnersStopped()) { sleep(1); } // Now we can close the session. This should cancel any pending I/O // operation. - this->session_->Close(); + this->session_->Close().IgnoreError(); // Last but not least, we can delete the coordinator. this->coordinator_.reset(); } else { - this->session_->Close(); + this->session_->Close().IgnoreError(); } // Wait for any previous run to finish. ",0,train 2e7cc48e5cce0ff5429b2d9d0ac313ce70035605,tensorflow/tensorflow,Change for internal compatibility.,fused_batch_norm_op.cc,"@@ -300,8 +300,9 @@ struct FusedBatchNorm { GPUDevice d = context->eigen_device(); using perftools::gputools::DeviceMemory; Tensor inv_var; - context->allocate_temp(DataTypeToEnum::value, estimated_variance.shape(), - &inv_var); + OP_REQUIRES_OK( + context, context->allocate_temp(DataTypeToEnum::value, + estimated_variance.shape(), &inv_var)); auto inv_var_ptr = StreamExecutorUtil::AsDeviceMemory(inv_var); std::function&()> var_to_inv_var = [d, epsilon, estimated_variance, ",0,train 2e7cc48e5cce0ff5429b2d9d0ac313ce70035605,tensorflow/tensorflow,Change for internal compatibility.,graph_transferer.cc,"@@ -96,7 +96,8 @@ Status GraphTransferer::LoadGraphFromProto( shape_inference::ShapeHandle handle; status = context->MakeShapeFromTensorShape( input_node_info.second.shape(), &handle); - shape_refiner.SetShape(node, 0, handle); + // TODO(b/32704451): Don't just ignore this status! + shape_refiner.SetShape(node, 0, handle).IgnoreError(); is_input_node = true; } if (!status.ok()) { @@ -395,9 +396,11 @@ void GraphTransferer::RegisterConstantNode( const_node_info.add_shape(shape[2]); const_node_info.add_shape(shape[3]); const TensorProto* proto = nullptr; - GetNodeAttr(node.def(), ""value"", &proto); + // TODO(b/32704451): Don't just ignore this status! + GetNodeAttr(node.def(), ""value"", &proto).IgnoreError(); Tensor const_tensor; - MakeTensorFromProto(*proto, &const_tensor); + // TODO(b/32704451): Don't just ignore this status! + MakeTensorFromProto(*proto, &const_tensor).IgnoreError(); const_node_info.set_dtype(const_tensor.dtype()); // TODO(satok): Remove. 
Determine constant value without dryrun ",0,train 2e7cc48e5cce0ff5429b2d9d0ac313ce70035605,tensorflow/tensorflow,Change for internal compatibility.,sparsify_gather.cc,"@@ -146,7 +146,7 @@ Status SparsifyGather(const GraphDef& input_graph_def, const NodeDef& const_node = match.inputs[0].inputs[0].node; DataType data_type; - GetNodeAttr(const_node, ""dtype"", &data_type); + TF_RETURN_IF_ERROR(GetNodeAttr(const_node, ""dtype"", &data_type)); if (data_type != DT_FLOAT) { return tensorflow::errors::FailedPrecondition( ""Transform only applicable to subgraph with 'Const' of dtype "" ",0,train 1cd086bc77c2f58d9ce519250cdefc355ab3aac4,tensorflow/tensorflow,[tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc] Add calls to `reserve()` before populating vectors,tuple_points_to_analysis_test.cc,"@@ -111,6 +111,7 @@ class TuplePointsToAnalysisTest : public HloTestBase { points_to_analysis_->GetBufferDefinedAt(instruction, index) .ValueOrDie(); std::vector expected_aliases; + expected_aliases.reserve(expected.size()); for (auto& pair : expected) { expected_aliases.push_back(BufferAlias(pair.first, pair.second)); } ",0,train 78688104bc118097a7968c864197a3c328f1c00b,tensorflow/tensorflow,"Fix allocator build errors in xtensa softmax, conv + depthwise conv kernels. PiperOrigin-RevId: 322830325 Change-Id: I22eb3d1259db1390e6ad2c3caa588279b50fd674",conv.cc,"@@ -329,10 +329,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const int num_channels = filter->dims->data[kConvQuantizedDimension]; // Dynimically allocate per-channel quantization parameters. op_data->per_channel_output_multiplier = - reinterpret_cast(context->AllocatePersistentBuffer( + reinterpret_cast(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t))); op_data->per_channel_output_shift = - reinterpret_cast(context->AllocatePersistentBuffer( + reinterpret_cast(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t))); // All per-channel quantized tensors need valid zero point and scale arrays. ",0,train 78688104bc118097a7968c864197a3c328f1c00b,tensorflow/tensorflow,"Fix allocator build errors in xtensa softmax, conv + depthwise conv kernels. PiperOrigin-RevId: 322830325 Change-Id: I22eb3d1259db1390e6ad2c3caa588279b50fd674",depthwise_conv.cc,"@@ -377,10 +377,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; // Dynimically allocate per-channel quantization parameters. op_data->per_channel_output_multiplier = - reinterpret_cast(context->AllocatePersistentBuffer( + reinterpret_cast(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t))); op_data->per_channel_output_shift = - reinterpret_cast(context->AllocatePersistentBuffer( + reinterpret_cast(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t))); // All per-channel quantized tensors need valid zero point and scale arrays. ",0,train 78688104bc118097a7968c864197a3c328f1c00b,tensorflow/tensorflow,"Fix allocator build errors in xtensa softmax, conv + depthwise conv kernels. PiperOrigin-RevId: 322830325 Change-Id: I22eb3d1259db1390e6ad2c3caa588279b50fd674",softmax.cc,"@@ -167,10 +167,9 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { // the scale and beta before calculating exp. It is mandatory to apply beta // and scale here, since each softmax op may have different beta and scale // values. Beta and scale will remain constant for a given softmax op. 
- void* allocated_ptr; - TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( - context, kInt8Range * sizeof(int16_t), &allocated_ptr)); - op_data->exp_lut = static_cast(allocated_ptr); + op_data->exp_lut = static_cast(context->AllocatePersistentBuffer( + context, kInt8Range * sizeof(uint16_t))); + TF_LITE_ENSURE(context, op_data->exp_lut != nullptr); TF_LITE_ENSURE_STATUS( CalculateSoftmaxOpData(context, input, output, params, op_data)); ",0,train e5132a1a1c2a47b2496189dd0e0880d53816dea3,tensorflow/tensorflow,"[XLA:GPU] Fix a memory corruption in HLO snapshotting. PiperOrigin-RevId: 396849022 Change-Id: Id451a827a81007692fe91c153ba115f083e9abc0",gpu_compiler.cc,"@@ -1162,13 +1162,10 @@ StatusOr> GpuCompiler::RunBackend( // Dump computation proto state and buffer assignment for debug and test, if // dump is enabled. if (DumpingEnabledForHloModule(gpu_executable->module())) { - if (!hlo_proto_) { - hlo_proto_ = absl::make_unique(); - *hlo_proto_->mutable_hlo_module() = gpu_executable->module().ToProto(); - } - *hlo_proto_->mutable_buffer_assignment() = + auto hlo_proto = absl::make_unique(*hlo_proto_); + *hlo_proto->mutable_buffer_assignment() = compile_module_results.buffer_assignment->ToProto(); - gpu_executable->set_hlo_proto(std::move(hlo_proto_)); + gpu_executable->set_hlo_proto(std::move(hlo_proto)); } gpu_executable->set_debug_info( compile_module_results.buffer_assignment->GetStats().ToString()); ",0,train e5132a1a1c2a47b2496189dd0e0880d53816dea3,tensorflow/tensorflow,"[XLA:GPU] Fix a memory corruption in HLO snapshotting. PiperOrigin-RevId: 396849022 Change-Id: Id451a827a81007692fe91c153ba115f083e9abc0",gpu_compiler.h,"@@ -112,7 +112,7 @@ class GpuCompiler : public LLVMCompiler { } // Optional HloProto, stashed for dumping snapshots. - mutable std::unique_ptr hlo_proto_; + std::unique_ptr hlo_proto_; se::Platform::Id platform_id_; ",0,train 44fb8a750e563392e4aa4b7c6de5d7f56d1c65a8,tensorflow/tensorflow,"Tweak comment on XlaClusterInfo's default constructor. PiperOrigin-RevId: 220499695",encapsulate_util.h,"@@ -117,11 +117,14 @@ Status PreprocessForEncapsulation(Graph* g, // Information for XLA computation. struct XlaClusterInfo { - // The implicit default constructor is deleted because host_compute_core is a - // const member whose type (std::map) doesn't necessarily have a user provided - // constructor - while libc++ and libstdc++ 4.8 provide a user defined - // default constructor, libstdc++ at least >= 7.3 does not. - // See also c++11 [class.ctor] p5. + // Add an explicitly-defined default constructor for this class. + // + // The compiler may delete the default constructor here because + // host_compute_core is a const member whose type (std::map) doesn't + // necessarily have a user provided constructor -- while libc++ and + // libstdc++ 4.8 provide a user defined default constructor, libstdc++ at + // least >= 7.3 does not. See also c++11 [class.ctor] p5. + // // TODO(klimek): In c++17 we'll be able to initialize host_compute_core // without losing aggregate initialization, which allows us to get rid of // the constructor definitions again. ",0,train 1188b9e764fc76f0dfa9c87a4575e8fac706a3ec,tensorflow/tensorflow,"Shortcut cross_device_ops reduce and batch_reduce method if there is only one input in PerReplica object. PiperOrigin-RevId: 249860947",cross_device_ops.py,"@@ -232,6 +232,11 @@ class CrossDeviceOps(object): def __init__(self): pass + @property + def _num_between_graph_workers(self): + # Returns 1 by default, the value may be overridden by sub classes. 
+ return 1 + def reduce(self, reduce_op, per_replica_value, destinations): """"""Reduce `per_replica_value` to `destinations`. @@ -255,6 +260,14 @@ class CrossDeviceOps(object): per_replica_value = _make_tensor_into_per_replica(per_replica_value) validate_destinations(destinations) + + # Shortcut if `per_replica_value` only contains one value. + if self._num_between_graph_workers == 1 and len( + per_replica_value.values) == 1 and _devices_match( + per_replica_value, destinations): + return value_lib.Mirrored(per_replica_value.device_map, + per_replica_value.values) + return self.reduce_implementation(reduce_op, per_replica_value, destinations) @@ -288,6 +301,15 @@ class CrossDeviceOps(object): for _, d in value_destination_pairs: validate_destinations(d) + # Shortcut all PerReplica objects only contain one value. + if self._num_between_graph_workers == 1 and _all_devices_match( + value_destination_pairs) and len( + value_destination_pairs[0][0].values) == 1: + return [ + value_lib.Mirrored(v.device_map, v.values) + for v, _ in value_destination_pairs + ] + return self.batch_reduce_implementation(reduce_op, value_destination_pairs) def broadcast(self, tensor, destinations): @@ -974,6 +996,10 @@ class CollectiveAllReduce(CrossDeviceOps): cross_device_utils.CollectiveKeys()) super(CollectiveAllReduce, self).__init__() + @property + def _num_between_graph_workers(self): + return self._num_workers + def reduce_implementation(self, reduce_op, per_replica_value, destinations): all_reduced = self._batch_all_reduce(reduce_op, [per_replica_value])[0] device_map, logical_device = get_device_map_from(destinations) ",0,train 8c31d812ad5904dcb2d5e8d221837be5bbb6e725,tensorflow/tensorflow,"Turning off the parallelized IsDirectory() call on iOS platform, due to problems with more than a few threads. Change: 137727687",file_system.cc,"@@ -29,6 +29,10 @@ limitations under the License. #include ""tensorflow/core/platform/platform.h"" #include ""tensorflow/core/platform/protobuf.h"" +#if defined(__APPLE__) +#include +#endif + namespace tensorflow { namespace { ",0,train 2fb51e98b98f0cba4af1131203a588a43cadbf8a,tensorflow/tensorflow,"Have ValueUseIterator template use OperandType instead of IROperand. This was causing some issues using helper methods like llvm::make_early_inc_range on Value::getUses(), resulting in IROperand instead of OpOperand. PiperOrigin-RevId: 262056425",UseDefLists.h,"@@ -197,7 +197,7 @@ public: /// An iterator over all uses of a ValueBase. template class ValueUseIterator - : public std::iterator { + : public std::iterator { public: ValueUseIterator() = default; explicit ValueUseIterator(OperandType *current) : current(current) {} ",0,train 1abfa5aa09174be04fd18f946eefd6368ae3cead,tensorflow/tensorflow,Markdown links using `[This link](http://example.net/)` syntax (#2273),nn_ops.py,"@@ -68,15 +68,15 @@ def atrous_conv2d(value, filters, rate, padding, name=None): the amount of computation. For a description of atrous convolution and how it can be used for dense - feature extraction, please see: (Semantic Image Segmentation with Deep - Convolutional Nets and Fully Connected CRFs)[http://arxiv.org/abs/1412.7062]. - The same operation is investigated further in (Multi-Scale Context Aggregation - by Dilated Convolutions)[http://arxiv.org/abs/1511.07122]. Previous works + feature extraction, please see: [Semantic Image Segmentation with Deep + Convolutional Nets and Fully Connected CRFs](http://arxiv.org/abs/1412.7062). 
+ The same operation is investigated further in [Multi-Scale Context Aggregation + by Dilated Convolutions](http://arxiv.org/abs/1511.07122). Previous works that effectively use atrous convolution in different ways are, among others, - (OverFeat: Integrated Recognition, Localization and Detection using - Convolutional Networks) [http://arxiv.org/abs/1312.6229] and (Fast Image - Scanning with Deep Max-Pooling Convolutional Neural Networks) - [http://arxiv.org/abs/1302.1700]. Atrous convolution is also closely related + [OverFeat: Integrated Recognition, Localization and Detection using + Convolutional Networks](http://arxiv.org/abs/1312.6229) and [Fast Image + Scanning with Deep Max-Pooling Convolutional Neural Networks] + (http://arxiv.org/abs/1302.1700). Atrous convolution is also closely related to the so-called noble identities in multi-rate signal processing. There are many different ways to implement atrous convolution (see the refs @@ -227,8 +227,8 @@ def conv2d_transpose(value, name=None): """"""The transpose of `conv2d`. - This operation is sometimes called ""deconvolution"" after (Deconvolutional - Networks)[http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf], but is + This operation is sometimes called ""deconvolution"" after [Deconvolutional + Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is actually the transpose (gradient) of `conv2d` rather than an actual deconvolution. ",0,train 583da17bd6e0972b2c01305547ca04008b2c22a8,tensorflow/tensorflow,"Update GraphDef version to 1015. PiperOrigin-RevId: 422490804 Change-Id: If7881dcb54d717061bc3c62b5a5efb0cb9641f8f",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 1014 // Updated: 2022/1/17 +#define TF_GRAPH_DEF_VERSION 1015 // Updated: 2022/1/18 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // ",0,train f3117e8ec1952311d869c7524fba8cdfc2975979,tensorflow/tensorflow,"[RunHandler] Fix wait-for-handler code when timeout is not set. Previously we were setting a (very short) deadline when `call_timeout == 0`, whereas this should be treated as an indefinite deadline. PiperOrigin-RevId: 292241523 Change-Id: I659886f0f1642b6683c4c2ff44d74ae7bec29620",run_handler.cc,"@@ -36,7 +36,9 @@ limitations under the License. namespace tensorflow { namespace { +// LINT.IfChange static constexpr int32 kMaxConcurrentHandlers = 128; +// LINT.ThenChange(//tensorflow/core/framework/run_handler_test.cc) // TODO(azaks): Refactor with thread:ThreadPool class RunHandlerEnvironment { @@ -948,16 +950,18 @@ class RunHandlerPool::Impl { RunHandler::Impl* handler_impl; { mutex_lock l(mu_); - if (free_handlers_.empty()) { + if (!has_free_handler()) { profiler::TraceMe activity( [&] { return strings::StrCat(""WaitingForHandler#step_id="", step_id, ""#""); }, profiler::TraceMeLevel::kInfo); - if (!mu_.AwaitWithDeadline( - Condition(this, &Impl::has_free_handler), - EnvTime::NowNanos() + timeout_in_ms * 1000 * 1000)) { + if (timeout_in_ms == 0) { + mu_.Await(Condition(this, &Impl::has_free_handler)); + } else if (!mu_.AwaitWithDeadline( + Condition(this, &Impl::has_free_handler), + EnvTime::NowNanos() + timeout_in_ms * 1000 * 1000)) { return nullptr; } } ",0,train f3117e8ec1952311d869c7524fba8cdfc2975979,tensorflow/tensorflow,"[RunHandler] Fix wait-for-handler code when timeout is not set. 
Previously we were setting a (very short) deadline when `call_timeout == 0`, whereas this should be treated as an indefinite deadline. PiperOrigin-RevId: 292241523 Change-Id: I659886f0f1642b6683c4c2ff44d74ae7bec29620",run_handler_test.cc,"@@ -205,5 +205,37 @@ TEST_F(RunHandlerTest, TestConcurrencyUseRunHandlerPool) { delete tp; } +TEST_F(RunHandlerTest, TestWaitTimeout) { + std::unique_ptr pool(new RunHandlerPool(1, 1)); + + // Get the single handler in the pool. + std::vector> blocking_handles; + const int32 kMaxConcurrentHandlers = 128; // Copied from run_handler.cc. + blocking_handles.reserve(kMaxConcurrentHandlers); + for (int i = 0; i < kMaxConcurrentHandlers; ++i) { + blocking_handles.push_back(pool->Get(i)); + } + + // A subsequent request with a non-zero timeout will fail by returning + // nullptr. + auto null_handle = pool->Get(128, 1); + EXPECT_EQ(null_handle.get(), nullptr); + + // A subsequent request with no timeout will succeed once the blocking handle + // is returned. + auto tp = std::make_unique(Env::Default(), ""test"", 4); + std::atomic release_time; + + tp->Schedule([&blocking_handles, &release_time]() { + Env::Default()->SleepForMicroseconds(5000); + release_time = EnvTime::NowNanos(); + blocking_handles[0].reset(); + }); + + auto next_handle = pool->Get(129, 0); + EXPECT_GT(EnvTime::NowNanos(), release_time); + EXPECT_NE(next_handle.get(), nullptr); +} + } // namespace } // namespace tensorflow ",0,train 1e5a49750b70bda55a5d6cd3618ae2bdc0cf4f80,tensorflow/tensorflow,"[buildcop] Disable testAllToAllV3 that fails various memory tests, see the bug for the detail. PiperOrigin-RevId: 398518426 Change-Id: Ic51f33eaf6ef0285c8d14a0bc945aa0b81563120",collective_ops_test.py,"@@ -1245,6 +1245,7 @@ class CollectiveOpsV3Test(test.TestCase, parameterized.TestCase): @combinations.generate(device_combination) def testAllToAllV3(self, device, communication): + self.skipTest('TODO(b/200953796)') group_size = 2 group_key = 104 ",0,train 8cb8c460a3a1998182f8d338f9d82de89b076d19,tensorflow/tensorflow,"Treat SparseApply* on empty sparse gradients as no-op PiperOrigin-RevId: 353040731 Change-Id: Ibe213b002efc2622c86fb936c477c1e13820f3e4",training_ops_gpu.cu.cc,"@@ -512,6 +512,9 @@ struct SparseApplyAdagrad { const Tindex first_dim_size = var.dimension(0); const Tindex grad_size = grad.size(); const Tindex indices_size = indices.size(); + if (grad_size == 0) { + return Status::OK(); + } GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d); return GpuLaunchKernel( SparseApplyAdagradKernel, config.block_count, @@ -570,6 +573,9 @@ struct SparseApplyProximalAdagrad { const Tindex first_dim_size = var.dimension(0); const Tindex grad_size = grad.size(); const Tindex indices_size = indices.size(); + if (grad_size == 0) { + return Status::OK(); + } GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d); return GpuLaunchKernel(SparseApplyProximalAdagradKernel, config.block_count, config.thread_per_block, 0, @@ -777,6 +783,9 @@ struct SparseApplyFtrl { const Tindex first_dim_size = var.dimension(0); const Tindex grad_size = grad.size(); const Tindex indices_size = indices.size(); + if (grad_size == 0) { + return Status::OK(); + } GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d); return GpuLaunchKernel( SparseApplyFtrlKernel, config.block_count, @@ -846,12 +855,14 @@ struct SparseApplyKerasMomentum { const Tindex first_dim_size = var.dimension(0); const Tindex grad_size = grad.size(); const Tindex indices_size = indices.size(); - GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, 
d); - TF_CHECK_OK(GpuLaunchKernel( - SparseApplyKerasMomentumKernel, config.block_count, - config.thread_per_block, 0, d.stream(), var.data(), accum.data(), - lr.data(), grad.data(), indices.data(), momentum.data(), use_nesterov, - first_dim_size, grad_size, indices_size)); + if (grad_size != 0) { + GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d); + TF_CHECK_OK(GpuLaunchKernel( + SparseApplyKerasMomentumKernel, config.block_count, + config.thread_per_block, 0, d.stream(), var.data(), accum.data(), + lr.data(), grad.data(), indices.data(), momentum.data(), use_nesterov, + first_dim_size, grad_size, indices_size)); + } return static_cast(-1); } }; ",0,train 8cb8c460a3a1998182f8d338f9d82de89b076d19,tensorflow/tensorflow,"Treat SparseApply* on empty sparse gradients as no-op PiperOrigin-RevId: 353040731 Change-Id: Ibe213b002efc2622c86fb936c477c1e13820f3e4",training_ops_test.py,"@@ -223,9 +223,9 @@ class TrainingOpsTest(TensorFlowTestCase): self._testTypesForFtrlMultiplyLinearByLr( x, y, z, lr, grad, use_gpu=False, l1=l1, l2=l2) - def _testTypesForSparseAdagrad(self, x, y, lr, grad, indices): + def _testTypesForSparseAdagrad(self, x, y, lr, grad, indices, use_gpu): self.setUp() - with self.session(use_gpu=True): + with self.session(use_gpu=use_gpu): var = variables.VariableV1(x) accum = variables.VariableV1(y) self.evaluate(variables.global_variables_initializer()) @@ -251,11 +251,12 @@ class TrainingOpsTest(TensorFlowTestCase): lr, grad, indices, + use_gpu, l1=0.0, l2=0.0, lr_power=-0.5): self.setUp() - with self.session(use_gpu=False): + with self.session(use_gpu=use_gpu): var = variables.VariableV1(x) accum = variables.VariableV1(y) linear = variables.VariableV1(z) @@ -327,8 +328,9 @@ class TrainingOpsTest(TensorFlowTestCase): @test_util.run_v1_only(""SparseApplyAdagrad op returns a ref, so it is not "" ""supported in eager mode."") def testSparseApplyAdagrad(self): - for (dtype, index_type) in itertools.product( - [np.float16, np.float32, np.float64], [np.int32, np.int64]): + for (dtype, index_type, + use_gpu) in itertools.product([np.float16, np.float32, np.float64], + [np.int32, np.int64], [False, True]): x_val = [np.arange(10), np.arange(10, 20), np.arange(20, 30)] y_val = [np.arange(1, 11), np.arange(11, 21), np.arange(21, 31)] x = np.array(x_val).astype(dtype) @@ -337,13 +339,19 @@ class TrainingOpsTest(TensorFlowTestCase): grad_val = [np.arange(10), np.arange(10)] grad = np.array(grad_val).astype(dtype) indices = np.array([0, 2]).astype(index_type) - self._testTypesForSparseAdagrad(x, y, lr, grad, indices) + self._testTypesForSparseAdagrad(x, y, lr, grad, indices, use_gpu) + # Empty sparse gradients. 
+ empty_grad = np.zeros([0, 10], dtype=dtype) + empty_indices = np.zeros([0], dtype=index_type) + self._testTypesForSparseAdagrad(x, y, lr, empty_grad, empty_indices, + use_gpu) @test_util.run_v1_only(""SparseApplyAdagrad op returns a ref, so it is not "" ""supported in eager mode."") def testSparseApplyAdagradDim1(self): - for (dtype, index_type) in itertools.product( - [np.float16, np.float32, np.float64], [np.int32, np.int64]): + for (dtype, index_type, + use_gpu) in itertools.product([np.float16, np.float32, np.float64], + [np.int32, np.int64], [False, True]): x_val = [[1.0], [2.0], [3.0]] y_val = [[4.0], [5.0], [6.0]] x = np.array(x_val).astype(dtype) @@ -352,13 +360,18 @@ class TrainingOpsTest(TensorFlowTestCase): grad_val = [[1.5], [2.5]] grad = np.array(grad_val).astype(dtype) indices = np.array([0, 2]).astype(index_type) - self._testTypesForSparseAdagrad(x, y, lr, grad, indices) + self._testTypesForSparseAdagrad(x, y, lr, grad, indices, use_gpu) @test_util.run_v1_only(""SparseApplyFtrl op returns a ref, so it is not "" ""supported in eager mode."") def testSparseApplyFtrlDim1(self): - for (dtype, index_type) in itertools.product( - [np.float16, np.float32, np.float64], [np.int32, np.int64]): + for (dtype, index_type, + use_gpu) in itertools.product([np.float16, np.float32, np.float64], + [np.int32, np.int64], [False, True]): + # TODO(b/178042695): This configuration leads to a ""too many resources + # requested for launch"" error. + if (dtype, index_type, use_gpu) == (np.float64, np.int64, True): + continue x_val = [[0.0], [0.0], [0.0]] y_val = [[4.0], [5.0], [6.0]] z_val = [[0.0], [0.0], [0.0]] @@ -369,7 +382,12 @@ class TrainingOpsTest(TensorFlowTestCase): grad_val = [[1.5], [2.5]] grad = np.array(grad_val).astype(dtype) indices = np.array([0, 2]).astype(index_type) - self._testTypesForSparseFtrl(x, y, z, lr, grad, indices) + self._testTypesForSparseFtrl(x, y, z, lr, grad, indices, use_gpu) + # Empty sparse gradients. + empty_grad = np.zeros([0, 1], dtype=dtype) + empty_indices = np.zeros([0], dtype=index_type) + self._testTypesForSparseFtrl(x, y, z, lr, empty_grad, empty_indices, + use_gpu) @test_util.run_v1_only(""SparseApplyFtrlMultiplyLinearByLr op returns a ref, "" ""so it is not supported in eager mode."") ",0,train c3d50afa464d8130b5d258b5c0b69b3f4ee40501,tensorflow/tensorflow,"Pass profile_handle instead of session_id for XLA TPU compilation To make XLA profile based compilation more deterministic, this change passes an immutable profile_handle rather than an id of session, which can update the containing session. PiperOrigin-RevId: 438127623",compiler.h,"@@ -134,10 +134,12 @@ class AotCompilationOptions { se::StreamExecutor* executor() const { return executor_; } void set_executor(se::StreamExecutor* executor) { executor_ = executor; } - // Optional session_id and cache key may be used to trigger recompilation + // Optional profile_handle and cache key may be used to trigger recompilation // when a compilation cache is used. 
- uint64_t session_id() const { return session_id_; } - void set_session_id(uint64_t session_id) { session_id_ = session_id; } + uint64_t profile_handle() const { return profile_handle_; } + void set_profile_handle(uint64_t profile_handle) { + profile_handle_ = profile_handle; + } absl::string_view cache_key() const { return cache_key_; } void set_cache_key(absl::string_view cache_key) { @@ -161,7 +163,7 @@ class AotCompilationOptions { FusionConfigCollection fusion_config_collection_ = FusionConfigCollection::kOff; se::StreamExecutor* executor_ = nullptr; - uint64_t session_id_ = 0; + uint64_t profile_handle_ = 0; std::string cache_key_; bool run_backend_only_ = false; }; ",0,train c3d50afa464d8130b5d258b5c0b69b3f4ee40501,tensorflow/tensorflow,"Pass profile_handle instead of session_id for XLA TPU compilation To make XLA profile based compilation more deterministic, this change passes an immutable profile_handle rather than an id of session, which can update the containing session. PiperOrigin-RevId: 438127623",hlo_module.h,"@@ -408,9 +408,11 @@ class HloModule { module->metadata_ = std::move(metadata_); } - uint64_t session_id() const { return session_id_; } + uint64_t profile_handle() const { return profile_handle_; } - void set_session_id(uint64_t session_id) { session_id_ = session_id; } + void set_profile_handle(uint64_t profile_handle) { + profile_handle_ = profile_handle; + } void add_profile_info(const HloModuleProto::ProfileInfo& profile_info) { profile_info_list_.push_back(profile_info); @@ -494,8 +496,8 @@ class HloModule { // True if the module contains dynamic computation. bool is_dynamic_ = false; - // A compilation session id. - uint64_t session_id_ = 0; + // Optional compilation profile handle. + uint64_t profile_handle_ = 0; // An array of ProfileInfo specifying what optimization profiles this module // contains, along with the relative speedups. ",0,train 0b881b877b147e31ab86f4e5a4a215fb4764a782,tensorflow/tensorflow,"Fix util_tensor_slice_set_test to not be sensitive to the order of the results returned by TensorSlice.QueryMeta(). Since TensorSlice uses unordered_map<>, the order is not guaranteed. PiperOrigin-RevId: 248363564",tensor_slice_set_test.cc,"@@ -218,10 +218,18 @@ TEST(TensorSliceSetTest, QueryMetaTwoD) { std::vector> results; EXPECT_TRUE(tss.QueryMeta(s, &results)); EXPECT_EQ(2, results.size()); - EXPECT_EQ(""2,2:0,3"", results[0].first.DebugString()); - EXPECT_EQ(""slice_2"", results[0].second); - EXPECT_EQ(""0,2:-"", results[1].first.DebugString()); - EXPECT_EQ(""slice_1"", results[1].second); + // Allow results to be returned in either order + if (results[0].second == ""slice_2"") { + EXPECT_EQ(""2,2:0,3"", results[0].first.DebugString()); + EXPECT_EQ(""slice_2"", results[0].second); + EXPECT_EQ(""0,2:-"", results[1].first.DebugString()); + EXPECT_EQ(""slice_1"", results[1].second); + } else { + EXPECT_EQ(""0,2:-"", results[0].first.DebugString()); + EXPECT_EQ(""slice_1"", results[0].second); + EXPECT_EQ(""2,2:0,3"", results[1].first.DebugString()); + EXPECT_EQ(""slice_2"", results[1].second); + } } // Slice #4 includes the hole and so there is no match ",0,train 8d00ced88d6dfa369c2387d3745c1451f3f1ae64,tensorflow/tensorflow,Fix clang format issue,mkl_fused_batch_norm_op.cc,"@@ -14,13 +14,13 @@ limitations under the License. 
==============================================================================*/ #ifdef INTEL_MKL #include ""mkldnn.hpp"" -#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" #include ""tensorflow/core/framework/op_kernel.h"" #include ""tensorflow/core/framework/register_types.h"" #include ""tensorflow/core/framework/tensor.h"" #include ""tensorflow/core/framework/tensor_types.h"" #include ""tensorflow/core/util/mkl_util.h"" #include ""tensorflow/core/util/tensor_format.h"" +#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor"" using mkldnn::batch_normalization_backward; using mkldnn::batch_normalization_forward; @@ -711,9 +711,9 @@ class MklFusedBatchNormOp : public OpKernel { std::memcpy(batch_variance_data, variance_data, depth_ * sizeof(U)); } } catch (mkldnn::error& e) { - string error_msg = ""Status: "" + std::to_string(e.status) + - "", message: "" + string(e.message) + "", in file "" + - string(__FILE__) + "":"" + std::to_string(__LINE__); + string error_msg = ""Status: "" + std::to_string(e.status) + "", message: "" + + string(e.message) + "", in file "" + string(__FILE__) + + "":"" + std::to_string(__LINE__); OP_REQUIRES_OK( context, errors::Aborted(""Operation received an exception:"", error_msg)); @@ -1036,9 +1036,9 @@ class MklFusedBatchNormGradOp : public OpKernel { reinterpret_cast(diff_weights_data + depth_), depth_ * sizeof(U)); } catch (mkldnn::error& e) { - string error_msg = ""Status: "" + std::to_string(e.status) + - "", message: "" + string(e.message) + "", in file "" + - string(__FILE__) + "":"" + std::to_string(__LINE__); + string error_msg = ""Status: "" + std::to_string(e.status) + "", message: "" + + string(e.message) + "", in file "" + string(__FILE__) + + "":"" + std::to_string(__LINE__); OP_REQUIRES_OK( context, errors::Aborted(""Operation received an exception:"", error_msg)); ",0,test 0393436023d8fe7a2f98284420c58de6e461212a,tensorflow/tensorflow,"First step of migrating layers to new API. Change: 121435753",layers_test.py,"@@ -22,6 +22,7 @@ import numpy as np import tensorflow as tf +# TODO(b/28426988): Add separate tests for non-legacy versions. 
class FullyConnectedTest(tf.test.TestCase): def setUp(self): @@ -41,8 +42,9 @@ class FullyConnectedTest(tf.test.TestCase): assert not tf.get_collection(tf.GraphKeys.SUMMARIES) def _fully_connected_basic_use(self, x, num_output_units, expected_shape): - output = tf.contrib.layers.fully_connected(x, num_output_units, - activation_fn=tf.nn.relu) + output = tf.contrib.layers.legacy_fully_connected(x, + num_output_units, + activation_fn=tf.nn.relu) with tf.Session() as sess: with self.assertRaises(tf.errors.FailedPreconditionError): @@ -71,7 +73,7 @@ class FullyConnectedTest(tf.test.TestCase): self.input_3_dim, last_dim, [2, 4, last_dim]) def test_relu_layer_basic_use(self): - output = tf.contrib.layers.relu(self.input, 8) + output = tf.contrib.layers.legacy_relu(self.input, 8) with tf.Session() as sess: with self.assertRaises(tf.errors.FailedPreconditionError): @@ -90,7 +92,7 @@ class FullyConnectedTest(tf.test.TestCase): len(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))) def test_relu6_layer_basic_use(self): - output = tf.contrib.layers.relu6(self.input, 8) + output = tf.contrib.layers.legacy_relu6(self.input, 8) with tf.Session() as sess: with self.assertRaises(tf.errors.FailedPreconditionError): @@ -112,11 +114,11 @@ class FullyConnectedTest(tf.test.TestCase): def test_variable_reuse_with_scope(self): with tf.variable_scope('test') as vs: - output1 = tf.contrib.layers.relu(self.input, 8) - output2 = tf.contrib.layers.relu(self.input, 8) + output1 = tf.contrib.layers.legacy_relu(self.input, 8) + output2 = tf.contrib.layers.legacy_relu(self.input, 8) with tf.variable_scope(vs, reuse=True): - output3 = tf.contrib.layers.relu(self.input, 8) + output3 = tf.contrib.layers.legacy_relu(self.input, 8) with tf.Session() as sess: tf.initialize_all_variables().run() @@ -127,7 +129,7 @@ class FullyConnectedTest(tf.test.TestCase): def test_variable_reuse_with_template(self): tmpl1 = tf.make_template('test', - tf.contrib.layers.fully_connected, + tf.contrib.layers.legacy_fully_connected, num_output_units=8) output1 = tmpl1(self.input) output2 = tmpl1(self.input) @@ -138,9 +140,11 @@ class FullyConnectedTest(tf.test.TestCase): self.assertAllClose(out_value1, out_value2) def _custom_initializers(self, x, num_output_units, expected_outputs): - output = tf.contrib.layers.relu(x, num_output_units, - weight_init=tf.constant_initializer(2.0), - bias_init=tf.constant_initializer(1.0)) + output = tf.contrib.layers.legacy_relu( + x, + num_output_units, + weight_init=tf.constant_initializer(2.0), + bias_init=tf.constant_initializer(1.0)) with tf.Session() as sess: tf.initialize_all_variables().run() @@ -165,10 +169,11 @@ class FullyConnectedTest(tf.test.TestCase): [49.6, 49.6]]]) def test_custom_collections(self): - tf.contrib.layers.relu(self.input, 2, - weight_collections=['unbiased'], - bias_collections=['biased'], - output_collections=['output']) + tf.contrib.layers.legacy_relu(self.input, + 2, + weight_collections=['unbiased'], + bias_collections=['biased'], + output_collections=['output']) self.assertEquals(1, len(tf.get_collection('unbiased'))) self.assertEquals(1, len(tf.get_collection('biased'))) @@ -176,9 +181,10 @@ class FullyConnectedTest(tf.test.TestCase): self.assertEquals(2, len(tf.get_collection(tf.GraphKeys.VARIABLES))) def test_all_custom_collections(self): - tf.contrib.layers.relu(self.input, 2, - weight_collections=['unbiased', 'all'], - bias_collections=['biased', 'all']) + tf.contrib.layers.legacy_relu(self.input, + 2, + weight_collections=['unbiased', 'all'], + 
bias_collections=['biased', 'all']) self.assertEquals(1, len(tf.get_collection('unbiased'))) self.assertEquals(1, len(tf.get_collection('biased'))) @@ -186,16 +192,16 @@ class FullyConnectedTest(tf.test.TestCase): tf.get_collection('all')) def test_no_bias(self): - tf.contrib.layers.relu(self.input, 2, bias_init=None) + tf.contrib.layers.legacy_relu(self.input, 2, bias_init=None) self.assertEqual(1, len(tf.get_collection(tf.GraphKeys.VARIABLES))) def test_no_activation(self): - y = tf.contrib.layers.fully_connected(self.input, 2) + y = tf.contrib.layers.legacy_fully_connected(self.input, 2) self.assertEquals(2, len(tf.get_collection(tf.GraphKeys.VARIABLES))) self.assertEquals('BiasAdd', y.op.type) def test_no_activation_no_bias(self): - y = tf.contrib.layers.fully_connected(self.input, 2, bias_init=None) + y = tf.contrib.layers.legacy_fully_connected(self.input, 2, bias_init=None) self.assertEquals(1, len(tf.get_collection(tf.GraphKeys.VARIABLES))) self.assertEquals('MatMul', y.op.type) @@ -206,7 +212,9 @@ class FullyConnectedTest(tf.test.TestCase): cnt[0] += 1 return tensor - tf.contrib.layers.fully_connected(self.input, 2, weight_regularizer=test_fn) + tf.contrib.layers.legacy_fully_connected(self.input, + 2, + weight_regularizer=test_fn) self.assertEqual([tensor], tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) @@ -219,10 +227,12 @@ class FullyConnectedTest(tf.test.TestCase): cnt[0] += 1 return tensor - tf.contrib.layers.fully_connected(self.input, 2, - weight_regularizer=test_fn) - tf.contrib.layers.fully_connected(self.input, 2, - weight_regularizer=test_fn) + tf.contrib.layers.legacy_fully_connected(self.input, + 2, + weight_regularizer=test_fn) + tf.contrib.layers.legacy_fully_connected(self.input, + 2, + weight_regularizer=test_fn) self.assertEqual([tensor, tensor], tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) @@ -236,12 +246,14 @@ class FullyConnectedTest(tf.test.TestCase): return tensor with tf.variable_scope('test') as vs: - tf.contrib.layers.fully_connected(self.input, 2, - weight_regularizer=test_fn) + tf.contrib.layers.legacy_fully_connected(self.input, + 2, + weight_regularizer=test_fn) with tf.variable_scope(vs, reuse=True): - tf.contrib.layers.fully_connected(self.input, 2, - weight_regularizer=test_fn) + tf.contrib.layers.legacy_fully_connected(self.input, + 2, + weight_regularizer=test_fn) self.assertEqual([tensor], tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) @@ -254,7 +266,9 @@ class FullyConnectedTest(tf.test.TestCase): with self.test_session(): x = tf.constant([[]], shape=[0, 3]) self.assertEqual(0, tf.size(x).eval()) - y = tf.contrib.layers.fully_connected(x, 2, activation_fn=tf.nn.softmax) + y = tf.contrib.layers.legacy_fully_connected(x, + 2, + activation_fn=tf.nn.softmax) tf.initialize_all_variables().run() expected_y = np.array([]).reshape(0, 2) np.testing.assert_array_equal(expected_y, y.eval()) @@ -262,7 +276,7 @@ class FullyConnectedTest(tf.test.TestCase): def test_shapes_variable_first_dim(self): # first dimension is not known statically. x = tf.placeholder(tf.float32, shape=[None, 4, 3]) - y = tf.contrib.layers.fully_connected(x, 1) + y = tf.contrib.layers.legacy_fully_connected(x, 1) # in the output we still only know the 2nd and 3rd dimensions statically. 
self.assertEquals(y.get_shape().as_list(), [None, 4, 1]) with self.test_session() as sess: @@ -280,7 +294,7 @@ class FullyConnectedTest(tf.test.TestCase): def _unknown_dim_invalid_input(self, last_dim): x = tf.placeholder(tf.float32, shape=[3, last_dim]) - tf.contrib.layers.fully_connected(x, 2, activation_fn=None) + tf.contrib.layers.legacy_fully_connected(x, 2, activation_fn=None) def test_known_dim_valid_input(self): self._unknown_dim_invalid_input(last_dim=3) @@ -295,7 +309,9 @@ class FullyConnectedTest(tf.test.TestCase): with self.assertRaisesRegexp(ValueError, 'rank of x must be at least 2 not: 1'): x = tf.constant([[]], shape=[0]) - tf.contrib.layers.fully_connected(x, 2, activation_fn=tf.nn.softmax) + tf.contrib.layers.legacy_fully_connected(x, + 2, + activation_fn=tf.nn.softmax) class Convolution2dTest(tf.test.TestCase): @@ -308,8 +324,9 @@ class Convolution2dTest(tf.test.TestCase): assert not tf.get_collection(tf.GraphKeys.SUMMARIES) def test_basic_use(self): - output = tf.contrib.layers.convolution2d(self.input, 8, (3, 3), - activation_fn=tf.nn.relu) + output = tf.contrib.layers.legacy_convolution2d(self.input, + 8, (3, 3), + activation_fn=tf.nn.relu) with tf.Session() as sess: with self.assertRaises(tf.errors.FailedPreconditionError): @@ -328,17 +345,17 @@ class Convolution2dTest(tf.test.TestCase): def test_variable_reuse_with_scope(self): with tf.variable_scope('test') as vs: - output1 = tf.contrib.layers.convolution2d(self.input, - 8, (3, 3), - activation_fn=tf.nn.relu) - output2 = tf.contrib.layers.convolution2d(self.input, - 8, (3, 3), - activation_fn=tf.nn.relu) + output1 = tf.contrib.layers.legacy_convolution2d(self.input, + 8, (3, 3), + activation_fn=tf.nn.relu) + output2 = tf.contrib.layers.legacy_convolution2d(self.input, + 8, (3, 3), + activation_fn=tf.nn.relu) with tf.variable_scope(vs, reuse=True): - output3 = tf.contrib.layers.convolution2d(self.input, - 8, (3, 3), - activation_fn=tf.nn.relu) + output3 = tf.contrib.layers.legacy_convolution2d(self.input, + 8, (3, 3), + activation_fn=tf.nn.relu) with tf.Session() as sess: tf.initialize_all_variables().run() @@ -349,7 +366,7 @@ class Convolution2dTest(tf.test.TestCase): def test_variable_reuse_with_template(self): tmpl1 = tf.make_template('test', - tf.contrib.layers.convolution2d, + tf.contrib.layers.legacy_convolution2d, kernel_size=(3, 3), num_output_channels=8) output1 = tmpl1(self.input) @@ -361,10 +378,9 @@ class Convolution2dTest(tf.test.TestCase): self.assertAllClose(out_value1, out_value2) def test_custom_initializers(self): - output = tf.contrib.layers.convolution2d( + output = tf.contrib.layers.legacy_convolution2d( self.input, - 2, - (3, 3), + 2, (3, 3), activation_fn=tf.nn.relu, weight_init=tf.constant_initializer(2.0), bias_init=tf.constant_initializer(1.0), @@ -378,21 +394,22 @@ class Convolution2dTest(tf.test.TestCase): np.array([[[[1261., 1261.]]], [[[3853., 3853.]]]]), out_value) def test_custom_collections(self): - tf.contrib.layers.convolution2d(self.input, - 2, (3, 3), - activation_fn=tf.nn.relu, - weight_collections=['unbiased'], - bias_collections=['biased']) + tf.contrib.layers.legacy_convolution2d(self.input, + 2, (3, 3), + activation_fn=tf.nn.relu, + weight_collections=['unbiased'], + bias_collections=['biased']) self.assertEquals(1, len(tf.get_collection('unbiased'))) self.assertEquals(1, len(tf.get_collection('biased'))) def test_all_custom_collections(self): - tf.contrib.layers.convolution2d(self.input, - 2, (3, 3), - activation_fn=tf.nn.relu, - weight_collections=['unbiased', 'all'], - 
bias_collections=['biased', 'all']) + tf.contrib.layers.legacy_convolution2d( + self.input, + 2, (3, 3), + activation_fn=tf.nn.relu, + weight_collections=['unbiased', 'all'], + bias_collections=['biased', 'all']) self.assertEquals(1, len(tf.get_collection('unbiased'))) self.assertEquals(1, len(tf.get_collection('biased'))) @@ -407,15 +424,18 @@ class Convolution2dTest(tf.test.TestCase): cnt[0] += 1 return tensor - tf.contrib.layers.convolution2d(self.input, 2, (3, 3), - weight_regularizer=test_fn) + tf.contrib.layers.legacy_convolution2d(self.input, + 2, (3, 3), + weight_regularizer=test_fn) self.assertEqual([tensor], tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) self.assertEqual(1, cnt[0]) def test_no_bias(self): - tf.contrib.layers.convolution2d(self.input, 2, (3, 3), bias_init=None) + tf.contrib.layers.legacy_convolution2d(self.input, + 2, (3, 3), + bias_init=None) self.assertEqual(1, len(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))) ",0,train 3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size} PiperOrigin-RevId: 223521732",interpreter_test.cc,"@@ -566,7 +566,7 @@ TEST(BasicInterpreter, ThreeStepAllocate) { DynamicBuffer buf; StringRef str_ref = GetString(input, 0); buf.AddString(str_ref); - buf.WriteToTensor(output); + buf.WriteToTensorAsVector(output); return kTfLiteOk; }; ",0,train 3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size} PiperOrigin-RevId: 223521732",tensor_jni.cc,"@@ -278,7 +278,7 @@ void WriteMultiDimensionalStringArray(JNIEnv* env, jobject src, tflite::DynamicBuffer dst_buffer; PopulateStringDynamicBuffer(env, src, &dst_buffer, tensor->dims->size); if (!env->ExceptionCheck()) { - dst_buffer.WriteToTensor(tensor); + dst_buffer.WriteToTensor(tensor, /*new_shape=*/nullptr); } } ",0,train 3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size} PiperOrigin-RevId: 223521732",gather.cc,"@@ -118,7 +118,7 @@ TfLiteStatus GatherStrings(TfLiteContext* context, const TfLiteTensor* input, const auto string_ref = GetString(input, pos); buffer.AddString(string_ref.str, string_ref.len); } - buffer.WriteToTensor(output); + buffer.WriteToTensorAsVector(output); return kTfLiteOk; } ",0,train 3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size} PiperOrigin-RevId: 223521732",hashtable_lookup.cc,"@@ -137,7 +137,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } if (output->type == kTfLiteString) { - buf.WriteToTensor(output); + buf.WriteToTensorAsVector(output); } return kTfLiteOk; ",0,train 3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size} PiperOrigin-RevId: 223521732",skip_gram.cc,"@@ -107,7 +107,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // Generate n-grams recursively. 
tflite::DynamicBuffer buf; if (words.size() < params->ngram_size) { - buf.WriteToTensor(GetOutput(context, node, 0)); + buf.WriteToTensorAsVector(GetOutput(context, node, 0)); return kTfLiteOk; } @@ -145,7 +145,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } - buf.WriteToTensor(GetOutput(context, node, 0)); + buf.WriteToTensorAsVector(GetOutput(context, node, 0)); return kTfLiteOk; } } // namespace ",0,train 3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size} PiperOrigin-RevId: 223521732",test_util.h,"@@ -199,7 +199,7 @@ class SingleOpModel { for (const string& s : content) { buf.AddString(s.data(), s.length()); } - buf.WriteToTensor(tensor); + buf.WriteToTensor(tensor, /*new_shape=*/nullptr); } // Populate the tensor given its index. ",0,train 3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size} PiperOrigin-RevId: 223521732",normalize.cc,"@@ -92,7 +92,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { tflite::DynamicBuffer buf; buf.AddString(result.data(), result.length()); - buf.WriteToTensor(GetOutput(context, node, 0)); + buf.WriteToTensorAsVector(GetOutput(context, node, 0)); return kTfLiteOk; } ",0,train 3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size} PiperOrigin-RevId: 223521732",predictor.cc,"@@ -49,7 +49,7 @@ void ExecuteTfLite(const std::string& sentence, TfLiteTensor* input = interpreter->tensor(interpreter->inputs()[0]); tflite::DynamicBuffer buf; buf.AddString(sentence.data(), sentence.length()); - buf.WriteToTensor(input); + buf.WriteToTensorAsVector(input); interpreter->AllocateTensors(); interpreter->Invoke(); ",0,train 3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size} PiperOrigin-RevId: 223521732",string_util.cc,"@@ -96,8 +96,7 @@ int DynamicBuffer::WriteToBuffer(char** buffer) { return bytes; } -void DynamicBuffer::WriteToTensor(TfLiteTensor* tensor) { - // Set tensor content pointer to tensor_buffer, and release original data. +void DynamicBuffer::WriteToTensorAsVector(TfLiteTensor* tensor) { auto dims = TfLiteIntArrayCreate(1); dims->data[0] = offset_.size() - 1; // Store number of strings. WriteToTensor(tensor, dims); @@ -108,6 +107,10 @@ void DynamicBuffer::WriteToTensor(TfLiteTensor* tensor, char* tensor_buffer; int bytes = WriteToBuffer(&tensor_buffer); + if (new_shape == nullptr) { + new_shape = TfLiteIntArrayCopy(tensor->dims); + } + // Set tensor content pointer to tensor_buffer, and release original data. TfLiteTensorReset(tensor->type, tensor->name, new_shape, tensor->params, tensor_buffer, bytes, kTfLiteDynamic, tensor->allocation, ",0,train 3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size} PiperOrigin-RevId: 223521732",string_util.h,"@@ -74,12 +74,18 @@ class DynamicBuffer { // The function allocates space for the buffer but does NOT take ownership. int WriteToBuffer(char** buffer); - // Fill content into a string tensor, with the given new_shape. The new - // shape must match the number of strings in this object. + // Fill content into a string tensor, with the given new_shape. The new shape + // must match the number of strings in this object. 
Caller relinquishes + // ownership of new_shape. If 'new_shape' is nullptr, keep the tensor's + // existing shape. void WriteToTensor(TfLiteTensor* tensor, TfLiteIntArray* new_shape); // Fill content into a string tensor. Set shape to {num_strings}. - void WriteToTensor(TfLiteTensor* tensor); + void WriteToTensorAsVector(TfLiteTensor* tensor); + + // Deprecated. Use WriteToTensorAsVector() or pass in the new shpe. + // TODO(b/120230709): remove when people migrate away. + void WriteToTensor(TfLiteTensor* tensor) { WriteToTensorAsVector(tensor); } private: // Data buffer to store contents of strings, not including headers. ",0,train 3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size} PiperOrigin-RevId: 223521732",string_util_test.cc,"@@ -55,7 +55,7 @@ TEST(StringUtil, TestStringUtil) { new_shape->data[0] = 2; new_shape->data[1] = 1; buf0.WriteToTensor(t0, new_shape); - buf1.WriteToTensor(t1); + buf1.WriteToTensorAsVector(t1); // Check tensor shapes. EXPECT_EQ(t0->dims->size, 2); @@ -99,7 +99,7 @@ TEST(StringUtil, TestAddJoinedString) { DynamicBuffer buf; buf.AddJoinedString({{s0, 3}, {s1, 4}, {s2, 0}, {s3, 3}}, ' '); - buf.WriteToTensor(t0); + buf.WriteToTensorAsVector(t0); ASSERT_EQ(GetStringCount(t0), 1); StringRef str_ref; @@ -115,7 +115,7 @@ TEST(StringUtil, TestEmptyList) { t0->type = kTfLiteString; t0->allocation_type = kTfLiteDynamic; DynamicBuffer buf; - buf.WriteToTensor(t0); + buf.WriteToTensorAsVector(t0); ASSERT_EQ(GetStringCount(t0), 0); ASSERT_EQ(t0->bytes, 8); ",0,train 3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size} PiperOrigin-RevId: 223521732",benchmark_tflite_model.cc,"@@ -279,7 +279,7 @@ void BenchmarkTfLiteModel::PrepareInputsAndOutputs() { FillRandomString(&buffer, sizes, []() { return ""we're have some friends over saturday to hang out in the yard""; }); - buffer.WriteToTensor(interpreter->tensor(i)); + buffer.WriteToTensor(interpreter->tensor(i), /*new_shape=*/nullptr); } else { TFLITE_LOG(FATAL) << ""Don't know how to populate tensor "" << t->name << "" of type "" << t->type; ",0,train 9a7e849472c954470de889cc8873223e4db1e4df,tensorflow/tensorflow,"* Passing `training_features` (without weight column) instead of `features` into GradientBoostedDecisionTreeModel. * Export GTFlow model into generic format with features defined in proto. PiperOrigin-RevId: 171766066",custom_export_strategy.py,"@@ -96,7 +96,8 @@ def make_custom_export_strategy(name, def convert_to_universal_format(dtec, sorted_feature_names, num_dense, num_sparse_float, - num_sparse_int): + num_sparse_int, + feature_name_to_proto=None): """"""Convert GTFlow trees to universal format."""""" del num_sparse_int # unused. model_and_features = generic_tree_model_pb2.ModelAndFeatures() @@ -104,7 +105,11 @@ def convert_to_universal_format(dtec, sorted_feature_names, # feature is processed before it's fed to the model (e.g. bucketing # information). As of now, this serves as a list of features the model uses. 
for feature_name in sorted_feature_names: - model_and_features.features[feature_name].SetInParent() + if not feature_name_to_proto: + model_and_features.features[feature_name].SetInParent() + else: + model_and_features.features[feature_name].CopyFrom( + feature_name_to_proto[feature_name]) model = model_and_features.model model.ensemble.summation_combination_technique.SetInParent() for tree_idx in range(len(dtec.trees)): ",0,train 9a7e849472c954470de889cc8873223e4db1e4df,tensorflow/tensorflow,"* Passing `training_features` (without weight column) instead of `features` into GradientBoostedDecisionTreeModel. * Export GTFlow model into generic format with features defined in proto. PiperOrigin-RevId: 171766066",model.py,"@@ -93,7 +93,7 @@ def model_builder(features, labels, mode, params, config): learner_config=learner_config, feature_columns=feature_columns, logits_dimension=head.logits_dimension, - features=features) + features=training_features) with ops.name_scope(""gbdt"", ""gbdt_optimizer""): predictions_dict = gbdt_model.predict(mode) logits = predictions_dict[""predictions""] ",0,train fe15ce0d733794491fea0d51589dd7c7c779ff60,tensorflow/tensorflow,"Fix broken test after changing the constructor of Converter. PiperOrigin-RevId: 272527750",convert_nodes_test.cc,"@@ -657,7 +657,7 @@ class ConverterTest : public ::testing::Test { builder_.reset(nvinfer1::createInferBuilder(logger_)); network_.reset(builder_->createNetwork()); converter_.reset(new Converter(network_.get(), TrtPrecisionMode::FP32, - /*use_calibration=*/false)); + /*use_calibration=*/false, &logger_)); weight_store_ = &converter_->weight_store_; } @@ -995,8 +995,9 @@ TEST_F(ConverterTest, MaybeApplyQuantizationRanges) { // input -> infer1 -> infer2 -> infer3 FakeITensor input, infer_1, infer_2, infer_3; FakeITensor not_infer; + Logger logger; Converter int8_converter(/*trt_network=*/nullptr, TrtPrecisionMode::INT8, - /*use_calibration=*/true); + /*use_calibration=*/true, &logger); int8_converter.ProvideQuantizationRange(&input, -5.0f, 5.0f); int8_converter.ProvideQuantizationRange(¬_infer, -100.0f, 100.0f); int8_converter.MarkQuantizationRangesAsInferrable(&input, &infer_1); @@ -1257,7 +1258,7 @@ class OpConverterTest : public ::testing::Test { // Reset the converter. converter_.reset(new Converter(network_.get(), precision_mode_to_test_, - /*use_calibration=*/false)); + /*use_calibration=*/false, &logger_)); // Reset other related artifacts. scope_ = Scope::NewRootScope(); ",0,train 075b37f91926e92aa3305ce12982f0128a59c0c6,tensorflow/tensorflow,"Include traceback for distributed variables. Otherwise errors in variable creation fail to point to the correct line of code. 
PiperOrigin-RevId: 256075666",values.py,"@@ -591,7 +591,7 @@ def _enter_or_assert_strategy(strategy): DistributedVarOp = collections.namedtuple( - ""DistributedVarOp"", [""name"", ""graph"", ""type""]) + ""DistributedVarOp"", [""name"", ""graph"", ""traceback"", ""type""]) class DistributedVariable(DistributedDelegate, variables_lib.AbstractVariable): @@ -757,6 +757,7 @@ class DistributedVariable(DistributedDelegate, variables_lib.AbstractVariable): if distribution_strategy_context.in_cross_replica_context(): return DistributedVarOp(self.primary.op.name, self.primary.op.graph, + self.primary.op.traceback, self.primary.op.type) return self.get().op @@ -885,7 +886,8 @@ class TPUVariableMixin(object): @property def op(self): return DistributedVarOp( - self.primary.op.name, self.primary.op.graph, self.primary.op.type) + self.primary.op.name, self.primary.op.graph, self.primary.op.traceback, + self.primary.op.type) def _dense_var_to_tensor(self, dtype=None, name=None, as_ref=False): """"""Converts a variable to a tensor."""""" ",0,train 075b37f91926e92aa3305ce12982f0128a59c0c6,tensorflow/tensorflow,"Include traceback for distributed variables. Otherwise errors in variable creation fail to point to the correct line of code. PiperOrigin-RevId: 256075666",values_test.py,"@@ -644,6 +644,24 @@ class MirroredVariableTest(test.TestCase, parameterized.TestCase): after_restore = self.evaluate(v) self.assertAllClose(before_save, after_restore) + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_one_cpu, + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.tpu_strategy, + strategy_combinations.central_storage_strategy_with_two_gpus, + ], + mode=[""graph""])) + def testTraceback(self, distribution): + with distribution.scope(): + variable_scope.get_variable( + name=""testVar"", initializer=1., use_resource=True) + with self.assertRaisesRegex( + ValueError, ""Variable testVar already exists""): + variable_scope.get_variable( + name=""testVar"", initializer=1., use_resource=True) + _TPU_STRATEGIES = (tpu_strategy.TPUStrategy, tpu_strategy.TPUStrategyV1) ",0,train cdb9f312f1a00e3fb90f14d79aca2fa9dcab8f21,tensorflow/tensorflow,"Add field to HLO proto field to enable reversing a convolution filter. PiperOrigin-RevId: 176149369",computation_builder.cc,"@@ -153,6 +153,7 @@ bool ComputationBuilder::MakeWindow( } else { dim->set_window_dilation(1); } + dim->set_window_reversal(false); } return true; } ",0,train cdb9f312f1a00e3fb90f14d79aca2fa9dcab8f21,tensorflow/tensorflow,"Add field to HLO proto field to enable reversing a convolution filter. PiperOrigin-RevId: 176149369",hlo_evaluator.cc,"@@ -814,7 +814,9 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { } rhs_index[dnums.kernel_spatial_dimensions(ki)] = - rhs_spatial_index[ki]; + window_dim.window_reversal() + ? ((window_dim.size() - 1) - rhs_spatial_index[ki]) + : rhs_spatial_index[ki]; } result_val += lhs_literal.Get(lhs_index) * ",0,train cdb9f312f1a00e3fb90f14d79aca2fa9dcab8f21,tensorflow/tensorflow,"Add field to HLO proto field to enable reversing a convolution filter. 
PiperOrigin-RevId: 176149369",hlo_evaluator_test.cc,"@@ -794,6 +794,83 @@ TEST_F(HloEvaluatorTest, Simple4x4Conv2DWith2x2Kernel) { LiteralTestUtil::ExpectEqual(*expected, *result); } +TEST_F(HloEvaluatorTest, Conv2DGeneralDimensionsReversed) { + HloComputation::Builder b(TestName()); + + // clang-format off + // Input dimensions: [feature=2, height=3, batch=1, width=4] + Array4D input({ + {{{1, 2, 3, 4}}, + {{5, 6, 7, 8}}, + {{9, 10, 11, 12}}}, + {{{13, 14, 15, 16}}, + {{17, 18, 19, 20}}, + {{21, 22, 23, 24}}} + }); + // Weight dimensions: + // [kernel_output_feature=1, width=3, kernel_input_feature=2, height=3] + Array4D weight({{ + {{1, 7, 13}, + {4, 10, 16}}, + {{2, 8, 14}, + {5, 11, 17}}, + {{3, 9, 15}, + {6, 12, 18}} + }}); + // clang-format on + + auto lhs_literal = Literal::CreateR4FromArray4D(input); + HloInstruction* lhs_instruction = + b.AddInstruction(HloInstruction::CreateConstant(std::move(lhs_literal))); + + auto rhs_literal = Literal::CreateR4FromArray4D(weight); + HloInstruction* rhs_instruction = + b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_literal))); + rhs_instruction = b.AddInstruction(HloInstruction::CreateReverse( + rhs_instruction->shape(), rhs_instruction, {3, 1})); + + Window window; + WindowDimension dim; + dim.set_size(3); + dim.set_stride(1); + dim.set_padding_low(0); + dim.set_padding_high(0); + dim.set_window_dilation(1); + dim.set_base_dilation(1); + dim.set_window_reversal(true); + *window.add_dimensions() = dim; + *window.add_dimensions() = dim; + + ConvolutionDimensionNumbers dnums; + dnums.set_input_batch_dimension(2); + dnums.set_output_batch_dimension(2); + dnums.set_input_feature_dimension(0); + dnums.set_output_feature_dimension(0); + dnums.add_spatial_dimensions(1); + dnums.add_spatial_dimensions(3); + + dnums.set_kernel_output_feature_dimension(0); + dnums.set_kernel_input_feature_dimension(2); + dnums.add_kernel_spatial_dimensions(3); + dnums.add_kernel_spatial_dimensions(1); + + const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); + b.AddInstruction(HloInstruction::CreateConvolve( + shape, lhs_instruction, rhs_instruction, window, dnums)); + auto computation = module().AddEntryComputation(b.Build()); + + std::unique_ptr result = + evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie(); + + // clang-format off + // Result dimensions: [feature=1, height=1, batch=1, width=2] + Array4D expected_array({{{{2514, 2685}}}}); + // clang-format on + auto expected = Literal::CreateR4FromArray4D(expected_array); + + LiteralTestUtil::ExpectEqual(*expected, *result); +} + TEST_F(HloEvaluatorTest, Conv2DGeneralDimensions) { HloComputation::Builder b(TestName()); ",0,train cdb9f312f1a00e3fb90f14d79aca2fa9dcab8f21,tensorflow/tensorflow,"Add field to HLO proto field to enable reversing a convolution filter. PiperOrigin-RevId: 176149369",window_util.cc,"@@ -44,6 +44,9 @@ namespace window_util { if (dim.window_dilation() != 1) { StrAppend(&str, "",window_dilation="", dim.window_dilation()); } + if (dim.window_reversal()) { + StrAppend(&str, "",window_reversal""); + } StrAppend(&str, "")""); return str; } ",0,train 2a85bf4a14cf02f7b9cc6258c750f5f0e9fb385c,tensorflow/tensorflow,"Fix minimal logging build for macos PiperOrigin-RevId: 315321111 Change-Id: I205b82403e663bc415156cbe7e1d82e3b8866e93",platform.h,"@@ -34,6 +34,7 @@ limitations under the License. 
#define PLATFORM_POSIX_IOS #define IS_MOBILE_PLATFORM #else +// If no platform specified, use: #define PLATFORM_POSIX #endif ",0,train 2a85bf4a14cf02f7b9cc6258c750f5f0e9fb385c,tensorflow/tensorflow,"Fix minimal logging build for macos PiperOrigin-RevId: 315321111 Change-Id: I205b82403e663bc415156cbe7e1d82e3b8866e93",model_test.cc,"@@ -18,10 +18,6 @@ limitations under the License. #include #include #include -#include -#include -#include - #include #include ",0,train ec4f65aa468c6f304d67693850b846a5bc5d059b,tensorflow/tensorflow,"Always validate `node_id` range PiperOrigin-RevId: 411133308 Change-Id: I917cf026132d2374abdb8d6e06c8a925d031a74c",stats_ops.cc,"@@ -1187,8 +1187,10 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel { f_map.clear(); } previous_node_id = node_id; - DCHECK_LE(node_id_first, node_id); - DCHECK_LT(node_id, node_id_last); + OP_REQUIRES( + context, node_id_first <= node_id && node_id < node_id_last, + errors::InvalidArgument(""node_id = "", node_id, "" which is not in ["", + node_id_first, "", "", node_id_last, "")"")); const int32_t feature_dim = stats_summary_indices(idx, 1); const int32_t bucket_id = stats_summary_indices(idx, 2); const int32_t stat_dim = stats_summary_indices(idx, 3); ",0,train 74a6cca5d867d37e79ec9d780f2c57b926f07a80,tensorflow/tensorflow,"Removed a linear scan in dtypes.as_dtype PiperOrigin-RevId: 229152423",dtypes.py,"@@ -535,29 +535,31 @@ _np_qint32 = np.dtype([(""qint32"", np.int32, 1)]) np_resource = np.dtype([(""resource"", np.ubyte, 1)]) # Standard mappings between types_pb2.DataType values and numpy.dtypes. -_NP_TO_TF = frozenset([ - (np.float16, float16), - (np.float32, float32), - (np.float64, float64), - (np.int32, int32), - (np.int64, int64), - (np.uint8, uint8), - (np.uint16, uint16), - (np.uint32, uint32), - (np.uint64, uint64), - (np.int16, int16), - (np.int8, int8), - (np.complex64, complex64), - (np.complex128, complex128), - (np.object_, string), - (np.bool_, bool), - (_np_qint8, qint8), - (_np_quint8, quint8), - (_np_qint16, qint16), - (_np_quint16, quint16), - (_np_qint32, qint32), - (_np_bfloat16, bfloat16), -]) +_NP_TO_TF = { + np.float16: float16, + np.float32: float32, + np.float64: float64, + np.int32: int32, + np.int64: int64, + np.uint8: uint8, + np.uint16: uint16, + np.uint32: uint32, + np.uint64: uint64, + np.int16: int16, + np.int8: int8, + np.complex64: complex64, + np.complex128: complex128, + np.object_: string, + np.string_: string, + np.unicode_: string, + np.bool_: bool, + _np_qint8: qint8, + _np_quint8: quint8, + _np_qint16: qint16, + _np_quint16: quint16, + _np_qint32: qint32, + _np_bfloat16: bfloat16, +} _TF_TO_NP = { types_pb2.DT_HALF: np.float16, @@ -664,6 +666,20 @@ _PYTHON_TO_TF = { builtins.object: string } +_ANY_TO_TF = {} +_ANY_TO_TF.update(_INTERN_TABLE) +_ANY_TO_TF.update(_STRING_TO_TF) +_ANY_TO_TF.update(_PYTHON_TO_TF) +_ANY_TO_TF.update(_NP_TO_TF) + +# Ensure no collisions. 
+assert len(_ANY_TO_TF) == sum(len(d) for d in [ + _INTERN_TABLE, + _STRING_TO_TF, + _PYTHON_TO_TF, + _NP_TO_TF +]) + @tf_export(""dtypes.as_dtype"", ""as_dtype"") def as_dtype(type_value): @@ -684,36 +700,16 @@ def as_dtype(type_value): if isinstance(type_value, DType): return type_value - try: - return _INTERN_TABLE[type_value] - except KeyError: - pass - - try: - return _STRING_TO_TF[type_value] - except KeyError: - pass + if isinstance(type_value, np.dtype): + try: + return _NP_TO_TF[type_value.type] + except KeyError: + pass try: - return _PYTHON_TO_TF[type_value] + return _ANY_TO_TF[type_value] except KeyError: pass - if isinstance(type_value, np.dtype): - # The numpy dtype for strings is variable length. We can not compare - # dtype with a single constant (np.string does not exist) to decide - # dtype is a ""string"" type. We need to compare the dtype.type to be - # sure it's a string type. - if type_value.type == np.string_ or type_value.type == np.unicode_: - return string - - if isinstance(type_value, (type, np.dtype)): - for key, val in _NP_TO_TF: - try: - if key == type_value: - return val - except TypeError as e: - raise TypeError(""Cannot convert {} to a dtype. {}"".format( - type_value, e)) - - raise TypeError(""Cannot convert value %r to a TensorFlow DType."" % type_value) + raise TypeError( + ""Cannot convert value %r to a TensorFlow DType."" % type_value) ",0,train d7527088595cbc89778ee8d1b3e8361be0cb75cf,tensorflow/tensorflow,"Fix ""Converting DataType 'INVALID' to MLIR Type"" bug PiperOrigin-RevId: 276387998 Change-Id: Ide7dd335e1d1c1463e318ae39de3ac84a9aeeddf",graphdef_to_tfl_flatbuffer.cc,"@@ -106,10 +106,30 @@ Status ConvertGraphDefToTFLiteFlatBuffer(const toco::ModelFlags& model_flags, std::vector node_maxs; tensorflow::DataType inference_type = ConvertIODataTypeToDataType(toco_flags.inference_type()); + + // Build a map from placeholder to data types. + llvm::StringMap placeholder_data_type_map; + for (const NodeDef& node_def : input.node()) { + if (node_def.op() == ""Placeholder"" && node_def.attr().count(""dtype"") > 0) { + placeholder_data_type_map[node_def.name()] = + node_def.attr().at(""dtype"").type(); + } + } + for (auto& flag : model_flags.input_arrays()) { + // TOCO doesn't required `data_type` to be filled for every input. + // If it's not filled, try to get the data type from the placeholder. + auto toco_data_type = flag.data_type(); + DataType data_type; + if (toco_data_type == ::toco::IODataType::IO_DATA_TYPE_UNKNOWN && + placeholder_data_type_map.find(flag.name()) != + placeholder_data_type_map.end()) { + data_type = placeholder_data_type_map[flag.name()]; + } else { + data_type = ConvertIODataTypeToDataType(toco_data_type); + } node_names.push_back(flag.name()); - node_dtypes.push_back( - DataType_Name(ConvertIODataTypeToDataType(flag.data_type()))); + node_dtypes.push_back(DataType_Name(data_type)); node_shapes.push_back(std::vector(flag.shape().dims().begin(), flag.shape().dims().end())); ",0,train 7448a2b927cda64446a57bca785ac2b58a16cf12,tensorflow/tensorflow,"Don't strip default attributes when sending from master to workers PiperOrigin-RevId: 287220931 Change-Id: Ide0a4131bd35952ed63129b1112626d3683d885b",master_session.cc,"@@ -31,7 +31,6 @@ limitations under the License. 
#include ""tensorflow/core/framework/allocation_description.pb.h"" #include ""tensorflow/core/framework/collective.h"" #include ""tensorflow/core/framework/cost_graph.pb.h"" -#include ""tensorflow/core/framework/graph_def_util.h"" #include ""tensorflow/core/framework/node_def.pb.h"" #include ""tensorflow/core/framework/node_def_util.h"" #include ""tensorflow/core/framework/tensor.h"" @@ -473,8 +472,8 @@ Status MasterSession::ReffedClientGraph::DoRegisterPartitions( c->req.set_session_handle(session_handle_); c->req.set_create_worker_session_called(!should_deregister_); c->req.mutable_graph_def()->Swap(&graph_partitions[part.name]); - StripDefaultAttributes(*OpRegistry::Global(), - c->req.mutable_graph_def()->mutable_node()); + // TODO(b/146354085): Default attributes should be stripped here from + // c->req.graph_def(), but this causes some TFX pipelines to fail. *c->req.mutable_config_proto() = session_opts_.config; *c->req.mutable_graph_options() = session_opts_.config.graph_options(); *c->req.mutable_debug_options() = ",0,train 79a93ac627b9af8ae84a874ce248fe42aac8de36,tensorflow/tensorflow,"Support partitioned embedding lookup for resource variables. Change: 146034474",resource_variable_ops.cc,"@@ -230,11 +230,7 @@ REGISTER_KERNEL_BUILDER(Name(""VarIsInitializedOp"").Device(DEVICE_GPU), template class ResourceGatherOp : public OpKernel { public: - explicit ResourceGatherOp(OpKernelConstruction* c) : OpKernel(c) { - const DataType dt = DataTypeToEnum::v(); - const DataType index_t = DataTypeToEnum::v(); - OP_REQUIRES_OK(c, c->MatchSignature({dt, index_t}, {dt})); - } + explicit ResourceGatherOp(OpKernelConstruction* c) : OpKernel(c) {} void Compute(OpKernelContext* c) override { Var* v = nullptr; ",0,test 79a93ac627b9af8ae84a874ce248fe42aac8de36,tensorflow/tensorflow,"Support partitioned embedding lookup for resource variables. 
Change: 146034474",embedding_ops_test.py,"@@ -162,7 +162,8 @@ def _EmbeddingParams(num_shards, def _EmbeddingParamsAsPartitionedVariable(num_shards, vocab_size, dtype=dtypes.float32, - shape=None): + shape=None, + use_resource=False): p, params, feed_dict = _EmbeddingParams( num_shards, vocab_size, dtype=dtype, shape=shape) shape = shape or [10] @@ -171,7 +172,8 @@ def _EmbeddingParamsAsPartitionedVariable(num_shards, shape=[vocab_size] + shape, initializer=array_ops.concat([params[p_i.name] for p_i in p], 0), partitioner=partitioned_variables.min_max_variable_partitioner( - max_partitions=num_shards, min_slice_size=1)) + max_partitions=num_shards, min_slice_size=1), + use_resource=use_resource) return p, partitioned_variable, params, feed_dict @@ -300,6 +302,29 @@ class EmbeddingLookupTest(test.TestCase): self.assertAllEqual(np_result, tf_result) self.assertShapeEqual(np_result, embedding) + def testSimpleShardedPartitionedResourceVariable(self): + with self.test_session() as sess: + num_shards = 2 + vocab_size = 4 + p, p_variable, params, _ = _EmbeddingParamsAsPartitionedVariable( + num_shards, vocab_size, use_resource=True) + + id_vals = np.array([0, 0]) + ids = constant_op.constant(list(id_vals), dtype=dtypes.int32) + print(""Construct ids"", ids.get_shape()) + embedding = embedding_ops.embedding_lookup(p_variable, ids) + variables.global_variables_initializer().run() + params_values = [params[p_i.name] for p_i in p] + # Test that the PartitionedVariable components equal the list in p + p_var_val = sess.run(list(p_variable)) + # Actual test + print(ops.get_default_graph().as_graph_def()) + tf_result = embedding.eval() + np_result, _, _ = _EmbeddingResult(params, id_vals, num_shards, vocab_size) + self.assertAllEqual(params_values, p_var_val) + self.assertAllEqual(np_result, tf_result) + self.assertShapeEqual(np_result, embedding) + def testShardedModPartitioningInt32Ids(self): with self.test_session(): num_shards = 5 ",0,test 79a93ac627b9af8ae84a874ce248fe42aac8de36,tensorflow/tensorflow,"Support partitioned embedding lookup for resource variables. Change: 146034474",embedding_ops.py,"@@ -33,6 +33,14 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging +def _do_gather(params, ids, validate_indices=True, name=None): + """"""Deals with doing gather differently for resource variables."""""" + if isinstance(params, resource_variable_ops.ResourceVariable): + return params.sparse_read(ids, name=name) + return array_ops.gather( + params, ids, name=name, validate_indices=validate_indices) + + def embedding_lookup(params, ids, partition_strategy=""mod"", name=None, validate_indices=True, max_norm=None): """"""Looks up `ids` in a list of embedding tensors. @@ -100,16 +108,15 @@ def embedding_lookup(params, ids, partition_strategy=""mod"", name=None, return x with ops.name_scope(name, ""embedding_lookup"", params + [ids]) as name: np = len(params) # Number of partitions - params = ops.convert_n_to_tensor_or_indexed_slices(params, name=""params"") + # Preserve the resource variable status to avoid accidental dense reads. + if not any(isinstance(p, resource_variable_ops.ResourceVariable) + for p in params): + params = ops.convert_n_to_tensor_or_indexed_slices(params, name=""params"") if np == 1: with ops.colocate_with(params[0]): - # TODO(apassos): implement the sharded version as well. 
- if isinstance(params[0], resource_variable_ops.ResourceVariable): - ret = params[0].sparse_read(ids, name=name) - else: - ret = array_ops.gather(params[0], ids, name=name, - validate_indices=validate_indices) - return maybe_normalize(ret) + return maybe_normalize( + _do_gather( + params[0], ids, validate_indices=validate_indices, name=name)) else: ids = ops.convert_to_tensor(ids, name=""ids"") flat_ids = array_ops.reshape(ids, [-1]) @@ -169,9 +176,9 @@ def embedding_lookup(params, ids, partition_strategy=""mod"", name=None, partitioned_result = [] for p in xrange(np): with ops.colocate_with(params[p]): - partitioned_result.append(array_ops.gather( - params[p], gather_ids[p], - validate_indices=validate_indices)) + partitioned_result.append( + _do_gather(params[p], gather_ids[p], + validate_indices=validate_indices)) # Stitch these back together ret = data_flow_ops.dynamic_stitch(pindices, partitioned_result, name=name) ",0,test 79a93ac627b9af8ae84a874ce248fe42aac8de36,tensorflow/tensorflow,"Support partitioned embedding lookup for resource variables. Change: 146034474",resource_variable_ops.py,"@@ -241,9 +241,9 @@ class ResourceVariable(object): def sparse_read(self, indices, collections=None, trainable=True, name=None): """"""Reads the value of this variable sparsely, using `gather`."""""" - with ops.name_scope(""Gather"" if name is None else name): + with ops.name_scope(""Gather"" if name is None else name) as name: value = gen_resource_variable_ops.resource_gather( - self._handle, indices, dtype=self._dtype) + self._handle, indices, dtype=self._dtype, name=name) _register_variable_read(value, collections=collections, trainable=trainable) return array_ops.identity(value) ",0,test 2f15f9a594730757898ee5ece214135d45de212f,tensorflow/tensorflow,"NFC: Update signature of ConvertDotDimensionNumbers Planning to add ConvertConvDimensionNumbers in a follow-up change. PiperOrigin-RevId: 273624510",hlo_function_importer.cc,"@@ -92,13 +92,12 @@ StatusOr CreateDenseAttrFromLiteral(ShapedType type, // Returns whether the instruction is a default dot operation. bool DotIsDefault(const HloInstruction* instruction) { - auto dot_dimensions = instruction->dot_dimension_numbers(); + auto dnums = instruction->dot_dimension_numbers(); DotDimensionNumbers default_dimension_numbers; default_dimension_numbers.add_lhs_contracting_dimensions( instruction->operand(0)->shape().dimensions_size() == 1 ? 
0 : 1); default_dimension_numbers.add_rhs_contracting_dimensions(0); - return xla::protobuf_util::ProtobufEquals(dot_dimensions, - default_dimension_numbers); + return xla::protobuf_util::ProtobufEquals(dnums, default_dimension_numbers); } } // namespace @@ -250,8 +249,8 @@ StatusOr HloFunctionImporter::ImportInstruction( MakeAndReturn(DotOp); } - attributes.push_back(builder_->getNamedAttr( - ""dot_dimension_numbers"", ConvertDotDimensionNumbers(instruction))); + attributes.push_back( + ConvertDotDimensionNumbers(instruction->dot_dimension_numbers())); MakeAndReturn(DotGeneralOp); } case HloOpcode::kCall: { @@ -581,21 +580,18 @@ mlir::DenseIntElementsAttr HloFunctionImporter::Convert( .cast(); } -mlir::xla_hlo::DotDimensionNumbers -HloFunctionImporter::ConvertDotDimensionNumbers(HloInstruction* instruction) { - auto dot_dimensions = instruction->dot_dimension_numbers(); +mlir::NamedAttribute HloFunctionImporter::ConvertDotDimensionNumbers( + const DotDimensionNumbers& dnums) { std::vector rhs_contracting_dimensions( - dot_dimensions.rhs_contracting_dimensions().begin(), - dot_dimensions.rhs_contracting_dimensions().end()); + dnums.rhs_contracting_dimensions().begin(), + dnums.rhs_contracting_dimensions().end()); std::vector lhs_contracting_dimensions( - dot_dimensions.lhs_contracting_dimensions().begin(), - dot_dimensions.lhs_contracting_dimensions().end()); + dnums.lhs_contracting_dimensions().begin(), + dnums.lhs_contracting_dimensions().end()); std::vector rhs_batch_dimensions( - dot_dimensions.rhs_batch_dimensions().begin(), - dot_dimensions.rhs_batch_dimensions().end()); + dnums.rhs_batch_dimensions().begin(), dnums.rhs_batch_dimensions().end()); std::vector lhs_batch_dimensions( - dot_dimensions.lhs_batch_dimensions().begin(), - dot_dimensions.lhs_batch_dimensions().end()); + dnums.lhs_batch_dimensions().begin(), dnums.lhs_batch_dimensions().end()); // Push the attributes into our new DictionaryAttr. auto lhs_batch_dims_attr = Convert(lhs_batch_dimensions); @@ -603,9 +599,10 @@ HloFunctionImporter::ConvertDotDimensionNumbers(HloInstruction* instruction) { auto lhs_contracting_dims_attr = Convert(lhs_contracting_dimensions); auto rhs_contracting_dims_attr = Convert(rhs_contracting_dimensions); - return mlir::xla_hlo::DotDimensionNumbers::get( + auto attr = mlir::xla_hlo::DotDimensionNumbers::get( lhs_batch_dims_attr, rhs_batch_dims_attr, lhs_contracting_dims_attr, rhs_contracting_dims_attr, context_); + return builder_->getNamedAttr(""dot_dimension_numbers"", attr); } } // namespace xla ",0,train 2f15f9a594730757898ee5ece214135d45de212f,tensorflow/tensorflow,"NFC: Update signature of ConvertDotDimensionNumbers Planning to add ConvertConvDimensionNumbers in a follow-up change. PiperOrigin-RevId: 273624510",hlo_function_importer.h,"@@ -105,9 +105,9 @@ class HloFunctionImporter { // Converts Array ref to an DenseIntElementsAttr. mlir::DenseIntElementsAttr Convert(llvm::ArrayRef op_dimensions); - // Converts the dot dimensions to attributes. - mlir::xla_hlo::DotDimensionNumbers ConvertDotDimensionNumbers( - xla::HloInstruction* instruction); + // Converts the dot dimensions to attribute. + mlir::NamedAttribute ConvertDotDimensionNumbers( + const DotDimensionNumbers& dnums); mlir::MLIRContext* context_; mlir::ModuleOp module_; ",0,train 35614e1eb507a99df2a5d953f0c4d2dfb55efc2c,tensorflow/tensorflow,"Change QuantizeSoftmaxOutput to a template, dropping the unused output_data parameter. On ARM64, switch uint8_t to use std::round instead of add and truncate, it's faster. 
PiperOrigin-RevId: 300854002 Change-Id: Ie3bd951b9c1c7747e13e17be5c2ef59f07f10992",optimized_ops.h,"@@ -4071,16 +4071,20 @@ inline void Softmax(const SoftmaxParams& params, } } -inline int32_t QuantizeSoftmaxOutput(int8_t* output_data, float prob_rescaled, - int32_t zero_point) { +template +inline int32_t QuantizeSoftmaxOutput(float prob_rescaled, int32_t zero_point) { const int32_t prob_rnd = static_cast(std::round(prob_rescaled)); return prob_rnd + zero_point; } -inline int32_t QuantizeSoftmaxOutput(uint8_t* output_data, float prob_rescaled, - int32_t zero_point) { +#if !__aarch64__ +// With ARM64, rounding is faster than add + truncation. +template <> +inline int32_t QuantizeSoftmaxOutput(float prob_rescaled, + int32_t zero_point) { return static_cast(prob_rescaled + 0.5f); } +#endif inline void PopulateSoftmaxLookupTable(SoftmaxParams* data, float input_scale, float beta) { @@ -4123,7 +4127,7 @@ inline void Softmax(const SoftmaxParams& params, for (int j = 0; j < last_dim; ++j) { const float prob_rescaled = table_offset[input_data[j]] * inv_sum_exp; const int32_t prob_quantized = - QuantizeSoftmaxOutput(output_data, prob_rescaled, params.zero_point); + QuantizeSoftmaxOutput(prob_rescaled, params.zero_point); output_data[j] = static_cast( std::max(std::min(clamp_max, prob_quantized), clamp_min)); } ",0,test b3b6085d6f81fe6ad47a72e8289ed93f98952e8d,tensorflow/tensorflow,Fix a typo in input_producer documentation,input.py,"@@ -92,7 +92,7 @@ def input_producer(input_tensor, element_shape=None, num_epochs=None, """"""Output the rows of `input_tensor` to a queue for an input pipeline. Args: - input_tensor: A tensor with the rows to produce. Must be at + input_tensor: A tensor with the rows to produce. Must be at least one-dimensional. Must either have a fully-defined shape, or `element_shape` must be defined. element_shape: (Optional.) A `TensorShape` representing the shape of a ",0,test f46041c9937309ec09270f83b4153eb71105ce37,tensorflow/tensorflow,"Adds missing functions to LabeledTensor's __init__.py. Change: 148149131",__init__.py,"@@ -72,6 +72,8 @@ digamma = _core.digamma erf = _core.erf erfc = _core.erfc logical_not = _core.logical_not +tanh = _core.tanh +sigmoid = _core.sigmoid add = _core.add sub = _core.sub ",0,train dfb532cb5de2ea7d067ec42f5b81e02a2148c3ac,tensorflow/tensorflow,"Register GPU kernels for placeholder to make placer happy. Change: 123984360",constant_op.cc,"@@ -273,5 +273,10 @@ class PlaceholderOp : public OpKernel { }; REGISTER_KERNEL_BUILDER(Name(""Placeholder"").Device(DEVICE_CPU), PlaceholderOp); +// The following GPU kernel registration is used to address the situation that +// a placeholder is added in a GPU device context and soft placement is false. +// Since a placeholder should never be executed, adding these GPU kernels has +// no effect on graph execution. +REGISTER_KERNEL_BUILDER(Name(""Placeholder"").Device(DEVICE_GPU), PlaceholderOp); } // namespace tensorflow ",0,train 92b294fac35d905f62ac89d66eeaf6e99054f954,tensorflow/tensorflow,"Disable new segment reduction kernels by default - Doing this because a performance regression was observed in some cases (need to investigate further). - The new kernels are now only used if determinism is required or if TF_USE_DETERMINISTIC_SEGMENT_REDUCTIONS=1 is set. 
- This commit also fixes the determinism test on Windows.",segment_reduction_ops.h,"@@ -25,7 +25,7 @@ namespace tensorflow { class OpKernelContext; -bool UseNonDeterministicSegmentReductions(); +bool UseDeterministicSegmentReductions(); bool DisableSegmentReductionOpDeterminismExceptions(); // Type of SparseSegmentReduction operation to perform gradient of. ",0,train 92b294fac35d905f62ac89d66eeaf6e99054f954,tensorflow/tensorflow,"Disable new segment reduction kernels by default - Doing this because a performance regression was observed in some cases (need to investigate further). - The new kernels are now only used if determinism is required or if TF_USE_DETERMINISTIC_SEGMENT_REDUCTIONS=1 is set. - This commit also fixes the determinism test on Windows.",segment_reduction_ops_gpu.cu.h,"@@ -712,11 +712,22 @@ void SegmentReductionFunctor< const Index input_inner_dim_size = input_total_size / input_outer_dim_size; const Index num_segments = output.size() / input_inner_dim_size; + bool use_deterministic_kernels = +#if defined(PLATFORM_WINDOWS) + // See comment in segment_reduction_ops_gpu_0.cu.cc regarding Windows CI + // build error. + false; +#else + UseDeterministicSegmentReductions() || + (OpDeterminismRequired() && + !ReduceOpIsAssociative::value); +#endif + // TODO(benbarsdell): If there are no performance concerns with the new // deterministic kernels, remove this runtime check and only compile the old // non-deterministic kernels on Windows (as a workaround for the build failure // issue). - if (UseNonDeterministicSegmentReductions()) { + if (!use_deterministic_kernels) { // Set 'output' to initial value. GpuLaunchConfig config = GetGpuLaunchConfig(output.size(), d); const T InitialValue = InitialValueF()(); @@ -774,8 +785,8 @@ void SegmentReductionFunctor< /*indices=*/static_cast(nullptr), /*weights=*/static_cast(nullptr), output.data())); #else - // Note: Shouldn't reach here because UseNonDeterministicSegmentReductions() - // always returns true on Windows. + // Note: Shouldn't reach here because use_deterministic_kernels is always + // false on Windows. OP_REQUIRES(ctx, false, errors::Unimplemented(""Deterministic segment reductions are "" ""not implemented on Windows."")); @@ -794,8 +805,19 @@ struct UnsortedSegmentFunctor { return; } + bool use_deterministic_kernels = +#if defined(PLATFORM_WINDOWS) + // See comment in segment_reduction_ops_gpu_0.cu.cc regarding Windows CI + // build error. + false; +#else + UseDeterministicSegmentReductions() || + (!ReduceOpIsAssociative::value && + OpDeterminismRequired()); +#endif + bool determinism_requirement_met = - !UseNonDeterministicSegmentReductions() || + use_deterministic_kernels || ReduceOpIsAssociative::value || !OpDeterminismRequired() || DisableSegmentReductionOpDeterminismExceptions(); @@ -819,7 +841,7 @@ struct UnsortedSegmentFunctor { // deterministic kernels, remove this runtime check and only compile the old // non-deterministic kernels on Windows (as a workaround for the build // failure issue). - if (UseNonDeterministicSegmentReductions()) { + if (!use_deterministic_kernels) { // Set 'output' to initial value. GPUDevice d = ctx->template eigen_device(); GpuLaunchConfig config = GetGpuLaunchConfig(output.size(), d); @@ -876,8 +898,8 @@ struct UnsortedSegmentFunctor { /*segment_ids=*/segment_ids_ptr, /*indices=*/sorted_indices_ptr, /*weights=*/static_cast(nullptr), output.data())); #else - // Note: Shouldn't reach here because - // UseNonDeterministicSegmentReductions() always returns true on Windows. 
+ // Note: Shouldn't reach here because use_deterministic_kernels is always + // false on Windows. OP_REQUIRES( ctx, false, errors::Unimplemented(""Deterministic unsorted segment reductions are "" ",0,train 92b294fac35d905f62ac89d66eeaf6e99054f954,tensorflow/tensorflow,"Disable new segment reduction kernels by default - Doing this because a performance regression was observed in some cases (need to investigate further). - The new kernels are now only used if determinism is required or if TF_USE_DETERMINISTIC_SEGMENT_REDUCTIONS=1 is set. - This commit also fixes the determinism test on Windows.",segment_reduction_ops_gpu_0.cu.cc,"@@ -20,19 +20,19 @@ limitations under the License. namespace tensorflow { -bool UseNonDeterministicSegmentReductions() { +bool UseDeterministicSegmentReductions() { // See comment below regarding CI build error on Windows. #if !defined(PLATFORM_WINDOWS) static bool cached_result = [] { bool result = false; TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar( - ""TF_USE_NONDETERMINISTIC_SEGMENT_REDUCTIONS"", + ""TF_USE_DETERMINISTIC_SEGMENT_REDUCTIONS"", /*default_val=*/false, &result)); return result; }(); return cached_result; #else - return true; + return false; #endif } ",0,train 92b294fac35d905f62ac89d66eeaf6e99054f954,tensorflow/tensorflow,"Disable new segment reduction kernels by default - Doing this because a performance regression was observed in some cases (need to investigate further). - The new kernels are now only used if determinism is required or if TF_USE_DETERMINISTIC_SEGMENT_REDUCTIONS=1 is set. - This commit also fixes the determinism test on Windows.",segment_reduction_ops_impl.h,"@@ -296,11 +296,22 @@ class SegmentReductionGPUOp : public AsyncOpKernel { OP_REQUIRES_OK_ASYNC( context, context->allocate_output(0, output_shape, &output), done); + bool use_deterministic_kernels = +#if defined(PLATFORM_WINDOWS) + // See comment in segment_reduction_ops_gpu_0.cu.cc regarding Windows + // CI build error. + false; +#else + UseDeterministicSegmentReductions() || + (!SegmentReductionFunctor::atomic_reduction_is_associative && + OpDeterminismRequired()); +#endif + // The determinism check is here, rather than inside the functor (as it is // for the unsorted segment reduction ops) because the done callback // (required for OP_REQUIRES_ASYNC) is not available inside the functor. bool determinism_requirement_met = - !UseNonDeterministicSegmentReductions() || + use_deterministic_kernels || SegmentReductionFunctor::atomic_reduction_is_associative || !OpDeterminismRequired() || DisableSegmentReductionOpDeterminismExceptions(); ",0,train 92b294fac35d905f62ac89d66eeaf6e99054f954,tensorflow/tensorflow,"Disable new segment reduction kernels by default - Doing this because a performance regression was observed in some cases (need to investigate further). - The new kernels are now only used if determinism is required or if TF_USE_DETERMINISTIC_SEGMENT_REDUCTIONS=1 is set. - This commit also fixes the determinism test on Windows.",segment_reduction_ops_deterministic_test.py,"@@ -34,8 +34,14 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import test -def UsingNonDeterministicSegmentReductions(): - return bool(int(os.getenv(""TF_USE_NONDETERMINISTIC_SEGMENT_REDUCTIONS"", ""0""))) +def PlatformIsWindows(): + return os.name == 'nt' + + +def DeterministicSegmentReductionsSupported(): + # See comment in segment_reduction_ops_gpu_0.cu.cc for why deterministic + # segment reduction kernels are disabled on Windows. 
+ return not PlatformIsWindows() class SegmentReductionDeterminismExceptionsTest(test.TestCase): @@ -69,7 +75,7 @@ class SegmentReductionDeterminismExceptionsTest(test.TestCase): for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]: with self.cached_session(force_gpu=True): data, segment_ids, _ = self._input(data_type, segment_ids_type) - if (UsingNonDeterministicSegmentReductions() and + if (not DeterministicSegmentReductionsSupported() and should_throw_for_float): with self.assertRaisesRegex( errors_impl.UnimplementedError, @@ -106,7 +112,7 @@ class SegmentReductionDeterminismExceptionsTest(test.TestCase): continue data, segment_ids, num_segments = self._input( data_type, segment_ids_type) - if (UsingNonDeterministicSegmentReductions() and + if (not DeterministicSegmentReductionsSupported() and (data_type != dtypes.int32) and should_throw_for_float): with self.assertRaisesRegex(errors_impl.UnimplementedError, self._UNSORTED_ERROR_MESSAGE): @@ -129,7 +135,7 @@ class SegmentReductionDeterminismExceptionsTest(test.TestCase): with self.cached_session(force_gpu=True): data, segment_ids, num_segments = self._input( data_type, segment_ids_type) - if UsingNonDeterministicSegmentReductions(): + if not DeterministicSegmentReductionsSupported(): with self.assertRaisesRegex(errors_impl.UnimplementedError, self._UNSORTED_ERROR_MESSAGE): result = op(data, segment_ids, num_segments) @@ -150,7 +156,7 @@ class SegmentReductionDeterminismExceptionsTest(test.TestCase): values, indices, _ = self._input(data_type, segment_ids_type) sparse_value = indexed_slices.IndexedSlices( values, indices, dense_shape=values.shape) - if UsingNonDeterministicSegmentReductions(): + if not DeterministicSegmentReductionsSupported(): with self.assertRaisesRegex(errors_impl.UnimplementedError, self._UNSORTED_ERROR_MESSAGE): # convert_to_tensor with IndexedSlices uses unsorted_segment_sum @@ -174,7 +180,7 @@ class SegmentReductionDeterminismExceptionsTest(test.TestCase): tape.watch(params) op_output = array_ops.gather(params, indices) gradient = tape.gradient(op_output, params) - if UsingNonDeterministicSegmentReductions(): + if not DeterministicSegmentReductionsSupported(): with self.assertRaisesRegex(errors_impl.UnimplementedError, self._UNSORTED_ERROR_MESSAGE): # convert_to_tensor on IndexedSlices ",0,train 264a4b7f20d3654bd29e9b335d6ddfe6115ac63b,tensorflow/tensorflow,Updated examples,image_ops_impl.py,"@@ -1948,10 +1948,9 @@ def random_hue(image, max_delta, seed=None): Usage Example: ```python - >>> x = tf.constant([[[1.0, 2.0, 3.0]]]) - >>> y = tf.image.random_hue(x, max_delta=0.1) - >>> print(y.numpy()) - [[[1. 2.5... 3. ]]] + >> import tensorflow as tf + >> x = tf.random.normal(shape=(256, 256, 3)) + >> y = tf.image.random_hue(x, max_delta=0.1) ``` Args: @@ -2943,11 +2942,9 @@ def rgb_to_yiq(images): Usage Example: ```python - >>> import tensorflow as tf - >>> x = tf.constant([[[2.0, 5.0, 3.0]]]) - >>> y = tf.image.rgb_to_yiq(x) - >>> print(y.numpy()) - [[[ 3.875 -1.14... -1.25...]]] + >> import tensorflow as tf + >> x = tf.random.normal(shape=(256, 256, 3)) + >> y = tf.image.rgb_to_yiq(x) ``` Args: ",0,train 8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths. -Use std::move when assigning std::function to reduce some simple allocations.' -Use std::bind to avoid copy of std::function in lambda statements. 
Change: 129654152",executor.cc,"@@ -1824,8 +1824,8 @@ void ExecutorState::DumpState() { void ExecutorState::Finish() { mu_.lock(); auto status = status_; - auto done_cb = done_cb_; - auto runner = runner_; + auto done_cb = std::move(done_cb_); + auto runner = std::move(runner_); mu_.unlock(); delete this; CHECK(done_cb != nullptr); ",0,train 8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths. -Use std::move when assigning std::function to reduce some simple allocations.' -Use std::bind to avoid copy of std::function in lambda statements. Change: 129654152",process_util.cc,"@@ -51,19 +51,21 @@ void SchedClosure(std::function closure) { const uint64 id = port::Tracing::UniqueId(); port::Tracing::RecordEvent(port::Tracing::EventCategory::kScheduleClosure, id); - std::function wrapper = [closure, id]() { - port::Tracing::ScopedActivity region( - port::Tracing::EventCategory::kRunClosure, id); - closure(); - }; - Env::Default()->SchedClosure(wrapper); + std::function wrapper = std::bind( + [id](std::function closure) { + port::Tracing::ScopedActivity region( + port::Tracing::EventCategory::kRunClosure, id); + closure(); + }, + std::move(closure)); + Env::Default()->SchedClosure(std::move(wrapper)); } else { - Env::Default()->SchedClosure(closure); + Env::Default()->SchedClosure(std::move(closure)); } } void SchedNonBlockingClosureAfter(int64 micros, std::function closure) { - Env::Default()->SchedClosureAfter(micros, closure); + Env::Default()->SchedClosureAfter(micros, std::move(closure)); } } // namespace tensorflow ",0,train 8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths. -Use std::move when assigning std::function to reduce some simple allocations.' -Use std::bind to avoid copy of std::function in lambda statements. Change: 129654152",base_rendezvous_mgr.cc,"@@ -64,13 +64,17 @@ void BaseRendezvousMgr::RecvLocalAsync(int64 step_id, const Rendezvous::ParsedKey& parsed, Rendezvous::DoneCallback done) { BaseRemoteRendezvous* rendez = FindOrCreate(step_id); - rendez->RecvLocalAsync( - parsed, [rendez, done](const Status& s, const Rendezvous::Args& send_args, - const Rendezvous::Args& recv_args, const Tensor& v, - bool dead) { + using namespace std::placeholders; + Rendezvous::DoneCallback done_cb = std::bind( + [rendez](Rendezvous::DoneCallback done, + // Begin unbound arguments. + const Status& s, const Rendezvous::Args& send_args, + const Rendezvous::Args& recv_args, const Tensor& v, bool dead) { rendez->Unref(); done(s, send_args, recv_args, v, dead); - }); + }, + std::move(done), _1, _2, _3, _4, _5); + rendez->RecvLocalAsync(parsed, std::move(done_cb)); } Status BaseRendezvousMgr::RecvLocal(int64 step_id, ",0,train 8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths. -Use std::move when assigning std::function to reduce some simple allocations.' -Use std::bind to avoid copy of std::function in lambda statements. Change: 129654152",grpc_call.h,"@@ -183,7 +183,7 @@ class Call : public UntypedCall { // call is cancelled by the client. void SetCancelCallback(std::function callback) { mutex_lock l(mu_); - cancel_callback_ = callback; + cancel_callback_ = std::move(callback); } // Clears any cancellation callback that has been registered for this call. ",0,train 8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths. 
-Use std::move when assigning std::function to reduce some simple allocations.' -Use std::bind to avoid copy of std::function in lambda statements. Change: 129654152",grpc_client_cq_tag.h,"@@ -29,7 +29,7 @@ namespace tensorflow { class GrpcClientCQTag { public: GrpcClientCQTag(::grpc::ClientContext* context, StatusCallback cb) - : context_(context), cb_(cb) {} + : context_(context), cb_(std::move(cb)) {} ~GrpcClientCQTag() { delete context_; } void OnCompleted(bool ok) { ",0,train 8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths. -Use std::move when assigning std::function to reduce some simple allocations.' -Use std::bind to avoid copy of std::function in lambda statements. Change: 129654152",grpc_remote_worker.cc,"@@ -46,41 +46,44 @@ class GrpcRemoteWorker : public WorkerInterface { GetStatusResponse* response, StatusCallback done) override { IssueRequest(request, response, &grpc::WorkerService::Stub::AsyncGetStatus, - done); + std::move(done)); } void RegisterGraphAsync(const RegisterGraphRequest* request, RegisterGraphResponse* response, StatusCallback done) override { IssueRequest(request, response, - &grpc::WorkerService::Stub::AsyncRegisterGraph, done); + &grpc::WorkerService::Stub::AsyncRegisterGraph, + std::move(done)); } void DeregisterGraphAsync(const DeregisterGraphRequest* request, DeregisterGraphResponse* response, StatusCallback done) override { IssueRequest(request, response, - &grpc::WorkerService::Stub::AsyncDeregisterGraph, done); + &grpc::WorkerService::Stub::AsyncDeregisterGraph, + std::move(done)); } void RunGraphAsync(CallOptions* call_opts, const RunGraphRequest* request, RunGraphResponse* response, StatusCallback done) override { IssueRequest(request, response, &grpc::WorkerService::Stub::AsyncRunGraph, - done, call_opts); + std::move(done), call_opts); } void CleanupGraphAsync(const CleanupGraphRequest* request, CleanupGraphResponse* response, StatusCallback done) override { IssueRequest(request, response, - &grpc::WorkerService::Stub::AsyncCleanupGraph, done); + &grpc::WorkerService::Stub::AsyncCleanupGraph, + std::move(done)); } void CleanupAllAsync(const CleanupAllRequest* request, CleanupAllResponse* response, StatusCallback done) override { IssueRequest(request, response, &grpc::WorkerService::Stub::AsyncCleanupAll, - done); + std::move(done)); } void RecvTensorAsync(CallOptions* call_opts, const RecvTensorRequest* request, ",0,train 8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths. -Use std::move when assigning std::function to reduce some simple allocations.' -Use std::bind to avoid copy of std::function in lambda statements. Change: 129654152",rpc_rendezvous_mgr.cc,"@@ -127,16 +127,20 @@ class RpcRecvTensorCall : public BaseRecvTensorCall { // Start the main RecvTensor call, checking for an async abort. void StartRTCall(std::function recv_done) { + using namespace std::placeholders; + StatusCallback cb = std::bind( + [this](std::function recv_done, + // Begin unbound arguments. 
+ const Status& s) { + if (!s.ok()) { + mutex_lock l(mu_); + status_.Update(s); + } + recv_done(); + }, + std::move(recv_done), _1); wi_->RecvTensorAsync(&opts_, &req_, &resp_, - nullptr /* TensorBufAllocator */, - // done callback - [this, recv_done](const Status& s) { - if (!s.ok()) { - mutex_lock l(mu_); - status_.Update(s); - } - recv_done(); - }); + nullptr /* TensorBufAllocator */, std::move(cb)); } string src_worker_; ",0,train 8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths. -Use std::move when assigning std::function to reduce some simple allocations.' -Use std::bind to avoid copy of std::function in lambda statements. Change: 129654152",sendrecv_ops.cc,"@@ -110,23 +110,27 @@ void RecvOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) { Rendezvous::Args args; args.device_context = ctx->op_device_context(); args.alloc_attrs = ctx->output_alloc_attr(0); - DoneCallback done_cb = std::move(done); - ctx->rendezvous()->RecvAsync( - parsed, args, - [ctx, done_cb](const Status& s, const Rendezvous::Args& send_args, - const Rendezvous::Args& recv_args, const Tensor& val, - bool is_dead) { + using namespace std::placeholders; + Rendezvous::DoneCallback done_cb = std::bind( + [ctx](DoneCallback done, + // Begin unbound arguments. + const Status& s, const Rendezvous::Args& send_args, + const Rendezvous::Args& recv_args, const Tensor& val, + bool is_dead) { ctx->SetStatus(s); if (s.ok()) { - // 'ctx' allocates the output tensor of the expected type. The - // runtime checks whether the tensor received here is the same type. + // 'ctx' allocates the output tensor of the expected type. + // The runtime checks whether the tensor received here is + // the same type. if (!is_dead) { ctx->set_output(0, val); } *ctx->is_output_dead() = is_dead; } - done_cb(); - }); + done(); + }, + std::move(done), _1, _2, _3, _4, _5); + ctx->rendezvous()->RecvAsync(parsed, args, std::move(done_cb)); } REGISTER_KERNEL_BUILDER(Name(""_Recv"").Device(DEVICE_CPU), RecvOp); ",0,train d59bdf549306f2746bf45c06161aeba6151b092d,tensorflow/tensorflow,"Add TpuExecutableInterface::fingerprint() virtual method. This also makes the TpuExecutable in tpu_on_demand_compiler.cc subclass TpuExecutableInterface, and implements the fingerprint() method for future use by JAX. I didn't implement it for the TpuExecutable class in tpu_executable.h, since TF doesn't need this functionality (yet?), but it shouldn't be too hard. PiperOrigin-RevId: 330842613 Change-Id: I592068c7b1110e0ae32b241e3e6c5a7b121f3e0f",tpu_on_demand_compiler.cc,"@@ -29,6 +29,7 @@ limitations under the License. 
#include ""tensorflow/stream_executor/tpu/c_api_decl.h"" #include ""tensorflow/stream_executor/tpu/proto_helper.h"" #include ""tensorflow/stream_executor/tpu/status_helper.h"" +#include ""tensorflow/stream_executor/tpu/tpu_executable_interface.h"" #include ""tensorflow/stream_executor/tpu/tpu_executor.h"" #include ""tensorflow/stream_executor/tpu/tpu_executor_c_api.h"" #include ""tensorflow/stream_executor/tpu/tpu_platform.h"" @@ -97,11 +98,11 @@ void XLA_HloModuleConfig_Free(XLA_HloModuleConfig* module_config) { } } -class TpuExecutable : public Executable { +class TpuExecutable : public TpuExecutableInterface { public: TpuExecutable(SE_Executable* se_executable, std::shared_ptr hlo_module) - : Executable(std::move(hlo_module), nullptr, nullptr), + : TpuExecutableInterface(std::move(hlo_module), nullptr, nullptr), se_executable_(se_executable) {} ~TpuExecutable() override { @@ -192,7 +193,31 @@ class TpuExecutable : public Executable { return output; } + absl::string_view fingerprint() const override { + const char* data; + size_t size; + ExecutorApiFn()->TpuExecutable_FingerprintFn(se_executable_, &data, &size); + return absl::string_view(data, size); + } + private: + Status LoadProgramAndEnqueueToStream( + const ServiceExecutableRunOptions& run_options, + absl::Span arguments, + stream_executor::DeviceMemoryBase result, + absl::optional + cross_program_prefetch_addr) override { + LOG(FATAL) << ""LoadProgramAndEnqueueToStream unimplemented""; + } + + Shape HostShapeToDeviceShape(const Shape& host_shape) override { + LOG(FATAL) << ""HostShapeToDeviceShape unimplemented""; + } + + int64 ShapeSize(const Shape& shape) override { + LOG(FATAL) << ""ShapeSize unimplemented""; + } + SE_Executable* se_executable_; }; ",0,train d59bdf549306f2746bf45c06161aeba6151b092d,tensorflow/tensorflow,"Add TpuExecutableInterface::fingerprint() virtual method. This also makes the TpuExecutable in tpu_on_demand_compiler.cc subclass TpuExecutableInterface, and implements the fingerprint() method for future use by JAX. I didn't implement it for the TpuExecutable class in tpu_executable.h, since TF doesn't need this functionality (yet?), but it shouldn't be too hard. PiperOrigin-RevId: 330842613 Change-Id: I592068c7b1110e0ae32b241e3e6c5a7b121f3e0f",tpu_executable.cc,"@@ -113,4 +113,9 @@ int64 TpuExecutable::ShapeSize(const Shape& shape) { return size; } +absl::string_view TpuExecutable::fingerprint() const { + // TODO(skye): the fingerprint can be plumbed through via core_program_ + LOG(FATAL) << ""TpuExecutable::fingerprint() unimplemented""; +} + } // namespace xla ",0,train d59bdf549306f2746bf45c06161aeba6151b092d,tensorflow/tensorflow,"Add TpuExecutableInterface::fingerprint() virtual method. This also makes the TpuExecutable in tpu_on_demand_compiler.cc subclass TpuExecutableInterface, and implements the fingerprint() method for future use by JAX. I didn't implement it for the TpuExecutable class in tpu_executable.h, since TF doesn't need this functionality (yet?), but it shouldn't be too hard. PiperOrigin-RevId: 330842613 Change-Id: I592068c7b1110e0ae32b241e3e6c5a7b121f3e0f",tpu_executable.h,"@@ -46,6 +46,8 @@ class TpuExecutable : public TpuExecutableInterface { const XLA_TpuProgram* core_program() const { return core_program_; } + absl::string_view fingerprint() const override; + private: Status LoadProgramAndEnqueueToStream( const ServiceExecutableRunOptions& run_options, ",0,train d59bdf549306f2746bf45c06161aeba6151b092d,tensorflow/tensorflow,"Add TpuExecutableInterface::fingerprint() virtual method. 
This also makes the TpuExecutable in tpu_on_demand_compiler.cc subclass TpuExecutableInterface, and implements the fingerprint() method for future use by JAX. I didn't implement it for the TpuExecutable class in tpu_executable.h, since TF doesn't need this functionality (yet?), but it shouldn't be too hard. PiperOrigin-RevId: 330842613 Change-Id: I592068c7b1110e0ae32b241e3e6c5a7b121f3e0f",tpu_executable_interface.h,"@@ -80,6 +80,8 @@ class TpuExecutableInterface : public Executable { absl::optional cross_program_prefetch_addr) = 0; + virtual absl::string_view fingerprint() const = 0; + protected: virtual Shape HostShapeToDeviceShape(const Shape& host_shape) = 0; ",0,train d59bdf549306f2746bf45c06161aeba6151b092d,tensorflow/tensorflow,"Add TpuExecutableInterface::fingerprint() virtual method. This also makes the TpuExecutable in tpu_on_demand_compiler.cc subclass TpuExecutableInterface, and implements the fingerprint() method for future use by JAX. I didn't implement it for the TpuExecutable class in tpu_executable.h, since TF doesn't need this functionality (yet?), but it shouldn't be too hard. PiperOrigin-RevId: 330842613 Change-Id: I592068c7b1110e0ae32b241e3e6c5a7b121f3e0f",tpu_executor_c_api.h,"@@ -300,6 +300,10 @@ TFTPU_CAPI_EXPORT void TpuExecutable_ExecuteAsyncOnStream( SE_HloExecutionProfile* hlo_execution_profile, SE_ExecutionOutput* output, SE_Status* status); +TFTPU_CAPI_EXPORT void TpuExecutable_Fingerprint(SE_Executable* executable, + const char** fingerprint, + size_t* size); + TFTPU_CAPI_EXPORT void TpuExecutable_Free(SE_Executable*); // Converts an XLA `Shape` into its equivalent TPU `Shape` representation. @@ -445,6 +449,7 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Compile); TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_ShapeSize); TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_ExecuteAsyncOnStream); + TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Fingerprint); TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Free); TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuShapeRepresentation); ",0,train 57f64fe469364417cfc6755c754abb54c2e3756b,tensorflow/tensorflow,revert unwanted typo,util_test.py,"@@ -785,7 +785,7 @@ class FillTriangularTest(test.TestCase): @test_util.with_c_api class ReduceWeightedLogSumExp(test.TestCase): - def _reduce_weighted_logsumexp(self, logx, w, axis, keepdims=False): + def _reduce_weighted_logsumexp(self, logx, w, axis, keep_dims=False): m = np.max(logx, axis=axis, keepdims=True) sum_ = np.sum(w * np.exp(logx - m), axis=axis, keepdims=keep_dims) sgn = np.sign(sum_) ",0,train f98110b559362b086409438b00f5b73c9d870b60,tensorflow/tensorflow,"[TF] Use compare and cast instead of floor for dropout. PiperOrigin-RevId: 229631446",layers_test.py,"@@ -1356,7 +1356,7 @@ class DropoutTest(test.TestCase): with self.cached_session(): images = np.random.uniform(size=(5, height, width, 3)) output = _layers.dropout(images) - self.assertEqual(output.op.name, 'Dropout/dropout_1/mul') + self.assertEqual(output.op.name, 'Dropout/dropout_1/mul_1') output.get_shape().assert_is_compatible_with( ops.convert_to_tensor(images).get_shape()) ",0,train f98110b559362b086409438b00f5b73c9d870b60,tensorflow/tensorflow,"[TF] Use compare and cast instead of floor for dropout. 
PiperOrigin-RevId: 229631446",parse_layer_parameters.py,"@@ -27,7 +27,8 @@ from tensorflow.python.platform import tf_logging as logging _UNCHANGED_RF_LAYER_OPS = [ ""Add"", ""BiasAdd"", ""Cast"", ""Ceil"", ""ConcatV2"", ""Const"", ""Floor"", ""FusedBatchNorm"", ""Identity"", ""Log"", ""Mul"", ""Pow"", ""RealDiv"", ""Relu"", - ""Relu6"", ""Round"", ""Rsqrt"", ""Softplus"", ""Sub"", ""VariableV2"", ""LRN"" + ""Relu6"", ""Round"", ""Rsqrt"", ""Softplus"", ""Sub"", ""VariableV2"", ""LRN"", + ""GreaterEqual"" ] # Different ways in which padding modes may be spelled. @@ -276,11 +277,11 @@ def get_layer_params(node, name_to_node, input_resolution=None, force=False): kernel_size_x, kernel_size_y = _conv_kernel_size(node, name_to_node) # Compute the padding for this node separately for each direction. total_padding_x, padding_x = _padding_size_conv_pool( - node, kernel_size_x, stride_x, input_resolution[1] - if input_resolution is not None else None) + node, kernel_size_x, stride_x, + input_resolution[1] if input_resolution is not None else None) total_padding_y, padding_y = _padding_size_conv_pool( - node, kernel_size_y, stride_y, input_resolution[0] - if input_resolution is not None else None) + node, kernel_size_y, stride_y, + input_resolution[0] if input_resolution is not None else None) elif node.op == ""Pad"": # Kernel and stride are simply 1 in this case. kernel_size_x = 1 @@ -294,11 +295,11 @@ def get_layer_params(node, name_to_node, input_resolution=None, force=False): kernel_size_x, kernel_size_y = _pool_kernel_size(node, name_to_node) # Compute the padding for this node separately for each direction. total_padding_x, padding_x = _padding_size_conv_pool( - node, kernel_size_x, stride_x, input_resolution[1] - if input_resolution is not None else None) + node, kernel_size_x, stride_x, + input_resolution[1] if input_resolution is not None else None) total_padding_y, padding_y = _padding_size_conv_pool( - node, kernel_size_y, stride_y, input_resolution[0] - if input_resolution is not None else None) + node, kernel_size_y, stride_y, + input_resolution[0] if input_resolution is not None else None) elif node.op in _UNCHANGED_RF_LAYER_OPS: # These nodes do not modify the RF parameters. kernel_size_x = 1 @@ -320,7 +321,7 @@ def get_layer_params(node, name_to_node, input_resolution=None, force=False): total_padding_y = None padding_y = None else: - raise ValueError(""Unknown layer for operation '%s': %s"" % (node.name, - node.op)) + raise ValueError( + ""Unknown layer for operation '%s': %s"" % (node.name, node.op)) return (kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x, padding_y, total_padding_x, total_padding_y) ",0,train f98110b559362b086409438b00f5b73c9d870b60,tensorflow/tensorflow,"[TF] Use compare and cast instead of floor for dropout. 
PiperOrigin-RevId: 229631446",topology_test.py,"@@ -358,17 +358,17 @@ class TopologyConstructionTest(keras_parameterized.TestCase): x = keras.layers.Dropout(0.5)(x, training=True) model = keras.models.Model(inp, x) # Would be `dropout/cond/Merge` by default - self.assertTrue(model.output.op.name.endswith('dropout/mul')) + self.assertTrue(model.output.op.name.endswith('dropout/mul_1')) # Test that argument is kept when applying the model inp2 = keras.layers.Input(shape=(2,)) out2 = model(inp2) - self.assertTrue(out2.op.name.endswith('dropout/mul')) + self.assertTrue(out2.op.name.endswith('dropout/mul_1')) # Test that argument is kept after loading a model config = model.get_config() model = keras.models.Model.from_config(config) - self.assertTrue(model.output.op.name.endswith('dropout/mul')) + self.assertTrue(model.output.op.name.endswith('dropout/mul_1')) def test_node_construction(self): # test basics ",0,train f98110b559362b086409438b00f5b73c9d870b60,tensorflow/tensorflow,"[TF] Use compare and cast instead of floor for dropout. PiperOrigin-RevId: 229631446",wrappers_test.py,"@@ -159,7 +159,7 @@ class TimeDistributedTest(test.TestCase): np.random.seed(1234) x = keras.layers.Input(shape=(3, 2)) y = keras.layers.TimeDistributed( - keras.layers.Dropout(.999))(x, training=True) + keras.layers.Dropout(.9999))(x, training=True) model = keras.models.Model(x, y) y = model.predict(np.random.random((10, 3, 2))) self.assertAllClose(np.mean(y), 0., atol=1e-1, rtol=1e-1) ",0,train f98110b559362b086409438b00f5b73c9d870b60,tensorflow/tensorflow,"[TF] Use compare and cast instead of floor for dropout. PiperOrigin-RevId: 229631446",nn_ops.py,"@@ -3292,15 +3292,13 @@ def dropout_v2(x, rate, noise_shape=None, seed=None, name=None): # pylint: disa return x noise_shape = _get_noise_shape(x, noise_shape) - - keep_prob = 1 - rate - # uniform [keep_prob, 1.0 + keep_prob) - random_tensor = keep_prob - random_tensor += random_ops.random_uniform( + # Sample a uniform distribution on [0.0, 1.0) and select values larger than + # rate. + random_tensor = random_ops.random_uniform( noise_shape, seed=seed, dtype=x.dtype) - # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob) - binary_tensor = math_ops.floor(random_tensor) - ret = math_ops.divide(x, keep_prob) * binary_tensor + keep_prob = 1 - rate + ret = (1 / keep_prob) * math_ops.cast(keep_prob >= random_tensor, + x.dtype) * x if not context.executing_eagerly(): ret.set_shape(x.get_shape()) return ret ",0,train 7ad1f3d479eaf46042c5254487cb74f7143010cd,tensorflow/tensorflow,"Refactor cross_device_ops so that when indexed slices are present in a batch, batched all-reduce can still be done on dense gradients. PiperOrigin-RevId: 233695231",cross_device_ops.py,"@@ -269,6 +269,8 @@ class CrossDeviceOps(object): ValueError: if `value_destination_pairs` is not a list or a tuple of tuples of PerReplica objects and destinations """""" + # TODO(yuefengz): if destinations are different, split into several + # `_batch_reduce` invocations. if not _validate_value_destination_pairs(value_destination_pairs): # If the first element of each pair is a tensor, we try to turn it into a # PerReplica object. 
@@ -374,8 +376,10 @@ class ReductionToOneDevice(CrossDeviceOps): super(ReductionToOneDevice, self).__init__() def reduce_implementation(self, reduce_op, per_replica_value, destinations): - assert check_destinations(destinations) - devices = get_devices_from(destinations) + if check_destinations(destinations): + devices = get_devices_from(destinations) + else: + devices = get_devices_from(per_replica_value) reduce_to_device = self.reduce_to_device or devices[0] logging.log_first_n( logging.INFO, @@ -653,29 +657,15 @@ class AllReduceCrossDeviceOps(CrossDeviceOps): self._num_packs = num_packs self._agg_small_grads_max_bytes = agg_small_grads_max_bytes self._agg_small_grads_max_group = agg_small_grads_max_group + self._simple_cross_replica_ops = ReductionToOneDevice() super(AllReduceCrossDeviceOps, self).__init__() def reduce_implementation(self, reduce_op, per_replica_value, destinations): - contains_indexed_slices = cross_device_utils.contains_indexed_slices( - per_replica_value) - if (_devices_match(per_replica_value, destinations) - and not context.executing_eagerly() - and not contains_indexed_slices): + if _devices_match(per_replica_value, destinations): return self._batch_all_reduce(reduce_op, [per_replica_value])[0] else: - if contains_indexed_slices: - logging.log_first_n( - logging.WARN, - ""Efficient allreduce is not supported for IndexedSlices."", 10) - - if check_destinations(destinations): - devices = get_devices_from(destinations) - else: - devices = get_devices_from(per_replica_value) - reduce_to_device = devices[0] - reduced = _simple_reduce(per_replica_value, reduce_to_device, - math_ops.add_n, reduce_op) - return self.broadcast(reduced, destinations) + return self._simple_cross_replica_ops.reduce(reduce_op, per_replica_value, + destinations) def batch_reduce_implementation(self, reduce_op, value_destination_pairs): all_devices_match = _all_devices_match(value_destination_pairs) @@ -699,14 +689,31 @@ class AllReduceCrossDeviceOps(CrossDeviceOps): def _batch_all_reduce(self, reduce_op, per_replica_values): """"""All-reduce algorithm in a batch."""""" + dense_values, dense_indices, sparse_values, sparse_indices = ( + cross_device_utils.split_by_sparsity(per_replica_values)) + if dense_values: + dense_results = self._do_batch_all_reduce(reduce_op, dense_values) + else: + dense_results = [] + if sparse_values: + sparse_results = self._do_batch_all_reduce_sparse(reduce_op, + sparse_values) + else: + sparse_results = [] + return cross_device_utils.stitch_values(((dense_results, dense_indices), + (sparse_results, sparse_indices))) + + def _do_batch_all_reduce(self, reduce_op, dense_values): + """"""Run batch all-reduces."""""" logging.log_first_n( logging.INFO, ""batch_all_reduce invoked for batches size = %d with "" ""algorithm = %s, num_packs = %d, agg_small_grads_max_bytes = %d and "" ""agg_small_grads_max_group = %d"" % - (len(per_replica_values), self._all_reduce_alg, self._num_packs, + (len(dense_values), self._all_reduce_alg, self._num_packs, self._agg_small_grads_max_bytes, self._agg_small_grads_max_group), 10) - destinations = per_replica_values[0].devices - grouped = _group_value_by_device(per_replica_values) + + destinations = dense_values[0].devices + grouped = _group_value_by_device(dense_values) device_grad_packs, tensor_packer = _pack_tensors( grouped, self._num_packs, self._agg_small_grads_max_bytes, @@ -727,7 +734,18 @@ class AllReduceCrossDeviceOps(CrossDeviceOps): destinations, device_grad_packs)) reduced = _unpack_tensors(reduced, tensor_packer) - return 
_ungroup_and_make_mirrored(reduced, per_replica_values[0], reduce_op) + return _ungroup_and_make_mirrored(reduced, dense_values[0], reduce_op) + + def _do_batch_all_reduce_sparse(self, reduce_op, sparse_values): + """"""Run batch all-reduce for sparse values."""""" + logging.log_first_n( + logging.WARN, + ""Efficient allreduce is not supported for %d IndexedSlices"" % + len(sparse_values), 10) + # Use `sparse_values` as destinations to do all-reduces. It is effectively + # an allgather under the hood but not an efficient one. + return self._simple_cross_replica_ops.batch_reduce( + reduce_op, zip(sparse_values, sparse_values)) # For compatibility with code using the old name of `AllReduceCrossDeviceOps`. ",0,train 7ad1f3d479eaf46042c5254487cb74f7143010cd,tensorflow/tensorflow,"Refactor cross_device_ops so that when indexed slices are present in a batch, batched all-reduce can still be done on dense gradients. PiperOrigin-RevId: 233695231",cross_device_utils.py,"@@ -681,3 +681,58 @@ def contains_indexed_slices(value): return contains_indexed_slices(value.values) else: return False + + +def is_indexed_slices(value): + if isinstance(value, ops.IndexedSlices): + return True + assert isinstance(value, value_lib.DistributedValues) + return all([isinstance(v, ops.IndexedSlices) for v in value.values]) + + +def split_by_sparsity(values): + """"""Split values into dense and sparse values. + + Args: + values: a list of tensors or `PerReplica`s. + + Returns: + Four lists: + a list of dense values, a list of their indices in `values` and + a list of sparse values, a list of their indices in `values`. + """""" + dense_values = [] + dense_indices = [] + sparse_values = [] + sparse_indices = [] + for i, v in enumerate(values): + if is_indexed_slices(v): + sparse_values.append(v) + sparse_indices.append(i) + else: + dense_values.append(v) + dense_indices.append(i) + return dense_values, dense_indices, sparse_values, sparse_indices + + +def stitch_values(values_and_indices_list): + """"""Stitch values together according to their indices. + + Args: + values_and_indices_list: a list of tuples of values and indices indicating + the values and postions in the returned list. + + Returns: + a stitched list of values. + """""" + length = 0 + for values_and_indices in values_and_indices_list: + length += len(values_and_indices[0]) + + result = [None] * length + for values_and_indices in values_and_indices_list: + if values_and_indices and values_and_indices[0]: + for v, i in zip(*values_and_indices): + assert result[i] is None + result[i] = v + return result ",0,train 5278fa03a9e703d1e414ccebd858f7fdf22dbba5,tensorflow/tensorflow,"Make quant_delay work even if user didn't create global step. PiperOrigin-RevId: 174937793",quantize.py,"@@ -387,7 +387,7 @@ class _QuantizeContext(object): if delay_requested and self.quant_delay and self.quant_delay > 0: activate_quant = math_ops.greater_equal( - training_util.get_global_step(), + training_util.get_or_create_global_step(), self.quant_delay, name=scope + '/activate_quant') quant = control_flow_ops.cond( ",0,train f5ea388e48a38b935ebd36442f756c8974b7ce3f,tensorflow/tensorflow,"Implement ZlibInputStream::Tell() by keeping track of the number of bytes consumed by the reader. 
PiperOrigin-RevId: 172634455",zlib_buffers_test.cc,"@@ -68,25 +68,25 @@ void TestAllCombinations(CompressionOptions input_options, for (auto input_buf_size : InputBufferSizes()) { for (auto output_buf_size : OutputBufferSizes()) { std::unique_ptr file_writer; - TF_CHECK_OK(env->NewWritableFile(fname, &file_writer)); + TF_ASSERT_OK(env->NewWritableFile(fname, &file_writer)); string result; ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size, output_options); - TF_CHECK_OK(out.Init()); + TF_ASSERT_OK(out.Init()); - TF_CHECK_OK(out.Append(StringPiece(data))); - TF_CHECK_OK(out.Close()); - TF_CHECK_OK(file_writer->Flush()); - TF_CHECK_OK(file_writer->Close()); + TF_ASSERT_OK(out.Append(StringPiece(data))); + TF_ASSERT_OK(out.Close()); + TF_ASSERT_OK(file_writer->Flush()); + TF_ASSERT_OK(file_writer->Close()); std::unique_ptr file_reader; - TF_CHECK_OK(env->NewRandomAccessFile(fname, &file_reader)); + TF_ASSERT_OK(env->NewRandomAccessFile(fname, &file_reader)); std::unique_ptr input_stream( new RandomAccessInputStream(file_reader.get())); ZlibInputStream in(input_stream.get(), input_buf_size, output_buf_size, input_options); - TF_EXPECT_OK(in.ReadNBytes(data.size(), &result)); + TF_ASSERT_OK(in.ReadNBytes(data.size(), &result)); EXPECT_EQ(result, data); } } @@ -118,24 +118,24 @@ void TestMultipleWrites(uint8 input_buf_size, uint8 output_buf_size, string actual_result; string expected_result; - TF_CHECK_OK(env->NewWritableFile(fname, &file_writer)); + TF_ASSERT_OK(env->NewWritableFile(fname, &file_writer)); ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size, output_options); - TF_CHECK_OK(out.Init()); + TF_ASSERT_OK(out.Init()); for (int i = 0; i < num_writes; i++) { - TF_CHECK_OK(out.Append(StringPiece(data))); + TF_ASSERT_OK(out.Append(StringPiece(data))); if (with_flush) { - TF_CHECK_OK(out.Flush()); + TF_ASSERT_OK(out.Flush()); } strings::StrAppend(&expected_result, data); } - TF_CHECK_OK(out.Close()); - TF_CHECK_OK(file_writer->Flush()); - TF_CHECK_OK(file_writer->Close()); + TF_ASSERT_OK(out.Close()); + TF_ASSERT_OK(file_writer->Flush()); + TF_ASSERT_OK(file_writer->Close()); std::unique_ptr file_reader; - TF_CHECK_OK(env->NewRandomAccessFile(fname, &file_reader)); + TF_ASSERT_OK(env->NewRandomAccessFile(fname, &file_reader)); std::unique_ptr input_stream( new RandomAccessInputStream(file_reader.get())); ZlibInputStream in(input_stream.get(), input_buf_size, output_buf_size, @@ -143,7 +143,7 @@ void TestMultipleWrites(uint8 input_buf_size, uint8 output_buf_size, for (int i = 0; i < num_writes; i++) { string decompressed_output; - TF_EXPECT_OK(in.ReadNBytes(data.size(), &decompressed_output)); + TF_ASSERT_OK(in.ReadNBytes(data.size(), &decompressed_output)); strings::StrAppend(&actual_result, decompressed_output); } @@ -170,19 +170,19 @@ TEST(ZlibInputStream, FailsToReadIfWindowBitsAreIncompatible) { string data = GenTestString(10); std::unique_ptr file_writer; - TF_CHECK_OK(env->NewWritableFile(fname, &file_writer)); + TF_ASSERT_OK(env->NewWritableFile(fname, &file_writer)); string result; ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size, output_options); - TF_CHECK_OK(out.Init()); + TF_ASSERT_OK(out.Init()); - TF_CHECK_OK(out.Append(StringPiece(data))); - TF_CHECK_OK(out.Close()); - TF_CHECK_OK(file_writer->Flush()); - TF_CHECK_OK(file_writer->Close()); + TF_ASSERT_OK(out.Append(StringPiece(data))); + TF_ASSERT_OK(out.Close()); + TF_ASSERT_OK(file_writer->Flush()); + TF_ASSERT_OK(file_writer->Close()); std::unique_ptr 
file_reader; - TF_CHECK_OK(env->NewRandomAccessFile(fname, &file_reader)); + TF_ASSERT_OK(env->NewRandomAccessFile(fname, &file_reader)); std::unique_ptr input_stream( new RandomAccessInputStream(file_reader.get())); ZlibInputStream in(input_stream.get(), input_buf_size, output_buf_size, @@ -192,5 +192,129 @@ TEST(ZlibInputStream, FailsToReadIfWindowBitsAreIncompatible) { CHECK(read_status.error_message().find(""inflate() failed"") != string::npos); } +void WriteCompressedFile(Env* env, const string& fname, int input_buf_size, + int output_buf_size, + const CompressionOptions& output_options, + const string& data) { + std::unique_ptr file_writer; + TF_ASSERT_OK(env->NewWritableFile(fname, &file_writer)); + + ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size, + output_options); + TF_ASSERT_OK(out.Init()); + + TF_ASSERT_OK(out.Append(StringPiece(data))); + TF_ASSERT_OK(out.Close()); + TF_ASSERT_OK(file_writer->Flush()); + TF_ASSERT_OK(file_writer->Close()); +} + +void TestTell(CompressionOptions input_options, + CompressionOptions output_options) { + Env* env = Env::Default(); + string fname = testing::TmpDir() + ""/zlib_buffers_test""; + for (auto file_size : NumCopies()) { + string data = GenTestString(file_size); + for (auto input_buf_size : InputBufferSizes()) { + for (auto output_buf_size : OutputBufferSizes()) { + // Write the compressed file. + WriteCompressedFile(env, fname, input_buf_size, output_buf_size, + output_options, data); + + // Boiler-plate to set up ZlibInputStream. + std::unique_ptr file_reader; + TF_ASSERT_OK(env->NewRandomAccessFile(fname, &file_reader)); + std::unique_ptr input_stream( + new RandomAccessInputStream(file_reader.get())); + ZlibInputStream in(input_stream.get(), input_buf_size, output_buf_size, + input_options); + + string first_half(data, 0, data.size() / 2); + string bytes_read; + + // Read the first half of the uncompressed file and expect that Tell() + // returns half the uncompressed length of the file. + TF_ASSERT_OK(in.ReadNBytes(first_half.size(), &bytes_read)); + EXPECT_EQ(in.Tell(), first_half.size()); + EXPECT_EQ(bytes_read, first_half); + + // Read the remaining half of the uncompressed file and expect that + // Tell() points past the end of file. + string second_half; + TF_ASSERT_OK( + in.ReadNBytes(data.size() - first_half.size(), &second_half)); + EXPECT_EQ(in.Tell(), data.size()); + bytes_read.append(second_half); + + // Expect that the file is correctly read. + EXPECT_EQ(bytes_read, data); + } + } + } +} + +void TestSkipNBytes(CompressionOptions input_options, + CompressionOptions output_options) { + Env* env = Env::Default(); + string fname = testing::TmpDir() + ""/zlib_buffers_test""; + for (auto file_size : NumCopies()) { + string data = GenTestString(file_size); + for (auto input_buf_size : InputBufferSizes()) { + for (auto output_buf_size : OutputBufferSizes()) { + // Write the compressed file. + WriteCompressedFile(env, fname, input_buf_size, output_buf_size, + output_options, data); + + // Boiler-plate to set up ZlibInputStream. + std::unique_ptr file_reader; + TF_ASSERT_OK(env->NewRandomAccessFile(fname, &file_reader)); + std::unique_ptr input_stream( + new RandomAccessInputStream(file_reader.get())); + ZlibInputStream in(input_stream.get(), input_buf_size, output_buf_size, + input_options); + + size_t data_half_size = data.size() / 2; + string second_half(data, data_half_size, data.size() - data_half_size); + + // Skip past the first half of the file and expect Tell() returns + // correctly. 
+ TF_ASSERT_OK(in.SkipNBytes(data_half_size)); + EXPECT_EQ(in.Tell(), data_half_size); + + // Expect that second half is read correctly and Tell() returns past + // end of file after reading complete file. + string bytes_read; + TF_ASSERT_OK(in.ReadNBytes(second_half.size(), &bytes_read)); + EXPECT_EQ(bytes_read, second_half); + EXPECT_EQ(in.Tell(), data.size()); + } + } + } +} + +TEST(ZlibInputStream, TellDefaultOptions) { + TestTell(CompressionOptions::DEFAULT(), CompressionOptions::DEFAULT()); +} + +TEST(ZlibInputStream, TellRawDeflate) { + TestTell(CompressionOptions::RAW(), CompressionOptions::RAW()); +} + +TEST(ZlibInputStream, TellGzip) { + TestTell(CompressionOptions::GZIP(), CompressionOptions::GZIP()); +} + +TEST(ZlibInputStream, SkipNBytesDefaultOptions) { + TestSkipNBytes(CompressionOptions::DEFAULT(), CompressionOptions::DEFAULT()); +} + +TEST(ZlibInputStream, SkipNBytesRawDeflate) { + TestSkipNBytes(CompressionOptions::RAW(), CompressionOptions::RAW()); +} + +TEST(ZlibInputStream, SkipNBytesGzip) { + TestSkipNBytes(CompressionOptions::GZIP(), CompressionOptions::GZIP()); +} + } // namespace io } // namespace tensorflow ",0,test f5ea388e48a38b935ebd36442f756c8974b7ce3f,tensorflow/tensorflow,"Implement ZlibInputStream::Tell() by keeping track of the number of bytes consumed by the reader. PiperOrigin-RevId: 172634455",zlib_inputstream.cc,"@@ -32,7 +32,8 @@ ZlibInputStream::ZlibInputStream( z_stream_input_(new Bytef[input_buffer_capacity_]), z_stream_output_(new Bytef[output_buffer_capacity_]), zlib_options_(zlib_options), - z_stream_(new z_stream) { + z_stream_(new z_stream), + bytes_read_(0) { InitZlibBuffer(); } @@ -45,6 +46,7 @@ ZlibInputStream::~ZlibInputStream() { Status ZlibInputStream::Reset() { TF_RETURN_IF_ERROR(input_stream_->Reset()); InitZlibBuffer(); + bytes_read_ = 0; return Status::OK(); } @@ -127,6 +129,7 @@ size_t ZlibInputStream::ReadBytesFromCache(size_t bytes_to_read, result->append(next_unread_byte_, can_read_bytes); next_unread_byte_ += can_read_bytes; } + bytes_read_ += can_read_bytes; return can_read_bytes; } @@ -170,8 +173,7 @@ Status ZlibInputStream::ReadNBytes(int64 bytes_to_read, string* result) { return Status::OK(); } -// TODO(srbs): Implement this. -int64 ZlibInputStream::Tell() const { return -1; } +int64 ZlibInputStream::Tell() const { return bytes_read_; } Status ZlibInputStream::Inflate() { int error = inflate(z_stream_.get(), zlib_options_.flush_mode); ",0,test f5ea388e48a38b935ebd36442f756c8974b7ce3f,tensorflow/tensorflow,"Implement ZlibInputStream::Tell() by keeping track of the number of bytes consumed by the reader. PiperOrigin-RevId: 172634455",zlib_inputstream.h,"@@ -132,6 +132,9 @@ class ZlibInputStream : public InputStreamInterface { // Returns the size of [next_unread_byte_, z_stream_->next_out) size_t NumUnreadBytes() const; + // Number of *uncompressed* bytes that have been read from this stream. + int64 bytes_read_; + TF_DISALLOW_COPY_AND_ASSIGN(ZlibInputStream); }; ",0,test f0a968651119a7dd17e727664c4741eaf737e839,tensorflow/tensorflow,Linter fixes,retrain.py,"@@ -41,7 +41,6 @@ The subfolder names are important, since they define what label is applied to each image, but the filenames themselves don't matter. Once your images are prepared, you can run the training with a command like this: - ```bash bazel build tensorflow/examples/image_retraining:retrain && \ bazel-bin/tensorflow/examples/image_retraining/retrain \ @@ -70,12 +69,14 @@ on resource-limited platforms, you can try the `--architecture` flag with a Mobilenet model. 
For example: Run floating-point version of mobilenet: + ```bash python tensorflow/examples/image_retraining/retrain.py \ --image_dir ~/flower_photos --architecture mobilenet_1.0_224 ``` Run quantized version of mobilenet: + ```bash python tensorflow/examples/image_retraining/retrain.py \ --image_dir ~/flower_photos/ --architecture mobilenet_1.0_224_quantized @@ -98,8 +99,10 @@ tensorboard --logdir /tmp/retrain_logs To use with Tensorflow Serving: -tensorflow_model_server --port=9000 --model_name=inception --model_base_path=/tmp/saved_models/ - +```bash +tensorflow_model_server --port=9000 --model_name=inception \ + --model_base_path=/tmp/saved_models/ +``` """""" from __future__ import absolute_import from __future__ import division @@ -1026,24 +1029,25 @@ def export_model(sess, architecture, saved_model_dir): inputs = {'image': tf.saved_model.utils.build_tensor_info(in_image)} out_classes = sess.graph.get_tensor_by_name('final_result:0') - outputs = {'prediction': tf.saved_model.utils.build_tensor_info(out_classes)} + outputs = {'prediction': + tf.saved_model.utils.build_tensor_info(out_classes)} signature = tf.saved_model.signature_def_utils.build_signature_def( - inputs=inputs, - outputs=outputs, - method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME - ) + inputs=inputs, + outputs=outputs, + method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME) legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op') # Save out the SavedModel. builder = tf.saved_model.builder.SavedModelBuilder(saved_model_dir) builder.add_meta_graph_and_variables( - sess, [tf.saved_model.tag_constants.SERVING], - signature_def_map={ - tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature - }, - legacy_init_op=legacy_init_op) + sess, [tf.saved_model.tag_constants.SERVING], + signature_def_map = { + tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: + signature + }, + legacy_init_op=legacy_init_op) builder.save() ",0,train efd51f0b45f62399f1ad7a44348e928bfdeaf1c7,tensorflow/tensorflow,"Add an example of using tf.learn's random forest on mnist. Change: 128472012",random_forest_mnist.py,"@@ -0,0 +1,78 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the ""License""); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an ""AS IS"" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +""""""A stand-alone example for tf.learn's random forest model on mnist."""""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tempfile + +import tensorflow as tf + +from tensorflow.contrib.learn.python.learn.estimators import random_forest +from tensorflow.examples.tutorials.mnist import input_data + +flags = tf.app.flags +FLAGS = flags.FLAGS + +flags.DEFINE_string('model_dir', '', 'Base directory for output models.') +flags.DEFINE_string('data_dir', '/tmp/data/', 'Directory for storing data') + +flags.DEFINE_integer('train_steps', 1000, 'Number of training steps.') +flags.DEFINE_string('batch_size', 1000, + 'Number of examples in a training batch.') +flags.DEFINE_integer('num_trees', 100, 'Number of trees in the forest.') +flags.DEFINE_integer('max_nodes', 1000, 'Max total nodes in a single tree.') + + +def build_estimator(model_dir): + """"""Build an estimator."""""" + params = tf.contrib.tensor_forest.python.tensor_forest.ForestHParams( + num_classes=10, num_features=784, + num_trees=FLAGS.num_trees, max_nodes=FLAGS.max_nodes) + return random_forest.TensorForestEstimator(params, model_dir=model_dir) + + +def train_and_eval(): + """"""Train and evaluate the model."""""" + model_dir = tempfile.mkdtemp() if not FLAGS.model_dir else FLAGS.model_dir + print('model directory = %s' % model_dir) + + estimator = build_estimator(model_dir) + + # TensorForest's LossMonitor allows training to terminate early if the + # forest is no longer growing. + early_stopping_rounds = 100 + check_every_n_steps = 100 + monitor = random_forest.LossMonitor(early_stopping_rounds, + check_every_n_steps) + + mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False) + + estimator.fit(x=mnist.train.images, y=mnist.train.labels, + batch_size=FLAGS.batch_size, monitors=[monitor]) + + results = estimator.evaluate(x=mnist.test.images, y=mnist.test.labels, + batch_size=FLAGS.batch_size) + for key in sorted(results): + print('%s: %s' % (key, results[key])) + + +def main(_): + train_and_eval() + + +if __name__ == '__main__': + tf.app.run() ",0,train f3f05c8fd6ab935a614337af033f413e262db301,tensorflow/tensorflow,"[XLA] Remove useless log message when dumping HLO GraphDef. This produces too much output that is not helpful. 
Change: 155212076",hlo_tfgraph_builder.cc,"@@ -68,9 +68,8 @@ void CleanNodeName(string* name) { } Status HloTfGraphBuilder::AddComputation(const HloComputation& computation) { - LOG(INFO) << ""Adding computation "" << computation.name(); + VLOG(2) << ""Adding computation "" << computation.name(); for (auto embedded : computation.MakeEmbeddedComputationsList()) { - LOG(INFO) << ""Adding embedded computation "" << embedded->name(); for (auto& instruction : embedded->instructions()) { TF_RETURN_IF_ERROR(AddInstruction(instruction.get())); } ",0,train cb9ba66ffcca6857c823cad05550296bf213aafb,tensorflow/tensorflow,"Fix crash of GFile in python 3.7 This fix tries to address the issue raised in 27276 where in Python 3.7, opening a zip file (of GFile) will results in the error of ``` bytes = self.zip.open(key) File ""/usr/lib64/python3.7/zipfile.py"", line 1480, in open self._fpclose, self._lock, lambda: self._writing) File ""/usr/lib64/python3.7/zipfile.py"", line 722, in __init__ self.seekable = file.seekable AttributeError: 'GFile' object has no attribute 'seekable' ``` The issue is that Python 3.7 adds seekable check: https://github.com/python/cpython/commit/066df4fd454d6ff9be66e80b2a65995b10af174f This fix adds `seekable()` and returns True, as GFile is indeed seekable. This fix fixes 27276 Signed-off-by: Yong Tang ",file_io.py,"@@ -246,6 +246,10 @@ class FileIO(object): pywrap_tensorflow.Set_TF_Status_from_Status(status, ret_status) self._writable_file = None + def seekable(self): + """"""Returns True as FileIO supports random access ops of seek()/tell()"""""" + return True + @tf_export(v1=[""gfile.Exists""]) def file_exists(filename): ",0,train ec87f72b9a80dee361c9c522c896edbda257c2f5,tensorflow/tensorflow,"support MatrixDiagV2 op PiperOrigin-RevId: 256070251",convert_matrix_diag_v2_to_v1.cc,"@@ -0,0 +1,101 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include ""tensorflow/core/lib/core/errors.h"" +#include ""tensorflow/core/platform/logging.h"" +#include ""tensorflow/lite/toco/graph_transformations/graph_transformations.h"" +#include ""tensorflow/lite/toco/model.h"" +#include ""tensorflow/lite/toco/tooling_util.h"" + +namespace toco { + +::tensorflow::Status ConvertMatrixDiagV2ToV1::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; + auto it = model->operators.begin() + op_index; + const auto* op = it->get(); + if (op->type != OperatorType::kMatrixDiagV2) { + return ::tensorflow::Status::OK(); + } + + if (op->inputs.size() != 5) { + return tensorflow::errors::InvalidArgument( + ""The input size of op %s should be 5"", LogName(*op)); + } + + const auto& input_k = model->GetArray(op->inputs[1]); + const auto& input_num_rows = model->GetArray(op->inputs[2]); + const auto& input_num_cols = model->GetArray(op->inputs[3]); + const auto& input_padding_value = model->GetArray(op->inputs[4]); + + if (!input_k.buffer || !input_num_rows.buffer || !input_num_cols.buffer || + !input_padding_value.buffer) { + return ::tensorflow::Status::OK(); + } + + if (input_k.GetBuffer().data.size() != 1 || + input_num_rows.GetBuffer().data.size() != 1 || + input_num_cols.GetBuffer().data.size() != 1) { + return tensorflow::errors::InvalidArgument( + ""Array for argument k / num_rows / num_cols of op "", LogName(*op), + "" should contains exact one element""); + } + + int k = input_k.GetBuffer().data[0]; + int num_rows = input_num_rows.GetBuffer().data[0]; + int num_cols = input_num_cols.GetBuffer().data[0]; + const auto& padding_value_vector = + input_padding_value.GetBuffer().data; + + if (k != 0) { + return tensorflow::errors::InvalidArgument( + ""parameter k of op "", LogName(*op), + "" is expected to be 0, other values are not supported currently""); + } + + if (num_rows != -1) { + return tensorflow::errors::InvalidArgument( + ""parameter num_rows of op "", LogName(*op), + "" is expected to be -1, other values are not supported currently""); + } + + if (num_cols != -1) { + return tensorflow::errors::InvalidArgument( + ""parameter num_cols of op "", LogName(*op), + "" is expected to be -1, other values are not supported currently""); + } + for (auto byte : padding_value_vector) { + if (byte != 0) { + return tensorflow::errors::InvalidArgument( + ""parameter padding_value of op "", LogName(*op), + "" is expected to be 0, other values are not supported currently""); + } + } + + auto* matrix_diag_op = new MatrixDiagOperator; + matrix_diag_op->inputs.push_back(op->inputs[0]); + matrix_diag_op->outputs.push_back(op->outputs[0]); + + AddMessageF(""Replacing %s with %s"", LogName(*op), LogName(*matrix_diag_op)); + + // Replace the operator in the graph. 
+ model->operators.emplace(it, matrix_diag_op); + DeleteOpAndArrays(model, op); + + *modified = true; + return ::tensorflow::Status::OK(); +} + +} // namespace toco ",0,train ec87f72b9a80dee361c9c522c896edbda257c2f5,tensorflow/tensorflow,"support MatrixDiagV2 op PiperOrigin-RevId: 256070251",graph_transformations.h,"@@ -123,13 +123,14 @@ inline void RunGraphTransformations( // List of all graph transformations DECLARE_GRAPH_TRANSFORMATION(ConvertExpandDimsToReshape) +DECLARE_GRAPH_TRANSFORMATION(ConvertMatrixDiagV2ToV1) DECLARE_GRAPH_TRANSFORMATION(ConvertPureConvToDepthwise) +DECLARE_GRAPH_TRANSFORMATION(ConvertReorderAxes) DECLARE_GRAPH_TRANSFORMATION(ConvertSqueezeToReshape) DECLARE_GRAPH_TRANSFORMATION(ConvertTrivialAddNToAdd) DECLARE_GRAPH_TRANSFORMATION(ConvertTrivialPackToReshape) DECLARE_GRAPH_TRANSFORMATION(ConvertTrivialTileToConcat) DECLARE_GRAPH_TRANSFORMATION(ConvertTrivialTransposeToReshape) -DECLARE_GRAPH_TRANSFORMATION(ConvertReorderAxes) DECLARE_GRAPH_TRANSFORMATION(EnsureBiasVectors) DECLARE_GRAPH_TRANSFORMATION(FuseActivationFunctions) DECLARE_GRAPH_TRANSFORMATION(FuseBinaryIntoFollowingAffine) ",0,train ec87f72b9a80dee361c9c522c896edbda257c2f5,tensorflow/tensorflow,"support MatrixDiagV2 op PiperOrigin-RevId: 256070251",propagate_fixed_sizes.cc,"@@ -2426,6 +2426,10 @@ void ProcessMatrixSetDiagOperator(Model* model, MatrixSetDiagOperator* op) { // The sizes of the outputs are only known in runtime based on the input. // Ignore shape progapation here and defer that to the interpreter. break; + case OperatorType::kMatrixDiagV2: + // MatrixDiagV2 operators are converted to MatrixDiag, after which their + // shapes are propagated. + break; default: // Unimplemented, another graph transformation should drop it. LOG(FATAL) << ""Unhandled operator type "" << OperatorTypeName(op->type); ",0,train ec87f72b9a80dee361c9c522c896edbda257c2f5,tensorflow/tensorflow,"support MatrixDiagV2 op PiperOrigin-RevId: 256070251",import_tensorflow.cc,"@@ -2516,6 +2516,7 @@ ConverterMapType GetTensorFlowNodeConverterMap() { {""LogSoftmax"", ConvertSimpleOperator}, {""MatMul"", ConvertMatMulOperator}, {""MatrixDiag"", ConvertSimpleOperator}, + {""MatrixDiagV2"", ConvertSimpleOperator}, {""MatrixSetDiag"", ConvertSimpleOperator}, {""Max"", ConvertReduceOperator}, {""MaxPool"", ConvertMaxPoolOperator}, ",0,train ec87f72b9a80dee361c9c522c896edbda257c2f5,tensorflow/tensorflow,"support MatrixDiagV2 op PiperOrigin-RevId: 256070251",model.h,"@@ -172,7 +172,8 @@ enum class OperatorType : uint8 { kElu, kReverseSequence, kMatrixDiag, - kMatrixSetDiag + kMatrixSetDiag, + kMatrixDiagV2, }; // Helper to deal with TensorFlow arrays using a different ordering of @@ -2109,6 +2110,14 @@ struct MatrixDiagOperator : Operator { MatrixDiagOperator() : Operator(OperatorType::kMatrixDiag) {} }; +// Matrix Diag Operator V2: +// Construct a batched diagonal tensor with given batched diagonal values. +// Not fully supported, constains 4 extra inputs compared to MatrixDiag, support +// default parameters settings which performs the same as MatrixDiag +struct MatrixDiagV2Operator : Operator { + MatrixDiagV2Operator() : Operator(OperatorType::kMatrixDiagV2) {} +}; + // Matrix Set Diag Operator: // Construct a batched diagonal tensor with given input and diagonal values. // Input is a rank (k+1) tensor of values. 
",0,train ec87f72b9a80dee361c9c522c896edbda257c2f5,tensorflow/tensorflow,"support MatrixDiagV2 op PiperOrigin-RevId: 256070251",toco_tooling.cc,"@@ -54,6 +54,7 @@ void MakeGeneralGraphTransformationsSet( GraphTransformationsSet* transformations) { CHECK(transformations->empty()); transformations->Add(new ConvertExpandDimsToReshape); + transformations->Add(new ConvertMatrixDiagV2ToV1); transformations->Add(new ConvertSqueezeToReshape); transformations->Add(new ConvertTrivialAddNToAdd); transformations->Add(new ConvertTrivialPackToReshape); ",0,train ec87f72b9a80dee361c9c522c896edbda257c2f5,tensorflow/tensorflow,"support MatrixDiagV2 op PiperOrigin-RevId: 256070251",tooling_util.cc,"@@ -447,6 +447,7 @@ const char* OperatorTypeName(OperatorType type) { HANDLE_OPERATORTYPENAME_CASE(ReverseSequence) HANDLE_OPERATORTYPENAME_CASE(MatrixDiag) HANDLE_OPERATORTYPENAME_CASE(MatrixSetDiag) + HANDLE_OPERATORTYPENAME_CASE(MatrixDiagV2) default: LOG(FATAL) << ""Unhandled op type""; #undef HANDLE_OPERATORTYPENAME_CASE ",0,train da8e7314544aa39d85d9bb111645077d1692ae05,tensorflow/tensorflow,"Fixed flaky test by increasing grace duration. PiperOrigin-RevId: 335535674 Change-Id: Idff0caaedf9585f19ab15d51ecaa5a0495bf337c",profiler_client_test.cc,"@@ -125,7 +125,7 @@ TEST(RemoteProfilerSession, LongDuration) { absl::Time approx_start = absl::Now(); // Empirically determined value. - absl::Duration grace = absl::Seconds(2); + absl::Duration grace = absl::Seconds(20); absl::Duration max_duration = duration + grace; const absl::Time deadline = approx_start + max_duration; ",0,train da8e7314544aa39d85d9bb111645077d1692ae05,tensorflow/tensorflow,"Fixed flaky test by increasing grace duration. PiperOrigin-RevId: 335535674 Change-Id: Idff0caaedf9585f19ab15d51ecaa5a0495bf337c",remote_profiler_session_manager_test.cc,"@@ -100,7 +100,8 @@ TEST(RemoteProfilerSessionManagerTest, LongSession) { auto server = StartServer(duration, &service_addresses); options.add_service_addresses(service_addresses); absl::Time approx_start = absl::Now(); - absl::Duration grace = absl::Seconds(2); + // Empirically determined value. + absl::Duration grace = absl::Seconds(20); absl::Duration max_duration = duration + grace; options.set_max_session_duration_ms(absl::ToInt64Milliseconds(max_duration)); options.set_session_creation_timestamp_ns(absl::ToUnixNanos(approx_start)); ",0,train ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe. Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead. Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former. PiperOrigin-RevId: 261570194",local_client.cc,"@@ -180,8 +180,8 @@ StatusOr LocalExecutable::Run( if (executable_->dumping_snapshot()) { return ExecuteAndDump(&options_and_stream.first, arguments); } - return executable_->ExecuteOnStreamWrapper( - &options_and_stream.first, run_options.execution_profile(), arguments); + return executable_->ExecuteOnStreamWrapper(&options_and_stream.first, + arguments); } StatusOr LocalExecutable::RunAsync( ",0,test ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe. 
Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead. Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former. PiperOrigin-RevId: 261570194",cpu_executable.cc,"@@ -194,13 +194,13 @@ Status CpuExecutable::ExecuteComputeFunction( uint64 end_micros = tensorflow::Env::Default()->NowMicros(); - { - tensorflow::mutex_lock lock(mutex_); + if (run_options->execution_profile()) { const double nanoseconds = (end_micros - start_micros) * 1000.0; - execution_profile_.set_compute_time_ns(std::max(nanoseconds, 1.0)); + run_options->execution_profile()->set_compute_time_ns( + std::max(nanoseconds, 1.0)); // If hlo profiling was disabled then the cycle count is left empty. if (hlo_execution_profile) { - execution_profile_.set_compute_cycle_count( + run_options->execution_profile()->set_compute_cycle_count( hlo_execution_profile->total_cycles_executed( *module().entry_computation())); } ",0,test ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe. Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead. Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former. PiperOrigin-RevId: 261570194",executable.cc,"@@ -61,10 +61,11 @@ StatusOr> Executable::ExecuteOnStreams( } StatusOr Executable::ExecuteOnStreamWrapper( - const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile, + const ServiceExecutableRunOptions* run_options, absl::Span arguments) { se::Stream* stream = run_options->stream(); std::unique_ptr timer; + ExecutionProfile* profile = run_options->run_options().execution_profile(); if (profile != nullptr) { timer.reset(new se::Timer(stream->parent())); stream->InitTimer(timer.get()).ThenStartTimer(timer.get()); @@ -102,11 +103,6 @@ StatusOr Executable::ExecuteOnStreamWrapper( VLOG(1) << ""done with block-host-until-done""; // Merge in run-time profile information from execution_profile. - // - // TODO(b/71713097): This is buggy -- even though the mutex takes care of - // C++ level races, some other concurrent ExecuteOnStreamWrapper call could - // have rewritten the execution_profile before we get to it. - profile->MergeFrom(execution_profile()); // Overall execution time (in nanoseconds) from the executor timer. if (stream->ok()) { ",0,test ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe. Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead. Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former. 
PiperOrigin-RevId: 261570194",executable.h,"@@ -171,6 +171,7 @@ class Executable { // called explicitly for other (async, for example) variants after the stream // has completed. virtual Status PopulateExecutionProfile( + ExecutionProfile* execution_profile, HloExecutionProfile* hlo_execution_profile, se::Stream* stream) { return Status::OK(); } @@ -179,16 +180,9 @@ class Executable { // timer for the execution, sets up HLO profiling if enabled, and fills in the // given ExecutionProfile if non-null. StatusOr ExecuteOnStreamWrapper( - const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile, + const ServiceExecutableRunOptions* run_options, absl::Span arguments); - // Returns the ExecutionProfile from executing on the device. This includes - // the number of cycles taken for the computation or the compilation time. - ExecutionProfile execution_profile() const { - tensorflow::mutex_lock lock(mutex_); - return execution_profile_; - } - const HloProfilePrinterData& hlo_profile_printer_data() const { CHECK(hlo_profiling_enabled()); return *hlo_profile_printer_data_; @@ -233,11 +227,6 @@ class Executable { HloProto const* hlo_proto() const { return hlo_proto_.get(); } protected: - mutable tensorflow::mutex mutex_; - - // Execution profile data on the device. - ExecutionProfile execution_profile_ GUARDED_BY(mutex_); - // HloModule this was compiled from. BufferAssignment keeps pointers to // HloInstructions owned by the HloModule so we need to keep the HloModule // around. ",0,test ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe. Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead. Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former. PiperOrigin-RevId: 261570194",gpu_executable.cc,"@@ -207,17 +207,20 @@ Status GpuExecutable::ExecuteThunks( } } + // FinishExecution() blocks until main_stream has completed if profiling is + // enabled; we therefore do not need to defer profile collection onto a + // stream. profiler.FinishExecution(); uint64 end_micros = tensorflow::Env::Default()->NowMicros(); - { - tensorflow::mutex_lock lock(mutex_); + if (run_options->run_options().execution_profile()) { + ExecutionProfile* profile = run_options->run_options().execution_profile(); const double nanoseconds = (end_micros - start_micros) * 1000.0; - execution_profile_.set_compute_time_ns(std::max(nanoseconds, 1.0)); + profile->set_compute_time_ns(std::max(nanoseconds, 1.0)); // If hlo profiling was disabled then the cycle count is left empty. if (do_profile) { - execution_profile_.set_compute_cycle_count( + profile->set_compute_cycle_count( hlo_execution_profile->total_cycles_executed( *module().entry_computation())); } ",0,test ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe. Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead. 
Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former. PiperOrigin-RevId: 261570194",hlo_runner.cc,"@@ -208,13 +208,13 @@ StatusOr HloRunner::ExecuteWithDeviceBuffers( ServiceExecutableRunOptions service_run_options = GetServiceRunOptionsForDevice(backend().default_device_ordinal(), &stream, nullptr, RunId()); + service_run_options.mutable_run_options()->set_execution_profile(profile); TF_ASSIGN_OR_RETURN(std::unique_ptr executable, CreateExecutable(std::move(module), run_hlo_passes)); TF_ASSIGN_OR_RETURN( ScopedShapedBuffer retval, - executable->ExecuteOnStreamWrapper(&service_run_options, - /*profile=*/profile, arguments)); + executable->ExecuteOnStreamWrapper(&service_run_options, arguments)); TF_RETURN_IF_ERROR(stream.BlockHostUntilDone()); return std::move(retval); } @@ -244,11 +244,11 @@ StatusOr HloRunner::ExecuteWithDeviceBuffers( ServiceExecutableRunOptions service_run_options = GetServiceRunOptionsForDevice(backend().default_device_ordinal(), &stream, nullptr, RunId()); + service_run_options.mutable_run_options()->set_execution_profile(profile); TF_ASSIGN_OR_RETURN( ScopedShapedBuffer retval, - executable->ExecuteOnStreamWrapper(&service_run_options, - /*profile=*/profile, arguments)); + executable->ExecuteOnStreamWrapper(&service_run_options, arguments)); TF_RETURN_IF_ERROR(stream.BlockHostUntilDone()); return std::move(retval); } ",0,test ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe. Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead. Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former. PiperOrigin-RevId: 261570194",executable.cc,"@@ -113,10 +113,10 @@ StatusOr InterpreterExecutable::ExecuteOnStream( uint64 end_micros = tensorflow::Env::Default()->NowMicros(); - { - tensorflow::mutex_lock lock(mutex_); + ExecutionProfile* profile = run_options->run_options().execution_profile(); + if (profile) { const double nanoseconds = (end_micros - start_micros) * 1000.0; - execution_profile_.set_compute_time_ns(std::max(nanoseconds, 1.0)); + profile->set_compute_time_ns(std::max(nanoseconds, 1.0)); } return std::move(result); ",0,test ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe. Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead. Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former. PiperOrigin-RevId: 261570194",service.cc,"@@ -451,6 +451,11 @@ Service::ExecuteParallelAndRegisterResult( options.set_intra_op_thread_pool( backend->eigen_intra_op_thread_pool_device()); options.set_device_assignment(&device_assignment); + // Use run-time profile information from execution_profile on the 0th + // device. 
+ if (i == 0) { + options.set_execution_profile(profile); + } ServiceExecutableRunOptions run_options(options, backend->StreamBorrower()); @@ -490,10 +495,6 @@ Service::ExecuteParallelAndRegisterResult( uint64 nanoseconds = *std::max_element(timer_nanoseconds.begin(), timer_nanoseconds.end()); - // Merge in run-time profile information from execution_profile on the - // zeroth device. - profile->MergeFrom(executables[0]->execution_profile()); - // Overall execution time (in nanoseconds) from the executor timer. profile->set_compute_and_transfer_time_ns(nanoseconds); @@ -546,13 +547,13 @@ StatusOr Service::ExecuteAndRegisterResult( options.set_intra_op_thread_pool( backend->eigen_intra_op_thread_pool_device()); options.set_device_assignment(&device_assignment); + options.set_execution_profile(profile); run_options.emplace_back(options, backend->StreamBorrower()); } if (options_.number_of_replicas() == 1) { - TF_ASSIGN_OR_RETURN( - auto result, executable->ExecuteOnStreamWrapper(&run_options[0], - profile, arguments[0])); + TF_ASSIGN_OR_RETURN(auto result, executable->ExecuteOnStreamWrapper( + &run_options[0], arguments[0])); return allocation_tracker_.Register(std::move(result), result_tag); } ",0,test 40ea8ce73d0b38f07260845057b5b0e2bdb2ac17,tensorflow/tensorflow,"Lookup layers allow tensor input vocabs PiperOrigin-RevId: 376219678 Change-Id: Idcfad5f5eb3619785472f10d445af52dfbe14345",index_lookup.py,"@@ -26,6 +26,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec +from tensorflow.python.framework import tensor_util from tensorflow.python.keras import backend from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.keras.layers.preprocessing import category_encoding @@ -377,16 +378,14 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer): # The MutableHashTable data will not be sorted, so we will create a inverted # lookup here, and use that to lookup a range of indices [0, vocab_size). - keys, values = self._table_handler.data() - if self.invert: - index_to_token = zip(keys, values) - else: - index_to_token = zip(values, keys) - lookup = collections.defaultdict(lambda: self.oov_token, index_to_token) + keys, values = self._table.export() + vocab, indices = (values, keys) if self.invert else (keys, values) + lookup = collections.defaultdict( + lambda: self.oov_token, + zip(indices.numpy(), self._tensor_vocab_to_numpy(vocab))) vocab = [lookup[x] for x in range(self.vocabulary_size())] if self.mask_token is not None and self.output_mode == INT: vocab[0] = self.mask_token - return vocab def vocabulary_size(self): @@ -441,9 +440,10 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer): it. Args: - vocabulary: An array of hashable tokens. - idf_weights: An array of inverse document frequency weights with equal - length to vocab. Only necessary if the layer output_mode is TF_IDF. + vocabulary: An array, numpy array, or tensor of hashable tokens. + idf_weights: An array, numpy array, or tensor of inverse document + frequency weights with equal length to vocab. Only necessary if the + layer output_mode is TF_IDF. Raises: ValueError: If there are too many inputs, the inputs do not match, or @@ -452,6 +452,7 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer): called. 
This happens when `""multi_hot""`, `""count""`, and `""tfidf""` modes, if `pad_to_max_tokens` is False and the layer itself has already been called. + RuntimeError: If a tensor vocabulary is passed outside of eager execution. """""" if self._has_static_table: raise RuntimeError(""Layer {} was created with a static file-based table "" @@ -470,6 +471,21 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer): ""False, the vocabulary cannot be changed after the "" ""layer is called."".format(self.output_mode)) + if not context.executing_eagerly() and (tensor_util.is_tensor(vocabulary) or + tensor_util.is_tensor(idf_weights)): + raise RuntimeError( + ""Cannot set a tensor vocabulary on {} layer {} when not executing "" + ""eagerly. Create this layer or call `set_vocabulary` outside of "" + ""any `tf.function`s and with eager execution enabled."".format( + self.__class__.__name__, self.name)) + + # TODO(mattdangerw): for better performance we should rewrite this entire + # function to operate on tensors and convert vocabulary to a tensor here. + if tensor_util.is_tensor(vocabulary): + vocabulary = self._tensor_vocab_to_numpy(vocabulary) + if tensor_util.is_tensor(idf_weights): + idf_weights = idf_weights.numpy() + oov_start = self._oov_start_index() token_start = self._token_start_index() should_have_mask = (oov_start > 0) @@ -658,6 +674,11 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer): def _trackable_saved_model_saver(self): return layer_serialization.IndexLookupLayerSavedModelSaver(self) + # Override points for IntegerLookup and StringLookup. + def _tensor_vocab_to_numpy(self, vocabulary): + """"""Converts a tensor vocabulary to a numpy vocabulary."""""" + return vocabulary.numpy() + class _IndexLookupAccumulator( collections.namedtuple(""Accumulator"", ",0,train 40ea8ce73d0b38f07260845057b5b0e2bdb2ac17,tensorflow/tensorflow,"Lookup layers allow tensor input vocabs PiperOrigin-RevId: 376219678 Change-Id: Idcfad5f5eb3619785472f10d445af52dfbe14345",integer_lookup_test.py,"@@ -26,6 +26,8 @@ from tensorflow.python import keras from tensorflow.python import tf2 from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.eager import def_function +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import errors_impl @@ -525,6 +527,17 @@ class IntegerLookupVocabularyTest( "".*HashTable has different value for same key.*42.*""): _ = integer_lookup.IntegerLookup(vocabulary=vocab_path) + def test_tensor_vocab(self): + vocab_data = [-1, 42, 1138, 725, 1729] + vocab_tensor = constant_op.constant(vocab_data, dtypes.int64) + layer = integer_lookup.IntegerLookup(vocabulary=vocab_tensor) + returned_vocab = layer.get_vocabulary() + self.assertAllEqual(vocab_data, returned_vocab) + self.assertAllEqual(layer.vocabulary_size(), 5) + fn = def_function.function(lambda: layer.set_vocabulary(vocab_tensor)) + with self.assertRaisesRegex(RuntimeError, ""Cannot set a tensor vocabulary""): + fn() + @keras_parameterized.run_all_keras_modes(always_skip_v1=True) class IntegerLookupErrorTest(keras_parameterized.TestCase, ",0,train 40ea8ce73d0b38f07260845057b5b0e2bdb2ac17,tensorflow/tensorflow,"Lookup layers allow tensor input vocabs PiperOrigin-RevId: 376219678 Change-Id: Idcfad5f5eb3619785472f10d445af52dfbe14345",string_lookup.py,"@@ -15,6 +15,8 @@ """"""Keras string lookup preprocessing layer."""""" # pylint: 
disable=g-classes-have-attributes +import numpy as np + from tensorflow.python.framework import dtypes from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.keras.layers.preprocessing import index_lookup @@ -298,10 +300,6 @@ class StringLookup(index_lookup.IndexLookup): base_config = super(StringLookup, self).get_config() return dict(list(base_config.items()) + list(config.items())) - def get_vocabulary(self): - vocab = super(StringLookup, self).get_vocabulary() - return [compat.as_text(x, self.encoding) for x in vocab] - def set_vocabulary(self, vocabulary, idf_weights=None): if isinstance(vocabulary, str): if self.output_mode == index_lookup.TF_IDF: @@ -315,3 +313,8 @@ class StringLookup(index_lookup.IndexLookup): vocabulary = table_utils.get_vocabulary_from_file(vocabulary, self.encoding) super().set_vocabulary(vocabulary, idf_weights=idf_weights) + + # Overriden methods from IndexLookup. + def _tensor_vocab_to_numpy(self, vocabulary): + vocabulary = vocabulary.numpy() + return np.array([compat.as_text(x, self.encoding) for x in vocabulary]) ",0,train 40ea8ce73d0b38f07260845057b5b0e2bdb2ac17,tensorflow/tensorflow,"Lookup layers allow tensor input vocabs PiperOrigin-RevId: 376219678 Change-Id: Idcfad5f5eb3619785472f10d445af52dfbe14345",string_lookup_test.py,"@@ -21,6 +21,8 @@ import numpy as np from tensorflow.python import keras from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.eager import def_function +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import errors_impl @@ -365,6 +367,16 @@ class StringLookupVocabularyTest(keras_parameterized.TestCase, output_data = model.predict(input_array) self.assertAllEqual(expected_output, output_data) + def test_tensor_vocab(self): + vocab_data = [""[UNK]"", ""wind"", ""and"", ""fire""] + vocab_tensor = constant_op.constant(vocab_data) + layer = string_lookup.StringLookup(vocabulary=vocab_tensor) + returned_vocab = layer.get_vocabulary() + self.assertAllEqual(vocab_data, returned_vocab) + self.assertAllEqual(layer.vocabulary_size(), 4) + fn = def_function.function(lambda: layer.set_vocabulary(vocab_tensor)) + with self.assertRaisesRegex(RuntimeError, ""Cannot set a tensor vocabulary""): + fn() if __name__ == ""__main__"": test.main() ",0,train 40ea8ce73d0b38f07260845057b5b0e2bdb2ac17,tensorflow/tensorflow,"Lookup layers allow tensor input vocabs PiperOrigin-RevId: 376219678 Change-Id: Idcfad5f5eb3619785472f10d445af52dfbe14345",table_utils.py,"@@ -51,10 +51,6 @@ class TableHandler(object): oov_tokens = [oov_tokens] self.oov_tokens = math_ops.cast(oov_tokens, table._value_dtype) # pylint: disable=protected-access - def data(self): - keys, values = self.table.export() - return (keys.numpy(), values.numpy()) - def table_size(self): return self.table.size().numpy() ",0,train 995eec62445a677df9d7d1268143ae54fc7d6285,tensorflow/tensorflow,"Change derived type storage objects to define an 'operator==(const KeyTy &)' instead of converting to the KeyTy. This allows for handling cases where the KeyTy does not provide an equality operator on itself. PiperOrigin-RevId: 229423249",TypeSupport.h,"@@ -143,6 +143,9 @@ class TypeUniquer { public: /// Lookup key for storage types. struct TypeLookupKey { + /// The known derived kind for the storage. + unsigned kind; + /// The known hash value of the key. 
unsigned hashValue; @@ -170,18 +173,12 @@ public: // Generate an equality function for the derived storage. std::function isEqual = - [kind, &derivedKey](const TypeStorage *existing) { - // Check that these type storages have the same kind. - if (kind != existing->getKind()) - return false; - // Generate a key from the derived storage and compare it to the - // current key. - auto *derivedStorage = static_cast(existing); - return derivedStorage->getKey() == derivedKey; + [&derivedKey](const TypeStorage *existing) { + return static_cast(*existing) == derivedKey; }; // Lookup an existing type with the given key. - TypeStorage *storage = lookup(TypeLookupKey{hashValue, isEqual}); + TypeStorage *storage = lookup(TypeLookupKey{kind, hashValue, isEqual}); if (storage) return T(storage); ",0,test 995eec62445a677df9d7d1268143ae54fc7d6285,tensorflow/tensorflow,"Change derived type storage objects to define an 'operator==(const KeyTy &)' instead of converting to the KeyTy. This allows for handling cases where the KeyTy does not provide an equality operator on itself. PiperOrigin-RevId: 229423249",Types.h,"@@ -84,8 +84,8 @@ struct UnknownTypeStorage; /// * The key type must have a llvm::DenseMapInfo specialization for /// hashing. /// -/// - Provide a method, 'KeyTy getKey() const', to construct the key type -/// from an existing storage instance. +/// - Provide a method, 'bool operator==(const KeyTy &) const', to +/// compare the storage instance against an instance of the key type. /// /// - Provide a construction method: /// 'DerivedStorage *construct(TypeStorageAllocator &, const KeyTy &key)' ",0,test 995eec62445a677df9d7d1268143ae54fc7d6285,tensorflow/tensorflow,"Change derived type storage objects to define an 'operator==(const KeyTy &)' instead of converting to the KeyTy. This allows for handling cases where the KeyTy does not provide an equality operator on itself. PiperOrigin-RevId: 229423249",LLVMDialect.h,"@@ -43,7 +43,7 @@ namespace mlir { namespace LLVM { namespace detail { -class LLVMTypeStorage; +struct LLVMTypeStorage; } class LLVMType : public mlir::Type::TypeBase; - - /// Convert to the key type. - KeyTy getKey() const { return std::make_pair(dialectNamespace, typeData); } + bool operator==(const KeyTy &key) const { + return key == KeyTy(dialectNamespace, typeData); + } static UnknownTypeStorage *construct(TypeStorageAllocator &allocator, const KeyTy &key) { @@ -64,9 +64,7 @@ struct IntegerTypeStorage : public TypeStorage { /// The hash key used for uniquing. using KeyTy = unsigned; - - /// Convert to the key type. - KeyTy getKey() const { return width; } + bool operator==(const KeyTy &key) const { return key == width; } static IntegerTypeStorage *construct(TypeStorageAllocator &allocator, KeyTy bitwidth) { @@ -86,9 +84,9 @@ struct FunctionTypeStorage : public TypeStorage { /// The hash key used for uniquing. using KeyTy = std::pair, ArrayRef>; - - /// Convert to the key type. - KeyTy getKey() const { return KeyTy(getInputs(), getResults()); } + bool operator==(const KeyTy &key) const { + return key == KeyTy(getInputs(), getResults()); + } /// Construction. static FunctionTypeStorage *construct(TypeStorageAllocator &allocator, @@ -125,9 +123,7 @@ struct VectorOrTensorTypeStorage : public TypeStorage { /// The hash key used for uniquing. using KeyTy = Type; - - /// Convert to the key type. 
- KeyTy getKey() const { return elementType; } + bool operator==(const KeyTy &key) const { return key == elementType; } Type elementType; }; @@ -141,9 +137,9 @@ struct VectorTypeStorage : public VectorOrTensorTypeStorage { /// The hash key used for uniquing. using KeyTy = std::pair, Type>; - - /// Convert to the key type. - KeyTy getKey() const { return KeyTy(getShape(), elementType); } + bool operator==(const KeyTy &key) const { + return key == KeyTy(getShape(), elementType); + } /// Construction. static VectorTypeStorage *construct(TypeStorageAllocator &allocator, @@ -171,9 +167,9 @@ struct RankedTensorTypeStorage : public VectorOrTensorTypeStorage { /// The hash key used for uniquing. using KeyTy = std::pair, Type>; - - /// Convert to the key type. - KeyTy getKey() const { return KeyTy(getShape(), elementType); } + bool operator==(const KeyTy &key) const { + return key == KeyTy(getShape(), elementType); + } /// Construction. static RankedTensorTypeStorage *construct(TypeStorageAllocator &allocator, @@ -194,14 +190,14 @@ struct RankedTensorTypeStorage : public VectorOrTensorTypeStorage { }; struct UnrankedTensorTypeStorage : public VectorOrTensorTypeStorage { - UnrankedTensorTypeStorage(Type elementTy) - : VectorOrTensorTypeStorage(elementTy) {} + using VectorOrTensorTypeStorage::KeyTy; + using VectorOrTensorTypeStorage::VectorOrTensorTypeStorage; /// Construction. static UnrankedTensorTypeStorage *construct(TypeStorageAllocator &allocator, Type elementTy) { - auto *result = allocator.allocate(); - return new (result) UnrankedTensorTypeStorage(elementTy); + return new (allocator.allocate()) + UnrankedTensorTypeStorage(elementTy); } }; @@ -217,10 +213,8 @@ struct MemRefTypeStorage : public TypeStorage { // MemRefs are uniqued based on their shape, element type, affine map // composition, and memory space. using KeyTy = std::tuple, Type, ArrayRef, unsigned>; - - /// Convert to the key type. - KeyTy getKey() const { - return KeyTy(getShape(), elementType, getAffineMaps(), memorySpace); + bool operator==(const KeyTy &key) const { + return key == KeyTy(getShape(), elementType, getAffineMaps(), memorySpace); } /// Construction. ",0,test 4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8. Implementation, tests, versioning are added. Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",acceleration_test_list.cc,"@@ -214,6 +214,7 @@ TypesGatherOpTest/Float32Int32,29 TypesGatherOpTest/Int32Int32,29 TypesGatherOpTest/Uint8Int32,29 TypesGatherOpTest/Int8Int32,29 +-TypesGatherOpTest/.*Int16.* # hashtable_lookup_test # All test excepted the string one should be accelerated ",0,train 4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8. Implementation, tests, versioning are added. 
Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",gather.cc,"@@ -61,6 +61,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { case kTfLiteFloat32: case kTfLiteUInt8: case kTfLiteInt8: + case kTfLiteInt16: case kTfLiteInt64: case kTfLiteInt32: case kTfLiteBool: @@ -143,6 +144,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return Gather(*params, input, positions, output); case kTfLiteInt8: return Gather(*params, input, positions, output); + case kTfLiteInt16: + return Gather(*params, input, positions, output); case kTfLiteInt32: return Gather(*params, input, positions, output); case kTfLiteInt64: @@ -165,6 +168,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return Gather(*params, input, positions, output); case kTfLiteInt8: return Gather(*params, input, positions, output); + case kTfLiteInt16: + return Gather(*params, input, positions, output); case kTfLiteInt32: return Gather(*params, input, positions, output); case kTfLiteInt64: ",0,train 4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8. Implementation, tests, versioning are added. Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",gather_test.cc,"@@ -272,6 +272,24 @@ TEST(TypesGatherOpTest, Int8Int64) { EXPECT_THAT(m.GetOutput(), ElementsAreArray({14, 15, -13, -120})); } +TEST(TypesGatherOpTest, Int16Int32) { + GatherOpModel m({TensorType_INT16, {2, 2}}, {TensorType_INT32, {2}}); + m.SetInput({-13, -32000, 0, 32500}); + m.SetPositions({1, 0}); + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 32500, -13, -32000})); +} + +TEST(TypesGatherOpTest, Int16Int64) { + GatherOpModel m({TensorType_INT16, {2, 2}}, {TensorType_INT64, {2}}); + m.SetInput({-13, -32000, 0, 32500}); + m.SetPositions({1LL, 0LL}); + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 32500, -13, -32000})); +} + TEST(TypesGatherOpTest, Int64Int32) { GatherOpModel m({TensorType_INT64, {2, 2}}, {TensorType_INT32, {2}}); m.SetInput({-(1LL << 34), 134LL, 14LL, 15LL}); ",0,train 4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8. Implementation, tests, versioning are added. Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",register.cc,"@@ -131,7 +131,7 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_DEPTH_TO_SPACE, Register_DEPTH_TO_SPACE()); AddBuiltin(BuiltinOperator_GATHER, Register_GATHER(), /* min_version = */ 1, - /* max_version = */ 3); + /* max_version = */ 4); AddBuiltin(BuiltinOperator_TRANSPOSE, Register_TRANSPOSE(), /* min_version = */ 1, /* max_version = */ 4); ",0,train 4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8. Implementation, tests, versioning are added. Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",op_version.cc,"@@ -80,6 +80,7 @@ std::string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kGather, 1}, ""1.6.0""}, {{OperatorType::kGather, 2}, ""1.14.0""}, {{OperatorType::kGather, 3}, ""1.15.0""}, + {{OperatorType::kGather, 4}, kPendingReleaseOpVersion}, {{OperatorType::kGatherNd, 1}, ""1.14.0""}, {{OperatorType::kGatherNd, 2}, kPendingReleaseOpVersion}, {{OperatorType::kSvdf, 1}, ""1.5.0""}, ",0,train 4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8. Implementation, tests, versioning are added. 
Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",operator_property.cc,"@@ -191,7 +191,6 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, property.outputs = {{0, {}}}; property.restrict_same_input_output_scale = true; property.version = 2; - property.quantizable_int16 = false; break; case BuiltinOperator_HARD_SWISH: { property.inputs = {{0, {}}}; ",0,train 4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8. Implementation, tests, versioning are added. Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",op_version.cc,"@@ -176,6 +176,9 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { return 1; case BuiltinOperator_GATHER: + if (op_sig.input_types.at(0) == TensorType_INT16) { + return 4; + } // If the op takes bool input, it is version 3. if (op_sig.input_types.at(0) == TensorType_BOOL) { return 3; ",0,train 4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8. Implementation, tests, versioning are added. Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",runtime_version.cc,"@@ -109,6 +109,7 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code, {{BuiltinOperator_GATHER, 1}, ""1.6.0""}, {{BuiltinOperator_GATHER, 2}, ""1.14.0""}, {{BuiltinOperator_GATHER, 3}, ""1.15.0""}, + {{BuiltinOperator_GATHER, 4}, kPendingReleaseVersion}, {{BuiltinOperator_GATHER_ND, 1}, ""1.14.0""}, {{BuiltinOperator_GATHER_ND, 2}, ""2.3.0""}, {{BuiltinOperator_HASHTABLE_LOOKUP, 1}, ""1.5.0""}, ",0,train 4f3444ce5650831c7af364f5829ba1aa96e4a643,tensorflow/tensorflow,Print driver version.,gpu_cudamallocasync_allocator.cc,"@@ -129,10 +129,17 @@ GpuCudaMallocAsyncAllocator::GpuCudaMallocAsyncAllocator( if (auto status = cuDeviceGetAttribute(&cuda_malloc_async_supported, CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, - platform_device_id.value())) + platform_device_id.value())) { + int driverVersion; + if (auto status2 = cuDriverGetVersion(&driverVersion)) { + LOG(ERROR) << ""Error while fetching driver version: "" + << GetCudaErrorMessage(status2); + } LOG(FATAL) // Crash OK. << ""On device: "" << platform_device_id.value() + << "" Current driver: "" << driverVersion << "". Failed to get device attribute : "" << GetCudaErrorMessage(status); + } if (!cuda_malloc_async_supported) LOG(FATAL) // Crash OK. << ""TF_GPU_ALLOCATOR=cuda_malloc_async isn't currently supported on "" ",0,train 6a7a93e83c0957ca7fa916cece93bf21a3f33902,tensorflow/tensorflow,"Distributed runtime bfloat16 casting. Change: 133062008",master_session.cc,"@@ -948,7 +948,19 @@ Status MasterSession::DoRunWithLocalExecution(CallOptions* opts, } }; popts.control_flow_added = false; - // TODO(mrry): Enable DT_BFLOAT16 casting. + const bool enable_bfloat16_sendrecv = + session_opts_.config.graph_options().enable_bfloat16_sendrecv(); + popts.should_cast = [enable_bfloat16_sendrecv](const Edge* e) { + if (e->IsControlEdge()) { + return DT_FLOAT; + } + DataType dtype = BaseType(e->src()->output_type(e->src_output())); + if (enable_bfloat16_sendrecv && dtype == DT_FLOAT) { + return DT_BFLOAT16; + } else { + return dtype; + } + }; // TODO(mrry): Enable recv scheduling. TF_RETURN_IF_ERROR(rcg->RegisterPartitions(env_, popts, func_def_lib_)); ",0,train cf5ebf814eb9414c40d8c5323c322d498c7f0eed,tensorflow/tensorflow,"Adds BatchMatMul's c++ grad. 
Change: 121382365",math_grad.cc,"@@ -511,9 +511,12 @@ Status MinGrad(const AttrSlice& attrs, FunctionDef* g) { } REGISTER_OP_GRADIENT(""Min"", MinGrad); -static Status MatMulGradHelper(FunctionDef* g, const string& x0, bool tx0, - const string& x1, bool tx1, const string& y0, - bool ty0, const string& y1, bool ty1) { +static Status MatMulGradHelper(FunctionDef* g, const string& opname, + const string& attr_adj_x, + const string& attr_adj_y, const string& x0, + bool ax0, const string& x1, bool ax1, + const string& y0, bool ay0, const string& y1, + bool ay1) { *g = FDH::Define( // Arg defs {""x: T"", ""y: T"", ""dz: T""}, @@ -524,18 +527,20 @@ static Status MatMulGradHelper(FunctionDef* g, const string& x0, bool tx0, // Nodes { {{""dx""}, - ""MatMul"", + opname, {x0, x1}, - {{""T"", ""$T""}, {""transpose_a"", tx0}, {""transpose_b"", tx1}}}, + {{""T"", ""$T""}, {attr_adj_x, ax0}, {attr_adj_y, ax1}}}, {{""dy""}, - ""MatMul"", + opname, {y0, y1}, - {{""T"", ""$T""}, {""transpose_a"", ty0}, {""transpose_b"", ty1}}}, + {{""T"", ""$T""}, {attr_adj_x, ay0}, {attr_adj_y, ay1}}}, }); return Status::OK(); } -Status MatMulGrad(const AttrSlice& attrs, FunctionDef* g) { +Status MatMulGradCommon(const string& opname, const string& attr_adj_x, + const string& attr_adj_y, const AttrSlice& attrs, + FunctionDef* g) { DataType T; TF_RETURN_IF_ERROR(GetNodeAttr(attrs, ""T"", &T)); if (T == DT_COMPLEX64) { @@ -544,24 +549,36 @@ Status MatMulGrad(const AttrSlice& attrs, FunctionDef* g) { } bool ta; bool tb; - TF_RETURN_IF_ERROR(GetNodeAttr(attrs, ""transpose_a"", &ta)); - TF_RETURN_IF_ERROR(GetNodeAttr(attrs, ""transpose_b"", &tb)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, attr_adj_x, &ta)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, attr_adj_y, &tb)); if (!ta && !tb) { - return MatMulGradHelper(g, ""dz"", false, ""y"", true, ""x"", true, ""dz"", false); + return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, ""dz"", false, ""y"", + true, ""x"", true, ""dz"", false); } if (!ta && tb) { - return MatMulGradHelper(g, ""dz"", false, ""y"", false, ""dz"", true, ""x"", false); + return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, ""dz"", false, ""y"", + false, ""dz"", true, ""x"", false); } if (ta && !tb) { - return MatMulGradHelper(g, ""y"", false, ""dz"", true, ""x"", false, ""dz"", false); + return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, ""y"", false, ""dz"", + true, ""x"", false, ""dz"", false); } CHECK(ta && tb); - return MatMulGradHelper(g, ""y"", true, ""dz"", true, ""dz"", true, ""x"", true); + return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, ""y"", true, ""dz"", + true, ""dz"", true, ""x"", true); +} + +Status MatMulGrad(const AttrSlice& attrs, FunctionDef* g) { + return MatMulGradCommon(""MatMul"", ""transpose_a"", ""transpose_b"", attrs, g); } REGISTER_OP_GRADIENT(""MatMul"", MatMulGrad); +Status BatchMatMulGrad(const AttrSlice& attrs, FunctionDef* g) { + return MatMulGradCommon(""BatchMatMul"", ""adj_x"", ""adj_y"", attrs, g); +} +REGISTER_OP_GRADIENT(""BatchMatMul"", BatchMatMulGrad); + // REGISTER_OP_GRADIENT(""SparseMatMul"", SparseMatMulGrad); -// REGISTER_OP_GRADIENT(""BatchMatMul"", BatchMatMulGrad); // Comparison ops. REGISTER_OP_NO_GRADIENT(""Less""); ",0,train cf5ebf814eb9414c40d8c5323c322d498c7f0eed,tensorflow/tensorflow,"Adds BatchMatMul's c++ grad. 
Change: 121382365",math_grad_test.cc,"@@ -209,14 +209,16 @@ class MathGradTest : public ::testing::Test { *di = outputs[1]; } - Tensor MatMul(const Tensor& x, bool tx, const Tensor& y, bool ty) { + Tensor MatMulCommon(const string& opname, const string& attr_adj_x, + const string& attr_adj_y, const Tensor& x, bool ax, + const Tensor& y, bool ay) { auto T = x.dtype(); auto gdef = test::function::GDef( { f::NDef(""x"", ""Placeholder"", {}, {{""dtype"", T}}), f::NDef(""y"", ""Placeholder"", {}, {{""dtype"", T}}), - f::NDef(""z"", ""MatMul"", {""x"", ""y""}, - {{""T"", T}, {""transpose_a"", tx}, {""transpose_b"", ty}}), + f::NDef(""z"", opname, {""x"", ""y""}, + {{""T"", T}, {attr_adj_x, ax}, {attr_adj_y, ay}}), }, {}); auto sess = NewSession(); @@ -229,8 +231,17 @@ class MathGradTest : public ::testing::Test { return outputs[0]; } - void MatMulGrad(const Tensor& x, bool tx, const Tensor& y, bool ty, - Tensor* dx, Tensor* dy) { + Tensor MatMul(const Tensor& x, bool ax, const Tensor& y, bool ay) { + return MatMulCommon(""MatMul"", ""transpose_a"", ""transpose_b"", x, ax, y, ay); + } + + Tensor BatchMatMul(const Tensor& x, bool ax, const Tensor& y, bool ay) { + return MatMulCommon(""BatchMatMul"", ""adj_x"", ""adj_y"", x, ax, y, ay); + } + + void MatMulGradCommon(const string& opname, const string& attr_adj_x, + const string& attr_adj_y, const Tensor& x, bool ax, + const Tensor& y, bool ay, Tensor* dx, Tensor* dy) { const DataType T = x.dtype(); auto adef = [T](const string& name) { // E.g., x:float, dy:double return strings::StrCat(name, "":"", DataTypeString(T)); @@ -240,9 +251,9 @@ class MathGradTest : public ::testing::Test { FDH::Define(""Test"", {adef(""x""), adef(""y"")}, {adef(""l"")}, {}, { {{""z""}, - ""MatMul"", + opname, {""x"", ""y""}, - {{""T"", T}, {""transpose_a"", tx}, {""transpose_b"", ty}}}, + {{""T"", T}, {attr_adj_x, ax}, {attr_adj_y, ay}}}, FDH::Const(""zero"", 0), FDH::Const(""one"", 1), {{""r""}, ""Rank"", {""z""}, {{""T"", T}}}, @@ -289,6 +300,18 @@ class MathGradTest : public ::testing::Test { *dy = outputs[1]; } + void MatMulGrad(const Tensor& x, bool ax, const Tensor& y, bool ay, + Tensor* dx, Tensor* dy) { + return MatMulGradCommon(""MatMul"", ""transpose_a"", ""transpose_b"", x, ax, y, + ay, dx, dy); + } + + void BatchMatMulGrad(const Tensor& x, bool ax, const Tensor& y, bool ay, + Tensor* dx, Tensor* dy) { + return MatMulGradCommon(""BatchMatMul"", ""adj_x"", ""adj_y"", x, ax, y, ay, dx, + dy); + } + void SelectGrad(const Tensor& c, const Tensor& x, const Tensor& y, Tensor* dc, Tensor* dx, Tensor* dy) { auto T = DT_FLOAT; @@ -829,6 +852,54 @@ TEST_F(MathGradTest, MatMul_11) { test::ExpectClose(dy, MatMul(dz, true, x, true)); } +TEST_F(MathGradTest, BatchMatMul_00) { + auto x = test::AsTensor({1.f, 2.f, 3.f, 4.f, 5.f, 6.f}, + TensorShape({1, 2, 3})); + auto y = test::AsTensor({-1.f, .5f, 2.f}, TensorShape({1, 3, 1})); + Tensor dx; + Tensor dy; + BatchMatMulGrad(x, false, y, false, &dx, &dy); + auto dz = test::AsTensor({1.f, 1.f}, TensorShape({1, 2, 1})); + test::ExpectClose(dx, BatchMatMul(dz, false, y, true)); + test::ExpectClose(dy, BatchMatMul(x, true, dz, false)); +} + +TEST_F(MathGradTest, BatchMatMul_01) { + auto x = test::AsTensor({1.f, 2.f, 3.f, 4.f, 5.f, 6.f}, + TensorShape({1, 2, 3})); + auto y = test::AsTensor({-1.f, .5f, 2.f}, TensorShape({1, 1, 3})); + Tensor dx; + Tensor dy; + BatchMatMulGrad(x, false, y, true, &dx, &dy); + auto dz = test::AsTensor({1.f, 1.f}, TensorShape({1, 2, 1})); + test::ExpectClose(dx, BatchMatMul(dz, false, y, false)); + 
test::ExpectClose(dy, BatchMatMul(dz, true, x, false)); +} + +TEST_F(MathGradTest, BatchMatMul_10) { + auto x = test::AsTensor({1.f, 2.f, 3.f, 4.f, 5.f, 6.f}, + TensorShape({1, 3, 2})); + auto y = test::AsTensor({-1.f, .5f, 2.f}, TensorShape({1, 3, 1})); + Tensor dx; + Tensor dy; + BatchMatMulGrad(x, true, y, false, &dx, &dy); + auto dz = test::AsTensor({1.f, 1.f}, TensorShape({1, 2, 1})); + test::ExpectClose(dx, BatchMatMul(y, false, dz, true)); + test::ExpectClose(dy, BatchMatMul(x, false, dz, false)); +} + +TEST_F(MathGradTest, BatchMatMul_11) { + auto x = test::AsTensor({1.f, 2.f, 3.f, 4.f, 5.f, 6.f}, + TensorShape({1, 3, 2})); + auto y = test::AsTensor({-1.f, .5f, 2.f}, TensorShape({1, 1, 3})); + Tensor dx; + Tensor dy; + BatchMatMulGrad(x, true, y, true, &dx, &dy); + auto dz = test::AsTensor({1.f, 1.f}, TensorShape({1, 2, 1})); + test::ExpectClose(dx, BatchMatMul(y, true, dz, true)); + test::ExpectClose(dy, BatchMatMul(dz, true, x, true)); +} + TEST_F(MathGradTest, Sum_dim0) { auto x = test::AsTensor({-3.f, -2.f, -1.f, 1.f, 2.f, 3.f}, TensorShape({2, 3})); ",0,train 006d228201a1e9e140aa0651a59c51d3396a2d12,tensorflow/tensorflow,"Fixed the typo in RunConfig pydoc. PiperOrigin-RevId: 187498424",run_config.py,"@@ -345,7 +345,7 @@ class RunConfig(object): os.environ['TF_CONFIG'] = json.dumps( {'cluster': cluster, 'task': {'type': 'worker', 'index': 1}}) - config = ClusterConfig() + config = RunConfig() assert config.master == 'host4:2222' assert config.task_id == 1 assert config.num_ps_replicas == 2 @@ -363,7 +363,7 @@ class RunConfig(object): os.environ['TF_CONFIG'] = json.dumps( {'cluster': cluster, 'task': {'type': 'chief', 'index': 0}}) - config = ClusterConfig() + config = RunConfig() assert config.master == 'host0:2222' assert config.task_id == 0 assert config.num_ps_replicas == 2 @@ -381,7 +381,7 @@ class RunConfig(object): os.environ['TF_CONFIG'] = json.dumps( {'cluster': cluster, 'task': {'type': 'evaluator', 'index': 0}}) - config = ClusterConfig() + config = RunConfig() assert config.master == '' assert config.evaluator_master == '' assert config.task_id == 0 ",0,train 58986fcacaa10f039e5518a9b29a3d9dd51a6a41,tensorflow/tensorflow,"Fixes a race condition in device_set. The mutable device vectors recently introduced were updated in unguarded functions, generating race conditions. 
PiperOrigin-RevId: 293716863 Change-Id: I28da290862e3e51a8558bacab7a8fc5c2d4a2173",device_set.cc,"@@ -32,6 +32,7 @@ DeviceSet::DeviceSet() {} DeviceSet::~DeviceSet() {} void DeviceSet::AddDevice(Device* device) { + mutex_lock l(devices_mu_); devices_.push_back(device); prioritized_devices_.clear(); prioritized_device_types_.clear(); @@ -104,21 +105,6 @@ void DeviceSet::SortPrioritizedDeviceTypeVector( std::sort(vector->begin(), vector->end(), device_sort); } -const PrioritizedDeviceTypeVector& DeviceSet::prioritized_device_types() const { - if (prioritized_device_types_.size() == devices_.size()) { - return prioritized_device_types_; - } - - std::set seen; - for (const std::pair& p : prioritized_devices()) { - DeviceType t(p.first->device_type()); - if (seen.insert(t).second) { - prioritized_device_types_.emplace_back(t, p.second); - } - } - return prioritized_device_types_; -} - void DeviceSet::SortPrioritizedDeviceVector(PrioritizedDeviceVector* vector) { auto device_sort = [](const std::pair& a, const std::pair& b) { @@ -140,19 +126,46 @@ void DeviceSet::SortPrioritizedDeviceVector(PrioritizedDeviceVector* vector) { std::sort(vector->begin(), vector->end(), device_sort); } -const PrioritizedDeviceVector& DeviceSet::prioritized_devices() const { - if (prioritized_devices_.size() == devices_.size()) { - return prioritized_devices_; +namespace { + +void UpdatePrioritizedVectors( + const std::vector& devices, + PrioritizedDeviceVector* prioritized_devices, + PrioritizedDeviceTypeVector* prioritized_device_types) { + if (prioritized_devices->size() != devices.size()) { + for (Device* d : devices) { + prioritized_devices->emplace_back( + d, DeviceSet::DeviceTypeOrder(DeviceType(d->device_type()))); + } + DeviceSet::SortPrioritizedDeviceVector(prioritized_devices); } - for (Device* d : devices_) { - prioritized_devices_.emplace_back( - d, DeviceSet::DeviceTypeOrder(DeviceType(d->device_type()))); + if (prioritized_device_types != nullptr && + prioritized_device_types->size() != devices.size()) { + std::set seen; + for (const std::pair& p : *prioritized_devices) { + DeviceType t(p.first->device_type()); + if (seen.insert(t).second) { + prioritized_device_types->emplace_back(t, p.second); + } + } } +} - DeviceSet::SortPrioritizedDeviceVector(&prioritized_devices_); +} // namespace +const PrioritizedDeviceVector& DeviceSet::prioritized_devices() const { + mutex_lock l(devices_mu_); + UpdatePrioritizedVectors(devices_, &prioritized_devices_, + /* prioritized_device_types */ nullptr); return prioritized_devices_; } +const PrioritizedDeviceTypeVector& DeviceSet::prioritized_device_types() const { + mutex_lock l(devices_mu_); + UpdatePrioritizedVectors(devices_, &prioritized_devices_, + &prioritized_device_types_); + return prioritized_device_types_; +} + } // namespace tensorflow ",0,train 58986fcacaa10f039e5518a9b29a3d9dd51a6a41,tensorflow/tensorflow,"Fixes a race condition in device_set. The mutable device vectors recently introduced were updated in unguarded functions, generating race conditions. PiperOrigin-RevId: 293716863 Change-Id: I28da290862e3e51a8558bacab7a8fc5c2d4a2173",device_set.h,"@@ -38,7 +38,7 @@ class DeviceSet { ~DeviceSet(); // Does not take ownership of 'device'. - void AddDevice(Device* device); + void AddDevice(Device* device) LOCKS_EXCLUDED(devices_mu_); // Set the device designated as the ""client"". This device // must also be registered via AddDevice(). @@ -69,14 +69,16 @@ class DeviceSet { // Return the prioritized list of devices in this set. 
// Devices are prioritized first by `DeviceTypeOrder`, then by name. - const PrioritizedDeviceVector& prioritized_devices() const; + const PrioritizedDeviceVector& prioritized_devices() const + LOCKS_EXCLUDED(devices_mu_); // Return the prioritized list of unique device types in this set. // // The list will be ordered by decreasing priority. The priorities (the second // element in the list's `std::pair`) will be initialized // to the value of `DeviceTypeOrder` for the device types. - const PrioritizedDeviceTypeVector& prioritized_device_types() const; + const PrioritizedDeviceTypeVector& prioritized_device_types() const + LOCKS_EXCLUDED(devices_mu_); // An order to sort by device types according to system-determined // priority. @@ -103,16 +105,19 @@ class DeviceSet { PrioritizedDeviceTypeVector* vector); private: + mutable mutex devices_mu_; + // Not owned. std::vector devices_; // Cached prioritized vector, created on-the-fly when // prioritized_devices() is called. - mutable PrioritizedDeviceVector prioritized_devices_; + mutable PrioritizedDeviceVector prioritized_devices_ GUARDED_BY(devices_mu_); // Cached prioritized vector, created on-the-fly when // prioritized_device_types() is called. - mutable PrioritizedDeviceTypeVector prioritized_device_types_; + mutable PrioritizedDeviceTypeVector prioritized_device_types_ + GUARDED_BY(devices_mu_); // Fullname -> device* for device in devices_. std::unordered_map device_by_name_; ",0,train 792efc53cb091fcb5229202290cc51505bcb9634,tensorflow/tensorflow,"Fix FIFOQueue usage in tf.function Ensures that the queue resource is always created in the eager context and captured by any tf.functions, and that it is owned by the eager context regardless of where it was created. PiperOrigin-RevId: 285479831 Change-Id: Ife6d46aded4da09ed89306550f28fd71f5673966",fifo_queue_test.py,"@@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import gc import random import time @@ -34,9 +35,11 @@ from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util +from tensorflow.python.module import module from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.platform import test from tensorflow.python.util import compat @@ -137,6 +140,42 @@ class FIFOQueueTest(test.TestCase): self.assertAllEqual(self.evaluate(q2.dequeue()), 2) self.assertAllEqual(self.evaluate(q.dequeue()), 1) + def testQueueInFunction(self): + + class _M(module.Module): + + def __init__(self): + self.q1 = data_flow_ops.FIFOQueue(10, [dtypes_lib.int32], shapes=[()]) + self.q2 = None + + @def_function.function + def uses_queues(self, x): + if self.q2 is None: + self.q2 = data_flow_ops.FIFOQueue(10, [dtypes_lib.int32], shapes=[()]) + self.q2.enqueue(x) + self.q2.enqueue(x + 3) + self.q1.enqueue(self.q2.dequeue()) + + m = _M() + self.evaluate(m.uses_queues(constant_op.constant(2))) + self.assertAllEqual(2, self.evaluate(m.q1.dequeue())) + self.assertAllEqual(5, self.evaluate(m.q2.dequeue())) + if context.executing_eagerly(): + q1_handle = m.q1.queue_ref + q2_handle = m.q2.queue_ref + del m + gc.collect() + # If executing eagerly, deleting the Module should clean up the queue + # resources. 
+ with self.assertRaisesRegexp(errors_impl.NotFoundError, + r""Resource .* does not exist.""): + gen_resource_variable_ops.destroy_resource_op( + q1_handle, ignore_lookup_error=False) + with self.assertRaisesRegexp(errors_impl.NotFoundError, + r""Resource .* does not exist.""): + gen_resource_variable_ops.destroy_resource_op( + q2_handle, ignore_lookup_error=False) + def testEnqueueDictWithoutNames(self): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) with self.assertRaisesRegexp(ValueError, ""must have names""): @@ -332,11 +371,11 @@ class FIFOQueueTest(test.TestCase): q.enqueue_many((7, [[1, 2], [3, 4], [5, 6]])) def testEnqueueManyEmptyTypeConversion(self): + q = data_flow_ops.FIFOQueue(10, (dtypes_lib.int32, dtypes_lib.float32), ( + (), ())) @def_function.function def _f(): - q = data_flow_ops.FIFOQueue(10, (dtypes_lib.int32, dtypes_lib.float32), ( - (), ())) enq = q.enqueue_many(([], [])) self.assertEqual(dtypes_lib.int32, enq.inputs[1].dtype) self.assertEqual(dtypes_lib.float32, enq.inputs[2].dtype) @@ -344,12 +383,11 @@ class FIFOQueueTest(test.TestCase): _f() def testEnqueueWrongType(self): + q = data_flow_ops.FIFOQueue(10, (dtypes_lib.int32, dtypes_lib.float32), ( + (), ())) @def_function.function def _f(): - q = data_flow_ops.FIFOQueue(10, (dtypes_lib.int32, dtypes_lib.float32), ( - (), ())) - with self.assertRaises(ValueError): q.enqueue((array_ops.placeholder(dtypes_lib.int32), array_ops.placeholder(dtypes_lib.int32))) ",0,train 792efc53cb091fcb5229202290cc51505bcb9634,tensorflow/tensorflow,"Fix FIFOQueue usage in tf.function Ensures that the queue resource is always created in the eager context and captured by any tf.functions, and that it is owned by the eager context regardless of where it was created. PiperOrigin-RevId: 285479831 Change-Id: Ife6d46aded4da09ed89306550f28fd71f5673966",data_flow_ops.py,"@@ -171,7 +171,7 @@ class QueueBase(object): else: self._names = None self._queue_ref = queue_ref - if context.executing_eagerly(): + if isinstance(queue_ref, ops.EagerTensor): if context.context().scope_name: self._name = context.context().scope_name else: @@ -754,12 +754,13 @@ class FIFOQueue(QueueBase): dtypes = _as_type_list(dtypes) shapes = _as_shape_list(shapes, dtypes) names = _as_name_list(names, dtypes) - queue_ref = gen_data_flow_ops.fifo_queue_v2( - component_types=dtypes, - shapes=shapes, - capacity=capacity, - shared_name=_shared_name(shared_name), - name=name) + with ops.init_scope(): + queue_ref = gen_data_flow_ops.fifo_queue_v2( + component_types=dtypes, + shapes=shapes, + capacity=capacity, + shared_name=_shared_name(shared_name), + name=name) super(FIFOQueue, self).__init__(dtypes, shapes, names, queue_ref) ",0,train d8e755a0da01068e60a797efaf76df71b65bacbb,tensorflow/tensorflow,"Replace mode string literals 'train', 'test', and 'predict' with ModeKeys constants. 
PiperOrigin-RevId: 226266973",callbacks.py,"@@ -43,6 +43,7 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.summary import summary as tf_summary from tensorflow.python.training import saver +from tensorflow.python.training.mode_keys import ModeKeys from tensorflow.python.util.tf_export import keras_export try: @@ -51,11 +52,6 @@ except ImportError: requests = None -_TRAIN = 'train' -_TEST = 'test' -_PREDICT = 'predict' - - # pylint: disable=protected-access def configure_callbacks(callbacks, model, @@ -66,7 +62,7 @@ def configure_callbacks(callbacks, samples=None, verbose=1, count_mode='steps', - mode=_TRAIN): + mode=ModeKeys.TRAIN): """"""Configures callbacks for use in various training loops. Arguments: @@ -79,8 +75,8 @@ def configure_callbacks(callbacks, samples: Number of training samples. verbose: int, 0 or 1. Keras logging verbosity to pass to ProgbarLogger. count_mode: One of 'steps' or 'samples'. Per-batch or per-sample count. - mode: String. One of 'train', 'test', or 'predict'. Which loop mode to - configure callbacks for. + mode: String. One of ModeKeys.TRAIN, ModeKeys.TEST, or ModeKeys.PREDICT. + Which loop mode to configure callbacks for. Returns: Instance of CallbackList used to control all Callbacks. @@ -93,7 +89,7 @@ def configure_callbacks(callbacks, callbacks = [] # Add additional callbacks during training. - if mode == _TRAIN: + if mode == ModeKeys.TRAIN: model.history = History() stateful_metric_names = None if hasattr(model, 'metrics_names'): @@ -113,7 +109,7 @@ def configure_callbacks(callbacks, callback_metrics = [] # When we have deferred build scenario with iterator input, we will compile # when we standardize first batch of data. - if mode != _PREDICT and hasattr(model, 'metrics_names'): + if mode != ModeKeys.PREDICT and hasattr(model, 'metrics_names'): callback_metrics = copy.copy(model.metrics_names) if do_validation: callback_metrics += ['val_' + n for n in model.metrics_names] @@ -148,7 +144,7 @@ def _is_generator_like(data): def make_logs(model, logs, outputs, mode, prefix=''): """"""Computes logs for sending to `on_batch_end` methods."""""" - if mode in {_TRAIN, _TEST}: + if mode in {ModeKeys.TRAIN, ModeKeys.TEST}: if hasattr(model, 'metrics_names'): for label, output in zip(model.metrics_names, outputs): logs[prefix + label] = output @@ -220,27 +216,27 @@ class CallbackList(object): def _call_begin_hook(self, mode): """"""Helper function for on_{train|test|predict}_begin methods."""""" - if mode == _TRAIN: + if mode == ModeKeys.TRAIN: self.on_train_begin() - elif mode == _TEST: + elif mode == ModeKeys.TEST: self.on_test_begin() else: self.on_predict_begin() def _call_end_hook(self, mode): """"""Helper function for on_{train|test|predict}_end methods."""""" - if mode == _TRAIN: + if mode == ModeKeys.TRAIN: self.on_train_end() - elif mode == _TEST: + elif mode == ModeKeys.TEST: self.on_test_end() else: self.on_predict_end() def on_batch_begin(self, batch, logs=None): - self._call_batch_hook(_TRAIN, 'begin', batch, logs=logs) + self._call_batch_hook(ModeKeys.TRAIN, 'begin', batch, logs=logs) def on_batch_end(self, batch, logs=None): - self._call_batch_hook(_TRAIN, 'end', batch, logs=logs) + self._call_batch_hook(ModeKeys.TRAIN, 'end', batch, logs=logs) def on_epoch_begin(self, epoch, logs=None): """"""Calls the `on_epoch_begin` methods of its callbacks. @@ -280,7 +276,7 @@ class CallbackList(object): logs: dict. 
Has keys `batch` and `size` representing the current batch number and the size of the batch. """""" - self._call_batch_hook(_TRAIN, 'begin', batch, logs=logs) + self._call_batch_hook(ModeKeys.TRAIN, 'begin', batch, logs=logs) def on_train_batch_end(self, batch, logs=None): """"""Calls the `on_train_batch_end` methods of its callbacks. @@ -289,7 +285,7 @@ class CallbackList(object): batch: integer, index of batch within the current epoch. logs: dict. Metric results for this batch. """""" - self._call_batch_hook(_TRAIN, 'end', batch, logs=logs) + self._call_batch_hook(ModeKeys.TRAIN, 'end', batch, logs=logs) def on_test_batch_begin(self, batch, logs=None): """"""Calls the `on_test_batch_begin` methods of its callbacks. @@ -299,7 +295,7 @@ class CallbackList(object): logs: dict. Has keys `batch` and `size` representing the current batch number and the size of the batch. """""" - self._call_batch_hook(_TEST, 'begin', batch, logs=logs) + self._call_batch_hook(ModeKeys.TEST, 'begin', batch, logs=logs) def on_test_batch_end(self, batch, logs=None): """"""Calls the `on_test_batch_end` methods of its callbacks. @@ -308,7 +304,7 @@ class CallbackList(object): batch: integer, index of batch within the current epoch. logs: dict. Metric results for this batch. """""" - self._call_batch_hook(_TEST, 'end', batch, logs=logs) + self._call_batch_hook(ModeKeys.TEST, 'end', batch, logs=logs) def on_predict_batch_begin(self, batch, logs=None): """"""Calls the `on_predict_batch_begin` methods of its callbacks. @@ -318,7 +314,7 @@ class CallbackList(object): logs: dict. Has keys `batch` and `size` representing the current batch number and the size of the batch. """""" - self._call_batch_hook(_PREDICT, 'begin', batch, logs=logs) + self._call_batch_hook(ModeKeys.PREDICT, 'begin', batch, logs=logs) def on_predict_batch_end(self, batch, logs=None): """"""Calls the `on_predict_batch_end` methods of its callbacks. @@ -327,7 +323,7 @@ class CallbackList(object): batch: integer, index of batch within the current epoch. logs: dict. Metric results for this batch. """""" - self._call_batch_hook(_PREDICT, 'end', batch, logs=logs) + self._call_batch_hook(ModeKeys.PREDICT, 'end', batch, logs=logs) def on_train_begin(self, logs=None): """"""Calls the `on_train_begin` methods of its callbacks. ",0,train d8e755a0da01068e60a797efaf76df71b65bacbb,tensorflow/tensorflow,"Replace mode string literals 'train', 'test', and 'predict' with ModeKeys constants. PiperOrigin-RevId: 226266973",training.py,"@@ -2057,7 +2057,7 @@ class Model(Network): # Gets network outputs. Does not update weights. # Does update the network states. kwargs = getattr(self, '_function_kwargs', {}) - with K.name_scope('predict'): + with K.name_scope(ModeKeys.PREDICT): self.predict_function = K.function( inputs, self.outputs, ",0,train d8e755a0da01068e60a797efaf76df71b65bacbb,tensorflow/tensorflow,"Replace mode string literals 'train', 'test', and 'predict' with ModeKeys constants. PiperOrigin-RevId: 226266973",training_arrays.py,"@@ -41,7 +41,7 @@ except ImportError: def _get_model_feed(model, mode): - if mode == 'predict': + if mode == ModeKeys.PREDICT: feed = model._feed_inputs else: feed = ( @@ -85,7 +85,7 @@ def _prepare_feed_values(model, inputs, targets, sample_weights, mode): inputs: List or dict of model inputs. targets: Optional list of model targets. sample_weights: Optional list of sample weight arrays. - mode: One of 'train'/'test'/'predict'. + mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. 
Returns: Feed values for the model in the given mode. @@ -111,7 +111,8 @@ def _prepare_feed_values(model, inputs, targets, sample_weights, mode): targets = targets or [] sample_weights = sample_weights or [] ins = inputs + targets + sample_weights - if mode == 'train' and not isinstance(K.symbolic_learning_phase(), int): + if mode == ModeKeys.TRAIN and not isinstance(K.symbolic_learning_phase(), + int): ins += [True] return ins @@ -138,10 +139,10 @@ def model_iteration(model, initial_epoch=0, steps_per_epoch=None, validation_steps=None, - mode='train', + mode=ModeKeys.TRAIN, validation_in_fit=False, **kwargs): - """"""Loop function for arrays of data with modes 'train'/'test'/'predict'. + """"""Loop function for arrays of data with modes TRAIN/TEST/PREDICT. Arguments: model: Keras Model instance. @@ -165,7 +166,7 @@ def model_iteration(model, the default value of `None`. validation_steps: Number of steps to run validation for (only if doing validation from data tensors). Ignored with the default value of `None`. - mode: One of 'train'/'test'/'predict'. + mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. validation_in_fit: DEPRECATED: if true, then this method is invoked from within training iteration (for validation). In this case, do not copy weights when using a tf.distribute.Strategy. The input is deprecated as @@ -174,9 +175,9 @@ def model_iteration(model, **kwargs: Additional arguments for backwards compatibility. Returns: - - In 'train' mode: `History` object. - - In 'test' mode: Evaluation metrics. - - In 'predict' mode: Outputs of the Model called on inputs. + - In TRAIN mode: `History` object. + - In TEST mode: Evaluation metrics. + - In PREDICT mode: Outputs of the Model called on inputs. Raises: ValueError: in case of invalid arguments. @@ -186,7 +187,7 @@ def model_iteration(model, steps_per_epoch = kwargs['steps'] _validate_arguments(steps_per_epoch, validation_steps, kwargs) - if mode == 'train': + if mode == ModeKeys.TRAIN: _print_train_info(inputs, val_inputs, steps_per_epoch, verbose) # Enter DistributionStrategy scope. @@ -230,7 +231,7 @@ def model_iteration(model, indices_for_conversion_to_dense.append(i) # Select aggregation method. - if mode == 'predict': + if mode == ModeKeys.PREDICT: aggregator = training_utils.OutputsAggregator(use_steps, num_samples_or_steps) else: @@ -364,14 +365,14 @@ def model_iteration(model, steps_per_epoch=validation_steps, callbacks=callbacks, verbose=0, - mode='test', + mode=ModeKeys.TEST, validation_in_fit=True) if not isinstance(val_results, list): val_results = [val_results] epoch_logs = cbks.make_logs( model, epoch_logs, val_results, mode, prefix='val_') - if mode == 'train': + if mode == ModeKeys.TRAIN: # Epochs only apply to `fit`. callbacks.on_epoch_end(epoch, epoch_logs) progbar.on_epoch_end(epoch, epoch_logs) @@ -385,12 +386,14 @@ def model_iteration(model, model, model._distributed_model, mode) scope.__exit__(None, None, None) - if mode == 'train': + if mode == ModeKeys.TRAIN: return model.history return results # For backwards compatibility for internal users of these loops. 
-fit_loop = functools.partial(model_iteration, mode='train') -test_loop = functools.partial(model_iteration, mode='test', shuffle=False) -predict_loop = functools.partial(model_iteration, mode='predict', shuffle=False) +fit_loop = functools.partial(model_iteration, mode=ModeKeys.TRAIN) +test_loop = functools.partial( + model_iteration, mode=ModeKeys.TEST, shuffle=False) +predict_loop = functools.partial( + model_iteration, mode=ModeKeys.PREDICT, shuffle=False) ",0,train d8e755a0da01068e60a797efaf76df71b65bacbb,tensorflow/tensorflow,"Replace mode string literals 'train', 'test', and 'predict' with ModeKeys constants. PiperOrigin-RevId: 226266973",training_distributed.py,"@@ -19,7 +19,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import enum # pylint: disable=g-bad-import-order import numpy as np from tensorflow.python.distribute import distribute_lib @@ -38,13 +37,10 @@ from tensorflow.python.keras.utils.generic_utils import Progbar from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training.mode_keys import ModeKeys from tensorflow.python.util import nest -class _Mode(enum.Enum): - TRAIN = 'train' - TEST = 'test' - PREDICT = 'predict' # TODO(priyag, sourabhbajaj): Refactor this file to address code duplication. @@ -100,10 +96,10 @@ def experimental_fit_loop(model, if model._compile_distribution: clone_model_on_replicas(model, current_strategy, make_callback_model=True, inputs=inputs, - targets=targets, mode=_Mode.TRAIN) + targets=targets, mode=ModeKeys.TRAIN) else: _build_distributed_network(model, current_strategy, inputs, - targets, mode=_Mode.TRAIN) + targets, mode=ModeKeys.TRAIN) (grouped_inputs, grouped_outputs, grouped_updates, grouped_session_args) = current_strategy.extended.call_for_each_replica( @@ -215,7 +211,7 @@ def experimental_fit_loop(model, # the weights back to the original model before we can run validation. with current_strategy.scope(): _copy_weights_to_original_model( - model, model._distributed_model_train, 'train') + model, model._distributed_model_train, ModeKeys.TRAIN) val_outs = experimental_test_loop( # pylint: disable=undefined-variable model, @@ -237,7 +233,7 @@ def experimental_fit_loop(model, # Copy the weights back from the replicated model to the original model. 
with current_strategy.scope(): _copy_weights_to_original_model(model, model._distributed_model_train, - 'train') + ModeKeys.TRAIN) scope.__exit__(None, None, None) return model.history @@ -281,10 +277,10 @@ def experimental_test_loop(model, if model._compile_distribution: clone_model_on_replicas(model, current_strategy, make_callback_model=False, inputs=inputs, - targets=targets, mode=_Mode.TEST) + targets=targets, mode=ModeKeys.TEST) else: _build_distributed_network(model, current_strategy, inputs, - targets, mode=_Mode.TEST) + targets, mode=ModeKeys.TEST) (grouped_inputs, grouped_outputs, grouped_updates, grouped_session_args) = current_strategy.extended.call_for_each_replica( @@ -397,10 +393,10 @@ def experimental_predict_loop(model, iterator, verbose=0, steps=None): if model._compile_distribution: clone_model_on_replicas(model, current_strategy, make_callback_model=False, inputs=inputs, - mode=_Mode.PREDICT) + mode=ModeKeys.PREDICT) else: _build_distributed_network(model, current_strategy, inputs, - mode=_Mode.PREDICT) + mode=ModeKeys.PREDICT) (grouped_inputs, grouped_outputs, grouped_updates, grouped_session_args) = current_strategy.extended.call_for_each_replica( @@ -535,7 +531,7 @@ def _build_network_on_replica(model, inputs=None, targets=None, mode=None): if isinstance(targets, tuple): targets = nest.flatten(targets) - if mode == _Mode.PREDICT: + if mode == ModeKeys.PREDICT: _custom_compile_for_predict(updated_model) else: updated_model.compile( @@ -557,11 +553,11 @@ def _build_distributed_network(model, strategy, inputs=None, targets=None, distributed_model = strategy.extended.call_for_each_replica( _build_network_on_replica, args=(model, inputs, targets, mode)) - if mode is _Mode.TRAIN: + if mode is ModeKeys.TRAIN: model._distributed_model_train = distributed_model - elif mode is _Mode.TEST: + elif mode is ModeKeys.TEST: model._distributed_model_test = distributed_model - elif mode is _Mode.PREDICT: + elif mode is ModeKeys.PREDICT: model._distributed_model_predict = distributed_model else: model._distributed_model = distributed_model @@ -594,7 +590,7 @@ def _clone_and_build_model(model, inputs=None, targets=None, mode=None): if isinstance(targets, tuple): targets = nest.flatten(targets) - if mode == _Mode.PREDICT: + if mode == ModeKeys.PREDICT: _custom_compile_for_predict(cloned_model) else: cloned_model.compile( @@ -615,11 +611,11 @@ def clone_model_on_replicas(model, strategy, make_callback_model=False, with K.get_graph().as_default(), strategy.scope(): distributed_model = strategy.extended.call_for_each_replica( _clone_and_build_model, args=(model, inputs, targets, mode)) - if mode is _Mode.TRAIN: + if mode is ModeKeys.TRAIN: model._distributed_model_train = distributed_model - elif mode is _Mode.TEST: + elif mode is ModeKeys.TEST: model._distributed_model_test = distributed_model - elif mode is _Mode.PREDICT: + elif mode is ModeKeys.PREDICT: model._distributed_model_predict = distributed_model else: model._distributed_model = distributed_model @@ -659,7 +655,7 @@ def _make_execution_function(model, mode): if not model._distributed_model: if model._compile_distribution: clone_model_on_replicas( - model, strategy, make_callback_model=(mode == 'train')) + model, strategy, make_callback_model=(mode == ModeKeys.TRAIN)) else: _build_distributed_network(model, strategy) @@ -674,7 +670,7 @@ def _make_execution_function(model, mode): grouped_session_args) = strategy.extended.call_for_each_replica( _per_device_function, args=(model._distributed_model,)) - if mode == 'train': + if mode 
== ModeKeys.TRAIN: # Initialize the variables in the replicated model. This is necessary for # multi-worker training because on some workers, initialization is not # needed. This method does initialization or waiting for initialization @@ -692,7 +688,7 @@ def _make_execution_function(model, mode): grouped_outputs, grouped_updates, grouped_session_args, - with_loss_tensor=(mode != 'predict')) + with_loss_tensor=(mode != ModeKeys.PREDICT)) return K.function( all_inputs, @@ -708,7 +704,7 @@ def _make_eager_execution_function(model, mode): if not model._distributed_model: if model._compile_distribution: clone_model_on_replicas( - model, strategy, make_callback_model=(mode == 'train')) + model, strategy, make_callback_model=(mode == ModeKeys.TRAIN)) else: _build_distributed_network(model, strategy) @@ -732,7 +728,7 @@ def _make_eager_execution_function(model, mode): strategy, grouped_inputs, grouped_outputs, - with_loss_tensor=(mode != 'predict')) + with_loss_tensor=(mode != ModeKeys.PREDICT)) return K.function( all_inputs, @@ -748,7 +744,7 @@ def _prepare_feed_values(model, inputs, targets, sample_weights, mode): inputs: List or dict of model inputs. targets: Optional list of model targets. sample_weights: Optional list of sample weight arrays. - mode: One of 'train'/'test'/'predict'. + mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. Returns: Feed values for the model in the given mode. @@ -758,7 +754,7 @@ def _prepare_feed_values(model, inputs, targets, sample_weights, mode): inputs = distributed_training_utils.flatten_perdevice_values(strategy, inputs) targets = distributed_training_utils.flatten_perdevice_values( strategy, targets) - if mode == 'predict': + if mode == ModeKeys.PREDICT: sample_weights = [] targets = [] else: @@ -766,7 +762,8 @@ def _prepare_feed_values(model, inputs, targets, sample_weights, mode): None for _ in range(len(model.outputs) * strategy.num_replicas_in_sync) ] ins = inputs + targets + sample_weights - if mode == 'train' and not isinstance(K.symbolic_learning_phase(), int): + if mode == ModeKeys.TRAIN and not isinstance(K.symbolic_learning_phase(), + int): ins += [True] return ins @@ -785,7 +782,7 @@ def _copy_weights_to_distributed_model(original_model, grouped_model): def _copy_weights_to_original_model(model, grouped_model, mode): """"""Copies weights from first distributed model back to original model."""""" - if model._distribution_strategy and mode == 'train': + if model._distribution_strategy and mode == ModeKeys.TRAIN: updated_weights = model._distribution_strategy.unwrap( grouped_model)[0].get_weights() model.set_weights(updated_weights) @@ -793,7 +790,7 @@ def _copy_weights_to_original_model(model, grouped_model, mode): def _per_device_aggregate_batch(batch_outs, model, mode): """"""Aggregates the per-device batch-level outputs from a distributed step."""""" - if model._distribution_strategy is not None and mode == 'predict': + if model._distribution_strategy is not None and mode == ModeKeys.PREDICT: total_batch_outs = [] for i in range(len(model.outputs)): num_replicas = model._distribution_strategy.num_replicas_in_sync ",0,train d8e755a0da01068e60a797efaf76df71b65bacbb,tensorflow/tensorflow,"Replace mode string literals 'train', 'test', and 'predict' with ModeKeys constants. 
PiperOrigin-RevId: 226266973",training_generator.py,"@@ -52,10 +52,10 @@ def model_iteration(model, use_multiprocessing=False, shuffle=False, initial_epoch=0, - mode='train', + mode=ModeKeys.TRAIN, batch_size=None, **kwargs): - """"""Loop function for arrays of data with modes 'train'/'test'/'predict'. + """"""Loop function for arrays of data with modes TRAIN/TEST/PREDICT. Arguments: model: Keras Model instance. @@ -90,16 +90,16 @@ def model_iteration(model, `None`. initial_epoch: Epoch at which to start training (useful for resuming a previous training run). - mode: One of 'train'/'test'/'predict'. + mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. batch_size: Integer batch size or None if unknown. Will only be used if `data` is in NumPy/Tensor format. **kwargs: Additional arguments for backwards compatibility. `steps` is accepted as an alias for `steps_per_epoch`. Returns: - - In 'train' mode: `History` object. - - In 'test' mode: Evaluation metrics. - - In 'predict' mode: Outputs of the Model called on inputs. + - In TRAIN mode: `History` object. + - In TEST mode: Evaluation metrics. + - In PREDICT mode: Outputs of the Model called on inputs. Raises: ValueError: in case of invalid arguments. @@ -152,14 +152,14 @@ def model_iteration(model, progbar.params = callbacks.params progbar.params['verbose'] = verbose - if mode == 'predict': + if mode == ModeKeys.PREDICT: aggregator = training_utils.OutputsAggregator(True, steps_per_epoch) else: aggregator = training_utils.MetricsAggregator(True, steps_per_epoch) if should_set_learning_phase: old_learning_phase = backend.learning_phase() - backend.set_learning_phase(1 if mode == 'train' else 0) + backend.set_learning_phase(1 if mode == ModeKeys.TRAIN else 0) callbacks.model.stop_training = False callbacks._call_begin_hook(mode) @@ -226,14 +226,14 @@ def model_iteration(model, max_queue_size=max_queue_size, callbacks=callbacks, verbose=0, - mode='test') + mode=ModeKeys.TEST) if not isinstance(val_results, list): val_results = [val_results] epoch_logs = cbks.make_logs( model, epoch_logs, val_results, mode, prefix='val_') - if mode == 'train': + if mode == ModeKeys.TRAIN: # Epochs only apply to `fit`. callbacks.on_epoch_end(epoch, epoch_logs) progbar.on_epoch_end(epoch, epoch_logs) @@ -246,17 +246,17 @@ def model_iteration(model, if should_set_learning_phase: backend.set_learning_phase(old_learning_phase) - if mode == 'train': + if mode == ModeKeys.TRAIN: return model.history return results # Maintain compatibility with the existing names. -fit_generator = functools.partial(model_iteration, mode='train') +fit_generator = functools.partial(model_iteration, mode=ModeKeys.TRAIN) evaluate_generator = functools.partial( - model_iteration, mode='test', shuffle=False) + model_iteration, mode=ModeKeys.TEST, shuffle=False) predict_generator = functools.partial( - model_iteration, mode='predict', shuffle=False) + model_iteration, mode=ModeKeys.PREDICT, shuffle=False) def _get_next_batch(output_generator, mode): @@ -268,7 +268,7 @@ def _get_next_batch(output_generator, mode): logging.warning('Your dataset iterator ran out of data.') return None if not isinstance(generator_output, tuple): - if mode == 'predict': + if mode == ModeKeys.PREDICT: # Always wrap in a tuple. return (generator_output,) else: @@ -307,7 +307,7 @@ def _validate_arguments(is_sequence, use_multiprocessing, workers, `keras.utils.data_utils.Sequence` object or Eager Iterator or Dataset. 
validation_steps: Total number of steps (batches of samples) before declaring validation finished. - mode: One of 'train'/'test'/'predict'. + mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. kwargs: Additional arguments for backwards compatibility. Raises: @@ -323,7 +323,7 @@ def _validate_arguments(is_sequence, use_multiprocessing, workers, ' class.')) if steps_per_epoch is None: - arg_name = 'steps_per_epoch' if mode == 'train' else 'steps' + arg_name = 'steps_per_epoch' if mode == ModeKeys.TRAIN else 'steps' raise ValueError('Please specify the number of steps via the ' '`{}` argument.'.format(arg_name)) @@ -429,11 +429,11 @@ def _make_enqueued_generator(generator, def _make_execution_function(model, mode, class_weight=None): """"""Makes function to run one step of model execution."""""" - if mode == 'train': + if mode == ModeKeys.TRAIN: if not context.executing_eagerly(): model._make_fit_function() f = functools.partial(model.train_on_batch, class_weight=class_weight) - elif mode == 'test': + elif mode == ModeKeys.TEST: if not context.executing_eagerly(): model._make_eval_function() f = model.test_on_batch @@ -446,7 +446,7 @@ def _make_execution_function(model, mode, class_weight=None): f = predict_on_batch # Maintain stateful metrics across batch-level calls. - if mode != 'predict': + if mode != ModeKeys.PREDICT: f = functools.partial(f, reset_metrics=False) return f ",0,train 9db4d46857b4d313425a83bdf86b24927e05423d,tensorflow/tensorflow,"[XLA/GPU] Use SequentialHloOrdering for single stream modules. Also simplify tests in gpu_hlo_schedule_test. PiperOrigin-RevId: 357989058 Change-Id: Iab8a2857ce41a30c278c1f202be3162ca16b6428",gpu_compiler.cc,"@@ -520,9 +520,9 @@ GpuCompiler::RunHloPassesAndBufferAssignement( std::unique_ptr stream_assignment = AssignStreams(*hlo_module); - TF_ASSIGN_OR_RETURN( - std::unique_ptr hlo_schedule, - GpuHloSchedule::Build(*hlo_module, *stream_assignment, pointer_size_)); + TF_ASSIGN_OR_RETURN(std::unique_ptr hlo_schedule, + GpuHloSchedule::Build(hlo_module.get(), + *stream_assignment, pointer_size_)); auto buffer_size_bytes_function = [this](const BufferValue& buffer_value) -> int64 { @@ -565,7 +565,7 @@ static Status CompileModuleToLlvmIrImpl( AssignStreams(*hlo_module); TF_ASSIGN_OR_RETURN( std::unique_ptr hlo_schedule, - GpuHloSchedule::Build(*hlo_module, *stream_assignment, pointer_size)); + GpuHloSchedule::Build(hlo_module, *stream_assignment, pointer_size)); auto buffer_size_bytes_function = [pointer_size](const BufferValue& buffer_value) -> int64 { ",0,train 9db4d46857b4d313425a83bdf86b24927e05423d,tensorflow/tensorflow,"[XLA/GPU] Use SequentialHloOrdering for single stream modules. Also simplify tests in gpu_hlo_schedule_test. PiperOrigin-RevId: 357989058 Change-Id: Iab8a2857ce41a30c278c1f202be3162ca16b6428",gpu_hlo_schedule.cc,"@@ -190,30 +190,29 @@ GpuHloSchedule::GpuHloSchedule() {} /* static */ StatusOr> GpuHloSchedule::Build( - const HloModule& module, const StreamAssignment& stream_assignment, + HloModule* module, const StreamAssignment& stream_assignment, int64 pointer_size) { std::unique_ptr schedule(new GpuHloSchedule); // Initialize thunk_launch_order_, the total order of thunk launches. - HloComputation* entry_computation = module.entry_computation(); + HloComputation* entry_computation = module->entry_computation(); if (stream_assignment.StreamCount() == 1) { - // All kernels are launched on a single stream, so there's no loss of - // concurrency by optimizing for minimal memory usage. 
TF_ASSIGN_OR_RETURN( - HloInstructionSequence sequence, - ScheduleComputation( - entry_computation, [pointer_size](const BufferValue& buffer) { - return ShapeUtil::ByteSizeOf(buffer.shape(), pointer_size); - })); - schedule->thunk_launch_order_ = sequence.instructions(); + HloSchedule sequences, + ScheduleModule(module, [pointer_size](const BufferValue& buffer) { + return ShapeUtil::ByteSizeOf(buffer.shape(), pointer_size); + })); + schedule->thunk_launch_order_ = + sequences.sequence(entry_computation).instructions(); + schedule->hlo_ordering_ = + absl::make_unique(sequences); } else { // BFS tends to increase concurrency, but also increases memory usage. BFSLaunchOrder(entry_computation, &schedule->thunk_launch_order_); + schedule->hlo_ordering_ = absl::make_unique( + module, stream_assignment, schedule->thunk_launch_order_); } - schedule->hlo_ordering_ = absl::make_unique( - &module, stream_assignment, schedule->thunk_launch_order_); - return std::move(schedule); } ",0,train 9db4d46857b4d313425a83bdf86b24927e05423d,tensorflow/tensorflow,"[XLA/GPU] Use SequentialHloOrdering for single stream modules. Also simplify tests in gpu_hlo_schedule_test. PiperOrigin-RevId: 357989058 Change-Id: Iab8a2857ce41a30c278c1f202be3162ca16b6428",gpu_hlo_schedule.h,"@@ -41,7 +41,7 @@ class GpuHloSchedule { // Constructs an GpuHloSchedule for the given module, based on the given // stream assignment. static StatusOr> Build( - const HloModule& module, const StreamAssignment& stream_assignment, + HloModule* module, const StreamAssignment& stream_assignment, int64 pointer_size); // Returns the total order of thunk launches, represented in terms of HLO ",0,train 9db4d46857b4d313425a83bdf86b24927e05423d,tensorflow/tensorflow,"[XLA/GPU] Use SequentialHloOrdering for single stream modules. Also simplify tests in gpu_hlo_schedule_test. PiperOrigin-RevId: 357989058 Change-Id: Iab8a2857ce41a30c278c1f202be3162ca16b6428",gpu_hlo_schedule_test.cc,"@@ -39,7 +39,7 @@ class GpuHloScheduleTest : public HloTestBase { Shape f32_2x2_ = ShapeUtil::MakeShape(F32, {2, 2}); static std::unique_ptr BuildGpuHloSchedule( - const HloModule& module, const StreamAssignment& streams) { + HloModule* module, const StreamAssignment& streams) { return GpuHloSchedule::Build(module, streams, /*pointer_size=*/8) .ConsumeValueOrDie(); } @@ -86,7 +86,7 @@ TEST_F(GpuHloScheduleTest, SequentialMatMul) { EXPECT_EQ(streams->StreamNumberForHlo(*dot1), streams->StreamNumberForHlo(*dot2)); - auto schedule = BuildGpuHloSchedule(*module, *streams); + auto schedule = BuildGpuHloSchedule(module.get(), *streams); // Remove parameters, which are unordered. EXPECT_EQ(RemoveHlo(schedule->ThunkLaunchOrder(), {x, y, z}), HloVec({dot1, dot2})); @@ -94,32 +94,10 @@ TEST_F(GpuHloScheduleTest, SequentialMatMul) { // Parameters x,y,z are mutually unordered, while dot1 and dot2 are // transitively ordered by operands. 
auto order = schedule->ConsumeHloOrdering(); + EXPECT_TRUE(order->ExecutesBefore(z, y)); + EXPECT_TRUE(order->ExecutesBefore(y, x)); EXPECT_TRUE(order->ExecutesBefore(x, dot1)); - EXPECT_TRUE(order->ExecutesBefore(x, dot2)); - EXPECT_TRUE(order->ExecutesBefore(y, dot1)); - EXPECT_TRUE(order->ExecutesBefore(y, dot2)); - EXPECT_TRUE(order->ExecutesBefore(z, dot2)); EXPECT_TRUE(order->ExecutesBefore(dot1, dot2)); - - EXPECT_FALSE(order->ExecutesBefore(x, x)); - EXPECT_FALSE(order->ExecutesBefore(x, y)); - EXPECT_FALSE(order->ExecutesBefore(x, z)); - EXPECT_FALSE(order->ExecutesBefore(y, x)); - EXPECT_FALSE(order->ExecutesBefore(y, y)); - EXPECT_FALSE(order->ExecutesBefore(y, z)); - EXPECT_FALSE(order->ExecutesBefore(z, x)); - EXPECT_FALSE(order->ExecutesBefore(z, y)); - EXPECT_FALSE(order->ExecutesBefore(z, z)); - EXPECT_FALSE(order->ExecutesBefore(z, dot1)); - EXPECT_FALSE(order->ExecutesBefore(dot1, x)); - EXPECT_FALSE(order->ExecutesBefore(dot1, y)); - EXPECT_FALSE(order->ExecutesBefore(dot1, z)); - EXPECT_FALSE(order->ExecutesBefore(dot1, dot1)); - EXPECT_FALSE(order->ExecutesBefore(dot2, x)); - EXPECT_FALSE(order->ExecutesBefore(dot2, y)); - EXPECT_FALSE(order->ExecutesBefore(dot2, z)); - EXPECT_FALSE(order->ExecutesBefore(dot2, dot1)); - EXPECT_FALSE(order->ExecutesBefore(dot2, dot2)); } // Test of a single stream, where data dependencies do not fully determine the @@ -148,7 +126,7 @@ TEST_F(GpuHloScheduleTest, SequentialAdd) { EXPECT_EQ(streams->StreamNumberForHlo(*add1), streams->StreamNumberForHlo(*add3)); - auto schedule = BuildGpuHloSchedule(*module, *streams); + auto schedule = BuildGpuHloSchedule(module.get(), *streams); // Remove parameters, which are unordered. EXPECT_EQ(RemoveHlo(schedule->ThunkLaunchOrder(), {x, y, z}), HloVec({add1, add2, add3})); @@ -156,43 +134,11 @@ TEST_F(GpuHloScheduleTest, SequentialAdd) { // Parameters x,y,z are mutually unordered, while add1, add2 and add3 are // transitively ordered by operands. auto order = schedule->ConsumeHloOrdering(); + EXPECT_TRUE(order->ExecutesBefore(y, z)); + EXPECT_TRUE(order->ExecutesBefore(z, x)); EXPECT_TRUE(order->ExecutesBefore(x, add1)); - EXPECT_TRUE(order->ExecutesBefore(x, add2)); - EXPECT_TRUE(order->ExecutesBefore(x, add3)); - EXPECT_TRUE(order->ExecutesBefore(y, add1)); - EXPECT_TRUE(order->ExecutesBefore(y, add2)); - EXPECT_TRUE(order->ExecutesBefore(y, add3)); - EXPECT_TRUE(order->ExecutesBefore(z, add2)); - EXPECT_TRUE(order->ExecutesBefore(z, add3)); - EXPECT_TRUE(order->ExecutesBefore(add1, add3)); + EXPECT_TRUE(order->ExecutesBefore(add1, add2)); EXPECT_TRUE(order->ExecutesBefore(add2, add3)); - // The HLO graph does not define an ordering for add1 and add2, but their - // assignment onto the same stream does define an ordering. 
- if (order->ExecutesBefore(add1, add2)) { - EXPECT_FALSE(order->ExecutesBefore(add2, add1)); - } else { - EXPECT_TRUE(order->ExecutesBefore(add2, add1)); - EXPECT_FALSE(order->ExecutesBefore(add1, add2)); - } - - EXPECT_FALSE(order->ExecutesBefore(x, x)); - EXPECT_FALSE(order->ExecutesBefore(x, y)); - EXPECT_FALSE(order->ExecutesBefore(x, z)); - EXPECT_FALSE(order->ExecutesBefore(y, x)); - EXPECT_FALSE(order->ExecutesBefore(y, y)); - EXPECT_FALSE(order->ExecutesBefore(y, z)); - EXPECT_FALSE(order->ExecutesBefore(z, x)); - EXPECT_FALSE(order->ExecutesBefore(z, y)); - EXPECT_FALSE(order->ExecutesBefore(z, z)); - EXPECT_FALSE(order->ExecutesBefore(z, add1)); - EXPECT_FALSE(order->ExecutesBefore(add1, x)); - EXPECT_FALSE(order->ExecutesBefore(add1, y)); - EXPECT_FALSE(order->ExecutesBefore(add1, z)); - EXPECT_FALSE(order->ExecutesBefore(add1, add1)); - EXPECT_FALSE(order->ExecutesBefore(add2, x)); - EXPECT_FALSE(order->ExecutesBefore(add2, y)); - EXPECT_FALSE(order->ExecutesBefore(add2, z)); - EXPECT_FALSE(order->ExecutesBefore(add2, add2)); } // Test of two streams. @@ -216,7 +162,7 @@ TEST_F(GpuHloScheduleTest, ConcurrentMatMul) { EXPECT_NE(streams->StreamNumberForHlo(*dot1), streams->StreamNumberForHlo(*dot2)); - auto schedule = BuildGpuHloSchedule(*module, *streams); + auto schedule = BuildGpuHloSchedule(module.get(), *streams); // Remove parameters, which are unordered. HloVec thunk_launch_order = RemoveHlo(schedule->ThunkLaunchOrder(), {x, y}); EXPECT_TRUE(thunk_launch_order == HloVec({dot1, dot2, add}) || @@ -308,7 +254,7 @@ TEST_F(GpuHloScheduleTest, LatticeMatMul) { // We don't check the thunk launch order, since there are many valid total // orders, and it's annoying to express. - auto schedule = BuildGpuHloSchedule(*module, *streams); + auto schedule = BuildGpuHloSchedule(module.get(), *streams); auto order = schedule->ConsumeHloOrdering(); const HloVec all_params( ",0,train 9db4d46857b4d313425a83bdf86b24927e05423d,tensorflow/tensorflow,"[XLA/GPU] Use SequentialHloOrdering for single stream modules. Also simplify tests in gpu_hlo_schedule_test. PiperOrigin-RevId: 357989058 Change-Id: Iab8a2857ce41a30c278c1f202be3162ca16b6428",ir_emitter_unnested.cc,"@@ -3829,6 +3829,20 @@ Status CheckConditionalBuffersShareAllocation( return Status::OK(); } +Status AcceptMaybeOrdered(HloComputation* computation, + IrEmitterUnnested* emitter, + const BufferAssignment& buffer_assignment) { + const auto& debug_options = computation->parent()->config().debug_options(); + if (debug_options.xla_gpu_disable_multi_streaming()) { + const HloInstructionSequence* sequence = + buffer_assignment.hlo_ordering().SequentialOrder(*computation); + // Always expect a sequential ordering for single-stream programs. + TF_RET_CHECK(sequence); + return computation->AcceptOrdered(emitter, sequence->instructions()); + } + return computation->Accept(emitter); +} + } // namespace StatusOr> IrEmitterUnnested::BuildWhileThunk( @@ -3842,14 +3856,18 @@ StatusOr> IrEmitterUnnested::BuildWhileThunk( TF_ASSIGN_OR_RETURN(auto ir_emitter_condition, IrEmitterUnnested::Create(hlo_module_config_, condition, ir_emitter_context_)); - TF_RETURN_IF_ERROR(condition->Accept(ir_emitter_condition.get())); + + TF_RETURN_IF_ERROR( + AcceptMaybeOrdered(condition, ir_emitter_condition.get(), + ir_emitter_context_->buffer_assignment())); // Generate thunk sequence for while 'body'. 
HloComputation* body = hlo->while_body(); TF_ASSIGN_OR_RETURN( auto ir_emitter_body, IrEmitterUnnested::Create(hlo_module_config_, body, ir_emitter_context_)); - TF_RETURN_IF_ERROR(body->Accept(ir_emitter_body.get())); + TF_RETURN_IF_ERROR(AcceptMaybeOrdered( + body, ir_emitter_body.get(), ir_emitter_context_->buffer_assignment())); const auto* index_map = ir_emitter_context_->profile_index_map(); absl::optional condition_profile_index, body_profile_index; @@ -3877,7 +3895,8 @@ StatusOr> IrEmitterUnnested::BuildForThunk( TF_ASSIGN_OR_RETURN( auto ir_emitter_body, IrEmitterUnnested::Create(hlo_module_config_, body, ir_emitter_context_)); - TF_RETURN_IF_ERROR(body->Accept(ir_emitter_body.get())); + TF_RETURN_IF_ERROR(AcceptMaybeOrdered( + body, ir_emitter_body.get(), ir_emitter_context_->buffer_assignment())); const auto* index_map = ir_emitter_context_->profile_index_map(); absl::optional body_profile_index; @@ -3914,7 +3933,8 @@ StatusOr> IrEmitterUnnested::BuildConditionalThunk( auto ir_emitter, IrEmitterUnnested::Create(hlo_module_config_, branch_computation, ir_emitter_context_)); - TF_CHECK_OK(branch_computation->Accept(ir_emitter.get())); + TF_CHECK_OK(AcceptMaybeOrdered(branch_computation, ir_emitter.get(), + ir_emitter_context_->buffer_assignment())); branch_thunks.push_back(std::move(*ir_emitter->ConsumeThunkSequence())); absl::optional profile_index; ",0,train f04c6f08a3756a7d5fd7ae94fb8199831e22cebd,tensorflow/tensorflow,Addressed more review comments,mkl_layout_pass.cc,"@@ -1210,16 +1210,16 @@ class MklLayoutRewritePass : public GraphOptimizationPass { string mode_string; GetNodeAttr(n->def(), ""mode"", &mode_string); if (mode_string != ""SCALED"") { - VLOG(1) << ""DequantizeRewrite: Mode is not SCALED."" + VLOG(1) << ""DequantizeRewrite: Mode is not SCALED. "" << ""This case is not optimized by Intel MKL kernel, thus using "" - ""Eigen op for Dequantize op ""; + ""Eigen op for Dequantize op.""; return false; } if (input->IsConstant()) { VLOG(1) << ""DequantizeRewrite: Trying to dequantize a Const node which "" << ""could possibly be a filter. "" << ""This case is not supported by Intel MKL kernel, thus using "" - ""Eigen op for Dequantize op ""; + ""Eigen op for Dequantize op.""; return false; } return true; ",0,train ceb4b27be1947e9232304ad81c2d6e02d542e7ed,tensorflow/tensorflow,"Upstreaming the changes from https://github.com/tensorflow/tflite-micro/pull/369/ PiperOrigin-RevId: 389148121 Change-Id: I36a86a8122632f8ebc010be7d429e11b642b0aea",cppmath.h,"@@ -19,9 +19,8 @@ limitations under the License. namespace tflite { -#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \ - (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO) || \ - defined(__ZEPHYR__) +#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \ + (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(__ZEPHYR__) #define TF_LITE_GLOBAL_STD_PREFIX #else #define TF_LITE_GLOBAL_STD_PREFIX std ",0,train b46aa421ece747f31a47c3f600fac03bb6e4247f,tensorflow/tensorflow,"Ensure traceback from Operation() is correct This is a followup to cl/439389601, which only tested and fixed the path from _create_c_op. Operation() has another level of nested-ness. This could have been done by using stacklevel=4 in extract_traceback_for_op. But we instead mutate traceback inplace to avoid changing the API of _create_c_op to add a stacklevel argument. PiperOrigin-RevId: 439624965",ops.py,"@@ -2114,6 +2114,10 @@ class Operation(object): # Post process for control flows. 
self._control_flow_post_processing(input_tensors=inputs) + # Removes this frame from the Python traceback. + # We adjust stacklevel directly to avoid triggering serialization. + self.traceback._stacklevel += 1 # pylint: disable=protected-access + @classmethod def _from_c_op(cls, c_op, g): """"""Create an Operation from a TF_Operation. ",0,train b46aa421ece747f31a47c3f600fac03bb6e4247f,tensorflow/tensorflow,"Ensure traceback from Operation() is correct This is a followup to cl/439389601, which only tested and fixed the path from _create_c_op. Operation() has another level of nested-ness. This could have been done by using stacklevel=4 in extract_traceback_for_op. But we instead mutate traceback inplace to avoid changing the API of _create_c_op to add a stacklevel argument. PiperOrigin-RevId: 439624965",ops_test.py,"@@ -663,6 +663,13 @@ def _apply_op(g, *args, **kwargs): class OperationTest(test_util.TensorFlowTestCase): + def testTraceback(self): + g = ops.Graph() + op1 = ops.Operation( + ops._NodeDef(""None"", ""op1""), g, [], + [dtypes.float32_ref, dtypes.float32]) + self.assertIn(""testTraceback"", op1.traceback[-1]) + @test_util.run_deprecated_v1 def testNoInputs(self): op = test_ops.float_output_string_output(name=""myop"").a.op ",0,train b46aa421ece747f31a47c3f600fac03bb6e4247f,tensorflow/tensorflow,"Ensure traceback from Operation() is correct This is a followup to cl/439389601, which only tested and fixed the path from _create_c_op. Operation() has another level of nested-ness. This could have been done by using stacklevel=4 in extract_traceback_for_op. But we instead mutate traceback inplace to avoid changing the API of _create_c_op to add a stacklevel argument. PiperOrigin-RevId: 439624965",tf_stack.cc,"@@ -138,7 +138,7 @@ std::string StackFrameToString( class StackTraceWrapper : public AbstractStackTrace { public: - explicit StackTraceWrapper(absl::Span stack_frames) + explicit StackTraceWrapper(absl::Span stack_frames) : stack_frames_cache_(std::vector(stack_frames.begin(), stack_frames.end())) {} @@ -149,7 +149,7 @@ class StackTraceWrapper : public AbstractStackTrace { stacklevel}; } - absl::Span ToFrames() const override { + absl::Span ToFrames() const override { if (stack_frames_cache_) { return *stack_frames_cache_; } @@ -172,6 +172,10 @@ class StackTraceWrapper : public AbstractStackTrace { return *stack_frames_cache_; } + int get_stacklevel() const { return stacklevel_; } + + void set_stacklevel(int stacklevel) { stacklevel_ = stacklevel; } + std::vector GetUserFrames(int limit = -1) const { PyGILState_STATE state = PyGILState_Ensure(); std::vector user_frames = captured_.ToStackFrames( @@ -262,7 +266,7 @@ class StackTraceWrapper : public AbstractStackTrace { filter_(filter), stacklevel_(stacklevel) {} - static std::string ToStringHelper(absl::Span stack_frames, + static std::string ToStringHelper(absl::Span stack_frames, const TracePrintingOptions& opts, int shared_prefix_size) { return absl::StrJoin( @@ -369,8 +373,8 @@ PYBIND11_MODULE(_tf_stack, m) { // TODO(slebedev): upstream negative indexing support into pybind11. .def( ""__getitem__"", - [](const StackTraceWrapper& self, ssize_t index) { - absl::Span frames = self.ToFrames(); + [](const StackTraceWrapper& self, py::ssize_t index) { + absl::Span frames = self.ToFrames(); const size_t eff_index = index < 0 ? 
frames.size() + index : static_cast(index); if (eff_index >= frames.size()) { @@ -382,7 +386,7 @@ PYBIND11_MODULE(_tf_stack, m) { .def( ""__getitem__"", [](const StackTraceWrapper& self, py::slice slice) { - absl::Span frames = self.ToFrames(); + absl::Span frames = self.ToFrames(); py::ssize_t start, stop, step, slicelength; if (!slice.compute(frames.size(), &start, &stop, &step, &slicelength)) { @@ -402,9 +406,19 @@ PYBIND11_MODULE(_tf_stack, m) { return StackTraceWrapper{out}; }, py::return_value_policy::reference_internal) + .def(""__delitem__"", + [](StackTraceWrapper& self, py::ssize_t index) { + absl::Span frames = self.ToFrames(); + const size_t eff_index = + index < 0 ? frames.size() + index : static_cast(index); + if (eff_index >= frames.size()) { + throw py::index_error(); + } + self.Erase(eff_index, eff_index + 1); + }) .def(""__delitem__"", [](StackTraceWrapper& self, py::slice slice) { - absl::Span frames = self.ToFrames(); + absl::Span frames = self.ToFrames(); py::ssize_t start, stop, step, slicelength; if (!slice.compute(frames.size(), &start, &stop, &step, &slicelength)) { @@ -433,6 +447,10 @@ PYBIND11_MODULE(_tf_stack, m) { [](const StackTraceWrapper& self) { return py::str(self.ToString({})); }) + .def_property( + ""_stacklevel"", &StackTraceWrapper::get_stacklevel, + &StackTraceWrapper::set_stacklevel, + ""Adjusts stacklevel; no effects after ToFrames() is called."") .def( ""get_user_frames"", [](const StackTraceWrapper& self) { ",0,train b46aa421ece747f31a47c3f600fac03bb6e4247f,tensorflow/tensorflow,"Ensure traceback from Operation() is correct This is a followup to cl/439389601, which only tested and fixed the path from _create_c_op. Operation() has another level of nested-ness. This could have been done by using stacklevel=4 in extract_traceback_for_op. But we instead mutate traceback inplace to avoid changing the API of _create_c_op to add a stacklevel argument. PiperOrigin-RevId: 439624965",tf_stack_test.py,"@@ -64,6 +64,23 @@ class TFStackTest(test.TestCase): self.assertRegex(frames[-1].line, ""# COMMENT"") self.assertRegex(frames[-2].line, ""# CALLSITE"") + def testGelItem(self): + + def func(n): + if n == 0: + return tf_stack.extract_stack() # COMMENT + else: + return func(n - 1) + + trace = func(5) + self.assertIn(""COMMENT"", trace[-1].line) + + with self.assertRaises(IndexError): + _ = trace[-len(trace) - 1] + + with self.assertRaises(IndexError): + _ = trace[len(trace)] + def testDelItem(self): def func(n): @@ -72,6 +89,7 @@ class TFStackTest(test.TestCase): else: return func(n - 1) + # Test deleting a slice. trace = func(5) self.assertGreater(len(trace), 5) @@ -82,6 +100,22 @@ class TFStackTest(test.TestCase): self.assertLen(head_list, len(full_list) - 5) self.assertEqual(head_list, full_list[:-5]) + # Test deleting an item. + trace = func(1) + self.assertGreater(len(trace), 1) + full_list = list(trace) + del trace[-1] + head_list = list(trace) + self.assertLen(head_list, len(full_list) - 1) + self.assertEqual(head_list, full_list[:-1]) + + # Errors + trace = func(5) + with self.assertRaises(IndexError): + del trace[-len(trace) - 1] + + with self.assertRaises(IndexError): + del trace[len(trace)] if __name__ == ""__main__"": test.main() ",0,train 19f428204217b7fc2820a340c43ce79b0e958a58,tensorflow/tensorflow,"Close remote contexts with a context_view_id. Adding this field to the CloseContextRequest avoids the scenario where a stale request closes a newly created context. 
PiperOrigin-RevId: 276631303 Change-Id: If9ff9191e3d1f2c0245265cdd9dccfd8bab00cd3",context.cc,"@@ -215,21 +215,27 @@ bool EagerContext::MirrorTensors() const { #if !defined(IS_MOBILE_PLATFORM) void EagerContext::CloseAndClearAllRemoteContexts() { uint64 context_id; + uint64 context_view_id; { mutex_lock l(remote_state_mu_); if (!is_master_) return; context_id = context_id_; + context_view_id = context_view_id_; context_id_ = kInvalidContextId; + // Forget the current view id and reset to the starting value 0. + context_view_id_ = 0; } - CloseRemoteContexts(remote_contexts_, context_id); + CloseRemoteContexts(remote_contexts_, context_id, context_view_id); remote_contexts_.clear(); } void EagerContext::CloseRemoteContexts( - const std::vector& remote_contexts, uint64 context_id) { + const std::vector& remote_contexts, uint64 context_id, + uint64 context_view_id) { // Close all remote contexts. eager::CloseContextRequest request; request.set_context_id(context_id); + request.set_context_view_id(context_view_id); // Setting context_id to a new value can avoid us issuing DestroyTensorHandle // request to closed remote workers. std::vector responses(remote_contexts.size()); @@ -762,13 +768,12 @@ Status EagerContext::UpdateRemoteMaster( } if (!remove_remote_contexts.empty()) { - // N.B. remove_remote_contexts include both removed and replaced workers. It - // is safe to send CloseContextRequest to them using the old copy of eager - // client cache (i.e., `remote_eager_workers_`) because the replaced workers - // will be resolved to the old eager clients. Thus, it correctly closes - // contexts on workers that are replaced by new ones. It must be called - // before overwriting `remote_eager_workers_` in current master context. - CloseRemoteContexts(remove_remote_contexts, context_id); + // N.B. remove_remote_contexts include both removed and replaced workers. + // In the case where a worker is replaced by one that resolves to the same + // `hostname:port`, it is safe to close context with the current view id, + // since the newly created context on the remote worker will be holding + // a larger view id and ignores this request. + CloseRemoteContexts(remove_remote_contexts, context_id, GetContextViewId()); for (const string& remote_context : remove_remote_contexts) { remote_contexts_.erase( std::remove(remote_contexts_.begin(), remote_contexts_.end(), ",0,train 19f428204217b7fc2820a340c43ce79b0e958a58,tensorflow/tensorflow,"Close remote contexts with a context_view_id. Adding this field to the CloseContextRequest avoids the scenario where a stale request closes a newly created context. PiperOrigin-RevId: 276631303 Change-Id: If9ff9191e3d1f2c0245265cdd9dccfd8bab00cd3",context.h,"@@ -460,7 +460,7 @@ class EagerContext : public core::RefCounted { #if !defined(IS_MOBILE_PLATFORM) void CloseAndClearAllRemoteContexts(); void CloseRemoteContexts(const std::vector& remote_contexts, - uint64 context_id); + uint64 context_id, uint64 context_view_id); Status SetMasterContextState( std::unique_ptr server, WorkerEnv* worker_env, ",0,train 19f428204217b7fc2820a340c43ce79b0e958a58,tensorflow/tensorflow,"Close remote contexts with a context_view_id. Adding this field to the CloseContextRequest avoids the scenario where a stale request closes a newly created context. PiperOrigin-RevId: 276631303 Change-Id: If9ff9191e3d1f2c0245265cdd9dccfd8bab00cd3",eager_service_impl.cc,"@@ -459,9 +459,17 @@ Status EagerServiceImpl::CloseContext(const CloseContextRequest* request, // Swallow the error here. 
return Status::OK(); } - core::ScopedUnref context_unref(context); + if (request->context_view_id() < context->Context()->GetContextViewId()) { + // Swallow the error here. + LOG(INFO) << ""Ignoring CloseContext request with a stale context_view_id "" + << request->context_view_id() << "" for context_id "" + << request->context_id() << "". The current context_view_id is "" + << context->Context()->GetContextViewId() << "".""; + return Status::OK(); + } + mutex_lock l(contexts_mu_); contexts_.erase(request->context_id()); ",0,train 19f428204217b7fc2820a340c43ce79b0e958a58,tensorflow/tensorflow,"Close remote contexts with a context_view_id. Adding this field to the CloseContextRequest avoids the scenario where a stale request closes a newly created context. PiperOrigin-RevId: 276631303 Change-Id: If9ff9191e3d1f2c0245265cdd9dccfd8bab00cd3",eager_service_impl_test.cc,"@@ -312,6 +312,7 @@ TEST_F(EagerServiceImplTest, BasicTest) { CloseContextRequest close_context_request; close_context_request.set_context_id(context_id); + close_context_request.set_context_view_id(0); CloseContextResponse close_context_response; TF_ASSERT_OK(eager_service_impl.CloseContext(&close_context_request, &close_context_response)); @@ -379,6 +380,7 @@ TEST_F(EagerServiceImplTest, BasicFunctionTest) { CloseContextRequest close_context_request; close_context_request.set_context_id(context_id); + close_context_request.set_context_view_id(0); CloseContextResponse close_context_response; TF_ASSERT_OK(eager_service_impl.CloseContext(&close_context_request, &close_context_response)); @@ -473,6 +475,7 @@ class FunctionWithRemoteInputsTest : public EagerServiceImplTest { CloseContextRequest close_context_request; close_context_request.set_context_id(context_id_); + close_context_request.set_context_view_id(0); CloseContextResponse close_context_response; TF_ASSERT_OK(eager_service_impl_.CloseContext(&close_context_request, &close_context_response)); @@ -640,6 +643,7 @@ TEST_F(EagerServiceImplTest, SendTensorTest) { CloseContextRequest close_context_request; close_context_request.set_context_id(context_id); + close_context_request.set_context_view_id(0); CloseContextResponse close_context_response; TF_ASSERT_OK(eager_service_impl.CloseContext(&close_context_request, &close_context_response)); ",0,train 4d568d5967cbb2f46b763800ea63390868368a24,tensorflow/tensorflow,"NFC: Move AffineOps dialect to the Dialect sub-directory. PiperOrigin-RevId: 264482571",AffineOps.h,"@@ -20,8 +20,8 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_AFFINEOPS_AFFINEOPS_H -#define MLIR_AFFINEOPS_AFFINEOPS_H +#ifndef MLIR_DIALECT_AFFINEOPS_AFFINEOPS_H +#define MLIR_DIALECT_AFFINEOPS_AFFINEOPS_H #include ""mlir/IR/AffineMap.h"" #include ""mlir/IR/Builders.h"" @@ -540,7 +540,7 @@ void fullyComposeAffineMapAndOperands(AffineMap *map, llvm::SmallVectorImpl *operands); #define GET_OP_CLASSES -#include ""mlir/AffineOps/AffineOps.h.inc"" +#include ""mlir/Dialect/AffineOps/AffineOps.h.inc"" /// Returns if the provided value is the induction variable of a AffineForOp. bool isForInductionVar(Value *val); ",0,train 4d568d5967cbb2f46b763800ea63390868368a24,tensorflow/tensorflow,"NFC: Move AffineOps dialect to the Dialect sub-directory. 
PiperOrigin-RevId: 264482571",Builders.h,"@@ -23,7 +23,7 @@ #ifndef MLIR_EDSC_BUILDERS_H_ #define MLIR_EDSC_BUILDERS_H_ -#include ""mlir/AffineOps/AffineOps.h"" +#include ""mlir/Dialect/AffineOps/AffineOps.h"" #include ""mlir/Dialect/StandardOps/Ops.h"" #include ""mlir/Dialect/VectorOps/VectorOps.h"" #include ""mlir/IR/Builders.h"" ",0,train 3c08b43159e4bd1e587170a707f44fbea77239d1,tensorflow/tensorflow,"Docstring example and formatting updates Change: 138936680",tensor_signature.py,"@@ -35,6 +35,17 @@ class TensorSignature(collections.namedtuple( Useful to check compatibility of tensors. + Example: + + ```python + examples = tf.placeholder(...) + inputs = {'a': var_a, 'b': var_b} + signatures = tensor_signature.create_signatures(inputs) + result = tensor_signature.create_example_parser_from_signatures( + signatures, examples) + self.assertTrue(tensor_signature.tensors_compatible(result, signatures)) + ``` + Attributes: dtype: `DType` object. shape: `TensorShape` object. ",0,train 028725d42f687243b47caa689909ae3e91221a1f,tensorflow/tensorflow,"Release GIL for PyLocalBuffer::copy_to_host_async PiperOrigin-RevId: 273971322",xla.cc,"@@ -458,7 +458,8 @@ PYBIND11_MODULE(xla_extension, m) { py::gil_scoped_release gil_release; return buffer->BlockHostUntilReady(); }) - .def(""copy_to_host_async"", &PyLocalBuffer::CopyToHostAsync) + .def(""copy_to_host_async"", &PyLocalBuffer::CopyToHostAsync, + py::call_guard()) .def(""to_py"", [](PyLocalBuffer* buffer) -> StatusOr { GlobalPyRefManager()->CollectGarbage(); ",0,test e6ab6e648673041de33ba16d250367157a7cb2ec,tensorflow/tensorflow,"tfdbg: Google internal-oriented changes with no external effect Change: 135694400",local_cli.py,"@@ -23,6 +23,7 @@ import shutil import sys import tempfile +# Google-internal import(s). from tensorflow.python.debug import debug_data from tensorflow.python.debug import framework from tensorflow.python.debug.cli import analyzer_cli @@ -37,7 +38,7 @@ _DUMP_ROOT_PREFIX = ""tfdbg_"" class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): """"""Concrete subclass of BaseDebugWrapperSession implementing a local CLI."""""" - def __init__(self, sess, dump_root=None): + def __init__(self, sess, dump_root=None, log_usage=True): """"""Constructor of LocalCLIDebugWrapperSession. Args: @@ -46,12 +47,16 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): a directory that does not exist or an empty directory. If the directory does not exist, it will be created by the debugger core during debug run() calls and removed afterwards. + log_usage: (bool) Whether the usage of this class is to be logged. Raises: ValueError: If dump_root is an existing and non-empty directory or if dump_root is a file. """""" + if log_usage: + pass # No logging for open-source. 
+ framework.BaseDebugWrapperSession.__init__(self, sess) if dump_root is None: ",0,test e6ab6e648673041de33ba16d250367157a7cb2ec,tensorflow/tensorflow,"tfdbg: Google internal-oriented changes with no external effect Change: 135694400",local_cli_test.py,"@@ -37,14 +37,14 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase): shutil.rmtree(self._tmp_dir) def testConstructWrapper(self): - local_cli.LocalCLIDebugWrapperSession(session.Session()) + local_cli.LocalCLIDebugWrapperSession(session.Session(), log_usage=False) def testConstructWrapperWithExistingEmptyDumpRoot(self): os.mkdir(self._tmp_dir) self.assertTrue(os.path.isdir(self._tmp_dir)) local_cli.LocalCLIDebugWrapperSession( - session.Session(), dump_root=self._tmp_dir) + session.Session(), dump_root=self._tmp_dir, log_usage=False) def testConstructWrapperWithExistingNonEmptyDumpRoot(self): os.mkdir(self._tmp_dir) @@ -55,7 +55,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp( ValueError, ""dump_root path points to a non-empty directory""): local_cli.LocalCLIDebugWrapperSession( - session.Session(), dump_root=self._tmp_dir) + session.Session(), dump_root=self._tmp_dir, log_usage=False) def testConstructWrapperWithExistingFileDumpRoot(self): os.mkdir(self._tmp_dir) @@ -65,7 +65,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp( ValueError, ""dump_root path points to a file""): local_cli.LocalCLIDebugWrapperSession( - session.Session(), dump_root=file_path) + session.Session(), dump_root=file_path, log_usage=False) if __name__ == ""__main__"": ",0,test 00ee6689bb838f45a393d4fbca11ad10018a382a,tensorflow/tensorflow,"Improvements to function._FuncGraph. * Adds 'inputs', 'outputs', and 'name' field to _FuncGraph. This allows _FuncGraph to encapsulate all the information needed to convert it to a FunctionDef. * Refactor logic for converting a Python callable to a _FuncGraph into a new method, func_graph_from_py_func(). These changes are in preparation for converting tf.cond to emit an If op. By exposing _FuncGraph functionality outside of _DefinedFunction, _FuncGraphs can be used to represent functions that are manipulated (e.g. to output intermediate tensors) before being converted to FunctionDef protos. PiperOrigin-RevId: 197003496",function.py,"@@ -258,12 +258,10 @@ class _DefinedFunction(object): # another reference to _definition.signature self._op_def = None - self._args = [] assert isinstance(input_types, (list, tuple)) - for i in range(len(input_types)): - argname = argnames[i] if i < len(argnames) else (""arg%d"" % i) - argtype = input_types[i] - self._args.append((argname, argtype)) + self._arg_types = input_types + self._arg_names = [argnames[i] if i < len(argnames) else (""arg%d"" % i) + for i in range(len(input_types))] @property def name(self): @@ -336,42 +334,11 @@ class _DefinedFunction(object): if self._definition is not None or self._c_func is not None: return - # Create the func_def object. - temp_graph = _FuncGraph(capture_by_value=self._capture_by_value) - with temp_graph.as_default(), ops.device(self._caller_device): - # List of placeholders for the function_def. - inputs = [] - for (argname, argtype) in self._args: - argholder = array_ops.placeholder(argtype, name=argname) - inputs.append(argholder) - # Call func and gather the output tensors. 
- with vs.variable_scope("""", custom_getter=temp_graph.getvar): - outputs = self._func(*inputs) - - # There is no way of distinguishing between a function not returning - # anything and a function returning None in Python. - # We need to allow the former and ideally want to forbid the latter as - # it is most likely user error. - # TODO(iga): Consider adding a @NoOutput decorator on top of @Defun to - # allow users to explicitly mark the function as not returning anything. - # For now, we allow a single None return and interpret it as a function - # with no output. - if outputs is None: - outputs = [] - else: - # If func only returned one value, make it a tuple. - if not isinstance(outputs, (list, tuple)): - outputs = (outputs,) - if any([_ is None for _ in outputs]): - raise ValueError(""Function can not return None."") - # Ensures each output is a Tensor in the function graph. - outputs = [ops.convert_to_tensor(t) for t in outputs] - outputs = [ - temp_graph.capture(t) if t.graph is not temp_graph else t - for t in outputs - ] + temp_graph = func_graph_from_py_func( + self._func, self._arg_names, self._arg_types, self._func_name, + self._capture_by_value, self._caller_device) + self._extra_inputs = temp_graph.extra_inputs - inputs.extend(temp_graph.extra_args) # pylint: disable=protected-access self._sub_functions = temp_graph._functions # pylint: enable=protected-access @@ -390,8 +357,8 @@ class _DefinedFunction(object): self._definition = graph_to_function_def.graph_to_function_def( temp_graph, temp_graph.get_operations(), - inputs, - outputs, + temp_graph.inputs, + temp_graph.outputs, out_names=self._out_names) for k in kwargs_attr: @@ -421,8 +388,8 @@ class _DefinedFunction(object): base_func_name, self._func_name is None, # append_hash_to_fn_name None, # opers - [t._as_tf_output() for t in inputs], - [t._as_tf_output() for t in outputs], + [t._as_tf_output() for t in temp_graph.inputs], + [t._as_tf_output() for t in temp_graph.outputs], output_names, None, # opts description) @@ -653,16 +620,33 @@ class _FuncGraph(ops.Graph): function argument and the caller passes in the captured tensor. """""" - def __init__(self, capture_by_value, *args, **kwargs): + def __init__(self, name, capture_by_value, *args, **kwargs): super(_FuncGraph, self).__init__(*args, **kwargs) self._capture_by_value = capture_by_value self._building_function = True self._outer_graph = ops.get_default_graph() self._vscope = vs.get_variable_scope() self._old_custom_getter = self._vscope.custom_getter + + # The name of the function. + self.name = name + # Placeholder tensors representing the inputs to this function. The tensors + # are in this _FuncGraph. + self.inputs = [] + # Tensors that will be returned this function. The tensors are in this + # _FuncGraph. + self.outputs = [] + # Maps external tensor -> internal tensor (e.g. input placeholder). self._captured = {} + # The external tensors that have been captured as inputs and must be passed + # to this function (empty if capturing by value, otherwise these are the + # keys of _captured). self.extra_inputs = [] + # Input placeholders that been added for captured values (empty if capturing + # by value). self.extra_args = [] + # Captured variables. + # TODO(skyewm): is this needed? 
self.extra_vars = [] def getvar( @@ -742,6 +726,7 @@ class _FuncGraph(ops.Graph): else: ph._handle_data = tensor._handle_data # pylint: enable=protected-access + self.inputs.append(ph) self._captured[tensor] = ph self.extra_args.append(ph) if _is_guaranteed_const(tensor): @@ -780,6 +765,63 @@ class _FuncGraph(ops.Graph): return captured_op +def func_graph_from_py_func(func, arg_names, arg_types, name=None, + capture_by_value=False, device=None): + """"""Returns a _FuncGraph generated from `func`. + + Args: + func: A Python callable which constructs a TF function body. The arguments + must correspond to `arg_types`. Returns a value or list/tuple of values. + No returned value can be None. + arg_names: A sequence of strings for the function argument names. + arg_types: A sequence of the function's argument types. + name: The function name. If None, the name is derived from `func`. + capture_by_value: boolean. If True, captured values will be copied into the + function body. + device: device name or function. + + Returns: + A _FuncGraph. + + Raises: + ValueError: if func returns None. + """""" + if not name: + name = _get_func_name(func) + func_graph = _FuncGraph(name, capture_by_value) + with func_graph.as_default(), ops.device(device): + # Create placeholders for the function arguments. + for (argname, argtype) in zip(arg_names, arg_types): + argholder = array_ops.placeholder(argtype, name=argname) + func_graph.inputs.append(argholder) + # Call func and gather the output tensors. + with vs.variable_scope("""", custom_getter=func_graph.getvar): + outputs = func(*func_graph.inputs) + + # There is no way of distinguishing between a function not returning + # anything and a function returning None in Python. + # We need to allow the former and ideally want to forbid the latter as + # it is most likely user error. + # TODO(iga): Consider adding a @NoOutput decorator on top of @Defun to + # allow users to explicitly mark the function as not returning anything. + # For now, we allow a single None return and interpret it as a function + # with no output. + if outputs is None: + outputs = [] + else: + # If func only returned one value, make it a tuple. + if not isinstance(outputs, (list, tuple)): + outputs = (outputs,) + if any([_ is None for _ in outputs]): + raise ValueError(""Function can not return None."") + # Ensures each output is a Tensor in the function graph. + outputs = [ops.convert_to_tensor(t) for t in outputs] + outputs = [func_graph.capture(t) if t.graph is not func_graph else t + for t in outputs] + func_graph.outputs = outputs + return func_graph + + def _is_guaranteed_const(tensor): """"""Determines whether `tensor` is guaranteed to be a constant. ",0,train 8fde5290d6f9acea81482a9f300178a07873322c,tensorflow/tensorflow,"Canonicalize MatrixSetDiag and MatrixSetDiagV2 ops to MatrixSetDiagV3 Lower canonical MatrixSetDiagV3 op in TFLite Converter instead of MatrixSetDiag Now, MatrixSetDiag and MatrixSetDiagV2 are canonicalized to MatrixSetDiagV3 so this removes the need to downgrade MatrixSetDiagV2 and MatrixSetDiagV3 ops to MatrixSetDiag. 
PiperOrigin-RevId: 343133748 Change-Id: Ia60046e17358de72af3b2ac9144fc80d437e930b",tf_ops_a_m.cc,"@@ -2428,6 +2428,24 @@ static LogicalResult Verify(MatrixBandPartOp op) { return success(); } +//===----------------------------------------------------------------------===// +// MatrixSetDiagOp +//===----------------------------------------------------------------------===// +// +void MatrixSetDiagOp::getCanonicalizationPatterns( + OwningRewritePatternList &results, MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// MatrixSetDiagV2Op +//===----------------------------------------------------------------------===// + +void MatrixSetDiagV2Op::getCanonicalizationPatterns( + OwningRewritePatternList &results, MLIRContext *context) { + results.insert(context); +} + //===----------------------------------------------------------------------===// // MaxOp //===----------------------------------------------------------------------===// ",0,train bfe235d23fd60d251a0cfa4325bb1b92bbf47f49,tensorflow/tensorflow,"Cleanup: Use a standard name for the directives annotation. PiperOrigin-RevId: 283450563 Change-Id: I7db4fde547fe9d0d66c7a2e161e481f8add0b5ff",directives.py,"@@ -98,9 +98,9 @@ class DirectivesTransformer(converter.Base): raise ValueError( '""%s"" must be used inside a statement' % directive.__name__) target = self.get_local(ENCLOSING_LOOP) - node_anno = anno.getanno(target, converter.AgAnno.DIRECTIVES, {}) + node_anno = anno.getanno(target, anno.Basic.DIRECTIVES, {}) node_anno[directive] = _map_args(call_node, directive) - anno.setanno(target, converter.AgAnno.DIRECTIVES, node_anno) + anno.setanno(target, anno.Basic.DIRECTIVES, node_anno) return call_node def visit_Name(self, node): ",0,train bfe235d23fd60d251a0cfa4325bb1b92bbf47f49,tensorflow/tensorflow,"Cleanup: Use a standard name for the directives annotation. PiperOrigin-RevId: 283450563 Change-Id: I7db4fde547fe9d0d66c7a2e161e481f8add0b5ff",directives_test.py,"@@ -20,7 +20,6 @@ from __future__ import print_function from tensorflow.python.autograph.converters import directives as directives_converter from tensorflow.python.autograph.core import converter_testing -from tensorflow.python.autograph.core.converter import AgAnno from tensorflow.python.autograph.lang import directives from tensorflow.python.autograph.pyct import anno from tensorflow.python.autograph.pyct import parser @@ -68,7 +67,7 @@ class DirectivesTest(converter_testing.TestCase): node, ctx = self.prepare(test_fn, {'directives': directives}) node = directives_converter.transform(node, ctx) - d = anno.getanno(node.body[1], AgAnno.DIRECTIVES) + d = anno.getanno(node.body[1], anno.Basic.DIRECTIVES) d = d[directives.set_loop_options] self.assertEqual(d['parallel_iterations'].n, 10) self.assertEqual(d['back_prop'].id, 'a') ",0,train bfe235d23fd60d251a0cfa4325bb1b92bbf47f49,tensorflow/tensorflow,"Cleanup: Use a standard name for the directives annotation. PiperOrigin-RevId: 283450563 Change-Id: I7db4fde547fe9d0d66c7a2e161e481f8add0b5ff",converter.py,"@@ -354,15 +354,6 @@ class AnnotatedDef(reaching_definitions.Definition): self.directives = {} -class AgAnno(enum.Enum): - """"""Annotation labels specific to AutoGraph. See anno.py."""""" - - DIRECTIVES = 'User directives associated with the annotated statement.' - - def __repr__(self): - return self.name - - def standard_analysis(node, context, is_initial=False): """"""Performs a complete static analysis of the given code. 
",0,train bfe235d23fd60d251a0cfa4325bb1b92bbf47f49,tensorflow/tensorflow,"Cleanup: Use a standard name for the directives annotation. PiperOrigin-RevId: 283450563 Change-Id: I7db4fde547fe9d0d66c7a2e161e481f8add0b5ff",anno.py,"@@ -55,6 +55,8 @@ class Basic(NoValue): ' `name_map` allows renaming symbols.') ORIGIN = ('Information about the source code that converted code originated' ' from. See origin_information.py.') + DIRECTIVES = ('User directives associated with a statement or a variable.' + ' Typically, they affect the immediately-enclosing statement.') class Static(NoValue): ",0,train bfe235d23fd60d251a0cfa4325bb1b92bbf47f49,tensorflow/tensorflow,"Cleanup: Use a standard name for the directives annotation. PiperOrigin-RevId: 283450563 Change-Id: I7db4fde547fe9d0d66c7a2e161e481f8add0b5ff",templates.py,"@@ -120,6 +120,7 @@ class ReplaceTransformer(gast.NodeTransformer): self.preserved_annos = { anno.Basic.ORIGIN, anno.Basic.SKIP_PROCESSING, + anno.Basic.DIRECTIVES, anno.Static.ORIG_DEFINITIONS, 'extra_test', 'function_context_name', ",0,train f9a44a69c35dcf7f1c0f42e1ae9971bae0148099,tensorflow/tensorflow,Update the docs and api_def.,gcs_config_ops.cc,"@@ -21,50 +21,12 @@ namespace tensorflow { REGISTER_OP(""GcsConfigureCredentials"") .Input(""json: string"") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R""doc( -Configures the credentials used by the GCS client of the local TF runtime. - -The json input can be of the format: - -1. Refresh Token: -{ - ""client_id"": """", - ""client_secret"": """", - ""refresh_token: """", - ""type"": ""authorized_user"", -} - -2. Service Account: -{ - ""type"": ""service_account"", - ""project_id"": """", - ""private_key_id"": """", - ""private_key"": ""------BEGIN PRIVATE KEY-----\n\n-----END PRIVATE KEY------\n"", - ""client_email"": ""@.iam.gserviceaccount.com"", - ""client_id"": """", - # Some additional fields elided -} - -Note the credentials established through this method are shared across all -sessions run on this runtime. - -Note be sure to feed the inputs to this op to ensure the credentials are not -stored in a constant op within the graph that might accidentally be checkpointed -or in other ways be persisted or exfiltrated. -)doc""); + .SetShapeFn(shape_inference::NoOutputs); REGISTER_OP(""GcsConfigureBlockCache"") .Input(""max_cache_size: uint64"") .Input(""block_size: uint64"") .Input(""max_staleness: uint64"") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R""doc( -Re-configures the GCS block cache with the new configuration values. - -If the values are the same as already configured values, this op is a no-op. If -they are different, the current contents of the block cache is dropped, and a -new block cache is created fresh. -)doc""); + .SetShapeFn(shape_inference::NoOutputs); } // namespace tensorflow ",0,test fcc9a6ed272d6599d38ae59ae215cff786ad1bea,tensorflow/tensorflow,"Making the third_party ffmpeg decode_audio op resilient to small numbers of decoding failures. Instead of crashing the TF pipeline it will now return an empty tensor. Note that pipelines that want to take advantage of this will have to be modified to handle empty tensors. 
Change: 128747076",decode_audio_op.cc,"@@ -25,6 +25,7 @@ #include ""tensorflow/core/lib/strings/str_util.h"" #include ""tensorflow/core/lib/strings/strcat.h"" #include ""tensorflow/core/platform/env.h"" +#include ""tensorflow/core/platform/logging.h"" namespace tensorflow { namespace ffmpeg { @@ -112,6 +113,13 @@ class DecodeAudioOp : public OpKernel { context, result.ok(), errors::Unavailable(""FFmpeg must be installed to run this op. FFmpeg "" ""can be found at http://www.ffmpeg.org."")); + } else if (result.code() == error::UNKNOWN) { + LOG(ERROR) << ""Ffmpeg failed with error '"" << result.error_message() + << ""'. Returning empty tensor.""; + Tensor* output = nullptr; + OP_REQUIRES_OK( + context, context->allocate_output(0, TensorShape({0, 0}), &output)); + return; } else { OP_REQUIRES_OK(context, result); } @@ -162,7 +170,8 @@ different from the contents of the file, channels will be merged or created. contents: The binary audio file contents. sampled_audio: A rank 2 tensor containing all tracks of the audio. Dimension 0 - is time and dimension 1 is the channel. + is time and dimension 1 is the channel. If ffmpeg fails to decode the audio + then an empty tensor will be returned. file_format: A string describing the audio file format. This can be ""wav"" or ""mp3"". samples_per_second: The number of samples per second that the audio should have. ",0,test fcc9a6ed272d6599d38ae59ae215cff786ad1bea,tensorflow/tensorflow,"Making the third_party ffmpeg decode_audio op resilient to small numbers of decoding failures. Instead of crashing the TF pipeline it will now return an empty tensor. Note that pipelines that want to take advantage of this will have to be modified to handle empty tensors. Change: 128747076",decode_audio_op_test.py,"@@ -72,6 +72,14 @@ class DecodeAudioOpTest(tf.test.TestCase): def testOgg(self): self._loadFileAndTest('mono_10khz.ogg', 'ogg', 0.57, 10000, 1) + def testInvalidFile(self): + with self.test_session(): + contents = 'invalid file' + audio_op = ffmpeg.decode_audio(contents, file_format='wav', + samples_per_second=10000, channel_count=2) + audio = audio_op.eval() + self.assertEqual(audio.shape, (0, 0)) + if __name__ == '__main__': tf.test.main() ",0,test fcc9a6ed272d6599d38ae59ae215cff786ad1bea,tensorflow/tensorflow,"Making the third_party ffmpeg decode_audio op resilient to small numbers of decoding failures. Instead of crashing the TF pipeline it will now return an empty tensor. Note that pipelines that want to take advantage of this will have to be modified to handle empty tensors. Change: 128747076",ffmpeg_ops.py,"@@ -67,7 +67,8 @@ def decode_audio(contents, file_format=None, samples_per_second=None, Returns: A rank 2 tensor that has time along dimension 0 and channels along dimension 1. Dimension 0 will be `samples_per_second * length` wide, and - dimension 1 will be `channel_count` wide. + dimension 1 will be `channel_count` wide. If ffmpeg fails to decode the + audio then an empty tensor will be returned. """""" return gen_decode_audio_op_py.decode_audio( contents, file_format=file_format, samples_per_second=samples_per_second, ",0,test e0ec3437cfe4bf6ed3ab14d6601f3b7110fc5285,tensorflow/tensorflow,"LinearOperator (base class), prefer statically defined shape if available. 
Change: 143529651",linear_operator.py,"@@ -21,6 +21,7 @@ from __future__ import print_function import contextlib from tensorflow.contrib import framework as contrib_framework +from tensorflow.contrib.linalg.python.ops import linear_operator_util from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import linalg_ops @@ -258,7 +259,12 @@ class LinearOperator(object): with self._name_scope(name): # Be clean by avoiding adding shape Ops to the graph too many times. if self._cached_shape_dynamic is None: - self._cached_shape_dynamic = self._shape_dynamic() + # Prefer to use statically defined shape if available. + if self.shape.is_fully_defined(): + self._cached_shape_dynamic = linear_operator_util.shape_tensor( + self.shape.as_list()) + else: + self._cached_shape_dynamic = self._shape_dynamic() return self._cached_shape_dynamic @property @@ -291,8 +297,12 @@ class LinearOperator(object): # Derived classes get this ""for free"" once .shape() is implemented. with self._name_scope(name): if self._cached_batch_shape_dynamic is None: - self._cached_batch_shape_dynamic = array_ops.slice( - self.shape_dynamic(), [0], [self.tensor_rank_dynamic() - 2]) + # Prefer to use statically defined shape if available. + if self.batch_shape.is_fully_defined(): + self._cached_batch_shape_dynamic = linear_operator_util.shape_tensor( + self.batch_shape.as_list(), name=""batch_shape"") + else: + self._cached_batch_shape_dynamic = self.shape_dynamic()[:-2] return self._cached_batch_shape_dynamic @property @@ -327,7 +337,13 @@ class LinearOperator(object): # Derived classes get this ""for free"" once .shape() is implemented. with self._name_scope(name): if self._cached_tensor_rank_dynamic is None: - self._cached_tensor_rank_dynamic = array_ops.size(self.shape_dynamic()) + # Prefer to use statically defined shape if available. + if self.tensor_rank is not None: + self._cached_tensor_rank_dynamic = ops.convert_to_tensor( + self.tensor_rank) + else: + self._cached_tensor_rank_dynamic = array_ops.size( + self.shape_dynamic()) return self._cached_tensor_rank_dynamic @property @@ -360,8 +376,12 @@ class LinearOperator(object): # Derived classes get this ""for free"" once .shape() is implemented. with self._name_scope(name): if self._cached_domain_dimension_dynamic is None: - self._cached_domain_dimension_dynamic = array_ops.gather( - self.shape_dynamic(), self.tensor_rank_dynamic() - 1) + # Prefer to use statically defined shape if available. + if self.domain_dimension.value is not None: + self._cached_domain_dimension_dynamic = ops.convert_to_tensor( + self.domain_dimension.value) + else: + self._cached_domain_dimension_dynamic = self.shape_dynamic()[-1] return self._cached_domain_dimension_dynamic @property @@ -394,8 +414,12 @@ class LinearOperator(object): # Derived classes get this ""for free"" once .shape() is implemented. with self._name_scope(name): if self._cached_range_dimension_dynamic is None: - self._cached_range_dimension_dynamic = array_ops.gather( - self.shape_dynamic(), self.tensor_rank_dynamic() - 2) + # Prefer to use statically defined shape if available. 
+ if self.range_dimension.value is not None: + self._cached_range_dimension_dynamic = ops.convert_to_tensor( + self.range_dimension.value) + else: + self._cached_range_dimension_dynamic = self.shape_dynamic()[-2] return self._cached_range_dimension_dynamic def _assert_non_singular(self): ",0,train e0ec3437cfe4bf6ed3ab14d6601f3b7110fc5285,tensorflow/tensorflow,"LinearOperator (base class), prefer statically defined shape if available. Change: 143529651",linear_operator_util.py,"@@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops @@ -64,3 +65,13 @@ def assert_zero_imag_part(x, message=None, name=""assert_zero_imag_part""): zero = ops.convert_to_tensor(0, dtype=dtype.real_dtype) return check_ops.assert_equal(zero, math_ops.imag(x), message=message) + + +def shape_tensor(shape, name=None): + """"""Convert Tensor using default type, unless empty list or tuple."""""" + # Works just like random_ops._ShapeTensor. + if isinstance(shape, (tuple, list)) and not shape: + dtype = dtypes.int32 + else: + dtype = None + return ops.convert_to_tensor(shape, dtype=dtype, name=name) ",0,train 428a2813f2aa5703f8ecdee0f175a13d59707ed5,tensorflow/tensorflow,"Fix broken tensorboard/backend:server_test in OSS. It failed because: - assertSameElements doesn't exist in OSS - json_format.MessageToDict doesn't exist in the OSS. Replaced with the correct ones. Change: 137602799",server_test.py,"@@ -243,7 +243,7 @@ class TensorboardServerTest(tf.test.TestCase): return info_json = self._getJson('/data/plugin/projector/info?run=run1') - self.assertSameElements(info_json['embeddings'], [ + self.assertItemsEqual(info_json['embeddings'], [ { 'tensorShape': [1, 2], 'tensorName': 'var1' ",0,train 428a2813f2aa5703f8ecdee0f175a13d59707ed5,tensorflow/tensorflow,"Fix broken tensorboard/backend:server_test in OSS. It failed because: - assertSameElements doesn't exist in OSS - json_format.MessageToDict doesn't exist in the OSS. Replaced with the correct ones. Change: 137602799",plugin.py,"@@ -170,7 +170,7 @@ class ProjectorPlugin(TBPlugin): if not info.tensor_shape: info.tensor_shape.extend(tensor_shape) - self.handler.respond(json_format.MessageToDict(config), 'application/json') + self.handler.respond(json_format.MessageToJson(config), 'application/json') def _serve_metadata(self, query_params): run = query_params.get('run') ",0,train c955f9804bd9e56c712934c6f4c2b24cfc3a2310,tensorflow/tensorflow,Make *args in sv.loop example an iterable,supervisor.py,"@@ -152,7 +152,7 @@ class Supervisor(object): ... 
sv = Supervisor(logdir='/tmp/mydir') with sv.managed_session(FLAGS.master) as sess: - sv.loop(60, print_loss, (sess)) + sv.loop(60, print_loss, (sess, )) while not sv.should_stop(): sess.run(my_train_op) ``` ",0,test 60a9676ea1b7645e4d268a09df21147b3381a140,tensorflow/tensorflow,"Convert unicode strings to (byte-)strings in py_func (Python3 compatibility) PiperOrigin-RevId: 170524684",py_func_test.py,"@@ -133,12 +133,34 @@ class PyOpTest(test.TestCase): z, = script_ops.py_func(read_and_return_strings, [x, y], [dtypes.string]) self.assertListEqual(list(z.eval()), [b""hello there"", b""hi there""]) + def testStringsAreConvertedToBytes(self): + + def read_fixed_length_numpy_strings(): + return np.array(["" there""]) + + def read_and_return_strings(x, y): + return x + y + + with self.test_session(): + x = constant_op.constant([""hello"", ""hi""], dtypes.string) + y, = script_ops.py_func(read_fixed_length_numpy_strings, [], + [dtypes.string]) + z, = script_ops.py_func(read_and_return_strings, [x, y], [dtypes.string]) + self.assertListEqual(list(z.eval()), [b""hello there"", b""hi there""]) + def testStringPadding(self): correct = [b""this"", b""is"", b""a"", b""test""] with self.test_session(): s, = script_ops.py_func(lambda: [correct], [], [dtypes.string]) self.assertAllEqual(s.eval(), correct) + def testStringPaddingAreConvertedToBytes(self): + inp = [""this"", ""is"", ""a"", ""test""] + correct = [b""this"", b""is"", b""a"", b""test""] + with self.test_session(): + s, = script_ops.py_func(lambda: [inp], [], [dtypes.string]) + self.assertAllEqual(s.eval(), correct) + def testLarge(self): with self.test_session() as sess: x = array_ops.zeros([1000000], dtype=np.float32) ",0,train 60a9676ea1b7645e4d268a09df21147b3381a140,tensorflow/tensorflow,"Convert unicode strings to (byte-)strings in py_func (Python3 compatibility) PiperOrigin-RevId: 170524684",script_ops.py,"@@ -64,6 +64,8 @@ class FuncRegistry(object): components of a tensor have different lengths. This is bad: ignoring the padding is wrong for text data, and removing the padding is wrong for binary data. To avoid this bug, we redo the conversion using an object dtype. + Additionally, we convert unicode strings to (byte-)strings for Python3 + compatibility. Args: value: Value to convert to a numpy array. @@ -72,9 +74,15 @@ class FuncRegistry(object): A numpy array. """""" result = np.asarray(value, order=""C"") - if result.dtype.char in ""SU"" and result is not value: + if result.dtype.char == ""S"" and result is not value: return np.asarray(value, order=""C"", dtype=object) - return result + elif result.dtype.char == ""U"" and result is not value: + value = np.vectorize(lambda x: x.encode())(value) + return np.asarray(value, order=""C"", dtype=object) + elif result.dtype.char == ""U"": + return result.astype(np.bytes_) + else: + return result def __call__(self, token, args): """"""Calls the registered function for `token` with args."""""" ",0,train 4f5b9455c3fae482e1f0477ff664777778e9da02,tensorflow/tensorflow,"Add test case for GitHub issue 33383 where ignore_erorrs combined with tf.data.Dataset.zip could cause out-of-sync for remaining components. Signed-off-by: Yong Tang ",ignore_errors_test.py,"@@ -126,6 +126,19 @@ class IgnoreErrorsTest(test_base.DatasetTestBase): with self.assertRaises(errors.OutOfRangeError): self.evaluate(get_next()) + def testZipIgnoreError(self): + a = dataset_ops.Dataset.from_tensor_slices([1., 2., 0., 4.]) + b = a.map(lambda x: array_ops.check_numerics(1. 
/ x, ""error"")) + + dataset = dataset_ops.Dataset.zip( + (b, a)).apply(error_ops.ignore_errors()) + get_next = self.getNext(dataset) + + for x in [1., 2., 4.]: + self.assertEqual((1. / x, x), self.evaluate(get_next())) + with self.assertRaises(errors.OutOfRangeError): + self.evaluate(get_next()) + if __name__ == ""__main__"": test.main() ",0,train 7bbbbe8a86ad35c3ca9a864f4df8722508f68b97,tensorflow/tensorflow,"Automatic precision selection. PiperOrigin-RevId: 296910710 Change-Id: I64d9a23f21225bacbb21ed7bf2d51fcb68f7d8e2",performance_profiling.cc,"@@ -22,6 +22,7 @@ limitations under the License. #include ""tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"" #include ""tensorflow/lite/delegates/gpu/cl/environment.h"" #include ""tensorflow/lite/delegates/gpu/cl/inference_context.h"" +#include ""tensorflow/lite/delegates/gpu/cl/model_hints.h"" #include ""tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"" #include ""tensorflow/lite/delegates/gpu/cl/precision.h"" #include ""tensorflow/lite/delegates/gpu/cl/tensor_type.h"" @@ -122,7 +123,9 @@ Status RunModelSample(const std::string& model_name) { RETURN_IF_ERROR(CreateEnvironment(&env)); InferenceContext::CreateInferenceInfo create_info; - create_info.precision = CalculationsPrecision::F16; + create_info.precision = env.IsSupported(CalculationsPrecision::F16) + ? CalculationsPrecision::F16 + : CalculationsPrecision::F32; create_info.storage_type = GetFastestStorageType(env.device()); std::cout << ""Precision: "" << ToString(create_info.precision) << std::endl; std::cout << ""Storage type: "" << ToString(create_info.storage_type) ",0,test 3c4e684b81810bde0bd72fabd149a6083aeff02e,tensorflow/tensorflow,"Turn on VariablePolicy for TPUStrategy. PiperOrigin-RevId: 337271222 Change-Id: Iacebd894d496b01ebf5bc78d07bc2639d3896da5",tpu_strategy.py,"@@ -740,7 +740,7 @@ class TPUExtended(distribute_lib.StrategyExtendedV1): atexit.register(async_wait) # Flag to turn on VariablePolicy - self._use_var_policy = False + self._use_var_policy = True def _validate_colocate_with_variable(self, colocate_with_variable): distribute_utils. validate_colocate(colocate_with_variable, self) ",0,train eb0f97061cf9f13ac611f5807870873360348ae6,tensorflow/tensorflow,"Add migration block for disable_v2_behavior PiperOrigin-RevId: 387851624 Change-Id: I3f37d5e4aee06ecd5d294574b59d2c0c6d6a8949",v2_compat.py,"@@ -93,6 +93,16 @@ def disable_v2_behavior(): TensorFlow 1.x and 2.x to behave as intended for 1.x. User can call this function to disable 2.x behavior during complex migrations. + + @compatibility(TF2) + Using this function indicates that your software is not compatible + with eager execution and `tf.function` in TF2. + + To migrate to TF2, rewrite your code to be compatible with eager execution. + Please refer to the [migration guide] + (https://www.tensorflow.org/guide/migrate) for additional resource on the + topic. + @end_compatibility """""" _v2_behavior_usage_gauge.get_cell(""disable"").set(True) tf2.disable() ",0,train 49002f2e95446e4aa262080839226eb9f47ad43b,tensorflow/tensorflow,"Remove unnecessary __init__ files. PiperOrigin-RevId: 260967386",__init__.py,"@@ -1,32 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the ""License""); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an ""AS IS"" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -""""""estimator python module. - -Importing from tensorflow.python.estimator is unsupported -and will soon break! -"""""" -# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow_estimator.python import estimator - -# Include attrs that start with single underscore. -_HAS_DYNAMIC_ATTRIBUTES = True -estimator.__all__ = [s for s in dir(estimator) if not s.startswith('__')] - -from tensorflow_estimator.python.estimator import * ",0,train 49002f2e95446e4aa262080839226eb9f47ad43b,tensorflow/tensorflow,"Remove unnecessary __init__ files. PiperOrigin-RevId: 260967386",__init__.py,"@@ -1,32 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the ""License""); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an ""AS IS"" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -""""""canned python module. - -Importing from tensorflow.python.estimator is unsupported -and will soon break! -"""""" -# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow_estimator.python.estimator import canned - -# Include attrs that start with single underscore. -_HAS_DYNAMIC_ATTRIBUTES = True -canned.__all__ = [s for s in dir(canned) if not s.startswith('__')] - -from tensorflow_estimator.python.estimator.canned import * ",0,train 49002f2e95446e4aa262080839226eb9f47ad43b,tensorflow/tensorflow,"Remove unnecessary __init__ files. PiperOrigin-RevId: 260967386",__init__.py,"@@ -1,32 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the ""License""); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an ""AS IS"" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -""""""export python module. - -Importing from tensorflow.python.estimator is unsupported -and will soon break! 
-"""""" -# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow_estimator.python.estimator import export - -# Include attrs that start with single underscore. -_HAS_DYNAMIC_ATTRIBUTES = True -export.__all__ = [s for s in dir(export) if not s.startswith('__')] - -from tensorflow_estimator.python.estimator.export import * ",0,train 49002f2e95446e4aa262080839226eb9f47ad43b,tensorflow/tensorflow,"Remove unnecessary __init__ files. PiperOrigin-RevId: 260967386",__init__.py,"@@ -1,32 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the ""License""); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an ""AS IS"" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -""""""inputs python module. - -Importing from tensorflow.python.estimator is unsupported -and will soon break! -"""""" -# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow_estimator.python.estimator import inputs - -# Include attrs that start with single underscore. -_HAS_DYNAMIC_ATTRIBUTES = True -inputs.__all__ = [s for s in dir(inputs) if not s.startswith('__')] - -from tensorflow_estimator.python.estimator.inputs import * ",0,train 955f41c5f2240495a086b503e54eac6928876aca,tensorflow/tensorflow,"Cleanup `astor` output to match `codegen` output. The default `astor` output messes up the function signature docs for many docs without a bit of cleanup. With this change the only differences I see are parens around lambdas and math expressions in default arguments.",parser.py,"@@ -650,6 +650,9 @@ def _remove_first_line_indent(string): return '\n'.join([line[indent:] for line in string.split('\n')]) +PAREN_NUMBER_RE = re.compile(""^\(([0-9.e-]+)\)"") + + def _generate_signature(func, reverse_index): """"""Given a function, returns a list of strings representing its args. @@ -705,7 +708,11 @@ def _generate_signature(func, reverse_index): if id(default) in reverse_index: default_text = reverse_index[id(default)] elif ast_default is not None: - default_text = astor.to_source(ast_default) + default_text = ( + astor.to_source(ast_default).rstrip('\n').replace('\t','\\t') + .replace('\n','\\n').replace('""""""',""'"")) + default_text = PAREN_NUMBER_RE.sub('\\1',default_text) + if default_text != repr(default): # This may be an internal name. If so, handle the ones we know about. # TODO(wicke): This should be replaced with a lookup in the index. 
",0,train 59292f548ccb7454c4e4bf3bb7e3f51eab50251f,tensorflow/tensorflow,Addressing @penpornk's comments,direct_session_with_tracking_alloc_test.cc,"@@ -108,25 +108,9 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelTest) { EXPECT_EQ(2, shape.dim(0).size()); EXPECT_EQ(1, shape.dim(1).size()); if (node->name() == y->name()) { -#if defined(INTEL_MKL) && defined(ENABLE_MKL) - // if MKL is used, it goes through various additional - // graph rewrite pass. In TF, everytime a graph pass - // happens, ""constant"" nodes are allocated - // and deallocated. Each allocation calls the - // (FindChunkPtr of BFCAllocator), - // which increments the value of AllocationId. - // Thus AllocationId becomes more than TF if MKL - // is used. Now IDs for MKL are 8 more than TF. EXPECT_EQ(13, cm->AllocationId(node, 0)); -#else - EXPECT_EQ(13, cm->AllocationId(node, 0)); -#endif // INTEL_MKL && ENABLE_MKL } else { -#if defined(INTEL_MKL) && defined(ENABLE_MKL) - EXPECT_EQ(14, cm->AllocationId(node, 0)); -#else EXPECT_EQ(14, cm->AllocationId(node, 0)); -#endif // INTEL_MKL && ENABLE_MKL } } EXPECT_LE(0, cm->MaxExecutionTime(node)); ",0,train 0349fe6a146c8dbb0d27a3d38729436c8164fffa,tensorflow/tensorflow,"Cosmetic fix to AUC class docstring. PiperOrigin-RevId: 377530540 Change-Id: I6eeb45ece631ef4ce3b1fec354ffb04c01ddf919",metrics.py,"@@ -1971,7 +1971,7 @@ class AUC(Metric): of binary classifiers. Unlike the accuracy, and like cross-entropy losses, ROC-AUC and PR-AUC evaluate all the operational points of a model. - This classes approximates AUCs using a Riemann sum: During the metric + This class approximates AUCs using a Riemann sum. During the metric accumulation phrase, predictions are accumulated within predefined buckets by value. The AUC is then computed by interpolating per-bucket averages. These buckets define the evaluated operational points. ",0,test 552580beb1b5488128053506a03730e3d1ba02ad,tensorflow/tensorflow,Divide by non zero data,math_grad.py,"@@ -447,7 +447,7 @@ def _SegmentProdGrad(op, grad): non_zero_prod = gen_math_ops.segment_prod(non_zero_data, segment_ids) gathered_prod = array_ops.gather(op.outputs[0], segment_ids) gathered_non_zero_prod = array_ops.gather(non_zero_prod, segment_ids) - prod_divided_by_el = gathered_prod / data # May contain nan/inf. + prod_divided_by_el = gathered_prod / non_zero_data # Now fetch the individual results for segments containing 0 and those that # don't. partial_derivative = array_ops.where_v2(is_zero, gathered_non_zero_prod, ",0,train 4dc57fb74b7885a5ef468bc5fced373724d4ac59,tensorflow/tensorflow,"[XLA] Try to validate that shape sizes are sane. This won't catch all overflows, but will do the right thing for the ""normal"" flow. Also fix layout validation to reject padded sparse layouts. PiperOrigin-RevId: 202151215",layout_util.cc,"@@ -248,6 +248,12 @@ Layout CreateDefaultLayoutForRank(int64 rank) { } } + if (layout.format() == SPARSE) { + if (!layout.padded_dimensions().empty()) { + return InvalidArgument(""Sparse layout has padded dimensions""); + } + } + return Status::OK(); } ",0,train 4dc57fb74b7885a5ef468bc5fced373724d4ac59,tensorflow/tensorflow,"[XLA] Try to validate that shape sizes are sane. This won't catch all overflows, but will do the right thing for the ""normal"" flow. Also fix layout validation to reject padded sparse layouts. PiperOrigin-RevId: 202151215",overflow_util.h,"@@ -0,0 +1,50 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the ""License""); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an ""AS IS"" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_OVERFLOW_UTIL_H_ +#define TENSORFLOW_COMPILER_XLA_OVERFLOW_UTIL_H_ + +#include ""tensorflow/core/platform/logging.h"" +#include ""tensorflow/core/platform/macros.h"" +#include ""tensorflow/core/platform/types.h"" + +namespace xla { + +// Multiply two nonnegative int64's, returning negative for overflow +inline int64 MultiplyWithoutOverflow(const int64 x, const int64 y) { + // Multiply in uint64 rather than int64 since signed overflow is undefined. + // Negative values will wrap around to large unsigned values in the casts + // (see section 4.7 [conv.integral] of the C++14 standard). + const uint64 ux = x; + const uint64 uy = y; + const uint64 uxy = ux * uy; + + // Check if we overflow uint64, using a cheap check if both inputs are small + if (TF_PREDICT_FALSE((ux | uy) >> 32 != 0)) { + // Ensure nonnegativity. Note that negative numbers will appear ""large"" + // to the unsigned comparisons above. + CHECK(x >= 0 && y >= 0); + + // Otherwise, detect overflow using a division + if (ux != 0 && uxy / ux != uy) return -1; + } + + // Cast back to signed. Any negative value will signal an error. + return static_cast(uxy); +} + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_OVERFLOW_UTIL_H_ ",0,train 4dc57fb74b7885a5ef468bc5fced373724d4ac59,tensorflow/tensorflow,"[XLA] Try to validate that shape sizes are sane. This won't catch all overflows, but will do the right thing for the ""normal"" flow. Also fix layout validation to reject padded sparse layouts. PiperOrigin-RevId: 202151215",shape_util.cc,"@@ -24,6 +24,7 @@ limitations under the License. #include ""tensorflow/compiler/xla/index_util.h"" #include ""tensorflow/compiler/xla/layout_util.h"" +#include ""tensorflow/compiler/xla/overflow_util.h"" #include ""tensorflow/compiler/xla/primitive_util.h"" #include ""tensorflow/compiler/xla/status_macros.h"" #include ""tensorflow/compiler/xla/types.h"" @@ -885,6 +886,50 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { } } + TF_RETURN_IF_ERROR(ValidateShapeSize(shape)); + return Status::OK(); +} + +/* static */ Status ShapeUtil::ValidateShapeSize(const Shape& shape) { + VLOG(3) << ""Validating shape size: "" << ShapeUtil::HumanString(shape); + auto invalid_argument = + InvalidArgument(""Shape %s size may overflow int64."", + ShapeUtil::HumanString(shape).c_str()); + if (!IsArray(shape)) { + return Status::OK(); + } + int64 shape_size; + if (LayoutUtil::IsSparseArray(shape)) { + shape_size = LayoutUtil::MaxSparseElements(shape.layout()); + shape_size = MultiplyWithoutOverflow(shape_size, ShapeUtil::Rank(shape)); + if (shape_size < 0) { + return invalid_argument; + } + shape_size = MultiplyWithoutOverflow(shape_size, sizeof(int64)); + if (shape_size < 0) { + return invalid_argument; + } + } + + // This is intentionally unconditional: even if the shape is sparse, we want + // to verify the densified version has a reasonable size. 
+ if (shape.dimensions().empty()) { + return Status::OK(); + } + shape_size = 1; + for (int64 dim : shape.dimensions()) { + shape_size = MultiplyWithoutOverflow(shape_size, dim); + if (shape_size < 0) { + return invalid_argument; + } + } + shape_size = MultiplyWithoutOverflow( + shape_size, ByteSizeOfPrimitiveType(shape.element_type())); + if (shape_size < 0) { + return invalid_argument; + } + + VLOG(3) << ""Shape size is valid: "" << shape_size; return Status::OK(); } ",0,train 4dc57fb74b7885a5ef468bc5fced373724d4ac59,tensorflow/tensorflow,"[XLA] Try to validate that shape sizes are sane. This won't catch all overflows, but will do the right thing for the ""normal"" flow. Also fix layout validation to reject padded sparse layouts. PiperOrigin-RevId: 202151215",shape_util.h,"@@ -702,6 +702,10 @@ class ShapeUtil { static size_t Hash(const Shape& shape); private: + // Validates the shape size is sane. This makes sure it's safe to do + // calculations in int64 without overflowing. + static Status ValidateShapeSize(const Shape& shape); + // Validates all of the non-layout properties of the shape -- this is a helper // used by both the layout-optional and layout-required public method. static Status ValidateShapeWithOptionalLayoutInternal(const Shape& shape); ",0,train 604ff7509dcfe452a3baa06b1b500980063d8262,tensorflow/tensorflow,"Always return a list for Graph.collection (including in Python3) PiperOrigin-RevId: 156870745",ops.py,"@@ -2789,7 +2789,7 @@ class Graph(object): @property def collections(self): """"""Returns the names of the collections known to this graph."""""" - return self._collections.keys() + return list(self._collections) def add_to_collection(self, name, value): """"""Stores `value` in the collection with the given `name`. ",0,train fc6510b506731bf2ffc2520e30fba73b79e5b687,tensorflow/tensorflow,"Fix CheckpointSaverHook to properly save every save_checkpoints_steps for TPU workloads. PiperOrigin-RevId: 193266515 (cherry picked from commit 5aba07dce5b9e924183efcd05cd82f2fbb70edc8)",tpu_estimator.py,"@@ -2054,6 +2054,14 @@ class TPUEstimator(estimator_lib.Estimator): }, every_n_secs=30) ] + input_hooks + chief_hooks = [ + training.CheckpointSaverHook( + self.model_dir, + save_secs=self._config.save_checkpoints_secs, + save_steps=self._config.save_checkpoints_steps, + steps_per_run=self._config.tpu_config.iterations_per_loop, + scaffold=scaffold) + ] summary.scalar(model_fn_lib.LOSS_METRIC_KEY, loss) with ops.control_dependencies([loss]): update_ops = _sync_variables_ops() @@ -2067,6 +2075,7 @@ class TPUEstimator(estimator_lib.Estimator): return model_fn_lib.EstimatorSpec( mode, loss=loss, + training_chief_hooks=chief_hooks, training_hooks=hooks, train_op=train_op, scaffold=scaffold) ",0,train fc6510b506731bf2ffc2520e30fba73b79e5b687,tensorflow/tensorflow,"Fix CheckpointSaverHook to properly save every save_checkpoints_steps for TPU workloads. PiperOrigin-RevId: 193266515 (cherry picked from commit 5aba07dce5b9e924183efcd05cd82f2fbb70edc8)",basic_session_run_hooks.py,"@@ -391,7 +391,8 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): saver=None, checkpoint_basename=""model.ckpt"", scaffold=None, - listeners=None): + listeners=None, + steps_per_run=1): """"""Initializes a `CheckpointSaverHook`. Args: @@ -404,6 +405,9 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): listeners: List of `CheckpointSaverListener` subclass instances. Used for callbacks that run immediately before or after this hook saves the checkpoint. 
+ steps_per_run: `int`, number of steps that occur between each invocation + of the hook. Primarily used for TPU workloads which run multiple steps + in a while loop in a single Session.run. Raises: ValueError: One of `save_steps` or `save_secs` should be set. @@ -419,6 +423,7 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): self._timer = SecondOrStepTimer(every_secs=save_secs, every_steps=save_steps) self._listeners = listeners or [] + self._steps_per_run = steps_per_run def begin(self): self._summary_writer = SummaryWriterCache.get(self._checkpoint_dir) @@ -450,7 +455,8 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): def after_run(self, run_context, run_values): stale_global_step = run_values.results - if self._timer.should_trigger_for_step(stale_global_step+1): + if self._timer.should_trigger_for_step( + stale_global_step + self._steps_per_run): # get the real value after train op. global_step = run_context.session.run(self._global_step_tensor) if self._timer.should_trigger_for_step(global_step): ",0,train fc6510b506731bf2ffc2520e30fba73b79e5b687,tensorflow/tensorflow,"Fix CheckpointSaverHook to properly save every save_checkpoints_steps for TPU workloads. PiperOrigin-RevId: 193266515 (cherry picked from commit 5aba07dce5b9e924183efcd05cd82f2fbb70edc8)",basic_session_run_hooks_test.py,"@@ -719,6 +719,99 @@ class CheckpointSaverHookTest(test.TestCase): fake_summary_writer.FakeSummaryWriter.uninstall() +class CheckpointSaverHookMultiStepTest(test.TestCase): + + def setUp(self): + self.model_dir = tempfile.mkdtemp() + self.graph = ops.Graph() + self.steps_per_run = 5 + with self.graph.as_default(): + self.scaffold = monitored_session.Scaffold() + self.global_step = variables.get_or_create_global_step() + self.train_op = training_util._increment_global_step(self.steps_per_run) + + def tearDown(self): + shutil.rmtree(self.model_dir, ignore_errors=True) + + def test_save_steps_saves_in_first_step(self): + with self.graph.as_default(): + hook = basic_session_run_hooks.CheckpointSaverHook( + self.model_dir, + save_steps=2*self.steps_per_run, + scaffold=self.scaffold, + steps_per_run=self.steps_per_run) + hook.begin() + self.scaffold.finalize() + with session_lib.Session() as sess: + sess.run(self.scaffold.init_op) + mon_sess = monitored_session._HookedSession(sess, [hook]) + mon_sess.run(self.train_op) + self.assertEqual(5, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + + def test_save_steps_saves_periodically(self): + with self.graph.as_default(): + hook = basic_session_run_hooks.CheckpointSaverHook( + self.model_dir, + save_steps=2*self.steps_per_run, + scaffold=self.scaffold, + steps_per_run=self.steps_per_run) + hook.begin() + self.scaffold.finalize() + with session_lib.Session() as sess: + sess.run(self.scaffold.init_op) + mon_sess = monitored_session._HookedSession(sess, [hook]) + mon_sess.run(self.train_op) + # Saved (step=5) + self.assertEqual(5, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + + mon_sess.run(self.train_op) + # Not saved (step=10) + self.assertEqual(5, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + + mon_sess.run(self.train_op) + # Saved (step=15) + self.assertEqual(15, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + + mon_sess.run(self.train_op) + # Not saved (step=20) + self.assertEqual(15, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + + mon_sess.run(self.train_op) + # Saved (step=25) + 
self.assertEqual(25, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + + def test_save_steps_saves_at_end(self): + with self.graph.as_default(): + hook = basic_session_run_hooks.CheckpointSaverHook( + self.model_dir, + save_steps=2*self.steps_per_run, + scaffold=self.scaffold, + steps_per_run=self.steps_per_run) + hook.begin() + self.scaffold.finalize() + with session_lib.Session() as sess: + sess.run(self.scaffold.init_op) + mon_sess = monitored_session._HookedSession(sess, [hook]) + mon_sess.run(self.train_op) + mon_sess.run(self.train_op) + hook.end(sess) + self.assertEqual(10, + checkpoint_utils.load_variable(self.model_dir, + self.global_step.name)) + + class ResourceCheckpointSaverHookTest(test.TestCase): def setUp(self): ",0,train d84acd6e45d5c33743d032885e4f5ee727f57db8,tensorflow/tensorflow,"Remove unused symbols in vars_test. PiperOrigin-RevId: 324686959 Change-Id: If5a8d2ccf6d4baa4e1f19d83a2d54d359c6e6514",vars_test.py,"@@ -26,7 +26,6 @@ from tensorflow.python.distribute import combinations from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.distribute import strategy_combinations from tensorflow.python.distribute import tpu_strategy -from tensorflow.python.distribute import tpu_values from tensorflow.python.distribute import values from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver from tensorflow.python.eager import context @@ -664,26 +663,6 @@ class OnWriteVariableSyncScatterTests(test.TestCase, parameterized.TestCase): self.assertAllEqual([1, 1, 1], self.evaluate(v2.read_value())) -def _make_replica_local(method, strategy=None): - if strategy is None: - devices = (""/device:GPU:0"", ""/device:CPU:0"") - else: - devices = strategy.extended.worker_devices - - v = [] - for d, n, init in zip(devices, [""v"", ""v/replica""], [1., 2.]): - with ops.device(d): - v.append(variable_scope.get_variable( - name=n, initializer=init, use_resource=True)) - - if (strategy is not None) and isinstance(strategy, _TPU_STRATEGIES): - var_cls = tpu_values.TPUSyncOnReadVariable - else: - var_cls = values.SyncOnReadVariable - replica_local = var_cls(strategy, v, method) - return v, replica_local - - class OnReadVariableSyncTest(test.TestCase, parameterized.TestCase): @combinations.generate(strategy_and_run_tf_function_combinations()) @@ -1258,12 +1237,5 @@ class SyncOnReadScatterReplicaTest(test.TestCase, parameterized.TestCase): self.evaluate(distribution.run(v.scatter_min, args=(delta,))) -def _make_index_slices(vals, indices, dense_shape=None): - if dense_shape: - dense_shape = array_ops.identity(dense_shape) - return indexed_slices.IndexedSlices( - array_ops.identity(vals), array_ops.identity(indices), dense_shape) - - if __name__ == ""__main__"": test.main() ",0,train b1109ff54544be100bdb88a6661e8938937cac7b,tensorflow/tensorflow,"Enable a few test targets for tfrt. Disable quantization test since we don't plan to have quantization support in the initial launch. PiperOrigin-RevId: 335143411 Change-Id: I606bacf12bd9b349da304cd97a8acc081dc758f0",bitcast_op_test.py,"@@ -82,6 +82,7 @@ class BitcastTest(test.TestCase): datatype = dtypes.int8 array_ops.bitcast(x, datatype, None) + @test_util.disable_tfrt(""b/169901260"") def testQuantizedType(self): shape = [3, 4] x = np.zeros(shape, np.uint16) ",0,train b1109ff54544be100bdb88a6661e8938937cac7b,tensorflow/tensorflow,"Enable a few test targets for tfrt. Disable quantization test since we don't plan to have quantization support in the initial launch. 
PiperOrigin-RevId: 335143411 Change-Id: I606bacf12bd9b349da304cd97a8acc081dc758f0",constant_op_test.py,"@@ -456,6 +456,7 @@ class ZerosTest(test.TestCase): self.assertFalse(np.any(z_value)) self.assertEqual((2, 3), z_value.shape) + @test_util.disable_tfrt(""b/169901260"") def testQint8Dtype(self): dtype = dtypes_lib.qint8 z = array_ops.zeros([2, 3], dtype=dtype) @@ -466,6 +467,7 @@ class ZerosTest(test.TestCase): z_value = self.evaluate(math_ops.cast(z, dtypes_lib.int32)) self.assertFalse(np.any(z_value)) + @test_util.disable_tfrt(""b/169901260"") def testQint16Dtype(self): dtype = dtypes_lib.qint16 z = array_ops.zeros([2, 3], dtype=dtype) @@ -650,6 +652,7 @@ class OnesTest(test.TestCase): self.assertEqual([2, 3], z.get_shape()) self.assertAllEqual(z, np.ones([2, 3])) + @test_util.disable_tfrt(""b/169901260"") def testQintDtype(self): @def_function.function(autograph=False) ",0,train b1109ff54544be100bdb88a6661e8938937cac7b,tensorflow/tensorflow,"Enable a few test targets for tfrt. Disable quantization test since we don't plan to have quantization support in the initial launch. PiperOrigin-RevId: 335143411 Change-Id: I606bacf12bd9b349da304cd97a8acc081dc758f0",cwise_ops_binary_test.py,"@@ -991,6 +991,7 @@ class ComparisonOpTest(test.TestCase): [[True, True, True, True, True], [False, False, False, False, False]], values) + @test_util.disable_tfrt(""b/169901260"") def testEqualQuantizeDType(self): dtypes = [ dtypes_lib.qint8, ",0,train b1109ff54544be100bdb88a6661e8938937cac7b,tensorflow/tensorflow,"Enable a few test targets for tfrt. Disable quantization test since we don't plan to have quantization support in the initial launch. PiperOrigin-RevId: 335143411 Change-Id: I606bacf12bd9b349da304cd97a8acc081dc758f0",dynamic_stitch_op_test.py,"@@ -62,6 +62,7 @@ class DynamicStitchTestBase(object): # length. self.assertEqual([None], stitched_t.get_shape().as_list()) + @test_util.disable_tfrt(""b/169901260"") def testSimpleOneDimensional(self): # Test various datatypes in the simple case to ensure that the op was # registered under those types. ",0,train b1109ff54544be100bdb88a6661e8938937cac7b,tensorflow/tensorflow,"Enable a few test targets for tfrt. Disable quantization test since we don't plan to have quantization support in the initial launch. PiperOrigin-RevId: 335143411 Change-Id: I606bacf12bd9b349da304cd97a8acc081dc758f0",spacetodepth_op_test.py,"@@ -309,6 +309,7 @@ class SpaceToDepthTest(test.TestCase): actual_vals, expected_vals = self.evaluate([actual, expected]) self.assertTrue(np.array_equal(actual_vals, expected_vals)) + @test_util.disable_tfrt(""b/169901260"") def testAgainstTranspose(self): self.compareToTranspose(3, 2, 3, 1, 2, ""NHWC"", dtypes.float32, False) self.compareToTranspose(1, 2, 3, 2, 2, ""NHWC"", dtypes.float32, False) ",0,train 4d4794806b565656e3c6a5844be159e84867cd4c,tensorflow/tensorflow,"Update tensorflow/python/keras/callbacks.py Co-Authored-By: aweers <32593524+aweers@users.noreply.github.com>",callbacks.py,"@@ -1025,7 +1025,7 @@ class EarlyStopping(Callback): # Firstly, let's create the callback callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3) # This callback will stop training when there is no improvement in - # validation loss for three epochs + # the validation loss for three consecutive epochs. 
# then simply train the model with the callback model.fit(data, labels, epochs=100, callbacks=[callback], validation_data=(val_data, val_labels)) ",0,train 62df725269a89a0a5d877eae18d0c83155f2ea9d,tensorflow/tensorflow,"Increase the input dimension size from 4 to 6 to address the RetinaNet model PiperOrigin-RevId: 206235660",import_tensorflow.cc,"@@ -215,7 +215,7 @@ tensorflow::Status ImportFloatArray(const TensorProto& input_tensor, Array* output_array) { CHECK_EQ(input_tensor.dtype(), DT_FLOAT); const auto& input_shape = input_tensor.tensor_shape(); - CHECK_LE(input_shape.dim_size(), 4); + CHECK_LE(input_shape.dim_size(), 6); int input_flat_size; auto status = ImportShape(input_shape.dim(), &input_flat_size, output_array->mutable_shape()); @@ -253,7 +253,7 @@ tensorflow::Status ImportQuint8Array(const TensorProto& input_tensor, Array* output_array) { CHECK_EQ(input_tensor.dtype(), DT_QUINT8); const auto& input_shape = input_tensor.tensor_shape(); - CHECK_LE(input_shape.dim_size(), 4); + CHECK_LE(input_shape.dim_size(), 6); int input_flat_size; auto status = ImportShape(input_shape.dim(), &input_flat_size, output_array->mutable_shape()); @@ -290,7 +290,7 @@ tensorflow::Status ImportInt32Array(const TensorProto& input_tensor, Array* output_array) { CHECK_EQ(input_tensor.dtype(), DT_INT32); const auto& input_shape = input_tensor.tensor_shape(); - CHECK_LE(input_shape.dim_size(), 4); + CHECK_LE(input_shape.dim_size(), 6); int input_flat_size; auto status = ImportShape(input_shape.dim(), &input_flat_size, output_array->mutable_shape()); @@ -326,7 +326,7 @@ tensorflow::Status ImportInt64Array(const TensorProto& input_tensor, Array* output_array) { CHECK_EQ(input_tensor.dtype(), DT_INT64); const auto& input_shape = input_tensor.tensor_shape(); - CHECK_LE(input_shape.dim_size(), 4); + CHECK_LE(input_shape.dim_size(), 6); int input_flat_size; auto status = ImportShape(input_shape.dim(), &input_flat_size, output_array->mutable_shape()); @@ -363,7 +363,7 @@ tensorflow::Status ImportBoolArray(const TensorProto& input_tensor, Array* output_array) { CHECK_EQ(input_tensor.dtype(), DT_BOOL); const auto& input_shape = input_tensor.tensor_shape(); - CHECK_LE(input_shape.dim_size(), 4); + CHECK_LE(input_shape.dim_size(), 6); int input_flat_size; auto status = ImportShape(input_shape.dim(), &input_flat_size, output_array->mutable_shape()); @@ -409,7 +409,7 @@ tensorflow::Status ImportStringArray(const TensorProto& input_tensor, Array* output_array) { CHECK_EQ(input_tensor.dtype(), DT_STRING); const auto& input_shape = input_tensor.tensor_shape(); - CHECK_LE(input_shape.dim_size(), 4); + CHECK_LE(input_shape.dim_size(), 6); int input_flat_size; auto status = ImportShape(input_shape.dim(), &input_flat_size, output_array->mutable_shape()); ",0,train 7322a44ff82a5a44e690b59fec5557ffc8f4ab34,tensorflow/tensorflow,"Add IsEmpty() utility for telling if XSpace is empty. PiperOrigin-RevId: 336768938 Change-Id: Idce1f4dae25bb9a4248ffc50aa2c50d1c90d661f",xplane_utils.cc,"@@ -240,5 +240,16 @@ uint64 GetStartTimestampNs(const XPlane& plane) { return plane_timestamp; } +bool IsEmpty(const XSpace& space) { + for (const auto& plane : space.planes()) { + for (const auto& line : plane.lines()) { + if (!line.events().empty()) { + return false; + } + } + } + return true; +} + } // namespace profiler } // namespace tensorflow ",0,train 7322a44ff82a5a44e690b59fec5557ffc8f4ab34,tensorflow/tensorflow,"Add IsEmpty() utility for telling if XSpace is empty. 
PiperOrigin-RevId: 336768938 Change-Id: Idce1f4dae25bb9a4248ffc50aa2c50d1c90d661f",xplane_utils.h,"@@ -110,6 +110,9 @@ void MergePlanes(const XPlane& src_plane, XPlane* dst_plane); // timestamps. If zero line exists, return 0; uint64 GetStartTimestampNs(const XPlane& plane); +// Returns true if there are no XEvents. +bool IsEmpty(const XSpace& space); + } // namespace profiler } // namespace tensorflow ",0,train 7530e33cee8fe6555d98a7951006faf51c8b3809,tensorflow/tensorflow,Iterate on copy of the structure we modify.,fusion_bitcast_lift.cc,"@@ -193,7 +193,9 @@ StatusOr<bool> FusionBitcastLift::Run(HloModule* module) { i->CloneWithNewOperands(dtyped_new_shape, new_operands)); // Replace the old bitcasts with the new instruction to // remove it. - for (HloInstruction* user: i->users()) { + // Copy the vector as it will be modified while we iterate on it. + const std::vector<HloInstruction*> users = i->users(); + for (HloInstruction* user: users) { TF_RETURN_IF_ERROR(i->parent()->ReplaceInstructionWithDifferentShape( user, cloned_i)); } ",0,train b4a149edf6185bf65a4dde7cc16fb88995056af9,tensorflow/tensorflow,"Allow passing dtype='mixed_float16' to a layer. Before, only dtypes or anything convertible to dtypes could be passed. Now, policy names which are not dtypes can also be passed: 'mixed_float16' and 'mixed_bfloat16"". PiperOrigin-RevId: 360970929 Change-Id: I6b83931d430a6b2dec9dfba7af1792e4a7d9ae8f",distribute_strategy_test.py,"@@ -579,7 +579,7 @@ class TestDistributionStrategyWithNumpyArrays(test.TestCase, return self.v2 + inp with self.cached_session(), distribution.scope(): - layer = MyLayer(dtype=policy.Policy(policy_name)) + layer = MyLayer(dtype=policy_name) def run_fn(): x = np.array([1.]) with backprop.GradientTape() as tape: ",0,train b4a149edf6185bf65a4dde7cc16fb88995056af9,tensorflow/tensorflow,"Allow passing dtype='mixed_float16' to a layer. Before, only dtypes or anything convertible to dtypes could be passed. Now, policy names which are not dtypes can also be passed: 'mixed_float16' and 'mixed_bfloat16"". PiperOrigin-RevId: 360970929 Change-Id: I6b83931d430a6b2dec9dfba7af1792e4a7d9ae8f",base_layer.py,"@@ -2348,6 +2348,11 @@ class Layer(module.Module, version_utils.LayerVersionSelector): self._dtype_policy = dtype elif isinstance(dtype, dict): self._dtype_policy = policy.deserialize(dtype) + elif isinstance(dtype, str) and dtype in ('mixed_float16', + 'mixed_bfloat16'): + # The isinstance check is required since np.dtype raises an error if + # compared to a non-dtype string.
+ self._dtype_policy = policy.Policy(dtype) elif dtype: self._dtype_policy = policy.Policy(dtypes.as_dtype(dtype).name) else: ",0,train b4a149edf6185bf65a4dde7cc16fb88995056af9,tensorflow/tensorflow,"Allow passing dtype='mixed_float16' to a layer. Before, only dtypes or anything convertible to dtypes could be passed. Now, policy names which are not dtypes can also be passed: 'mixed_float16' and 'mixed_bfloat16"". PiperOrigin-RevId: 360970929 Change-Id: I6b83931d430a6b2dec9dfba7af1792e4a7d9ae8f",normalization_test.py,"@@ -31,7 +31,6 @@ from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import testing_utils from tensorflow.python.keras.layers import normalization from tensorflow.python.keras.layers import normalization_v2 -from tensorflow.python.keras.mixed_precision import policy from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.ops import math_ops @@ -166,7 +165,7 @@ class BatchNormalizationTest(keras_parameterized.TestCase): axis=-1, input_shape=(4, 4, 3), momentum=0.8, - dtype=policy.Policy('mixed_float16')) + dtype='mixed_float16') x = np.random.normal(size=(10, 4, 4, 3)) y = norm(x) self.assertEqual(y.dtype, 'float16') @@ -181,7 +180,7 @@ class BatchNormalizationTest(keras_parameterized.TestCase): axis=-1, input_shape=(1, 1, 1), fused=fused, - dtype=policy.Policy('mixed_float16')) + dtype='mixed_float16') x = np.array([-1000., 1000.]).reshape((2, 1, 1, 1)) y = norm(x, training=True) expected_y = np.array([-1.0, 1.0]).reshape((2, 1, 1, 1)) ",0,train b4a149edf6185bf65a4dde7cc16fb88995056af9,tensorflow/tensorflow,"Allow passing dtype='mixed_float16' to a layer. Before, only dtypes or anything convertible to dtypes could be passed. Now, policy names which are not dtypes can also be passed: 'mixed_float16' and 'mixed_bfloat16"". PiperOrigin-RevId: 360970929 Change-Id: I6b83931d430a6b2dec9dfba7af1792e4a7d9ae8f",keras_test.py,"@@ -164,7 +164,7 @@ class KerasLayerTest(keras_parameterized.TestCase): return math_ops.cast(inputs, 'int32') + self.v x = constant_op.constant([1.]) - layer = LayerWithIntVar(dtype=policy.Policy('mixed_float16')) + layer = LayerWithIntVar(dtype='mixed_float16') self.assertEqual(layer(x).dtype, 'int32') @parameterized.named_parameters(*TESTCASES) @@ -239,14 +239,6 @@ class KerasLayerTest(keras_parameterized.TestCase): self.assertEqual(layer(x).dtype, dtypes.float64) self.assertEqual(layer.v.dtype, dtypes.float64) - def test_error_passing_policy_string_to_layer(self): - with self.assertRaisesRegex( - TypeError, ""Cannot convert value 'mixed_float16' to a "" - 'TensorFlow DType'): - # This is not allowed, as otherwise a ""mixed_float16"" policy could be - # created without an API call that has the name ""experimental"" in it. 
- mp_test_util.MultiplyLayer(dtype='mixed_float16') - @parameterized.named_parameters(*TESTCASES) def test_gradient(self, strategy_fn): x = constant_op.constant([1.]) @@ -344,7 +336,7 @@ class KerasLayerTest(keras_parameterized.TestCase): self.assertEqual(layer(x).dtype, dtype) self.assertEqual(layer.v.dtype, dtype) - layer = mp_test_util.MultiplyLayer(dtype=policy.Policy('mixed_float16')) + layer = mp_test_util.MultiplyLayer(dtype='mixed_float16') config = layer.get_config() self.assertEqual(config['dtype'], {'class_name': 'Policy', @@ -430,7 +422,7 @@ class KerasLayerTest(keras_parameterized.TestCase): self.assertEqual(config['dtype'], 'float16') def test_delete_variable(self): - layer = base_layer.Layer(dtype=policy.Policy('mixed_float16')) + layer = base_layer.Layer(dtype='mixed_float16') layer.x = layer.add_weight('x') self.assertEqual(layer.trainable_weights, [layer.x]) del layer.x @@ -455,7 +447,7 @@ class KerasLayerTest(keras_parameterized.TestCase): 'stop using mixed precision by removing the use of the ' '""mixed_float16"" policy or use a different Strategy, e.g. ' 'a MirroredStrategy.'): - mp_test_util.MultiplyLayer(dtype=policy.Policy('mixed_float16')) + mp_test_util.MultiplyLayer(dtype='mixed_float16') # Non-mixed policies are fine mp_test_util.MultiplyLayer(dtype=policy.Policy('float64')) ",0,train b4a149edf6185bf65a4dde7cc16fb88995056af9,tensorflow/tensorflow,"Allow passing dtype='mixed_float16' to a layer. Before, only dtypes or anything convertible to dtypes could be passed. Now, policy names which are not dtypes can also be passed: 'mixed_float16' and 'mixed_bfloat16"". PiperOrigin-RevId: 360970929 Change-Id: I6b83931d430a6b2dec9dfba7af1792e4a7d9ae8f",policy.py,"@@ -70,8 +70,9 @@ class Policy(object): In the example above, passing `dtype='float32'` to the layer is equivalent to passing `dtype=tf.keras.mixed_precision.Policy('float32')`. In general, - passing a dtype to a layer is equivalent to passing the corresponding policy, - so it is never necessary to explicitly construct a `Policy` object. + passing a dtype policy name to a layer is equivalent to passing the + corresponding policy, so it is never necessary to explicitly construct a + `Policy` object. Note: `Model.compile` will automatically wrap an optimizer with a `tf.keras.mixed_precision.LossScaleOptimizer` if you use the `'mixed_float16'` @@ -145,8 +146,7 @@ class Policy(object): ... # With mixed precision, self.kernel will be casted to float16 ... return tf.linalg.matmul(inputs, self.kernel) ... - >>> dtype_policy = tf.keras.mixed_precision.Policy('mixed_float16') - >>> layer = SimpleDense(dtype=dtype_policy) + >>> layer = SimpleDense(dtype='mixed_float16') >>> y = layer(tf.ones((10, 10))) >>> y.dtype tf.float16 @@ -178,9 +178,7 @@ class Policy(object): ... # occur when adding `inputs` to `rand`. ... rand = tf.random.normal(shape=inputs.shape, dtype=inputs.dtype) ... return inputs + rand - - >>> dtype_policy = tf.keras.mixed_precision.Policy('mixed_float16') - >>> layer = AddRandom(dtype=dtype_policy) + >>> layer = AddRandom(dtype='mixed_float16') >>> y = layer(x) >>> y.dtype tf.float16 ",0,train 389fa0598c5a1d0ffdfefa8ce24aab7d5d0f8864,tensorflow/tensorflow,"Add legalization of HLO reduce to LHLO reduce. PiperOrigin-RevId: 283928453 Change-Id: Ib4d878e41473fe41c1ef20f269542aa0f248b723",hlo_legalize_to_lhlo.cc,"@@ -18,6 +18,7 @@ limitations under the License. 
#include ""absl/memory/memory.h"" #include ""mlir/Dialect/StandardOps/Ops.h"" // TF:local_config_mlir #include ""mlir/IR/Attributes.h"" // TF:local_config_mlir +#include ""mlir/IR/BlockAndValueMapping.h"" // TF:local_config_mlir #include ""mlir/IR/Builders.h"" // TF:local_config_mlir #include ""mlir/IR/Function.h"" // TF:local_config_mlir #include ""mlir/IR/Location.h"" // TF:local_config_mlir @@ -38,13 +39,19 @@ namespace { constexpr StringRef kTempBufferAttr = ""temp""; -Value* GetTensorStoreMemRef(Value* value) { +Value* GetTensorStoreOrReturnMemRef(Value* value) { for (const auto& user : value->getUsers()) { if (auto tensor_store = dyn_cast(user)) { if (tensor_store.getOperand(0) == value) { return tensor_store.getOperand(1); } } + if (auto return_op = dyn_cast(user)) { + if (return_op.getOperand(0) == value) { + auto block = return_op.getOperation()->getBlock(); + return *block->args_rbegin(); + } + } } return nullptr; } @@ -88,8 +95,8 @@ Value* InsertAllocAndDealloc(Location loc, Value* result, /// function to store that values held in the tensor. Value* GetBufferForResultValue(Location loc, Value* result, ConversionPatternRewriter* rewriter) { - if (auto tensor_store_memref = GetTensorStoreMemRef(result)) { - return tensor_store_memref; + if (auto existing_memref = GetTensorStoreOrReturnMemRef(result)) { + return existing_memref; } return InsertAllocAndDealloc(loc, result, rewriter); } @@ -122,6 +129,62 @@ class HloToLhloOpConverter : public ConversionPattern { } }; +struct HloToLHloReduceConverter + : public OpConversionPattern { + public: + using OpConversionPattern::OpConversionPattern; + + PatternMatchResult matchAndRewrite( + xla_hlo::ReduceOp op, ArrayRef operands, + ConversionPatternRewriter& rewriter) const final { + auto loc = op.getLoc(); + // TODO(b/137624192) Implement variadic reduce. + if (op.getNumResults() != 1) return matchFailure(); + if (op.getParentRegion()->getBlocks().size() != 1) { + emitError(loc, + ""tensor to buffer conversion expects a single block in the "" + ""region containing the operation""); + } + const auto& original_results = op.getResults(); + SmallVector buffer_args(operands.begin(), operands.end()); + for (auto result : original_results) { + buffer_args.push_back(GetBufferForResultValue(loc, result, &rewriter)); + } + auto new_op = rewriter.create( + loc, llvm::None, buffer_args, op.getAttrs()); + + // Copy over the operations inside the region. + rewriter.inlineRegionBefore(op.body(), new_op.body(), new_op.body().end()); + + // Create new block arguments with correct type. + auto& entry_block = new_op.body().front(); + int original_arg_count = entry_block.getNumArguments(); + for (int i = 0; i < original_arg_count; ++i) { + auto old_arg = entry_block.getArgument(i); + auto old_type = old_arg->getType().cast(); + auto new_type = + MemRefType::get(old_type.getShape(), old_type.getElementType()); + auto new_arg = entry_block.addArgument(new_type); + rewriter.replaceUsesOfBlockArgument(old_arg, new_arg); + } + // Add an argument for the result. + entry_block.addArgument( + entry_block.getArgument(original_arg_count)->getType()); + // Remove the old arguments. + for (int i = original_arg_count - 1; i >= 0; --i) { + entry_block.eraseArgument(i); + } + // Insert terminator at the end. 
+ rewriter.setInsertionPointToEnd(&entry_block); + rewriter.create(loc); + + rewriter.replaceOp(op, ArrayRef(buffer_args).slice(operands.size()), + llvm::to_vector<4>(original_results)); + + return matchSuccess(); + } +}; + class HloToLhloTensorLoadConverter : public ConversionPattern { public: explicit HloToLhloTensorLoadConverter(MLIRContext* context) @@ -135,6 +198,7 @@ class HloToLhloTensorLoadConverter : public ConversionPattern { } }; +// TODO(b/137624192): Rewrite into a copy and elide copy if possible. class HloToLhloTensorStoreConverter : public ConversionPattern { public: explicit HloToLhloTensorStoreConverter(MLIRContext* context) @@ -148,6 +212,19 @@ class HloToLhloTensorStoreConverter : public ConversionPattern { } }; +// TODO(b/137624192): Rewrite into a copy and elide copy if possible. +class HloToLhloReturnConverter : public OpConversionPattern { + public: + using OpConversionPattern::OpConversionPattern; + + PatternMatchResult matchAndRewrite( + xla_hlo::ReturnOp op, ArrayRef operands, + ConversionPatternRewriter& rewriter) const final { + rewriter.eraseOp(op); + return matchSuccess(); + } +}; + // Lowers from HLO dialect to LHLO dialect allocating/deallocating temporary // buffers if necessary. // @@ -215,6 +292,7 @@ void populateHLOToLHLOConversionPattern(MLIRContext* context, xla_lhlo::BroadcastInDimOp>, HloToLhloOpConverter, HloToLhloOpConverter, + HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, @@ -229,6 +307,7 @@ void populateHLOToLHLOConversionPattern(MLIRContext* context, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, + HloToLHloReduceConverter, HloToLhloReturnConverter, HloToLhloTensorLoadConverter, HloToLhloTensorStoreConverter >(context); // clang-format on ",0,train 389fa0598c5a1d0ffdfefa8ce24aab7d5d0f8864,tensorflow/tensorflow,"Add legalization of HLO reduce to LHLO reduce. PiperOrigin-RevId: 283928453 Change-Id: Ib4d878e41473fe41c1ef20f269542aa0f248b723",kernel_lowering.cc,"@@ -65,8 +65,8 @@ struct FusionToLhloConverter mlir::OwningRewritePatternList patterns; mlir::ConversionTarget target(ctx); target.addLegalDialect<::mlir::xla_lhlo::XlaLhloDialect>(); - ::mlir::xla_hlo::populateHLOToLHLOConversionPattern(&ctx, &patterns); + getFunction().walk([&](FusionOp op) { if (failed(applyPartialConversion(op, target, patterns, nullptr))) { signalPassFailure(); ",0,train 389fa0598c5a1d0ffdfefa8ce24aab7d5d0f8864,tensorflow/tensorflow,"Add legalization of HLO reduce to LHLO reduce. PiperOrigin-RevId: 283928453 Change-Id: Ib4d878e41473fe41c1ef20f269542aa0f248b723",mlir_gpu_lhlo_gen_test.cc,"@@ -255,45 +255,44 @@ ENTRY %AddMultiply (x: f32[2,2], y: f32[2,2], z: f32[2,2]) -> f32[2,2] { LoweringStage::GPU); } -// TODO(herhut): Re-enable once we can lower hlo_reduce to proper lhlo_reduce. 
-// TEST_F(LhloGenTest, FusedReduce) { -// CompileAndVerifyIr(R""( -// HloModule FusedReduce -// -// %add (x: f32[], y: f32[]) -> f32[] { -// %x = f32[] parameter(0) -// %y = f32[] parameter(1) -// ROOT %add = f32[] add(f32[] %x, f32[] %y) -// } -// -// %fused_computation (param: f32[100,10]) -> f32[10] { -// %param = f32[100,10] parameter(0) -// %constant = f32[] constant(0) -// ROOT %reduce = f32[10]{0} reduce(f32[100,10]{1,0} %param, f32[] %constant), -// dimensions={0}, to_apply=%add -// } -// -// ENTRY %FusedReduce (x: f32[100,10]) -> f32[10] { -// %x = f32[100,10] parameter(0) -// ROOT %fusion = f32[10]{0} fusion(f32[100,10]{1,0} %x), kind=kInput, -// calls=%fused_computation -// } -// )"", -// R""( -// ;CHECK: func @fusion(%[[ARG0:.*]]: [[TYPE:.*]], %[[RESULT:.*]]: [[RTYPE:.*]]) -// ;CHECK: ""xla_lhlo.fusion""() ( { -// ;CHECK: %[[REF0:.*]] = tensor_load %arg0 : [[TYPE]] -// ;CHECK: %[[CT0:.*]] = xla_hlo.constant dense<0.000000e+00> -// ;CHECK: %[[RED:.*]] = ""xla_hlo.reduce""(%0, %1) ( { -// ;CHECK: ^bb0(%[[BARG0:.*]]: [[ETYPE:.*]], %[[BARG1:.*]]: [[ETYPE]]) -// ;CHECK: %[[ADD:.*]] = xla_hlo.add %[[BARG0]], %[[BARG1]] : [[ETYPE]] -// ;CHECK: ""xla_hlo.return""(%[[ADD]]) -// ;CHECK: }) -// ;CHECK: tensor_store %[[RED]], %[[RESULT]] : [[RTYPE]] -// ;CHECK: ""xla_lhlo.terminator""() -// ;CHECK-NEXT: }) -// )""); -// } +TEST_F(LhloGenTest, FusedReduce) { + CompileAndVerifyIr(R""( +HloModule FusedReduce + +%add (x: f32[], y: f32[]) -> f32[] { + %x = f32[] parameter(0) + %y = f32[] parameter(1) + ROOT %add = f32[] add(f32[] %x, f32[] %y) +} + +%fused_computation (param: f32[100,10]) -> f32[10] { + %param = f32[100,10] parameter(0) + %constant = f32[] constant(0) + ROOT %reduce = f32[10]{0} reduce(f32[100,10]{1,0} %param, f32[] %constant), + dimensions={0}, to_apply=%add +} + +ENTRY %FusedReduce (x: f32[100,10]) -> f32[10] { + %x = f32[100,10] parameter(0) + ROOT %fusion = f32[10]{0} fusion(f32[100,10]{1,0} %x), kind=kInput, + calls=%fused_computation +} +)"", + R""( +;CHECK: func @fusion(%[[ARG0:.*]]: [[TYPE:.*]], %[[RESULT:.*]]: [[RTYPE:.*]]) +;CHECK: ""xla_lhlo.fusion""() ( { +;CHECK: %[[REF0:.*]] = tensor_load %arg0 : [[TYPE]] +;CHECK: %[[CT0:.*]] = xla_hlo.constant dense<0.000000e+00> +;CHECK: %[[RED:.*]] = ""xla_hlo.reduce""(%0, %1) ( { +;CHECK: ^bb0(%[[BARG0:.*]]: [[ETYPE:.*]], %[[BARG1:.*]]: [[ETYPE]]) +;CHECK: %[[ADD:.*]] = xla_hlo.add %[[BARG0]], %[[BARG1]] : [[ETYPE]] +;CHECK: ""xla_hlo.return""(%[[ADD]]) +;CHECK: }) +;CHECK: tensor_store %[[RED]], %[[RESULT]] : [[RTYPE]] +;CHECK: ""xla_lhlo.terminator""() +;CHECK-NEXT: }) + )""); +} TEST_F(LhloGenTest, Broadcast) { CompileAndVerifyIr(R""( ",0,train 4a265a6f3a8ea441e6135da03aafa773bbce5505,tensorflow/tensorflow,"Ensure `min_node_weight` is scalar in `BoostedTreesCalculateBestFeatureSplitV2` PiperOrigin-RevId: 411085102 Change-Id: Ibd511f4b224452cbe235e3d2359f384c839ea558",stats_ops.cc,"@@ -736,6 +736,10 @@ class BoostedTreesCalculateBestFeatureSplitV2 : public OpKernel { const Tensor* min_node_weight_t; OP_REQUIRES_OK(context, context->input(""min_node_weight"", &min_node_weight_t)); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_node_weight_t->shape()), + errors::InvalidArgument( + ""min_node_weight must be a scalar, got a tensor of shape "", + min_node_weight_t->shape().DebugString())); const auto min_node_weight = min_node_weight_t->scalar()(); std::vector output_node_ids; ",0,train 75a5cb4e1f6b82c24e9c3ca011ea94ec4c459a48,tensorflow/tensorflow,"Rollback the change of interpreter_wrapper which breaks TF Serving. 
PiperOrigin-RevId: 410667131 Change-Id: I54dd3831ba9595d0af34ebc8160608002bf31c84",interpreter_wrapper.cc,"@@ -258,12 +258,8 @@ InterpreterWrapper::~InterpreterWrapper() {} PyObject* InterpreterWrapper::AllocateTensors(int subgraph_index) { TFLITE_PY_ENSURE_VALID_INTERPRETER(); - if (subgraph_index == InterpreterWrapper::kUndeterminedSubgraphIndex) { - TFLITE_PY_CHECK(interpreter_->AllocateTensors()); - } else { - TFLITE_PY_SUBGRAPH_BOUNDS_CHECK(subgraph_index); - TFLITE_PY_CHECK(interpreter_->subgraph(subgraph_index)->AllocateTensors()); - } + TFLITE_PY_SUBGRAPH_BOUNDS_CHECK(subgraph_index); + TFLITE_PY_CHECK(interpreter_->subgraph(subgraph_index)->AllocateTensors()); Py_RETURN_NONE; } ",0,train 75a5cb4e1f6b82c24e9c3ca011ea94ec4c459a48,tensorflow/tensorflow,"Rollback the change of interpreter_wrapper which breaks TF Serving. PiperOrigin-RevId: 410667131 Change-Id: I54dd3831ba9595d0af34ebc8160608002bf31c84",interpreter_wrapper.h,"@@ -44,8 +44,6 @@ class InterpreterWrapper { public: using Model = FlatBufferModel; - static constexpr int kUndeterminedSubgraphIndex = -1; - // SWIG caller takes ownership of pointer. static InterpreterWrapper* CreateWrapperCPPFromFile( const char* model_path, int op_resolver_id, ",0,train 75a5cb4e1f6b82c24e9c3ca011ea94ec4c459a48,tensorflow/tensorflow,"Rollback the change of interpreter_wrapper which breaks TF Serving. PiperOrigin-RevId: 410667131 Change-Id: I54dd3831ba9595d0af34ebc8160608002bf31c84",interpreter_wrapper_pybind11.cc,"@@ -94,8 +94,7 @@ PYBIND11_MODULE(_pywrap_tensorflow_interpreter_wrapper, m) { [](InterpreterWrapper& self, int subgraph_index) { return tensorflow::PyoOrThrow(self.AllocateTensors(subgraph_index)); }, - py::arg(""subgraph_index"") = - InterpreterWrapper::kUndeterminedSubgraphIndex) + py::arg(""subgraph_index"") = 0) .def( ""Invoke"", [](InterpreterWrapper& self, int subgraph_index) { ",0,train 3db21177223b70103644c0a87299cf194e8f2c6c,tensorflow/tensorflow,"Update GraphDef version to 840. PiperOrigin-RevId: 387057910 Change-Id: I1fbfceb78e988f6a4ca133e86b0076878cdfe547",version.h,"@@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 839 // Updated: 2021/7/26 +#define TF_GRAPH_DEF_VERSION 840 // Updated: 2021/7/27 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // ",0,test fab3f8548264590133d2f49c75ed9c0c0ab83f28,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-11-28 PiperOrigin-RevId: 344594007 Change-Id: I99aa6a0b381bff28ab595736a77169cd7b060724",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 11, 27) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 11, 28) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS"" _FORWARD_COMPATIBILITY_DATE_NUMBER = None ",0,train