TensorPadding.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H

namespace Eigen {

/** \class TensorPadding
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor padding class.
  * At the moment only padding with a constant value is supported.
  *
  */
namespace internal {
template<typename PaddingDimensions, typename XprType>
struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename PaddingDimensions, typename XprType>
struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorPaddingOp<PaddingDimensions, XprType>& type;
};

template<typename PaddingDimensions, typename XprType>
struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1, typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type>
{
  typedef TensorPaddingOp<PaddingDimensions, XprType> type;
};

} // end namespace internal


template<typename PaddingDimensions, typename XprType>
class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorPaddingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorPaddingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorPaddingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims, const Scalar padding_value)
      : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {}

    EIGEN_DEVICE_FUNC
    const PaddingDimensions& padding() const { return m_padding_dims; }
    EIGEN_DEVICE_FUNC
    Scalar padding_value() const { return m_padding_value; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const PaddingDimensions m_padding_dims;
    const Scalar m_padding_value;
};

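// Usage sketch (added for illustration; not part of the original header):
// a TensorPaddingOp is normally created through TensorBase::pad() rather than
// constructed directly, roughly as follows:
//
//   Eigen::Tensor<float, 2> input(3, 4);
//   input.setRandom();
//   Eigen::array<std::pair<int, int>, 2> paddings;
//   paddings[0] = std::make_pair(1, 2);  // 1 value before and 2 after dimension 0
//   paddings[1] = std::make_pair(0, 1);  // nothing before, 1 value after dimension 1
//   Eigen::Tensor<float, 2> padded = input.pad(paddings, 0.0f);
//   // padded has dimensions (1 + 3 + 2) x (0 + 4 + 1) == 6 x 5.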

// Eval as rvalue
template<typename PaddingDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
{
  typedef TensorPaddingOp<PaddingDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<PaddingDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = true,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
    PreferBlockAccess = true,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = true,
    RawAccess = false
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()), m_device(device)
  {
    // The padding op doesn't change the rank of the tensor. Directly padding a scalar would lead
    // to a vector, which doesn't make sense. Instead one should reshape the scalar into a vector
    // of 1 element first and then pad.
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);

    // Compute dimensions
    m_dimensions = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      m_dimensions[i] += m_padding[i].first + m_padding[i].second;
    }
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
      }
      m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
    } else {
      m_inputStrides[NumDims - 1] = 1;
      m_outputStrides[NumDims] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1];
      }
      m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0];
    }
  }

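  // Worked example for the stride computation above (added for illustration;
  // not in the original source): for a column-major input with dimensions
  // (2, 3) and paddings {(1, 1), (0, 2)}, the padded dimensions are (4, 5),
  // m_inputStrides == {1, 2} and m_outputStrides == {1, 4, 20}; in this
  // column-major case the extra entry m_outputStrides[NumDims] equals the
  // total number of output coefficients (20).
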
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    eigen_assert(index < dimensions().TotalSize());
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      if (isPaddingAtIndexForDim(index, 0)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[0].first);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i+1];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      if (isPaddingAtIndexForDim(index, NumDims-1)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[NumDims-1].first);
    }
    return m_impl.coeff(inputIndex);
  }
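  // Worked example for coeff() above (added for illustration; not in the
  // original source): reusing the column-major (2, 3) input with paddings
  // {(1, 1), (0, 2)} and output strides {1, 4, 20}, the linear output index 5
  // decomposes into the coordinates (1, 1). Neither coordinate falls inside a
  // padded range, so the input index is (1 - 0) * 2 + (1 - 1) * 1 == 2, i.e.
  // input(0, 1).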

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return packetColMajor(index);
    }
    return packetRowMajor(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    TensorOpCost cost = m_impl.costPerCoeff(vectorized);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims; ++i)
        updateCostPerDimension(cost, i, i == 0);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i >= 0; --i)
        updateCostPerDimension(cost, i, i == NumDims - 1);
    }
    return cost;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // If one of the dimensions is zero, return an empty block view.
    if (desc.size() == 0) {
      return TensorBlock(internal::TensorBlockKind::kView, NULL,
                         desc.dimensions());
    }

    static const bool IsColMajor = Layout == static_cast<int>(ColMajor);
    const int inner_dim_idx = IsColMajor ? 0 : NumDims - 1;

    Index offset = desc.offset();

    // Compute offsets in the output tensor corresponding to desc.offset().
    DSizes<Index, NumDims> output_offsets;
    for (int i = NumDims - 1; i > 0; --i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      const int stride_dim = IsColMajor ? dim : dim + 1;
      output_offsets[dim] = offset / m_outputStrides[stride_dim];
      offset -= output_offsets[dim] * m_outputStrides[stride_dim];
    }
    output_offsets[inner_dim_idx] = offset;

    // Offsets in the input corresponding to the output offsets.
    DSizes<Index, NumDims> input_offsets = output_offsets;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offsets[dim] = input_offsets[dim] - m_padding[dim].first;
    }

    // Compute the offset in the input buffer. At this point it might be invalid
    // and point outside of the input buffer, because we don't check for negative
    // offsets; it is corrected in the block iteration loop below.
    Index input_offset = 0;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offset += input_offsets[dim] * m_inputStrides[dim];
    }

    // Destination buffer and scratch buffer are both indexed from 0 and have the
    // same dimensions as the requested block (for the destination buffer this
    // property is guaranteed by `desc.destination()`).
    Index output_offset = 0;
    const DSizes<Index, NumDims> output_strides =
        internal::strides<Layout>(desc.dimensions());

    // NOTE(ezhulenev): We initialize block iteration state for `NumDims - 1`
    // dimensions, skipping the innermost dimension. In theory it should be
    // possible to squeeze matching innermost dimensions, however in practice
    // that did not show any improvements in benchmarks. Also in practice the
    // first outer dimension usually has padding, which will prevent squeezing.

    // Initialize the output block iterator state. Dimensions in this array are
    // always in inner_most -> outer_most order (col major layout).
    array<BlockIteratorState, NumDims - 1> it;
    for (int i = 0; i < NumDims - 1; ++i) {
      const int dim = IsColMajor ? i + 1 : NumDims - i - 2;
      it[i].count = 0;
      it[i].size = desc.dimension(dim);

      it[i].input_stride = m_inputStrides[dim];
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      it[i].output_stride = output_strides[dim];
      it[i].output_span = it[i].output_stride * (it[i].size - 1);
    }

    const Index input_inner_dim_size =
        static_cast<Index>(m_impl.dimensions()[inner_dim_idx]);

    // Total output size.
    const Index output_size = desc.size();

    // We will fill an inner dimension of this size in the output. It might be
    // larger than the inner dimension in the input, so we might have to pad
    // before/after we copy values from the input inner dimension.
    const Index output_inner_dim_size = desc.dimension(inner_dim_idx);

    // How many values to fill with padding BEFORE reading from the input inner
    // dimension.
    const Index output_inner_pad_before_size =
        input_offsets[inner_dim_idx] < 0
            ? numext::mini(numext::abs(input_offsets[inner_dim_idx]),
                           output_inner_dim_size)
            : 0;

    // How many values we can actually copy from the input inner dimension.
    const Index output_inner_copy_size = numext::mini(
        // Want to copy from input.
        (output_inner_dim_size - output_inner_pad_before_size),
        // Can copy from input.
        numext::maxi(input_inner_dim_size - (input_offsets[inner_dim_idx] +
                                             output_inner_pad_before_size),
                     Index(0)));

    eigen_assert(output_inner_copy_size >= 0);

    // How many values to fill with padding AFTER reading from the input inner
    // dimension.
    const Index output_inner_pad_after_size =
        (output_inner_dim_size - output_inner_copy_size -
         output_inner_pad_before_size);

    // Sanity check: the sum of all sizes must be equal to the output size.
    eigen_assert(output_inner_dim_size ==
                 (output_inner_pad_before_size + output_inner_copy_size +
                  output_inner_pad_after_size));

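    // Worked example for the three sizes above (added for illustration; not in
    // the original source): if the input inner dimension has size 2 with left
    // padding 1, and the block starts at output inner coordinate 0 with inner
    // size 4, then input_offsets[inner] == -1, so pad_before == 1, copy == 2
    // and pad_after == 1, which indeed sums to the block inner size of 4.
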
    // Keep track of the current coordinates and padding in the output.
    DSizes<Index, NumDims> output_coord = output_offsets;
    DSizes<Index, NumDims> output_padded;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
    }

    typedef internal::StridedLinearBufferCopy<ScalarNoConst, Index> LinCopy;

    // Prepare storage for the materialized padding result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);

    // TODO(ezhulenev): Squeeze multiple non-padded inner dimensions into a
    // single logical inner dimension.

    // When possible we squeeze writes for the innermost (only if non-padded)
    // dimension with the first padded dimension. This reduces the number of
    // calls to LinCopy and makes better use of vector instructions.
    const bool squeeze_writes =
        NumDims > 1 &&
        // inner dimension is not padded
        (input_inner_dim_size == m_dimensions[inner_dim_idx]) &&
        // and equal to the block inner dimension
        (input_inner_dim_size == output_inner_dim_size);

    const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1;

    // Maximum coordinate on a squeeze dimension that we can write to.
    const Index squeeze_max_coord =
        squeeze_writes ? numext::mini(
                             // max non-padded element in the input
                             static_cast<Index>(m_dimensions[squeeze_dim] -
                                                m_padding[squeeze_dim].second),
                             // max element in the output buffer
                             static_cast<Index>(output_offsets[squeeze_dim] +
                                                desc.dimension(squeeze_dim)))
                       : static_cast<Index>(0);

    // Iterate copying data from `m_impl.data()` to the output buffer.
    for (Index size = 0; size < output_size;) {
      // Detect if we are in the padded region (excluding the innermost dimension).
      bool is_padded = false;
      for (int j = 1; j < NumDims; ++j) {
        const int dim = IsColMajor ? j : NumDims - j - 1;
        is_padded = output_padded[dim];
        if (is_padded) break;
      }

      if (is_padded) {
        // Fill the single innermost dimension with the padding value.
        size += output_inner_dim_size;

        LinCopy::template Run<LinCopy::Kind::FillLinear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(0, 0, &m_paddingValue),
            output_inner_dim_size);

      } else if (squeeze_writes) {
        // Squeeze multiple reads from innermost dimensions.
        const Index squeeze_num = squeeze_max_coord - output_coord[squeeze_dim];
        size += output_inner_dim_size * squeeze_num;

        // Copy `squeeze_num` inner dimensions from input to output.
        LinCopy::template Run<LinCopy::Kind::Linear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(input_offset, 1, m_impl.data()),
            output_inner_dim_size * squeeze_num);

        // Update the iteration state for only `squeeze_num - 1` processed inner
        // dimensions, because the iteration state update at the end of the loop
        // takes care of the last processed inner dimension.
        it[0].count += (squeeze_num - 1);
        input_offset += it[0].input_stride * (squeeze_num - 1);
        output_offset += it[0].output_stride * (squeeze_num - 1);
        output_coord[squeeze_dim] += (squeeze_num - 1);

      } else {
        // Single read from the innermost dimension.
        size += output_inner_dim_size;

        { // Fill with padding before copying from the input inner dimension.
          const Index out = output_offset;

          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_before_size);
        }

        { // Copy data from the input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size;
          const Index in = input_offset + output_inner_pad_before_size;

          eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);

          LinCopy::template Run<LinCopy::Kind::Linear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(in, 1, m_impl.data()),
              output_inner_copy_size);
        }

        { // Fill with padding after copying from the input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size +
                            output_inner_copy_size;

          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_after_size);
        }
      }

      for (int j = 0; j < NumDims - 1; ++j) {
        const int dim = IsColMajor ? j + 1 : NumDims - j - 2;

        if (++it[j].count < it[j].size) {
          input_offset += it[j].input_stride;
          output_offset += it[j].output_stride;
          output_coord[dim] += 1;
          output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
          break;
        }
        it[j].count = 0;
        input_offset -= it[j].input_span;
        output_offset -= it[j].output_span;
        output_coord[dim] -= it[j].size - 1;
        output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 private:
  struct BlockIteratorState {
    BlockIteratorState()
        : count(0),
          size(0),
          input_stride(0),
          input_span(0),
          output_stride(0),
          output_span(0) {}

    Index count;
    Index size;
    Index input_stride;
    Index input_span;
    Index output_stride;
    Index output_span;
  };

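  // Illustration for the helper below (added for illustration; not in the
  // original source): with m_dimensions[d] == 6 and m_padding[d] == (1, 2),
  // indices 0, 4 and 5 of the padded dimension are padding, while indices
  // 1..3 map to the input.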
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
      Index index, int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
            index < m_padding[dim_index].first) ||
           (!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
            index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#else
    return (index < m_padding[dim_index].first) ||
           (index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#endif
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero(
      int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
#else
    EIGEN_UNUSED_VARIABLE(dim_index);
    return false;
#endif
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero(
      int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
#else
    EIGEN_UNUSED_VARIABLE(dim_index);
    return false;
#endif
  }


  void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const {
    const double in = static_cast<double>(m_impl.dimensions()[i]);
    const double out = in + m_padding[i].first + m_padding[i].second;
    if (out == 0)
      return;
    const double reduction = in / out;
    cost *= reduction;
    if (first) {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                                 reduction * (1 * TensorOpCost::AddCost<Index>()));
    } else {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                                 2 * TensorOpCost::MulCost<Index>() +
                                 reduction * (2 * TensorOpCost::MulCost<Index>() +
                                              1 * TensorOpCost::DivCost<Index>()));
    }
  }

 protected:

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index initialIndex = index;
    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = NumDims - 1; i > 0; --i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
      const Index lastPaddedRight = m_outputStrides[i+1];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // all the coefficients are between the 2 padding zones.
        const Index idx = index / m_outputStrides[i];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      else {
        // Every other case
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index lastIdx = index + PacketSize - 1;
    const Index firstIdx = index;
    const Index lastPaddedLeft = m_padding[0].first;
    const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
    const Index lastPaddedRight = m_outputStrides[1];

    if (!isLeftPaddingCompileTimeZero(0) && lastIdx < lastPaddedLeft) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(0) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      // all the coefficients are between the 2 padding zones.
      inputIndex += (index - m_padding[0].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    // Every other case
    return packetWithPossibleZero(initialIndex);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index initialIndex = index;
    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
      const Index lastPaddedRight = m_outputStrides[i];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // all the coefficients are between the 2 padding zones.
        const Index idx = index / m_outputStrides[i+1];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      else {
        // Every other case
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index lastIdx = index + PacketSize - 1;
    const Index firstIdx = index;
    const Index lastPaddedLeft = m_padding[NumDims-1].first;
    const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
    const Index lastPaddedRight = m_outputStrides[NumDims-1];

    if (!isLeftPaddingCompileTimeZero(NumDims-1) && lastIdx < lastPaddedLeft) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(NumDims-1) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      // all the coefficients are between the 2 padding zones.
      inputIndex += (index - m_padding[NumDims-1].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    // Every other case
    return packetWithPossibleZero(initialIndex);
  }

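  // Note on the fallback below (added for illustration; not in the original
  // source): when a packet straddles a boundary between input data and a
  // padded region, e.g. a 4-wide packet that starts on the last non-padded
  // column and runs into the right padding, packetColMajor/packetRowMajor
  // cannot issue a single vector load and instead gather the coefficients
  // one by one here.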
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
  {
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  Dimensions m_dimensions;
  array<Index, NumDims+1> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  PaddingDimensions m_padding;

  Scalar m_paddingValue;

  const Device EIGEN_DEVICE_REF m_device;
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H