TensorEvaluator.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
12 
13 namespace Eigen {
14 
26 // Generic evaluator
27 template<typename Derived, typename Device>
28 struct TensorEvaluator
29 {
30  typedef typename Derived::Index Index;
31  typedef typename Derived::Scalar Scalar;
32  typedef typename Derived::Scalar CoeffReturnType;
33  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
34  typedef typename Derived::Dimensions Dimensions;
35  typedef Derived XprType;
36  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
37  typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType;
38  typedef StorageMemory<Scalar, Device> Storage;
39  typedef typename Storage::Type EvaluatorPointerType;
40 
41  // NumDimensions is -1 for variable dim tensors
42  static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
43  internal::traits<Derived>::NumDimensions : 0;
44 
45  enum {
46  IsAligned = Derived::IsAligned,
47  PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
48  BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
49  PreferBlockAccess = false,
50  Layout = Derived::Layout,
51  CoordAccess = NumCoords > 0,
52  RawAccess = true
53  };
54 
55  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
56 
57  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
58  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
59  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
60 
61  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
62  Layout, Index>
63  TensorBlock;
64  //===--------------------------------------------------------------------===//
65 
66  EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
67  : m_data(device.get((const_cast<TensorPointerType>(m.data())))),
68  m_dims(m.dimensions()),
69  m_device(device)
70  { }
71 
72 
73  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
74 
75  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
76  if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) {
77  m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
78  return false;
79  }
80  return true;
81  }
82 
83 #ifdef EIGEN_USE_THREADS
84  template <typename EvalSubExprsCallback>
85  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
86  EvaluatorPointerType dest, EvalSubExprsCallback done) {
87  // TODO(ezhulenev): ThreadPoolDevice memcpy is a blocking operation.
88  done(evalSubExprsIfNeeded(dest));
89  }
90 #endif // EIGEN_USE_THREADS
91 
92  EIGEN_STRONG_INLINE void cleanup() {}
93 
94  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
95  eigen_assert(m_data != NULL);
96  return m_data[index];
97  }
98 
99  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) {
100  eigen_assert(m_data != NULL);
101  return m_data[index];
102  }
103 
104  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
105  PacketReturnType packet(Index index) const
106  {
107  return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
108  }
109 
110  // Return a packet starting at `index`, where `umask` specifies which elements
111  // have to be loaded. The type/size of the mask depends on PacketReturnType; e.g. for
112  // Packet16f, `umask` is of type uint16_t, and if a bit is 1 the corresponding
113  // float element will be loaded, otherwise 0 will be loaded.
114  // The function has been templatized to enable SFINAE.
115  template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
116  typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
117  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
118  {
119  return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
120  }
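  // Illustrative sketch of the mask semantics (assuming an AVX-512 build where
  // PacketReturnType is Packet16f and mask_t is uint16_t; the variable names are
  // only for the example):
  //   uint16_t umask = 0x00FF;                          // bits 0-7 set
  //   Packet16f p = partialPacket<Packet16f>(i, umask); // loads 8 floats, zeros the other 8
  // This is the typical way to handle the tail of a range whose length is not a
  // multiple of the packet size.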
121 
122  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
123  void writePacket(Index index, const PacketReturnType& x)
124  {
125  return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x);
126  }
127 
128  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
129  eigen_assert(m_data != NULL);
130  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
131  return m_data[m_dims.IndexOfColMajor(coords)];
132  } else {
133  return m_data[m_dims.IndexOfRowMajor(coords)];
134  }
135  }
136 
137  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType&
138  coeffRef(const array<DenseIndex, NumCoords>& coords) {
139  eigen_assert(m_data != NULL);
140  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
141  return m_data[m_dims.IndexOfColMajor(coords)];
142  } else {
143  return m_data[m_dims.IndexOfRowMajor(coords)];
144  }
145  }
146 
147  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
148  return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
149  PacketType<CoeffReturnType, Device>::size);
150  }
151 
152  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
153  internal::TensorBlockResourceRequirements getResourceRequirements() const {
154  return internal::TensorBlockResourceRequirements::any();
155  }
156 
157  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
158  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
159  bool /*root_of_expr_ast*/ = false) const {
160  assert(m_data != NULL);
161  return TensorBlock::materialize(m_data, m_dims, desc, scratch);
162  }
163 
164  template<typename TensorBlock>
165  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
166  const TensorBlockDesc& desc, const TensorBlock& block) {
167  assert(m_data != NULL);
168 
169  typedef typename TensorBlock::XprType TensorBlockExpr;
170  typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr,
171  Index>
172  TensorBlockAssign;
173 
174  TensorBlockAssign::Run(
175  TensorBlockAssign::target(desc.dimensions(),
176  internal::strides<Layout>(m_dims), m_data,
177  desc.offset()),
178  block.expr());
179  }
180 
181  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
182 
183 #ifdef EIGEN_USE_SYCL
184  // binding placeholder accessors to a command group handler for SYCL
185  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
186  m_data.bind(cgh);
187  }
188 #endif
189  protected:
190  EvaluatorPointerType m_data;
191  Dimensions m_dims;
192  const Device EIGEN_DEVICE_REF m_device;
193 };
194 
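For orientation, here is a minimal sketch of how this generic evaluator is typically driven, mirroring the sequence the tensor executors use internally (evalSubExprsIfNeeded, then coeff/packet, then cleanup). It assumes the unsupported CXX11 Tensor header and the default single-threaded device; the variable names are illustrative only.

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> t(3, 4);
  t.setRandom();

  Eigen::DefaultDevice device;
  // The generic evaluator wraps the concrete tensor; m_data aliases t's buffer.
  Eigen::TensorEvaluator<decltype(t), Eigen::DefaultDevice> eval(t, device);

  float sum = 0.0f;
  if (eval.evalSubExprsIfNeeded(nullptr)) {  // true: the caller iterates the coefficients
    for (Eigen::Index i = 0; i < t.size(); ++i) {
      sum += eval.coeff(i);                  // element-wise access through the evaluator
    }
  }
  eval.cleanup();
  return sum == sum ? 0 : 1;  // use the result so the loop is not optimized away
}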
195 namespace {
196 template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
197 T loadConstant(const T* address) {
198  return *address;
199 }
200 // Use the texture cache on CUDA devices whenever possible
201 #if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
202 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
203 float loadConstant(const float* address) {
204  return __ldg(address);
205 }
206 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
207 double loadConstant(const double* address) {
208  return __ldg(address);
209 }
210 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
211 Eigen::half loadConstant(const Eigen::half* address) {
212  return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
213 }
214 #endif
215 #ifdef EIGEN_USE_SYCL
216 // Overload of loadConstant for SYCL, based on range access
217 template <cl::sycl::access::mode AcMd, typename T>
218 T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address) {
219  return *address;
220 }
221 #endif
222 }
223 
224 
225 // Default evaluator for rvalues
226 template<typename Derived, typename Device>
227 struct TensorEvaluator<const Derived, Device>
228 {
229  typedef typename Derived::Index Index;
230  typedef typename Derived::Scalar Scalar;
231  typedef typename Derived::Scalar CoeffReturnType;
232  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
233  typedef typename Derived::Dimensions Dimensions;
234  typedef const Derived XprType;
235  typedef typename internal::traits<Derived>::template MakePointer<const Scalar>::Type TensorPointerType;
236  typedef StorageMemory<const Scalar, Device> Storage;
237  typedef typename Storage::Type EvaluatorPointerType;
238 
239  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
240 
241  // NumDimensions is -1 for variable dim tensors
242  static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
243  internal::traits<Derived>::NumDimensions : 0;
244  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
245 
246  enum {
247  IsAligned = Derived::IsAligned,
248  PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
249  BlockAccess = internal::is_arithmetic<ScalarNoConst>::value,
250  PreferBlockAccess = false,
251  Layout = Derived::Layout,
252  CoordAccess = NumCoords > 0,
253  RawAccess = true
254  };
255 
256  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
257  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
258  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
259 
260  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
261  Layout, Index>
262  TensorBlock;
263  //===--------------------------------------------------------------------===//
264 
265  EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
266  : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device)
267  { }
268 
269  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
270 
271  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
272  if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
273  m_device.memcpy((void*)(m_device.get(data)),m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
274  return false;
275  }
276  return true;
277  }
278 
279 #ifdef EIGEN_USE_THREADS
280  template <typename EvalSubExprsCallback>
281  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
282  EvaluatorPointerType dest, EvalSubExprsCallback done) {
283  // TODO(ezhulenev): ThreadPoolDevice memcpy is a blocking operation.
284  done(evalSubExprsIfNeeded(dest));
285  }
286 #endif // EIGEN_USE_THREADS
287 
288  EIGEN_STRONG_INLINE void cleanup() { }
289 
290  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
291  eigen_assert(m_data != NULL);
292  return loadConstant(m_data+index);
293  }
294 
295  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
296  PacketReturnType packet(Index index) const
297  {
298  return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
299  }
300 
301  // Return a packet starting at `index`, where `umask` specifies which elements
302  // have to be loaded. The type/size of the mask depends on PacketReturnType; e.g. for
303  // Packet16f, `umask` is of type uint16_t, and if a bit is 1 the corresponding
304  // float element will be loaded, otherwise 0 will be loaded.
305  // The function has been templatized to enable SFINAE.
306  template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
307  typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
308  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
309  {
310  return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
311  }
312 
313  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
314  eigen_assert(m_data != NULL);
315  const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
316  : m_dims.IndexOfRowMajor(coords);
317  return loadConstant(m_data+index);
318  }
319 
320  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
321  return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
322  PacketType<CoeffReturnType, Device>::size);
323  }
324 
325  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
326  internal::TensorBlockResourceRequirements getResourceRequirements() const {
327  return internal::TensorBlockResourceRequirements::any();
328  }
329 
330  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
331  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
332  bool /*root_of_expr_ast*/ = false) const {
333  assert(m_data != NULL);
334  return TensorBlock::materialize(m_data, m_dims, desc, scratch);
335  }
336 
337  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
338 #ifdef EIGEN_USE_SYCL
339  // binding placeholder accessors to a command group handler for SYCL
340  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
341  m_data.bind(cgh);
342  }
343 #endif
344  protected:
345  EvaluatorPointerType m_data;
346  Dimensions m_dims;
347  const Device EIGEN_DEVICE_REF m_device;
348 };
349 
350 
351 
352 
353 // -------------------- CwiseNullaryOp --------------------
354 
355 template<typename NullaryOp, typename ArgType, typename Device>
356 struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
357 {
358  typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;
359 
360  TensorEvaluator(const XprType& op, const Device& device)
361  : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
362  { }
363 
364  typedef typename XprType::Index Index;
365  typedef typename XprType::Scalar Scalar;
366  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
367  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
368  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
369  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
370  typedef StorageMemory<CoeffReturnType, Device> Storage;
371  typedef typename Storage::Type EvaluatorPointerType;
372 
373  enum {
374  IsAligned = true,
375  PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess
376  #ifdef EIGEN_USE_SYCL
377  && (PacketType<CoeffReturnType, Device>::size >1)
378  #endif
379  ,
380  BlockAccess = false,
381  PreferBlockAccess = false,
382  Layout = TensorEvaluator<ArgType, Device>::Layout,
383  CoordAccess = false, // to be implemented
384  RawAccess = false
385  };
386 
387  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
388  typedef internal::TensorBlockNotImplemented TensorBlock;
389  //===--------------------------------------------------------------------===//
390 
391  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
392 
393  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }
394 
395 #ifdef EIGEN_USE_THREADS
396  template <typename EvalSubExprsCallback>
397  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
398  EvaluatorPointerType, EvalSubExprsCallback done) {
399  done(true);
400  }
401 #endif // EIGEN_USE_THREADS
402 
403  EIGEN_STRONG_INLINE void cleanup() { }
404 
405  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
406  {
407  return m_wrapper(m_functor, index);
408  }
409 
410  template<int LoadMode>
411  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
412  {
413  return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index);
414  }
415 
416  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
417  costPerCoeff(bool vectorized) const {
418  return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
419  PacketType<CoeffReturnType, Device>::size);
420  }
421 
422  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
423 
424 #ifdef EIGEN_USE_SYCL
425  // binding placeholder accessors to a command group handler for SYCL
426  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
427  m_argImpl.bind(cgh);
428  }
429 #endif
430 
431  private:
432  const NullaryOp m_functor;
433  TensorEvaluator<ArgType, Device> m_argImpl;
434  const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
435 };
436 
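For context, TensorCwiseNullaryOp is what expressions such as constant() and random() produce at the user level: they take no tensor input and generate every coefficient from the functor through m_wrapper. A small sketch (header path and names assumed as in the earlier example):

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> a(2, 3);
  a.setRandom();

  // Both operands below are nullary expressions; they are only evaluated when
  // the whole expression is assigned to a concrete tensor.
  Eigen::Tensor<float, 2> b = a.constant(1.0f) + a.random();
  return b.size() == 6 ? 0 : 1;
}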
437 
438 
439 // -------------------- CwiseUnaryOp --------------------
440 
441 template<typename UnaryOp, typename ArgType, typename Device>
442 struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
443 {
444  typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;
445 
446  enum {
447  IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
448  PacketAccess = int(TensorEvaluator<ArgType, Device>::PacketAccess) &
449  int(internal::functor_traits<UnaryOp>::PacketAccess),
450  BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
451  PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
452  Layout = TensorEvaluator<ArgType, Device>::Layout,
453  CoordAccess = false, // to be implemented
454  RawAccess = false
455  };
456 
457  TensorEvaluator(const XprType& op, const Device& device)
458  : m_device(device),
459  m_functor(op.functor()),
460  m_argImpl(op.nestedExpression(), device)
461  { }
462 
463  typedef typename XprType::Index Index;
464  typedef typename XprType::Scalar Scalar;
465  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
466  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
467  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
468  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
469  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
470  typedef StorageMemory<CoeffReturnType, Device> Storage;
471  typedef typename Storage::Type EvaluatorPointerType;
472  static const int NumDims = internal::array_size<Dimensions>::value;
473 
474  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
475  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
476  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
477 
478  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
479  ArgTensorBlock;
480 
481  typedef internal::TensorCwiseUnaryBlock<UnaryOp, ArgTensorBlock>
482  TensorBlock;
483  //===--------------------------------------------------------------------===//
484 
485  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
486 
487  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
488  m_argImpl.evalSubExprsIfNeeded(NULL);
489  return true;
490  }
491 
492 #ifdef EIGEN_USE_THREADS
493  template <typename EvalSubExprsCallback>
494  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
495  EvaluatorPointerType, EvalSubExprsCallback done) {
496  m_argImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
497  }
498 #endif // EIGEN_USE_THREADS
499 
500  EIGEN_STRONG_INLINE void cleanup() {
501  m_argImpl.cleanup();
502  }
503 
504  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
505  {
506  return m_functor(m_argImpl.coeff(index));
507  }
508 
509  template<int LoadMode>
510  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
511  {
512  return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
513  }
514 
515  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
516  const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
517  return m_argImpl.costPerCoeff(vectorized) +
518  TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
519  }
520 
521  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
522  internal::TensorBlockResourceRequirements getResourceRequirements() const {
523  static const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
524  return m_argImpl.getResourceRequirements().addCostPerCoeff(
525  {0, 0, functor_cost / PacketSize});
526  }
527 
528  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
529  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
530  bool /*root_of_expr_ast*/ = false) const {
531  return TensorBlock(m_argImpl.block(desc, scratch), m_functor);
532  }
533 
534  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
535 
536 #ifdef EIGEN_USE_SYCL
537  // binding placeholder accessors to a command group handler for SYCL
538  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const{
539  m_argImpl.bind(cgh);
540  }
541 #endif
542 
543 
544  private:
545  const Device EIGEN_DEVICE_REF m_device;
546  const UnaryOp m_functor;
547  TensorEvaluator<ArgType, Device> m_argImpl;
548 };
549 
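A short sketch of the user-level expressions that reach this evaluator: each unary call builds a TensorCwiseUnaryOp node, and coeff()/packet() above simply apply the functor to the argument evaluator's output (the header path is assumed as in the earlier examples):

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> a(2, 3);
  a.setRandom();

  // Unary ops compose lazily; nothing is computed until the assignment below.
  Eigen::Tensor<float, 2> r = (-a).abs().sqrt();
  return r.size() == 6 ? 0 : 1;
}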
550 
551 // -------------------- CwiseBinaryOp --------------------
552 
553 template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device>
554 struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
555 {
556  typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;
557 
558  enum {
559  IsAligned = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
560  int(TensorEvaluator<RightArgType, Device>::IsAligned),
561  PacketAccess = int(TensorEvaluator<LeftArgType, Device>::PacketAccess) &
562  int(TensorEvaluator<RightArgType, Device>::PacketAccess) &
563  int(internal::functor_traits<BinaryOp>::PacketAccess),
564  BlockAccess = int(TensorEvaluator<LeftArgType, Device>::BlockAccess) &
565  int(TensorEvaluator<RightArgType, Device>::BlockAccess),
566  PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
567  int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
568  Layout = TensorEvaluator<LeftArgType, Device>::Layout,
569  CoordAccess = false, // to be implemented
570  RawAccess = false
571  };
572 
573  TensorEvaluator(const XprType& op, const Device& device)
574  : m_device(device),
575  m_functor(op.functor()),
576  m_leftImpl(op.lhsExpression(), device),
577  m_rightImpl(op.rhsExpression(), device)
578  {
579  EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
580  eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
581  }
582 
583  typedef typename XprType::Index Index;
584  typedef typename XprType::Scalar Scalar;
585  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
586  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
587  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
588  typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
589  typedef StorageMemory<CoeffReturnType, Device> Storage;
590  typedef typename Storage::Type EvaluatorPointerType;
591 
592  static const int NumDims = internal::array_size<
593  typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
594 
595  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
596  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
597  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
598 
599  typedef typename TensorEvaluator<const LeftArgType, Device>::TensorBlock
600  LeftTensorBlock;
601  typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlock
602  RightTensorBlock;
603 
604  typedef internal::TensorCwiseBinaryBlock<BinaryOp, LeftTensorBlock,
605  RightTensorBlock>
606  TensorBlock;
607  //===--------------------------------------------------------------------===//
608 
609  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
610  {
611  // TODO: use right impl instead if right impl dimensions are known at compile time.
612  return m_leftImpl.dimensions();
613  }
614 
615  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
616  m_leftImpl.evalSubExprsIfNeeded(NULL);
617  m_rightImpl.evalSubExprsIfNeeded(NULL);
618  return true;
619  }
620 
621 #ifdef EIGEN_USE_THREADS
622  template <typename EvalSubExprsCallback>
623  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
624  EvaluatorPointerType, EvalSubExprsCallback done) {
625  // TODO(ezhulenev): Evaluate two expressions in parallel?
626  m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
627  m_rightImpl.evalSubExprsIfNeededAsync(nullptr,
628  [done](bool) { done(true); });
629  });
630  }
631 #endif // EIGEN_USE_THREADS
632 
633  EIGEN_STRONG_INLINE void cleanup() {
634  m_leftImpl.cleanup();
635  m_rightImpl.cleanup();
636  }
637 
638  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
639  {
640  return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
641  }
642  template<int LoadMode>
643  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
644  {
645  return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index));
646  }
647 
648  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
649  costPerCoeff(bool vectorized) const {
650  const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
651  return m_leftImpl.costPerCoeff(vectorized) +
652  m_rightImpl.costPerCoeff(vectorized) +
653  TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
654  }
655 
656  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
657  internal::TensorBlockResourceRequirements getResourceRequirements() const {
658  static const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
659  return internal::TensorBlockResourceRequirements::merge(
660  m_leftImpl.getResourceRequirements(),
661  m_rightImpl.getResourceRequirements())
662  .addCostPerCoeff({0, 0, functor_cost / PacketSize});
663  }
664 
665  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
666  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
667  bool /*root_of_expr_ast*/ = false) const {
668  desc.DropDestinationBuffer();
669  return TensorBlock(m_leftImpl.block(desc, scratch),
670  m_rightImpl.block(desc, scratch), m_functor);
671  }
672 
673  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
674 
675  #ifdef EIGEN_USE_SYCL
676  // binding placeholder accessors to a command group handler for SYCL
677  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
678  m_leftImpl.bind(cgh);
679  m_rightImpl.bind(cgh);
680  }
681  #endif
682  private:
683  const Device EIGEN_DEVICE_REF m_device;
684  const BinaryOp m_functor;
685  TensorEvaluator<LeftArgType, Device> m_leftImpl;
686  TensorEvaluator<RightArgType, Device> m_rightImpl;
687 };
688 
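To make the control flow above concrete, here is a hedged sketch that evaluates a binary expression by hand through its evaluator, following the same sequence the executors use (default device; the names are illustrative only):

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 1> a(8), b(8);
  a.setRandom();
  b.setRandom();

  auto expr = a + b;  // a TensorCwiseBinaryOp node, not yet evaluated
  Eigen::DefaultDevice device;
  Eigen::TensorEvaluator<const decltype(expr), Eigen::DefaultDevice> eval(expr, device);

  eval.evalSubExprsIfNeeded(nullptr);
  float sum = 0.0f;
  for (Eigen::Index i = 0; i < 8; ++i) {
    sum += eval.coeff(i);  // functor applied to the two leaf coefficients
  }
  eval.cleanup();
  return sum == sum ? 0 : 1;
}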
689 // -------------------- CwiseTernaryOp --------------------
690 
691 template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device>
692 struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device>
693 {
694  typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;
695 
696  enum {
697  IsAligned = TensorEvaluator<Arg1Type, Device>::IsAligned & TensorEvaluator<Arg2Type, Device>::IsAligned & TensorEvaluator<Arg3Type, Device>::IsAligned,
698  PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess &&
699  TensorEvaluator<Arg2Type, Device>::PacketAccess &&
700  TensorEvaluator<Arg3Type, Device>::PacketAccess &&
701  internal::functor_traits<TernaryOp>::PacketAccess,
702  BlockAccess = false,
703  PreferBlockAccess = TensorEvaluator<Arg1Type, Device>::PreferBlockAccess ||
704  TensorEvaluator<Arg2Type, Device>::PreferBlockAccess ||
705  TensorEvaluator<Arg3Type, Device>::PreferBlockAccess,
706  Layout = TensorEvaluator<Arg1Type, Device>::Layout,
707  CoordAccess = false, // to be implemented
708  RawAccess = false
709  };
710 
711  TensorEvaluator(const XprType& op, const Device& device)
712  : m_functor(op.functor()),
713  m_arg1Impl(op.arg1Expression(), device),
714  m_arg2Impl(op.arg2Expression(), device),
715  m_arg3Impl(op.arg3Expression(), device)
716  {
717  EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) == static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
718 
719  EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
720  typename internal::traits<Arg2Type>::StorageKind>::value),
721  STORAGE_KIND_MUST_MATCH)
722  EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
723  typename internal::traits<Arg3Type>::StorageKind>::value),
724  STORAGE_KIND_MUST_MATCH)
725  EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
726  typename internal::traits<Arg2Type>::Index>::value),
727  STORAGE_INDEX_MUST_MATCH)
728  EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
729  typename internal::traits<Arg3Type>::Index>::value),
730  STORAGE_INDEX_MUST_MATCH)
731 
732  eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
733  }
734 
735  typedef typename XprType::Index Index;
736  typedef typename XprType::Scalar Scalar;
737  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
738  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
739  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
740  typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
741  typedef StorageMemory<CoeffReturnType, Device> Storage;
742  typedef typename Storage::Type EvaluatorPointerType;
743 
744  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
745  typedef internal::TensorBlockNotImplemented TensorBlock;
746  //===--------------------------------------------------------------------===//
747 
748  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
749  {
750  // TODO: use arg2 or arg3 dimensions if they are known at compile time.
751  return m_arg1Impl.dimensions();
752  }
753 
754  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
755  m_arg1Impl.evalSubExprsIfNeeded(NULL);
756  m_arg2Impl.evalSubExprsIfNeeded(NULL);
757  m_arg3Impl.evalSubExprsIfNeeded(NULL);
758  return true;
759  }
760  EIGEN_STRONG_INLINE void cleanup() {
761  m_arg1Impl.cleanup();
762  m_arg2Impl.cleanup();
763  m_arg3Impl.cleanup();
764  }
765 
766  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
767  {
768  return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
769  }
770  template<int LoadMode>
771  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
772  {
773  return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
774  m_arg2Impl.template packet<LoadMode>(index),
775  m_arg3Impl.template packet<LoadMode>(index));
776  }
777 
778  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
779  costPerCoeff(bool vectorized) const {
780  const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
781  return m_arg1Impl.costPerCoeff(vectorized) +
782  m_arg2Impl.costPerCoeff(vectorized) +
783  m_arg3Impl.costPerCoeff(vectorized) +
784  TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
785  }
786 
787  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
788 
789 #ifdef EIGEN_USE_SYCL
790  // binding placeholder accessors to a command group handler for SYCL
791  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
792  m_arg1Impl.bind(cgh);
793  m_arg2Impl.bind(cgh);
794  m_arg3Impl.bind(cgh);
795  }
796 #endif
797 
798  private:
799  const TernaryOp m_functor;
800  TensorEvaluator<Arg1Type, Device> m_arg1Impl;
801  TensorEvaluator<Arg2Type, Device> m_arg2Impl;
802  TensorEvaluator<Arg3Type, Device> m_arg3Impl;
803 };
804 
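Ternary expressions are rare at the user level; to the best of my knowledge the main built-in one is the regularized incomplete beta function exposed as Eigen::betainc. Treat that entry point, and the need for C99 math support in the underlying functor, as assumptions of this sketch:

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 1> a(4), b(4), x(4);
  a.setConstant(2.0f);
  b.setConstant(3.0f);
  x.setConstant(0.5f);

  // betainc(a, b, x) builds a TensorCwiseTernaryOp over the three arguments.
  Eigen::Tensor<float, 1> r = Eigen::betainc(a, b, x);
  return r.size() == 4 ? 0 : 1;
}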
805 
806 // -------------------- SelectOp --------------------
807 
808 template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device>
809 struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
810 {
811  typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
812  typedef typename XprType::Scalar Scalar;
813 
814  enum {
815  IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned &
816  TensorEvaluator<ElseArgType, Device>::IsAligned,
817  PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess &
818  TensorEvaluator<ElseArgType, Device>::PacketAccess &
819  PacketType<Scalar, Device>::HasBlend,
820  BlockAccess = TensorEvaluator<IfArgType, Device>::BlockAccess &&
821  TensorEvaluator<ThenArgType, Device>::BlockAccess &&
822  TensorEvaluator<ElseArgType, Device>::BlockAccess,
823  PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess ||
824  TensorEvaluator<ThenArgType, Device>::PreferBlockAccess ||
825  TensorEvaluator<ElseArgType, Device>::PreferBlockAccess,
826  Layout = TensorEvaluator<IfArgType, Device>::Layout,
827  CoordAccess = false, // to be implemented
828  RawAccess = false
829  };
830 
831  TensorEvaluator(const XprType& op, const Device& device)
832  : m_condImpl(op.ifExpression(), device),
833  m_thenImpl(op.thenExpression(), device),
834  m_elseImpl(op.elseExpression(), device)
835  {
836  EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ThenArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
837  EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ElseArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
838  eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions()));
839  eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions()));
840  }
841 
842  typedef typename XprType::Index Index;
843  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
844  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
845  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
846  typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
847  typedef StorageMemory<CoeffReturnType, Device> Storage;
848  typedef typename Storage::Type EvaluatorPointerType;
849 
850  static const int NumDims = internal::array_size<Dimensions>::value;
851 
852  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
853  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
854  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
855 
856  typedef typename TensorEvaluator<const IfArgType, Device>::TensorBlock
857  IfArgTensorBlock;
858  typedef typename TensorEvaluator<const ThenArgType, Device>::TensorBlock
859  ThenArgTensorBlock;
860  typedef typename TensorEvaluator<const ElseArgType, Device>::TensorBlock
861  ElseArgTensorBlock;
862 
863  struct TensorSelectOpBlockFactory {
864  template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
865  struct XprType {
866  typedef TensorSelectOp<const IfArgXprType, const ThenArgXprType, const ElseArgXprType> type;
867  };
868 
869  template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
870  typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type expr(
871  const IfArgXprType& if_expr, const ThenArgXprType& then_expr, const ElseArgXprType& else_expr) const {
872  return typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type(if_expr, then_expr, else_expr);
873  }
874  };
875 
876  typedef internal::TensorTernaryExprBlock<TensorSelectOpBlockFactory,
877  IfArgTensorBlock, ThenArgTensorBlock,
878  ElseArgTensorBlock>
879  TensorBlock;
880  //===--------------------------------------------------------------------===//
881 
882  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
883  {
884  // TODO: use then or else impl instead if they happen to be known at compile time.
885  return m_condImpl.dimensions();
886  }
887 
888  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
889  m_condImpl.evalSubExprsIfNeeded(NULL);
890  m_thenImpl.evalSubExprsIfNeeded(NULL);
891  m_elseImpl.evalSubExprsIfNeeded(NULL);
892  return true;
893  }
894 
895 #ifdef EIGEN_USE_THREADS
896  template <typename EvalSubExprsCallback>
897  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
898  EvaluatorPointerType, EvalSubExprsCallback done) {
899  m_condImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
900  m_thenImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
901  m_elseImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
902  });
903  });
904  }
905 #endif // EIGEN_USE_THREADS
906 
907  EIGEN_STRONG_INLINE void cleanup() {
908  m_condImpl.cleanup();
909  m_thenImpl.cleanup();
910  m_elseImpl.cleanup();
911  }
912 
913  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
914  {
915  return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
916  }
917  template<int LoadMode>
918  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
919  {
920  internal::Selector<PacketSize> select;
921  EIGEN_UNROLL_LOOP
922  for (Index i = 0; i < PacketSize; ++i) {
923  select.select[i] = m_condImpl.coeff(index+i);
924  }
925  return internal::pblend(select,
926  m_thenImpl.template packet<LoadMode>(index),
927  m_elseImpl.template packet<LoadMode>(index));
928 
929  }
930 
931  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
932  costPerCoeff(bool vectorized) const {
933  return m_condImpl.costPerCoeff(vectorized) +
934  m_thenImpl.costPerCoeff(vectorized)
935  .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
936  }
937 
938  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
939  internal::TensorBlockResourceRequirements getResourceRequirements() const {
940  auto then_req = m_thenImpl.getResourceRequirements();
941  auto else_req = m_elseImpl.getResourceRequirements();
942 
943  auto merged_req =
944  internal::TensorBlockResourceRequirements::merge(then_req, else_req);
945  merged_req.cost_per_coeff =
946  then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff);
947 
948  return internal::TensorBlockResourceRequirements::merge(
949  m_condImpl.getResourceRequirements(), merged_req);
950  }
951 
952  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
953  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
954  bool /*root_of_expr_ast*/ = false) const {
955  // It's unsafe to pass the destination buffer to underlying expressions, because
956  // output might be aliased with one of the inputs.
957  desc.DropDestinationBuffer();
958 
959  return TensorBlock(
960  m_condImpl.block(desc, scratch), m_thenImpl.block(desc, scratch),
961  m_elseImpl.block(desc, scratch), TensorSelectOpBlockFactory());
962  }
963 
964  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }
965 
966 #ifdef EIGEN_USE_SYCL
967  // binding placeholder accessors to a command group handler for SYCL
968  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
969  m_condImpl.bind(cgh);
970  m_thenImpl.bind(cgh);
971  m_elseImpl.bind(cgh);
972  }
973 #endif
974  private:
975  TensorEvaluator<IfArgType, Device> m_condImpl;
976  TensorEvaluator<ThenArgType, Device> m_thenImpl;
977  TensorEvaluator<ElseArgType, Device> m_elseImpl;
978 };
979 
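A small sketch of the expression that reaches this evaluator: cond.select(then, else) builds a TensorSelectOp, and per coefficient the evaluator reads the boolean condition and picks one of the two branches (header path assumed as before):

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> a(2, 3), b(2, 3);
  a.setRandom();
  b.setRandom();

  // Element-wise "condition ? a : b".
  Eigen::Tensor<float, 2> c = (a > a.constant(0.0f)).select(a, b);
  return c.size() == 6 ? 0 : 1;
}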
980 
981 } // end namespace Eigen
982 
983 #endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H