#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H

namespace Eigen {

/** \class TensorEvaluator
  * \ingroup CXX11_Tensor_Module
  *
  * \brief The tensor evaluator class (responsible for the evaluation of the
  * tensor expression).
  */
// Generic evaluator
template<typename Derived, typename Device>
struct TensorEvaluator
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;
  typedef Derived XprType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType;
  typedef StorageMemory<Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  // NumDimensions is -1 for variable-dimension tensors; clamp to 0 coordinates.
  static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                               internal::traits<Derived>::NumDimensions : 0;
  enum {
    IsAligned         = Derived::IsAligned,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
    PreferBlockAccess = false,
    Layout            = Derived::Layout,
    CoordAccess       = NumCoords > 0,
    RawAccess         = true
  };
  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(device.get((const_cast<TensorPointerType>(m.data())))),
        m_dims(m.dimensions()),
        m_device(device)
  { }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) {
      // A destination buffer was provided: copy the data there and report
      // (by returning false) that no per-coefficient evaluation is needed.
      m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data),
                      m_dims.TotalSize() * sizeof(Scalar));
      return false;
    }
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType dest, EvalSubExprsCallback done) {
    done(evalSubExprsIfNeeded(dest));
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {}
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data != NULL);
    return m_data[index];
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) {
    eigen_assert(m_data != NULL);
    return m_data[index];
  }
  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
  }
  // Returns a packet starting at `index`; `umask` selects which lanes are
  // actually loaded (masked-out lanes read as 0). Templatized to enable SFINAE.
  template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
  {
    return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
  }
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data != NULL);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType&
  coeffRef(const array<DenseIndex, NumCoords>& coords) {
    eigen_assert(m_data != NULL);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    assert(m_data != NULL);
    return TensorBlock::materialize(m_data, m_dims, desc, scratch);
  }
  template<typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    assert(m_data != NULL);

    typedef typename TensorBlock::XprType TensorBlockExpr;
    typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr,
                                            Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(
        TensorBlockAssign::target(desc.dimensions(),
                                  internal::strides<Layout>(m_dims), m_data,
                                  desc.offset()),
        block.expr());
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_data.bind(cgh);
  }
#endif

 protected:
  EvaluatorPointerType m_data;
  Dimensions m_dims;
  const Device EIGEN_DEVICE_REF m_device;
};
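
// A minimal sketch of the evaluator protocol (illustration only, not part of
// the Eigen API; in practice internal::TensorExecutor drives these calls):
//
//   Eigen::Tensor<float, 2> t(2, 3);
//   t.setRandom();
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<Eigen::Tensor<float, 2>, Eigen::DefaultDevice>
//       eval(t, device);
//   if (eval.evalSubExprsIfNeeded(nullptr)) {
//     float first = eval.coeff(0);  // flat-index coefficient access
//   }
//   eval.cleanup();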
namespace {

template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T loadConstant(const T* address) {
  return *address;
}
// Use the texture cache on CUDA devices whenever possible
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float loadConstant(const float* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double loadConstant(const double* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
Eigen::half loadConstant(const Eigen::half* address) {
  return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
}
#endif
#ifdef EIGEN_USE_SYCL
// overload of loadConstant for SYCL, based on range access
template <cl::sycl::access::mode AcMd, typename T>
T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address) {
  return *address;
}
#endif

}  // namespace
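
// Illustration only: on CUDA devices with compute capability >= 3.5 the
// specializations above pull reads through the read-only (texture) cache via
// __ldg; everywhere else loadConstant is a plain dereference:
//
//   const float* p = ...;        // pointer assumed valid on the target
//   float v = loadConstant(p);   // *p, or __ldg(p) under EIGEN_CUDA_ARCH >= 350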
// Default evaluator for rvalues
template<typename Derived, typename Device>
struct TensorEvaluator<const Derived, Device>
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;
  typedef const Derived XprType;
  typedef typename internal::traits<Derived>::template MakePointer<const Scalar>::Type TensorPointerType;
  typedef StorageMemory<const Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  // NumDimensions is -1 for variable-dimension tensors; clamp to 0 coordinates.
  static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                               internal::traits<Derived>::NumDimensions : 0;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  enum {
    IsAligned         = Derived::IsAligned,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = internal::is_arithmetic<ScalarNoConst>::value,
    PreferBlockAccess = false,
    Layout            = Derived::Layout,
    CoordAccess       = NumCoords > 0,
    RawAccess         = true
  };
  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device)
  { }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
      m_device.memcpy((void*)(m_device.get(data)), m_device.get(m_data),
                      m_dims.TotalSize() * sizeof(Scalar));
      return false;
    }
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType dest, EvalSubExprsCallback done) {
    done(evalSubExprsIfNeeded(dest));
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data != NULL);
    return loadConstant(m_data+index);
  }
  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
  }
  template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
  {
    return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data != NULL);
    const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
                                                                                 : m_dims.IndexOfRowMajor(coords);
    return loadConstant(m_data+index);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    assert(m_data != NULL);
    return TensorBlock::materialize(m_data, m_dims, desc, scratch);
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_data.bind(cgh);
  }
#endif

 protected:
  EvaluatorPointerType m_data;
  Dimensions m_dims;
  const Device EIGEN_DEVICE_REF m_device;
};
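
// Sketch (illustration only): read-only leaves of an expression tree are
// evaluated through this const specialization, e.g. a TensorMap over an
// existing buffer:
//
//   const float* src = get_buffer();  // hypothetical existing buffer
//   Eigen::TensorMap<const Eigen::Tensor<float, 1> > in(src, 100);
//   Eigen::Tensor<float, 1> out(100);
//   out = in * 2.0f;  // `in` is read via the coeff()/packet() defined above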
// -------------------- CwiseNullaryOp --------------------

template<typename NullaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
{
  typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  TensorEvaluator(const XprType& op, const Device& device)
      : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
  { }
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  enum {
    IsAligned         = true,
    PacketAccess      = internal::functor_traits<NullaryOp>::PacketAccess
#ifdef EIGEN_USE_SYCL
                        && (PacketType<CoeffReturnType, Device>::size > 1)
#endif
    ,
    BlockAccess       = false,
    PreferBlockAccess = false,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    done(true);
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_wrapper(m_functor, index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
#ifdef EIGEN_USE_SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_argImpl.bind(cgh);
  }
#endif

 private:
  const NullaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
  const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
};
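
// Sketch (illustration only): nullary expressions have no input tensor, so
// coeff() and packet() simply invoke the functor through m_wrapper:
//
//   Eigen::Tensor<float, 2> t(4, 4);
//   t = t.constant(3.14f);  // scalar constant functor, evaluated by this class
//   t = t.random();         // random generator functor, likewise nullary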
// -------------------- CwiseUnaryOp --------------------

template<typename UnaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
{
  typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;

  enum {
    IsAligned         = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess      = int(TensorEvaluator<ArgType, Device>::PacketAccess) &
                        int(internal::functor_traits<UnaryOp>::PacketAccess),
    BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : m_device(device),
      m_functor(op.functor()),
      m_argImpl(op.nestedExpression(), device)
  { }
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  static const int NumDims = internal::array_size<Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  typedef internal::TensorCwiseUnaryBlock<UnaryOp, ArgTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_argImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_argImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_argImpl.cleanup();
  }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_argImpl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.getResourceRequirements().addCostPerCoeff(
        {0, 0, functor_cost / PacketSize});
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    return TensorBlock(m_argImpl.block(desc, scratch), m_functor);
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_argImpl.bind(cgh);
  }
#endif

 private:
  const Device EIGEN_DEVICE_REF m_device;
  const UnaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
};
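
// Sketch (illustration only): a unary expression streams every coefficient
// (or packet) of its argument through the functor without materializing a
// temporary for the argument:
//
//   Eigen::Tensor<float, 2> a(8, 8), b(8, 8);
//   a.setRandom();
//   b = a.square();  // coeff(i) == functor(m_argImpl.coeff(i))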
// -------------------- CwiseBinaryOp --------------------

template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device>
struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
{
  typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;

  enum {
    IsAligned         = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
                        int(TensorEvaluator<RightArgType, Device>::IsAligned),
    PacketAccess      = int(TensorEvaluator<LeftArgType, Device>::PacketAccess) &
                        int(TensorEvaluator<RightArgType, Device>::PacketAccess) &
                        int(internal::functor_traits<BinaryOp>::PacketAccess),
    BlockAccess       = int(TensorEvaluator<LeftArgType, Device>::BlockAccess) &
                        int(TensorEvaluator<RightArgType, Device>::BlockAccess),
    PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
                        int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
    Layout            = TensorEvaluator<LeftArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : m_device(device),
      m_functor(op.functor()),
      m_leftImpl(op.lhsExpression(), device),
      m_rightImpl(op.rhsExpression(), device)
  {
    eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
  }
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static const int NumDims = internal::array_size<
      typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const LeftArgType, Device>::TensorBlock
      LeftTensorBlock;
  typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlock
      RightTensorBlock;

  typedef internal::TensorCwiseBinaryBlock<BinaryOp, LeftTensorBlock,
                                           RightTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use right impl instead if right impl dimensions are known at compile time.
    return m_leftImpl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_leftImpl.evalSubExprsIfNeeded(NULL);
    m_rightImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_rightImpl.evalSubExprsIfNeededAsync(nullptr,
                                            [done](bool) { done(true); });
    });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_leftImpl.cleanup();
    m_rightImpl.cleanup();
  }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index),
                              m_rightImpl.template packet<LoadMode>(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return m_leftImpl.costPerCoeff(vectorized) +
           m_rightImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return internal::TensorBlockResourceRequirements::merge(
               m_leftImpl.getResourceRequirements(),
               m_rightImpl.getResourceRequirements())
        .addCostPerCoeff({0, 0, functor_cost / PacketSize});
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    desc.DropDestinationBuffer();
    return TensorBlock(m_leftImpl.block(desc, scratch),
                       m_rightImpl.block(desc, scratch), m_functor);
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_leftImpl.bind(cgh);
    m_rightImpl.bind(cgh);
  }
#endif

 private:
  const Device EIGEN_DEVICE_REF m_device;
  const BinaryOp m_functor;
  TensorEvaluator<LeftArgType, Device> m_leftImpl;
  TensorEvaluator<RightArgType, Device> m_rightImpl;
};
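
// Sketch (illustration only): both operands are walked in lockstep with the
// same flat index, which is why the constructor asserts that the dimensions
// match:
//
//   Eigen::Tensor<float, 2> a(8, 8), b(8, 8), c(8, 8);
//   a.setRandom(); b.setRandom();
//   c = a + b;  // coeff(i) == functor(m_leftImpl.coeff(i), m_rightImpl.coeff(i))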
// -------------------- CwiseTernaryOp --------------------

template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device>
struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device>
{
  typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;

  enum {
    IsAligned         = TensorEvaluator<Arg1Type, Device>::IsAligned &
                        TensorEvaluator<Arg2Type, Device>::IsAligned &
                        TensorEvaluator<Arg3Type, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<Arg1Type, Device>::PacketAccess &&
                        TensorEvaluator<Arg2Type, Device>::PacketAccess &&
                        TensorEvaluator<Arg3Type, Device>::PacketAccess &&
                        internal::functor_traits<TernaryOp>::PacketAccess,
    BlockAccess       = false,
    PreferBlockAccess = TensorEvaluator<Arg1Type, Device>::PreferBlockAccess ||
                        TensorEvaluator<Arg2Type, Device>::PreferBlockAccess ||
                        TensorEvaluator<Arg3Type, Device>::PreferBlockAccess,
    Layout            = TensorEvaluator<Arg1Type, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : m_functor(op.functor()),
      m_arg1Impl(op.arg1Expression(), device),
      m_arg2Impl(op.arg2Expression(), device),
      m_arg3Impl(op.arg3Expression(), device)
  {
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                                           typename internal::traits<Arg2Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                                           typename internal::traits<Arg3Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                                           typename internal::traits<Arg2Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                                           typename internal::traits<Arg3Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)

    eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) &&
                 dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
  }
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use arg2 or arg3 dimensions if they are known at compile time.
    return m_arg1Impl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_arg1Impl.evalSubExprsIfNeeded(NULL);
    m_arg2Impl.evalSubExprsIfNeeded(NULL);
    m_arg3Impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_STRONG_INLINE void cleanup() {
    m_arg1Impl.cleanup();
    m_arg2Impl.cleanup();
    m_arg3Impl.cleanup();
  }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
                              m_arg2Impl.template packet<LoadMode>(index),
                              m_arg3Impl.template packet<LoadMode>(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
    return m_arg1Impl.costPerCoeff(vectorized) +
           m_arg2Impl.costPerCoeff(vectorized) +
           m_arg3Impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_arg1Impl.bind(cgh);
    m_arg2Impl.bind(cgh);
    m_arg3Impl.bind(cgh);
  }
#endif

 private:
  const TernaryOp m_functor;
  TensorEvaluator<Arg1Type, Device> m_arg1Impl;
  TensorEvaluator<Arg2Type, Device> m_arg2Impl;
  TensorEvaluator<Arg3Type, Device> m_arg3Impl;
};
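
// Sketch (illustration only): a ternary cwise expression evaluates three
// lockstep arguments through one functor, e.g. betainc() from the tensor
// module:
//
//   Eigen::Tensor<float, 1> a(10), b(10), x(10), r(10);
//   a.setRandom(); b.setRandom(); x.setRandom();
//   r = Eigen::betainc(a, b, x);  // regularized incomplete beta, elementwise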
// -------------------- SelectOp --------------------

template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device>
struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
{
  typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
  typedef typename XprType::Scalar Scalar;

  enum {
    IsAligned         = TensorEvaluator<ThenArgType, Device>::IsAligned &
                        TensorEvaluator<ElseArgType, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<ThenArgType, Device>::PacketAccess &
                        TensorEvaluator<ElseArgType, Device>::PacketAccess &
                        PacketType<Scalar, Device>::HasBlend,
    BlockAccess       = TensorEvaluator<IfArgType, Device>::BlockAccess &&
                        TensorEvaluator<ThenArgType, Device>::BlockAccess &&
                        TensorEvaluator<ElseArgType, Device>::BlockAccess,
    PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess ||
                        TensorEvaluator<ThenArgType, Device>::PreferBlockAccess ||
                        TensorEvaluator<ElseArgType, Device>::PreferBlockAccess,
    Layout            = TensorEvaluator<IfArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : m_condImpl(op.ifExpression(), device),
      m_thenImpl(op.thenExpression(), device),
      m_elseImpl(op.elseExpression(), device)
  {
    eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions()));
    eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions()));
  }
  typedef typename XprType::Index Index;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static const int NumDims = internal::array_size<Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const IfArgType, Device>::TensorBlock
      IfArgTensorBlock;
  typedef typename TensorEvaluator<const ThenArgType, Device>::TensorBlock
      ThenArgTensorBlock;
  typedef typename TensorEvaluator<const ElseArgType, Device>::TensorBlock
      ElseArgTensorBlock;
  struct TensorSelectOpBlockFactory {
    template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    struct XprType {
      typedef TensorSelectOp<const IfArgXprType, const ThenArgXprType, const ElseArgXprType> type;
    };

    template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type expr(
        const IfArgXprType& if_expr, const ThenArgXprType& then_expr, const ElseArgXprType& else_expr) const {
      return typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type(if_expr, then_expr, else_expr);
    }
  };
  typedef internal::TensorTernaryExprBlock<TensorSelectOpBlockFactory,
                                           IfArgTensorBlock, ThenArgTensorBlock,
                                           ElseArgTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use then or else impl instead if they happen to be known at compile time.
    return m_condImpl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_condImpl.evalSubExprsIfNeeded(NULL);
    m_thenImpl.evalSubExprsIfNeeded(NULL);
    m_elseImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    // Chain the async variants; calling the synchronous evalSubExprsIfNeeded
    // here would defeat the purpose of the callback.
    m_condImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_thenImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
        m_elseImpl.evalSubExprsIfNeededAsync(nullptr,
                                             [done](bool) { done(true); });
      });
    });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_condImpl.cleanup();
    m_thenImpl.cleanup();
    m_elseImpl.cleanup();
  }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
  {
    internal::Selector<PacketSize> select;
    for (Index i = 0; i < PacketSize; ++i) {
      select.select[i] = m_condImpl.coeff(index+i);
    }
    return internal::pblend(select,
                            m_thenImpl.template packet<LoadMode>(index),
                            m_elseImpl.template packet<LoadMode>(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return m_condImpl.costPerCoeff(vectorized) +
           m_thenImpl.costPerCoeff(vectorized)
               .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    auto then_req = m_thenImpl.getResourceRequirements();
    auto else_req = m_elseImpl.getResourceRequirements();

    auto merged_req =
        internal::TensorBlockResourceRequirements::merge(then_req, else_req);
    merged_req.cost_per_coeff =
        then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff);

    return internal::TensorBlockResourceRequirements::merge(
        m_condImpl.getResourceRequirements(), merged_req);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // It's unsafe to pass the destination buffer to the underlying expressions,
    // because the output might be aliased with one of the inputs.
    desc.DropDestinationBuffer();

    return TensorBlock(
        m_condImpl.block(desc, scratch), m_thenImpl.block(desc, scratch),
        m_elseImpl.block(desc, scratch), TensorSelectOpBlockFactory());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }
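
  // Sketch (illustration only): select() is the user-facing expression served
  // by this evaluator; the vectorized path blends both branches with
  // internal::pblend:
  //
  //   Eigen::Tensor<float, 2> a(8, 8), b(8, 8), r(8, 8);
  //   a.setRandom(); b.setRandom();
  //   r = (a > 0.0f).select(a, b);  // r(i) = a(i) > 0 ? a(i) : b(i)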
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_condImpl.bind(cgh);
    m_thenImpl.bind(cgh);
    m_elseImpl.bind(cgh);
  }
#endif

 private:
  TensorEvaluator<IfArgType, Device> m_condImpl;
  TensorEvaluator<ThenArgType, Device> m_thenImpl;
  TensorEvaluator<ElseArgType, Device> m_elseImpl;
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H