#ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
#define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H

namespace Eigen {

/** \class TensorShufflingOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor shuffling class.
  */
namespace internal {

template<typename Shuffle, typename XprType>
struct traits<TensorShufflingOp<Shuffle, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template<typename Shuffle, typename XprType>
struct eval<TensorShufflingOp<Shuffle, XprType>, Eigen::Dense>
{
  typedef const TensorShufflingOp<Shuffle, XprType>& type;
};
template<typename Shuffle, typename XprType>
struct nested<TensorShufflingOp<Shuffle, XprType>, 1, typename eval<TensorShufflingOp<Shuffle, XprType> >::type>
{
  typedef TensorShufflingOp<Shuffle, XprType> type;
};

}  // end namespace internal
template<typename Shuffle, typename XprType>
class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType> >
{
  public:
    typedef TensorBase<TensorShufflingOp<Shuffle, XprType> > Base;
    typedef typename Eigen::internal::traits<TensorShufflingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorShufflingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorShufflingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorShufflingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shfl)
        : m_xpr(expr), m_shuffle(shfl) {}

    EIGEN_DEVICE_FUNC
    const Shuffle& shufflePermutation() const { return m_shuffle; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorShufflingOp)

  protected:
    typename XprType::Nested m_xpr;
    const Shuffle m_shuffle;
};
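// Usage sketch (illustrative, based on the documented Tensor API; not part of
// the original source). shuffle() builds a TensorShufflingOp whose output
// dimension i has the size of input dimension shuffle[i]:
//
//   Eigen::Tensor<float, 3> input(20, 30, 50);
//   input.setRandom();
//   Eigen::array<int, 3> shuffling{{1, 2, 0}};
//   Eigen::Tensor<float, 3> output = input.shuffle(shuffling);
//   // output.dimensions() == (30, 50, 20)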
// Eval as rvalue
template<typename Shuffle, typename ArgType, typename Device>
struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> Self;
  typedef TensorShufflingOp<Shuffle, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned         = false,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = TensorEvaluator<ArgType, Device>::RawAccess,
    PreferBlockAccess = true,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation ----------------------------------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_device(device),
        m_impl(op.expression(), device)
  {
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Shuffle& shuffle = op.shufflePermutation();

    // Compute the output dimensions and the inverse permutation, and detect
    // the identity shuffle (which can be forwarded to the argument directly).
    m_is_identity = true;
    for (int i = 0; i < NumDims; ++i) {
      m_shuffle[i] = static_cast<int>(shuffle[i]);
      m_dimensions[i] = input_dims[shuffle[i]];
      m_inverseShuffle[shuffle[i]] = i;
      if (m_is_identity && shuffle[i] != i) {
        m_is_identity = false;
      }
    }

    // Strides of the unshuffled input and of the shuffled output, in the
    // storage order dictated by the layout.
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_unshuffledInputStrides[0] = 1;
      m_outputStrides[0] = 1;

      for (int i = 1; i < NumDims; ++i) {
        m_unshuffledInputStrides[i] =
            m_unshuffledInputStrides[i - 1] * input_dims[i - 1];
        m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(
            m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1));
      }
    } else {
      m_unshuffledInputStrides[NumDims - 1] = 1;
      m_outputStrides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_unshuffledInputStrides[i] =
            m_unshuffledInputStrides[i + 1] * input_dims[i + 1];
        m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(
            m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1));
      }
    }

    // The input stride that corresponds to each output dimension.
    for (int i = 0; i < NumDims; ++i) {
      m_inputStrides[i] = m_unshuffledInputStrides[shuffle[i]];
    }
  }
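  // Worked example (illustrative, not part of the original source): with
  // ColMajor layout, input_dims = (2, 3, 4) and shuffle = (2, 0, 1):
  //   m_dimensions             = (4, 2, 3)
  //   m_unshuffledInputStrides = (1, 2, 6)
  //   m_outputStrides          = (1, 4, 8)
  //   m_inputStrides           = (6, 1, 2)  // input stride per output dim
  //   m_inverseShuffle         = (1, 2, 0)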
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    if (m_is_identity) {
      return m_impl.coeff(index);
    }
    return m_impl.coeff(srcCoeff(index));
  }
  // Generic case: the argument has no packet access, so assemble the packet
  // coefficient by coefficient.
  template <int LoadMode, typename Self, bool ImplPacketAccess>
  struct PacketLoader {
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    static PacketReturnType Run(const Self& self, Index index) {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = self.coeff(index + i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  };
  // Specialization for an argument with packet access: an identity shuffle
  // can forward the packet load directly to the argument evaluator.
  template <int LoadMode, typename Self>
  struct PacketLoader<LoadMode, Self, true> {
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    static PacketReturnType Run(const Self& self, Index index) {
      if (self.m_is_identity) {
        return self.m_impl.template packet<LoadMode>(index);
      } else {
        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
        for (int i = 0; i < PacketSize; ++i) {
          values[i] = self.coeff(index + i);
        }
        PacketReturnType rslt = internal::pload<PacketReturnType>(values);
        return rslt;
      }
    }
  };
  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
    return PacketLoader<LoadMode, Self, TensorEvaluator<ArgType, Device>::PacketAccess>::Run(*this, index);
  }
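  // Dispatch sketch (illustrative): with PacketSize == 4, packet<LoadMode>(8)
  // either forwards to m_impl.packet<LoadMode>(8) (identity shuffle on an
  // argument with packet access) or gathers coeff(8) .. coeff(11) into an
  // aligned buffer and loads that buffer as a packet.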
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const int inner_dim =
        Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;

    const size_t target_size = m_device.firstLevelCacheSize();
    const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;

    // A shuffled inner dimension produces non-contiguous memory accesses that
    // the default bytes-loaded/stored cost model does not capture, so charge
    // an explicit per-coefficient cost for it.
    using BlockRequirements = internal::TensorBlockResourceRequirements;
    if (inner_dim_shuffled) {
      return BlockRequirements::uniform<Scalar>(target_size)
          .addCostPerCoeff({0, 0, NumDims * 28});
    } else {
      return BlockRequirements::skewed<Scalar>(target_size);
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool root_of_expr_ast = false) const {
    assert(m_impl.data() != NULL);

    typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout>
        TensorBlockIO;
    typedef typename TensorBlockIO::Dst TensorBlockIODst;
    typedef typename TensorBlockIO::Src TensorBlockIOSrc;

    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch, root_of_expr_ast);

    typename TensorBlockIO::Dimensions input_strides(m_unshuffledInputStrides);
    TensorBlockIOSrc src(input_strides, m_impl.data(), srcCoeff(desc.offset()));

    TensorBlockIODst dst(block_storage.dimensions(), block_storage.strides(),
                         block_storage.data());

    // Copy from the unshuffled input into the materialized block, remapping
    // dimensions according to the shuffle.
    typename TensorBlockIO::DimensionsMap dst_to_src_dim_map(m_shuffle);
    TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);

    return block_storage.AsTensorMaterializedBlock();
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double compute_cost = m_is_identity ? TensorOpCost::AddCost<Index>() :
                                NumDims * (2 * TensorOpCost::AddCost<Index>() +
                                           2 * TensorOpCost::MulCost<Index>() +
                                           TensorOpCost::DivCost<Index>());
    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, compute_cost, m_is_identity /* vectorized */, PacketSize);
  }
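  // Worked cost example (illustrative): for NumDims == 3 and a non-identity
  // shuffle, compute_cost = 3 * (2 * AddCost + 2 * MulCost + DivCost),
  // matching the per-dimension divide, two multiplies, and two add/subtract
  // steps performed by srcCoeff() for each coefficient.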
  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binds the placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler& cgh) const {
    m_impl.bind(cgh);
  }
#endif
 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex(
      Index input_index,
      const DSizes<Index, NumDims>& input_block_strides,
      const DSizes<Index, NumDims>& output_block_strides,
      const DSizes<internal::TensorIntDivisor<Index>, NumDims>& fast_input_block_strides) const {
    Index output_index = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = input_index / fast_input_block_strides[i];
        output_index += idx * output_block_strides[m_inverseShuffle[i]];
        input_index -= idx * input_block_strides[i];
      }
      return output_index + input_index *
             output_block_strides[m_inverseShuffle[0]];
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = input_index / fast_input_block_strides[i];
        output_index += idx * output_block_strides[m_inverseShuffle[i]];
        input_index -= idx * input_block_strides[i];
      }
      return output_index + input_index *
             output_block_strides[m_inverseShuffle[NumDims - 1]];
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      return inputIndex + index * m_inputStrides[0];
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      return inputIndex + index * m_inputStrides[NumDims - 1];
    }
  }
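  // Worked example (illustrative, continuing the constructor example above):
  // with ColMajor layout, input dims (2, 3, 4) and shuffle (2, 0, 1),
  // srcCoeff(5) decomposes output index 5 (coordinates (1, 1, 0)) as:
  //   i = 2: idx = 5 / 8 = 0; inputIndex += 0 * 2; index = 5
  //   i = 1: idx = 5 / 4 = 1; inputIndex += 1 * 1; index = 5 - 4 = 1
  //   result = 1 + 1 * 6 = 7, i.e. input(1, 0, 1) at column-major offset 7.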
  Dimensions m_dimensions;
  bool m_is_identity;
  array<int, NumDims> m_shuffle;
  array<Index, NumDims> m_inverseShuffle;
  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  array<Index, NumDims> m_unshuffledInputStrides;

  const Device EIGEN_DEVICE_REF m_device;
  TensorEvaluator<ArgType, Device> m_impl;
};
// Eval as lvalue
template<typename Shuffle, typename ArgType, typename Device>
struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
    : public TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> Base;

  typedef TensorShufflingOp<Shuffle, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

  enum {
    IsAligned         = false,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = TensorEvaluator<ArgType, Device>::RawAccess,
    PreferBlockAccess = true,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess         = false
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation ----------------------------------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  //===------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }
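  // Usage sketch (illustrative, based on the documented Tensor API; not part
  // of the original source): a shuffle can appear on the left-hand side of an
  // assignment, writing through coeffRef()/writePacket() below:
  //
  //   Eigen::Tensor<float, 3> input(20, 30, 50);
  //   Eigen::Tensor<float, 3> output(30, 50, 20);
  //   Eigen::array<int, 3> shuffling{{2, 0, 1}};
  //   output.shuffle(shuffling) = input;
  //   // equivalent to: Eigen::array<int, 3> inverse{{1, 2, 0}};
  //   //                output = input.shuffle(inverse);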
  template <int StoreMode> EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)

    // Scatter the packet into the shuffled locations coefficient by
    // coefficient.
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
    for (int i = 0; i < PacketSize; ++i) {
      this->coeffRef(index+i) = values[i];
    }
  }
  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    eigen_assert(this->m_impl.data() != NULL);

    typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout>
        TensorBlockIO;
    typedef typename TensorBlockIO::Dst TensorBlockIODst;
    typedef typename TensorBlockIO::Src TensorBlockIOSrc;

    const Scalar* block_buffer = block.data();

    // If the block does not expose a raw buffer, materialize its expression
    // into a temporary buffer first.
    void* mem = NULL;
    if (block_buffer == NULL) {
      mem = this->m_device.allocate(desc.size() * sizeof(Scalar));
      ScalarNoConst* buf = static_cast<ScalarNoConst*>(mem);

      typedef internal::TensorBlockAssignment<
          ScalarNoConst, NumDims, typename TensorBlock::XprType, Index>
          TensorBlockAssignment;

      TensorBlockAssignment::Run(
          TensorBlockAssignment::target(
              desc.dimensions(), internal::strides<Layout>(desc.dimensions()),
              buf),
          block.expr());

      block_buffer = buf;
    }

    // Read from the block buffer.
    TensorBlockIOSrc src(internal::strides<Layout>(desc.dimensions()),
                         block_buffer);

    // Write to the output tensor, with dimensions remapped through the
    // inverse shuffle.
    typename TensorBlockIO::Dimensions output_strides(
        this->m_unshuffledInputStrides);
    typename TensorBlockIO::Dimensions output_dimensions;
    for (int i = 0; i < NumDims; ++i) {
      output_dimensions[this->m_shuffle[i]] = desc.dimension(i);
    }
    TensorBlockIODst dst(output_dimensions, output_strides, this->m_impl.data(),
                         this->srcCoeff(desc.offset()));

    typename TensorBlockIO::DimensionsMap dst_to_src_dim_map;
    for (int i = 0; i < NumDims; ++i) {
      dst_to_src_dim_map[i] = static_cast<int>(this->m_inverseShuffle[i]);
    }
    TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);

    // Deallocate the temporary buffer used for the block materialization.
    if (mem != NULL) this->m_device.deallocate(mem);
  }
};
} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H