#ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H

namespace Eigen {
namespace internal {

template<typename Strides, typename XprType>
struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template<typename Strides, typename XprType>
struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingOp<Strides, XprType> EIGEN_DEVICE_REF type;
};
template<typename Strides, typename XprType>
struct nested<TensorStridingOp<Strides, XprType>, 1, typename eval<TensorStridingOp<Strides, XprType> >::type>
{
  typedef TensorStridingOp<Strides, XprType> type;
};

}  // end namespace internal
template<typename Strides, typename XprType>
class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
{
  public:
    typedef TensorBase<TensorStridingOp<Strides, XprType> > Base;
    typedef typename Eigen::internal::traits<TensorStridingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorStridingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorStridingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorStridingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType& expr, const Strides& dims)
        : m_xpr(expr), m_dims(dims) {}

    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_dims; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingOp)

  protected:
    typename XprType::Nested m_xpr;
    const Strides m_dims;
};
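
// A minimal usage sketch (illustrative note, not part of the original header):
// a TensorStridingOp is normally created through TensorBase::stride(), which
// keeps every strides[i]-th coefficient along dimension i. For example,
// striding a 2x4 tensor by {1, 2} keeps every second coefficient along the
// second dimension, yielding a 2x2 result:
//
//   Eigen::Tensor<float, 2> input(2, 4);
//   input.setRandom();
//   const Eigen::array<Eigen::DenseIndex, 2> strides{{1, 2}};
//   Eigen::Tensor<float, 2> output = input.stride(strides);
//   // output(i, j) == input(i, 2 * j)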
// Eval as rvalue
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
{
  typedef TensorStridingOp<Strides, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  typedef internal::TensorBlockNotImplemented TensorBlock;
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device)
  {
    // Along each dimension, keeping every strides[i]-th coefficient leaves
    // ceil(dim / stride) coefficients in the output.
    m_dimensions = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      m_dimensions[i] = Eigen::numext::ceil(static_cast<float>(m_dimensions[i]) / op.strides()[i]);
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_outputStrides[0] = 1;
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_inputStrides[i-1] *= op.strides()[i-1];
      }
      m_inputStrides[NumDims-1] *= op.strides()[NumDims-1];
    } else {  // RowMajor
      m_outputStrides[NumDims-1] = 1;
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_inputStrides[i+1] *= op.strides()[i+1];
      }
      m_inputStrides[0] *= op.strides()[0];
    }
  }
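
  // Worked example (illustrative note, not from the original source): for a
  // ColMajor input of dimensions {10, 10} strided by {2, 3}, the output
  // dimensions become {ceil(10/2), ceil(10/3)} = {5, 4}. The plain
  // column-major strides of the input are {1, 10}; after scaling by the
  // stride factors, m_inputStrides = {2, 30} and m_outputStrides = {1, 5},
  // so output coefficient (i, j) reads input index 2*i + 30*j, i.e.
  // input(2*i, 3*j).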
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    // Map the first and last coefficient of the packet back to input indices.
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + PacketSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_outputStrides[i];
        const Index idx1 = indices[1] / m_outputStrides[i];
        inputIndices[0] += idx0 * m_inputStrides[i];
        inputIndices[1] += idx1 * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * m_inputStrides[0];
      inputIndices[1] += indices[1] * m_inputStrides[0];
    } else {  // RowMajor
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_outputStrides[i];
        const Index idx1 = indices[1] / m_outputStrides[i];
        inputIndices[0] += idx0 * m_inputStrides[i];
        inputIndices[1] += idx1 * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * m_inputStrides[NumDims-1];
      inputIndices[1] += indices[1] * m_inputStrides[NumDims-1];
    }
    if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
      // The packet is contiguous in the input: load it in one go.
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    }
    else {
      // Gather the coefficients one by one and assemble the packet.
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
      for (int i = 1; i < PacketSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }
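
  // Note on vectorization (illustrative, not from the original source): the
  // fast path in packet() only triggers when the strided coefficients are
  // contiguous in the input, i.e. when the inner-dimension stride factor is 1
  // and the packet does not cross a dimension boundary. For instance,
  // striding a ColMajor 8x8 tensor by {1, 2} keeps whole input columns, so
  // packet loads stay contiguous; striding by {2, 1} makes the inner
  // dimension non-contiguous and falls back to coefficient-wise gathers.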
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() +
                                           TensorOpCost::MulCost<Index>() +
                                           TensorOpCost::DivCost<Index>()) +
                          TensorOpCost::MulCost<Index>();
    if (vectorized) {
      // packet() computes two indices (first and last coefficient) per packet.
      compute_cost *= 2;
    }
    const int innerDim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : (NumDims - 1);
    return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) +
        TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
  }
  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binds the placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif
 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    // Decompose the output index dimension by dimension and rebuild the
    // corresponding input index using the scaled input strides.
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += index * m_inputStrides[0];
    } else {  // RowMajor
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += index * m_inputStrides[NumDims-1];
    }
    return inputIndex;
  }

  Dimensions m_dimensions;
  array<Index, NumDims> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
};
// Eval as lvalue
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
    : public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
{
  typedef TensorStridingOp<Strides, ArgType> XprType;
  typedef TensorEvaluator<const XprType, Device> Base;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < this->dimensions().TotalSize());

    // Map the first and last coefficient of the packet back to input indices.
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + PacketSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_outputStrides[i];
        const Index idx1 = indices[1] / this->m_outputStrides[i];
        inputIndices[0] += idx0 * this->m_inputStrides[i];
        inputIndices[1] += idx1 * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * this->m_inputStrides[0];
      inputIndices[1] += indices[1] * this->m_inputStrides[0];
    } else {  // RowMajor
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_outputStrides[i];
        const Index idx1 = indices[1] / this->m_outputStrides[i];
        inputIndices[0] += idx0 * this->m_inputStrides[i];
        inputIndices[1] += idx1 * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1];
      inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1];
    }
    if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
      // Contiguous destination: store the whole packet at once.
      this->m_impl.template writePacket<Unaligned>(inputIndices[0], x);
    }
    else {
      // Scatter the packet coefficient by coefficient.
      EIGEN_ALIGN_MAX Scalar values[PacketSize];
      internal::pstore<Scalar, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1];
      for (int i = 1; i < PacketSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }
};
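
// A minimal lvalue usage sketch (illustrative, not part of the original
// header): because this evaluator exposes coeffRef()/writePacket(), a strided
// expression can also appear on the left-hand side of an assignment, here
// writing into every second column of the destination:
//
//   Eigen::Tensor<float, 2> dst(2, 4);
//   dst.setZero();
//   Eigen::Tensor<float, 2> src(2, 2);
//   src.setRandom();
//   const Eigen::array<Eigen::DenseIndex, 2> strides{{1, 2}};
//   dst.stride(strides) = src;  // dst(i, 2 * j) = src(i, j)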
} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H