35 Triplet (std::size_t row, std::size_t col, T
const& value);
48 void allocate (std::size_t rows, std::size_t cols);
49 void reserve (std::size_t num_elements);
50 void set_from_triplets (
Triplets const& triplets);
51 void mult_diagonal (T
const& factor);
52 void cwise_invert (
void);
53 void column_nonzeros (std::size_t col,
DenseVector<T>* vector)
const;
63 std::size_t num_non_zero (
void)
const;
64 std::size_t num_rows (
void)
const;
65 std::size_t num_cols (
void)
const;
69 void debug (
void)
const;
74 std::vector<T> values;
75 std::vector<std::size_t> outer;
76 std::vector<std::size_t> inner;
137 transposed.values.resize(triplets.size());
138 transposed.inner.resize(triplets.size());
141 for (std::size_t i = 0; i < triplets.size(); ++i)
142 transposed.outer[triplets[i].row]++;
146 std::vector<std::size_t> scratch(transposed.outer.size());
147 for (std::size_t i = 0; i < transposed.outer.size(); ++i)
149 std::size_t
const temp = transposed.outer[i];
150 transposed.outer[i] = sum;
156 for (std::size_t i = 0; i < triplets.size(); ++i)
158 Triplet const& t = triplets[i];
159 std::size_t pos = scratch[t.
row]++;
160 transposed.values[pos] = t.
value;
161 transposed.inner[pos] = t.
col;
173 ret.values.resize(this->num_non_zero());
174 ret.inner.resize(this->num_non_zero());
177 for(std::size_t i = 0; i < this->inner.size(); ++i)
178 ret.outer[this->inner[i]] += 1;
182 std::vector<std::size_t> scratch(ret.outer.size());
183 for (std::size_t i = 0; i < ret.outer.size(); ++i)
185 std::size_t
const temp = ret.outer[i];
192 for (std::size_t i = 0; i < this->outer.size() - 1; ++i)
193 for (std::size_t j = this->outer[i]; j < this->outer[i + 1]; ++j)
195 std::size_t pos = scratch[this->inner[j]]++;
197 ret.values[pos] = this->values[j];
207 if (this->rows != rhs.rows || this->cols != rhs.cols)
208 throw std::invalid_argument(
"Incompatible matrix dimensions");
213 std::size_t num_outer = this->outer.size() - 1;
214 for (std::size_t outer = 0; outer < num_outer; ++outer)
216 ret.outer[outer] = ret.values.size();
218 std::size_t i1 = this->outer[outer];
219 std::size_t i2 = rhs.outer[outer];
220 std::size_t
const i1_end = this->outer[outer + 1];
221 std::size_t
const i2_end = rhs.outer[outer + 1];
222 while (i1 < i1_end || i2 < i2_end)
226 ret.values.push_back(-rhs.values[i2]);
227 ret.inner.push_back(rhs.inner[i2]);
233 ret.values.push_back(this->values[i1]);
234 ret.inner.push_back(this->inner[i1]);
239 std::size_t id1 = this->inner[i1];
240 std::size_t id2 = rhs.inner[i2];
243 ret.values.push_back(this->values[i1]);
245 ret.values.push_back(-rhs.values[i2]);
247 ret.values.push_back(this->values[i1] - rhs.values[i2]);
249 i1 +=
static_cast<std::size_t
>(id1 <= id2);
250 i2 +=
static_cast<std::size_t
>(id2 <= id1);
251 ret.inner.push_back(std::min(id1, id2));
254 ret.outer.back() = ret.values.size();
274 if (this->cols != rhs.rows)
275 throw std::invalid_argument(
"Incompatible matrix dimensions");
281 std::vector<T> ret_col(ret.rows, T(0));
282 std::vector<bool> ret_nonzero(ret.rows,
false);
283 for (std::size_t col = 0; col < ret.cols; ++col)
285 ret.outer[col] = ret.values.size();
287 std::fill(ret_col.begin(), ret_col.end(), T(0));
288 std::fill(ret_nonzero.begin(), ret_nonzero.end(),
false);
289 std::size_t rhs_col_begin = rhs.outer[col];
290 std::size_t rhs_col_end = rhs.outer[col + 1];
291 for (std::size_t i = rhs_col_begin; i < rhs_col_end; ++i)
293 T
const& rhs_col_value = rhs.values[i];
294 std::size_t
const lhs_col = rhs.inner[i];
295 std::size_t
const lhs_col_begin = this->outer[lhs_col];
296 std::size_t
const lhs_col_end = this->outer[lhs_col + 1];
298 for (std::size_t j = lhs_col_begin; j < lhs_col_end; ++j)
300 std::size_t
const id = this->inner[j];
301 ret_col[id] += this->values[j] * rhs_col_value;
302 ret_nonzero[id] =
true;
305 for (std::size_t i = 0; i < ret.rows; ++i)
308 ret.inner.push_back(i);
309 ret.values.push_back(ret_col[i]);
312 ret.outer[ret.cols] = ret.values.size();
321 if (this->cols != rhs.rows)
322 throw std::invalid_argument(
"Incompatible matrix dimensions");
324 std::size_t nnz = this->num_non_zero() + rhs.
num_non_zero();
327 std::fill(ret.outer.begin(), ret.outer.end(), 0);
329 std::size_t
const chunk_size = 64;
330 std::size_t
const num_chunks = ret.cols / chunk_size
331 + (ret.cols % chunk_size != 0);
332 std::size_t
const max_threads = std::max(1u,
333 std::thread::hardware_concurrency());
334 std::size_t
const num_threads = std::min(num_chunks, max_threads);
336#pragma omp parallel num_threads(num_threads)
339 std::vector<T> ret_col(ret.rows, T(0));
340 std::vector<bool> ret_nonzero(ret.rows,
false);
341 std::vector<T> thread_values;
342 thread_values.reserve(nnz / num_chunks);
343 std::vector<std::size_t> thread_inner;
344 thread_inner.reserve(nnz / num_chunks);
346#pragma omp for ordered schedule(static, 1)
347 for (std::size_t chunk = 0; chunk < num_chunks; ++chunk)
349 thread_inner.clear();
350 thread_values.clear();
352 std::size_t
const begin = chunk * chunk_size;
353 std::size_t
const end = std::min(begin + chunk_size, ret.cols);
354 for (std::size_t col = begin; col < end; ++col)
356 std::fill(ret_col.begin(), ret_col.end(), T(0));
357 std::fill(ret_nonzero.begin(), ret_nonzero.end(),
false);
358 std::size_t
const rhs_col_begin = rhs.outer[col];
359 std::size_t
const rhs_col_end = rhs.outer[col + 1];
360 for (std::size_t i = rhs_col_begin; i < rhs_col_end; ++i)
362 T
const& rhs_col_value = rhs.values[i];
363 std::size_t
const lhs_col = rhs.inner[i];
364 std::size_t
const lhs_col_begin = this->outer[lhs_col];
365 std::size_t
const lhs_col_end = this->outer[lhs_col + 1];
367 for (std::size_t j = lhs_col_begin; j < lhs_col_end; ++j)
369 std::size_t
const id = this->inner[j];
370 ret_col[id] += this->values[j] * rhs_col_value;
371 ret_nonzero[id] =
true;
374 for (std::size_t i = 0; i < ret.rows; ++i)
377 ret.outer[col + 1] += 1;
378 thread_inner.push_back(i);
379 thread_values.push_back(ret_col[i]);
385 ret.inner.insert(ret.inner.end(),
386 thread_inner.begin(), thread_inner.end());
387 ret.values.insert(ret.values.end(),
388 thread_values.begin(), thread_values.end());
393 for (std::size_t col = 0; col < ret.cols; ++col)
394 ret.outer[col + 1] += ret.outer[col];
417 std::size_t
const diag_size = std::min(this->rows, this->cols);
420 for (std::size_t i = 0; i < diag_size; ++i)
422 ret.outer[i] = ret.values.size();
423 for (std::size_t j = this->outer[i]; j < this->outer[i + 1]; ++j)
424 if (this->inner[j] == i)
426 ret.inner.push_back(i);
427 ret.values.push_back(this->values[j]);
429 else if (this->inner[j] > i)
432 ret.outer[diag_size] = ret.values.size();
508 std::cout <<
"SparseMatrix ("
509 << this->rows <<
" rows, " << this->cols <<
" cols, "
510 << this->num_non_zero() <<
" values)" << std::endl;
511 std::cout <<
" Values:";
512 for (std::size_t i = 0; i < this->values.size(); ++i)
513 std::cout <<
" " << this->values[i];
514 std::cout << std::endl <<
" Inner:";
515 for (std::size_t i = 0; i < this->inner.size(); ++i)
516 std::cout <<
" " << this->inner[i];
517 std::cout << std::endl <<
" Outer:";
518 for (std::size_t i = 0; i < this->outer.size(); ++i)
519 std::cout <<
" " << this->outer[i];
520 std::cout << std::endl;