1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 9 static PetscBool cited = PETSC_FALSE; 10 static const char citation[] = 11 "@inproceedings{ZhangELLPACK2018,\n" 12 " author = {Hong Zhang and Richard T. Mills and Karl Rupp and Barry F. Smith},\n" 13 " title = {Vectorized Parallel Sparse Matrix-Vector Multiplication in {PETSc} Using {AVX-512}},\n" 14 " booktitle = {Proceedings of the 47th International Conference on Parallel Processing},\n" 15 " year = 2018\n" 16 "}\n"; 17 18 #if defined(PETSC_HAVE_IMMINTRIN_H) && (defined(__AVX512F__) || (defined(__AVX2__) && defined(__FMA__)) || defined(__AVX__)) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 19 20 #include <immintrin.h> 21 22 #if !defined(_MM_SCALE_8) 23 #define _MM_SCALE_8 8 24 #endif 25 26 #if defined(__AVX512F__) 27 /* these do not work 28 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 29 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 30 */ 31 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 32 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 33 vec_idx = _mm256_loadu_si256((__m256i const*)acolidx); \ 34 vec_vals = _mm512_loadu_pd(aval); \ 35 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 36 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y) 37 #elif defined(__AVX2__) && defined(__FMA__) 38 #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 39 vec_vals = _mm256_loadu_pd(aval); \ 40 vec_idx = _mm_loadu_si128((__m128i const*)acolidx); /* SSE2 */ \ 41 vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \ 42 vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y) 43 #endif 44 #endif /* PETSC_HAVE_IMMINTRIN_H */ 45 46 /*@C 47 MatSeqSELLSetPreallocation - For good matrix assembly performance 48 the user should preallocate the matrix storage by setting the parameter nz 49 (or the array nnz). By setting these parameters accurately, performance 50 during matrix assembly can be increased significantly. 51 52 Collective 53 54 Input Parameters: 55 + B - The matrix 56 . nz - number of nonzeros per row (same for all rows) 57 - nnz - array containing the number of nonzeros in the various rows 58 (possibly different for each row) or NULL 59 60 Notes: 61 If nnz is given then nz is ignored. 62 63 Specify the preallocated storage with either nz or nnz (not both). 64 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 65 allocation. For large problems you MUST preallocate memory or you 66 will get TERRIBLE performance, see the users' manual chapter on matrices. 67 68 You can call MatGetInfo() to get information on how effective the preallocation was; 69 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 70 You can also run with the option -info and look for messages with the string 71 malloc in them to see if additional memory allocation was needed. 72 73 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 74 entries or columns indices. 75 76 The maximum number of nonzeos in any row should be as accurate as possible. 77 If it is underestimated, you will get bad performance due to reallocation 78 (MatSeqXSELLReallocateSELL). 79 80 Level: intermediate 81 82 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 83 84 @*/ 85 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 86 { 87 PetscErrorCode ierr; 88 89 PetscFunctionBegin; 90 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 91 PetscValidType(B,1); 92 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 96 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 97 { 98 Mat_SeqSELL *b; 99 PetscInt i,j,totalslices; 100 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 101 PetscErrorCode ierr; 102 103 PetscFunctionBegin; 104 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 105 if (maxallocrow == MAT_SKIP_ALLOCATION) { 106 skipallocation = PETSC_TRUE; 107 maxallocrow = 0; 108 } 109 110 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 111 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 112 113 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 114 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 115 PetscCheckFalse(maxallocrow < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %" PetscInt_FMT,maxallocrow); 116 if (rlen) { 117 for (i=0; i<B->rmap->n; i++) { 118 PetscCheckFalse(rlen[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,rlen[i]); 119 PetscCheckFalse(rlen[i] > B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,rlen[i],B->cmap->n); 120 } 121 } 122 123 B->preallocated = PETSC_TRUE; 124 125 b = (Mat_SeqSELL*)B->data; 126 127 totalslices = PetscCeilInt(B->rmap->n,8); 128 b->totalslices = totalslices; 129 if (!skipallocation) { 130 if (B->rmap->n & 0x07) {ierr = PetscInfo(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %" PetscInt_FMT ")\n",B->rmap->n);CHKERRQ(ierr);} 131 132 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 133 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 134 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 135 } 136 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 137 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 138 else if (maxallocrow < 0) maxallocrow = 1; 139 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 140 } else { 141 maxallocrow = 0; 142 b->sliidx[0] = 0; 143 for (i=1; i<totalslices; i++) { 144 b->sliidx[i] = 0; 145 for (j=0;j<8;j++) { 146 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 147 } 148 maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 149 ierr = PetscIntSumError(b->sliidx[i-1],8*b->sliidx[i],&b->sliidx[i]);CHKERRQ(ierr); 150 } 151 /* last slice */ 152 b->sliidx[totalslices] = 0; 153 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 154 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 155 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 156 } 157 158 /* allocate space for val, colidx, rlen */ 159 /* FIXME: should B's old memory be unlogged? */ 160 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 161 /* FIXME: assuming an element of the bit array takes 8 bits */ 162 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 163 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 164 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 165 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 166 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 167 168 b->singlemalloc = PETSC_TRUE; 169 b->free_val = PETSC_TRUE; 170 b->free_colidx = PETSC_TRUE; 171 } else { 172 b->free_val = PETSC_FALSE; 173 b->free_colidx = PETSC_FALSE; 174 } 175 176 b->nz = 0; 177 b->maxallocrow = maxallocrow; 178 b->rlenmax = maxallocrow; 179 b->maxallocmat = b->sliidx[totalslices]; 180 B->info.nz_unneeded = (double)b->maxallocmat; 181 if (realalloc) { 182 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 183 } 184 PetscFunctionReturn(0); 185 } 186 187 PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 188 { 189 Mat_SeqSELL *a = (Mat_SeqSELL*)A->data; 190 PetscInt shift; 191 192 PetscFunctionBegin; 193 PetscCheckFalse(row < 0 || row >= A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range",row); 194 if (nz) *nz = a->rlen[row]; 195 shift = a->sliidx[row>>3]+(row&0x07); 196 if (!a->getrowcols) { 197 PetscErrorCode ierr; 198 199 ierr = PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);CHKERRQ(ierr); 200 } 201 if (idx) { 202 PetscInt j; 203 for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j]; 204 *idx = a->getrowcols; 205 } 206 if (v) { 207 PetscInt j; 208 for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j]; 209 *v = a->getrowvals; 210 } 211 PetscFunctionReturn(0); 212 } 213 214 PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 215 { 216 PetscFunctionBegin; 217 PetscFunctionReturn(0); 218 } 219 220 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 221 { 222 Mat B; 223 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 224 PetscInt i; 225 PetscErrorCode ierr; 226 227 PetscFunctionBegin; 228 if (reuse == MAT_REUSE_MATRIX) { 229 B = *newmat; 230 ierr = MatZeroEntries(B);CHKERRQ(ierr); 231 } else { 232 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 233 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 234 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 235 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 236 } 237 238 for (i=0; i<A->rmap->n; i++) { 239 PetscInt nz = 0,*cols = NULL; 240 PetscScalar *vals = NULL; 241 242 ierr = MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 243 ierr = MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 244 ierr = MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 245 } 246 247 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 248 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 249 B->rmap->bs = A->rmap->bs; 250 251 if (reuse == MAT_INPLACE_MATRIX) { 252 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 253 } else { 254 *newmat = B; 255 } 256 PetscFunctionReturn(0); 257 } 258 259 #include <../src/mat/impls/aij/seq/aij.h> 260 261 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 262 { 263 Mat B; 264 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 265 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 266 const PetscInt *cols; 267 const PetscScalar *vals; 268 PetscErrorCode ierr; 269 270 PetscFunctionBegin; 271 272 if (reuse == MAT_REUSE_MATRIX) { 273 B = *newmat; 274 } else { 275 if (PetscDefined(USE_DEBUG) || !a->ilen) { 276 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 277 for (i=0; i<m; i++) { 278 rowlengths[i] = ai[i+1] - ai[i]; 279 } 280 } 281 if (PetscDefined(USE_DEBUG) && a->ilen) { 282 PetscBool eq; 283 ierr = PetscMemcmp(rowlengths,a->ilen,m*sizeof(PetscInt),&eq);CHKERRQ(ierr); 284 PetscCheckFalse(!eq,PETSC_COMM_SELF,PETSC_ERR_PLIB,"SeqAIJ ilen array incorrect"); 285 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 286 rowlengths = a->ilen; 287 } else if (a->ilen) rowlengths = a->ilen; 288 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 289 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 290 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 291 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 292 if (rowlengths != a->ilen) {ierr = PetscFree(rowlengths);CHKERRQ(ierr);} 293 } 294 295 for (row=0; row<m; row++) { 296 ierr = MatGetRow_SeqAIJ(A,row,&ncols,(PetscInt**)&cols,(PetscScalar**)&vals);CHKERRQ(ierr); 297 ierr = MatSetValues_SeqSELL(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 298 ierr = MatRestoreRow_SeqAIJ(A,row,&ncols,(PetscInt**)&cols,(PetscScalar**)&vals);CHKERRQ(ierr); 299 } 300 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 301 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 302 B->rmap->bs = A->rmap->bs; 303 304 if (reuse == MAT_INPLACE_MATRIX) { 305 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 306 } else { 307 *newmat = B; 308 } 309 PetscFunctionReturn(0); 310 } 311 312 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 313 { 314 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 315 PetscScalar *y; 316 const PetscScalar *x; 317 const MatScalar *aval=a->val; 318 PetscInt totalslices=a->totalslices; 319 const PetscInt *acolidx=a->colidx; 320 PetscInt i,j; 321 PetscErrorCode ierr; 322 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 323 __m512d vec_x,vec_y,vec_vals; 324 __m256i vec_idx; 325 __mmask8 mask; 326 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 327 __m256i vec_idx2,vec_idx3,vec_idx4; 328 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 329 __m128i vec_idx; 330 __m256d vec_x,vec_y,vec_y2,vec_vals; 331 MatScalar yval; 332 PetscInt r,rows_left,row,nnz_in_row; 333 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 334 __m128d vec_x_tmp; 335 __m256d vec_x,vec_y,vec_y2,vec_vals; 336 MatScalar yval; 337 PetscInt r,rows_left,row,nnz_in_row; 338 #else 339 PetscScalar sum[8]; 340 #endif 341 342 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 343 #pragma disjoint(*x,*y,*aval) 344 #endif 345 346 PetscFunctionBegin; 347 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 348 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 349 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 350 for (i=0; i<totalslices; i++) { /* loop over slices */ 351 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 352 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 353 354 vec_y = _mm512_setzero_pd(); 355 vec_y2 = _mm512_setzero_pd(); 356 vec_y3 = _mm512_setzero_pd(); 357 vec_y4 = _mm512_setzero_pd(); 358 359 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 360 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 361 case 3: 362 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 363 acolidx += 8; aval += 8; 364 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 365 acolidx += 8; aval += 8; 366 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 367 acolidx += 8; aval += 8; 368 j += 3; 369 break; 370 case 2: 371 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 372 acolidx += 8; aval += 8; 373 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 374 acolidx += 8; aval += 8; 375 j += 2; 376 break; 377 case 1: 378 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 379 acolidx += 8; aval += 8; 380 j += 1; 381 break; 382 } 383 #pragma novector 384 for (; j<(a->sliidx[i+1]>>3); j+=4) { 385 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 386 acolidx += 8; aval += 8; 387 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 388 acolidx += 8; aval += 8; 389 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 390 acolidx += 8; aval += 8; 391 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 392 acolidx += 8; aval += 8; 393 } 394 395 vec_y = _mm512_add_pd(vec_y,vec_y2); 396 vec_y = _mm512_add_pd(vec_y,vec_y3); 397 vec_y = _mm512_add_pd(vec_y,vec_y4); 398 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 399 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 400 _mm512_mask_storeu_pd(&y[8*i],mask,vec_y); 401 } else { 402 _mm512_storeu_pd(&y[8*i],vec_y); 403 } 404 } 405 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 406 for (i=0; i<totalslices; i++) { /* loop over full slices */ 407 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 408 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 409 410 /* last slice may have padding rows. Don't use vectorization. */ 411 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 412 rows_left = A->rmap->n - 8*i; 413 for (r=0; r<rows_left; ++r) { 414 yval = (MatScalar)0; 415 row = 8*i + r; 416 nnz_in_row = a->rlen[row]; 417 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 418 y[row] = yval; 419 } 420 break; 421 } 422 423 vec_y = _mm256_setzero_pd(); 424 vec_y2 = _mm256_setzero_pd(); 425 426 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 427 #pragma novector 428 #pragma unroll(2) 429 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 430 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 431 aval += 4; acolidx += 4; 432 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2); 433 aval += 4; acolidx += 4; 434 } 435 436 _mm256_storeu_pd(y+i*8,vec_y); 437 _mm256_storeu_pd(y+i*8+4,vec_y2); 438 } 439 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 440 for (i=0; i<totalslices; i++) { /* loop over full slices */ 441 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 442 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 443 444 vec_y = _mm256_setzero_pd(); 445 vec_y2 = _mm256_setzero_pd(); 446 447 /* last slice may have padding rows. Don't use vectorization. */ 448 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 449 rows_left = A->rmap->n - 8*i; 450 for (r=0; r<rows_left; ++r) { 451 yval = (MatScalar)0; 452 row = 8*i + r; 453 nnz_in_row = a->rlen[row]; 454 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 455 y[row] = yval; 456 } 457 break; 458 } 459 460 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 461 #pragma novector 462 #pragma unroll(2) 463 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 464 vec_vals = _mm256_loadu_pd(aval); 465 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 466 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 467 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 468 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 469 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 470 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 471 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 472 aval += 4; 473 474 vec_vals = _mm256_loadu_pd(aval); 475 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 476 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 477 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 478 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 479 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 480 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 481 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 482 aval += 4; 483 } 484 485 _mm256_storeu_pd(y + i*8, vec_y); 486 _mm256_storeu_pd(y + i*8 + 4, vec_y2); 487 } 488 #else 489 for (i=0; i<totalslices; i++) { /* loop over slices */ 490 for (j=0; j<8; j++) sum[j] = 0.0; 491 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 492 sum[0] += aval[j] * x[acolidx[j]]; 493 sum[1] += aval[j+1] * x[acolidx[j+1]]; 494 sum[2] += aval[j+2] * x[acolidx[j+2]]; 495 sum[3] += aval[j+3] * x[acolidx[j+3]]; 496 sum[4] += aval[j+4] * x[acolidx[j+4]]; 497 sum[5] += aval[j+5] * x[acolidx[j+5]]; 498 sum[6] += aval[j+6] * x[acolidx[j+6]]; 499 sum[7] += aval[j+7] * x[acolidx[j+7]]; 500 } 501 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 502 for (j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j]; 503 } else { 504 for (j=0; j<8; j++) y[8*i+j] = sum[j]; 505 } 506 } 507 #endif 508 509 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 510 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 511 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 512 PetscFunctionReturn(0); 513 } 514 515 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 516 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 517 { 518 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 519 PetscScalar *y,*z; 520 const PetscScalar *x; 521 const MatScalar *aval=a->val; 522 PetscInt totalslices=a->totalslices; 523 const PetscInt *acolidx=a->colidx; 524 PetscInt i,j; 525 PetscErrorCode ierr; 526 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 527 __m512d vec_x,vec_y,vec_vals; 528 __m256i vec_idx; 529 __mmask8 mask; 530 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 531 __m256i vec_idx2,vec_idx3,vec_idx4; 532 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 533 __m128d vec_x_tmp; 534 __m256d vec_x,vec_y,vec_y2,vec_vals; 535 MatScalar yval; 536 PetscInt r,row,nnz_in_row; 537 #else 538 PetscScalar sum[8]; 539 #endif 540 541 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 542 #pragma disjoint(*x,*y,*aval) 543 #endif 544 545 PetscFunctionBegin; 546 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 547 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 548 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 549 for (i=0; i<totalslices; i++) { /* loop over slices */ 550 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 551 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 552 553 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 554 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 555 vec_y = _mm512_mask_loadu_pd(vec_y,mask,&y[8*i]); 556 } else { 557 vec_y = _mm512_loadu_pd(&y[8*i]); 558 } 559 vec_y2 = _mm512_setzero_pd(); 560 vec_y3 = _mm512_setzero_pd(); 561 vec_y4 = _mm512_setzero_pd(); 562 563 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 564 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 565 case 3: 566 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 567 acolidx += 8; aval += 8; 568 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 569 acolidx += 8; aval += 8; 570 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 571 acolidx += 8; aval += 8; 572 j += 3; 573 break; 574 case 2: 575 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 576 acolidx += 8; aval += 8; 577 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 578 acolidx += 8; aval += 8; 579 j += 2; 580 break; 581 case 1: 582 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 583 acolidx += 8; aval += 8; 584 j += 1; 585 break; 586 } 587 #pragma novector 588 for (; j<(a->sliidx[i+1]>>3); j+=4) { 589 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 590 acolidx += 8; aval += 8; 591 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 592 acolidx += 8; aval += 8; 593 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 594 acolidx += 8; aval += 8; 595 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 596 acolidx += 8; aval += 8; 597 } 598 599 vec_y = _mm512_add_pd(vec_y,vec_y2); 600 vec_y = _mm512_add_pd(vec_y,vec_y3); 601 vec_y = _mm512_add_pd(vec_y,vec_y4); 602 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 603 _mm512_mask_storeu_pd(&z[8*i],mask,vec_y); 604 } else { 605 _mm512_storeu_pd(&z[8*i],vec_y); 606 } 607 } 608 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 609 for (i=0; i<totalslices; i++) { /* loop over full slices */ 610 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 611 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 612 613 /* last slice may have padding rows. Don't use vectorization. */ 614 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 615 for (r=0; r<(A->rmap->n & 0x07); ++r) { 616 row = 8*i + r; 617 yval = (MatScalar)0.0; 618 nnz_in_row = a->rlen[row]; 619 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 620 z[row] = y[row] + yval; 621 } 622 break; 623 } 624 625 vec_y = _mm256_loadu_pd(y+8*i); 626 vec_y2 = _mm256_loadu_pd(y+8*i+4); 627 628 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 629 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 630 vec_vals = _mm256_loadu_pd(aval); 631 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 632 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 633 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 634 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 635 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 636 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 637 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 638 aval += 4; 639 640 vec_vals = _mm256_loadu_pd(aval); 641 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 642 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 643 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 644 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 645 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 646 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 647 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 648 aval += 4; 649 } 650 651 _mm256_storeu_pd(z+i*8,vec_y); 652 _mm256_storeu_pd(z+i*8+4,vec_y2); 653 } 654 #else 655 for (i=0; i<totalslices; i++) { /* loop over slices */ 656 for (j=0; j<8; j++) sum[j] = 0.0; 657 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 658 sum[0] += aval[j] * x[acolidx[j]]; 659 sum[1] += aval[j+1] * x[acolidx[j+1]]; 660 sum[2] += aval[j+2] * x[acolidx[j+2]]; 661 sum[3] += aval[j+3] * x[acolidx[j+3]]; 662 sum[4] += aval[j+4] * x[acolidx[j+4]]; 663 sum[5] += aval[j+5] * x[acolidx[j+5]]; 664 sum[6] += aval[j+6] * x[acolidx[j+6]]; 665 sum[7] += aval[j+7] * x[acolidx[j+7]]; 666 } 667 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 668 for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j]; 669 } else { 670 for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j]; 671 } 672 } 673 #endif 674 675 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 676 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 677 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 681 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 682 { 683 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 684 PetscScalar *y; 685 const PetscScalar *x; 686 const MatScalar *aval=a->val; 687 const PetscInt *acolidx=a->colidx; 688 PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices; 689 PetscErrorCode ierr; 690 691 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 692 #pragma disjoint(*x,*y,*aval) 693 #endif 694 695 PetscFunctionBegin; 696 if (A->symmetric) { 697 ierr = MatMultAdd_SeqSELL(A,xx,zz,yy);CHKERRQ(ierr); 698 PetscFunctionReturn(0); 699 } 700 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 701 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 702 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 703 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 704 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 705 for (r=0; r<(A->rmap->n & 0x07); ++r) { 706 row = 8*i + r; 707 nnz_in_row = a->rlen[row]; 708 for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row]; 709 } 710 break; 711 } 712 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 713 y[acolidx[j]] += aval[j] * x[8*i]; 714 y[acolidx[j+1]] += aval[j+1] * x[8*i+1]; 715 y[acolidx[j+2]] += aval[j+2] * x[8*i+2]; 716 y[acolidx[j+3]] += aval[j+3] * x[8*i+3]; 717 y[acolidx[j+4]] += aval[j+4] * x[8*i+4]; 718 y[acolidx[j+5]] += aval[j+5] * x[8*i+5]; 719 y[acolidx[j+6]] += aval[j+6] * x[8*i+6]; 720 y[acolidx[j+7]] += aval[j+7] * x[8*i+7]; 721 } 722 } 723 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 724 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 725 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 726 PetscFunctionReturn(0); 727 } 728 729 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 730 { 731 PetscErrorCode ierr; 732 733 PetscFunctionBegin; 734 if (A->symmetric) { 735 ierr = MatMult_SeqSELL(A,xx,yy);CHKERRQ(ierr); 736 } else { 737 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 738 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 739 } 740 PetscFunctionReturn(0); 741 } 742 743 /* 744 Checks for missing diagonals 745 */ 746 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 747 { 748 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 749 PetscInt *diag,i; 750 PetscErrorCode ierr; 751 752 PetscFunctionBegin; 753 *missing = PETSC_FALSE; 754 if (A->rmap->n > 0 && !(a->colidx)) { 755 *missing = PETSC_TRUE; 756 if (d) *d = 0; 757 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 758 } else { 759 diag = a->diag; 760 for (i=0; i<A->rmap->n; i++) { 761 if (diag[i] == -1) { 762 *missing = PETSC_TRUE; 763 if (d) *d = i; 764 ierr = PetscInfo(A,"Matrix is missing diagonal number %" PetscInt_FMT "\n",i);CHKERRQ(ierr); 765 break; 766 } 767 } 768 } 769 PetscFunctionReturn(0); 770 } 771 772 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 773 { 774 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 775 PetscInt i,j,m=A->rmap->n,shift; 776 PetscErrorCode ierr; 777 778 PetscFunctionBegin; 779 if (!a->diag) { 780 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 781 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 782 a->free_diag = PETSC_TRUE; 783 } 784 for (i=0; i<m; i++) { /* loop over rows */ 785 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 786 a->diag[i] = -1; 787 for (j=0; j<a->rlen[i]; j++) { 788 if (a->colidx[shift+j*8] == i) { 789 a->diag[i] = shift+j*8; 790 break; 791 } 792 } 793 } 794 PetscFunctionReturn(0); 795 } 796 797 /* 798 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 799 */ 800 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 801 { 802 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 803 PetscInt i,*diag,m = A->rmap->n; 804 MatScalar *val = a->val; 805 PetscScalar *idiag,*mdiag; 806 PetscErrorCode ierr; 807 808 PetscFunctionBegin; 809 if (a->idiagvalid) PetscFunctionReturn(0); 810 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 811 diag = a->diag; 812 if (!a->idiag) { 813 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 814 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 815 val = a->val; 816 } 817 mdiag = a->mdiag; 818 idiag = a->idiag; 819 820 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 821 for (i=0; i<m; i++) { 822 mdiag[i] = val[diag[i]]; 823 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 824 if (PetscRealPart(fshift)) { 825 ierr = PetscInfo(A,"Zero diagonal on row %" PetscInt_FMT "\n",i);CHKERRQ(ierr); 826 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 827 A->factorerror_zeropivot_value = 0.0; 828 A->factorerror_zeropivot_row = i; 829 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %" PetscInt_FMT,i); 830 } 831 idiag[i] = 1.0/val[diag[i]]; 832 } 833 ierr = PetscLogFlops(m);CHKERRQ(ierr); 834 } else { 835 for (i=0; i<m; i++) { 836 mdiag[i] = val[diag[i]]; 837 idiag[i] = omega/(fshift + val[diag[i]]); 838 } 839 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 840 } 841 a->idiagvalid = PETSC_TRUE; 842 PetscFunctionReturn(0); 843 } 844 845 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 846 { 847 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 848 PetscErrorCode ierr; 849 850 PetscFunctionBegin; 851 ierr = PetscArrayzero(a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr); 852 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 853 PetscFunctionReturn(0); 854 } 855 856 PetscErrorCode MatDestroy_SeqSELL(Mat A) 857 { 858 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 859 PetscErrorCode ierr; 860 861 PetscFunctionBegin; 862 #if defined(PETSC_USE_LOG) 863 PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->n,A->cmap->n,a->nz); 864 #endif 865 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 866 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 867 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 868 ierr = PetscFree(a->diag);CHKERRQ(ierr); 869 ierr = PetscFree(a->rlen);CHKERRQ(ierr); 870 ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 871 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 872 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 873 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 874 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 875 ierr = PetscFree2(a->getrowcols,a->getrowvals);CHKERRQ(ierr); 876 877 ierr = PetscFree(A->data);CHKERRQ(ierr); 878 879 ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr); 880 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 881 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 882 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 887 { 888 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 889 PetscErrorCode ierr; 890 891 PetscFunctionBegin; 892 switch (op) { 893 case MAT_ROW_ORIENTED: 894 a->roworiented = flg; 895 break; 896 case MAT_KEEP_NONZERO_PATTERN: 897 a->keepnonzeropattern = flg; 898 break; 899 case MAT_NEW_NONZERO_LOCATIONS: 900 a->nonew = (flg ? 0 : 1); 901 break; 902 case MAT_NEW_NONZERO_LOCATION_ERR: 903 a->nonew = (flg ? -1 : 0); 904 break; 905 case MAT_NEW_NONZERO_ALLOCATION_ERR: 906 a->nonew = (flg ? -2 : 0); 907 break; 908 case MAT_UNUSED_NONZERO_LOCATION_ERR: 909 a->nounused = (flg ? -1 : 0); 910 break; 911 case MAT_FORCE_DIAGONAL_ENTRIES: 912 case MAT_IGNORE_OFF_PROC_ENTRIES: 913 case MAT_USE_HASH_TABLE: 914 case MAT_SORTED_FULL: 915 ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 916 break; 917 case MAT_SPD: 918 case MAT_SYMMETRIC: 919 case MAT_STRUCTURALLY_SYMMETRIC: 920 case MAT_HERMITIAN: 921 case MAT_SYMMETRY_ETERNAL: 922 /* These options are handled directly by MatSetOption() */ 923 break; 924 default: 925 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 926 } 927 PetscFunctionReturn(0); 928 } 929 930 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 931 { 932 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 933 PetscInt i,j,n,shift; 934 PetscScalar *x,zero=0.0; 935 PetscErrorCode ierr; 936 937 PetscFunctionBegin; 938 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 939 PetscCheckFalse(n != A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 940 941 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 942 PetscInt *diag=a->diag; 943 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 944 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 945 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 946 PetscFunctionReturn(0); 947 } 948 949 ierr = VecSet(v,zero);CHKERRQ(ierr); 950 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 951 for (i=0; i<n; i++) { /* loop over rows */ 952 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 953 x[i] = 0; 954 for (j=0; j<a->rlen[i]; j++) { 955 if (a->colidx[shift+j*8] == i) { 956 x[i] = a->val[shift+j*8]; 957 break; 958 } 959 } 960 } 961 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } 964 965 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 966 { 967 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 968 const PetscScalar *l,*r; 969 PetscInt i,j,m,n,row; 970 PetscErrorCode ierr; 971 972 PetscFunctionBegin; 973 if (ll) { 974 /* The local size is used so that VecMPI can be passed to this routine 975 by MatDiagonalScale_MPISELL */ 976 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 977 PetscCheckFalse(m != A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 978 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 979 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 980 if (i == a->totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 981 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 982 if (row < (A->rmap->n & 0x07)) a->val[j] *= l[8*i+row]; 983 } 984 } else { 985 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 986 a->val[j] *= l[8*i+row]; 987 } 988 } 989 } 990 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 991 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 992 } 993 if (rr) { 994 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 995 PetscCheckFalse(n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 996 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 997 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 998 if (i == a->totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 999 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 1000 if (row < (A->rmap->n & 0x07)) a->val[j] *= r[a->colidx[j]]; 1001 } 1002 } else { 1003 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 1004 a->val[j] *= r[a->colidx[j]]; 1005 } 1006 } 1007 } 1008 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 1009 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1010 } 1011 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } 1014 1015 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1016 { 1017 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1018 PetscInt *cp,i,k,low,high,t,row,col,l; 1019 PetscInt shift; 1020 MatScalar *vp; 1021 1022 PetscFunctionBegin; 1023 for (k=0; k<m; k++) { /* loop over requested rows */ 1024 row = im[k]; 1025 if (row<0) continue; 1026 PetscCheck(row < A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->n-1); 1027 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1028 cp = a->colidx+shift; /* pointer to the row */ 1029 vp = a->val+shift; /* pointer to the row */ 1030 for (l=0; l<n; l++) { /* loop over requested columns */ 1031 col = in[l]; 1032 if (col<0) continue; 1033 PetscCheck(col < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %" PetscInt_FMT " max %" PetscInt_FMT,col,A->cmap->n-1); 1034 high = a->rlen[row]; low = 0; /* assume unsorted */ 1035 while (high-low > 5) { 1036 t = (low+high)/2; 1037 if (*(cp+t*8) > col) high = t; 1038 else low = t; 1039 } 1040 for (i=low; i<high; i++) { 1041 if (*(cp+8*i) > col) break; 1042 if (*(cp+8*i) == col) { 1043 *v++ = *(vp+8*i); 1044 goto finished; 1045 } 1046 } 1047 *v++ = 0.0; 1048 finished:; 1049 } 1050 } 1051 PetscFunctionReturn(0); 1052 } 1053 1054 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 1055 { 1056 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1057 PetscInt i,j,m=A->rmap->n,shift; 1058 const char *name; 1059 PetscViewerFormat format; 1060 PetscErrorCode ierr; 1061 1062 PetscFunctionBegin; 1063 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1064 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1065 PetscInt nofinalvalue = 0; 1066 /* 1067 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 1068 nofinalvalue = 1; 1069 } 1070 */ 1071 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1072 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n",m,A->cmap->n);CHKERRQ(ierr); 1073 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %" PetscInt_FMT " \n",a->nz);CHKERRQ(ierr); 1074 #if defined(PETSC_USE_COMPLEX) 1075 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1076 #else 1077 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1078 #endif 1079 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 1080 1081 for (i=0; i<m; i++) { 1082 shift = a->sliidx[i>>3]+(i&0x07); 1083 for (j=0; j<a->rlen[i]; j++) { 1084 #if defined(PETSC_USE_COMPLEX) 1085 ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1086 #else 1087 ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1088 #endif 1089 } 1090 } 1091 /* 1092 if (nofinalvalue) { 1093 #if defined(PETSC_USE_COMPLEX) 1094 ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 1095 #else 1096 ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 1097 #endif 1098 } 1099 */ 1100 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 1101 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 1102 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1103 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 1104 PetscFunctionReturn(0); 1105 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1106 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1107 for (i=0; i<m; i++) { 1108 ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i);CHKERRQ(ierr); 1109 shift = a->sliidx[i>>3]+(i&0x07); 1110 for (j=0; j<a->rlen[i]; j++) { 1111 #if defined(PETSC_USE_COMPLEX) 1112 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1113 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1114 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1115 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1116 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 1117 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1118 } 1119 #else 1120 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 1121 #endif 1122 } 1123 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1124 } 1125 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1126 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 1127 PetscInt cnt=0,jcnt; 1128 PetscScalar value; 1129 #if defined(PETSC_USE_COMPLEX) 1130 PetscBool realonly=PETSC_TRUE; 1131 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1132 if (PetscImaginaryPart(a->val[i]) != 0.0) { 1133 realonly = PETSC_FALSE; 1134 break; 1135 } 1136 } 1137 #endif 1138 1139 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1140 for (i=0; i<m; i++) { 1141 jcnt = 0; 1142 shift = a->sliidx[i>>3]+(i&0x07); 1143 for (j=0; j<A->cmap->n; j++) { 1144 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 1145 value = a->val[cnt++]; 1146 jcnt++; 1147 } else { 1148 value = 0.0; 1149 } 1150 #if defined(PETSC_USE_COMPLEX) 1151 if (realonly) { 1152 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 1153 } else { 1154 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 1155 } 1156 #else 1157 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 1158 #endif 1159 } 1160 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1161 } 1162 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1163 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 1164 PetscInt fshift=1; 1165 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1166 #if defined(PETSC_USE_COMPLEX) 1167 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 1168 #else 1169 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 1170 #endif 1171 ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 1172 for (i=0; i<m; i++) { 1173 shift = a->sliidx[i>>3]+(i&0x07); 1174 for (j=0; j<a->rlen[i]; j++) { 1175 #if defined(PETSC_USE_COMPLEX) 1176 ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1177 #else 1178 ierr = PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1179 #endif 1180 } 1181 } 1182 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1183 } else if (format == PETSC_VIEWER_NATIVE) { 1184 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 1185 PetscInt row; 1186 ierr = PetscViewerASCIIPrintf(viewer,"slice %" PetscInt_FMT ": %" PetscInt_FMT " %" PetscInt_FMT "\n",i,a->sliidx[i],a->sliidx[i+1]);CHKERRQ(ierr); 1187 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 1188 #if defined(PETSC_USE_COMPLEX) 1189 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1190 ierr = PetscViewerASCIIPrintf(viewer," %" PetscInt_FMT " %" PetscInt_FMT " %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1191 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1192 ierr = PetscViewerASCIIPrintf(viewer," %" PetscInt_FMT " %" PetscInt_FMT " %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1193 } else { 1194 ierr = PetscViewerASCIIPrintf(viewer," %" PetscInt_FMT " %" PetscInt_FMT " %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1195 } 1196 #else 1197 ierr = PetscViewerASCIIPrintf(viewer," %" PetscInt_FMT " %" PetscInt_FMT " %g\n",8*i+row,a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1198 #endif 1199 } 1200 } 1201 } else { 1202 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1203 if (A->factortype) { 1204 for (i=0; i<m; i++) { 1205 shift = a->sliidx[i>>3]+(i&0x07); 1206 ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i);CHKERRQ(ierr); 1207 /* L part */ 1208 for (j=shift; j<a->diag[i]; j+=8) { 1209 #if defined(PETSC_USE_COMPLEX) 1210 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 1211 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1212 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 1213 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1214 } else { 1215 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1216 } 1217 #else 1218 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1219 #endif 1220 } 1221 /* diagonal */ 1222 j = a->diag[i]; 1223 #if defined(PETSC_USE_COMPLEX) 1224 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1225 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1226 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1227 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1228 } else { 1229 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1230 } 1231 #else 1232 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1233 #endif 1234 1235 /* U part */ 1236 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1237 #if defined(PETSC_USE_COMPLEX) 1238 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1239 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1240 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1241 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1242 } else { 1243 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1244 } 1245 #else 1246 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1247 #endif 1248 } 1249 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1250 } 1251 } else { 1252 for (i=0; i<m; i++) { 1253 shift = a->sliidx[i>>3]+(i&0x07); 1254 ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i);CHKERRQ(ierr); 1255 for (j=0; j<a->rlen[i]; j++) { 1256 #if defined(PETSC_USE_COMPLEX) 1257 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1258 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1259 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1260 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1261 } else { 1262 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1263 } 1264 #else 1265 ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1266 #endif 1267 } 1268 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1269 } 1270 } 1271 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1272 } 1273 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #include <petscdraw.h> 1278 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1279 { 1280 Mat A=(Mat)Aa; 1281 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1282 PetscInt i,j,m=A->rmap->n,shift; 1283 int color; 1284 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1285 PetscViewer viewer; 1286 PetscViewerFormat format; 1287 PetscErrorCode ierr; 1288 1289 PetscFunctionBegin; 1290 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1291 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1292 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1293 1294 /* loop over matrix elements drawing boxes */ 1295 1296 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1297 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1298 /* Blue for negative, Cyan for zero and Red for positive */ 1299 color = PETSC_DRAW_BLUE; 1300 for (i=0; i<m; i++) { 1301 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1302 y_l = m - i - 1.0; y_r = y_l + 1.0; 1303 for (j=0; j<a->rlen[i]; j++) { 1304 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1305 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1306 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1307 } 1308 } 1309 color = PETSC_DRAW_CYAN; 1310 for (i=0; i<m; i++) { 1311 shift = a->sliidx[i>>3]+(i&0x07); 1312 y_l = m - i - 1.0; y_r = y_l + 1.0; 1313 for (j=0; j<a->rlen[i]; j++) { 1314 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1315 if (a->val[shift+8*j] != 0.) continue; 1316 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1317 } 1318 } 1319 color = PETSC_DRAW_RED; 1320 for (i=0; i<m; i++) { 1321 shift = a->sliidx[i>>3]+(i&0x07); 1322 y_l = m - i - 1.0; y_r = y_l + 1.0; 1323 for (j=0; j<a->rlen[i]; j++) { 1324 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1325 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1326 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1327 } 1328 } 1329 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1330 } else { 1331 /* use contour shading to indicate magnitude of values */ 1332 /* first determine max of all nonzero values */ 1333 PetscReal minv=0.0,maxv=0.0; 1334 PetscInt count=0; 1335 PetscDraw popup; 1336 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1337 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1338 } 1339 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1340 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1341 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1342 1343 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1344 for (i=0; i<m; i++) { 1345 shift = a->sliidx[i>>3]+(i&0x07); 1346 y_l = m - i - 1.0; 1347 y_r = y_l + 1.0; 1348 for (j=0; j<a->rlen[i]; j++) { 1349 x_l = a->colidx[shift+j*8]; 1350 x_r = x_l + 1.0; 1351 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1352 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1353 count++; 1354 } 1355 } 1356 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1357 } 1358 PetscFunctionReturn(0); 1359 } 1360 1361 #include <petscdraw.h> 1362 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1363 { 1364 PetscDraw draw; 1365 PetscReal xr,yr,xl,yl,h,w; 1366 PetscBool isnull; 1367 PetscErrorCode ierr; 1368 1369 PetscFunctionBegin; 1370 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1371 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1372 if (isnull) PetscFunctionReturn(0); 1373 1374 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1375 xr += w; yr += h; xl = -w; yl = -h; 1376 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1377 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1378 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1379 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1380 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1381 PetscFunctionReturn(0); 1382 } 1383 1384 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1385 { 1386 PetscBool iascii,isbinary,isdraw; 1387 PetscErrorCode ierr; 1388 1389 PetscFunctionBegin; 1390 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1391 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1392 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1393 if (iascii) { 1394 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1395 } else if (isbinary) { 1396 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1397 } else if (isdraw) { 1398 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1404 { 1405 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1406 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1407 MatScalar *vp; 1408 PetscErrorCode ierr; 1409 1410 PetscFunctionBegin; 1411 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1412 /* To do: compress out the unused elements */ 1413 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1414 ierr = PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " allocated %" PetscInt_FMT " used (%" PetscInt_FMT " nonzeros+%" PetscInt_FMT " paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1415 ierr = PetscInfo(A,"Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n",a->reallocs);CHKERRQ(ierr); 1416 ierr = PetscInfo(A,"Maximum nonzeros in any row is %" PetscInt_FMT "\n",a->rlenmax);CHKERRQ(ierr); 1417 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1418 for (i=0; i<a->totalslices; ++i) { 1419 shift = a->sliidx[i]; /* starting index of the slice */ 1420 cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1421 vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1422 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1423 row = 8*i + row_in_slice; 1424 nrow = a->rlen[row]; /* number of nonzeros in row */ 1425 /* 1426 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1427 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1428 */ 1429 lastcol = 0; 1430 if (nrow>0) { /* nonempty row */ 1431 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1432 } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1433 for (j=1;j<8;j++) { 1434 if (a->rlen[8*i+j]) { 1435 lastcol = cp[j]; 1436 break; 1437 } 1438 } 1439 } else { 1440 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1441 } 1442 1443 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1444 cp[8*k+row_in_slice] = lastcol; 1445 vp[8*k+row_in_slice] = (MatScalar)0; 1446 } 1447 } 1448 } 1449 1450 A->info.mallocs += a->reallocs; 1451 a->reallocs = 0; 1452 1453 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1454 PetscFunctionReturn(0); 1455 } 1456 1457 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1458 { 1459 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1460 1461 PetscFunctionBegin; 1462 info->block_size = 1.0; 1463 info->nz_allocated = a->maxallocmat; 1464 info->nz_used = a->sliidx[a->totalslices]; /* include padding zeros */ 1465 info->nz_unneeded = (a->maxallocmat-a->sliidx[a->totalslices]); 1466 info->assemblies = A->num_ass; 1467 info->mallocs = A->info.mallocs; 1468 info->memory = ((PetscObject)A)->mem; 1469 if (A->factortype) { 1470 info->fill_ratio_given = A->info.fill_ratio_given; 1471 info->fill_ratio_needed = A->info.fill_ratio_needed; 1472 info->factor_mallocs = A->info.factor_mallocs; 1473 } else { 1474 info->fill_ratio_given = 0; 1475 info->fill_ratio_needed = 0; 1476 info->factor_mallocs = 0; 1477 } 1478 PetscFunctionReturn(0); 1479 } 1480 1481 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1482 { 1483 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1484 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1485 PetscInt *cp,nonew=a->nonew,lastcol=-1; 1486 MatScalar *vp,value; 1487 PetscErrorCode ierr; 1488 1489 PetscFunctionBegin; 1490 for (k=0; k<m; k++) { /* loop over added rows */ 1491 row = im[k]; 1492 if (row < 0) continue; 1493 PetscCheck(row < A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->n-1); 1494 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1495 cp = a->colidx+shift; /* pointer to the row */ 1496 vp = a->val+shift; /* pointer to the row */ 1497 nrow = a->rlen[row]; 1498 low = 0; 1499 high = nrow; 1500 1501 for (l=0; l<n; l++) { /* loop over added columns */ 1502 col = in[l]; 1503 if (col<0) continue; 1504 PetscCheck(col < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %" PetscInt_FMT " max %" PetscInt_FMT,col,A->cmap->n-1); 1505 if (a->roworiented) { 1506 value = v[l+k*n]; 1507 } else { 1508 value = v[k+l*m]; 1509 } 1510 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1511 1512 /* search in this row for the specified column, i indicates the column to be set */ 1513 if (col <= lastcol) low = 0; 1514 else high = nrow; 1515 lastcol = col; 1516 while (high-low > 5) { 1517 t = (low+high)/2; 1518 if (*(cp+t*8) > col) high = t; 1519 else low = t; 1520 } 1521 for (i=low; i<high; i++) { 1522 if (*(cp+i*8) > col) break; 1523 if (*(cp+i*8) == col) { 1524 if (is == ADD_VALUES) *(vp+i*8) += value; 1525 else *(vp+i*8) = value; 1526 low = i + 1; 1527 goto noinsert; 1528 } 1529 } 1530 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1531 if (nonew == 1) goto noinsert; 1532 PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 1533 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1534 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1535 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1536 for (ii=nrow-1; ii>=i; ii--) { 1537 *(cp+(ii+1)*8) = *(cp+ii*8); 1538 *(vp+(ii+1)*8) = *(vp+ii*8); 1539 } 1540 a->rlen[row]++; 1541 *(cp+i*8) = col; 1542 *(vp+i*8) = value; 1543 a->nz++; 1544 A->nonzerostate++; 1545 low = i+1; high++; nrow++; 1546 noinsert:; 1547 } 1548 a->rlen[row] = nrow; 1549 } 1550 PetscFunctionReturn(0); 1551 } 1552 1553 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1554 { 1555 PetscErrorCode ierr; 1556 1557 PetscFunctionBegin; 1558 /* If the two matrices have the same copy implementation, use fast copy. */ 1559 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1560 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1561 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1562 1563 PetscCheckFalse(a->sliidx[a->totalslices] != b->sliidx[b->totalslices],PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1564 ierr = PetscArraycpy(b->val,a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr); 1565 } else { 1566 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1567 } 1568 PetscFunctionReturn(0); 1569 } 1570 1571 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1572 { 1573 PetscErrorCode ierr; 1574 1575 PetscFunctionBegin; 1576 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 1577 PetscFunctionReturn(0); 1578 } 1579 1580 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1581 { 1582 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1583 1584 PetscFunctionBegin; 1585 *array = a->val; 1586 PetscFunctionReturn(0); 1587 } 1588 1589 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1590 { 1591 PetscFunctionBegin; 1592 PetscFunctionReturn(0); 1593 } 1594 1595 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1596 { 1597 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1598 PetscInt i; 1599 MatScalar *aval=a->val; 1600 1601 PetscFunctionBegin; 1602 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1603 PetscFunctionReturn(0); 1604 } 1605 1606 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1607 { 1608 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1609 PetscInt i; 1610 MatScalar *aval=a->val; 1611 PetscErrorCode ierr; 1612 1613 PetscFunctionBegin; 1614 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1615 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1616 PetscFunctionReturn(0); 1617 } 1618 1619 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1620 { 1621 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1622 MatScalar *aval=a->val; 1623 PetscScalar oalpha=alpha; 1624 PetscBLASInt one=1,size; 1625 PetscErrorCode ierr; 1626 1627 PetscFunctionBegin; 1628 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1629 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1630 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1631 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1636 { 1637 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1638 PetscErrorCode ierr; 1639 1640 PetscFunctionBegin; 1641 if (!Y->preallocated || !y->nz) { 1642 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1643 } 1644 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1645 PetscFunctionReturn(0); 1646 } 1647 1648 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1649 { 1650 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1651 PetscScalar *x,sum,*t; 1652 const MatScalar *idiag=NULL,*mdiag; 1653 const PetscScalar *b,*xb; 1654 PetscInt n,m=A->rmap->n,i,j,shift; 1655 const PetscInt *diag; 1656 PetscErrorCode ierr; 1657 1658 PetscFunctionBegin; 1659 its = its*lits; 1660 1661 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1662 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1663 a->fshift = fshift; 1664 a->omega = omega; 1665 1666 diag = a->diag; 1667 t = a->ssor_work; 1668 idiag = a->idiag; 1669 mdiag = a->mdiag; 1670 1671 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1672 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1673 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1674 PetscCheckFalse(flag == SOR_APPLY_UPPER,PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1675 PetscCheckFalse(flag == SOR_APPLY_LOWER,PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1676 PetscCheckFalse(flag & SOR_EISENSTAT,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1677 1678 if (flag & SOR_ZERO_INITIAL_GUESS) { 1679 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1680 for (i=0; i<m; i++) { 1681 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1682 sum = b[i]; 1683 n = (diag[i]-shift)/8; 1684 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1685 t[i] = sum; 1686 x[i] = sum*idiag[i]; 1687 } 1688 xb = t; 1689 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1690 } else xb = b; 1691 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1692 for (i=m-1; i>=0; i--) { 1693 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1694 sum = xb[i]; 1695 n = a->rlen[i]-(diag[i]-shift)/8-1; 1696 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1697 if (xb == b) { 1698 x[i] = sum*idiag[i]; 1699 } else { 1700 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1701 } 1702 } 1703 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1704 } 1705 its--; 1706 } 1707 while (its--) { 1708 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1709 for (i=0; i<m; i++) { 1710 /* lower */ 1711 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1712 sum = b[i]; 1713 n = (diag[i]-shift)/8; 1714 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1715 t[i] = sum; /* save application of the lower-triangular part */ 1716 /* upper */ 1717 n = a->rlen[i]-(diag[i]-shift)/8-1; 1718 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1719 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1720 } 1721 xb = t; 1722 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1723 } else xb = b; 1724 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1725 for (i=m-1; i>=0; i--) { 1726 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1727 sum = xb[i]; 1728 if (xb == b) { 1729 /* whole matrix (no checkpointing available) */ 1730 n = a->rlen[i]; 1731 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1732 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1733 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1734 n = a->rlen[i]-(diag[i]-shift)/8-1; 1735 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1736 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1737 } 1738 } 1739 if (xb == b) { 1740 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1741 } else { 1742 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1743 } 1744 } 1745 } 1746 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1747 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1748 PetscFunctionReturn(0); 1749 } 1750 1751 /* -------------------------------------------------------------------*/ 1752 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1753 MatGetRow_SeqSELL, 1754 MatRestoreRow_SeqSELL, 1755 MatMult_SeqSELL, 1756 /* 4*/ MatMultAdd_SeqSELL, 1757 MatMultTranspose_SeqSELL, 1758 MatMultTransposeAdd_SeqSELL, 1759 NULL, 1760 NULL, 1761 NULL, 1762 /* 10*/ NULL, 1763 NULL, 1764 NULL, 1765 MatSOR_SeqSELL, 1766 NULL, 1767 /* 15*/ MatGetInfo_SeqSELL, 1768 MatEqual_SeqSELL, 1769 MatGetDiagonal_SeqSELL, 1770 MatDiagonalScale_SeqSELL, 1771 NULL, 1772 /* 20*/ NULL, 1773 MatAssemblyEnd_SeqSELL, 1774 MatSetOption_SeqSELL, 1775 MatZeroEntries_SeqSELL, 1776 /* 24*/ NULL, 1777 NULL, 1778 NULL, 1779 NULL, 1780 NULL, 1781 /* 29*/ MatSetUp_SeqSELL, 1782 NULL, 1783 NULL, 1784 NULL, 1785 NULL, 1786 /* 34*/ MatDuplicate_SeqSELL, 1787 NULL, 1788 NULL, 1789 NULL, 1790 NULL, 1791 /* 39*/ NULL, 1792 NULL, 1793 NULL, 1794 MatGetValues_SeqSELL, 1795 MatCopy_SeqSELL, 1796 /* 44*/ NULL, 1797 MatScale_SeqSELL, 1798 MatShift_SeqSELL, 1799 NULL, 1800 NULL, 1801 /* 49*/ NULL, 1802 NULL, 1803 NULL, 1804 NULL, 1805 NULL, 1806 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1807 NULL, 1808 NULL, 1809 NULL, 1810 NULL, 1811 /* 59*/ NULL, 1812 MatDestroy_SeqSELL, 1813 MatView_SeqSELL, 1814 NULL, 1815 NULL, 1816 /* 64*/ NULL, 1817 NULL, 1818 NULL, 1819 NULL, 1820 NULL, 1821 /* 69*/ NULL, 1822 NULL, 1823 NULL, 1824 NULL, 1825 NULL, 1826 /* 74*/ NULL, 1827 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1828 NULL, 1829 NULL, 1830 NULL, 1831 /* 79*/ NULL, 1832 NULL, 1833 NULL, 1834 NULL, 1835 NULL, 1836 /* 84*/ NULL, 1837 NULL, 1838 NULL, 1839 NULL, 1840 NULL, 1841 /* 89*/ NULL, 1842 NULL, 1843 NULL, 1844 NULL, 1845 NULL, 1846 /* 94*/ NULL, 1847 NULL, 1848 NULL, 1849 NULL, 1850 NULL, 1851 /* 99*/ NULL, 1852 NULL, 1853 NULL, 1854 MatConjugate_SeqSELL, 1855 NULL, 1856 /*104*/ NULL, 1857 NULL, 1858 NULL, 1859 NULL, 1860 NULL, 1861 /*109*/ NULL, 1862 NULL, 1863 NULL, 1864 NULL, 1865 MatMissingDiagonal_SeqSELL, 1866 /*114*/ NULL, 1867 NULL, 1868 NULL, 1869 NULL, 1870 NULL, 1871 /*119*/ NULL, 1872 NULL, 1873 NULL, 1874 NULL, 1875 NULL, 1876 /*124*/ NULL, 1877 NULL, 1878 NULL, 1879 NULL, 1880 NULL, 1881 /*129*/ NULL, 1882 NULL, 1883 NULL, 1884 NULL, 1885 NULL, 1886 /*134*/ NULL, 1887 NULL, 1888 NULL, 1889 NULL, 1890 NULL, 1891 /*139*/ NULL, 1892 NULL, 1893 NULL, 1894 MatFDColoringSetUp_SeqXAIJ, 1895 NULL, 1896 /*144*/ NULL, 1897 NULL, 1898 NULL, 1899 NULL 1900 }; 1901 1902 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1903 { 1904 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1905 PetscErrorCode ierr; 1906 1907 PetscFunctionBegin; 1908 PetscCheckFalse(!a->nonew,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1909 1910 /* allocate space for values if not already there */ 1911 if (!a->saved_values) { 1912 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1913 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1914 } 1915 1916 /* copy values over */ 1917 ierr = PetscArraycpy(a->saved_values,a->val,a->sliidx[a->totalslices]);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1922 { 1923 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1924 PetscErrorCode ierr; 1925 1926 PetscFunctionBegin; 1927 PetscCheckFalse(!a->nonew,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1928 PetscCheckFalse(!a->saved_values,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1929 ierr = PetscArraycpy(a->val,a->saved_values,a->sliidx[a->totalslices]);CHKERRQ(ierr); 1930 PetscFunctionReturn(0); 1931 } 1932 1933 /*@C 1934 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1935 1936 Not Collective 1937 1938 Input Parameters: 1939 . mat - a MATSEQSELL matrix 1940 . array - pointer to the data 1941 1942 Level: intermediate 1943 1944 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1945 @*/ 1946 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1947 { 1948 PetscErrorCode ierr; 1949 1950 PetscFunctionBegin; 1951 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1952 PetscFunctionReturn(0); 1953 } 1954 1955 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1956 { 1957 Mat_SeqSELL *b; 1958 PetscMPIInt size; 1959 PetscErrorCode ierr; 1960 1961 PetscFunctionBegin; 1962 ierr = PetscCitationsRegister(citation,&cited);CHKERRQ(ierr); 1963 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 1964 PetscCheckFalse(size > 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1965 1966 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1967 1968 B->data = (void*)b; 1969 1970 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1971 1972 b->row = NULL; 1973 b->col = NULL; 1974 b->icol = NULL; 1975 b->reallocs = 0; 1976 b->ignorezeroentries = PETSC_FALSE; 1977 b->roworiented = PETSC_TRUE; 1978 b->nonew = 0; 1979 b->diag = NULL; 1980 b->solve_work = NULL; 1981 B->spptr = NULL; 1982 b->saved_values = NULL; 1983 b->idiag = NULL; 1984 b->mdiag = NULL; 1985 b->ssor_work = NULL; 1986 b->omega = 1.0; 1987 b->fshift = 0.0; 1988 b->idiagvalid = PETSC_FALSE; 1989 b->keepnonzeropattern = PETSC_FALSE; 1990 1991 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1992 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1993 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1994 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1995 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1996 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1997 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1998 PetscFunctionReturn(0); 1999 } 2000 2001 /* 2002 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 2003 */ 2004 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 2005 { 2006 Mat_SeqSELL *c = (Mat_SeqSELL*)C->data,*a = (Mat_SeqSELL*)A->data; 2007 PetscInt i,m=A->rmap->n; 2008 PetscInt totalslices=a->totalslices; 2009 PetscErrorCode ierr; 2010 2011 PetscFunctionBegin; 2012 C->factortype = A->factortype; 2013 c->row = NULL; 2014 c->col = NULL; 2015 c->icol = NULL; 2016 c->reallocs = 0; 2017 C->assembled = PETSC_TRUE; 2018 2019 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 2020 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 2021 2022 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 2023 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2024 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 2025 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 2026 2027 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 2028 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 2029 2030 /* allocate the matrix space */ 2031 if (mallocmatspace) { 2032 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 2033 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2034 2035 c->singlemalloc = PETSC_TRUE; 2036 2037 if (m > 0) { 2038 ierr = PetscArraycpy(c->colidx,a->colidx,a->maxallocmat);CHKERRQ(ierr); 2039 if (cpvalues == MAT_COPY_VALUES) { 2040 ierr = PetscArraycpy(c->val,a->val,a->maxallocmat);CHKERRQ(ierr); 2041 } else { 2042 ierr = PetscArrayzero(c->val,a->maxallocmat);CHKERRQ(ierr); 2043 } 2044 } 2045 } 2046 2047 c->ignorezeroentries = a->ignorezeroentries; 2048 c->roworiented = a->roworiented; 2049 c->nonew = a->nonew; 2050 if (a->diag) { 2051 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 2052 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2053 for (i=0; i<m; i++) { 2054 c->diag[i] = a->diag[i]; 2055 } 2056 } else c->diag = NULL; 2057 2058 c->solve_work = NULL; 2059 c->saved_values = NULL; 2060 c->idiag = NULL; 2061 c->ssor_work = NULL; 2062 c->keepnonzeropattern = a->keepnonzeropattern; 2063 c->free_val = PETSC_TRUE; 2064 c->free_colidx = PETSC_TRUE; 2065 2066 c->maxallocmat = a->maxallocmat; 2067 c->maxallocrow = a->maxallocrow; 2068 c->rlenmax = a->rlenmax; 2069 c->nz = a->nz; 2070 C->preallocated = PETSC_TRUE; 2071 2072 c->nonzerorowcnt = a->nonzerorowcnt; 2073 C->nonzerostate = A->nonzerostate; 2074 2075 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2076 PetscFunctionReturn(0); 2077 } 2078 2079 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 2080 { 2081 PetscErrorCode ierr; 2082 2083 PetscFunctionBegin; 2084 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 2085 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 2086 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 2087 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 2088 } 2089 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2090 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 2091 PetscFunctionReturn(0); 2092 } 2093 2094 /*MC 2095 MATSEQSELL - MATSEQSELL = "seqsell" - A matrix type to be used for sequential sparse matrices, 2096 based on the sliced Ellpack format 2097 2098 Options Database Keys: 2099 . -mat_type seqsell - sets the matrix type to "seqsell" during a call to MatSetFromOptions() 2100 2101 Level: beginner 2102 2103 .seealso: MatCreateSeqSell(), MATSELL, MATMPISELL, MATSEQAIJ, MATAIJ, MATMPIAIJ 2104 M*/ 2105 2106 /*MC 2107 MATSELL - MATSELL = "sell" - A matrix type to be used for sparse matrices. 2108 2109 This matrix type is identical to MATSEQSELL when constructed with a single process communicator, 2110 and MATMPISELL otherwise. As a result, for single process communicators, 2111 MatSeqSELLSetPreallocation() is supported, and similarly MatMPISELLSetPreallocation() is supported 2112 for communicators controlling multiple processes. It is recommended that you call both of 2113 the above preallocation routines for simplicity. 2114 2115 Options Database Keys: 2116 . -mat_type sell - sets the matrix type to "sell" during a call to MatSetFromOptions() 2117 2118 Level: beginner 2119 2120 Notes: 2121 This format is only supported for real scalars, double precision, and 32 bit indices (the defaults). 2122 2123 It can provide better performance on Intel and AMD processes with AVX2 or AVX512 support for matrices that have a similar number of 2124 non-zeros in contiguous groups of rows. However if the computation is memory bandwidth limited it may not provide much improvement. 2125 2126 Developer Notes: 2127 On Intel (and AMD) systems some of the matrix operations use SIMD (AVX) instructions to achieve higher performance. 2128 2129 The sparse matrix format is as follows. For simplicity we assume a slice size of 2, it is actually 8 2130 .vb 2131 (2 0 3 4) 2132 Consider the matrix A = (5 0 6 0) 2133 (0 0 7 8) 2134 (0 0 9 9) 2135 2136 symbolically the Ellpack format can be written as 2137 2138 (2 3 4 |) (0 2 3 |) 2139 v = (5 6 0 |) colidx = (0 2 2 |) 2140 -------- --------- 2141 (7 8 |) (2 3 |) 2142 (9 9 |) (2 3 |) 2143 2144 The data for 2 contiguous rows of the matrix are stored together (in column-major format) (with any left-over rows handled as a special case). 2145 Any of the rows in a slice fewer columns than the rest of the slice (row 1 above) are padded with a previous valid column in their "extra" colidx[] locations and 2146 zeros in their "extra" v locations so that the matrix operations do not need special code to handle different length rows within the 2 rows in a slice. 2147 2148 The one-dimensional representation of v used in the code is (2 5 3 6 4 0 7 9 8 9) and for colidx is (0 0 2 2 3 2 2 2 3 3) 2149 2150 .ve 2151 2152 See MatMult_SeqSELL() for how this format is used with the SIMD operations to achieve high performance. 2153 2154 References: 2155 Hong Zhang, Richard T. Mills, Karl Rupp, and Barry F. Smith, Vectorized Parallel Sparse Matrix-Vector Multiplication in {PETSc} Using {AVX-512}, 2156 Proceedings of the 47th International Conference on Parallel Processing, 2018. 2157 2158 .seealso: MatCreateSeqSELL(), MatCreateSeqAIJ(), MatCreateSell(), MATSEQSELL, MATMPISELL, MATSEQAIJ, MATMPIAIJ, MATAIJ 2159 M*/ 2160 2161 /*@C 2162 MatCreateSeqSELL - Creates a sparse matrix in SELL format. 2163 2164 Collective on comm 2165 2166 Input Parameters: 2167 + comm - MPI communicator, set to PETSC_COMM_SELF 2168 . m - number of rows 2169 . n - number of columns 2170 . rlenmax - maximum number of nonzeros in a row 2171 - rlen - array containing the number of nonzeros in the various rows 2172 (possibly different for each row) or NULL 2173 2174 Output Parameter: 2175 . A - the matrix 2176 2177 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2178 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2179 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 2180 2181 Notes: 2182 If nnz is given then nz is ignored 2183 2184 Specify the preallocated storage with either rlenmax or rlen (not both). 2185 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 2186 allocation. For large problems you MUST preallocate memory or you 2187 will get TERRIBLE performance, see the users' manual chapter on matrices. 2188 2189 Level: intermediate 2190 2191 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatSeqSELLSetPreallocation(), MATSELL, MATSEQSELL, MATMPISELL 2192 2193 @*/ 2194 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 2195 { 2196 PetscErrorCode ierr; 2197 2198 PetscFunctionBegin; 2199 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2200 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2201 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 2202 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 2203 PetscFunctionReturn(0); 2204 } 2205 2206 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 2207 { 2208 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 2209 PetscInt totalslices=a->totalslices; 2210 PetscErrorCode ierr; 2211 2212 PetscFunctionBegin; 2213 /* If the matrix dimensions are not equal,or no of nonzeros */ 2214 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 2215 *flg = PETSC_FALSE; 2216 PetscFunctionReturn(0); 2217 } 2218 /* if the a->colidx are the same */ 2219 ierr = PetscArraycmp(a->colidx,b->colidx,a->sliidx[totalslices],flg);CHKERRQ(ierr); 2220 if (!*flg) PetscFunctionReturn(0); 2221 /* if a->val are the same */ 2222 ierr = PetscArraycmp(a->val,b->val,a->sliidx[totalslices],flg);CHKERRQ(ierr); 2223 PetscFunctionReturn(0); 2224 } 2225 2226 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 2227 { 2228 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2229 2230 PetscFunctionBegin; 2231 a->idiagvalid = PETSC_FALSE; 2232 PetscFunctionReturn(0); 2233 } 2234 2235 PetscErrorCode MatConjugate_SeqSELL(Mat A) 2236 { 2237 #if defined(PETSC_USE_COMPLEX) 2238 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2239 PetscInt i; 2240 PetscScalar *val = a->val; 2241 2242 PetscFunctionBegin; 2243 for (i=0; i<a->sliidx[a->totalslices]; i++) { 2244 val[i] = PetscConj(val[i]); 2245 } 2246 #else 2247 PetscFunctionBegin; 2248 #endif 2249 PetscFunctionReturn(0); 2250 } 2251