1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 #if defined(PETSC_HAVE_IMMINTRIN_H) 9 #include <immintrin.h> 10 11 #if !defined(_MM_SCALE_8) 12 #define _MM_SCALE_8 8 13 #endif 14 15 #if defined(__AVX512F__) 16 /* these do not work 17 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 18 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 19 */ 20 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 21 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 22 vec_idx = _mm256_loadu_si256((__m256i const*)acolidx); \ 23 vec_vals = _mm512_loadu_pd(aval); \ 24 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 25 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y) 26 #elif defined(__AVX2__) 27 #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 28 vec_vals = _mm256_loadu_pd(aval); \ 29 vec_idx = _mm_loadu_si128((__m128i const*)acolidx); /* SSE2 */ \ 30 vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \ 31 vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y) 32 #endif 33 #endif /* PETSC_HAVE_IMMINTRIN_H */ 34 35 /*@C 36 MatSeqSELLSetPreallocation - For good matrix assembly performance 37 the user should preallocate the matrix storage by setting the parameter nz 38 (or the array nnz). By setting these parameters accurately, performance 39 during matrix assembly can be increased significantly. 40 41 Collective on MPI_Comm 42 43 Input Parameters: 44 + B - The matrix 45 . nz - number of nonzeros per row (same for all rows) 46 - nnz - array containing the number of nonzeros in the various rows 47 (possibly different for each row) or NULL 48 49 Notes: 50 If nnz is given then nz is ignored. 51 52 Specify the preallocated storage with either nz or nnz (not both). 53 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 54 allocation. For large problems you MUST preallocate memory or you 55 will get TERRIBLE performance, see the users' manual chapter on matrices. 56 57 You can call MatGetInfo() to get information on how effective the preallocation was; 58 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 59 You can also run with the option -info and look for messages with the string 60 malloc in them to see if additional memory allocation was needed. 61 62 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 63 entries or columns indices. 64 65 The maximum number of nonzeos in any row should be as accuate as possible. 66 If it is underesitmated, you will get bad performance due to reallocation 67 (MatSeqXSELLReallocateSELL). 68 69 Level: intermediate 70 71 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 72 73 @*/ 74 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 75 { 76 PetscErrorCode ierr; 77 78 PetscFunctionBegin; 79 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 80 PetscValidType(B,1); 81 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 82 PetscFunctionReturn(0); 83 } 84 85 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 86 { 87 Mat_SeqSELL *b; 88 PetscInt i,j,totalslices; 89 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 90 PetscErrorCode ierr; 91 92 PetscFunctionBegin; 93 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 94 if (maxallocrow == MAT_SKIP_ALLOCATION) { 95 skipallocation = PETSC_TRUE; 96 maxallocrow = 0; 97 } 98 99 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 100 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 101 102 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 103 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 104 if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow); 105 if (rlen) { 106 for (i=0; i<B->rmap->n; i++) { 107 if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]); 108 if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n); 109 } 110 } 111 112 B->preallocated = PETSC_TRUE; 113 114 b = (Mat_SeqSELL*)B->data; 115 116 totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */ 117 b->totalslices = totalslices; 118 if (!skipallocation) { 119 if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n); 120 121 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 122 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 123 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 124 } 125 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 126 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 127 else if (maxallocrow < 0) maxallocrow = 1; 128 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 129 } else { 130 maxallocrow = 0; 131 b->sliidx[0] = 0; 132 for (i=1; i<totalslices; i++) { 133 b->sliidx[i] = 0; 134 for (j=0;j<8;j++) { 135 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 136 } 137 maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 138 b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i]; 139 } 140 /* last slice */ 141 b->sliidx[totalslices] = 0; 142 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 143 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 144 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 145 } 146 147 /* allocate space for val, colidx, rlen */ 148 /* FIXME: should B's old memory be unlogged? */ 149 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 150 /* FIXME: assuming an element of the bit array takes 8 bits */ 151 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 152 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 153 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 154 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 155 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 156 157 b->singlemalloc = PETSC_TRUE; 158 b->free_val = PETSC_TRUE; 159 b->free_colidx = PETSC_TRUE; 160 } else { 161 b->free_val = PETSC_FALSE; 162 b->free_colidx = PETSC_FALSE; 163 } 164 165 b->nz = 0; 166 b->maxallocrow = maxallocrow; 167 b->rlenmax = maxallocrow; 168 b->maxallocmat = b->sliidx[totalslices]; 169 B->info.nz_unneeded = (double)b->maxallocmat; 170 if (realalloc) { 171 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 172 } 173 PetscFunctionReturn(0); 174 } 175 176 PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 177 { 178 Mat_SeqSELL *a = (Mat_SeqSELL*)A->data; 179 PetscInt shift; 180 181 PetscFunctionBegin; 182 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 183 if (nz) *nz = a->rlen[row]; 184 shift = a->sliidx[row>>3]+(row&0x07); 185 if (!a->getrowcols) { 186 PetscErrorCode ierr; 187 188 ierr = PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);CHKERRQ(ierr); 189 } 190 if (idx) { 191 PetscInt j; 192 for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j]; 193 *idx = a->getrowcols; 194 } 195 if (v) { 196 PetscInt j; 197 for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j]; 198 *v = a->getrowvals; 199 } 200 PetscFunctionReturn(0); 201 } 202 203 PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 204 { 205 PetscFunctionBegin; 206 PetscFunctionReturn(0); 207 } 208 209 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 210 { 211 Mat B; 212 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 213 PetscInt i; 214 PetscErrorCode ierr; 215 216 PetscFunctionBegin; 217 if (reuse == MAT_REUSE_MATRIX) { 218 B = *newmat; 219 ierr = MatZeroEntries(B);CHKERRQ(ierr); 220 } else { 221 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 222 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 223 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 225 } 226 227 for (i=0; i<A->rmap->n; i++) { 228 PetscInt nz,*cols; 229 PetscScalar *vals; 230 231 ierr = MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 232 ierr = MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 233 ierr = MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 234 } 235 236 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 237 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 238 B->rmap->bs = A->rmap->bs; 239 240 if (reuse == MAT_INPLACE_MATRIX) { 241 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 242 } else { 243 *newmat = B; 244 } 245 PetscFunctionReturn(0); 246 } 247 248 #include <../src/mat/impls/aij/seq/aij.h> 249 250 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 251 { 252 Mat B; 253 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 254 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 255 const PetscInt *cols; 256 const PetscScalar *vals; 257 PetscErrorCode ierr; 258 259 PetscFunctionBegin; 260 if (A->rmap->bs > 1) { 261 ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr); 262 PetscFunctionReturn(0); 263 } 264 265 if (reuse == MAT_REUSE_MATRIX) { 266 B = *newmat; 267 } else { 268 /* Can we just use ilen? */ 269 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 270 for (i=0; i<m; i++) { 271 rowlengths[i] = ai[i+1] - ai[i]; 272 } 273 274 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 275 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 276 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 277 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 278 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 279 } 280 281 for (row=0; row<m; row++) { 282 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 283 ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 284 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 285 } 286 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 287 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 288 B->rmap->bs = A->rmap->bs; 289 290 if (reuse == MAT_INPLACE_MATRIX) { 291 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 292 } else { 293 *newmat = B; 294 } 295 PetscFunctionReturn(0); 296 } 297 298 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 299 { 300 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 301 PetscScalar *y; 302 const PetscScalar *x; 303 const MatScalar *aval=a->val; 304 PetscInt totalslices=a->totalslices; 305 const PetscInt *acolidx=a->colidx; 306 PetscInt i,j; 307 PetscErrorCode ierr; 308 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 309 __m512d vec_x,vec_y,vec_vals; 310 __m256i vec_idx; 311 __mmask8 mask; 312 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 313 __m256i vec_idx2,vec_idx3,vec_idx4; 314 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 315 __m128i vec_idx; 316 __m256d vec_x,vec_y,vec_y2,vec_vals; 317 MatScalar yval; 318 PetscInt r,rows_left,row,nnz_in_row; 319 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 320 __m128d vec_x_tmp; 321 __m256d vec_x,vec_y,vec_y2,vec_vals; 322 MatScalar yval; 323 PetscInt r,rows_left,row,nnz_in_row; 324 #else 325 PetscScalar sum[8]; 326 #endif 327 328 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 329 #pragma disjoint(*x,*y,*aval) 330 #endif 331 332 PetscFunctionBegin; 333 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 334 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 335 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 336 for (i=0; i<totalslices; i++) { /* loop over slices */ 337 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 338 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 339 340 vec_y = _mm512_setzero_pd(); 341 vec_y2 = _mm512_setzero_pd(); 342 vec_y3 = _mm512_setzero_pd(); 343 vec_y4 = _mm512_setzero_pd(); 344 345 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 346 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 347 case 3: 348 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 349 acolidx += 8; aval += 8; 350 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 351 acolidx += 8; aval += 8; 352 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 353 acolidx += 8; aval += 8; 354 j += 3; 355 break; 356 case 2: 357 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 358 acolidx += 8; aval += 8; 359 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 360 acolidx += 8; aval += 8; 361 j += 2; 362 break; 363 case 1: 364 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 365 acolidx += 8; aval += 8; 366 j += 1; 367 break; 368 } 369 #pragma novector 370 for (; j<(a->sliidx[i+1]>>3); j+=4) { 371 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 372 acolidx += 8; aval += 8; 373 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 374 acolidx += 8; aval += 8; 375 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 376 acolidx += 8; aval += 8; 377 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 378 acolidx += 8; aval += 8; 379 } 380 381 vec_y = _mm512_add_pd(vec_y,vec_y2); 382 vec_y = _mm512_add_pd(vec_y,vec_y3); 383 vec_y = _mm512_add_pd(vec_y,vec_y4); 384 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 385 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 386 _mm512_mask_storeu_pd(&y[8*i],mask,vec_y); 387 } else { 388 _mm512_storeu_pd(&y[8*i],vec_y); 389 } 390 } 391 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 392 for (i=0; i<totalslices; i++) { /* loop over full slices */ 393 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 394 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 395 396 /* last slice may have padding rows. Don't use vectorization. */ 397 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 398 rows_left = A->rmap->n - 8*i; 399 for (r=0; r<rows_left; ++r) { 400 yval = (MatScalar)0; 401 row = 8*i + r; 402 nnz_in_row = a->rlen[row]; 403 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 404 y[row] = yval; 405 } 406 break; 407 } 408 409 vec_y = _mm256_setzero_pd(); 410 vec_y2 = _mm256_setzero_pd(); 411 412 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 413 #pragma novector 414 #pragma unroll(2) 415 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 416 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 417 aval += 4; acolidx += 4; 418 AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2); 419 aval += 4; acolidx += 4; 420 } 421 422 _mm256_storeu_pd(y+i*8,vec_y); 423 _mm256_storeu_pd(y+i*8+4,vec_y2); 424 } 425 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 426 for (i=0; i<totalslices; i++) { /* loop over full slices */ 427 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 428 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 429 430 vec_y = _mm256_setzero_pd(); 431 vec_y2 = _mm256_setzero_pd(); 432 433 /* last slice may have padding rows. Don't use vectorization. */ 434 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 435 rows_left = A->rmap->n - 8*i; 436 for (r=0; r<rows_left; ++r) { 437 yval = (MatScalar)0; 438 row = 8*i + r; 439 nnz_in_row = a->rlen[row]; 440 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 441 y[row] = yval; 442 } 443 break; 444 } 445 446 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 447 #pragma novector 448 #pragma unroll(2) 449 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 450 vec_vals = _mm256_loadu_pd(aval); 451 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 452 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 453 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 454 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 455 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 456 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 457 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 458 aval += 4; 459 460 vec_vals = _mm256_loadu_pd(aval); 461 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 462 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 463 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 464 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 465 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 466 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 467 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 468 aval += 4; 469 } 470 471 _mm256_storeu_pd(y + i*8, vec_y); 472 _mm256_storeu_pd(y + i*8 + 4, vec_y2); 473 } 474 #else 475 for (i=0; i<totalslices; i++) { /* loop over slices */ 476 for (j=0; j<8; j++) sum[j] = 0.0; 477 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 478 sum[0] += aval[j] * x[acolidx[j]]; 479 sum[1] += aval[j+1] * x[acolidx[j+1]]; 480 sum[2] += aval[j+2] * x[acolidx[j+2]]; 481 sum[3] += aval[j+3] * x[acolidx[j+3]]; 482 sum[4] += aval[j+4] * x[acolidx[j+4]]; 483 sum[5] += aval[j+5] * x[acolidx[j+5]]; 484 sum[6] += aval[j+6] * x[acolidx[j+6]]; 485 sum[7] += aval[j+7] * x[acolidx[j+7]]; 486 } 487 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 488 for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j]; 489 } else { 490 for(j=0; j<8; j++) y[8*i+j] = sum[j]; 491 } 492 } 493 #endif 494 495 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 496 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 497 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 502 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 503 { 504 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 505 PetscScalar *y,*z; 506 const PetscScalar *x; 507 const MatScalar *aval=a->val; 508 PetscInt totalslices=a->totalslices; 509 const PetscInt *acolidx=a->colidx; 510 PetscInt i,j; 511 PetscErrorCode ierr; 512 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 513 __m512d vec_x,vec_y,vec_vals; 514 __m256i vec_idx; 515 __mmask8 mask; 516 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 517 __m256i vec_idx2,vec_idx3,vec_idx4; 518 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 519 __m128d vec_x_tmp; 520 __m256d vec_x,vec_y,vec_y2,vec_vals; 521 MatScalar yval; 522 PetscInt r,row,nnz_in_row; 523 #else 524 PetscScalar sum[8]; 525 #endif 526 527 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 528 #pragma disjoint(*x,*y,*aval) 529 #endif 530 531 PetscFunctionBegin; 532 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 533 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 534 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 535 for (i=0; i<totalslices; i++) { /* loop over slices */ 536 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 537 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 538 539 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 540 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 541 vec_y = _mm512_mask_loadu_pd(vec_y,mask,&y[8*i]); 542 } else { 543 vec_y = _mm512_loadu_pd(&y[8*i]); 544 } 545 vec_y2 = _mm512_setzero_pd(); 546 vec_y3 = _mm512_setzero_pd(); 547 vec_y4 = _mm512_setzero_pd(); 548 549 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 550 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 551 case 3: 552 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 553 acolidx += 8; aval += 8; 554 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 555 acolidx += 8; aval += 8; 556 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 557 acolidx += 8; aval += 8; 558 j += 3; 559 break; 560 case 2: 561 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 562 acolidx += 8; aval += 8; 563 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 564 acolidx += 8; aval += 8; 565 j += 2; 566 break; 567 case 1: 568 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 569 acolidx += 8; aval += 8; 570 j += 1; 571 break; 572 } 573 #pragma novector 574 for (; j<(a->sliidx[i+1]>>3); j+=4) { 575 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 576 acolidx += 8; aval += 8; 577 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 578 acolidx += 8; aval += 8; 579 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 580 acolidx += 8; aval += 8; 581 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 582 acolidx += 8; aval += 8; 583 } 584 585 vec_y = _mm512_add_pd(vec_y,vec_y2); 586 vec_y = _mm512_add_pd(vec_y,vec_y3); 587 vec_y = _mm512_add_pd(vec_y,vec_y4); 588 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 589 _mm512_mask_storeu_pd(&z[8*i],mask,vec_y); 590 } else { 591 _mm512_storeu_pd(&z[8*i],vec_y); 592 } 593 } 594 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 595 for (i=0; i<totalslices; i++) { /* loop over full slices */ 596 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 597 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 598 599 /* last slice may have padding rows. Don't use vectorization. */ 600 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 601 for (r=0; r<(A->rmap->n & 0x07); ++r) { 602 row = 8*i + r; 603 yval = (MatScalar)0.0; 604 nnz_in_row = a->rlen[row]; 605 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 606 z[row] = y[row] + yval; 607 } 608 break; 609 } 610 611 vec_y = _mm256_loadu_pd(y+8*i); 612 vec_y2 = _mm256_loadu_pd(y+8*i+4); 613 614 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 615 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 616 vec_vals = _mm256_loadu_pd(aval); 617 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 618 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 619 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 620 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 621 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 622 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 623 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 624 aval += 4; 625 626 vec_vals = _mm256_loadu_pd(aval); 627 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 628 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 629 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 630 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 631 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 632 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 633 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 634 aval += 4; 635 } 636 637 _mm256_storeu_pd(z+i*8,vec_y); 638 _mm256_storeu_pd(z+i*8+4,vec_y2); 639 } 640 #else 641 for (i=0; i<totalslices; i++) { /* loop over slices */ 642 for (j=0; j<8; j++) sum[j] = 0.0; 643 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 644 sum[0] += aval[j] * x[acolidx[j]]; 645 sum[1] += aval[j+1] * x[acolidx[j+1]]; 646 sum[2] += aval[j+2] * x[acolidx[j+2]]; 647 sum[3] += aval[j+3] * x[acolidx[j+3]]; 648 sum[4] += aval[j+4] * x[acolidx[j+4]]; 649 sum[5] += aval[j+5] * x[acolidx[j+5]]; 650 sum[6] += aval[j+6] * x[acolidx[j+6]]; 651 sum[7] += aval[j+7] * x[acolidx[j+7]]; 652 } 653 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 654 for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j]; 655 } else { 656 for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j]; 657 } 658 } 659 #endif 660 661 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 662 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 663 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 668 { 669 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 670 PetscScalar *y; 671 const PetscScalar *x; 672 const MatScalar *aval=a->val; 673 const PetscInt *acolidx=a->colidx; 674 PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices; 675 PetscErrorCode ierr; 676 677 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 678 #pragma disjoint(*x,*y,*aval) 679 #endif 680 681 PetscFunctionBegin; 682 if (A->symmetric) { 683 ierr = MatMultAdd_SeqSELL(A,xx,zz,yy);CHKERRQ(ierr); 684 PetscFunctionReturn(0); 685 } 686 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 687 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 688 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 689 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 690 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 691 for (r=0; r<(A->rmap->n & 0x07); ++r) { 692 row = 8*i + r; 693 nnz_in_row = a->rlen[row]; 694 for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row]; 695 } 696 break; 697 } 698 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 699 y[acolidx[j]] += aval[j] * x[8*i]; 700 y[acolidx[j+1]] += aval[j+1] * x[8*i+1]; 701 y[acolidx[j+2]] += aval[j+2] * x[8*i+2]; 702 y[acolidx[j+3]] += aval[j+3] * x[8*i+3]; 703 y[acolidx[j+4]] += aval[j+4] * x[8*i+4]; 704 y[acolidx[j+5]] += aval[j+5] * x[8*i+5]; 705 y[acolidx[j+6]] += aval[j+6] * x[8*i+6]; 706 y[acolidx[j+7]] += aval[j+7] * x[8*i+7]; 707 } 708 } 709 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 710 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 711 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 716 { 717 PetscErrorCode ierr; 718 719 PetscFunctionBegin; 720 if (A->symmetric) { 721 ierr = MatMult_SeqSELL(A,xx,yy);CHKERRQ(ierr); 722 } else { 723 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 724 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 725 } 726 PetscFunctionReturn(0); 727 } 728 729 /* 730 Checks for missing diagonals 731 */ 732 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 733 { 734 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 735 PetscInt *diag,i; 736 737 PetscFunctionBegin; 738 *missing = PETSC_FALSE; 739 if (A->rmap->n > 0 && !(a->colidx)) { 740 *missing = PETSC_TRUE; 741 if (d) *d = 0; 742 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 743 } else { 744 diag = a->diag; 745 for (i=0; i<A->rmap->n; i++) { 746 if (diag[i] == -1) { 747 *missing = PETSC_TRUE; 748 if (d) *d = i; 749 PetscInfo1(A,"Matrix is missing diagonal number %D\n",i); 750 break; 751 } 752 } 753 } 754 PetscFunctionReturn(0); 755 } 756 757 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 758 { 759 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 760 PetscInt i,j,m=A->rmap->n,shift; 761 PetscErrorCode ierr; 762 763 PetscFunctionBegin; 764 if (!a->diag) { 765 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 766 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 767 a->free_diag = PETSC_TRUE; 768 } 769 for (i=0; i<m; i++) { /* loop over rows */ 770 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 771 a->diag[i] = -1; 772 for (j=0; j<a->rlen[i]; j++) { 773 if (a->colidx[shift+j*8] == i) { 774 a->diag[i] = shift+j*8; 775 break; 776 } 777 } 778 } 779 PetscFunctionReturn(0); 780 } 781 782 /* 783 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 784 */ 785 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 786 { 787 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 788 PetscInt i,*diag,m = A->rmap->n; 789 MatScalar *val = a->val; 790 PetscScalar *idiag,*mdiag; 791 PetscErrorCode ierr; 792 793 PetscFunctionBegin; 794 if (a->idiagvalid) PetscFunctionReturn(0); 795 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 796 diag = a->diag; 797 if (!a->idiag) { 798 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 799 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 800 val = a->val; 801 } 802 mdiag = a->mdiag; 803 idiag = a->idiag; 804 805 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 806 for (i=0; i<m; i++) { 807 mdiag[i] = val[diag[i]]; 808 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 809 if (PetscRealPart(fshift)) { 810 ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr); 811 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 812 A->factorerror_zeropivot_value = 0.0; 813 A->factorerror_zeropivot_row = i; 814 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i); 815 } 816 idiag[i] = 1.0/val[diag[i]]; 817 } 818 ierr = PetscLogFlops(m);CHKERRQ(ierr); 819 } else { 820 for (i=0; i<m; i++) { 821 mdiag[i] = val[diag[i]]; 822 idiag[i] = omega/(fshift + val[diag[i]]); 823 } 824 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 825 } 826 a->idiagvalid = PETSC_TRUE; 827 PetscFunctionReturn(0); 828 } 829 830 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 831 { 832 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 833 PetscErrorCode ierr; 834 835 PetscFunctionBegin; 836 ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr); 837 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 PetscErrorCode MatDestroy_SeqSELL(Mat A) 842 { 843 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 844 PetscErrorCode ierr; 845 846 PetscFunctionBegin; 847 #if defined(PETSC_USE_LOG) 848 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz); 849 #endif 850 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 851 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 852 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 853 ierr = PetscFree(a->diag);CHKERRQ(ierr); 854 ierr = PetscFree(a->rlen);CHKERRQ(ierr); 855 ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 856 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 857 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 858 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 859 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 860 ierr = PetscFree2(a->getrowcols,a->getrowvals);CHKERRQ(ierr); 861 862 ierr = PetscFree(A->data);CHKERRQ(ierr); 863 864 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 865 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 866 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 867 #if defined(PETSC_HAVE_ELEMENTAL) 868 #endif 869 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 874 { 875 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 876 PetscErrorCode ierr; 877 878 PetscFunctionBegin; 879 switch (op) { 880 case MAT_ROW_ORIENTED: 881 a->roworiented = flg; 882 break; 883 case MAT_KEEP_NONZERO_PATTERN: 884 a->keepnonzeropattern = flg; 885 break; 886 case MAT_NEW_NONZERO_LOCATIONS: 887 a->nonew = (flg ? 0 : 1); 888 break; 889 case MAT_NEW_NONZERO_LOCATION_ERR: 890 a->nonew = (flg ? -1 : 0); 891 break; 892 case MAT_NEW_NONZERO_ALLOCATION_ERR: 893 a->nonew = (flg ? -2 : 0); 894 break; 895 case MAT_UNUSED_NONZERO_LOCATION_ERR: 896 a->nounused = (flg ? -1 : 0); 897 break; 898 case MAT_NEW_DIAGONALS: 899 case MAT_IGNORE_OFF_PROC_ENTRIES: 900 case MAT_USE_HASH_TABLE: 901 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 902 break; 903 case MAT_SPD: 904 case MAT_SYMMETRIC: 905 case MAT_STRUCTURALLY_SYMMETRIC: 906 case MAT_HERMITIAN: 907 case MAT_SYMMETRY_ETERNAL: 908 /* These options are handled directly by MatSetOption() */ 909 break; 910 default: 911 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 912 } 913 PetscFunctionReturn(0); 914 } 915 916 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 917 { 918 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 919 PetscInt i,j,n,shift; 920 PetscScalar *x,zero=0.0; 921 PetscErrorCode ierr; 922 923 PetscFunctionBegin; 924 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 925 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 926 927 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 928 PetscInt *diag=a->diag; 929 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 930 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 931 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 932 PetscFunctionReturn(0); 933 } 934 935 ierr = VecSet(v,zero);CHKERRQ(ierr); 936 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 937 for (i=0; i<n; i++) { /* loop over rows */ 938 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 939 x[i] = 0; 940 for (j=0; j<a->rlen[i]; j++) { 941 if (a->colidx[shift+j*8] == i) { 942 x[i] = a->val[shift+j*8]; 943 break; 944 } 945 } 946 } 947 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 948 PetscFunctionReturn(0); 949 } 950 951 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 952 { 953 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 954 const PetscScalar *l,*r; 955 PetscInt i,j,m,n,row; 956 PetscErrorCode ierr; 957 958 PetscFunctionBegin; 959 if (ll) { 960 /* The local size is used so that VecMPI can be passed to this routine 961 by MatDiagonalScale_MPISELL */ 962 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 963 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 964 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 965 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 966 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 967 a->val[j] *= l[8*i+row]; 968 } 969 } 970 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 971 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 972 } 973 if (rr) { 974 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 975 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 976 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 977 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 978 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 979 a->val[j] *= r[a->colidx[j]]; 980 } 981 } 982 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 983 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 984 } 985 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } 988 989 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 990 991 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 992 { 993 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 994 PetscInt *cp,i,k,low,high,t,row,col,l; 995 PetscInt shift; 996 MatScalar *vp; 997 998 PetscFunctionBegin; 999 for (k=0; k<m; k++) { /* loop over requested rows */ 1000 row = im[k]; 1001 if (row<0) continue; 1002 #if defined(PETSC_USE_DEBUG) 1003 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1004 #endif 1005 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1006 cp = a->colidx+shift; /* pointer to the row */ 1007 vp = a->val+shift; /* pointer to the row */ 1008 for (l=0; l<n; l++) { /* loop over requested columns */ 1009 col = in[l]; 1010 if (col<0) continue; 1011 #if defined(PETSC_USE_DEBUG) 1012 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1); 1013 #endif 1014 high = a->rlen[row]; low = 0; /* assume unsorted */ 1015 while (high-low > 5) { 1016 t = (low+high)/2; 1017 if (*(cp+t*8) > col) high = t; 1018 else low = t; 1019 } 1020 for (i=low; i<high; i++) { 1021 if (*(cp+8*i) > col) break; 1022 if (*(cp+8*i) == col) { 1023 *v++ = *(vp+8*i); 1024 goto finished; 1025 } 1026 } 1027 *v++ = 0.0; 1028 finished:; 1029 } 1030 } 1031 PetscFunctionReturn(0); 1032 } 1033 1034 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 1035 { 1036 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1037 PetscInt i,j,m=A->rmap->n,shift; 1038 const char *name; 1039 PetscViewerFormat format; 1040 PetscErrorCode ierr; 1041 1042 PetscFunctionBegin; 1043 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1044 if (format == PETSC_VIEWER_ASCII_MATLAB) { 1045 PetscInt nofinalvalue = 0; 1046 /* 1047 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 1048 nofinalvalue = 1; 1049 } 1050 */ 1051 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1052 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr); 1053 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr); 1054 #if defined(PETSC_USE_COMPLEX) 1055 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1056 #else 1057 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 1058 #endif 1059 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 1060 1061 for (i=0; i<m; i++) { 1062 shift = a->sliidx[i>>3]+(i&0x07); 1063 for (j=0; j<a->rlen[i]; j++) { 1064 #if defined(PETSC_USE_COMPLEX) 1065 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1066 #else 1067 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1068 #endif 1069 } 1070 } 1071 /* 1072 if (nofinalvalue) { 1073 #if defined(PETSC_USE_COMPLEX) 1074 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 1075 #else 1076 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 1077 #endif 1078 } 1079 */ 1080 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 1081 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 1082 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1083 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 1084 PetscFunctionReturn(0); 1085 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1086 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1087 for (i=0; i<m; i++) { 1088 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1089 shift = a->sliidx[i>>3]+(i&0x07); 1090 for (j=0; j<a->rlen[i]; j++) { 1091 #if defined(PETSC_USE_COMPLEX) 1092 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1093 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1094 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1095 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1096 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 1097 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1098 } 1099 #else 1100 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 1101 #endif 1102 } 1103 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1104 } 1105 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1106 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 1107 PetscInt cnt=0,jcnt; 1108 PetscScalar value; 1109 #if defined(PETSC_USE_COMPLEX) 1110 PetscBool realonly=PETSC_TRUE; 1111 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1112 if (PetscImaginaryPart(a->val[i]) != 0.0) { 1113 realonly = PETSC_FALSE; 1114 break; 1115 } 1116 } 1117 #endif 1118 1119 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1120 for (i=0; i<m; i++) { 1121 jcnt = 0; 1122 shift = a->sliidx[i>>3]+(i&0x07); 1123 for (j=0; j<A->cmap->n; j++) { 1124 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 1125 value = a->val[cnt++]; 1126 jcnt++; 1127 } else { 1128 value = 0.0; 1129 } 1130 #if defined(PETSC_USE_COMPLEX) 1131 if (realonly) { 1132 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 1133 } else { 1134 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 1135 } 1136 #else 1137 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 1138 #endif 1139 } 1140 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1141 } 1142 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1143 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 1144 PetscInt fshift=1; 1145 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1146 #if defined(PETSC_USE_COMPLEX) 1147 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 1148 #else 1149 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 1150 #endif 1151 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 1152 for (i=0; i<m; i++) { 1153 shift = a->sliidx[i>>3]+(i&0x07); 1154 for (j=0; j<a->rlen[i]; j++) { 1155 #if defined(PETSC_USE_COMPLEX) 1156 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1157 #else 1158 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1159 #endif 1160 } 1161 } 1162 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1163 } else if (format == PETSC_VIEWER_NATIVE) { 1164 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 1165 PetscInt row; 1166 ierr = PetscViewerASCIIPrintf(viewer,"slice %D: %D %D\n",i,a->sliidx[i],a->sliidx[i+1]);CHKERRQ(ierr); 1167 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 1168 #if defined(PETSC_USE_COMPLEX) 1169 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1170 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1171 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1172 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1173 } else { 1174 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1175 } 1176 #else 1177 ierr = PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1178 #endif 1179 } 1180 } 1181 } else { 1182 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1183 if (A->factortype) { 1184 for (i=0; i<m; i++) { 1185 shift = a->sliidx[i>>3]+(i&0x07); 1186 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1187 /* L part */ 1188 for (j=shift; j<a->diag[i]; j+=8) { 1189 #if defined(PETSC_USE_COMPLEX) 1190 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 1191 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1192 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 1193 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1194 } else { 1195 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1196 } 1197 #else 1198 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1199 #endif 1200 } 1201 /* diagonal */ 1202 j = a->diag[i]; 1203 #if defined(PETSC_USE_COMPLEX) 1204 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1205 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1206 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1207 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1208 } else { 1209 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1210 } 1211 #else 1212 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1213 #endif 1214 1215 /* U part */ 1216 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1217 #if defined(PETSC_USE_COMPLEX) 1218 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1219 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1220 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1221 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1222 } else { 1223 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1224 } 1225 #else 1226 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1227 #endif 1228 } 1229 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1230 } 1231 } else { 1232 for (i=0; i<m; i++) { 1233 shift = a->sliidx[i>>3]+(i&0x07); 1234 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1235 for (j=0; j<a->rlen[i]; j++) { 1236 #if defined(PETSC_USE_COMPLEX) 1237 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1238 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1239 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1240 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1241 } else { 1242 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1243 } 1244 #else 1245 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1246 #endif 1247 } 1248 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1249 } 1250 } 1251 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1252 } 1253 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #include <petscdraw.h> 1258 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1259 { 1260 Mat A=(Mat)Aa; 1261 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1262 PetscInt i,j,m=A->rmap->n,shift; 1263 int color; 1264 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1265 PetscViewer viewer; 1266 PetscViewerFormat format; 1267 PetscErrorCode ierr; 1268 1269 PetscFunctionBegin; 1270 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1271 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1272 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1273 1274 /* loop over matrix elements drawing boxes */ 1275 1276 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1277 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1278 /* Blue for negative, Cyan for zero and Red for positive */ 1279 color = PETSC_DRAW_BLUE; 1280 for (i=0; i<m; i++) { 1281 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1282 y_l = m - i - 1.0; y_r = y_l + 1.0; 1283 for (j=0; j<a->rlen[i]; j++) { 1284 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1285 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1286 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1287 } 1288 } 1289 color = PETSC_DRAW_CYAN; 1290 for (i=0; i<m; i++) { 1291 shift = a->sliidx[i>>3]+(i&0x07); 1292 y_l = m - i - 1.0; y_r = y_l + 1.0; 1293 for (j=0; j<a->rlen[i]; j++) { 1294 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1295 if (a->val[shift+8*j] != 0.) continue; 1296 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1297 } 1298 } 1299 color = PETSC_DRAW_RED; 1300 for (i=0; i<m; i++) { 1301 shift = a->sliidx[i>>3]+(i&0x07); 1302 y_l = m - i - 1.0; y_r = y_l + 1.0; 1303 for (j=0; j<a->rlen[i]; j++) { 1304 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1305 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1306 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1307 } 1308 } 1309 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1310 } else { 1311 /* use contour shading to indicate magnitude of values */ 1312 /* first determine max of all nonzero values */ 1313 PetscReal minv=0.0,maxv=0.0; 1314 PetscInt count=0; 1315 PetscDraw popup; 1316 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1317 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1318 } 1319 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1320 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1321 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1322 1323 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1324 for (i=0; i<m; i++) { 1325 shift = a->sliidx[i>>3]+(i&0x07); 1326 y_l = m - i - 1.0; 1327 y_r = y_l + 1.0; 1328 for (j=0; j<a->rlen[i]; j++) { 1329 x_l = a->colidx[shift+j*8]; 1330 x_r = x_l + 1.0; 1331 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1332 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1333 count++; 1334 } 1335 } 1336 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1337 } 1338 PetscFunctionReturn(0); 1339 } 1340 1341 #include <petscdraw.h> 1342 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1343 { 1344 PetscDraw draw; 1345 PetscReal xr,yr,xl,yl,h,w; 1346 PetscBool isnull; 1347 PetscErrorCode ierr; 1348 1349 PetscFunctionBegin; 1350 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1351 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1352 if (isnull) PetscFunctionReturn(0); 1353 1354 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1355 xr += w; yr += h; xl = -w; yl = -h; 1356 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1357 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1358 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1359 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1360 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1365 { 1366 PetscBool iascii,isbinary,isdraw; 1367 PetscErrorCode ierr; 1368 1369 PetscFunctionBegin; 1370 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1371 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1372 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1373 if (iascii) { 1374 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1375 } else if (isbinary) { 1376 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1377 } else if (isdraw) { 1378 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1379 } 1380 PetscFunctionReturn(0); 1381 } 1382 1383 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1384 { 1385 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1386 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1387 MatScalar *vp; 1388 PetscErrorCode ierr; 1389 1390 PetscFunctionBegin; 1391 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1392 /* To do: compress out the unused elements */ 1393 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1394 ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1395 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 1396 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr); 1397 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1398 for (i=0; i<a->totalslices; ++i) { 1399 shift = a->sliidx[i]; /* starting index of the slice */ 1400 cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1401 vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1402 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1403 row = 8*i + row_in_slice; 1404 nrow = a->rlen[row]; /* number of nonzeros in row */ 1405 /* 1406 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1407 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1408 */ 1409 lastcol = 0; 1410 if (nrow>0) { /* nonempty row */ 1411 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1412 } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1413 for (j=1;j<8;j++) { 1414 if (a->rlen[8*i+j]) { 1415 lastcol = cp[j]; 1416 break; 1417 } 1418 } 1419 } else { 1420 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1421 } 1422 1423 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1424 cp[8*k+row_in_slice] = lastcol; 1425 vp[8*k+row_in_slice] = (MatScalar)0; 1426 } 1427 } 1428 } 1429 1430 A->info.mallocs += a->reallocs; 1431 a->reallocs = 0; 1432 1433 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1438 { 1439 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1440 1441 PetscFunctionBegin; 1442 info->block_size = 1.0; 1443 info->nz_allocated = (double)a->maxallocmat; 1444 info->nz_used = (double)a->sliidx[a->totalslices]; /* include padding zeros */ 1445 info->nz_unneeded = (double)(a->maxallocmat-a->sliidx[a->totalslices]); 1446 info->assemblies = (double)A->num_ass; 1447 info->mallocs = (double)A->info.mallocs; 1448 info->memory = ((PetscObject)A)->mem; 1449 if (A->factortype) { 1450 info->fill_ratio_given = A->info.fill_ratio_given; 1451 info->fill_ratio_needed = A->info.fill_ratio_needed; 1452 info->factor_mallocs = A->info.factor_mallocs; 1453 } else { 1454 info->fill_ratio_given = 0; 1455 info->fill_ratio_needed = 0; 1456 info->factor_mallocs = 0; 1457 } 1458 PetscFunctionReturn(0); 1459 } 1460 1461 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1462 { 1463 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1464 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1465 PetscInt *cp,nonew=a->nonew,lastcol=-1; 1466 MatScalar *vp,value; 1467 PetscErrorCode ierr; 1468 1469 PetscFunctionBegin; 1470 for (k=0; k<m; k++) { /* loop over added rows */ 1471 row = im[k]; 1472 if (row < 0) continue; 1473 #if defined(PETSC_USE_DEBUG) 1474 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1475 #endif 1476 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1477 cp = a->colidx+shift; /* pointer to the row */ 1478 vp = a->val+shift; /* pointer to the row */ 1479 nrow = a->rlen[row]; 1480 low = 0; 1481 high = nrow; 1482 1483 for (l=0; l<n; l++) { /* loop over added columns */ 1484 col = in[l]; 1485 if (col<0) continue; 1486 #if defined(PETSC_USE_DEBUG) 1487 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1); 1488 #endif 1489 if (a->roworiented) { 1490 value = v[l+k*n]; 1491 } else { 1492 value = v[k+l*m]; 1493 } 1494 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1495 1496 /* search in this row for the specified colmun, i indicates the column to be set */ 1497 if (col <= lastcol) low = 0; 1498 else high = nrow; 1499 lastcol = col; 1500 while (high-low > 5) { 1501 t = (low+high)/2; 1502 if (*(cp+t*8) > col) high = t; 1503 else low = t; 1504 } 1505 for (i=low; i<high; i++) { 1506 if (*(cp+i*8) > col) break; 1507 if (*(cp+i*8) == col) { 1508 if (is == ADD_VALUES) *(vp+i*8) += value; 1509 else *(vp+i*8) = value; 1510 low = i + 1; 1511 goto noinsert; 1512 } 1513 } 1514 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1515 if (nonew == 1) goto noinsert; 1516 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1517 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1518 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1519 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1520 for (ii=nrow-1; ii>=i; ii--) { 1521 *(cp+(ii+1)*8) = *(cp+ii*8); 1522 *(vp+(ii+1)*8) = *(vp+ii*8); 1523 } 1524 a->rlen[row]++; 1525 *(cp+i*8) = col; 1526 *(vp+i*8) = value; 1527 a->nz++; 1528 A->nonzerostate++; 1529 low = i+1; high++; nrow++; 1530 noinsert:; 1531 } 1532 a->rlen[row] = nrow; 1533 } 1534 PetscFunctionReturn(0); 1535 } 1536 1537 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1538 { 1539 PetscErrorCode ierr; 1540 1541 PetscFunctionBegin; 1542 /* If the two matrices have the same copy implementation, use fast copy. */ 1543 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1544 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1545 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1546 1547 if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1548 ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1549 } else { 1550 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1551 } 1552 PetscFunctionReturn(0); 1553 } 1554 1555 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1556 { 1557 PetscErrorCode ierr; 1558 1559 PetscFunctionBegin; 1560 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr); 1561 PetscFunctionReturn(0); 1562 } 1563 1564 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1565 { 1566 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1567 1568 PetscFunctionBegin; 1569 *array = a->val; 1570 PetscFunctionReturn(0); 1571 } 1572 1573 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1574 { 1575 PetscFunctionBegin; 1576 PetscFunctionReturn(0); 1577 } 1578 1579 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1580 { 1581 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1582 PetscInt i; 1583 MatScalar *aval=a->val; 1584 1585 PetscFunctionBegin; 1586 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1587 PetscFunctionReturn(0); 1588 } 1589 1590 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1591 { 1592 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1593 PetscInt i; 1594 MatScalar *aval=a->val; 1595 PetscErrorCode ierr; 1596 1597 PetscFunctionBegin; 1598 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1599 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1600 PetscFunctionReturn(0); 1601 } 1602 1603 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1604 { 1605 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1606 MatScalar *aval=a->val; 1607 PetscScalar oalpha=alpha; 1608 PetscBLASInt one=1,size; 1609 PetscErrorCode ierr; 1610 1611 PetscFunctionBegin; 1612 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1613 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1614 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1615 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1616 PetscFunctionReturn(0); 1617 } 1618 1619 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1620 { 1621 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1622 PetscErrorCode ierr; 1623 1624 PetscFunctionBegin; 1625 if (!Y->preallocated || !y->nz) { 1626 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1627 } 1628 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1629 PetscFunctionReturn(0); 1630 } 1631 1632 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1633 { 1634 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1635 PetscScalar *x,sum,*t; 1636 const MatScalar *idiag=0,*mdiag; 1637 const PetscScalar *b,*xb; 1638 PetscInt n,m=A->rmap->n,i,j,shift; 1639 const PetscInt *diag; 1640 PetscErrorCode ierr; 1641 1642 PetscFunctionBegin; 1643 its = its*lits; 1644 1645 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1646 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1647 a->fshift = fshift; 1648 a->omega = omega; 1649 1650 diag = a->diag; 1651 t = a->ssor_work; 1652 idiag = a->idiag; 1653 mdiag = a->mdiag; 1654 1655 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1656 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1657 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1658 if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1659 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1660 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1661 1662 if (flag & SOR_ZERO_INITIAL_GUESS) { 1663 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1664 for (i=0; i<m; i++) { 1665 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1666 sum = b[i]; 1667 n = (diag[i]-shift)/8; 1668 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1669 t[i] = sum; 1670 x[i] = sum*idiag[i]; 1671 } 1672 xb = t; 1673 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1674 } else xb = b; 1675 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1676 for (i=m-1; i>=0; i--) { 1677 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1678 sum = xb[i]; 1679 n = a->rlen[i]-(diag[i]-shift)/8-1; 1680 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1681 if (xb == b) { 1682 x[i] = sum*idiag[i]; 1683 } else { 1684 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1685 } 1686 } 1687 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1688 } 1689 its--; 1690 } 1691 while (its--) { 1692 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1693 for (i=0; i<m; i++) { 1694 /* lower */ 1695 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1696 sum = b[i]; 1697 n = (diag[i]-shift)/8; 1698 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1699 t[i] = sum; /* save application of the lower-triangular part */ 1700 /* upper */ 1701 n = a->rlen[i]-(diag[i]-shift)/8-1; 1702 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1703 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1704 } 1705 xb = t; 1706 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1707 } else xb = b; 1708 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1709 for (i=m-1; i>=0; i--) { 1710 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1711 sum = xb[i]; 1712 if (xb == b) { 1713 /* whole matrix (no checkpointing available) */ 1714 n = a->rlen[i]; 1715 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1716 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1717 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1718 n = a->rlen[i]-(diag[i]-shift)/8-1; 1719 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1720 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1721 } 1722 } 1723 if (xb == b) { 1724 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1725 } else { 1726 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1727 } 1728 } 1729 } 1730 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1731 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1732 PetscFunctionReturn(0); 1733 } 1734 1735 /* -------------------------------------------------------------------*/ 1736 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1737 MatGetRow_SeqSELL, 1738 MatRestoreRow_SeqSELL, 1739 MatMult_SeqSELL, 1740 /* 4*/ MatMultAdd_SeqSELL, 1741 MatMultTranspose_SeqSELL, 1742 MatMultTransposeAdd_SeqSELL, 1743 0, 1744 0, 1745 0, 1746 /* 10*/ 0, 1747 0, 1748 0, 1749 MatSOR_SeqSELL, 1750 0, 1751 /* 15*/ MatGetInfo_SeqSELL, 1752 MatEqual_SeqSELL, 1753 MatGetDiagonal_SeqSELL, 1754 MatDiagonalScale_SeqSELL, 1755 0, 1756 /* 20*/ 0, 1757 MatAssemblyEnd_SeqSELL, 1758 MatSetOption_SeqSELL, 1759 MatZeroEntries_SeqSELL, 1760 /* 24*/ 0, 1761 0, 1762 0, 1763 0, 1764 0, 1765 /* 29*/ MatSetUp_SeqSELL, 1766 0, 1767 0, 1768 0, 1769 0, 1770 /* 34*/ MatDuplicate_SeqSELL, 1771 0, 1772 0, 1773 0, 1774 0, 1775 /* 39*/ 0, 1776 0, 1777 0, 1778 MatGetValues_SeqSELL, 1779 MatCopy_SeqSELL, 1780 /* 44*/ 0, 1781 MatScale_SeqSELL, 1782 MatShift_SeqSELL, 1783 0, 1784 0, 1785 /* 49*/ 0, 1786 0, 1787 0, 1788 0, 1789 0, 1790 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1791 0, 1792 0, 1793 0, 1794 0, 1795 /* 59*/ 0, 1796 MatDestroy_SeqSELL, 1797 MatView_SeqSELL, 1798 0, 1799 0, 1800 /* 64*/ 0, 1801 0, 1802 0, 1803 0, 1804 0, 1805 /* 69*/ 0, 1806 0, 1807 0, 1808 0, 1809 0, 1810 /* 74*/ 0, 1811 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1812 0, 1813 0, 1814 0, 1815 /* 79*/ 0, 1816 0, 1817 0, 1818 0, 1819 0, 1820 /* 84*/ 0, 1821 0, 1822 0, 1823 0, 1824 0, 1825 /* 89*/ 0, 1826 0, 1827 0, 1828 0, 1829 0, 1830 /* 94*/ 0, 1831 0, 1832 0, 1833 0, 1834 0, 1835 /* 99*/ 0, 1836 0, 1837 0, 1838 MatConjugate_SeqSELL, 1839 0, 1840 /*104*/ 0, 1841 0, 1842 0, 1843 0, 1844 0, 1845 /*109*/ 0, 1846 0, 1847 0, 1848 0, 1849 MatMissingDiagonal_SeqSELL, 1850 /*114*/ 0, 1851 0, 1852 0, 1853 0, 1854 0, 1855 /*119*/ 0, 1856 0, 1857 0, 1858 0, 1859 0, 1860 /*124*/ 0, 1861 0, 1862 0, 1863 0, 1864 0, 1865 /*129*/ 0, 1866 0, 1867 0, 1868 0, 1869 0, 1870 /*134*/ 0, 1871 0, 1872 0, 1873 0, 1874 0, 1875 /*139*/ 0, 1876 0, 1877 0, 1878 MatFDColoringSetUp_SeqXAIJ, 1879 0, 1880 /*144*/0 1881 }; 1882 1883 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1884 { 1885 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1886 PetscErrorCode ierr; 1887 1888 PetscFunctionBegin; 1889 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1890 1891 /* allocate space for values if not already there */ 1892 if (!a->saved_values) { 1893 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1894 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1895 } 1896 1897 /* copy values over */ 1898 ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1899 PetscFunctionReturn(0); 1900 } 1901 1902 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1903 { 1904 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1905 PetscErrorCode ierr; 1906 1907 PetscFunctionBegin; 1908 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1909 if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1910 /* copy values over */ 1911 ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1912 PetscFunctionReturn(0); 1913 } 1914 1915 /*@C 1916 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1917 1918 Not Collective 1919 1920 Input Parameters: 1921 . mat - a MATSEQSELL matrix 1922 . array - pointer to the data 1923 1924 Level: intermediate 1925 1926 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1927 @*/ 1928 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1929 { 1930 PetscErrorCode ierr; 1931 1932 PetscFunctionBegin; 1933 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1934 PetscFunctionReturn(0); 1935 } 1936 1937 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1938 { 1939 Mat_SeqSELL *b; 1940 PetscMPIInt size; 1941 PetscErrorCode ierr; 1942 1943 PetscFunctionBegin; 1944 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 1945 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1946 1947 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1948 1949 B->data = (void*)b; 1950 1951 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1952 1953 b->row = 0; 1954 b->col = 0; 1955 b->icol = 0; 1956 b->reallocs = 0; 1957 b->ignorezeroentries = PETSC_FALSE; 1958 b->roworiented = PETSC_TRUE; 1959 b->nonew = 0; 1960 b->diag = 0; 1961 b->solve_work = 0; 1962 B->spptr = 0; 1963 b->saved_values = 0; 1964 b->idiag = 0; 1965 b->mdiag = 0; 1966 b->ssor_work = 0; 1967 b->omega = 1.0; 1968 b->fshift = 0.0; 1969 b->idiagvalid = PETSC_FALSE; 1970 b->keepnonzeropattern = PETSC_FALSE; 1971 1972 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1973 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1974 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1975 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1976 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1977 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1978 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1979 PetscFunctionReturn(0); 1980 } 1981 1982 /* 1983 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 1984 */ 1985 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 1986 { 1987 Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data; 1988 PetscInt i,m=A->rmap->n; 1989 PetscInt totalslices=a->totalslices; 1990 PetscErrorCode ierr; 1991 1992 PetscFunctionBegin; 1993 c = (Mat_SeqSELL*)C->data; 1994 1995 C->factortype = A->factortype; 1996 c->row = 0; 1997 c->col = 0; 1998 c->icol = 0; 1999 c->reallocs = 0; 2000 2001 C->assembled = PETSC_TRUE; 2002 2003 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 2004 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 2005 2006 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 2007 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2008 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 2009 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 2010 2011 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 2012 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 2013 2014 /* allocate the matrix space */ 2015 if (mallocmatspace) { 2016 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 2017 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2018 2019 c->singlemalloc = PETSC_TRUE; 2020 2021 if (m > 0) { 2022 ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr); 2023 if (cpvalues == MAT_COPY_VALUES) { 2024 ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2025 } else { 2026 ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 2027 } 2028 } 2029 } 2030 2031 c->ignorezeroentries = a->ignorezeroentries; 2032 c->roworiented = a->roworiented; 2033 c->nonew = a->nonew; 2034 if (a->diag) { 2035 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 2036 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 2037 for (i=0; i<m; i++) { 2038 c->diag[i] = a->diag[i]; 2039 } 2040 } else c->diag = 0; 2041 2042 c->solve_work = 0; 2043 c->saved_values = 0; 2044 c->idiag = 0; 2045 c->ssor_work = 0; 2046 c->keepnonzeropattern = a->keepnonzeropattern; 2047 c->free_val = PETSC_TRUE; 2048 c->free_colidx = PETSC_TRUE; 2049 2050 c->maxallocmat = a->maxallocmat; 2051 c->maxallocrow = a->maxallocrow; 2052 c->rlenmax = a->rlenmax; 2053 c->nz = a->nz; 2054 C->preallocated = PETSC_TRUE; 2055 2056 c->nonzerorowcnt = a->nonzerorowcnt; 2057 C->nonzerostate = A->nonzerostate; 2058 2059 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2060 PetscFunctionReturn(0); 2061 } 2062 2063 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 2064 { 2065 PetscErrorCode ierr; 2066 2067 PetscFunctionBegin; 2068 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 2069 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 2070 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 2071 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 2072 } 2073 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2074 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 /*@C 2079 MatCreateSeqSELL - Creates a sparse matrix in SELL format. 2080 2081 Collective on MPI_Comm 2082 2083 Input Parameters: 2084 + comm - MPI communicator, set to PETSC_COMM_SELF 2085 . m - number of rows 2086 . n - number of columns 2087 . rlenmax - maximum number of nonzeros in a row 2088 - rlen - array containing the number of nonzeros in the various rows 2089 (possibly different for each row) or NULL 2090 2091 Output Parameter: 2092 . A - the matrix 2093 2094 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2095 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2096 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 2097 2098 Notes: 2099 If nnz is given then nz is ignored 2100 2101 Specify the preallocated storage with either rlenmax or rlen (not both). 2102 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 2103 allocation. For large problems you MUST preallocate memory or you 2104 will get TERRIBLE performance, see the users' manual chapter on matrices. 2105 2106 Level: intermediate 2107 2108 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays() 2109 2110 @*/ 2111 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 2112 { 2113 PetscErrorCode ierr; 2114 2115 PetscFunctionBegin; 2116 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2117 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2118 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 2119 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 2124 { 2125 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 2126 PetscInt totalslices=a->totalslices; 2127 PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 /* If the matrix dimensions are not equal,or no of nonzeros */ 2131 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 2132 *flg = PETSC_FALSE; 2133 PetscFunctionReturn(0); 2134 } 2135 /* if the a->colidx are the same */ 2136 ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr); 2137 if (!*flg) PetscFunctionReturn(0); 2138 /* if a->val are the same */ 2139 ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr); 2140 PetscFunctionReturn(0); 2141 } 2142 2143 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 2144 { 2145 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2146 2147 PetscFunctionBegin; 2148 a->idiagvalid = PETSC_FALSE; 2149 PetscFunctionReturn(0); 2150 } 2151 2152 PetscErrorCode MatConjugate_SeqSELL(Mat A) 2153 { 2154 #if defined(PETSC_USE_COMPLEX) 2155 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2156 PetscInt i; 2157 PetscScalar *val = a->val; 2158 2159 PetscFunctionBegin; 2160 for (i=0; i<a->sliidx[a->totalslices]; i++) { 2161 val[i] = PetscConj(val[i]); 2162 } 2163 #else 2164 PetscFunctionBegin; 2165 #endif 2166 PetscFunctionReturn(0); 2167 } 2168