1 2 /* 3 Defines the basic matrix operations for the SELL matrix storage format. 4 */ 5 #include <../src/mat/impls/sell/seq/sell.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blocktranspose.h> 8 #if defined(PETSC_HAVE_IMMINTRIN_H) 9 #include <immintrin.h> 10 11 #if !defined(_MM_SCALE_8) 12 #define _MM_SCALE_8 8 13 #endif 14 15 #if defined(__AVX512F__) 16 /* these do not work 17 vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx); 18 vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval); 19 */ 20 #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \ 21 /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \ 22 vec_idx = _mm256_load_si256((__m256i const*)acolidx); \ 23 vec_vals = _mm512_load_pd(aval); \ 24 vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \ 25 vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y); 26 27 #endif 28 #endif /* PETSC_HAVE_IMMINTRIN_H */ 29 30 /*@C 31 MatSeqSELLSetPreallocation - For good matrix assembly performance 32 the user should preallocate the matrix storage by setting the parameter nz 33 (or the array nnz). By setting these parameters accurately, performance 34 during matrix assembly can be increased significantly. 35 36 Collective on MPI_Comm 37 38 Input Parameters: 39 + B - The matrix 40 . nz - number of nonzeros per row (same for all rows) 41 - nnz - array containing the number of nonzeros in the various rows 42 (possibly different for each row) or NULL 43 44 Notes: 45 If nnz is given then nz is ignored. 46 47 Specify the preallocated storage with either nz or nnz (not both). 48 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 49 allocation. For large problems you MUST preallocate memory or you 50 will get TERRIBLE performance, see the users' manual chapter on matrices. 51 52 You can call MatGetInfo() to get information on how effective the preallocation was; 53 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 54 You can also run with the option -info and look for messages with the string 55 malloc in them to see if additional memory allocation was needed. 56 57 Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix 58 entries or columns indices. 59 60 The maximum number of nonzeos in any row should be as accuate as possible. 61 If it is underesitmated, you will get bad performance due to reallocation 62 (MatSeqXSELLReallocateSELL). 63 64 Level: intermediate 65 66 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo() 67 68 @*/ 69 PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[]) 70 { 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 75 PetscValidType(B,1); 76 ierr = PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[]) 81 { 82 Mat_SeqSELL *b; 83 PetscInt i,j,totalslices; 84 PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE; 89 if (maxallocrow == MAT_SKIP_ALLOCATION) { 90 skipallocation = PETSC_TRUE; 91 maxallocrow = 0; 92 } 93 94 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 95 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 96 97 /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */ 98 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5; 99 if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow); 100 if (rlen) { 101 for (i=0; i<B->rmap->n; i++) { 102 if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]); 103 if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n); 104 } 105 } 106 107 B->preallocated = PETSC_TRUE; 108 109 b = (Mat_SeqSELL*)B->data; 110 111 totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */ 112 b->totalslices = totalslices; 113 if (!skipallocation) { 114 if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n); 115 116 if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */ 117 ierr = PetscMalloc1(totalslices+1,&b->sliidx);CHKERRQ(ierr); 118 ierr = PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 119 } 120 if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */ 121 if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10; 122 else if (maxallocrow < 0) maxallocrow = 1; 123 for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow; 124 } else { 125 maxallocrow = 0; 126 b->sliidx[0] = 0; 127 for (i=1; i<totalslices; i++) { 128 b->sliidx[i] = 0; 129 for (j=0;j<8;j++) { 130 b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]); 131 } 132 maxallocrow = PetscMax(b->sliidx[i],maxallocrow); 133 b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i]; 134 } 135 /* last slice */ 136 b->sliidx[totalslices] = 0; 137 for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]); 138 maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow); 139 b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices]; 140 } 141 142 /* allocate space for val, colidx, rlen */ 143 /* FIXME: should B's old memory be unlogged? */ 144 ierr = MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);CHKERRQ(ierr); 145 /* FIXME: assuming an element of the bit array takes 8 bits */ 146 ierr = PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);CHKERRQ(ierr); 147 ierr = PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 148 /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */ 149 ierr = PetscCalloc1(8*totalslices,&b->rlen);CHKERRQ(ierr); 150 ierr = PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));CHKERRQ(ierr); 151 152 b->singlemalloc = PETSC_TRUE; 153 b->free_val = PETSC_TRUE; 154 b->free_colidx = PETSC_TRUE; 155 } else { 156 b->free_val = PETSC_FALSE; 157 b->free_colidx = PETSC_FALSE; 158 } 159 160 b->nz = 0; 161 b->maxallocrow = maxallocrow; 162 b->rlenmax = maxallocrow; 163 b->maxallocmat = b->sliidx[totalslices]; 164 B->info.nz_unneeded = (double)b->maxallocmat; 165 if (realalloc) { 166 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 172 { 173 Mat B; 174 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 175 PetscInt i,j,row; 176 PetscBool isnonzero; 177 PetscErrorCode ierr; 178 179 PetscFunctionBegin; 180 if (reuse == MAT_REUSE_MATRIX) { 181 B = *newmat; 182 } else { 183 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 184 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 185 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 186 ierr = MatSeqAIJSetPreallocation(B,0,a->rlen);CHKERRQ(ierr); 187 ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 188 } 189 190 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 191 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 192 isnonzero = (PetscBool)((j-a->sliidx[i])/8 < a->rlen[8*i+row]); 193 if (isnonzero) { 194 ierr = MatSetValue(B,8*i+row,a->colidx[j],a->val[j],INSERT_VALUES);CHKERRQ(ierr); 195 } 196 } 197 } 198 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 199 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 200 B->rmap->bs = A->rmap->bs; 201 202 if (reuse == MAT_INPLACE_MATRIX) { 203 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 204 } else { 205 *newmat = B; 206 } 207 PetscFunctionReturn(0); 208 } 209 210 #include <../src/mat/impls/aij/seq/aij.h> 211 212 PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 213 { 214 Mat B; 215 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 216 PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols; 217 const PetscInt *cols; 218 const PetscScalar *vals; 219 PetscErrorCode ierr; 220 221 PetscFunctionBegin; 222 if (A->rmap->bs > 1) { 223 ierr = MatConvert_Basic(A,newtype,reuse,newmat);CHKERRQ(ierr); 224 PetscFunctionReturn(0); 225 } 226 227 if (reuse == MAT_REUSE_MATRIX) { 228 B = *newmat; 229 } else { 230 /* Can we just use ilen? */ 231 ierr = PetscMalloc1(m,&rowlengths);CHKERRQ(ierr); 232 for (i=0; i<m; i++) { 233 rowlengths[i] = ai[i+1] - ai[i]; 234 } 235 236 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 237 ierr = MatSetSizes(B,m,n,m,n);CHKERRQ(ierr); 238 ierr = MatSetType(B,MATSEQSELL);CHKERRQ(ierr); 239 ierr = MatSeqSELLSetPreallocation(B,0,rowlengths);CHKERRQ(ierr); 240 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 241 } 242 243 ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 244 245 for (row=0; row<m; row++) { 246 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 247 ierr = MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 248 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); 249 } 250 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 251 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 252 B->rmap->bs = A->rmap->bs; 253 254 if (reuse == MAT_INPLACE_MATRIX) { 255 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 256 } else { 257 *newmat = B; 258 } 259 PetscFunctionReturn(0); 260 } 261 262 PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy) 263 { 264 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 265 PetscScalar *y; 266 const PetscScalar *x; 267 const MatScalar *aval=a->val; 268 PetscInt totalslices=a->totalslices; 269 const PetscInt *acolidx=a->colidx; 270 PetscInt i,j; 271 PetscErrorCode ierr; 272 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 273 __m512d vec_x,vec_y,vec_vals; 274 __m256i vec_idx; 275 __mmask8 mask; 276 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 277 __m256i vec_idx2,vec_idx3,vec_idx4; 278 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 279 __m128d vec_x_tmp; 280 __m256d vec_x,vec_y,vec_y2,vec_vals; 281 MatScalar yval; 282 PetscInt r,rows_left,row,nnz_in_row; 283 #else 284 PetscScalar sum[8]; 285 #endif 286 287 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 288 #pragma disjoint(*x,*y,*aval) 289 #endif 290 291 PetscFunctionBegin; 292 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 293 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 294 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 295 for (i=0; i<totalslices; i++) { /* loop over slices */ 296 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 297 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 298 299 vec_y = _mm512_setzero_pd(); 300 vec_y2 = _mm512_setzero_pd(); 301 vec_y3 = _mm512_setzero_pd(); 302 vec_y4 = _mm512_setzero_pd(); 303 304 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 305 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 306 case 3: 307 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 308 acolidx += 8; aval += 8; 309 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 310 acolidx += 8; aval += 8; 311 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 312 acolidx += 8; aval += 8; 313 j += 3; 314 break; 315 case 2: 316 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 317 acolidx += 8; aval += 8; 318 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 319 acolidx += 8; aval += 8; 320 j += 2; 321 break; 322 case 1: 323 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 324 acolidx += 8; aval += 8; 325 j += 1; 326 break; 327 } 328 #pragma novector 329 for (; j<(a->sliidx[i+1]>>3); j+=4) { 330 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 331 acolidx += 8; aval += 8; 332 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 333 acolidx += 8; aval += 8; 334 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 335 acolidx += 8; aval += 8; 336 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 337 acolidx += 8; aval += 8; 338 } 339 340 vec_y = _mm512_add_pd(vec_y,vec_y2); 341 vec_y = _mm512_add_pd(vec_y,vec_y3); 342 vec_y = _mm512_add_pd(vec_y,vec_y4); 343 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 344 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 345 _mm512_mask_store_pd(&y[8*i],mask,vec_y); 346 } else { 347 _mm512_store_pd(&y[8*i],vec_y); 348 } 349 } 350 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 351 for (i=0; i<totalslices; i++) { /* loop over full slices */ 352 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 353 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 354 355 vec_y = _mm256_setzero_pd(); 356 vec_y2 = _mm256_setzero_pd(); 357 358 /* last slice may have padding rows. Don't use vectorization. */ 359 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 360 rows_left = A->rmap->n - 8*i; 361 for (r=0; r<rows_left; ++r) { 362 yval = (MatScalar)0; 363 row = 8*i + r; 364 nnz_in_row = a->rlen[row]; 365 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]]; 366 y[row] = yval; 367 } 368 break; 369 } 370 371 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 372 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 373 vec_vals = _mm256_loadu_pd(aval); 374 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 375 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 376 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 377 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 378 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 379 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 380 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 381 aval += 4; 382 383 vec_vals = _mm256_loadu_pd(aval); 384 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 385 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 386 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 387 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 388 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 389 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 390 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 391 aval += 4; 392 } 393 394 _mm256_storeu_pd(y + i*8, vec_y); 395 _mm256_storeu_pd(y + i*8 + 4, vec_y2); 396 } 397 #else 398 for (i=0; i<totalslices; i++) { /* loop over slices */ 399 for (j=0; j<8; j++) sum[j] = 0.0; 400 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 401 sum[0] += aval[j] * x[acolidx[j]]; 402 sum[1] += aval[j+1] * x[acolidx[j+1]]; 403 sum[2] += aval[j+2] * x[acolidx[j+2]]; 404 sum[3] += aval[j+3] * x[acolidx[j+3]]; 405 sum[4] += aval[j+4] * x[acolidx[j+4]]; 406 sum[5] += aval[j+5] * x[acolidx[j+5]]; 407 sum[6] += aval[j+6] * x[acolidx[j+6]]; 408 sum[7] += aval[j+7] * x[acolidx[j+7]]; 409 } 410 if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */ 411 for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j]; 412 } else { 413 for(j=0; j<8; j++) y[8*i+j] = sum[j]; 414 } 415 } 416 #endif 417 418 ierr = PetscLogFlops(2.0*a->nz-a->nonzerorowcnt);CHKERRQ(ierr); /* theoretical minimal FLOPs */ 419 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 420 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 424 #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h> 425 PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz) 426 { 427 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 428 PetscScalar *y,*z; 429 const PetscScalar *x; 430 const MatScalar *aval=a->val; 431 PetscInt totalslices=a->totalslices; 432 const PetscInt *acolidx=a->colidx; 433 PetscInt i,j; 434 PetscErrorCode ierr; 435 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 436 __m512d vec_x,vec_y,vec_vals; 437 __m256i vec_idx; 438 __mmask8 mask; 439 __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4; 440 __m256i vec_idx2,vec_idx3,vec_idx4; 441 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 442 __m128d vec_x_tmp; 443 __m256d vec_x,vec_y,vec_y2,vec_vals; 444 MatScalar yval; 445 PetscInt r,row,nnz_in_row; 446 #else 447 PetscScalar sum[8]; 448 #endif 449 450 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 451 #pragma disjoint(*x,*y,*aval) 452 #endif 453 454 PetscFunctionBegin; 455 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 456 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 457 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 458 for (i=0; i<totalslices; i++) { /* loop over slices */ 459 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 460 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 461 462 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 463 mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07))); 464 vec_y = _mm512_mask_load_pd(vec_y,mask,&y[8*i]); 465 } else { 466 vec_y = _mm512_load_pd(&y[8*i]); 467 } 468 vec_y2 = _mm512_setzero_pd(); 469 vec_y3 = _mm512_setzero_pd(); 470 vec_y4 = _mm512_setzero_pd(); 471 472 j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */ 473 switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) { 474 case 3: 475 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 476 acolidx += 8; aval += 8; 477 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 478 acolidx += 8; aval += 8; 479 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 480 acolidx += 8; aval += 8; 481 j += 3; 482 break; 483 case 2: 484 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 485 acolidx += 8; aval += 8; 486 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 487 acolidx += 8; aval += 8; 488 j += 2; 489 break; 490 case 1: 491 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 492 acolidx += 8; aval += 8; 493 j += 1; 494 break; 495 } 496 #pragma novector 497 for (; j<(a->sliidx[i+1]>>3); j+=4) { 498 AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y); 499 acolidx += 8; aval += 8; 500 AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2); 501 acolidx += 8; aval += 8; 502 AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3); 503 acolidx += 8; aval += 8; 504 AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4); 505 acolidx += 8; aval += 8; 506 } 507 508 vec_y = _mm512_add_pd(vec_y,vec_y2); 509 vec_y = _mm512_add_pd(vec_y,vec_y3); 510 vec_y = _mm512_add_pd(vec_y,vec_y4); 511 if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */ 512 _mm512_mask_store_pd(&z[8*i],mask,vec_y); 513 } else { 514 _mm512_store_pd(&z[8*i],vec_y); 515 } 516 } 517 #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) 518 for (i=0; i<totalslices; i++) { /* loop over full slices */ 519 PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 520 PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0); 521 522 /* last slice may have padding rows. Don't use vectorization. */ 523 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 524 for (r=0; r<(A->rmap->n & 0x07); ++r) { 525 row = 8*i + r; 526 yval = (MatScalar)0.0; 527 nnz_in_row = a->rlen[row]; 528 for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]]; 529 z[row] = y[row] + yval; 530 } 531 break; 532 } 533 534 vec_y = _mm256_loadu_pd(y+8*i); 535 vec_y2 = _mm256_loadu_pd(y+8*i+4); 536 537 /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */ 538 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 539 vec_vals = _mm256_loadu_pd(aval); 540 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 541 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 542 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 543 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 544 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 545 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 546 vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y); 547 aval += 4; 548 549 vec_vals = _mm256_loadu_pd(aval); 550 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 551 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 552 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0); 553 vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++); 554 vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++); 555 vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1); 556 vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2); 557 aval += 4; 558 } 559 560 _mm256_storeu_pd(z+i*8,vec_y); 561 _mm256_storeu_pd(z+i*8+4,vec_y2); 562 } 563 #else 564 for (i=0; i<totalslices; i++) { /* loop over slices */ 565 for (j=0; j<8; j++) sum[j] = 0.0; 566 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 567 sum[0] += aval[j] * x[acolidx[j]]; 568 sum[1] += aval[j+1] * x[acolidx[j+1]]; 569 sum[2] += aval[j+2] * x[acolidx[j+2]]; 570 sum[3] += aval[j+3] * x[acolidx[j+3]]; 571 sum[4] += aval[j+4] * x[acolidx[j+4]]; 572 sum[5] += aval[j+5] * x[acolidx[j+5]]; 573 sum[6] += aval[j+6] * x[acolidx[j+6]]; 574 sum[7] += aval[j+7] * x[acolidx[j+7]]; 575 } 576 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 577 for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j]; 578 } else { 579 for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j]; 580 } 581 } 582 #endif 583 584 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 585 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 586 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr); 587 PetscFunctionReturn(0); 588 } 589 590 PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy) 591 { 592 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 593 PetscScalar *y; 594 const PetscScalar *x; 595 const MatScalar *aval=a->val; 596 const PetscInt *acolidx=a->colidx; 597 PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices; 598 PetscErrorCode ierr; 599 600 #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 601 #pragma disjoint(*x,*y,*aval) 602 #endif 603 604 PetscFunctionBegin; 605 if (zz != yy) { ierr = VecCopy(zz,yy);CHKERRQ(ierr); } 606 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 607 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 608 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 609 if (i == totalslices-1 && (A->rmap->n & 0x07)) { 610 for (r=0; r<(A->rmap->n & 0x07); ++r) { 611 row = 8*i + r; 612 nnz_in_row = a->rlen[row]; 613 for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row]; 614 } 615 break; 616 } 617 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) { 618 y[acolidx[j]] += aval[j] * x[8*i]; 619 y[acolidx[j+1]] += aval[j+1] * x[8*i+1]; 620 y[acolidx[j+2]] += aval[j+2] * x[8*i+2]; 621 y[acolidx[j+3]] += aval[j+3] * x[8*i+3]; 622 y[acolidx[j+4]] += aval[j+4] * x[8*i+4]; 623 y[acolidx[j+5]] += aval[j+5] * x[8*i+5]; 624 y[acolidx[j+6]] += aval[j+6] * x[8*i+6]; 625 y[acolidx[j+7]] += aval[j+7] * x[8*i+7]; 626 } 627 } 628 ierr = PetscLogFlops(2.0*a->sliidx[a->totalslices]);CHKERRQ(ierr); 629 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 630 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 631 PetscFunctionReturn(0); 632 } 633 634 PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy) 635 { 636 PetscErrorCode ierr; 637 638 PetscFunctionBegin; 639 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 640 ierr = MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);CHKERRQ(ierr); 641 PetscFunctionReturn(0); 642 } 643 644 /* 645 Checks for missing diagonals 646 */ 647 PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d) 648 { 649 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 650 PetscInt *diag,i; 651 652 PetscFunctionBegin; 653 *missing = PETSC_FALSE; 654 if (A->rmap->n > 0 && !(a->colidx)) { 655 *missing = PETSC_TRUE; 656 if (d) *d = 0; 657 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 658 } else { 659 diag = a->diag; 660 for (i=0; i<A->rmap->n; i++) { 661 if (diag[i] == -1) { 662 *missing = PETSC_TRUE; 663 if (d) *d = i; 664 PetscInfo1(A,"Matrix is missing diagonal number %D\n",i); 665 break; 666 } 667 } 668 } 669 PetscFunctionReturn(0); 670 } 671 672 PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A) 673 { 674 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 675 PetscInt i,j,m=A->rmap->n,shift; 676 PetscErrorCode ierr; 677 678 PetscFunctionBegin; 679 if (!a->diag) { 680 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 681 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 682 a->free_diag = PETSC_TRUE; 683 } 684 for (i=0; i<m; i++) { /* loop over rows */ 685 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 686 a->diag[i] = -1; 687 for (j=0; j<a->rlen[i]; j++) { 688 if (a->colidx[shift+j*8] == i) { 689 a->diag[i] = shift+j*8; 690 break; 691 } 692 } 693 } 694 PetscFunctionReturn(0); 695 } 696 697 /* 698 Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways 699 */ 700 PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift) 701 { 702 Mat_SeqSELL *a=(Mat_SeqSELL*) A->data; 703 PetscInt i,*diag,m = A->rmap->n; 704 MatScalar *val = a->val; 705 PetscScalar *idiag,*mdiag; 706 PetscErrorCode ierr; 707 708 PetscFunctionBegin; 709 if (a->idiagvalid) PetscFunctionReturn(0); 710 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 711 diag = a->diag; 712 if (!a->idiag) { 713 ierr = PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);CHKERRQ(ierr); 714 ierr = PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));CHKERRQ(ierr); 715 val = a->val; 716 } 717 mdiag = a->mdiag; 718 idiag = a->idiag; 719 720 if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) { 721 for (i=0; i<m; i++) { 722 mdiag[i] = val[diag[i]]; 723 if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */ 724 if (PetscRealPart(fshift)) { 725 ierr = PetscInfo1(A,"Zero diagonal on row %D\n",i);CHKERRQ(ierr); 726 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 727 A->factorerror_zeropivot_value = 0.0; 728 A->factorerror_zeropivot_row = i; 729 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i); 730 } 731 idiag[i] = 1.0/val[diag[i]]; 732 } 733 ierr = PetscLogFlops(m);CHKERRQ(ierr); 734 } else { 735 for (i=0; i<m; i++) { 736 mdiag[i] = val[diag[i]]; 737 idiag[i] = omega/(fshift + val[diag[i]]); 738 } 739 ierr = PetscLogFlops(2.0*m);CHKERRQ(ierr); 740 } 741 a->idiagvalid = PETSC_TRUE; 742 PetscFunctionReturn(0); 743 } 744 745 PetscErrorCode MatZeroEntries_SeqSELL(Mat A) 746 { 747 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 748 PetscErrorCode ierr; 749 750 PetscFunctionBegin; 751 ierr = PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));CHKERRQ(ierr); 752 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 753 PetscFunctionReturn(0); 754 } 755 756 PetscErrorCode MatDestroy_SeqSELL(Mat A) 757 { 758 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 759 PetscErrorCode ierr; 760 761 PetscFunctionBegin; 762 #if defined(PETSC_USE_LOG) 763 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz); 764 #endif 765 ierr = MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);CHKERRQ(ierr); 766 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 767 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 768 ierr = PetscFree(a->diag);CHKERRQ(ierr); 769 ierr = PetscFree(a->ibdiag);CHKERRQ(ierr); 770 ierr = PetscFree(a->rlen);CHKERRQ(ierr); 771 ierr = PetscFree(a->sliidx);CHKERRQ(ierr); 772 ierr = PetscFree3(a->idiag,a->mdiag,a->ssor_work);CHKERRQ(ierr); 773 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 774 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 775 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 776 777 ierr = PetscFree(A->data);CHKERRQ(ierr); 778 779 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 780 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 781 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 782 #if defined(PETSC_HAVE_ELEMENTAL) 783 #endif 784 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);CHKERRQ(ierr); 785 PetscFunctionReturn(0); 786 } 787 788 PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg) 789 { 790 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 791 PetscErrorCode ierr; 792 793 PetscFunctionBegin; 794 switch (op) { 795 case MAT_ROW_ORIENTED: 796 a->roworiented = flg; 797 break; 798 case MAT_KEEP_NONZERO_PATTERN: 799 a->keepnonzeropattern = flg; 800 break; 801 case MAT_NEW_NONZERO_LOCATIONS: 802 a->nonew = (flg ? 0 : 1); 803 break; 804 case MAT_NEW_NONZERO_LOCATION_ERR: 805 a->nonew = (flg ? -1 : 0); 806 break; 807 case MAT_NEW_NONZERO_ALLOCATION_ERR: 808 a->nonew = (flg ? -2 : 0); 809 break; 810 case MAT_UNUSED_NONZERO_LOCATION_ERR: 811 a->nounused = (flg ? -1 : 0); 812 break; 813 case MAT_NEW_DIAGONALS: 814 case MAT_IGNORE_OFF_PROC_ENTRIES: 815 case MAT_USE_HASH_TABLE: 816 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 817 break; 818 case MAT_SPD: 819 case MAT_SYMMETRIC: 820 case MAT_STRUCTURALLY_SYMMETRIC: 821 case MAT_HERMITIAN: 822 case MAT_SYMMETRY_ETERNAL: 823 /* These options are handled directly by MatSetOption() */ 824 break; 825 default: 826 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 827 } 828 PetscFunctionReturn(0); 829 } 830 831 PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v) 832 { 833 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 834 PetscInt i,j,n,shift; 835 PetscScalar *x,zero=0.0; 836 PetscErrorCode ierr; 837 838 PetscFunctionBegin; 839 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 840 if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 841 842 if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) { 843 PetscInt *diag=a->diag; 844 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 845 for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]]; 846 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 847 PetscFunctionReturn(0); 848 } 849 850 ierr = VecSet(v,zero);CHKERRQ(ierr); 851 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 852 for (i=0; i<n; i++) { /* loop over rows */ 853 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 854 x[i] = 0; 855 for (j=0; j<a->rlen[i]; j++) { 856 if (a->colidx[shift+j*8] == i) { 857 x[i] = a->val[shift+j*8]; 858 break; 859 } 860 } 861 } 862 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr) 867 { 868 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 869 const PetscScalar *l,*r; 870 PetscInt i,j,m,n,row; 871 PetscErrorCode ierr; 872 873 PetscFunctionBegin; 874 if (ll) { 875 /* The local size is used so that VecMPI can be passed to this routine 876 by MatDiagonalScale_MPISELL */ 877 ierr = VecGetLocalSize(ll,&m);CHKERRQ(ierr); 878 if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length"); 879 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 880 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 881 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 882 a->val[j] *= l[8*i+row]; 883 } 884 } 885 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 886 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 887 } 888 if (rr) { 889 ierr = VecGetLocalSize(rr,&n);CHKERRQ(ierr); 890 if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length"); 891 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr); 892 for (i=0; i<a->totalslices; i++) { /* loop over slices */ 893 for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) { 894 a->val[j] *= r[a->colidx[j]]; 895 } 896 } 897 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr); 898 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 899 } 900 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 905 906 PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 907 { 908 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 909 PetscInt *cp,i,k,low,high,t,row,col,l; 910 PetscInt shift; 911 MatScalar *vp; 912 913 PetscFunctionBegin; 914 for (k=0; k<m; k++) { /* loop over added rows */ 915 row = im[k]; 916 if (row<0) continue; 917 #if defined(PETSC_USE_DEBUG) 918 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 919 #endif 920 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 921 cp = a->colidx+shift; /* pointer to the row */ 922 vp = a->val+shift; /* pointer to the row */ 923 for (l=0; l<n; l++) { /* loop over added rows */ 924 col = in[l]; 925 if (col<0) continue; 926 #if defined(PETSC_USE_DEBUG) 927 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1); 928 #endif 929 high = a->rlen[row]; low = 0; /* assume unsorted */ 930 while (high-low > 5) { 931 t = (low+high)/2; 932 if (*(cp+t*8) > col) high = t; 933 else low = t; 934 } 935 for (i=low; i<high; i++) { 936 if (*(cp+8*i) > col) break; 937 if (*(cp+8*i) == col) { 938 *v++ = *(vp+8*i); 939 goto finished; 940 } 941 } 942 *v++ = 0.0; 943 finished:; 944 } 945 } 946 PetscFunctionReturn(0); 947 } 948 949 PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer) 950 { 951 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 952 PetscInt i,j,m=A->rmap->n,shift; 953 const char *name; 954 PetscViewerFormat format; 955 PetscErrorCode ierr; 956 957 PetscFunctionBegin; 958 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 959 if (format == PETSC_VIEWER_ASCII_MATLAB) { 960 PetscInt nofinalvalue = 0; 961 /* 962 if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) { 963 nofinalvalue = 1; 964 } 965 */ 966 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 967 ierr = PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);CHKERRQ(ierr); 968 ierr = PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);CHKERRQ(ierr); 969 #if defined(PETSC_USE_COMPLEX) 970 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 971 #else 972 ierr = PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);CHKERRQ(ierr); 973 #endif 974 ierr = PetscViewerASCIIPrintf(viewer,"zzz = [\n");CHKERRQ(ierr); 975 976 for (i=0; i<m; i++) { 977 shift = a->sliidx[i>>3]+(i&0x07); 978 for (j=0; j<a->rlen[i]; j++) { 979 #if defined(PETSC_USE_COMPLEX) 980 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 981 #else 982 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);CHKERRQ(ierr); 983 #endif 984 } 985 } 986 /* 987 if (nofinalvalue) { 988 #if defined(PETSC_USE_COMPLEX) 989 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);CHKERRQ(ierr); 990 #else 991 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);CHKERRQ(ierr); 992 #endif 993 } 994 */ 995 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 996 ierr = PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);CHKERRQ(ierr); 997 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 998 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) { 999 PetscFunctionReturn(0); 1000 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1001 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1002 for (i=0; i<m; i++) { 1003 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1004 shift = a->sliidx[i>>3]+(i&0x07); 1005 for (j=0; j<a->rlen[i]; j++) { 1006 #if defined(PETSC_USE_COMPLEX) 1007 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1008 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1009 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) { 1010 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1011 } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) { 1012 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1013 } 1014 #else 1015 if (a->val[shift+8*j] != 0.0) {ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr);} 1016 #endif 1017 } 1018 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1019 } 1020 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1021 } else if (format == PETSC_VIEWER_ASCII_DENSE) { 1022 PetscInt cnt=0,jcnt; 1023 PetscScalar value; 1024 #if defined(PETSC_USE_COMPLEX) 1025 PetscBool realonly=PETSC_TRUE; 1026 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1027 if (PetscImaginaryPart(a->val[i]) != 0.0) { 1028 realonly = PETSC_FALSE; 1029 break; 1030 } 1031 } 1032 #endif 1033 1034 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1035 for (i=0; i<m; i++) { 1036 jcnt = 0; 1037 shift = a->sliidx[i>>3]+(i&0x07); 1038 for (j=0; j<A->cmap->n; j++) { 1039 if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) { 1040 value = a->val[cnt++]; 1041 jcnt++; 1042 } else { 1043 value = 0.0; 1044 } 1045 #if defined(PETSC_USE_COMPLEX) 1046 if (realonly) { 1047 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));CHKERRQ(ierr); 1048 } else { 1049 ierr = PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));CHKERRQ(ierr); 1050 } 1051 #else 1052 ierr = PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);CHKERRQ(ierr); 1053 #endif 1054 } 1055 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1056 } 1057 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1058 } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) { 1059 PetscInt fshift=1; 1060 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1061 #if defined(PETSC_USE_COMPLEX) 1062 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");CHKERRQ(ierr); 1063 #else 1064 ierr = PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");CHKERRQ(ierr); 1065 #endif 1066 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);CHKERRQ(ierr); 1067 for (i=0; i<m; i++) { 1068 shift = a->sliidx[i>>3]+(i&0x07); 1069 for (j=0; j<a->rlen[i]; j++) { 1070 #if defined(PETSC_USE_COMPLEX) 1071 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1072 #else 1073 ierr = PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);CHKERRQ(ierr); 1074 #endif 1075 } 1076 } 1077 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1078 } else { 1079 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1080 if (A->factortype) { 1081 for (i=0; i<m; i++) { 1082 shift = a->sliidx[i>>3]+(i&0x07); 1083 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1084 /* L part */ 1085 for (j=shift; j<a->diag[i]; j+=8) { 1086 #if defined(PETSC_USE_COMPLEX) 1087 if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) { 1088 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1089 } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) { 1090 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1091 } else { 1092 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1093 } 1094 #else 1095 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1096 #endif 1097 } 1098 /* diagonal */ 1099 j = a->diag[i]; 1100 #if defined(PETSC_USE_COMPLEX) 1101 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1102 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));CHKERRQ(ierr); 1103 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1104 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));CHKERRQ(ierr); 1105 } else { 1106 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));CHKERRQ(ierr); 1107 } 1108 #else 1109 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));CHKERRQ(ierr); 1110 #endif 1111 1112 /* U part */ 1113 for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) { 1114 #if defined(PETSC_USE_COMPLEX) 1115 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1116 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));CHKERRQ(ierr); 1117 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1118 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));CHKERRQ(ierr); 1119 } else { 1120 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));CHKERRQ(ierr); 1121 } 1122 #else 1123 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);CHKERRQ(ierr); 1124 #endif 1125 } 1126 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1127 } 1128 } else { 1129 for (i=0; i<m; i++) { 1130 shift = a->sliidx[i>>3]+(i&0x07); 1131 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i);CHKERRQ(ierr); 1132 for (j=0; j<a->rlen[i]; j++) { 1133 #if defined(PETSC_USE_COMPLEX) 1134 if (PetscImaginaryPart(a->val[j]) > 0.0) { 1135 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1136 } else if (PetscImaginaryPart(a->val[j]) < 0.0) { 1137 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));CHKERRQ(ierr); 1138 } else { 1139 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));CHKERRQ(ierr); 1140 } 1141 #else 1142 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);CHKERRQ(ierr); 1143 #endif 1144 } 1145 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1146 } 1147 } 1148 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1149 } 1150 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #include <petscdraw.h> 1155 PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa) 1156 { 1157 Mat A=(Mat)Aa; 1158 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1159 PetscInt i,j,m=A->rmap->n,shift; 1160 int color; 1161 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1162 PetscViewer viewer; 1163 PetscViewerFormat format; 1164 PetscErrorCode ierr; 1165 1166 PetscFunctionBegin; 1167 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1168 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1169 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1170 1171 /* loop over matrix elements drawing boxes */ 1172 1173 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1174 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1175 /* Blue for negative, Cyan for zero and Red for positive */ 1176 color = PETSC_DRAW_BLUE; 1177 for (i=0; i<m; i++) { 1178 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1179 y_l = m - i - 1.0; y_r = y_l + 1.0; 1180 for (j=0; j<a->rlen[i]; j++) { 1181 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1182 if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue; 1183 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1184 } 1185 } 1186 color = PETSC_DRAW_CYAN; 1187 for (i=0; i<m; i++) { 1188 shift = a->sliidx[i>>3]+(i&0x07); 1189 y_l = m - i - 1.0; y_r = y_l + 1.0; 1190 for (j=0; j<a->rlen[i]; j++) { 1191 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1192 if (a->val[shift+8*j] != 0.) continue; 1193 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1194 } 1195 } 1196 color = PETSC_DRAW_RED; 1197 for (i=0; i<m; i++) { 1198 shift = a->sliidx[i>>3]+(i&0x07); 1199 y_l = m - i - 1.0; y_r = y_l + 1.0; 1200 for (j=0; j<a->rlen[i]; j++) { 1201 x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0; 1202 if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue; 1203 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1204 } 1205 } 1206 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1207 } else { 1208 /* use contour shading to indicate magnitude of values */ 1209 /* first determine max of all nonzero values */ 1210 PetscReal minv=0.0,maxv=0.0; 1211 PetscInt count=0; 1212 PetscDraw popup; 1213 for (i=0; i<a->sliidx[a->totalslices]; i++) { 1214 if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]); 1215 } 1216 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1217 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1218 ierr = PetscDrawScalePopup(popup,minv,maxv);CHKERRQ(ierr); 1219 1220 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1221 for (i=0; i<m; i++) { 1222 shift = a->sliidx[i>>3]+(i&0x07); 1223 y_l = m - i - 1.0; 1224 y_r = y_l + 1.0; 1225 for (j=0; j<a->rlen[i]; j++) { 1226 x_l = a->colidx[shift+j*8]; 1227 x_r = x_l + 1.0; 1228 color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv); 1229 ierr = PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);CHKERRQ(ierr); 1230 count++; 1231 } 1232 } 1233 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1234 } 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #include <petscdraw.h> 1239 PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer) 1240 { 1241 PetscDraw draw; 1242 PetscReal xr,yr,xl,yl,h,w; 1243 PetscBool isnull; 1244 PetscErrorCode ierr; 1245 1246 PetscFunctionBegin; 1247 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1248 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1249 if (isnull) PetscFunctionReturn(0); 1250 1251 xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0; 1252 xr += w; yr += h; xl = -w; yl = -h; 1253 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1254 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1255 ierr = PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);CHKERRQ(ierr); 1256 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1257 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer) 1262 { 1263 PetscBool iascii,isbinary,isdraw; 1264 PetscErrorCode ierr; 1265 1266 PetscFunctionBegin; 1267 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1268 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1269 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1270 if (iascii) { 1271 ierr = MatView_SeqSELL_ASCII(A,viewer);CHKERRQ(ierr); 1272 } else if (isbinary) { 1273 /* ierr = MatView_SeqSELL_Binary(A,viewer);CHKERRQ(ierr); */ 1274 } else if (isdraw) { 1275 ierr = MatView_SeqSELL_Draw(A,viewer);CHKERRQ(ierr); 1276 } 1277 PetscFunctionReturn(0); 1278 } 1279 1280 PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode) 1281 { 1282 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1283 PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k; 1284 MatScalar *vp; 1285 PetscErrorCode ierr; 1286 1287 PetscFunctionBegin; 1288 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1289 /* To do: compress out the unused elements */ 1290 ierr = MatMarkDiagonal_SeqSELL(A);CHKERRQ(ierr); 1291 ierr = PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);CHKERRQ(ierr); 1292 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);CHKERRQ(ierr); 1293 ierr = PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);CHKERRQ(ierr); 1294 /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */ 1295 for (i=0; i<a->totalslices; ++i) { 1296 shift = a->sliidx[i]; /* starting index of the slice */ 1297 cp = a->colidx+shift; /* pointer to the column indices of the slice */ 1298 vp = a->val+shift; /* pointer to the nonzero values of the slice */ 1299 for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */ 1300 row = 8*i + row_in_slice; 1301 nrow = a->rlen[row]; /* number of nonzeros in row */ 1302 /* 1303 Search for the nearest nonzero. Normally setting the index to zero may cause extra communication. 1304 But if the entire slice are empty, it is fine to use 0 since the index will not be loaded. 1305 */ 1306 lastcol = 0; 1307 if (nrow>0) { /* nonempty row */ 1308 lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */ 1309 } else if (!row_in_slice) { /* first row of the currect slice is empty */ 1310 for (j=1;j<8;j++) { 1311 if (a->rlen[8*i+j]) { 1312 lastcol = cp[j]; 1313 break; 1314 } 1315 } 1316 } else { 1317 if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */ 1318 } 1319 1320 for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) { 1321 cp[8*k+row_in_slice] = lastcol; 1322 vp[8*k+row_in_slice] = (MatScalar)0; 1323 } 1324 } 1325 } 1326 1327 A->info.mallocs += a->reallocs; 1328 a->reallocs = 0; 1329 1330 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1331 PetscFunctionReturn(0); 1332 } 1333 1334 PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info) 1335 { 1336 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1337 1338 PetscFunctionBegin; 1339 info->block_size = 1.0; 1340 info->nz_allocated = (double)a->maxallocmat; 1341 info->nz_used = (double)a->sliidx[a->totalslices]; /* include padding zeros */ 1342 info->nz_unneeded = (double)(a->maxallocmat-a->sliidx[a->totalslices]); 1343 info->assemblies = (double)A->num_ass; 1344 info->mallocs = (double)A->info.mallocs; 1345 info->memory = ((PetscObject)A)->mem; 1346 if (A->factortype) { 1347 info->fill_ratio_given = A->info.fill_ratio_given; 1348 info->fill_ratio_needed = A->info.fill_ratio_needed; 1349 info->factor_mallocs = A->info.factor_mallocs; 1350 } else { 1351 info->fill_ratio_given = 0; 1352 info->fill_ratio_needed = 0; 1353 info->factor_mallocs = 0; 1354 } 1355 PetscFunctionReturn(0); 1356 } 1357 1358 PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1359 { 1360 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1361 PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow; 1362 PetscInt *cp,nonew=a->nonew,lastcol=-1; 1363 MatScalar *vp,value; 1364 PetscErrorCode ierr; 1365 1366 PetscFunctionBegin; 1367 for (k=0; k<m; k++) { /* loop over added rows */ 1368 row = im[k]; 1369 if (row < 0) continue; 1370 #if defined(PETSC_USE_DEBUG) 1371 if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1); 1372 #endif 1373 shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */ 1374 cp = a->colidx+shift; /* pointer to the row */ 1375 vp = a->val+shift; /* pointer to the row */ 1376 nrow = a->rlen[row]; 1377 low = 0; 1378 high = nrow; 1379 1380 for (l=0; l<n; l++) { /* loop over added columns */ 1381 col = in[l]; 1382 if (col<0) continue; 1383 #if defined(PETSC_USE_DEBUG) 1384 if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1); 1385 #endif 1386 if (a->roworiented) { 1387 value = v[l+k*n]; 1388 } else { 1389 value = v[k+l*m]; 1390 } 1391 if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue; 1392 1393 /* search in this row for the specified colmun, i indicates the column to be set */ 1394 if (col <= lastcol) low = 0; 1395 else high = nrow; 1396 lastcol = col; 1397 while (high-low > 5) { 1398 t = (low+high)/2; 1399 if (*(cp+t*8) > col) high = t; 1400 else low = t; 1401 } 1402 for (i=low; i<high; i++) { 1403 if (*(cp+i*8) > col) break; 1404 if (*(cp+i*8) == col) { 1405 if (is == ADD_VALUES) *(vp+i*8) += value; 1406 else *(vp+i*8) = value; 1407 low = i + 1; 1408 goto noinsert; 1409 } 1410 } 1411 if (value == 0.0 && a->ignorezeroentries) goto noinsert; 1412 if (nonew == 1) goto noinsert; 1413 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1414 /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */ 1415 MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar); 1416 /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */ 1417 for (ii=nrow-1; ii>=i; ii--) { 1418 *(cp+(ii+1)*8) = *(cp+ii*8); 1419 *(vp+(ii+1)*8) = *(vp+ii*8); 1420 } 1421 a->rlen[row]++; 1422 *(cp+i*8) = col; 1423 *(vp+i*8) = value; 1424 a->nz++; 1425 A->nonzerostate++; 1426 low = i+1; high++; nrow++; 1427 noinsert:; 1428 } 1429 a->rlen[row] = nrow; 1430 } 1431 PetscFunctionReturn(0); 1432 } 1433 1434 PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str) 1435 { 1436 PetscErrorCode ierr; 1437 1438 PetscFunctionBegin; 1439 /* If the two matrices have the same copy implementation, use fast copy. */ 1440 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1441 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1442 Mat_SeqSELL *b=(Mat_SeqSELL*)B->data; 1443 1444 if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1445 ierr = PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1446 } else { 1447 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1448 } 1449 PetscFunctionReturn(0); 1450 } 1451 1452 PetscErrorCode MatSetUp_SeqSELL(Mat A) 1453 { 1454 PetscErrorCode ierr; 1455 1456 PetscFunctionBegin; 1457 ierr = MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);CHKERRQ(ierr); 1458 PetscFunctionReturn(0); 1459 } 1460 1461 PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[]) 1462 { 1463 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1464 1465 PetscFunctionBegin; 1466 *array = a->val; 1467 PetscFunctionReturn(0); 1468 } 1469 1470 PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[]) 1471 { 1472 PetscFunctionBegin; 1473 PetscFunctionReturn(0); 1474 } 1475 1476 PetscErrorCode MatRealPart_SeqSELL(Mat A) 1477 { 1478 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1479 PetscInt i; 1480 MatScalar *aval=a->val; 1481 1482 PetscFunctionBegin; 1483 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]); 1484 PetscFunctionReturn(0); 1485 } 1486 1487 PetscErrorCode MatImaginaryPart_SeqSELL(Mat A) 1488 { 1489 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1490 PetscInt i; 1491 MatScalar *aval=a->val; 1492 PetscErrorCode ierr; 1493 1494 PetscFunctionBegin; 1495 for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]); 1496 ierr = MatSeqSELLInvalidateDiagonal(A);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha) 1501 { 1502 Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data; 1503 MatScalar *aval=a->val; 1504 PetscScalar oalpha=alpha; 1505 PetscBLASInt one=1,size; 1506 PetscErrorCode ierr; 1507 1508 PetscFunctionBegin; 1509 ierr = PetscBLASIntCast(a->sliidx[a->totalslices],&size);CHKERRQ(ierr); 1510 PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one)); 1511 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1512 ierr = MatSeqSELLInvalidateDiagonal(inA);CHKERRQ(ierr); 1513 PetscFunctionReturn(0); 1514 } 1515 1516 PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a) 1517 { 1518 Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data; 1519 PetscErrorCode ierr; 1520 1521 PetscFunctionBegin; 1522 if (!Y->preallocated || !y->nz) { 1523 ierr = MatSeqSELLSetPreallocation(Y,1,NULL);CHKERRQ(ierr); 1524 } 1525 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 1526 PetscFunctionReturn(0); 1527 } 1528 1529 PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1530 { 1531 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 1532 PetscScalar *x,sum,*t; 1533 const MatScalar *idiag=0,*mdiag; 1534 const PetscScalar *b,*xb; 1535 PetscInt n,m=A->rmap->n,i,j,shift; 1536 const PetscInt *diag; 1537 PetscErrorCode ierr; 1538 1539 PetscFunctionBegin; 1540 its = its*lits; 1541 1542 if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */ 1543 if (!a->idiagvalid) {ierr = MatInvertDiagonal_SeqSELL(A,omega,fshift);CHKERRQ(ierr);} 1544 a->fshift = fshift; 1545 a->omega = omega; 1546 1547 diag = a->diag; 1548 t = a->ssor_work; 1549 idiag = a->idiag; 1550 mdiag = a->mdiag; 1551 1552 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1553 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1554 /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */ 1555 if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented"); 1556 if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented"); 1557 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 1558 1559 if (flag & SOR_ZERO_INITIAL_GUESS) { 1560 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1561 for (i=0; i<m; i++) { 1562 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1563 sum = b[i]; 1564 n = (diag[i]-shift)/8; 1565 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1566 t[i] = sum; 1567 x[i] = sum*idiag[i]; 1568 } 1569 xb = t; 1570 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1571 } else xb = b; 1572 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1573 for (i=m-1; i>=0; i--) { 1574 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1575 sum = xb[i]; 1576 n = a->rlen[i]-(diag[i]-shift)/8-1; 1577 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1578 if (xb == b) { 1579 x[i] = sum*idiag[i]; 1580 } else { 1581 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1582 } 1583 } 1584 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1585 } 1586 its--; 1587 } 1588 while (its--) { 1589 if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) { 1590 for (i=0; i<m; i++) { 1591 /* lower */ 1592 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1593 sum = b[i]; 1594 n = (diag[i]-shift)/8; 1595 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1596 t[i] = sum; /* save application of the lower-triangular part */ 1597 /* upper */ 1598 n = a->rlen[i]-(diag[i]-shift)/8-1; 1599 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1600 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1601 } 1602 xb = t; 1603 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1604 } else xb = b; 1605 if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1606 for (i=m-1; i>=0; i--) { 1607 shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */ 1608 sum = xb[i]; 1609 if (xb == b) { 1610 /* whole matrix (no checkpointing available) */ 1611 n = a->rlen[i]; 1612 for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]]; 1613 x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i]; 1614 } else { /* lower-triangular part has been saved, so only apply upper-triangular */ 1615 n = a->rlen[i]-(diag[i]-shift)/8-1; 1616 for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]]; 1617 x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */ 1618 } 1619 } 1620 if (xb == b) { 1621 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1622 } else { 1623 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); /* assumes 1/2 in upper */ 1624 } 1625 } 1626 } 1627 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1628 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1629 PetscFunctionReturn(0); 1630 } 1631 1632 /* -------------------------------------------------------------------*/ 1633 static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL, 1634 0, 1635 0, 1636 MatMult_SeqSELL, 1637 /* 4*/ MatMultAdd_SeqSELL, 1638 MatMultTranspose_SeqSELL, 1639 MatMultTransposeAdd_SeqSELL, 1640 0, 1641 0, 1642 0, 1643 /* 10*/ 0, 1644 0, 1645 0, 1646 MatSOR_SeqSELL, 1647 0, 1648 /* 15*/ MatGetInfo_SeqSELL, 1649 MatEqual_SeqSELL, 1650 MatGetDiagonal_SeqSELL, 1651 MatDiagonalScale_SeqSELL, 1652 0, 1653 /* 20*/ 0, 1654 MatAssemblyEnd_SeqSELL, 1655 MatSetOption_SeqSELL, 1656 MatZeroEntries_SeqSELL, 1657 /* 24*/ 0, 1658 0, 1659 0, 1660 0, 1661 0, 1662 /* 29*/ MatSetUp_SeqSELL, 1663 0, 1664 0, 1665 0, 1666 0, 1667 /* 34*/ MatDuplicate_SeqSELL, 1668 0, 1669 0, 1670 0, 1671 0, 1672 /* 39*/ 0, 1673 0, 1674 0, 1675 MatGetValues_SeqSELL, 1676 MatCopy_SeqSELL, 1677 /* 44*/ 0, 1678 MatScale_SeqSELL, 1679 MatShift_SeqSELL, 1680 0, 1681 0, 1682 /* 49*/ 0, 1683 0, 1684 0, 1685 0, 1686 0, 1687 /* 54*/ MatFDColoringCreate_SeqXAIJ, 1688 0, 1689 0, 1690 0, 1691 0, 1692 /* 59*/ 0, 1693 MatDestroy_SeqSELL, 1694 MatView_SeqSELL, 1695 0, 1696 0, 1697 /* 64*/ 0, 1698 0, 1699 0, 1700 0, 1701 0, 1702 /* 69*/ 0, 1703 0, 1704 0, 1705 0, 1706 0, 1707 /* 74*/ 0, 1708 MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */ 1709 0, 1710 0, 1711 0, 1712 /* 79*/ 0, 1713 0, 1714 0, 1715 0, 1716 0, 1717 /* 84*/ 0, 1718 0, 1719 0, 1720 0, 1721 0, 1722 /* 89*/ 0, 1723 0, 1724 0, 1725 0, 1726 0, 1727 /* 94*/ 0, 1728 0, 1729 0, 1730 0, 1731 0, 1732 /* 99*/ 0, 1733 0, 1734 0, 1735 MatConjugate_SeqSELL, 1736 0, 1737 /*104*/ 0, 1738 0, 1739 0, 1740 0, 1741 0, 1742 /*109*/ 0, 1743 0, 1744 0, 1745 0, 1746 MatMissingDiagonal_SeqSELL, 1747 /*114*/ 0, 1748 0, 1749 0, 1750 0, 1751 0, 1752 /*119*/ 0, 1753 0, 1754 0, 1755 0, 1756 0, 1757 /*124*/ 0, 1758 0, 1759 0, 1760 0, 1761 0, 1762 /*129*/ 0, 1763 0, 1764 0, 1765 0, 1766 0, 1767 /*134*/ 0, 1768 0, 1769 0, 1770 0, 1771 0, 1772 /*139*/ 0, 1773 0, 1774 0, 1775 MatFDColoringSetUp_SeqXAIJ, 1776 0, 1777 /*144*/0 1778 }; 1779 1780 PetscErrorCode MatStoreValues_SeqSELL(Mat mat) 1781 { 1782 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1783 PetscErrorCode ierr; 1784 1785 PetscFunctionBegin; 1786 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1787 1788 /* allocate space for values if not already there */ 1789 if (!a->saved_values) { 1790 ierr = PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);CHKERRQ(ierr); 1791 ierr = PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1792 } 1793 1794 /* copy values over */ 1795 ierr = PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1796 PetscFunctionReturn(0); 1797 } 1798 1799 PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat) 1800 { 1801 Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data; 1802 PetscErrorCode ierr; 1803 1804 PetscFunctionBegin; 1805 if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1806 if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1807 /* copy values over */ 1808 ierr = PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));CHKERRQ(ierr); 1809 PetscFunctionReturn(0); 1810 } 1811 1812 /*@C 1813 MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray() 1814 1815 Not Collective 1816 1817 Input Parameters: 1818 . mat - a MATSEQSELL matrix 1819 . array - pointer to the data 1820 1821 Level: intermediate 1822 1823 .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90() 1824 @*/ 1825 PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array) 1826 { 1827 PetscErrorCode ierr; 1828 1829 PetscFunctionBegin; 1830 ierr = PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 1834 PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B) 1835 { 1836 Mat_SeqSELL *b; 1837 PetscMPIInt size; 1838 PetscErrorCode ierr; 1839 1840 PetscFunctionBegin; 1841 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 1842 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1"); 1843 1844 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1845 1846 B->data = (void*)b; 1847 1848 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1849 1850 b->row = 0; 1851 b->col = 0; 1852 b->icol = 0; 1853 b->reallocs = 0; 1854 b->ignorezeroentries = PETSC_FALSE; 1855 b->roworiented = PETSC_TRUE; 1856 b->nonew = 0; 1857 b->diag = 0; 1858 b->solve_work = 0; 1859 B->spptr = 0; 1860 b->saved_values = 0; 1861 b->idiag = 0; 1862 b->mdiag = 0; 1863 b->ssor_work = 0; 1864 b->omega = 1.0; 1865 b->fshift = 0.0; 1866 b->idiagvalid = PETSC_FALSE; 1867 b->ibdiagvalid = PETSC_FALSE; 1868 b->keepnonzeropattern = PETSC_FALSE; 1869 1870 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);CHKERRQ(ierr); 1871 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);CHKERRQ(ierr); 1872 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);CHKERRQ(ierr); 1873 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);CHKERRQ(ierr); 1874 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);CHKERRQ(ierr); 1875 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);CHKERRQ(ierr); 1876 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);CHKERRQ(ierr); 1877 PetscFunctionReturn(0); 1878 } 1879 1880 /* 1881 Given a matrix generated with MatGetFactor() duplicates all the information in A into B 1882 */ 1883 PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 1884 { 1885 Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data; 1886 PetscInt i,m=A->rmap->n; 1887 PetscInt totalslices=a->totalslices; 1888 PetscErrorCode ierr; 1889 1890 PetscFunctionBegin; 1891 c = (Mat_SeqSELL*)C->data; 1892 1893 C->factortype = A->factortype; 1894 c->row = 0; 1895 c->col = 0; 1896 c->icol = 0; 1897 c->reallocs = 0; 1898 1899 C->assembled = PETSC_TRUE; 1900 1901 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 1902 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 1903 1904 ierr = PetscMalloc1(8*totalslices,&c->rlen);CHKERRQ(ierr); 1905 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 1906 ierr = PetscMalloc1(totalslices+1,&c->sliidx);CHKERRQ(ierr); 1907 ierr = PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));CHKERRQ(ierr); 1908 1909 for (i=0; i<m; i++) c->rlen[i] = a->rlen[i]; 1910 for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i]; 1911 1912 /* allocate the matrix space */ 1913 if (mallocmatspace) { 1914 ierr = PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);CHKERRQ(ierr); 1915 ierr = PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 1916 1917 c->singlemalloc = PETSC_TRUE; 1918 1919 if (m > 0) { 1920 ierr = PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));CHKERRQ(ierr); 1921 if (cpvalues == MAT_COPY_VALUES) { 1922 ierr = PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 1923 } else { 1924 ierr = PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));CHKERRQ(ierr); 1925 } 1926 } 1927 } 1928 1929 c->ignorezeroentries = a->ignorezeroentries; 1930 c->roworiented = a->roworiented; 1931 c->nonew = a->nonew; 1932 if (a->diag) { 1933 ierr = PetscMalloc1(m,&c->diag);CHKERRQ(ierr); 1934 ierr = PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));CHKERRQ(ierr); 1935 for (i=0; i<m; i++) { 1936 c->diag[i] = a->diag[i]; 1937 } 1938 } else c->diag = 0; 1939 1940 c->solve_work = 0; 1941 c->saved_values = 0; 1942 c->idiag = 0; 1943 c->ssor_work = 0; 1944 c->keepnonzeropattern = a->keepnonzeropattern; 1945 c->free_val = PETSC_TRUE; 1946 c->free_colidx = PETSC_TRUE; 1947 1948 c->maxallocmat = a->maxallocmat; 1949 c->maxallocrow = a->maxallocrow; 1950 c->rlenmax = a->rlenmax; 1951 c->nz = a->nz; 1952 C->preallocated = PETSC_TRUE; 1953 1954 c->nonzerorowcnt = a->nonzerorowcnt; 1955 C->nonzerostate = A->nonzerostate; 1956 1957 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 1961 PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B) 1962 { 1963 PetscErrorCode ierr; 1964 1965 PetscFunctionBegin; 1966 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 1967 ierr = MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr); 1968 if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) { 1969 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 1970 } 1971 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1972 ierr = MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 1973 PetscFunctionReturn(0); 1974 } 1975 1976 /*@C 1977 MatCreateSeqSELL - Creates a sparse matrix in SELL format. 1978 1979 Collective on MPI_Comm 1980 1981 Input Parameters: 1982 + comm - MPI communicator, set to PETSC_COMM_SELF 1983 . m - number of rows 1984 . n - number of columns 1985 . rlenmax - maximum number of nonzeros in a row 1986 - rlen - array containing the number of nonzeros in the various rows 1987 (possibly different for each row) or NULL 1988 1989 Output Parameter: 1990 . A - the matrix 1991 1992 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1993 MatXXXXSetPreallocation() paradgm instead of this routine directly. 1994 [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation] 1995 1996 Notes: 1997 If nnz is given then nz is ignored 1998 1999 Specify the preallocated storage with either rlenmax or rlen (not both). 2000 Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory 2001 allocation. For large problems you MUST preallocate memory or you 2002 will get TERRIBLE performance, see the users' manual chapter on matrices. 2003 2004 Level: intermediate 2005 2006 .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays() 2007 2008 @*/ 2009 PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A) 2010 { 2011 PetscErrorCode ierr; 2012 2013 PetscFunctionBegin; 2014 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2015 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2016 ierr = MatSetType(*A,MATSEQSELL);CHKERRQ(ierr); 2017 ierr = MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);CHKERRQ(ierr); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg) 2022 { 2023 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data; 2024 PetscInt totalslices=a->totalslices; 2025 PetscErrorCode ierr; 2026 2027 PetscFunctionBegin; 2028 /* If the matrix dimensions are not equal,or no of nonzeros */ 2029 if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) { 2030 *flg = PETSC_FALSE; 2031 PetscFunctionReturn(0); 2032 } 2033 /* if the a->colidx are the same */ 2034 ierr = PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);CHKERRQ(ierr); 2035 if (!*flg) PetscFunctionReturn(0); 2036 /* if a->val are the same */ 2037 ierr = PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);CHKERRQ(ierr); 2038 PetscFunctionReturn(0); 2039 } 2040 2041 PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A) 2042 { 2043 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2044 2045 PetscFunctionBegin; 2046 a->idiagvalid = PETSC_FALSE; 2047 a->ibdiagvalid = PETSC_FALSE; 2048 PetscFunctionReturn(0); 2049 } 2050 2051 PetscErrorCode MatConjugate_SeqSELL(Mat A) 2052 { 2053 #if defined(PETSC_USE_COMPLEX) 2054 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 2055 PetscInt i; 2056 PetscScalar *val = a->val; 2057 2058 PetscFunctionBegin; 2059 for (i=0; i<a->sliidx[a->totalslices]; i++) { 2060 val[i] = PetscConj(val[i]); 2061 } 2062 #else 2063 PetscFunctionBegin; 2064 #endif 2065 PetscFunctionReturn(0); 2066 } 2067