1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 16 { 17 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 18 PetscErrorCode ierr; 19 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 20 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 21 PetscReal shift = 0.0; 22 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 23 24 PetscFunctionBegin; 25 allowzeropivot = PetscNot(A->erroriffailure); 26 27 if (a->idiagvalid) { 28 if (values) *values = a->idiag; 29 PetscFunctionReturn(0); 30 } 31 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 32 diag_offset = a->diag; 33 if (!a->idiag) { 34 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 35 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 36 } 37 diag = a->idiag; 38 mdiag = a->idiag+bs2*mbs; 39 if (values) *values = a->idiag; 40 /* factor and invert each block */ 41 switch (bs) { 42 case 1: 43 for (i=0; i<mbs; i++) { 44 odiag = v + 1*diag_offset[i]; 45 diag[0] = odiag[0]; 46 mdiag[0] = odiag[0]; 47 48 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 49 if (allowzeropivot) { 50 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 51 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 52 A->factorerror_zeropivot_row = i; 53 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 54 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 55 } 56 57 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 58 diag += 1; 59 mdiag += 1; 60 } 61 break; 62 case 2: 63 for (i=0; i<mbs; i++) { 64 odiag = v + 4*diag_offset[i]; 65 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 66 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 67 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 68 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 69 diag += 4; 70 mdiag += 4; 71 } 72 break; 73 case 3: 74 for (i=0; i<mbs; i++) { 75 odiag = v + 9*diag_offset[i]; 76 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 77 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 78 diag[8] = odiag[8]; 79 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 80 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 81 mdiag[8] = odiag[8]; 82 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 83 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 94 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 95 diag += 16; 96 mdiag += 16; 97 } 98 break; 99 case 5: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 25*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 105 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 106 diag += 25; 107 mdiag += 25; 108 } 109 break; 110 case 6: 111 for (i=0; i<mbs; i++) { 112 odiag = v + 36*diag_offset[i]; 113 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 115 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 116 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 117 diag += 36; 118 mdiag += 36; 119 } 120 break; 121 case 7: 122 for (i=0; i<mbs; i++) { 123 odiag = v + 49*diag_offset[i]; 124 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 125 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 127 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 diag += 49; 129 mdiag += 49; 130 } 131 break; 132 default: 133 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 134 for (i=0; i<mbs; i++) { 135 odiag = v + bs2*diag_offset[i]; 136 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 137 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 138 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 139 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 140 diag += bs2; 141 mdiag += bs2; 142 } 143 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 144 } 145 a->idiagvalid = PETSC_TRUE; 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 150 { 151 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 152 PetscScalar *x,*work,*w,*workt,*t; 153 const MatScalar *v,*aa = a->a, *idiag; 154 const PetscScalar *b,*xb; 155 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 156 PetscErrorCode ierr; 157 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 158 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 159 160 PetscFunctionBegin; 161 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 162 its = its*lits; 163 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 164 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 165 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 166 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 167 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 168 169 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 170 171 if (!m) PetscFunctionReturn(0); 172 diag = a->diag; 173 idiag = a->idiag; 174 k = PetscMax(A->rmap->n,A->cmap->n); 175 if (!a->mult_work) { 176 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 177 } 178 if (!a->sor_workt) { 179 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 180 } 181 if (!a->sor_work) { 182 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 183 } 184 work = a->mult_work; 185 t = a->sor_workt; 186 w = a->sor_work; 187 188 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 189 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 190 191 if (flag & SOR_ZERO_INITIAL_GUESS) { 192 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 193 switch (bs) { 194 case 1: 195 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 196 t[0] = b[0]; 197 i2 = 1; 198 idiag += 1; 199 for (i=1; i<m; i++) { 200 v = aa + ai[i]; 201 vi = aj + ai[i]; 202 nz = diag[i] - ai[i]; 203 s[0] = b[i2]; 204 for (j=0; j<nz; j++) { 205 xw[0] = x[vi[j]]; 206 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 207 } 208 t[i2] = s[0]; 209 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 210 x[i2] = xw[0]; 211 idiag += 1; 212 i2 += 1; 213 } 214 break; 215 case 2: 216 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 217 t[0] = b[0]; t[1] = b[1]; 218 i2 = 2; 219 idiag += 4; 220 for (i=1; i<m; i++) { 221 v = aa + 4*ai[i]; 222 vi = aj + ai[i]; 223 nz = diag[i] - ai[i]; 224 s[0] = b[i2]; s[1] = b[i2+1]; 225 for (j=0; j<nz; j++) { 226 idx = 2*vi[j]; 227 it = 4*j; 228 xw[0] = x[idx]; xw[1] = x[1+idx]; 229 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 230 } 231 t[i2] = s[0]; t[i2+1] = s[1]; 232 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 233 x[i2] = xw[0]; x[i2+1] = xw[1]; 234 idiag += 4; 235 i2 += 2; 236 } 237 break; 238 case 3: 239 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 240 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 241 i2 = 3; 242 idiag += 9; 243 for (i=1; i<m; i++) { 244 v = aa + 9*ai[i]; 245 vi = aj + ai[i]; 246 nz = diag[i] - ai[i]; 247 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 248 while (nz--) { 249 idx = 3*(*vi++); 250 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 251 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 252 v += 9; 253 } 254 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 255 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 256 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 257 idiag += 9; 258 i2 += 3; 259 } 260 break; 261 case 4: 262 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 263 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 264 i2 = 4; 265 idiag += 16; 266 for (i=1; i<m; i++) { 267 v = aa + 16*ai[i]; 268 vi = aj + ai[i]; 269 nz = diag[i] - ai[i]; 270 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 271 while (nz--) { 272 idx = 4*(*vi++); 273 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 274 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 275 v += 16; 276 } 277 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 278 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 279 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 280 idiag += 16; 281 i2 += 4; 282 } 283 break; 284 case 5: 285 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 286 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 287 i2 = 5; 288 idiag += 25; 289 for (i=1; i<m; i++) { 290 v = aa + 25*ai[i]; 291 vi = aj + ai[i]; 292 nz = diag[i] - ai[i]; 293 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 294 while (nz--) { 295 idx = 5*(*vi++); 296 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 297 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 298 v += 25; 299 } 300 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 301 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 302 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 303 idiag += 25; 304 i2 += 5; 305 } 306 break; 307 case 6: 308 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 309 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 310 i2 = 6; 311 idiag += 36; 312 for (i=1; i<m; i++) { 313 v = aa + 36*ai[i]; 314 vi = aj + ai[i]; 315 nz = diag[i] - ai[i]; 316 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 317 while (nz--) { 318 idx = 6*(*vi++); 319 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 320 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 321 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 322 v += 36; 323 } 324 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 325 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 326 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 327 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 328 idiag += 36; 329 i2 += 6; 330 } 331 break; 332 case 7: 333 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 334 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 335 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 336 i2 = 7; 337 idiag += 49; 338 for (i=1; i<m; i++) { 339 v = aa + 49*ai[i]; 340 vi = aj + ai[i]; 341 nz = diag[i] - ai[i]; 342 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 343 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 344 while (nz--) { 345 idx = 7*(*vi++); 346 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 347 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 348 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 349 v += 49; 350 } 351 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 352 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 353 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 354 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 355 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 356 idiag += 49; 357 i2 += 7; 358 } 359 break; 360 default: 361 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 362 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 363 i2 = bs; 364 idiag += bs2; 365 for (i=1; i<m; i++) { 366 v = aa + bs2*ai[i]; 367 vi = aj + ai[i]; 368 nz = diag[i] - ai[i]; 369 370 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 371 /* copy all rows of x that are needed into contiguous space */ 372 workt = work; 373 for (j=0; j<nz; j++) { 374 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 375 workt += bs; 376 } 377 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 378 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 379 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 380 381 idiag += bs2; 382 i2 += bs; 383 } 384 break; 385 } 386 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 387 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 388 xb = t; 389 } 390 else xb = b; 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 392 idiag = a->idiag+bs2*(a->mbs-1); 393 i2 = bs * (m-1); 394 switch (bs) { 395 case 1: 396 s[0] = xb[i2]; 397 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 398 x[i2] = xw[0]; 399 i2 -= 1; 400 for (i=m-2; i>=0; i--) { 401 v = aa + (diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s[0] = xb[i2]; 405 for (j=0; j<nz; j++) { 406 xw[0] = x[vi[j]]; 407 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 408 } 409 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 410 x[i2] = xw[0]; 411 idiag -= 1; 412 i2 -= 1; 413 } 414 break; 415 case 2: 416 s[0] = xb[i2]; s[1] = xb[i2+1]; 417 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 418 x[i2] = xw[0]; x[i2+1] = xw[1]; 419 i2 -= 2; 420 idiag -= 4; 421 for (i=m-2; i>=0; i--) { 422 v = aa + 4*(diag[i] + 1); 423 vi = aj + diag[i] + 1; 424 nz = ai[i+1] - diag[i] - 1; 425 s[0] = xb[i2]; s[1] = xb[i2+1]; 426 for (j=0; j<nz; j++) { 427 idx = 2*vi[j]; 428 it = 4*j; 429 xw[0] = x[idx]; xw[1] = x[1+idx]; 430 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 431 } 432 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 433 x[i2] = xw[0]; x[i2+1] = xw[1]; 434 idiag -= 4; 435 i2 -= 2; 436 } 437 break; 438 case 3: 439 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 440 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 441 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 442 i2 -= 3; 443 idiag -= 9; 444 for (i=m-2; i>=0; i--) { 445 v = aa + 9*(diag[i]+1); 446 vi = aj + diag[i] + 1; 447 nz = ai[i+1] - diag[i] - 1; 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 449 while (nz--) { 450 idx = 3*(*vi++); 451 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 452 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 453 v += 9; 454 } 455 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 456 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 457 idiag -= 9; 458 i2 -= 3; 459 } 460 break; 461 case 4: 462 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 463 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 465 i2 -= 4; 466 idiag -= 16; 467 for (i=m-2; i>=0; i--) { 468 v = aa + 16*(diag[i]+1); 469 vi = aj + diag[i] + 1; 470 nz = ai[i+1] - diag[i] - 1; 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 472 while (nz--) { 473 idx = 4*(*vi++); 474 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 475 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 476 v += 16; 477 } 478 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 479 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 480 idiag -= 16; 481 i2 -= 4; 482 } 483 break; 484 case 5: 485 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 486 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 488 i2 -= 5; 489 idiag -= 25; 490 for (i=m-2; i>=0; i--) { 491 v = aa + 25*(diag[i]+1); 492 vi = aj + diag[i] + 1; 493 nz = ai[i+1] - diag[i] - 1; 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 495 while (nz--) { 496 idx = 5*(*vi++); 497 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 498 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 499 v += 25; 500 } 501 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 502 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 503 idiag -= 25; 504 i2 -= 5; 505 } 506 break; 507 case 6: 508 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 509 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 510 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 511 i2 -= 6; 512 idiag -= 36; 513 for (i=m-2; i>=0; i--) { 514 v = aa + 36*(diag[i]+1); 515 vi = aj + diag[i] + 1; 516 nz = ai[i+1] - diag[i] - 1; 517 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 518 while (nz--) { 519 idx = 6*(*vi++); 520 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 521 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 522 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 523 v += 36; 524 } 525 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 526 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 527 idiag -= 36; 528 i2 -= 6; 529 } 530 break; 531 case 7: 532 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 533 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 534 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 535 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 536 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 537 i2 -= 7; 538 idiag -= 49; 539 for (i=m-2; i>=0; i--) { 540 v = aa + 49*(diag[i]+1); 541 vi = aj + diag[i] + 1; 542 nz = ai[i+1] - diag[i] - 1; 543 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 544 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 545 while (nz--) { 546 idx = 7*(*vi++); 547 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 548 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 549 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 550 v += 49; 551 } 552 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 553 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 554 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 555 idiag -= 49; 556 i2 -= 7; 557 } 558 break; 559 default: 560 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 561 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 562 i2 -= bs; 563 idiag -= bs2; 564 for (i=m-2; i>=0; i--) { 565 v = aa + bs2*(diag[i]+1); 566 vi = aj + diag[i] + 1; 567 nz = ai[i+1] - diag[i] - 1; 568 569 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 570 /* copy all rows of x that are needed into contiguous space */ 571 workt = work; 572 for (j=0; j<nz; j++) { 573 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 574 workt += bs; 575 } 576 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 577 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 578 579 idiag -= bs2; 580 i2 -= bs; 581 } 582 break; 583 } 584 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 585 } 586 its--; 587 } 588 while (its--) { 589 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 590 idiag = a->idiag; 591 i2 = 0; 592 switch (bs) { 593 case 1: 594 for (i=0; i<m; i++) { 595 v = aa + ai[i]; 596 vi = aj + ai[i]; 597 nz = ai[i+1] - ai[i]; 598 s[0] = b[i2]; 599 for (j=0; j<nz; j++) { 600 xw[0] = x[vi[j]]; 601 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 602 } 603 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 604 x[i2] += xw[0]; 605 idiag += 1; 606 i2 += 1; 607 } 608 break; 609 case 2: 610 for (i=0; i<m; i++) { 611 v = aa + 4*ai[i]; 612 vi = aj + ai[i]; 613 nz = ai[i+1] - ai[i]; 614 s[0] = b[i2]; s[1] = b[i2+1]; 615 for (j=0; j<nz; j++) { 616 idx = 2*vi[j]; 617 it = 4*j; 618 xw[0] = x[idx]; xw[1] = x[1+idx]; 619 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 620 } 621 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 622 x[i2] += xw[0]; x[i2+1] += xw[1]; 623 idiag += 4; 624 i2 += 2; 625 } 626 break; 627 case 3: 628 for (i=0; i<m; i++) { 629 v = aa + 9*ai[i]; 630 vi = aj + ai[i]; 631 nz = ai[i+1] - ai[i]; 632 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 633 while (nz--) { 634 idx = 3*(*vi++); 635 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 636 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 637 v += 9; 638 } 639 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 640 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 641 idiag += 9; 642 i2 += 3; 643 } 644 break; 645 case 4: 646 for (i=0; i<m; i++) { 647 v = aa + 16*ai[i]; 648 vi = aj + ai[i]; 649 nz = ai[i+1] - ai[i]; 650 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 651 while (nz--) { 652 idx = 4*(*vi++); 653 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 654 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 655 v += 16; 656 } 657 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 658 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 659 idiag += 16; 660 i2 += 4; 661 } 662 break; 663 case 5: 664 for (i=0; i<m; i++) { 665 v = aa + 25*ai[i]; 666 vi = aj + ai[i]; 667 nz = ai[i+1] - ai[i]; 668 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 669 while (nz--) { 670 idx = 5*(*vi++); 671 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 672 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 673 v += 25; 674 } 675 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 676 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 677 idiag += 25; 678 i2 += 5; 679 } 680 break; 681 case 6: 682 for (i=0; i<m; i++) { 683 v = aa + 36*ai[i]; 684 vi = aj + ai[i]; 685 nz = ai[i+1] - ai[i]; 686 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 687 while (nz--) { 688 idx = 6*(*vi++); 689 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 690 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 691 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 692 v += 36; 693 } 694 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 695 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 696 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 697 idiag += 36; 698 i2 += 6; 699 } 700 break; 701 case 7: 702 for (i=0; i<m; i++) { 703 v = aa + 49*ai[i]; 704 vi = aj + ai[i]; 705 nz = ai[i+1] - ai[i]; 706 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 707 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 708 while (nz--) { 709 idx = 7*(*vi++); 710 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 711 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 712 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 713 v += 49; 714 } 715 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 716 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 717 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 718 idiag += 49; 719 i2 += 7; 720 } 721 break; 722 default: 723 for (i=0; i<m; i++) { 724 v = aa + bs2*ai[i]; 725 vi = aj + ai[i]; 726 nz = ai[i+1] - ai[i]; 727 728 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 729 /* copy all rows of x that are needed into contiguous space */ 730 workt = work; 731 for (j=0; j<nz; j++) { 732 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 733 workt += bs; 734 } 735 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 736 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 737 738 idiag += bs2; 739 i2 += bs; 740 } 741 break; 742 } 743 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 744 } 745 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 746 idiag = a->idiag+bs2*(a->mbs-1); 747 i2 = bs * (m-1); 748 switch (bs) { 749 case 1: 750 for (i=m-1; i>=0; i--) { 751 v = aa + ai[i]; 752 vi = aj + ai[i]; 753 nz = ai[i+1] - ai[i]; 754 s[0] = b[i2]; 755 for (j=0; j<nz; j++) { 756 xw[0] = x[vi[j]]; 757 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 758 } 759 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 760 x[i2] += xw[0]; 761 idiag -= 1; 762 i2 -= 1; 763 } 764 break; 765 case 2: 766 for (i=m-1; i>=0; i--) { 767 v = aa + 4*ai[i]; 768 vi = aj + ai[i]; 769 nz = ai[i+1] - ai[i]; 770 s[0] = b[i2]; s[1] = b[i2+1]; 771 for (j=0; j<nz; j++) { 772 idx = 2*vi[j]; 773 it = 4*j; 774 xw[0] = x[idx]; xw[1] = x[1+idx]; 775 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 776 } 777 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 778 x[i2] += xw[0]; x[i2+1] += xw[1]; 779 idiag -= 4; 780 i2 -= 2; 781 } 782 break; 783 case 3: 784 for (i=m-1; i>=0; i--) { 785 v = aa + 9*ai[i]; 786 vi = aj + ai[i]; 787 nz = ai[i+1] - ai[i]; 788 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 789 while (nz--) { 790 idx = 3*(*vi++); 791 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 792 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 793 v += 9; 794 } 795 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 796 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 797 idiag -= 9; 798 i2 -= 3; 799 } 800 break; 801 case 4: 802 for (i=m-1; i>=0; i--) { 803 v = aa + 16*ai[i]; 804 vi = aj + ai[i]; 805 nz = ai[i+1] - ai[i]; 806 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 807 while (nz--) { 808 idx = 4*(*vi++); 809 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 810 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 811 v += 16; 812 } 813 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 814 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 815 idiag -= 16; 816 i2 -= 4; 817 } 818 break; 819 case 5: 820 for (i=m-1; i>=0; i--) { 821 v = aa + 25*ai[i]; 822 vi = aj + ai[i]; 823 nz = ai[i+1] - ai[i]; 824 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 825 while (nz--) { 826 idx = 5*(*vi++); 827 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 828 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 829 v += 25; 830 } 831 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 832 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 833 idiag -= 25; 834 i2 -= 5; 835 } 836 break; 837 case 6: 838 for (i=m-1; i>=0; i--) { 839 v = aa + 36*ai[i]; 840 vi = aj + ai[i]; 841 nz = ai[i+1] - ai[i]; 842 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 843 while (nz--) { 844 idx = 6*(*vi++); 845 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 846 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 847 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 848 v += 36; 849 } 850 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 851 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 852 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 853 idiag -= 36; 854 i2 -= 6; 855 } 856 break; 857 case 7: 858 for (i=m-1; i>=0; i--) { 859 v = aa + 49*ai[i]; 860 vi = aj + ai[i]; 861 nz = ai[i+1] - ai[i]; 862 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 863 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 864 while (nz--) { 865 idx = 7*(*vi++); 866 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 867 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 868 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 869 v += 49; 870 } 871 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 872 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 873 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 874 idiag -= 49; 875 i2 -= 7; 876 } 877 break; 878 default: 879 for (i=m-1; i>=0; i--) { 880 v = aa + bs2*ai[i]; 881 vi = aj + ai[i]; 882 nz = ai[i+1] - ai[i]; 883 884 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 885 /* copy all rows of x that are needed into contiguous space */ 886 workt = work; 887 for (j=0; j<nz; j++) { 888 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 889 workt += bs; 890 } 891 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 892 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 893 894 idiag -= bs2; 895 i2 -= bs; 896 } 897 break; 898 } 899 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 900 } 901 } 902 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 903 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } 906 907 908 /* 909 Special version for direct calls from Fortran (Used in PETSc-fun3d) 910 */ 911 #if defined(PETSC_HAVE_FORTRAN_CAPS) 912 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 913 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 914 #define matsetvaluesblocked4_ matsetvaluesblocked4 915 #endif 916 917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 918 { 919 Mat A = *AA; 920 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 921 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 922 PetscInt *ai =a->i,*ailen=a->ilen; 923 PetscInt *aj =a->j,stepval,lastcol = -1; 924 const PetscScalar *value = v; 925 MatScalar *ap,*aa = a->a,*bap; 926 927 PetscFunctionBegin; 928 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 929 stepval = (n-1)*4; 930 for (k=0; k<m; k++) { /* loop over added rows */ 931 row = im[k]; 932 rp = aj + ai[row]; 933 ap = aa + 16*ai[row]; 934 nrow = ailen[row]; 935 low = 0; 936 high = nrow; 937 for (l=0; l<n; l++) { /* loop over added columns */ 938 col = in[l]; 939 if (col <= lastcol) low = 0; 940 else high = nrow; 941 lastcol = col; 942 value = v + k*(stepval+4 + l)*4; 943 while (high-low > 7) { 944 t = (low+high)/2; 945 if (rp[t] > col) high = t; 946 else low = t; 947 } 948 for (i=low; i<high; i++) { 949 if (rp[i] > col) break; 950 if (rp[i] == col) { 951 bap = ap + 16*i; 952 for (ii=0; ii<4; ii++,value+=stepval) { 953 for (jj=ii; jj<16; jj+=4) { 954 bap[jj] += *value++; 955 } 956 } 957 goto noinsert2; 958 } 959 } 960 N = nrow++ - 1; 961 high++; /* added new column index thus must search to one higher than before */ 962 /* shift up all the later entries in this row */ 963 for (ii=N; ii>=i; ii--) { 964 rp[ii+1] = rp[ii]; 965 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 966 } 967 if (N >= i) { 968 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 969 } 970 rp[i] = col; 971 bap = ap + 16*i; 972 for (ii=0; ii<4; ii++,value+=stepval) { 973 for (jj=ii; jj<16; jj+=4) { 974 bap[jj] = *value++; 975 } 976 } 977 noinsert2:; 978 low = i; 979 } 980 ailen[row] = nrow; 981 } 982 PetscFunctionReturnVoid(); 983 } 984 985 #if defined(PETSC_HAVE_FORTRAN_CAPS) 986 #define matsetvalues4_ MATSETVALUES4 987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 988 #define matsetvalues4_ matsetvalues4 989 #endif 990 991 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 992 { 993 Mat A = *AA; 994 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 995 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 996 PetscInt *ai=a->i,*ailen=a->ilen; 997 PetscInt *aj=a->j,brow,bcol; 998 PetscInt ridx,cidx,lastcol = -1; 999 MatScalar *ap,value,*aa=a->a,*bap; 1000 1001 PetscFunctionBegin; 1002 for (k=0; k<m; k++) { /* loop over added rows */ 1003 row = im[k]; brow = row/4; 1004 rp = aj + ai[brow]; 1005 ap = aa + 16*ai[brow]; 1006 nrow = ailen[brow]; 1007 low = 0; 1008 high = nrow; 1009 for (l=0; l<n; l++) { /* loop over added columns */ 1010 col = in[l]; bcol = col/4; 1011 ridx = row % 4; cidx = col % 4; 1012 value = v[l + k*n]; 1013 if (col <= lastcol) low = 0; 1014 else high = nrow; 1015 lastcol = col; 1016 while (high-low > 7) { 1017 t = (low+high)/2; 1018 if (rp[t] > bcol) high = t; 1019 else low = t; 1020 } 1021 for (i=low; i<high; i++) { 1022 if (rp[i] > bcol) break; 1023 if (rp[i] == bcol) { 1024 bap = ap + 16*i + 4*cidx + ridx; 1025 *bap += value; 1026 goto noinsert1; 1027 } 1028 } 1029 N = nrow++ - 1; 1030 high++; /* added new column thus must search to one higher than before */ 1031 /* shift up all the later entries in this row */ 1032 for (ii=N; ii>=i; ii--) { 1033 rp[ii+1] = rp[ii]; 1034 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1035 } 1036 if (N>=i) { 1037 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1038 } 1039 rp[i] = bcol; 1040 ap[16*i + 4*cidx + ridx] = value; 1041 noinsert1:; 1042 low = i; 1043 } 1044 ailen[brow] = nrow; 1045 } 1046 PetscFunctionReturnVoid(); 1047 } 1048 1049 /* 1050 Checks for missing diagonals 1051 */ 1052 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1053 { 1054 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 PetscInt *diag,*ii = a->i,i; 1057 1058 PetscFunctionBegin; 1059 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1060 *missing = PETSC_FALSE; 1061 if (A->rmap->n > 0 && !ii) { 1062 *missing = PETSC_TRUE; 1063 if (d) *d = 0; 1064 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1065 } else { 1066 diag = a->diag; 1067 for (i=0; i<a->mbs; i++) { 1068 if (diag[i] >= ii[i+1]) { 1069 *missing = PETSC_TRUE; 1070 if (d) *d = i; 1071 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1072 break; 1073 } 1074 } 1075 } 1076 PetscFunctionReturn(0); 1077 } 1078 1079 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1080 { 1081 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1082 PetscErrorCode ierr; 1083 PetscInt i,j,m = a->mbs; 1084 1085 PetscFunctionBegin; 1086 if (!a->diag) { 1087 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1088 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1089 a->free_diag = PETSC_TRUE; 1090 } 1091 for (i=0; i<m; i++) { 1092 a->diag[i] = a->i[i+1]; 1093 for (j=a->i[i]; j<a->i[i+1]; j++) { 1094 if (a->j[j] == i) { 1095 a->diag[i] = j; 1096 break; 1097 } 1098 } 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 1104 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1105 { 1106 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1109 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1110 1111 PetscFunctionBegin; 1112 *nn = n; 1113 if (!ia) PetscFunctionReturn(0); 1114 if (symmetric) { 1115 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1116 nz = tia[n]; 1117 } else { 1118 tia = a->i; tja = a->j; 1119 } 1120 1121 if (!blockcompressed && bs > 1) { 1122 (*nn) *= bs; 1123 /* malloc & create the natural set of indices */ 1124 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1125 if (n) { 1126 (*ia)[0] = oshift; 1127 for (j=1; j<bs; j++) { 1128 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1129 } 1130 } 1131 1132 for (i=1; i<n; i++) { 1133 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1134 for (j=1; j<bs; j++) { 1135 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1136 } 1137 } 1138 if (n) { 1139 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1140 } 1141 1142 if (inja) { 1143 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1144 cnt = 0; 1145 for (i=0; i<n; i++) { 1146 for (j=0; j<bs; j++) { 1147 for (k=tia[i]; k<tia[i+1]; k++) { 1148 for (l=0; l<bs; l++) { 1149 (*ja)[cnt++] = bs*tja[k] + l; 1150 } 1151 } 1152 } 1153 } 1154 } 1155 1156 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1157 ierr = PetscFree(tia);CHKERRQ(ierr); 1158 ierr = PetscFree(tja);CHKERRQ(ierr); 1159 } 1160 } else if (oshift == 1) { 1161 if (symmetric) { 1162 nz = tia[A->rmap->n/bs]; 1163 /* add 1 to i and j indices */ 1164 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1165 *ia = tia; 1166 if (ja) { 1167 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1168 *ja = tja; 1169 } 1170 } else { 1171 nz = a->i[A->rmap->n/bs]; 1172 /* malloc space and add 1 to i and j indices */ 1173 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1174 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1175 if (ja) { 1176 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1177 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1178 } 1179 } 1180 } else { 1181 *ia = tia; 1182 if (ja) *ja = tja; 1183 } 1184 PetscFunctionReturn(0); 1185 } 1186 1187 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1188 { 1189 PetscErrorCode ierr; 1190 1191 PetscFunctionBegin; 1192 if (!ia) PetscFunctionReturn(0); 1193 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1194 ierr = PetscFree(*ia);CHKERRQ(ierr); 1195 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1196 } 1197 PetscFunctionReturn(0); 1198 } 1199 1200 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1201 { 1202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 #if defined(PETSC_USE_LOG) 1207 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1208 #endif 1209 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1210 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1211 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1212 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1213 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1214 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1215 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1216 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1217 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1218 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1219 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1220 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1221 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1222 1223 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1224 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1225 ierr = PetscFree(A->data);CHKERRQ(ierr); 1226 1227 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1236 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1237 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1238 #if defined(PETSC_HAVE_HYPRE) 1239 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1240 #endif 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1245 { 1246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 switch (op) { 1251 case MAT_ROW_ORIENTED: 1252 a->roworiented = flg; 1253 break; 1254 case MAT_KEEP_NONZERO_PATTERN: 1255 a->keepnonzeropattern = flg; 1256 break; 1257 case MAT_NEW_NONZERO_LOCATIONS: 1258 a->nonew = (flg ? 0 : 1); 1259 break; 1260 case MAT_NEW_NONZERO_LOCATION_ERR: 1261 a->nonew = (flg ? -1 : 0); 1262 break; 1263 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1264 a->nonew = (flg ? -2 : 0); 1265 break; 1266 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1267 a->nounused = (flg ? -1 : 0); 1268 break; 1269 case MAT_NEW_DIAGONALS: 1270 case MAT_IGNORE_OFF_PROC_ENTRIES: 1271 case MAT_USE_HASH_TABLE: 1272 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1273 break; 1274 case MAT_SPD: 1275 case MAT_SYMMETRIC: 1276 case MAT_STRUCTURALLY_SYMMETRIC: 1277 case MAT_HERMITIAN: 1278 case MAT_SYMMETRY_ETERNAL: 1279 case MAT_SUBMAT_SINGLEIS: 1280 case MAT_STRUCTURE_ONLY: 1281 /* These options are handled directly by MatSetOption() */ 1282 break; 1283 default: 1284 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1285 } 1286 PetscFunctionReturn(0); 1287 } 1288 1289 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1290 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1291 { 1292 PetscErrorCode ierr; 1293 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1294 MatScalar *aa_i; 1295 PetscScalar *v_i; 1296 1297 PetscFunctionBegin; 1298 bs = A->rmap->bs; 1299 bs2 = bs*bs; 1300 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1301 1302 bn = row/bs; /* Block number */ 1303 bp = row % bs; /* Block Position */ 1304 M = ai[bn+1] - ai[bn]; 1305 *nz = bs*M; 1306 1307 if (v) { 1308 *v = 0; 1309 if (*nz) { 1310 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1311 for (i=0; i<M; i++) { /* for each block in the block row */ 1312 v_i = *v + i*bs; 1313 aa_i = aa + bs2*(ai[bn] + i); 1314 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1315 } 1316 } 1317 } 1318 1319 if (idx) { 1320 *idx = 0; 1321 if (*nz) { 1322 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1323 for (i=0; i<M; i++) { /* for each block in the block row */ 1324 idx_i = *idx + i*bs; 1325 itmp = bs*aj[ai[bn] + i]; 1326 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1327 } 1328 } 1329 } 1330 PetscFunctionReturn(0); 1331 } 1332 1333 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1334 { 1335 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1336 PetscErrorCode ierr; 1337 1338 PetscFunctionBegin; 1339 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1344 { 1345 PetscErrorCode ierr; 1346 1347 PetscFunctionBegin; 1348 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1349 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1350 PetscFunctionReturn(0); 1351 } 1352 1353 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1354 { 1355 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1356 Mat C; 1357 PetscErrorCode ierr; 1358 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1359 PetscInt *rows,*cols,bs2=a->bs2; 1360 MatScalar *array; 1361 1362 PetscFunctionBegin; 1363 if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1364 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1365 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1366 1367 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1368 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1369 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1370 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1371 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1372 ierr = PetscFree(col);CHKERRQ(ierr); 1373 } else { 1374 C = *B; 1375 } 1376 1377 array = a->a; 1378 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1379 for (i=0; i<mbs; i++) { 1380 cols[0] = i*bs; 1381 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1382 len = ai[i+1] - ai[i]; 1383 for (j=0; j<len; j++) { 1384 rows[0] = (*aj++)*bs; 1385 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1386 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1387 array += bs2; 1388 } 1389 } 1390 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1391 1392 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1393 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1394 1395 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1396 *B = C; 1397 } else { 1398 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1404 { 1405 PetscErrorCode ierr; 1406 Mat Btrans; 1407 1408 PetscFunctionBegin; 1409 *f = PETSC_FALSE; 1410 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1411 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1412 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1417 { 1418 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1419 PetscErrorCode ierr; 1420 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1421 int fd; 1422 PetscScalar *aa; 1423 FILE *file; 1424 1425 PetscFunctionBegin; 1426 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1427 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1428 col_lens[0] = MAT_FILE_CLASSID; 1429 1430 col_lens[1] = A->rmap->N; 1431 col_lens[2] = A->cmap->n; 1432 col_lens[3] = a->nz*bs2; 1433 1434 /* store lengths of each row and write (including header) to file */ 1435 count = 0; 1436 for (i=0; i<a->mbs; i++) { 1437 for (j=0; j<bs; j++) { 1438 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1439 } 1440 } 1441 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1442 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1443 1444 /* store column indices (zero start index) */ 1445 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1446 count = 0; 1447 for (i=0; i<a->mbs; i++) { 1448 for (j=0; j<bs; j++) { 1449 for (k=a->i[i]; k<a->i[i+1]; k++) { 1450 for (l=0; l<bs; l++) { 1451 jj[count++] = bs*a->j[k] + l; 1452 } 1453 } 1454 } 1455 } 1456 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1457 ierr = PetscFree(jj);CHKERRQ(ierr); 1458 1459 /* store nonzero values */ 1460 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1461 count = 0; 1462 for (i=0; i<a->mbs; i++) { 1463 for (j=0; j<bs; j++) { 1464 for (k=a->i[i]; k<a->i[i+1]; k++) { 1465 for (l=0; l<bs; l++) { 1466 aa[count++] = a->a[bs2*k + l*bs + j]; 1467 } 1468 } 1469 } 1470 } 1471 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1472 ierr = PetscFree(aa);CHKERRQ(ierr); 1473 1474 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1475 if (file) { 1476 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1477 } 1478 PetscFunctionReturn(0); 1479 } 1480 1481 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1482 { 1483 PetscErrorCode ierr; 1484 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1485 PetscInt i,bs = A->rmap->bs,k; 1486 1487 PetscFunctionBegin; 1488 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1489 for (i=0; i<a->mbs; i++) { 1490 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1491 for (k=a->i[i]; k<a->i[i+1]; k++) { 1492 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1493 } 1494 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1495 } 1496 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1501 { 1502 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1503 PetscErrorCode ierr; 1504 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1505 PetscViewerFormat format; 1506 1507 PetscFunctionBegin; 1508 if (A->structure_only) { 1509 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1510 PetscFunctionReturn(0); 1511 } 1512 1513 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1514 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1515 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1516 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1517 const char *matname; 1518 Mat aij; 1519 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1520 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1521 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1522 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1523 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1524 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1525 PetscFunctionReturn(0); 1526 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1527 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1528 for (i=0; i<a->mbs; i++) { 1529 for (j=0; j<bs; j++) { 1530 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1531 for (k=a->i[i]; k<a->i[i+1]; k++) { 1532 for (l=0; l<bs; l++) { 1533 #if defined(PETSC_USE_COMPLEX) 1534 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1535 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1536 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1537 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1538 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1539 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1540 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1541 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1542 } 1543 #else 1544 if (a->a[bs2*k + l*bs + j] != 0.0) { 1545 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1546 } 1547 #endif 1548 } 1549 } 1550 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1551 } 1552 } 1553 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1554 } else { 1555 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1556 for (i=0; i<a->mbs; i++) { 1557 for (j=0; j<bs; j++) { 1558 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1559 for (k=a->i[i]; k<a->i[i+1]; k++) { 1560 for (l=0; l<bs; l++) { 1561 #if defined(PETSC_USE_COMPLEX) 1562 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1563 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1564 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1565 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1566 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1567 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1568 } else { 1569 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1570 } 1571 #else 1572 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1573 #endif 1574 } 1575 } 1576 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1577 } 1578 } 1579 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1580 } 1581 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #include <petscdraw.h> 1586 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1587 { 1588 Mat A = (Mat) Aa; 1589 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1590 PetscErrorCode ierr; 1591 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1592 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1593 MatScalar *aa; 1594 PetscViewer viewer; 1595 PetscViewerFormat format; 1596 1597 PetscFunctionBegin; 1598 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1599 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1600 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1601 1602 /* loop over matrix elements drawing boxes */ 1603 1604 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1605 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1606 /* Blue for negative, Cyan for zero and Red for positive */ 1607 color = PETSC_DRAW_BLUE; 1608 for (i=0,row=0; i<mbs; i++,row+=bs) { 1609 for (j=a->i[i]; j<a->i[i+1]; j++) { 1610 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1611 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1612 aa = a->a + j*bs2; 1613 for (k=0; k<bs; k++) { 1614 for (l=0; l<bs; l++) { 1615 if (PetscRealPart(*aa++) >= 0.) continue; 1616 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1617 } 1618 } 1619 } 1620 } 1621 color = PETSC_DRAW_CYAN; 1622 for (i=0,row=0; i<mbs; i++,row+=bs) { 1623 for (j=a->i[i]; j<a->i[i+1]; j++) { 1624 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1625 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1626 aa = a->a + j*bs2; 1627 for (k=0; k<bs; k++) { 1628 for (l=0; l<bs; l++) { 1629 if (PetscRealPart(*aa++) != 0.) continue; 1630 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1631 } 1632 } 1633 } 1634 } 1635 color = PETSC_DRAW_RED; 1636 for (i=0,row=0; i<mbs; i++,row+=bs) { 1637 for (j=a->i[i]; j<a->i[i+1]; j++) { 1638 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1639 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1640 aa = a->a + j*bs2; 1641 for (k=0; k<bs; k++) { 1642 for (l=0; l<bs; l++) { 1643 if (PetscRealPart(*aa++) <= 0.) continue; 1644 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1645 } 1646 } 1647 } 1648 } 1649 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1650 } else { 1651 /* use contour shading to indicate magnitude of values */ 1652 /* first determine max of all nonzero values */ 1653 PetscReal minv = 0.0, maxv = 0.0; 1654 PetscDraw popup; 1655 1656 for (i=0; i<a->nz*a->bs2; i++) { 1657 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1658 } 1659 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1660 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1661 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1662 1663 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1664 for (i=0,row=0; i<mbs; i++,row+=bs) { 1665 for (j=a->i[i]; j<a->i[i+1]; j++) { 1666 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1667 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1668 aa = a->a + j*bs2; 1669 for (k=0; k<bs; k++) { 1670 for (l=0; l<bs; l++) { 1671 MatScalar v = *aa++; 1672 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1673 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1674 } 1675 } 1676 } 1677 } 1678 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1679 } 1680 PetscFunctionReturn(0); 1681 } 1682 1683 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1684 { 1685 PetscErrorCode ierr; 1686 PetscReal xl,yl,xr,yr,w,h; 1687 PetscDraw draw; 1688 PetscBool isnull; 1689 1690 PetscFunctionBegin; 1691 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1692 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1693 if (isnull) PetscFunctionReturn(0); 1694 1695 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1696 xr += w; yr += h; xl = -w; yl = -h; 1697 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1698 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1699 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1700 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1701 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1702 PetscFunctionReturn(0); 1703 } 1704 1705 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1706 { 1707 PetscErrorCode ierr; 1708 PetscBool iascii,isbinary,isdraw; 1709 1710 PetscFunctionBegin; 1711 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1712 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1713 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1714 if (iascii) { 1715 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1716 } else if (isbinary) { 1717 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1718 } else if (isdraw) { 1719 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1720 } else { 1721 Mat B; 1722 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1723 ierr = MatView(B,viewer);CHKERRQ(ierr); 1724 ierr = MatDestroy(&B);CHKERRQ(ierr); 1725 } 1726 PetscFunctionReturn(0); 1727 } 1728 1729 1730 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1731 { 1732 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1733 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1734 PetscInt *ai = a->i,*ailen = a->ilen; 1735 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1736 MatScalar *ap,*aa = a->a; 1737 1738 PetscFunctionBegin; 1739 for (k=0; k<m; k++) { /* loop over rows */ 1740 row = im[k]; brow = row/bs; 1741 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1742 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1743 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1744 nrow = ailen[brow]; 1745 for (l=0; l<n; l++) { /* loop over columns */ 1746 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1747 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1748 col = in[l]; 1749 bcol = col/bs; 1750 cidx = col%bs; 1751 ridx = row%bs; 1752 high = nrow; 1753 low = 0; /* assume unsorted */ 1754 while (high-low > 5) { 1755 t = (low+high)/2; 1756 if (rp[t] > bcol) high = t; 1757 else low = t; 1758 } 1759 for (i=low; i<high; i++) { 1760 if (rp[i] > bcol) break; 1761 if (rp[i] == bcol) { 1762 *v++ = ap[bs2*i+bs*cidx+ridx]; 1763 goto finished; 1764 } 1765 } 1766 *v++ = 0.0; 1767 finished:; 1768 } 1769 } 1770 PetscFunctionReturn(0); 1771 } 1772 1773 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1774 { 1775 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1776 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1777 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1778 PetscErrorCode ierr; 1779 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1780 PetscBool roworiented=a->roworiented; 1781 const PetscScalar *value = v; 1782 MatScalar *ap=NULL,*aa = a->a,*bap; 1783 1784 PetscFunctionBegin; 1785 if (roworiented) { 1786 stepval = (n-1)*bs; 1787 } else { 1788 stepval = (m-1)*bs; 1789 } 1790 for (k=0; k<m; k++) { /* loop over added rows */ 1791 row = im[k]; 1792 if (row < 0) continue; 1793 #if defined(PETSC_USE_DEBUG) 1794 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1795 #endif 1796 rp = aj + ai[row]; 1797 if (!A->structure_only) ap = aa + bs2*ai[row]; 1798 rmax = imax[row]; 1799 nrow = ailen[row]; 1800 low = 0; 1801 high = nrow; 1802 for (l=0; l<n; l++) { /* loop over added columns */ 1803 if (in[l] < 0) continue; 1804 #if defined(PETSC_USE_DEBUG) 1805 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1806 #endif 1807 col = in[l]; 1808 if (!A->structure_only) { 1809 if (roworiented) { 1810 value = v + (k*(stepval+bs) + l)*bs; 1811 } else { 1812 value = v + (l*(stepval+bs) + k)*bs; 1813 } 1814 } 1815 if (col <= lastcol) low = 0; 1816 else high = nrow; 1817 lastcol = col; 1818 while (high-low > 7) { 1819 t = (low+high)/2; 1820 if (rp[t] > col) high = t; 1821 else low = t; 1822 } 1823 for (i=low; i<high; i++) { 1824 if (rp[i] > col) break; 1825 if (rp[i] == col) { 1826 if (A->structure_only) goto noinsert2; 1827 bap = ap + bs2*i; 1828 if (roworiented) { 1829 if (is == ADD_VALUES) { 1830 for (ii=0; ii<bs; ii++,value+=stepval) { 1831 for (jj=ii; jj<bs2; jj+=bs) { 1832 bap[jj] += *value++; 1833 } 1834 } 1835 } else { 1836 for (ii=0; ii<bs; ii++,value+=stepval) { 1837 for (jj=ii; jj<bs2; jj+=bs) { 1838 bap[jj] = *value++; 1839 } 1840 } 1841 } 1842 } else { 1843 if (is == ADD_VALUES) { 1844 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1845 for (jj=0; jj<bs; jj++) { 1846 bap[jj] += value[jj]; 1847 } 1848 bap += bs; 1849 } 1850 } else { 1851 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1852 for (jj=0; jj<bs; jj++) { 1853 bap[jj] = value[jj]; 1854 } 1855 bap += bs; 1856 } 1857 } 1858 } 1859 goto noinsert2; 1860 } 1861 } 1862 if (nonew == 1) goto noinsert2; 1863 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1864 if (A->structure_only) { 1865 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1866 } else { 1867 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1868 } 1869 N = nrow++ - 1; high++; 1870 /* shift up all the later entries in this row */ 1871 for (ii=N; ii>=i; ii--) { 1872 rp[ii+1] = rp[ii]; 1873 if (!A->structure_only) { 1874 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1875 } 1876 } 1877 if (N >= i && !A->structure_only) { 1878 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1879 } 1880 1881 rp[i] = col; 1882 if (!A->structure_only) { 1883 bap = ap + bs2*i; 1884 if (roworiented) { 1885 for (ii=0; ii<bs; ii++,value+=stepval) { 1886 for (jj=ii; jj<bs2; jj+=bs) { 1887 bap[jj] = *value++; 1888 } 1889 } 1890 } else { 1891 for (ii=0; ii<bs; ii++,value+=stepval) { 1892 for (jj=0; jj<bs; jj++) { 1893 *bap++ = *value++; 1894 } 1895 } 1896 } 1897 } 1898 noinsert2:; 1899 low = i; 1900 } 1901 ailen[row] = nrow; 1902 } 1903 PetscFunctionReturn(0); 1904 } 1905 1906 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1907 { 1908 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1909 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1910 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1911 PetscErrorCode ierr; 1912 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1913 MatScalar *aa = a->a,*ap; 1914 PetscReal ratio=0.6; 1915 1916 PetscFunctionBegin; 1917 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1918 1919 if (m) rmax = ailen[0]; 1920 for (i=1; i<mbs; i++) { 1921 /* move each row back by the amount of empty slots (fshift) before it*/ 1922 fshift += imax[i-1] - ailen[i-1]; 1923 rmax = PetscMax(rmax,ailen[i]); 1924 if (fshift) { 1925 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1926 N = ailen[i]; 1927 for (j=0; j<N; j++) { 1928 ip[j-fshift] = ip[j]; 1929 if (!A->structure_only) { 1930 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1931 } 1932 } 1933 } 1934 ai[i] = ai[i-1] + ailen[i-1]; 1935 } 1936 if (mbs) { 1937 fshift += imax[mbs-1] - ailen[mbs-1]; 1938 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1939 } 1940 1941 /* reset ilen and imax for each row */ 1942 a->nonzerorowcnt = 0; 1943 if (A->structure_only) { 1944 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1945 } else { /* !A->structure_only */ 1946 for (i=0; i<mbs; i++) { 1947 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1948 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1949 } 1950 } 1951 a->nz = ai[mbs]; 1952 1953 /* diagonals may have moved, so kill the diagonal pointers */ 1954 a->idiagvalid = PETSC_FALSE; 1955 if (fshift && a->diag) { 1956 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1957 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1958 a->diag = 0; 1959 } 1960 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1961 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1962 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1963 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1964 1965 A->info.mallocs += a->reallocs; 1966 a->reallocs = 0; 1967 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1968 a->rmax = rmax; 1969 1970 if (!A->structure_only) { 1971 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1972 } 1973 PetscFunctionReturn(0); 1974 } 1975 1976 /* 1977 This function returns an array of flags which indicate the locations of contiguous 1978 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1979 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1980 Assume: sizes should be long enough to hold all the values. 1981 */ 1982 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1983 { 1984 PetscInt i,j,k,row; 1985 PetscBool flg; 1986 1987 PetscFunctionBegin; 1988 for (i=0,j=0; i<n; j++) { 1989 row = idx[i]; 1990 if (row%bs!=0) { /* Not the begining of a block */ 1991 sizes[j] = 1; 1992 i++; 1993 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1994 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1995 i++; 1996 } else { /* Begining of the block, so check if the complete block exists */ 1997 flg = PETSC_TRUE; 1998 for (k=1; k<bs; k++) { 1999 if (row+k != idx[i+k]) { /* break in the block */ 2000 flg = PETSC_FALSE; 2001 break; 2002 } 2003 } 2004 if (flg) { /* No break in the bs */ 2005 sizes[j] = bs; 2006 i += bs; 2007 } else { 2008 sizes[j] = 1; 2009 i++; 2010 } 2011 } 2012 } 2013 *bs_max = j; 2014 PetscFunctionReturn(0); 2015 } 2016 2017 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2018 { 2019 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2020 PetscErrorCode ierr; 2021 PetscInt i,j,k,count,*rows; 2022 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2023 PetscScalar zero = 0.0; 2024 MatScalar *aa; 2025 const PetscScalar *xx; 2026 PetscScalar *bb; 2027 2028 PetscFunctionBegin; 2029 /* fix right hand side if needed */ 2030 if (x && b) { 2031 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2032 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2033 for (i=0; i<is_n; i++) { 2034 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2035 } 2036 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2037 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2038 } 2039 2040 /* Make a copy of the IS and sort it */ 2041 /* allocate memory for rows,sizes */ 2042 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2043 2044 /* copy IS values to rows, and sort them */ 2045 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2046 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2047 2048 if (baij->keepnonzeropattern) { 2049 for (i=0; i<is_n; i++) sizes[i] = 1; 2050 bs_max = is_n; 2051 } else { 2052 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2053 A->nonzerostate++; 2054 } 2055 2056 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2057 row = rows[j]; 2058 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2059 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2060 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2061 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2062 if (diag != (PetscScalar)0.0) { 2063 if (baij->ilen[row/bs] > 0) { 2064 baij->ilen[row/bs] = 1; 2065 baij->j[baij->i[row/bs]] = row/bs; 2066 2067 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2068 } 2069 /* Now insert all the diagonal values for this bs */ 2070 for (k=0; k<bs; k++) { 2071 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2072 } 2073 } else { /* (diag == 0.0) */ 2074 baij->ilen[row/bs] = 0; 2075 } /* end (diag == 0.0) */ 2076 } else { /* (sizes[i] != bs) */ 2077 #if defined(PETSC_USE_DEBUG) 2078 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2079 #endif 2080 for (k=0; k<count; k++) { 2081 aa[0] = zero; 2082 aa += bs; 2083 } 2084 if (diag != (PetscScalar)0.0) { 2085 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2086 } 2087 } 2088 } 2089 2090 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2091 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2092 PetscFunctionReturn(0); 2093 } 2094 2095 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2096 { 2097 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2098 PetscErrorCode ierr; 2099 PetscInt i,j,k,count; 2100 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2101 PetscScalar zero = 0.0; 2102 MatScalar *aa; 2103 const PetscScalar *xx; 2104 PetscScalar *bb; 2105 PetscBool *zeroed,vecs = PETSC_FALSE; 2106 2107 PetscFunctionBegin; 2108 /* fix right hand side if needed */ 2109 if (x && b) { 2110 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2111 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2112 vecs = PETSC_TRUE; 2113 } 2114 2115 /* zero the columns */ 2116 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2117 for (i=0; i<is_n; i++) { 2118 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2119 zeroed[is_idx[i]] = PETSC_TRUE; 2120 } 2121 for (i=0; i<A->rmap->N; i++) { 2122 if (!zeroed[i]) { 2123 row = i/bs; 2124 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2125 for (k=0; k<bs; k++) { 2126 col = bs*baij->j[j] + k; 2127 if (zeroed[col]) { 2128 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2129 if (vecs) bb[i] -= aa[0]*xx[col]; 2130 aa[0] = 0.0; 2131 } 2132 } 2133 } 2134 } else if (vecs) bb[i] = diag*xx[i]; 2135 } 2136 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2137 if (vecs) { 2138 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2139 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2140 } 2141 2142 /* zero the rows */ 2143 for (i=0; i<is_n; i++) { 2144 row = is_idx[i]; 2145 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2146 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2147 for (k=0; k<count; k++) { 2148 aa[0] = zero; 2149 aa += bs; 2150 } 2151 if (diag != (PetscScalar)0.0) { 2152 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2153 } 2154 } 2155 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2156 PetscFunctionReturn(0); 2157 } 2158 2159 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2160 { 2161 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2162 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2163 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2164 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2165 PetscErrorCode ierr; 2166 PetscInt ridx,cidx,bs2=a->bs2; 2167 PetscBool roworiented=a->roworiented; 2168 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2169 2170 PetscFunctionBegin; 2171 for (k=0; k<m; k++) { /* loop over added rows */ 2172 row = im[k]; 2173 brow = row/bs; 2174 if (row < 0) continue; 2175 #if defined(PETSC_USE_DEBUG) 2176 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2177 #endif 2178 rp = aj + ai[brow]; 2179 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2180 rmax = imax[brow]; 2181 nrow = ailen[brow]; 2182 low = 0; 2183 high = nrow; 2184 for (l=0; l<n; l++) { /* loop over added columns */ 2185 if (in[l] < 0) continue; 2186 #if defined(PETSC_USE_DEBUG) 2187 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2188 #endif 2189 col = in[l]; bcol = col/bs; 2190 ridx = row % bs; cidx = col % bs; 2191 if (!A->structure_only) { 2192 if (roworiented) { 2193 value = v[l + k*n]; 2194 } else { 2195 value = v[k + l*m]; 2196 } 2197 } 2198 if (col <= lastcol) low = 0; else high = nrow; 2199 lastcol = col; 2200 while (high-low > 7) { 2201 t = (low+high)/2; 2202 if (rp[t] > bcol) high = t; 2203 else low = t; 2204 } 2205 for (i=low; i<high; i++) { 2206 if (rp[i] > bcol) break; 2207 if (rp[i] == bcol) { 2208 bap = ap + bs2*i + bs*cidx + ridx; 2209 if (!A->structure_only) { 2210 if (is == ADD_VALUES) *bap += value; 2211 else *bap = value; 2212 } 2213 goto noinsert1; 2214 } 2215 } 2216 if (nonew == 1) goto noinsert1; 2217 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2218 if (A->structure_only) { 2219 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2220 } else { 2221 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2222 } 2223 N = nrow++ - 1; high++; 2224 /* shift up all the later entries in this row */ 2225 for (ii=N; ii>=i; ii--) { 2226 rp[ii+1] = rp[ii]; 2227 if (!A->structure_only) { 2228 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2229 } 2230 } 2231 if (N>=i && !A->structure_only) { 2232 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2233 } 2234 rp[i] = bcol; 2235 if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value; 2236 a->nz++; 2237 A->nonzerostate++; 2238 noinsert1:; 2239 low = i; 2240 } 2241 ailen[brow] = nrow; 2242 } 2243 PetscFunctionReturn(0); 2244 } 2245 2246 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2247 { 2248 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2249 Mat outA; 2250 PetscErrorCode ierr; 2251 PetscBool row_identity,col_identity; 2252 2253 PetscFunctionBegin; 2254 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2255 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2256 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2257 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2258 2259 outA = inA; 2260 inA->factortype = MAT_FACTOR_LU; 2261 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2262 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2263 2264 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2265 2266 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2267 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2268 a->row = row; 2269 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2270 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2271 a->col = col; 2272 2273 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2274 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2275 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2276 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2277 2278 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2279 if (!a->solve_work) { 2280 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2281 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2282 } 2283 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2284 PetscFunctionReturn(0); 2285 } 2286 2287 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2288 { 2289 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2290 PetscInt i,nz,mbs; 2291 2292 PetscFunctionBegin; 2293 nz = baij->maxnz; 2294 mbs = baij->mbs; 2295 for (i=0; i<nz; i++) { 2296 baij->j[i] = indices[i]; 2297 } 2298 baij->nz = nz; 2299 for (i=0; i<mbs; i++) { 2300 baij->ilen[i] = baij->imax[i]; 2301 } 2302 PetscFunctionReturn(0); 2303 } 2304 2305 /*@ 2306 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2307 in the matrix. 2308 2309 Input Parameters: 2310 + mat - the SeqBAIJ matrix 2311 - indices - the column indices 2312 2313 Level: advanced 2314 2315 Notes: 2316 This can be called if you have precomputed the nonzero structure of the 2317 matrix and want to provide it to the matrix object to improve the performance 2318 of the MatSetValues() operation. 2319 2320 You MUST have set the correct numbers of nonzeros per row in the call to 2321 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2322 2323 MUST be called before any calls to MatSetValues(); 2324 2325 @*/ 2326 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2327 { 2328 PetscErrorCode ierr; 2329 2330 PetscFunctionBegin; 2331 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2332 PetscValidPointer(indices,2); 2333 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2334 PetscFunctionReturn(0); 2335 } 2336 2337 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2338 { 2339 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2340 PetscErrorCode ierr; 2341 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2342 PetscReal atmp; 2343 PetscScalar *x,zero = 0.0; 2344 MatScalar *aa; 2345 PetscInt ncols,brow,krow,kcol; 2346 2347 PetscFunctionBegin; 2348 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2349 bs = A->rmap->bs; 2350 aa = a->a; 2351 ai = a->i; 2352 aj = a->j; 2353 mbs = a->mbs; 2354 2355 ierr = VecSet(v,zero);CHKERRQ(ierr); 2356 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2357 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2358 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2359 for (i=0; i<mbs; i++) { 2360 ncols = ai[1] - ai[0]; ai++; 2361 brow = bs*i; 2362 for (j=0; j<ncols; j++) { 2363 for (kcol=0; kcol<bs; kcol++) { 2364 for (krow=0; krow<bs; krow++) { 2365 atmp = PetscAbsScalar(*aa);aa++; 2366 row = brow + krow; /* row index */ 2367 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2368 } 2369 } 2370 aj++; 2371 } 2372 } 2373 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2374 PetscFunctionReturn(0); 2375 } 2376 2377 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2378 { 2379 PetscErrorCode ierr; 2380 2381 PetscFunctionBegin; 2382 /* If the two matrices have the same copy implementation, use fast copy. */ 2383 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2384 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2385 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2386 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2387 2388 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2389 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2390 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2391 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2392 } else { 2393 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2394 } 2395 PetscFunctionReturn(0); 2396 } 2397 2398 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2399 { 2400 PetscErrorCode ierr; 2401 2402 PetscFunctionBegin; 2403 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2404 PetscFunctionReturn(0); 2405 } 2406 2407 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2408 { 2409 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2410 2411 PetscFunctionBegin; 2412 *array = a->a; 2413 PetscFunctionReturn(0); 2414 } 2415 2416 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2417 { 2418 PetscFunctionBegin; 2419 PetscFunctionReturn(0); 2420 } 2421 2422 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2423 { 2424 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2425 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2426 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2427 PetscErrorCode ierr; 2428 2429 PetscFunctionBegin; 2430 /* Set the number of nonzeros in the new matrix */ 2431 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2432 PetscFunctionReturn(0); 2433 } 2434 2435 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2436 { 2437 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2438 PetscErrorCode ierr; 2439 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2440 PetscBLASInt one=1; 2441 2442 PetscFunctionBegin; 2443 if (str == SAME_NONZERO_PATTERN) { 2444 PetscScalar alpha = a; 2445 PetscBLASInt bnz; 2446 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2447 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2448 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2449 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2450 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2451 } else { 2452 Mat B; 2453 PetscInt *nnz; 2454 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2455 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2456 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2457 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2458 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2459 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2460 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2461 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2462 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2463 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2464 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2465 ierr = PetscFree(nnz);CHKERRQ(ierr); 2466 } 2467 PetscFunctionReturn(0); 2468 } 2469 2470 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2471 { 2472 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2473 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2474 MatScalar *aa = a->a; 2475 2476 PetscFunctionBegin; 2477 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2482 { 2483 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2484 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2485 MatScalar *aa = a->a; 2486 2487 PetscFunctionBegin; 2488 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 /* 2493 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2494 */ 2495 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2496 { 2497 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2498 PetscErrorCode ierr; 2499 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2500 PetscInt nz = a->i[m],row,*jj,mr,col; 2501 2502 PetscFunctionBegin; 2503 *nn = n; 2504 if (!ia) PetscFunctionReturn(0); 2505 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2506 else { 2507 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2508 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2509 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2510 jj = a->j; 2511 for (i=0; i<nz; i++) { 2512 collengths[jj[i]]++; 2513 } 2514 cia[0] = oshift; 2515 for (i=0; i<n; i++) { 2516 cia[i+1] = cia[i] + collengths[i]; 2517 } 2518 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2519 jj = a->j; 2520 for (row=0; row<m; row++) { 2521 mr = a->i[row+1] - a->i[row]; 2522 for (i=0; i<mr; i++) { 2523 col = *jj++; 2524 2525 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2526 } 2527 } 2528 ierr = PetscFree(collengths);CHKERRQ(ierr); 2529 *ia = cia; *ja = cja; 2530 } 2531 PetscFunctionReturn(0); 2532 } 2533 2534 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2535 { 2536 PetscErrorCode ierr; 2537 2538 PetscFunctionBegin; 2539 if (!ia) PetscFunctionReturn(0); 2540 ierr = PetscFree(*ia);CHKERRQ(ierr); 2541 ierr = PetscFree(*ja);CHKERRQ(ierr); 2542 PetscFunctionReturn(0); 2543 } 2544 2545 /* 2546 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2547 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2548 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2549 */ 2550 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2551 { 2552 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2553 PetscErrorCode ierr; 2554 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2555 PetscInt nz = a->i[m],row,*jj,mr,col; 2556 PetscInt *cspidx; 2557 2558 PetscFunctionBegin; 2559 *nn = n; 2560 if (!ia) PetscFunctionReturn(0); 2561 2562 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2563 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2564 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2565 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2566 jj = a->j; 2567 for (i=0; i<nz; i++) { 2568 collengths[jj[i]]++; 2569 } 2570 cia[0] = oshift; 2571 for (i=0; i<n; i++) { 2572 cia[i+1] = cia[i] + collengths[i]; 2573 } 2574 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2575 jj = a->j; 2576 for (row=0; row<m; row++) { 2577 mr = a->i[row+1] - a->i[row]; 2578 for (i=0; i<mr; i++) { 2579 col = *jj++; 2580 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2581 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2582 } 2583 } 2584 ierr = PetscFree(collengths);CHKERRQ(ierr); 2585 *ia = cia; *ja = cja; 2586 *spidx = cspidx; 2587 PetscFunctionReturn(0); 2588 } 2589 2590 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2591 { 2592 PetscErrorCode ierr; 2593 2594 PetscFunctionBegin; 2595 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2596 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2597 PetscFunctionReturn(0); 2598 } 2599 2600 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2601 { 2602 PetscErrorCode ierr; 2603 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2604 2605 PetscFunctionBegin; 2606 if (!Y->preallocated || !aij->nz) { 2607 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2608 } 2609 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2610 PetscFunctionReturn(0); 2611 } 2612 2613 /* -------------------------------------------------------------------*/ 2614 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2615 MatGetRow_SeqBAIJ, 2616 MatRestoreRow_SeqBAIJ, 2617 MatMult_SeqBAIJ_N, 2618 /* 4*/ MatMultAdd_SeqBAIJ_N, 2619 MatMultTranspose_SeqBAIJ, 2620 MatMultTransposeAdd_SeqBAIJ, 2621 0, 2622 0, 2623 0, 2624 /* 10*/ 0, 2625 MatLUFactor_SeqBAIJ, 2626 0, 2627 0, 2628 MatTranspose_SeqBAIJ, 2629 /* 15*/ MatGetInfo_SeqBAIJ, 2630 MatEqual_SeqBAIJ, 2631 MatGetDiagonal_SeqBAIJ, 2632 MatDiagonalScale_SeqBAIJ, 2633 MatNorm_SeqBAIJ, 2634 /* 20*/ 0, 2635 MatAssemblyEnd_SeqBAIJ, 2636 MatSetOption_SeqBAIJ, 2637 MatZeroEntries_SeqBAIJ, 2638 /* 24*/ MatZeroRows_SeqBAIJ, 2639 0, 2640 0, 2641 0, 2642 0, 2643 /* 29*/ MatSetUp_SeqBAIJ, 2644 0, 2645 0, 2646 0, 2647 0, 2648 /* 34*/ MatDuplicate_SeqBAIJ, 2649 0, 2650 0, 2651 MatILUFactor_SeqBAIJ, 2652 0, 2653 /* 39*/ MatAXPY_SeqBAIJ, 2654 MatCreateSubMatrices_SeqBAIJ, 2655 MatIncreaseOverlap_SeqBAIJ, 2656 MatGetValues_SeqBAIJ, 2657 MatCopy_SeqBAIJ, 2658 /* 44*/ 0, 2659 MatScale_SeqBAIJ, 2660 MatShift_SeqBAIJ, 2661 0, 2662 MatZeroRowsColumns_SeqBAIJ, 2663 /* 49*/ 0, 2664 MatGetRowIJ_SeqBAIJ, 2665 MatRestoreRowIJ_SeqBAIJ, 2666 MatGetColumnIJ_SeqBAIJ, 2667 MatRestoreColumnIJ_SeqBAIJ, 2668 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2669 0, 2670 0, 2671 0, 2672 MatSetValuesBlocked_SeqBAIJ, 2673 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2674 MatDestroy_SeqBAIJ, 2675 MatView_SeqBAIJ, 2676 0, 2677 0, 2678 /* 64*/ 0, 2679 0, 2680 0, 2681 0, 2682 0, 2683 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2684 0, 2685 MatConvert_Basic, 2686 0, 2687 0, 2688 /* 74*/ 0, 2689 MatFDColoringApply_BAIJ, 2690 0, 2691 0, 2692 0, 2693 /* 79*/ 0, 2694 0, 2695 0, 2696 0, 2697 MatLoad_SeqBAIJ, 2698 /* 84*/ 0, 2699 0, 2700 0, 2701 0, 2702 0, 2703 /* 89*/ 0, 2704 0, 2705 0, 2706 0, 2707 0, 2708 /* 94*/ 0, 2709 0, 2710 0, 2711 0, 2712 0, 2713 /* 99*/ 0, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*104*/ 0, 2719 MatRealPart_SeqBAIJ, 2720 MatImaginaryPart_SeqBAIJ, 2721 0, 2722 0, 2723 /*109*/ 0, 2724 0, 2725 0, 2726 0, 2727 MatMissingDiagonal_SeqBAIJ, 2728 /*114*/ 0, 2729 0, 2730 0, 2731 0, 2732 0, 2733 /*119*/ 0, 2734 0, 2735 MatMultHermitianTranspose_SeqBAIJ, 2736 MatMultHermitianTransposeAdd_SeqBAIJ, 2737 0, 2738 /*124*/ 0, 2739 0, 2740 MatInvertBlockDiagonal_SeqBAIJ, 2741 0, 2742 0, 2743 /*129*/ 0, 2744 0, 2745 0, 2746 0, 2747 0, 2748 /*134*/ 0, 2749 0, 2750 0, 2751 0, 2752 0, 2753 /*139*/ MatSetBlockSizes_Default, 2754 0, 2755 0, 2756 MatFDColoringSetUp_SeqXAIJ, 2757 0, 2758 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2759 MatDestroySubMatrices_SeqBAIJ 2760 }; 2761 2762 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2763 { 2764 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2765 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2766 PetscErrorCode ierr; 2767 2768 PetscFunctionBegin; 2769 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2770 2771 /* allocate space for values if not already there */ 2772 if (!aij->saved_values) { 2773 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2774 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2775 } 2776 2777 /* copy values over */ 2778 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2779 PetscFunctionReturn(0); 2780 } 2781 2782 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2783 { 2784 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2785 PetscErrorCode ierr; 2786 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2787 2788 PetscFunctionBegin; 2789 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2790 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2791 2792 /* copy values over */ 2793 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2794 PetscFunctionReturn(0); 2795 } 2796 2797 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2798 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2799 2800 static PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2801 { 2802 Mat_SeqBAIJ *b; 2803 PetscErrorCode ierr; 2804 PetscInt i,mbs,nbs,bs2; 2805 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2806 2807 PetscFunctionBegin; 2808 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2809 if (nz == MAT_SKIP_ALLOCATION) { 2810 skipallocation = PETSC_TRUE; 2811 nz = 0; 2812 } 2813 2814 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2815 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2816 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2817 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2818 2819 B->preallocated = PETSC_TRUE; 2820 2821 mbs = B->rmap->n/bs; 2822 nbs = B->cmap->n/bs; 2823 bs2 = bs*bs; 2824 2825 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2826 2827 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2828 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2829 if (nnz) { 2830 for (i=0; i<mbs; i++) { 2831 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2832 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2833 } 2834 } 2835 2836 b = (Mat_SeqBAIJ*)B->data; 2837 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2838 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2839 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2840 2841 if (!flg) { 2842 switch (bs) { 2843 case 1: 2844 B->ops->mult = MatMult_SeqBAIJ_1; 2845 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2846 break; 2847 case 2: 2848 B->ops->mult = MatMult_SeqBAIJ_2; 2849 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2850 break; 2851 case 3: 2852 B->ops->mult = MatMult_SeqBAIJ_3; 2853 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2854 break; 2855 case 4: 2856 B->ops->mult = MatMult_SeqBAIJ_4; 2857 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2858 break; 2859 case 5: 2860 B->ops->mult = MatMult_SeqBAIJ_5; 2861 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2862 break; 2863 case 6: 2864 B->ops->mult = MatMult_SeqBAIJ_6; 2865 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2866 break; 2867 case 7: 2868 B->ops->mult = MatMult_SeqBAIJ_7; 2869 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2870 break; 2871 case 11: 2872 B->ops->mult = MatMult_SeqBAIJ_11; 2873 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2874 break; 2875 case 15: 2876 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2877 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2878 break; 2879 default: 2880 B->ops->mult = MatMult_SeqBAIJ_N; 2881 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2882 break; 2883 } 2884 } 2885 B->ops->sor = MatSOR_SeqBAIJ; 2886 b->mbs = mbs; 2887 b->nbs = nbs; 2888 if (!skipallocation) { 2889 if (!b->imax) { 2890 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2891 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2892 2893 b->free_imax_ilen = PETSC_TRUE; 2894 } 2895 /* b->ilen will count nonzeros in each block row so far. */ 2896 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2897 if (!nnz) { 2898 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2899 else if (nz < 0) nz = 1; 2900 for (i=0; i<mbs; i++) b->imax[i] = nz; 2901 nz = nz*mbs; 2902 } else { 2903 nz = 0; 2904 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2905 } 2906 2907 /* allocate the matrix space */ 2908 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2909 if (B->structure_only) { 2910 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2911 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2912 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2913 } else { 2914 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2915 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2916 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2917 } 2918 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2919 2920 if (B->structure_only) { 2921 b->singlemalloc = PETSC_FALSE; 2922 b->free_a = PETSC_FALSE; 2923 } else { 2924 b->singlemalloc = PETSC_TRUE; 2925 b->free_a = PETSC_TRUE; 2926 } 2927 b->free_ij = PETSC_TRUE; 2928 2929 b->i[0] = 0; 2930 for (i=1; i<mbs+1; i++) { 2931 b->i[i] = b->i[i-1] + b->imax[i-1]; 2932 } 2933 2934 } else { 2935 b->free_a = PETSC_FALSE; 2936 b->free_ij = PETSC_FALSE; 2937 } 2938 2939 b->bs2 = bs2; 2940 b->mbs = mbs; 2941 b->nz = 0; 2942 b->maxnz = nz; 2943 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2944 B->was_assembled = PETSC_FALSE; 2945 B->assembled = PETSC_FALSE; 2946 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2947 PetscFunctionReturn(0); 2948 } 2949 2950 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2951 { 2952 PetscInt i,m,nz,nz_max=0,*nnz; 2953 PetscScalar *values=0; 2954 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2955 PetscErrorCode ierr; 2956 2957 PetscFunctionBegin; 2958 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2959 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2960 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2961 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2962 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2963 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2964 m = B->rmap->n/bs; 2965 2966 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2967 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2968 for (i=0; i<m; i++) { 2969 nz = ii[i+1]- ii[i]; 2970 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2971 nz_max = PetscMax(nz_max, nz); 2972 nnz[i] = nz; 2973 } 2974 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2975 ierr = PetscFree(nnz);CHKERRQ(ierr); 2976 2977 values = (PetscScalar*)V; 2978 if (!values) { 2979 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2980 } 2981 for (i=0; i<m; i++) { 2982 PetscInt ncols = ii[i+1] - ii[i]; 2983 const PetscInt *icols = jj + ii[i]; 2984 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2985 if (!roworiented) { 2986 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2987 } else { 2988 PetscInt j; 2989 for (j=0; j<ncols; j++) { 2990 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2991 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2992 } 2993 } 2994 } 2995 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2996 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2997 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2998 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2999 PetscFunctionReturn(0); 3000 } 3001 3002 /*MC 3003 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3004 block sparse compressed row format. 3005 3006 Options Database Keys: 3007 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3008 3009 Level: beginner 3010 3011 .seealso: MatCreateSeqBAIJ() 3012 M*/ 3013 3014 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3015 3016 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3017 { 3018 PetscErrorCode ierr; 3019 PetscMPIInt size; 3020 Mat_SeqBAIJ *b; 3021 3022 PetscFunctionBegin; 3023 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3024 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3025 3026 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3027 B->data = (void*)b; 3028 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3029 3030 b->row = 0; 3031 b->col = 0; 3032 b->icol = 0; 3033 b->reallocs = 0; 3034 b->saved_values = 0; 3035 3036 b->roworiented = PETSC_TRUE; 3037 b->nonew = 0; 3038 b->diag = 0; 3039 B->spptr = 0; 3040 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3041 b->keepnonzeropattern = PETSC_FALSE; 3042 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3052 #if defined(PETSC_HAVE_HYPRE) 3053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3054 #endif 3055 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3056 PetscFunctionReturn(0); 3057 } 3058 3059 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3060 { 3061 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3062 PetscErrorCode ierr; 3063 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3064 3065 PetscFunctionBegin; 3066 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3067 3068 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3069 c->imax = a->imax; 3070 c->ilen = a->ilen; 3071 c->free_imax_ilen = PETSC_FALSE; 3072 } else { 3073 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3074 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3075 for (i=0; i<mbs; i++) { 3076 c->imax[i] = a->imax[i]; 3077 c->ilen[i] = a->ilen[i]; 3078 } 3079 c->free_imax_ilen = PETSC_TRUE; 3080 } 3081 3082 /* allocate the matrix space */ 3083 if (mallocmatspace) { 3084 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3085 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3086 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3087 3088 c->i = a->i; 3089 c->j = a->j; 3090 c->singlemalloc = PETSC_FALSE; 3091 c->free_a = PETSC_TRUE; 3092 c->free_ij = PETSC_FALSE; 3093 c->parent = A; 3094 C->preallocated = PETSC_TRUE; 3095 C->assembled = PETSC_TRUE; 3096 3097 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3098 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3099 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3100 } else { 3101 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3102 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3103 3104 c->singlemalloc = PETSC_TRUE; 3105 c->free_a = PETSC_TRUE; 3106 c->free_ij = PETSC_TRUE; 3107 3108 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3109 if (mbs > 0) { 3110 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3111 if (cpvalues == MAT_COPY_VALUES) { 3112 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3113 } else { 3114 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3115 } 3116 } 3117 C->preallocated = PETSC_TRUE; 3118 C->assembled = PETSC_TRUE; 3119 } 3120 } 3121 3122 c->roworiented = a->roworiented; 3123 c->nonew = a->nonew; 3124 3125 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3126 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3127 3128 c->bs2 = a->bs2; 3129 c->mbs = a->mbs; 3130 c->nbs = a->nbs; 3131 3132 if (a->diag) { 3133 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3134 c->diag = a->diag; 3135 c->free_diag = PETSC_FALSE; 3136 } else { 3137 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3138 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3139 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3140 c->free_diag = PETSC_TRUE; 3141 } 3142 } else c->diag = 0; 3143 3144 c->nz = a->nz; 3145 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3146 c->solve_work = NULL; 3147 c->mult_work = NULL; 3148 c->sor_workt = NULL; 3149 c->sor_work = NULL; 3150 3151 c->compressedrow.use = a->compressedrow.use; 3152 c->compressedrow.nrows = a->compressedrow.nrows; 3153 if (a->compressedrow.use) { 3154 i = a->compressedrow.nrows; 3155 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3156 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3157 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3158 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3159 } else { 3160 c->compressedrow.use = PETSC_FALSE; 3161 c->compressedrow.i = NULL; 3162 c->compressedrow.rindex = NULL; 3163 } 3164 C->nonzerostate = A->nonzerostate; 3165 3166 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3167 PetscFunctionReturn(0); 3168 } 3169 3170 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3171 { 3172 PetscErrorCode ierr; 3173 3174 PetscFunctionBegin; 3175 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3176 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3177 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3178 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3179 PetscFunctionReturn(0); 3180 } 3181 3182 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3183 { 3184 Mat_SeqBAIJ *a; 3185 PetscErrorCode ierr; 3186 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3187 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3188 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3189 PetscInt *masked,nmask,tmp,bs2,ishift; 3190 PetscMPIInt size; 3191 int fd; 3192 PetscScalar *aa; 3193 MPI_Comm comm; 3194 3195 PetscFunctionBegin; 3196 /* force binary viewer to load .info file if it has not yet done so */ 3197 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3198 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3199 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3200 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3201 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3202 if (bs < 0) bs = 1; 3203 bs2 = bs*bs; 3204 3205 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3206 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3207 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3208 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3209 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3210 M = header[1]; N = header[2]; nz = header[3]; 3211 3212 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3213 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3214 3215 /* 3216 This code adds extra rows to make sure the number of rows is 3217 divisible by the blocksize 3218 */ 3219 mbs = M/bs; 3220 extra_rows = bs - M + bs*(mbs); 3221 if (extra_rows == bs) extra_rows = 0; 3222 else mbs++; 3223 if (extra_rows) { 3224 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3225 } 3226 3227 /* Set global sizes if not already set */ 3228 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3229 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3230 } else { /* Check if the matrix global sizes are correct */ 3231 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3232 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3233 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3234 } 3235 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3236 } 3237 3238 /* read in row lengths */ 3239 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3240 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3241 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3242 3243 /* read in column indices */ 3244 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3245 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3246 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3247 3248 /* loop over row lengths determining block row lengths */ 3249 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3250 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3251 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3252 rowcount = 0; 3253 nzcount = 0; 3254 for (i=0; i<mbs; i++) { 3255 nmask = 0; 3256 for (j=0; j<bs; j++) { 3257 kmax = rowlengths[rowcount]; 3258 for (k=0; k<kmax; k++) { 3259 tmp = jj[nzcount++]/bs; 3260 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3261 } 3262 rowcount++; 3263 } 3264 browlengths[i] += nmask; 3265 /* zero out the mask elements we set */ 3266 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3267 } 3268 3269 /* Do preallocation */ 3270 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3271 a = (Mat_SeqBAIJ*)newmat->data; 3272 3273 /* set matrix "i" values */ 3274 a->i[0] = 0; 3275 for (i=1; i<= mbs; i++) { 3276 a->i[i] = a->i[i-1] + browlengths[i-1]; 3277 a->ilen[i-1] = browlengths[i-1]; 3278 } 3279 a->nz = 0; 3280 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3281 3282 /* read in nonzero values */ 3283 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3284 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3285 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3286 3287 /* set "a" and "j" values into matrix */ 3288 nzcount = 0; jcount = 0; 3289 for (i=0; i<mbs; i++) { 3290 nzcountb = nzcount; 3291 nmask = 0; 3292 for (j=0; j<bs; j++) { 3293 kmax = rowlengths[i*bs+j]; 3294 for (k=0; k<kmax; k++) { 3295 tmp = jj[nzcount++]/bs; 3296 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3297 } 3298 } 3299 /* sort the masked values */ 3300 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3301 3302 /* set "j" values into matrix */ 3303 maskcount = 1; 3304 for (j=0; j<nmask; j++) { 3305 a->j[jcount++] = masked[j]; 3306 mask[masked[j]] = maskcount++; 3307 } 3308 /* set "a" values into matrix */ 3309 ishift = bs2*a->i[i]; 3310 for (j=0; j<bs; j++) { 3311 kmax = rowlengths[i*bs+j]; 3312 for (k=0; k<kmax; k++) { 3313 tmp = jj[nzcountb]/bs; 3314 block = mask[tmp] - 1; 3315 point = jj[nzcountb] - bs*tmp; 3316 idx = ishift + bs2*block + j + bs*point; 3317 a->a[idx] = (MatScalar)aa[nzcountb++]; 3318 } 3319 } 3320 /* zero out the mask elements we set */ 3321 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3322 } 3323 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3324 3325 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3326 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3327 ierr = PetscFree(aa);CHKERRQ(ierr); 3328 ierr = PetscFree(jj);CHKERRQ(ierr); 3329 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3330 3331 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3332 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3333 PetscFunctionReturn(0); 3334 } 3335 3336 /*@C 3337 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3338 compressed row) format. For good matrix assembly performance the 3339 user should preallocate the matrix storage by setting the parameter nz 3340 (or the array nnz). By setting these parameters accurately, performance 3341 during matrix assembly can be increased by more than a factor of 50. 3342 3343 Collective on MPI_Comm 3344 3345 Input Parameters: 3346 + comm - MPI communicator, set to PETSC_COMM_SELF 3347 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3348 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3349 . m - number of rows 3350 . n - number of columns 3351 . nz - number of nonzero blocks per block row (same for all rows) 3352 - nnz - array containing the number of nonzero blocks in the various block rows 3353 (possibly different for each block row) or NULL 3354 3355 Output Parameter: 3356 . A - the matrix 3357 3358 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3359 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3360 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3361 3362 Options Database Keys: 3363 . -mat_no_unroll - uses code that does not unroll the loops in the 3364 block calculations (much slower) 3365 . -mat_block_size - size of the blocks to use 3366 3367 Level: intermediate 3368 3369 Notes: 3370 The number of rows and columns must be divisible by blocksize. 3371 3372 If the nnz parameter is given then the nz parameter is ignored 3373 3374 A nonzero block is any block that as 1 or more nonzeros in it 3375 3376 The block AIJ format is fully compatible with standard Fortran 77 3377 storage. That is, the stored row and column indices can begin at 3378 either one (as in Fortran) or zero. See the users' manual for details. 3379 3380 Specify the preallocated storage with either nz or nnz (not both). 3381 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3382 allocation. See Users-Manual: ch_mat for details. 3383 matrices. 3384 3385 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3386 @*/ 3387 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3388 { 3389 PetscErrorCode ierr; 3390 3391 PetscFunctionBegin; 3392 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3393 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3394 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3395 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3396 PetscFunctionReturn(0); 3397 } 3398 3399 /*@C 3400 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3401 per row in the matrix. For good matrix assembly performance the 3402 user should preallocate the matrix storage by setting the parameter nz 3403 (or the array nnz). By setting these parameters accurately, performance 3404 during matrix assembly can be increased by more than a factor of 50. 3405 3406 Collective on MPI_Comm 3407 3408 Input Parameters: 3409 + B - the matrix 3410 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3411 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3412 . nz - number of block nonzeros per block row (same for all rows) 3413 - nnz - array containing the number of block nonzeros in the various block rows 3414 (possibly different for each block row) or NULL 3415 3416 Options Database Keys: 3417 . -mat_no_unroll - uses code that does not unroll the loops in the 3418 block calculations (much slower) 3419 . -mat_block_size - size of the blocks to use 3420 3421 Level: intermediate 3422 3423 Notes: 3424 If the nnz parameter is given then the nz parameter is ignored 3425 3426 You can call MatGetInfo() to get information on how effective the preallocation was; 3427 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3428 You can also run with the option -info and look for messages with the string 3429 malloc in them to see if additional memory allocation was needed. 3430 3431 The block AIJ format is fully compatible with standard Fortran 77 3432 storage. That is, the stored row and column indices can begin at 3433 either one (as in Fortran) or zero. See the users' manual for details. 3434 3435 Specify the preallocated storage with either nz or nnz (not both). 3436 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3437 allocation. See Users-Manual: ch_mat for details. 3438 3439 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3440 @*/ 3441 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3442 { 3443 PetscErrorCode ierr; 3444 3445 PetscFunctionBegin; 3446 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3447 PetscValidType(B,1); 3448 PetscValidLogicalCollectiveInt(B,bs,2); 3449 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3450 PetscFunctionReturn(0); 3451 } 3452 3453 /*@C 3454 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3455 (the default sequential PETSc format). 3456 3457 Collective on MPI_Comm 3458 3459 Input Parameters: 3460 + B - the matrix 3461 . i - the indices into j for the start of each local row (starts with zero) 3462 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3463 - v - optional values in the matrix 3464 3465 Level: developer 3466 3467 Notes: 3468 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3469 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3470 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3471 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3472 block column and the second index is over columns within a block. 3473 3474 .keywords: matrix, aij, compressed row, sparse 3475 3476 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3477 @*/ 3478 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3479 { 3480 PetscErrorCode ierr; 3481 3482 PetscFunctionBegin; 3483 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3484 PetscValidType(B,1); 3485 PetscValidLogicalCollectiveInt(B,bs,2); 3486 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3487 PetscFunctionReturn(0); 3488 } 3489 3490 3491 /*@ 3492 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3493 3494 Collective on MPI_Comm 3495 3496 Input Parameters: 3497 + comm - must be an MPI communicator of size 1 3498 . bs - size of block 3499 . m - number of rows 3500 . n - number of columns 3501 . i - row indices 3502 . j - column indices 3503 - a - matrix values 3504 3505 Output Parameter: 3506 . mat - the matrix 3507 3508 Level: advanced 3509 3510 Notes: 3511 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3512 once the matrix is destroyed 3513 3514 You cannot set new nonzero locations into this matrix, that will generate an error. 3515 3516 The i and j indices are 0 based 3517 3518 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3519 3520 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3521 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3522 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3523 with column-major ordering within blocks. 3524 3525 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3526 3527 @*/ 3528 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3529 { 3530 PetscErrorCode ierr; 3531 PetscInt ii; 3532 Mat_SeqBAIJ *baij; 3533 3534 PetscFunctionBegin; 3535 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3536 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3537 3538 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3539 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3540 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3541 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3542 baij = (Mat_SeqBAIJ*)(*mat)->data; 3543 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3544 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3545 3546 baij->i = i; 3547 baij->j = j; 3548 baij->a = a; 3549 3550 baij->singlemalloc = PETSC_FALSE; 3551 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3552 baij->free_a = PETSC_FALSE; 3553 baij->free_ij = PETSC_FALSE; 3554 3555 for (ii=0; ii<m; ii++) { 3556 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3557 #if defined(PETSC_USE_DEBUG) 3558 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3559 #endif 3560 } 3561 #if defined(PETSC_USE_DEBUG) 3562 for (ii=0; ii<baij->i[m]; ii++) { 3563 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3564 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3565 } 3566 #endif 3567 3568 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3569 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3570 PetscFunctionReturn(0); 3571 } 3572 3573 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3574 { 3575 PetscErrorCode ierr; 3576 PetscMPIInt size; 3577 3578 PetscFunctionBegin; 3579 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3580 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3581 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3582 } else { 3583 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3584 } 3585 PetscFunctionReturn(0); 3586 } 3587