1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the MAIJ matrix storage format. 5 This format is used for restriction and interpolation operations for 6 multicomponent problems. It interpolates each component the same way 7 independently. 8 9 We provide: 10 MatMult() 11 MatMultTranspose() 12 MatMultTransposeAdd() 13 MatMultAdd() 14 and 15 MatCreateMAIJ(Mat,dof,Mat*) 16 17 This single directory handles both the sequential and parallel codes 18 */ 19 20 #include "src/mat/impls/maij/maij.h" 21 #include "src/mat/utils/freespace.h" 22 #include "private/vecimpl.h" 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatMAIJGetAIJ" 26 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 27 { 28 PetscErrorCode ierr; 29 PetscTruth ismpimaij,isseqmaij; 30 31 PetscFunctionBegin; 32 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 33 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 34 if (ismpimaij) { 35 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 36 37 *B = b->A; 38 } else if (isseqmaij) { 39 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 40 41 *B = b->AIJ; 42 } else { 43 *B = A; 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMAIJRedimension" 50 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 51 { 52 PetscErrorCode ierr; 53 Mat Aij; 54 55 PetscFunctionBegin; 56 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 57 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatDestroy_SeqMAIJ" 63 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 64 { 65 PetscErrorCode ierr; 66 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 67 68 PetscFunctionBegin; 69 if (b->AIJ) { 70 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 71 } 72 ierr = PetscFree(b);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatView_SeqMAIJ" 78 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 79 { 80 PetscErrorCode ierr; 81 Mat B; 82 83 PetscFunctionBegin; 84 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 85 ierr = MatView(B,viewer);CHKERRQ(ierr); 86 ierr = MatDestroy(B);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatView_MPIMAIJ" 92 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 93 { 94 PetscErrorCode ierr; 95 Mat B; 96 97 PetscFunctionBegin; 98 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 99 ierr = MatView(B,viewer);CHKERRQ(ierr); 100 ierr = MatDestroy(B);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatDestroy_MPIMAIJ" 106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 107 { 108 PetscErrorCode ierr; 109 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 110 111 PetscFunctionBegin; 112 if (b->AIJ) { 113 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 114 } 115 if (b->OAIJ) { 116 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 117 } 118 if (b->A) { 119 ierr = MatDestroy(b->A);CHKERRQ(ierr); 120 } 121 if (b->ctx) { 122 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 123 } 124 if (b->w) { 125 ierr = VecDestroy(b->w);CHKERRQ(ierr); 126 } 127 ierr = PetscFree(b);CHKERRQ(ierr); 128 PetscFunctionReturn(0); 129 } 130 131 /*MC 132 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 133 multicomponent problems, interpolating or restricting each component the same way independently. 134 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 135 136 Operations provided: 137 . MatMult 138 . MatMultTranspose 139 . MatMultAdd 140 . MatMultTransposeAdd 141 142 Level: advanced 143 144 .seealso: MatCreateSeqDense 145 M*/ 146 147 EXTERN_C_BEGIN 148 #undef __FUNCT__ 149 #define __FUNCT__ "MatCreate_MAIJ" 150 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 151 { 152 PetscErrorCode ierr; 153 Mat_MPIMAIJ *b; 154 155 PetscFunctionBegin; 156 ierr = PetscNew(Mat_MPIMAIJ,&b);CHKERRQ(ierr); 157 A->data = (void*)b; 158 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 159 A->factor = 0; 160 A->mapping = 0; 161 162 b->AIJ = 0; 163 b->dof = 0; 164 b->OAIJ = 0; 165 b->ctx = 0; 166 b->w = 0; 167 PetscFunctionReturn(0); 168 } 169 EXTERN_C_END 170 171 /* --------------------------------------------------------------------------------------*/ 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatMult_SeqMAIJ_2" 174 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 175 { 176 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 177 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 178 PetscScalar *x,*y,*v,sum1, sum2; 179 PetscErrorCode ierr; 180 PetscInt m = b->AIJ->m,*idx,*ii; 181 PetscInt n,i,jrow,j; 182 183 PetscFunctionBegin; 184 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 185 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 186 idx = a->j; 187 v = a->a; 188 ii = a->i; 189 190 for (i=0; i<m; i++) { 191 jrow = ii[i]; 192 n = ii[i+1] - jrow; 193 sum1 = 0.0; 194 sum2 = 0.0; 195 for (j=0; j<n; j++) { 196 sum1 += v[jrow]*x[2*idx[jrow]]; 197 sum2 += v[jrow]*x[2*idx[jrow]+1]; 198 jrow++; 199 } 200 y[2*i] = sum1; 201 y[2*i+1] = sum2; 202 } 203 204 ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 205 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 206 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 207 PetscFunctionReturn(0); 208 } 209 210 #undef __FUNCT__ 211 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 212 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 213 { 214 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 215 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 216 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 217 PetscErrorCode ierr; 218 PetscInt m = b->AIJ->m,n,i,*idx; 219 220 PetscFunctionBegin; 221 ierr = VecSet(yy,zero);CHKERRQ(ierr); 222 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 223 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 224 225 for (i=0; i<m; i++) { 226 idx = a->j + a->i[i] ; 227 v = a->a + a->i[i] ; 228 n = a->i[i+1] - a->i[i]; 229 alpha1 = x[2*i]; 230 alpha2 = x[2*i+1]; 231 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 232 } 233 ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->n);CHKERRQ(ierr); 234 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 235 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 241 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 242 { 243 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 244 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 245 PetscScalar *x,*y,*v,sum1, sum2; 246 PetscErrorCode ierr; 247 PetscInt m = b->AIJ->m,*idx,*ii; 248 PetscInt n,i,jrow,j; 249 250 PetscFunctionBegin; 251 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 252 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 253 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 254 idx = a->j; 255 v = a->a; 256 ii = a->i; 257 258 for (i=0; i<m; i++) { 259 jrow = ii[i]; 260 n = ii[i+1] - jrow; 261 sum1 = 0.0; 262 sum2 = 0.0; 263 for (j=0; j<n; j++) { 264 sum1 += v[jrow]*x[2*idx[jrow]]; 265 sum2 += v[jrow]*x[2*idx[jrow]+1]; 266 jrow++; 267 } 268 y[2*i] += sum1; 269 y[2*i+1] += sum2; 270 } 271 272 ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 273 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 274 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 275 PetscFunctionReturn(0); 276 } 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 279 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 280 { 281 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 282 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 283 PetscScalar *x,*y,*v,alpha1,alpha2; 284 PetscErrorCode ierr; 285 PetscInt m = b->AIJ->m,n,i,*idx; 286 287 PetscFunctionBegin; 288 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 289 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 290 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 291 292 for (i=0; i<m; i++) { 293 idx = a->j + a->i[i] ; 294 v = a->a + a->i[i] ; 295 n = a->i[i+1] - a->i[i]; 296 alpha1 = x[2*i]; 297 alpha2 = x[2*i+1]; 298 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 299 } 300 ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->n);CHKERRQ(ierr); 301 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 302 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 /* --------------------------------------------------------------------------------------*/ 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatMult_SeqMAIJ_3" 308 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 309 { 310 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 311 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 312 PetscScalar *x,*y,*v,sum1, sum2, sum3; 313 PetscErrorCode ierr; 314 PetscInt m = b->AIJ->m,*idx,*ii; 315 PetscInt n,i,jrow,j; 316 317 PetscFunctionBegin; 318 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 319 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 320 idx = a->j; 321 v = a->a; 322 ii = a->i; 323 324 for (i=0; i<m; i++) { 325 jrow = ii[i]; 326 n = ii[i+1] - jrow; 327 sum1 = 0.0; 328 sum2 = 0.0; 329 sum3 = 0.0; 330 for (j=0; j<n; j++) { 331 sum1 += v[jrow]*x[3*idx[jrow]]; 332 sum2 += v[jrow]*x[3*idx[jrow]+1]; 333 sum3 += v[jrow]*x[3*idx[jrow]+2]; 334 jrow++; 335 } 336 y[3*i] = sum1; 337 y[3*i+1] = sum2; 338 y[3*i+2] = sum3; 339 } 340 341 ierr = PetscLogFlops(6*a->nz - 3*m);CHKERRQ(ierr); 342 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 343 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 349 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 350 { 351 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 352 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 353 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 354 PetscErrorCode ierr; 355 PetscInt m = b->AIJ->m,n,i,*idx; 356 357 PetscFunctionBegin; 358 ierr = VecSet(yy,zero);CHKERRQ(ierr); 359 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 360 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 361 362 for (i=0; i<m; i++) { 363 idx = a->j + a->i[i]; 364 v = a->a + a->i[i]; 365 n = a->i[i+1] - a->i[i]; 366 alpha1 = x[3*i]; 367 alpha2 = x[3*i+1]; 368 alpha3 = x[3*i+2]; 369 while (n-->0) { 370 y[3*(*idx)] += alpha1*(*v); 371 y[3*(*idx)+1] += alpha2*(*v); 372 y[3*(*idx)+2] += alpha3*(*v); 373 idx++; v++; 374 } 375 } 376 ierr = PetscLogFlops(6*a->nz - 3*b->AIJ->n);CHKERRQ(ierr); 377 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 378 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 379 PetscFunctionReturn(0); 380 } 381 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 384 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 385 { 386 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 387 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 388 PetscScalar *x,*y,*v,sum1, sum2, sum3; 389 PetscErrorCode ierr; 390 PetscInt m = b->AIJ->m,*idx,*ii; 391 PetscInt n,i,jrow,j; 392 393 PetscFunctionBegin; 394 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 395 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 396 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 397 idx = a->j; 398 v = a->a; 399 ii = a->i; 400 401 for (i=0; i<m; i++) { 402 jrow = ii[i]; 403 n = ii[i+1] - jrow; 404 sum1 = 0.0; 405 sum2 = 0.0; 406 sum3 = 0.0; 407 for (j=0; j<n; j++) { 408 sum1 += v[jrow]*x[3*idx[jrow]]; 409 sum2 += v[jrow]*x[3*idx[jrow]+1]; 410 sum3 += v[jrow]*x[3*idx[jrow]+2]; 411 jrow++; 412 } 413 y[3*i] += sum1; 414 y[3*i+1] += sum2; 415 y[3*i+2] += sum3; 416 } 417 418 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 419 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 420 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 425 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 426 { 427 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 428 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 429 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 430 PetscErrorCode ierr; 431 PetscInt m = b->AIJ->m,n,i,*idx; 432 433 PetscFunctionBegin; 434 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 435 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 436 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 437 for (i=0; i<m; i++) { 438 idx = a->j + a->i[i] ; 439 v = a->a + a->i[i] ; 440 n = a->i[i+1] - a->i[i]; 441 alpha1 = x[3*i]; 442 alpha2 = x[3*i+1]; 443 alpha3 = x[3*i+2]; 444 while (n-->0) { 445 y[3*(*idx)] += alpha1*(*v); 446 y[3*(*idx)+1] += alpha2*(*v); 447 y[3*(*idx)+2] += alpha3*(*v); 448 idx++; v++; 449 } 450 } 451 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 452 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 453 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 /* ------------------------------------------------------------------------------*/ 458 #undef __FUNCT__ 459 #define __FUNCT__ "MatMult_SeqMAIJ_4" 460 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 461 { 462 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 463 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 464 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 465 PetscErrorCode ierr; 466 PetscInt m = b->AIJ->m,*idx,*ii; 467 PetscInt n,i,jrow,j; 468 469 PetscFunctionBegin; 470 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 471 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 472 idx = a->j; 473 v = a->a; 474 ii = a->i; 475 476 for (i=0; i<m; i++) { 477 jrow = ii[i]; 478 n = ii[i+1] - jrow; 479 sum1 = 0.0; 480 sum2 = 0.0; 481 sum3 = 0.0; 482 sum4 = 0.0; 483 for (j=0; j<n; j++) { 484 sum1 += v[jrow]*x[4*idx[jrow]]; 485 sum2 += v[jrow]*x[4*idx[jrow]+1]; 486 sum3 += v[jrow]*x[4*idx[jrow]+2]; 487 sum4 += v[jrow]*x[4*idx[jrow]+3]; 488 jrow++; 489 } 490 y[4*i] = sum1; 491 y[4*i+1] = sum2; 492 y[4*i+2] = sum3; 493 y[4*i+3] = sum4; 494 } 495 496 ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 497 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 498 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 499 PetscFunctionReturn(0); 500 } 501 502 #undef __FUNCT__ 503 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 504 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 505 { 506 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 507 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 508 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 509 PetscErrorCode ierr; 510 PetscInt m = b->AIJ->m,n,i,*idx; 511 512 PetscFunctionBegin; 513 ierr = VecSet(yy,zero);CHKERRQ(ierr); 514 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 515 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 516 for (i=0; i<m; i++) { 517 idx = a->j + a->i[i] ; 518 v = a->a + a->i[i] ; 519 n = a->i[i+1] - a->i[i]; 520 alpha1 = x[4*i]; 521 alpha2 = x[4*i+1]; 522 alpha3 = x[4*i+2]; 523 alpha4 = x[4*i+3]; 524 while (n-->0) { 525 y[4*(*idx)] += alpha1*(*v); 526 y[4*(*idx)+1] += alpha2*(*v); 527 y[4*(*idx)+2] += alpha3*(*v); 528 y[4*(*idx)+3] += alpha4*(*v); 529 idx++; v++; 530 } 531 } 532 ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->n);CHKERRQ(ierr); 533 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 534 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 535 PetscFunctionReturn(0); 536 } 537 538 #undef __FUNCT__ 539 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 540 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 541 { 542 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 543 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 544 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 545 PetscErrorCode ierr; 546 PetscInt m = b->AIJ->m,*idx,*ii; 547 PetscInt n,i,jrow,j; 548 549 PetscFunctionBegin; 550 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 551 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 552 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 553 idx = a->j; 554 v = a->a; 555 ii = a->i; 556 557 for (i=0; i<m; i++) { 558 jrow = ii[i]; 559 n = ii[i+1] - jrow; 560 sum1 = 0.0; 561 sum2 = 0.0; 562 sum3 = 0.0; 563 sum4 = 0.0; 564 for (j=0; j<n; j++) { 565 sum1 += v[jrow]*x[4*idx[jrow]]; 566 sum2 += v[jrow]*x[4*idx[jrow]+1]; 567 sum3 += v[jrow]*x[4*idx[jrow]+2]; 568 sum4 += v[jrow]*x[4*idx[jrow]+3]; 569 jrow++; 570 } 571 y[4*i] += sum1; 572 y[4*i+1] += sum2; 573 y[4*i+2] += sum3; 574 y[4*i+3] += sum4; 575 } 576 577 ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 578 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 579 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 580 PetscFunctionReturn(0); 581 } 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 584 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 585 { 586 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 587 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 588 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 589 PetscErrorCode ierr; 590 PetscInt m = b->AIJ->m,n,i,*idx; 591 592 PetscFunctionBegin; 593 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 594 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 595 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 596 597 for (i=0; i<m; i++) { 598 idx = a->j + a->i[i] ; 599 v = a->a + a->i[i] ; 600 n = a->i[i+1] - a->i[i]; 601 alpha1 = x[4*i]; 602 alpha2 = x[4*i+1]; 603 alpha3 = x[4*i+2]; 604 alpha4 = x[4*i+3]; 605 while (n-->0) { 606 y[4*(*idx)] += alpha1*(*v); 607 y[4*(*idx)+1] += alpha2*(*v); 608 y[4*(*idx)+2] += alpha3*(*v); 609 y[4*(*idx)+3] += alpha4*(*v); 610 idx++; v++; 611 } 612 } 613 ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->n);CHKERRQ(ierr); 614 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 615 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 616 PetscFunctionReturn(0); 617 } 618 /* ------------------------------------------------------------------------------*/ 619 620 #undef __FUNCT__ 621 #define __FUNCT__ "MatMult_SeqMAIJ_5" 622 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 623 { 624 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 625 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 626 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 627 PetscErrorCode ierr; 628 PetscInt m = b->AIJ->m,*idx,*ii; 629 PetscInt n,i,jrow,j; 630 631 PetscFunctionBegin; 632 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 633 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 634 idx = a->j; 635 v = a->a; 636 ii = a->i; 637 638 for (i=0; i<m; i++) { 639 jrow = ii[i]; 640 n = ii[i+1] - jrow; 641 sum1 = 0.0; 642 sum2 = 0.0; 643 sum3 = 0.0; 644 sum4 = 0.0; 645 sum5 = 0.0; 646 for (j=0; j<n; j++) { 647 sum1 += v[jrow]*x[5*idx[jrow]]; 648 sum2 += v[jrow]*x[5*idx[jrow]+1]; 649 sum3 += v[jrow]*x[5*idx[jrow]+2]; 650 sum4 += v[jrow]*x[5*idx[jrow]+3]; 651 sum5 += v[jrow]*x[5*idx[jrow]+4]; 652 jrow++; 653 } 654 y[5*i] = sum1; 655 y[5*i+1] = sum2; 656 y[5*i+2] = sum3; 657 y[5*i+3] = sum4; 658 y[5*i+4] = sum5; 659 } 660 661 ierr = PetscLogFlops(10*a->nz - 5*m);CHKERRQ(ierr); 662 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 663 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 669 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 670 { 671 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 672 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 673 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 674 PetscErrorCode ierr; 675 PetscInt m = b->AIJ->m,n,i,*idx; 676 677 PetscFunctionBegin; 678 ierr = VecSet(yy,zero);CHKERRQ(ierr); 679 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 680 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 681 682 for (i=0; i<m; i++) { 683 idx = a->j + a->i[i] ; 684 v = a->a + a->i[i] ; 685 n = a->i[i+1] - a->i[i]; 686 alpha1 = x[5*i]; 687 alpha2 = x[5*i+1]; 688 alpha3 = x[5*i+2]; 689 alpha4 = x[5*i+3]; 690 alpha5 = x[5*i+4]; 691 while (n-->0) { 692 y[5*(*idx)] += alpha1*(*v); 693 y[5*(*idx)+1] += alpha2*(*v); 694 y[5*(*idx)+2] += alpha3*(*v); 695 y[5*(*idx)+3] += alpha4*(*v); 696 y[5*(*idx)+4] += alpha5*(*v); 697 idx++; v++; 698 } 699 } 700 ierr = PetscLogFlops(10*a->nz - 5*b->AIJ->n);CHKERRQ(ierr); 701 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 702 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 703 PetscFunctionReturn(0); 704 } 705 706 #undef __FUNCT__ 707 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 708 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 709 { 710 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 711 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 712 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 713 PetscErrorCode ierr; 714 PetscInt m = b->AIJ->m,*idx,*ii; 715 PetscInt n,i,jrow,j; 716 717 PetscFunctionBegin; 718 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 719 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 720 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 721 idx = a->j; 722 v = a->a; 723 ii = a->i; 724 725 for (i=0; i<m; i++) { 726 jrow = ii[i]; 727 n = ii[i+1] - jrow; 728 sum1 = 0.0; 729 sum2 = 0.0; 730 sum3 = 0.0; 731 sum4 = 0.0; 732 sum5 = 0.0; 733 for (j=0; j<n; j++) { 734 sum1 += v[jrow]*x[5*idx[jrow]]; 735 sum2 += v[jrow]*x[5*idx[jrow]+1]; 736 sum3 += v[jrow]*x[5*idx[jrow]+2]; 737 sum4 += v[jrow]*x[5*idx[jrow]+3]; 738 sum5 += v[jrow]*x[5*idx[jrow]+4]; 739 jrow++; 740 } 741 y[5*i] += sum1; 742 y[5*i+1] += sum2; 743 y[5*i+2] += sum3; 744 y[5*i+3] += sum4; 745 y[5*i+4] += sum5; 746 } 747 748 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 749 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 750 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 751 PetscFunctionReturn(0); 752 } 753 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 756 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 757 { 758 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 759 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 760 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 761 PetscErrorCode ierr; 762 PetscInt m = b->AIJ->m,n,i,*idx; 763 764 PetscFunctionBegin; 765 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 766 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 767 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 768 769 for (i=0; i<m; i++) { 770 idx = a->j + a->i[i] ; 771 v = a->a + a->i[i] ; 772 n = a->i[i+1] - a->i[i]; 773 alpha1 = x[5*i]; 774 alpha2 = x[5*i+1]; 775 alpha3 = x[5*i+2]; 776 alpha4 = x[5*i+3]; 777 alpha5 = x[5*i+4]; 778 while (n-->0) { 779 y[5*(*idx)] += alpha1*(*v); 780 y[5*(*idx)+1] += alpha2*(*v); 781 y[5*(*idx)+2] += alpha3*(*v); 782 y[5*(*idx)+3] += alpha4*(*v); 783 y[5*(*idx)+4] += alpha5*(*v); 784 idx++; v++; 785 } 786 } 787 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 788 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 789 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 790 PetscFunctionReturn(0); 791 } 792 793 /* ------------------------------------------------------------------------------*/ 794 #undef __FUNCT__ 795 #define __FUNCT__ "MatMult_SeqMAIJ_6" 796 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 797 { 798 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 799 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 800 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 801 PetscErrorCode ierr; 802 PetscInt m = b->AIJ->m,*idx,*ii; 803 PetscInt n,i,jrow,j; 804 805 PetscFunctionBegin; 806 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 807 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 808 idx = a->j; 809 v = a->a; 810 ii = a->i; 811 812 for (i=0; i<m; i++) { 813 jrow = ii[i]; 814 n = ii[i+1] - jrow; 815 sum1 = 0.0; 816 sum2 = 0.0; 817 sum3 = 0.0; 818 sum4 = 0.0; 819 sum5 = 0.0; 820 sum6 = 0.0; 821 for (j=0; j<n; j++) { 822 sum1 += v[jrow]*x[6*idx[jrow]]; 823 sum2 += v[jrow]*x[6*idx[jrow]+1]; 824 sum3 += v[jrow]*x[6*idx[jrow]+2]; 825 sum4 += v[jrow]*x[6*idx[jrow]+3]; 826 sum5 += v[jrow]*x[6*idx[jrow]+4]; 827 sum6 += v[jrow]*x[6*idx[jrow]+5]; 828 jrow++; 829 } 830 y[6*i] = sum1; 831 y[6*i+1] = sum2; 832 y[6*i+2] = sum3; 833 y[6*i+3] = sum4; 834 y[6*i+4] = sum5; 835 y[6*i+5] = sum6; 836 } 837 838 ierr = PetscLogFlops(12*a->nz - 6*m);CHKERRQ(ierr); 839 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 840 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 841 PetscFunctionReturn(0); 842 } 843 844 #undef __FUNCT__ 845 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 846 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 847 { 848 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 849 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 850 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 851 PetscErrorCode ierr; 852 PetscInt m = b->AIJ->m,n,i,*idx; 853 854 PetscFunctionBegin; 855 ierr = VecSet(yy,zero);CHKERRQ(ierr); 856 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 857 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 858 859 for (i=0; i<m; i++) { 860 idx = a->j + a->i[i] ; 861 v = a->a + a->i[i] ; 862 n = a->i[i+1] - a->i[i]; 863 alpha1 = x[6*i]; 864 alpha2 = x[6*i+1]; 865 alpha3 = x[6*i+2]; 866 alpha4 = x[6*i+3]; 867 alpha5 = x[6*i+4]; 868 alpha6 = x[6*i+5]; 869 while (n-->0) { 870 y[6*(*idx)] += alpha1*(*v); 871 y[6*(*idx)+1] += alpha2*(*v); 872 y[6*(*idx)+2] += alpha3*(*v); 873 y[6*(*idx)+3] += alpha4*(*v); 874 y[6*(*idx)+4] += alpha5*(*v); 875 y[6*(*idx)+5] += alpha6*(*v); 876 idx++; v++; 877 } 878 } 879 ierr = PetscLogFlops(12*a->nz - 6*b->AIJ->n);CHKERRQ(ierr); 880 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 881 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 882 PetscFunctionReturn(0); 883 } 884 885 #undef __FUNCT__ 886 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 887 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 888 { 889 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 890 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 891 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 892 PetscErrorCode ierr; 893 PetscInt m = b->AIJ->m,*idx,*ii; 894 PetscInt n,i,jrow,j; 895 896 PetscFunctionBegin; 897 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 898 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 899 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 900 idx = a->j; 901 v = a->a; 902 ii = a->i; 903 904 for (i=0; i<m; i++) { 905 jrow = ii[i]; 906 n = ii[i+1] - jrow; 907 sum1 = 0.0; 908 sum2 = 0.0; 909 sum3 = 0.0; 910 sum4 = 0.0; 911 sum5 = 0.0; 912 sum6 = 0.0; 913 for (j=0; j<n; j++) { 914 sum1 += v[jrow]*x[6*idx[jrow]]; 915 sum2 += v[jrow]*x[6*idx[jrow]+1]; 916 sum3 += v[jrow]*x[6*idx[jrow]+2]; 917 sum4 += v[jrow]*x[6*idx[jrow]+3]; 918 sum5 += v[jrow]*x[6*idx[jrow]+4]; 919 sum6 += v[jrow]*x[6*idx[jrow]+5]; 920 jrow++; 921 } 922 y[6*i] += sum1; 923 y[6*i+1] += sum2; 924 y[6*i+2] += sum3; 925 y[6*i+3] += sum4; 926 y[6*i+4] += sum5; 927 y[6*i+5] += sum6; 928 } 929 930 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 931 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 932 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 933 PetscFunctionReturn(0); 934 } 935 936 #undef __FUNCT__ 937 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 938 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 939 { 940 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 941 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 942 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 943 PetscErrorCode ierr; 944 PetscInt m = b->AIJ->m,n,i,*idx; 945 946 PetscFunctionBegin; 947 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 948 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 949 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 950 951 for (i=0; i<m; i++) { 952 idx = a->j + a->i[i] ; 953 v = a->a + a->i[i] ; 954 n = a->i[i+1] - a->i[i]; 955 alpha1 = x[6*i]; 956 alpha2 = x[6*i+1]; 957 alpha3 = x[6*i+2]; 958 alpha4 = x[6*i+3]; 959 alpha5 = x[6*i+4]; 960 alpha6 = x[6*i+5]; 961 while (n-->0) { 962 y[6*(*idx)] += alpha1*(*v); 963 y[6*(*idx)+1] += alpha2*(*v); 964 y[6*(*idx)+2] += alpha3*(*v); 965 y[6*(*idx)+3] += alpha4*(*v); 966 y[6*(*idx)+4] += alpha5*(*v); 967 y[6*(*idx)+5] += alpha6*(*v); 968 idx++; v++; 969 } 970 } 971 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 972 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 973 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 974 PetscFunctionReturn(0); 975 } 976 977 /* ------------------------------------------------------------------------------*/ 978 #undef __FUNCT__ 979 #define __FUNCT__ "MatMult_SeqMAIJ_7" 980 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 981 { 982 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 983 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 984 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 985 PetscErrorCode ierr; 986 PetscInt m = b->AIJ->m,*idx,*ii; 987 PetscInt n,i,jrow,j; 988 989 PetscFunctionBegin; 990 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 991 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 992 idx = a->j; 993 v = a->a; 994 ii = a->i; 995 996 for (i=0; i<m; i++) { 997 jrow = ii[i]; 998 n = ii[i+1] - jrow; 999 sum1 = 0.0; 1000 sum2 = 0.0; 1001 sum3 = 0.0; 1002 sum4 = 0.0; 1003 sum5 = 0.0; 1004 sum6 = 0.0; 1005 sum7 = 0.0; 1006 for (j=0; j<n; j++) { 1007 sum1 += v[jrow]*x[7*idx[jrow]]; 1008 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1009 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1010 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1011 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1012 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1013 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1014 jrow++; 1015 } 1016 y[7*i] = sum1; 1017 y[7*i+1] = sum2; 1018 y[7*i+2] = sum3; 1019 y[7*i+3] = sum4; 1020 y[7*i+4] = sum5; 1021 y[7*i+5] = sum6; 1022 y[7*i+6] = sum7; 1023 } 1024 1025 ierr = PetscLogFlops(14*a->nz - 7*m);CHKERRQ(ierr); 1026 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1027 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1028 PetscFunctionReturn(0); 1029 } 1030 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1033 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1034 { 1035 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1036 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1037 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1038 PetscErrorCode ierr; 1039 PetscInt m = b->AIJ->m,n,i,*idx; 1040 1041 PetscFunctionBegin; 1042 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1043 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1044 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1045 1046 for (i=0; i<m; i++) { 1047 idx = a->j + a->i[i] ; 1048 v = a->a + a->i[i] ; 1049 n = a->i[i+1] - a->i[i]; 1050 alpha1 = x[7*i]; 1051 alpha2 = x[7*i+1]; 1052 alpha3 = x[7*i+2]; 1053 alpha4 = x[7*i+3]; 1054 alpha5 = x[7*i+4]; 1055 alpha6 = x[7*i+5]; 1056 alpha7 = x[7*i+6]; 1057 while (n-->0) { 1058 y[7*(*idx)] += alpha1*(*v); 1059 y[7*(*idx)+1] += alpha2*(*v); 1060 y[7*(*idx)+2] += alpha3*(*v); 1061 y[7*(*idx)+3] += alpha4*(*v); 1062 y[7*(*idx)+4] += alpha5*(*v); 1063 y[7*(*idx)+5] += alpha6*(*v); 1064 y[7*(*idx)+6] += alpha7*(*v); 1065 idx++; v++; 1066 } 1067 } 1068 ierr = PetscLogFlops(14*a->nz - 7*b->AIJ->n);CHKERRQ(ierr); 1069 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1070 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1071 PetscFunctionReturn(0); 1072 } 1073 1074 #undef __FUNCT__ 1075 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1076 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1077 { 1078 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1079 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1080 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1081 PetscErrorCode ierr; 1082 PetscInt m = b->AIJ->m,*idx,*ii; 1083 PetscInt n,i,jrow,j; 1084 1085 PetscFunctionBegin; 1086 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1087 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1088 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1089 idx = a->j; 1090 v = a->a; 1091 ii = a->i; 1092 1093 for (i=0; i<m; i++) { 1094 jrow = ii[i]; 1095 n = ii[i+1] - jrow; 1096 sum1 = 0.0; 1097 sum2 = 0.0; 1098 sum3 = 0.0; 1099 sum4 = 0.0; 1100 sum5 = 0.0; 1101 sum6 = 0.0; 1102 sum7 = 0.0; 1103 for (j=0; j<n; j++) { 1104 sum1 += v[jrow]*x[7*idx[jrow]]; 1105 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1106 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1107 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1108 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1109 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1110 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1111 jrow++; 1112 } 1113 y[7*i] += sum1; 1114 y[7*i+1] += sum2; 1115 y[7*i+2] += sum3; 1116 y[7*i+3] += sum4; 1117 y[7*i+4] += sum5; 1118 y[7*i+5] += sum6; 1119 y[7*i+6] += sum7; 1120 } 1121 1122 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1123 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1124 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1130 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1131 { 1132 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1133 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1134 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1135 PetscErrorCode ierr; 1136 PetscInt m = b->AIJ->m,n,i,*idx; 1137 1138 PetscFunctionBegin; 1139 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1141 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1142 for (i=0; i<m; i++) { 1143 idx = a->j + a->i[i] ; 1144 v = a->a + a->i[i] ; 1145 n = a->i[i+1] - a->i[i]; 1146 alpha1 = x[7*i]; 1147 alpha2 = x[7*i+1]; 1148 alpha3 = x[7*i+2]; 1149 alpha4 = x[7*i+3]; 1150 alpha5 = x[7*i+4]; 1151 alpha6 = x[7*i+5]; 1152 alpha7 = x[7*i+6]; 1153 while (n-->0) { 1154 y[7*(*idx)] += alpha1*(*v); 1155 y[7*(*idx)+1] += alpha2*(*v); 1156 y[7*(*idx)+2] += alpha3*(*v); 1157 y[7*(*idx)+3] += alpha4*(*v); 1158 y[7*(*idx)+4] += alpha5*(*v); 1159 y[7*(*idx)+5] += alpha6*(*v); 1160 y[7*(*idx)+6] += alpha7*(*v); 1161 idx++; v++; 1162 } 1163 } 1164 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1165 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1166 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1172 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1173 { 1174 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1175 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1176 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1177 PetscErrorCode ierr; 1178 PetscInt m = b->AIJ->m,*idx,*ii; 1179 PetscInt n,i,jrow,j; 1180 1181 PetscFunctionBegin; 1182 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1183 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1184 idx = a->j; 1185 v = a->a; 1186 ii = a->i; 1187 1188 for (i=0; i<m; i++) { 1189 jrow = ii[i]; 1190 n = ii[i+1] - jrow; 1191 sum1 = 0.0; 1192 sum2 = 0.0; 1193 sum3 = 0.0; 1194 sum4 = 0.0; 1195 sum5 = 0.0; 1196 sum6 = 0.0; 1197 sum7 = 0.0; 1198 sum8 = 0.0; 1199 for (j=0; j<n; j++) { 1200 sum1 += v[jrow]*x[8*idx[jrow]]; 1201 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1202 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1203 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1204 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1205 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1206 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1207 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1208 jrow++; 1209 } 1210 y[8*i] = sum1; 1211 y[8*i+1] = sum2; 1212 y[8*i+2] = sum3; 1213 y[8*i+3] = sum4; 1214 y[8*i+4] = sum5; 1215 y[8*i+5] = sum6; 1216 y[8*i+6] = sum7; 1217 y[8*i+7] = sum8; 1218 } 1219 1220 ierr = PetscLogFlops(16*a->nz - 8*m);CHKERRQ(ierr); 1221 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1222 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1223 PetscFunctionReturn(0); 1224 } 1225 1226 #undef __FUNCT__ 1227 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1228 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1229 { 1230 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1231 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1232 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1233 PetscErrorCode ierr; 1234 PetscInt m = b->AIJ->m,n,i,*idx; 1235 1236 PetscFunctionBegin; 1237 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1238 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1239 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1240 1241 for (i=0; i<m; i++) { 1242 idx = a->j + a->i[i] ; 1243 v = a->a + a->i[i] ; 1244 n = a->i[i+1] - a->i[i]; 1245 alpha1 = x[8*i]; 1246 alpha2 = x[8*i+1]; 1247 alpha3 = x[8*i+2]; 1248 alpha4 = x[8*i+3]; 1249 alpha5 = x[8*i+4]; 1250 alpha6 = x[8*i+5]; 1251 alpha7 = x[8*i+6]; 1252 alpha8 = x[8*i+7]; 1253 while (n-->0) { 1254 y[8*(*idx)] += alpha1*(*v); 1255 y[8*(*idx)+1] += alpha2*(*v); 1256 y[8*(*idx)+2] += alpha3*(*v); 1257 y[8*(*idx)+3] += alpha4*(*v); 1258 y[8*(*idx)+4] += alpha5*(*v); 1259 y[8*(*idx)+5] += alpha6*(*v); 1260 y[8*(*idx)+6] += alpha7*(*v); 1261 y[8*(*idx)+7] += alpha8*(*v); 1262 idx++; v++; 1263 } 1264 } 1265 ierr = PetscLogFlops(16*a->nz - 8*b->AIJ->n);CHKERRQ(ierr); 1266 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1267 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1273 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1274 { 1275 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1276 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1277 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1278 PetscErrorCode ierr; 1279 PetscInt m = b->AIJ->m,*idx,*ii; 1280 PetscInt n,i,jrow,j; 1281 1282 PetscFunctionBegin; 1283 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1284 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1285 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1286 idx = a->j; 1287 v = a->a; 1288 ii = a->i; 1289 1290 for (i=0; i<m; i++) { 1291 jrow = ii[i]; 1292 n = ii[i+1] - jrow; 1293 sum1 = 0.0; 1294 sum2 = 0.0; 1295 sum3 = 0.0; 1296 sum4 = 0.0; 1297 sum5 = 0.0; 1298 sum6 = 0.0; 1299 sum7 = 0.0; 1300 sum8 = 0.0; 1301 for (j=0; j<n; j++) { 1302 sum1 += v[jrow]*x[8*idx[jrow]]; 1303 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1304 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1305 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1306 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1307 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1308 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1309 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1310 jrow++; 1311 } 1312 y[8*i] += sum1; 1313 y[8*i+1] += sum2; 1314 y[8*i+2] += sum3; 1315 y[8*i+3] += sum4; 1316 y[8*i+4] += sum5; 1317 y[8*i+5] += sum6; 1318 y[8*i+6] += sum7; 1319 y[8*i+7] += sum8; 1320 } 1321 1322 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1323 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1324 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1330 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1331 { 1332 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1333 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1334 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1335 PetscErrorCode ierr; 1336 PetscInt m = b->AIJ->m,n,i,*idx; 1337 1338 PetscFunctionBegin; 1339 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1340 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1341 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1342 for (i=0; i<m; i++) { 1343 idx = a->j + a->i[i] ; 1344 v = a->a + a->i[i] ; 1345 n = a->i[i+1] - a->i[i]; 1346 alpha1 = x[8*i]; 1347 alpha2 = x[8*i+1]; 1348 alpha3 = x[8*i+2]; 1349 alpha4 = x[8*i+3]; 1350 alpha5 = x[8*i+4]; 1351 alpha6 = x[8*i+5]; 1352 alpha7 = x[8*i+6]; 1353 alpha8 = x[8*i+7]; 1354 while (n-->0) { 1355 y[8*(*idx)] += alpha1*(*v); 1356 y[8*(*idx)+1] += alpha2*(*v); 1357 y[8*(*idx)+2] += alpha3*(*v); 1358 y[8*(*idx)+3] += alpha4*(*v); 1359 y[8*(*idx)+4] += alpha5*(*v); 1360 y[8*(*idx)+5] += alpha6*(*v); 1361 y[8*(*idx)+6] += alpha7*(*v); 1362 y[8*(*idx)+7] += alpha8*(*v); 1363 idx++; v++; 1364 } 1365 } 1366 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1367 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1368 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 1373 /* ------------------------------------------------------------------------------*/ 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1376 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1377 { 1378 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1379 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1380 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1381 PetscErrorCode ierr; 1382 PetscInt m = b->AIJ->m,*idx,*ii; 1383 PetscInt n,i,jrow,j; 1384 1385 PetscFunctionBegin; 1386 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1387 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1388 idx = a->j; 1389 v = a->a; 1390 ii = a->i; 1391 1392 for (i=0; i<m; i++) { 1393 jrow = ii[i]; 1394 n = ii[i+1] - jrow; 1395 sum1 = 0.0; 1396 sum2 = 0.0; 1397 sum3 = 0.0; 1398 sum4 = 0.0; 1399 sum5 = 0.0; 1400 sum6 = 0.0; 1401 sum7 = 0.0; 1402 sum8 = 0.0; 1403 sum9 = 0.0; 1404 for (j=0; j<n; j++) { 1405 sum1 += v[jrow]*x[9*idx[jrow]]; 1406 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1407 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1408 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1409 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1410 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1411 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1412 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1413 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1414 jrow++; 1415 } 1416 y[9*i] = sum1; 1417 y[9*i+1] = sum2; 1418 y[9*i+2] = sum3; 1419 y[9*i+3] = sum4; 1420 y[9*i+4] = sum5; 1421 y[9*i+5] = sum6; 1422 y[9*i+6] = sum7; 1423 y[9*i+7] = sum8; 1424 y[9*i+8] = sum9; 1425 } 1426 1427 ierr = PetscLogFlops(18*a->nz - 9*m);CHKERRQ(ierr); 1428 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1429 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1435 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1436 { 1437 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1438 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1439 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1440 PetscErrorCode ierr; 1441 PetscInt m = b->AIJ->m,n,i,*idx; 1442 1443 PetscFunctionBegin; 1444 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1445 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1446 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1447 1448 for (i=0; i<m; i++) { 1449 idx = a->j + a->i[i] ; 1450 v = a->a + a->i[i] ; 1451 n = a->i[i+1] - a->i[i]; 1452 alpha1 = x[9*i]; 1453 alpha2 = x[9*i+1]; 1454 alpha3 = x[9*i+2]; 1455 alpha4 = x[9*i+3]; 1456 alpha5 = x[9*i+4]; 1457 alpha6 = x[9*i+5]; 1458 alpha7 = x[9*i+6]; 1459 alpha8 = x[9*i+7]; 1460 alpha9 = x[9*i+8]; 1461 while (n-->0) { 1462 y[9*(*idx)] += alpha1*(*v); 1463 y[9*(*idx)+1] += alpha2*(*v); 1464 y[9*(*idx)+2] += alpha3*(*v); 1465 y[9*(*idx)+3] += alpha4*(*v); 1466 y[9*(*idx)+4] += alpha5*(*v); 1467 y[9*(*idx)+5] += alpha6*(*v); 1468 y[9*(*idx)+6] += alpha7*(*v); 1469 y[9*(*idx)+7] += alpha8*(*v); 1470 y[9*(*idx)+8] += alpha9*(*v); 1471 idx++; v++; 1472 } 1473 } 1474 ierr = PetscLogFlops(18*a->nz - 9*b->AIJ->n);CHKERRQ(ierr); 1475 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1476 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1477 PetscFunctionReturn(0); 1478 } 1479 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1482 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1483 { 1484 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1485 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1486 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1487 PetscErrorCode ierr; 1488 PetscInt m = b->AIJ->m,*idx,*ii; 1489 PetscInt n,i,jrow,j; 1490 1491 PetscFunctionBegin; 1492 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1493 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1494 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1495 idx = a->j; 1496 v = a->a; 1497 ii = a->i; 1498 1499 for (i=0; i<m; i++) { 1500 jrow = ii[i]; 1501 n = ii[i+1] - jrow; 1502 sum1 = 0.0; 1503 sum2 = 0.0; 1504 sum3 = 0.0; 1505 sum4 = 0.0; 1506 sum5 = 0.0; 1507 sum6 = 0.0; 1508 sum7 = 0.0; 1509 sum8 = 0.0; 1510 sum9 = 0.0; 1511 for (j=0; j<n; j++) { 1512 sum1 += v[jrow]*x[9*idx[jrow]]; 1513 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1514 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1515 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1516 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1517 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1518 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1519 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1520 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1521 jrow++; 1522 } 1523 y[9*i] += sum1; 1524 y[9*i+1] += sum2; 1525 y[9*i+2] += sum3; 1526 y[9*i+3] += sum4; 1527 y[9*i+4] += sum5; 1528 y[9*i+5] += sum6; 1529 y[9*i+6] += sum7; 1530 y[9*i+7] += sum8; 1531 y[9*i+8] += sum9; 1532 } 1533 1534 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1535 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1536 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1542 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1543 { 1544 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1545 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1546 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1547 PetscErrorCode ierr; 1548 PetscInt m = b->AIJ->m,n,i,*idx; 1549 1550 PetscFunctionBegin; 1551 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1552 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1553 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1554 for (i=0; i<m; i++) { 1555 idx = a->j + a->i[i] ; 1556 v = a->a + a->i[i] ; 1557 n = a->i[i+1] - a->i[i]; 1558 alpha1 = x[9*i]; 1559 alpha2 = x[9*i+1]; 1560 alpha3 = x[9*i+2]; 1561 alpha4 = x[9*i+3]; 1562 alpha5 = x[9*i+4]; 1563 alpha6 = x[9*i+5]; 1564 alpha7 = x[9*i+6]; 1565 alpha8 = x[9*i+7]; 1566 alpha9 = x[9*i+8]; 1567 while (n-->0) { 1568 y[9*(*idx)] += alpha1*(*v); 1569 y[9*(*idx)+1] += alpha2*(*v); 1570 y[9*(*idx)+2] += alpha3*(*v); 1571 y[9*(*idx)+3] += alpha4*(*v); 1572 y[9*(*idx)+4] += alpha5*(*v); 1573 y[9*(*idx)+5] += alpha6*(*v); 1574 y[9*(*idx)+6] += alpha7*(*v); 1575 y[9*(*idx)+7] += alpha8*(*v); 1576 y[9*(*idx)+8] += alpha9*(*v); 1577 idx++; v++; 1578 } 1579 } 1580 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1581 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1582 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1583 PetscFunctionReturn(0); 1584 } 1585 1586 /*--------------------------------------------------------------------------------------------*/ 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1589 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1590 { 1591 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1592 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1593 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1594 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1595 PetscErrorCode ierr; 1596 PetscInt m = b->AIJ->m,*idx,*ii; 1597 PetscInt n,i,jrow,j; 1598 1599 PetscFunctionBegin; 1600 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1601 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1602 idx = a->j; 1603 v = a->a; 1604 ii = a->i; 1605 1606 for (i=0; i<m; i++) { 1607 jrow = ii[i]; 1608 n = ii[i+1] - jrow; 1609 sum1 = 0.0; 1610 sum2 = 0.0; 1611 sum3 = 0.0; 1612 sum4 = 0.0; 1613 sum5 = 0.0; 1614 sum6 = 0.0; 1615 sum7 = 0.0; 1616 sum8 = 0.0; 1617 sum9 = 0.0; 1618 sum10 = 0.0; 1619 sum11 = 0.0; 1620 sum12 = 0.0; 1621 sum13 = 0.0; 1622 sum14 = 0.0; 1623 sum15 = 0.0; 1624 sum16 = 0.0; 1625 for (j=0; j<n; j++) { 1626 sum1 += v[jrow]*x[16*idx[jrow]]; 1627 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1628 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1629 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1630 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1631 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1632 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1633 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1634 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1635 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1636 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1637 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1638 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1639 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1640 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1641 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1642 jrow++; 1643 } 1644 y[16*i] = sum1; 1645 y[16*i+1] = sum2; 1646 y[16*i+2] = sum3; 1647 y[16*i+3] = sum4; 1648 y[16*i+4] = sum5; 1649 y[16*i+5] = sum6; 1650 y[16*i+6] = sum7; 1651 y[16*i+7] = sum8; 1652 y[16*i+8] = sum9; 1653 y[16*i+9] = sum10; 1654 y[16*i+10] = sum11; 1655 y[16*i+11] = sum12; 1656 y[16*i+12] = sum13; 1657 y[16*i+13] = sum14; 1658 y[16*i+14] = sum15; 1659 y[16*i+15] = sum16; 1660 } 1661 1662 ierr = PetscLogFlops(32*a->nz - 16*m);CHKERRQ(ierr); 1663 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1664 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1665 PetscFunctionReturn(0); 1666 } 1667 1668 #undef __FUNCT__ 1669 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1670 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1671 { 1672 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1673 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1674 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1675 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1676 PetscErrorCode ierr; 1677 PetscInt m = b->AIJ->m,n,i,*idx; 1678 1679 PetscFunctionBegin; 1680 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1681 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1682 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1683 1684 for (i=0; i<m; i++) { 1685 idx = a->j + a->i[i] ; 1686 v = a->a + a->i[i] ; 1687 n = a->i[i+1] - a->i[i]; 1688 alpha1 = x[16*i]; 1689 alpha2 = x[16*i+1]; 1690 alpha3 = x[16*i+2]; 1691 alpha4 = x[16*i+3]; 1692 alpha5 = x[16*i+4]; 1693 alpha6 = x[16*i+5]; 1694 alpha7 = x[16*i+6]; 1695 alpha8 = x[16*i+7]; 1696 alpha9 = x[16*i+8]; 1697 alpha10 = x[16*i+9]; 1698 alpha11 = x[16*i+10]; 1699 alpha12 = x[16*i+11]; 1700 alpha13 = x[16*i+12]; 1701 alpha14 = x[16*i+13]; 1702 alpha15 = x[16*i+14]; 1703 alpha16 = x[16*i+15]; 1704 while (n-->0) { 1705 y[16*(*idx)] += alpha1*(*v); 1706 y[16*(*idx)+1] += alpha2*(*v); 1707 y[16*(*idx)+2] += alpha3*(*v); 1708 y[16*(*idx)+3] += alpha4*(*v); 1709 y[16*(*idx)+4] += alpha5*(*v); 1710 y[16*(*idx)+5] += alpha6*(*v); 1711 y[16*(*idx)+6] += alpha7*(*v); 1712 y[16*(*idx)+7] += alpha8*(*v); 1713 y[16*(*idx)+8] += alpha9*(*v); 1714 y[16*(*idx)+9] += alpha10*(*v); 1715 y[16*(*idx)+10] += alpha11*(*v); 1716 y[16*(*idx)+11] += alpha12*(*v); 1717 y[16*(*idx)+12] += alpha13*(*v); 1718 y[16*(*idx)+13] += alpha14*(*v); 1719 y[16*(*idx)+14] += alpha15*(*v); 1720 y[16*(*idx)+15] += alpha16*(*v); 1721 idx++; v++; 1722 } 1723 } 1724 ierr = PetscLogFlops(32*a->nz - 16*b->AIJ->n);CHKERRQ(ierr); 1725 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1726 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1732 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1733 { 1734 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1735 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1736 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1737 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1738 PetscErrorCode ierr; 1739 PetscInt m = b->AIJ->m,*idx,*ii; 1740 PetscInt n,i,jrow,j; 1741 1742 PetscFunctionBegin; 1743 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1744 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1745 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1746 idx = a->j; 1747 v = a->a; 1748 ii = a->i; 1749 1750 for (i=0; i<m; i++) { 1751 jrow = ii[i]; 1752 n = ii[i+1] - jrow; 1753 sum1 = 0.0; 1754 sum2 = 0.0; 1755 sum3 = 0.0; 1756 sum4 = 0.0; 1757 sum5 = 0.0; 1758 sum6 = 0.0; 1759 sum7 = 0.0; 1760 sum8 = 0.0; 1761 sum9 = 0.0; 1762 sum10 = 0.0; 1763 sum11 = 0.0; 1764 sum12 = 0.0; 1765 sum13 = 0.0; 1766 sum14 = 0.0; 1767 sum15 = 0.0; 1768 sum16 = 0.0; 1769 for (j=0; j<n; j++) { 1770 sum1 += v[jrow]*x[16*idx[jrow]]; 1771 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1772 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1773 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1774 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1775 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1776 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1777 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1778 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1779 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1780 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1781 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1782 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1783 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1784 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1785 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1786 jrow++; 1787 } 1788 y[16*i] += sum1; 1789 y[16*i+1] += sum2; 1790 y[16*i+2] += sum3; 1791 y[16*i+3] += sum4; 1792 y[16*i+4] += sum5; 1793 y[16*i+5] += sum6; 1794 y[16*i+6] += sum7; 1795 y[16*i+7] += sum8; 1796 y[16*i+8] += sum9; 1797 y[16*i+9] += sum10; 1798 y[16*i+10] += sum11; 1799 y[16*i+11] += sum12; 1800 y[16*i+12] += sum13; 1801 y[16*i+13] += sum14; 1802 y[16*i+14] += sum15; 1803 y[16*i+15] += sum16; 1804 } 1805 1806 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 1807 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1808 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1809 PetscFunctionReturn(0); 1810 } 1811 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 1814 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1815 { 1816 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1817 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1818 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1819 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1820 PetscErrorCode ierr; 1821 PetscInt m = b->AIJ->m,n,i,*idx; 1822 1823 PetscFunctionBegin; 1824 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1825 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1826 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1827 for (i=0; i<m; i++) { 1828 idx = a->j + a->i[i] ; 1829 v = a->a + a->i[i] ; 1830 n = a->i[i+1] - a->i[i]; 1831 alpha1 = x[16*i]; 1832 alpha2 = x[16*i+1]; 1833 alpha3 = x[16*i+2]; 1834 alpha4 = x[16*i+3]; 1835 alpha5 = x[16*i+4]; 1836 alpha6 = x[16*i+5]; 1837 alpha7 = x[16*i+6]; 1838 alpha8 = x[16*i+7]; 1839 alpha9 = x[16*i+8]; 1840 alpha10 = x[16*i+9]; 1841 alpha11 = x[16*i+10]; 1842 alpha12 = x[16*i+11]; 1843 alpha13 = x[16*i+12]; 1844 alpha14 = x[16*i+13]; 1845 alpha15 = x[16*i+14]; 1846 alpha16 = x[16*i+15]; 1847 while (n-->0) { 1848 y[16*(*idx)] += alpha1*(*v); 1849 y[16*(*idx)+1] += alpha2*(*v); 1850 y[16*(*idx)+2] += alpha3*(*v); 1851 y[16*(*idx)+3] += alpha4*(*v); 1852 y[16*(*idx)+4] += alpha5*(*v); 1853 y[16*(*idx)+5] += alpha6*(*v); 1854 y[16*(*idx)+6] += alpha7*(*v); 1855 y[16*(*idx)+7] += alpha8*(*v); 1856 y[16*(*idx)+8] += alpha9*(*v); 1857 y[16*(*idx)+9] += alpha10*(*v); 1858 y[16*(*idx)+10] += alpha11*(*v); 1859 y[16*(*idx)+11] += alpha12*(*v); 1860 y[16*(*idx)+12] += alpha13*(*v); 1861 y[16*(*idx)+13] += alpha14*(*v); 1862 y[16*(*idx)+14] += alpha15*(*v); 1863 y[16*(*idx)+15] += alpha16*(*v); 1864 idx++; v++; 1865 } 1866 } 1867 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 1868 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1869 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1870 PetscFunctionReturn(0); 1871 } 1872 1873 /*===================================================================================*/ 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 1876 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1877 { 1878 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1879 PetscErrorCode ierr; 1880 1881 PetscFunctionBegin; 1882 /* start the scatter */ 1883 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1884 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 1885 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1886 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 1887 PetscFunctionReturn(0); 1888 } 1889 1890 #undef __FUNCT__ 1891 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 1892 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 1893 { 1894 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1895 PetscErrorCode ierr; 1896 1897 PetscFunctionBegin; 1898 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1899 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 1900 ierr = VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1901 ierr = VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1902 PetscFunctionReturn(0); 1903 } 1904 1905 #undef __FUNCT__ 1906 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 1907 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1908 { 1909 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1910 PetscErrorCode ierr; 1911 1912 PetscFunctionBegin; 1913 /* start the scatter */ 1914 ierr = VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1915 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1916 ierr = VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);CHKERRQ(ierr); 1917 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 1923 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 1924 { 1925 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 1926 PetscErrorCode ierr; 1927 1928 PetscFunctionBegin; 1929 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 1930 ierr = VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1931 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 1932 ierr = VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);CHKERRQ(ierr); 1933 PetscFunctionReturn(0); 1934 } 1935 1936 #undef __FUNCT__ 1937 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 1938 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 1939 { 1940 /* This routine requires testing -- but it's getting better. */ 1941 PetscErrorCode ierr; 1942 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1943 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 1944 Mat P=pp->AIJ; 1945 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 1946 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 1947 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 1948 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof,cn; 1949 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 1950 MatScalar *ca; 1951 1952 PetscFunctionBegin; 1953 /* Start timer */ 1954 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 1955 1956 /* Get ij structure of P^T */ 1957 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 1958 1959 cn = pn*ppdof; 1960 /* Allocate ci array, arrays for fill computation and */ 1961 /* free space for accumulating nonzero column info */ 1962 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 1963 ci[0] = 0; 1964 1965 /* Work arrays for rows of P^T*A */ 1966 ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 1967 ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 1968 ptasparserow = ptadenserow + an; 1969 denserow = ptasparserow + an; 1970 sparserow = denserow + cn; 1971 1972 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 1973 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 1974 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 1975 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 1976 current_space = free_space; 1977 1978 /* Determine symbolic info for each row of C: */ 1979 for (i=0;i<pn;i++) { 1980 ptnzi = pti[i+1] - pti[i]; 1981 ptJ = ptj + pti[i]; 1982 for (dof=0;dof<ppdof;dof++) { 1983 ptanzi = 0; 1984 /* Determine symbolic row of PtA: */ 1985 for (j=0;j<ptnzi;j++) { 1986 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 1987 arow = ptJ[j]*ppdof + dof; 1988 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 1989 anzj = ai[arow+1] - ai[arow]; 1990 ajj = aj + ai[arow]; 1991 for (k=0;k<anzj;k++) { 1992 if (!ptadenserow[ajj[k]]) { 1993 ptadenserow[ajj[k]] = -1; 1994 ptasparserow[ptanzi++] = ajj[k]; 1995 } 1996 } 1997 } 1998 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 1999 ptaj = ptasparserow; 2000 cnzi = 0; 2001 for (j=0;j<ptanzi;j++) { 2002 /* Get offset within block of P */ 2003 pshift = *ptaj%ppdof; 2004 /* Get block row of P */ 2005 prow = (*ptaj++)/ppdof; /* integer division */ 2006 /* P has same number of nonzeros per row as the compressed form */ 2007 pnzj = pi[prow+1] - pi[prow]; 2008 pjj = pj + pi[prow]; 2009 for (k=0;k<pnzj;k++) { 2010 /* Locations in C are shifted by the offset within the block */ 2011 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2012 if (!denserow[pjj[k]*ppdof+pshift]) { 2013 denserow[pjj[k]*ppdof+pshift] = -1; 2014 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2015 } 2016 } 2017 } 2018 2019 /* sort sparserow */ 2020 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2021 2022 /* If free space is not available, make more free space */ 2023 /* Double the amount of total space in the list */ 2024 if (current_space->local_remaining<cnzi) { 2025 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 2026 } 2027 2028 /* Copy data into free space, and zero out denserows */ 2029 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2030 current_space->array += cnzi; 2031 current_space->local_used += cnzi; 2032 current_space->local_remaining -= cnzi; 2033 2034 for (j=0;j<ptanzi;j++) { 2035 ptadenserow[ptasparserow[j]] = 0; 2036 } 2037 for (j=0;j<cnzi;j++) { 2038 denserow[sparserow[j]] = 0; 2039 } 2040 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2041 /* For now, we will recompute what is needed. */ 2042 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2043 } 2044 } 2045 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2046 /* Allocate space for cj, initialize cj, and */ 2047 /* destroy list of free space and other temporary array(s) */ 2048 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2049 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2050 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 2051 2052 /* Allocate space for ca */ 2053 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 2054 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 2055 2056 /* put together the new matrix */ 2057 ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2058 2059 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2060 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2061 c = (Mat_SeqAIJ *)((*C)->data); 2062 c->freedata = PETSC_TRUE; 2063 c->nonew = 0; 2064 2065 /* Clean up. */ 2066 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2067 2068 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2069 PetscFunctionReturn(0); 2070 } 2071 2072 #undef __FUNCT__ 2073 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 2074 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2075 { 2076 /* This routine requires testing -- first draft only */ 2077 PetscErrorCode ierr; 2078 PetscInt flops=0; 2079 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2080 Mat P=pp->AIJ; 2081 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 2082 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 2083 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 2084 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 2085 PetscInt *ci=c->i,*cj=c->j,*cjj; 2086 PetscInt am=A->M,cn=C->N,cm=C->M,ppdof=pp->dof; 2087 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 2088 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 2089 2090 PetscFunctionBegin; 2091 /* Allocate temporary array for storage of one row of A*P */ 2092 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 2093 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 2094 2095 apj = (PetscInt *)(apa + cn); 2096 apjdense = apj + cn; 2097 2098 /* Clear old values in C */ 2099 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 2100 2101 for (i=0;i<am;i++) { 2102 /* Form sparse row of A*P */ 2103 anzi = ai[i+1] - ai[i]; 2104 apnzj = 0; 2105 for (j=0;j<anzi;j++) { 2106 /* Get offset within block of P */ 2107 pshift = *aj%ppdof; 2108 /* Get block row of P */ 2109 prow = *aj++/ppdof; /* integer division */ 2110 pnzj = pi[prow+1] - pi[prow]; 2111 pjj = pj + pi[prow]; 2112 paj = pa + pi[prow]; 2113 for (k=0;k<pnzj;k++) { 2114 poffset = pjj[k]*ppdof+pshift; 2115 if (!apjdense[poffset]) { 2116 apjdense[poffset] = -1; 2117 apj[apnzj++] = poffset; 2118 } 2119 apa[poffset] += (*aa)*paj[k]; 2120 } 2121 flops += 2*pnzj; 2122 aa++; 2123 } 2124 2125 /* Sort the j index array for quick sparse axpy. */ 2126 /* Note: a array does not need sorting as it is in dense storage locations. */ 2127 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 2128 2129 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 2130 prow = i/ppdof; /* integer division */ 2131 pshift = i%ppdof; 2132 poffset = pi[prow]; 2133 pnzi = pi[prow+1] - poffset; 2134 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 2135 pJ = pj+poffset; 2136 pA = pa+poffset; 2137 for (j=0;j<pnzi;j++) { 2138 crow = (*pJ)*ppdof+pshift; 2139 cjj = cj + ci[crow]; 2140 caj = ca + ci[crow]; 2141 pJ++; 2142 /* Perform sparse axpy operation. Note cjj includes apj. */ 2143 for (k=0,nextap=0;nextap<apnzj;k++) { 2144 if (cjj[k]==apj[nextap]) { 2145 caj[k] += (*pA)*apa[apj[nextap++]]; 2146 } 2147 } 2148 flops += 2*apnzj; 2149 pA++; 2150 } 2151 2152 /* Zero the current row info for A*P */ 2153 for (j=0;j<apnzj;j++) { 2154 apa[apj[j]] = 0.; 2155 apjdense[apj[j]] = 0; 2156 } 2157 } 2158 2159 /* Assemble the final matrix and clean up */ 2160 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2161 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2162 ierr = PetscFree(apa);CHKERRQ(ierr); 2163 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 2164 2165 PetscFunctionReturn(0); 2166 } 2167 2168 EXTERN_C_BEGIN 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 2171 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2172 { 2173 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2174 Mat a = b->AIJ,B; 2175 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 2176 PetscErrorCode ierr; 2177 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 2178 PetscInt *cols; 2179 PetscScalar *vals; 2180 2181 PetscFunctionBegin; 2182 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 2183 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 2184 for (i=0; i<m; i++) { 2185 nmax = PetscMax(nmax,aij->ilen[i]); 2186 for (j=0; j<dof; j++) { 2187 ilen[dof*i+j] = aij->ilen[i]; 2188 } 2189 } 2190 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 2191 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2192 ierr = PetscFree(ilen);CHKERRQ(ierr); 2193 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 2194 ii = 0; 2195 for (i=0; i<m; i++) { 2196 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2197 for (j=0; j<dof; j++) { 2198 for (k=0; k<ncols; k++) { 2199 icols[k] = dof*cols[k]+j; 2200 } 2201 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2202 ii++; 2203 } 2204 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2205 } 2206 ierr = PetscFree(icols);CHKERRQ(ierr); 2207 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2208 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2209 2210 if (reuse == MAT_REUSE_MATRIX) { 2211 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2212 } else { 2213 *newmat = B; 2214 } 2215 PetscFunctionReturn(0); 2216 } 2217 EXTERN_C_END 2218 2219 #include "src/mat/impls/aij/mpi/mpiaij.h" 2220 2221 EXTERN_C_BEGIN 2222 #undef __FUNCT__ 2223 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 2224 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2225 { 2226 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 2227 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 2228 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 2229 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 2230 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 2231 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 2232 PetscInt dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols; 2233 PetscInt rstart,cstart,*garray,ii,k; 2234 PetscErrorCode ierr; 2235 PetscScalar *vals,*ovals; 2236 2237 PetscFunctionBegin; 2238 ierr = PetscMalloc2(A->m,PetscInt,&dnz,A->m,PetscInt,&onz);CHKERRQ(ierr); 2239 for (i=0; i<A->m/dof; i++) { 2240 nmax = PetscMax(nmax,AIJ->ilen[i]); 2241 onmax = PetscMax(onmax,OAIJ->ilen[i]); 2242 for (j=0; j<dof; j++) { 2243 dnz[dof*i+j] = AIJ->ilen[i]; 2244 onz[dof*i+j] = OAIJ->ilen[i]; 2245 } 2246 } 2247 ierr = MatCreateMPIAIJ(A->comm,A->m,A->n,A->M,A->N,0,dnz,0,onz,&B);CHKERRQ(ierr); 2248 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2249 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 2250 2251 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 2252 rstart = dof*mpiaij->rstart; 2253 cstart = dof*mpiaij->cstart; 2254 garray = mpiaij->garray; 2255 2256 ii = rstart; 2257 for (i=0; i<A->m/dof; i++) { 2258 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2259 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2260 for (j=0; j<dof; j++) { 2261 for (k=0; k<ncols; k++) { 2262 icols[k] = cstart + dof*cols[k]+j; 2263 } 2264 for (k=0; k<oncols; k++) { 2265 oicols[k] = dof*garray[ocols[k]]+j; 2266 } 2267 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2268 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 2269 ii++; 2270 } 2271 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2272 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2273 } 2274 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 2275 2276 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2277 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2278 2279 if (reuse == MAT_REUSE_MATRIX) { 2280 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2281 } else { 2282 *newmat = B; 2283 } 2284 PetscFunctionReturn(0); 2285 } 2286 EXTERN_C_END 2287 2288 2289 /* ---------------------------------------------------------------------------------- */ 2290 /*MC 2291 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 2292 operations for multicomponent problems. It interpolates each component the same 2293 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 2294 and MATMPIAIJ for distributed matrices. 2295 2296 Operations provided: 2297 + MatMult 2298 . MatMultTranspose 2299 . MatMultAdd 2300 . MatMultTransposeAdd 2301 - MatView 2302 2303 Level: advanced 2304 2305 M*/ 2306 #undef __FUNCT__ 2307 #define __FUNCT__ "MatCreateMAIJ" 2308 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 2309 { 2310 PetscErrorCode ierr; 2311 PetscMPIInt size; 2312 PetscInt n; 2313 Mat_MPIMAIJ *b; 2314 Mat B; 2315 2316 PetscFunctionBegin; 2317 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2318 2319 if (dof == 1) { 2320 *maij = A; 2321 } else { 2322 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 2323 ierr = MatSetSizes(B,dof*A->m,dof*A->n,dof*A->M,dof*A->N);CHKERRQ(ierr); 2324 B->assembled = PETSC_TRUE; 2325 2326 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2327 if (size == 1) { 2328 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 2329 B->ops->destroy = MatDestroy_SeqMAIJ; 2330 B->ops->view = MatView_SeqMAIJ; 2331 b = (Mat_MPIMAIJ*)B->data; 2332 b->dof = dof; 2333 b->AIJ = A; 2334 if (dof == 2) { 2335 B->ops->mult = MatMult_SeqMAIJ_2; 2336 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 2337 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 2338 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 2339 } else if (dof == 3) { 2340 B->ops->mult = MatMult_SeqMAIJ_3; 2341 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 2342 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 2343 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 2344 } else if (dof == 4) { 2345 B->ops->mult = MatMult_SeqMAIJ_4; 2346 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 2347 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 2348 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 2349 } else if (dof == 5) { 2350 B->ops->mult = MatMult_SeqMAIJ_5; 2351 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 2352 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 2353 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 2354 } else if (dof == 6) { 2355 B->ops->mult = MatMult_SeqMAIJ_6; 2356 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 2357 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 2358 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 2359 } else if (dof == 7) { 2360 B->ops->mult = MatMult_SeqMAIJ_7; 2361 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 2362 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 2363 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 2364 } else if (dof == 8) { 2365 B->ops->mult = MatMult_SeqMAIJ_8; 2366 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 2367 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 2368 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 2369 } else if (dof == 9) { 2370 B->ops->mult = MatMult_SeqMAIJ_9; 2371 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 2372 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 2373 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 2374 } else if (dof == 16) { 2375 B->ops->mult = MatMult_SeqMAIJ_16; 2376 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 2377 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 2378 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 2379 } else { 2380 SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 2381 } 2382 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 2383 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2384 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 2385 } else { 2386 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 2387 IS from,to; 2388 Vec gvec; 2389 PetscInt *garray,i; 2390 2391 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 2392 B->ops->destroy = MatDestroy_MPIMAIJ; 2393 B->ops->view = MatView_MPIMAIJ; 2394 b = (Mat_MPIMAIJ*)B->data; 2395 b->dof = dof; 2396 b->A = A; 2397 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 2398 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 2399 2400 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 2401 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 2402 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 2403 2404 /* create two temporary Index sets for build scatter gather */ 2405 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2406 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 2407 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 2408 ierr = PetscFree(garray);CHKERRQ(ierr); 2409 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 2410 2411 /* create temporary global vector to generate scatter context */ 2412 ierr = VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);CHKERRQ(ierr); 2413 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 2414 2415 /* generate the scatter context */ 2416 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 2417 2418 ierr = ISDestroy(from);CHKERRQ(ierr); 2419 ierr = ISDestroy(to);CHKERRQ(ierr); 2420 ierr = VecDestroy(gvec);CHKERRQ(ierr); 2421 2422 B->ops->mult = MatMult_MPIMAIJ_dof; 2423 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 2424 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 2425 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 2426 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 2427 } 2428 *maij = B; 2429 ierr = MatView_Private(B);CHKERRQ(ierr); 2430 } 2431 PetscFunctionReturn(0); 2432 } 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445