1 static char help[] = "Test memory scalability of MatMatMult() for AIJ and DENSE matrices. \n\ 2 Modified from the code contributed by Ian Lin <iancclin@umich.edu> \n\n"; 3 4 /* 5 Example: 6 mpiexec -n <np> ./ex33 -mem_view -matmatmult_Bbn <Bbn> 7 */ 8 9 #include <petsc.h> 10 11 PetscErrorCode Print_memory(PetscLogDouble mem) 12 { 13 double max_mem,min_mem; 14 15 PetscFunctionBeginUser; 16 PetscCallMPI(MPI_Reduce(&mem, &max_mem, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD)); 17 PetscCallMPI(MPI_Reduce(&mem, &min_mem, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD)); 18 max_mem = max_mem / 1024.0 / 1024.0; 19 min_mem = min_mem / 1024.0 / 1024.0; 20 PetscCall(PetscPrintf(MPI_COMM_WORLD, " max and min memory across all processors %.4f Mb, %.4f Mb.\n", (double)max_mem,(double)min_mem)); 21 PetscFunctionReturn(0); 22 } 23 24 /* 25 Illustrate how to use MPI derived data types. 26 It would save memory significantly. See MatMPIDenseScatter() 27 */ 28 PetscErrorCode TestMPIDerivedDataType() 29 { 30 MPI_Datatype type1, type2,rtype1,rtype2; 31 PetscInt i,j; 32 PetscScalar buffer[24]; /* An array of 4 rows, 6 cols */ 33 MPI_Status status; 34 PetscMPIInt rank,size,disp[2]; 35 36 PetscFunctionBeginUser; 37 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 38 PetscCheck(size >= 2,PETSC_COMM_SELF,PETSC_ERR_WRONG_MPI_SIZE,"Must use at least 2 processors"); 39 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 40 41 if (rank == 0) { 42 /* proc[0] sends 2 rows to proc[1] */ 43 for (i=0; i<24; i++) buffer[i] = (PetscScalar)i; 44 45 disp[0] = 0; disp[1] = 2; 46 PetscCallMPI(MPI_Type_create_indexed_block(2, 1, (const PetscMPIInt *)disp, MPIU_SCALAR, &type1)); 47 /* one column has 4 entries */ 48 PetscCallMPI(MPI_Type_create_resized(type1,0,4*sizeof(PetscScalar),&type2)); 49 PetscCallMPI(MPI_Type_commit(&type2)); 50 PetscCallMPI(MPI_Send(buffer, 6, type2, 1, 123, MPI_COMM_WORLD)); 51 52 } else if (rank == 1) { 53 /* proc[1] receives 2 rows from proc[0], and put them into contiguous rows, starting at the row 1 (disp[0]) */ 54 PetscInt blen = 2; 55 for (i=0; i<24; i++) buffer[i] = 0.0; 56 57 disp[0] = 1; 58 PetscCallMPI(MPI_Type_create_indexed_block(1, blen, (const PetscMPIInt *)disp, MPIU_SCALAR, &rtype1)); 59 PetscCallMPI(MPI_Type_create_resized(rtype1, 0, 4*sizeof(PetscScalar), &rtype2)); 60 61 PetscCallMPI(MPI_Type_commit(&rtype2)); 62 PetscCallMPI(MPI_Recv(buffer, 6, rtype2, 0, 123, MPI_COMM_WORLD, &status)); 63 for (i=0; i<4; i++) { 64 for (j=0; j<6; j++) { 65 PetscCall(PetscPrintf(MPI_COMM_SELF," %g", (double)PetscRealPart(buffer[i+j*4]))); 66 } 67 PetscCall(PetscPrintf(MPI_COMM_SELF,"\n")); 68 } 69 } 70 71 if (rank == 0) { 72 PetscCallMPI(MPI_Type_free(&type1)); 73 PetscCallMPI(MPI_Type_free(&type2)); 74 } else if (rank == 1) { 75 PetscCallMPI(MPI_Type_free(&rtype1)); 76 PetscCallMPI(MPI_Type_free(&rtype2)); 77 } 78 PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD)); 79 PetscFunctionReturn(0); 80 } 81 82 int main(int argc, char **args) 83 { 84 PetscInt mA = 2700,nX = 80,nz = 40; 85 /* PetscInt mA=6,nX=5,nz=2; //small test */ 86 PetscLogDouble mem; 87 Mat A,X,Y; 88 PetscBool flg = PETSC_FALSE; 89 90 PetscFunctionBeginUser; 91 PetscCall(PetscInitialize(&argc,&args,(char*)0,help)); 92 PetscCall(PetscOptionsGetBool(NULL,NULL,"-test_mpiderivedtype",&flg,NULL)); 93 if (flg) { 94 PetscCall(TestMPIDerivedDataType()); 95 PetscCall(PetscFinalize()); 96 return 0; 97 } 98 99 PetscCall(PetscOptionsGetBool(NULL,NULL,"-mem_view",&flg,NULL)); 100 PetscCall(PetscMemoryGetCurrentUsage(&mem)); 101 if (flg) { 102 PetscCall(PetscPrintf(MPI_COMM_WORLD, "Before start,")); 103 PetscCall(Print_memory(mem)); 104 } 105 106 PetscCall(MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,mA,mA,nz,PETSC_NULL,nz,PETSC_NULL,&A)); 107 PetscCall(MatSetRandom(A,PETSC_NULL)); 108 PetscCall(PetscMemoryGetCurrentUsage(&mem)); 109 if (flg) { 110 PetscCall(PetscPrintf(MPI_COMM_WORLD, "After creating A,")); 111 PetscCall(Print_memory(mem)); 112 } 113 114 PetscCall(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,mA,nX,PETSC_NULL,&X)); 115 PetscCall(MatSetRandom(X,PETSC_NULL)); 116 PetscCall(PetscMemoryGetCurrentUsage(&mem)); 117 if (flg) { 118 PetscCall(PetscPrintf(MPI_COMM_WORLD, "After creating X,")); 119 PetscCall(Print_memory(mem)); 120 } 121 122 PetscCall(MatMatMult(A,X,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Y)); 123 PetscCall(PetscMemoryGetCurrentUsage(&mem)); 124 if (flg) { 125 PetscCall(PetscPrintf(MPI_COMM_WORLD, "After MatMatMult,")); 126 PetscCall(Print_memory(mem)); 127 } 128 129 /* Test reuse */ 130 PetscCall(MatMatMult(A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y)); 131 PetscCall(PetscMemoryGetCurrentUsage(&mem)); 132 if (flg) { 133 PetscCall(PetscPrintf(MPI_COMM_WORLD, "After reuse MatMatMult,")); 134 PetscCall(Print_memory(mem)); 135 } 136 137 /* Check accuracy */ 138 PetscCall(MatMatMultEqual(A,X,Y,10,&flg)); 139 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMETYPE,"Error in MatMatMult()"); 140 141 PetscCall(MatDestroy(&A)); 142 PetscCall(MatDestroy(&X)); 143 PetscCall(MatDestroy(&Y)); 144 145 PetscCall(PetscFinalize()); 146 return 0; 147 } 148 149 /*TEST 150 151 test: 152 suffix: 1 153 nsize: 4 154 output_file: output/ex33.out 155 156 test: 157 suffix: 2 158 nsize: 8 159 output_file: output/ex33.out 160 161 test: 162 suffix: 3 163 nsize: 2 164 args: -test_mpiderivedtype 165 output_file: output/ex33_3.out 166 167 TEST*/ 168