1 static char help[] = "Test memory scalability of MatMatMult() for AIJ and DENSE matrices. \n\ 2 Modified from the code contributed by Ian Lin <iancclin@umich.edu> \n\n"; 3 4 /* 5 Example: 6 mpiexec -n <np> ./ex33 -mem_view -matmatmult_Bbn <Bbn> 7 */ 8 9 #include <petsc.h> 10 11 PetscErrorCode Print_memory(PetscLogDouble mem) { 12 double max_mem, min_mem; 13 14 PetscFunctionBeginUser; 15 PetscCallMPI(MPI_Reduce(&mem, &max_mem, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD)); 16 PetscCallMPI(MPI_Reduce(&mem, &min_mem, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD)); 17 max_mem = max_mem / 1024.0 / 1024.0; 18 min_mem = min_mem / 1024.0 / 1024.0; 19 PetscCall(PetscPrintf(MPI_COMM_WORLD, " max and min memory across all processors %.4f Mb, %.4f Mb.\n", (double)max_mem, (double)min_mem)); 20 PetscFunctionReturn(0); 21 } 22 23 /* 24 Illustrate how to use MPI derived data types. 25 It would save memory significantly. See MatMPIDenseScatter() 26 */ 27 PetscErrorCode TestMPIDerivedDataType() { 28 MPI_Datatype type1, type2, rtype1, rtype2; 29 PetscInt i, j; 30 PetscScalar buffer[24]; /* An array of 4 rows, 6 cols */ 31 MPI_Status status; 32 PetscMPIInt rank, size, disp[2]; 33 34 PetscFunctionBeginUser; 35 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 36 PetscCheck(size >= 2, PETSC_COMM_SELF, PETSC_ERR_WRONG_MPI_SIZE, "Must use at least 2 processors"); 37 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 38 39 if (rank == 0) { 40 /* proc[0] sends 2 rows to proc[1] */ 41 for (i = 0; i < 24; i++) buffer[i] = (PetscScalar)i; 42 43 disp[0] = 0; 44 disp[1] = 2; 45 PetscCallMPI(MPI_Type_create_indexed_block(2, 1, (const PetscMPIInt *)disp, MPIU_SCALAR, &type1)); 46 /* one column has 4 entries */ 47 PetscCallMPI(MPI_Type_create_resized(type1, 0, 4 * sizeof(PetscScalar), &type2)); 48 PetscCallMPI(MPI_Type_commit(&type2)); 49 PetscCallMPI(MPI_Send(buffer, 6, type2, 1, 123, MPI_COMM_WORLD)); 50 51 } else if (rank == 1) { 52 /* proc[1] receives 2 rows from proc[0], and put them into contiguous rows, starting at the row 1 (disp[0]) */ 53 PetscInt blen = 2; 54 for (i = 0; i < 24; i++) buffer[i] = 0.0; 55 56 disp[0] = 1; 57 PetscCallMPI(MPI_Type_create_indexed_block(1, blen, (const PetscMPIInt *)disp, MPIU_SCALAR, &rtype1)); 58 PetscCallMPI(MPI_Type_create_resized(rtype1, 0, 4 * sizeof(PetscScalar), &rtype2)); 59 60 PetscCallMPI(MPI_Type_commit(&rtype2)); 61 PetscCallMPI(MPI_Recv(buffer, 6, rtype2, 0, 123, MPI_COMM_WORLD, &status)); 62 for (i = 0; i < 4; i++) { 63 for (j = 0; j < 6; j++) { PetscCall(PetscPrintf(MPI_COMM_SELF, " %g", (double)PetscRealPart(buffer[i + j * 4]))); } 64 PetscCall(PetscPrintf(MPI_COMM_SELF, "\n")); 65 } 66 } 67 68 if (rank == 0) { 69 PetscCallMPI(MPI_Type_free(&type1)); 70 PetscCallMPI(MPI_Type_free(&type2)); 71 } else if (rank == 1) { 72 PetscCallMPI(MPI_Type_free(&rtype1)); 73 PetscCallMPI(MPI_Type_free(&rtype2)); 74 } 75 PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD)); 76 PetscFunctionReturn(0); 77 } 78 79 int main(int argc, char **args) { 80 PetscInt mA = 2700, nX = 80, nz = 40; 81 /* PetscInt mA=6,nX=5,nz=2; //small test */ 82 PetscLogDouble mem; 83 Mat A, X, Y; 84 PetscBool flg = PETSC_FALSE; 85 86 PetscFunctionBeginUser; 87 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 88 PetscCall(PetscOptionsGetBool(NULL, NULL, "-test_mpiderivedtype", &flg, NULL)); 89 if (flg) { 90 PetscCall(TestMPIDerivedDataType()); 91 PetscCall(PetscFinalize()); 92 return 0; 93 } 94 95 PetscCall(PetscOptionsGetBool(NULL, NULL, "-mem_view", &flg, NULL)); 96 PetscCall(PetscMemoryGetCurrentUsage(&mem)); 97 if (flg) { 98 PetscCall(PetscPrintf(MPI_COMM_WORLD, "Before start,")); 99 PetscCall(Print_memory(mem)); 100 } 101 102 PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, mA, mA, nz, PETSC_NULL, nz, PETSC_NULL, &A)); 103 PetscCall(MatSetRandom(A, PETSC_NULL)); 104 PetscCall(PetscMemoryGetCurrentUsage(&mem)); 105 if (flg) { 106 PetscCall(PetscPrintf(MPI_COMM_WORLD, "After creating A,")); 107 PetscCall(Print_memory(mem)); 108 } 109 110 PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, mA, nX, PETSC_NULL, &X)); 111 PetscCall(MatSetRandom(X, PETSC_NULL)); 112 PetscCall(PetscMemoryGetCurrentUsage(&mem)); 113 if (flg) { 114 PetscCall(PetscPrintf(MPI_COMM_WORLD, "After creating X,")); 115 PetscCall(Print_memory(mem)); 116 } 117 118 PetscCall(MatMatMult(A, X, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Y)); 119 PetscCall(PetscMemoryGetCurrentUsage(&mem)); 120 if (flg) { 121 PetscCall(PetscPrintf(MPI_COMM_WORLD, "After MatMatMult,")); 122 PetscCall(Print_memory(mem)); 123 } 124 125 /* Test reuse */ 126 PetscCall(MatMatMult(A, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y)); 127 PetscCall(PetscMemoryGetCurrentUsage(&mem)); 128 if (flg) { 129 PetscCall(PetscPrintf(MPI_COMM_WORLD, "After reuse MatMatMult,")); 130 PetscCall(Print_memory(mem)); 131 } 132 133 /* Check accuracy */ 134 PetscCall(MatMatMultEqual(A, X, Y, 10, &flg)); 135 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMETYPE, "Error in MatMatMult()"); 136 137 PetscCall(MatDestroy(&A)); 138 PetscCall(MatDestroy(&X)); 139 PetscCall(MatDestroy(&Y)); 140 141 PetscCall(PetscFinalize()); 142 return 0; 143 } 144 145 /*TEST 146 147 test: 148 suffix: 1 149 nsize: 4 150 output_file: output/ex33.out 151 152 test: 153 suffix: 2 154 nsize: 8 155 output_file: output/ex33.out 156 157 test: 158 suffix: 3 159 nsize: 2 160 args: -test_mpiderivedtype 161 output_file: output/ex33_3.out 162 163 TEST*/ 164