static char help[] = "Test memory scalability of MatMatMult() for AIJ and DENSE matrices. \n\
Modified from the code contributed by Ian Lin <iancclin@umich.edu> \n\n";

/*
Example:
  mpiexec -n <np> ./ex33 -mem_view -matmatmult_Bbn <Bbn>
  mpiexec -n 2 ./ex33 -test_mpiderivedtype
*/

#include <petsc.h>

PetscErrorCode Print_memory(PetscLogDouble mem)
{
  PetscErrorCode ierr;
  double         max_mem,min_mem;

  PetscFunctionBeginUser;
  ierr = MPI_Reduce(&mem, &max_mem, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);CHKERRMPI(ierr);
  ierr = MPI_Reduce(&mem, &min_mem, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);CHKERRMPI(ierr);
  max_mem = max_mem / 1024.0 / 1024.0;
  min_mem = min_mem / 1024.0 / 1024.0;
  ierr = PetscPrintf(MPI_COMM_WORLD, " max and min memory across all processors %.4f MB, %.4f MB.\n", (double)max_mem,(double)min_mem);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Illustrate how to use MPI derived data types.
   They can significantly reduce the memory used for communication.
   See MatMPIDenseScatter().
*/
PetscErrorCode TestMPIDerivedDataType(void)
{
  PetscErrorCode    ierr;
  MPI_Datatype      type1, type2,rtype1,rtype2;
  PetscInt          i,j;
  PetscScalar       buffer[24]; /* a 4x6 matrix stored column-major (4 rows, 6 cols) */
  MPI_Status        status;
  PetscMPIInt       rank,size,disp[2];

  PetscFunctionBeginUser;
  ierr = MPI_Comm_size(MPI_COMM_WORLD, &size);CHKERRMPI(ierr);
  if (size < 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Must use at least 2 processors");
  ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank);CHKERRMPI(ierr);

  if (rank == 0) {
    /* proc[0] sends rows 0 and 2 (non-contiguous) of the 4x6 buffer to proc[1] */
    for (i=0; i<24; i++) buffer[i] = (PetscScalar)i;

    disp[0] = 0;  disp[1] = 2;
    /* two blocks of one entry each, at offsets 0 and 2 within a column */
    ierr = MPI_Type_create_indexed_block(2, 1, disp, MPIU_SCALAR, &type1);CHKERRMPI(ierr);
    /* resize so the extent of type1 equals one column (4 entries) */
    ierr = MPI_Type_create_resized(type1,0,4*sizeof(PetscScalar),&type2);CHKERRMPI(ierr);
    ierr = MPI_Type_commit(&type2);CHKERRMPI(ierr);
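    /* Sending a count of 6 advances by type2's extent (one column) per element,
       so rows 0 and 2 of all 6 columns go out in a single message with no
       user-side packing buffer */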
    ierr = MPI_Send(buffer, 6, type2, 1, 123, MPI_COMM_WORLD);CHKERRMPI(ierr);

  } else if (rank == 1) {
    /* proc[1] receives the 2 rows from proc[0] and packs them into contiguous rows, starting at row 1 (disp[0]) */
    PetscMPIInt blen = 2;
    for (i=0; i<24; i++) buffer[i] = 0.0;

    disp[0] = 1;
    ierr = MPI_Type_create_indexed_block(1, blen, disp, MPIU_SCALAR, &rtype1);CHKERRMPI(ierr);
    ierr = MPI_Type_create_resized(rtype1, 0, 4*sizeof(PetscScalar), &rtype2);CHKERRMPI(ierr);

    ierr = MPI_Type_commit(&rtype2);CHKERRMPI(ierr);
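    /* Receiving a count of 6 likewise advances one column per element, placing
       each column's two incoming values into its contiguous rows 1 and 2 */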
    ierr = MPI_Recv(buffer, 6, rtype2, 0, 123, MPI_COMM_WORLD, &status);CHKERRMPI(ierr);
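    /* Print the whole 4x6 buffer; rows 1 and 2 should now hold the values that
       lived in proc[0]'s rows 0 and 2:
           0   0   0   0   0   0
           0   4   8  12  16  20
           2   6  10  14  18  22
           0   0   0   0   0   0                                              */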
    for (i=0; i<4; i++) {
      for (j=0; j<6; j++) {
        ierr = PetscPrintf(MPI_COMM_SELF,"  %g", (double)PetscRealPart(buffer[i+j*4]));CHKERRQ(ierr);
      }
      ierr = PetscPrintf(MPI_COMM_SELF,"\n");CHKERRQ(ierr);
    }
  }

  if (rank == 0) {
    ierr = MPI_Type_free(&type1);CHKERRMPI(ierr);
    ierr = MPI_Type_free(&type2);CHKERRMPI(ierr);
  } else if (rank == 1) {
    ierr = MPI_Type_free(&rtype1);CHKERRMPI(ierr);
    ierr = MPI_Type_free(&rtype2);CHKERRMPI(ierr);
  }
  ierr = MPI_Barrier(MPI_COMM_WORLD);CHKERRMPI(ierr);
  PetscFunctionReturn(0);
}

int main(int argc, char **args)
{
  PetscInt          mA = 2700,nX = 80,nz = 40;
  /* PetscInt       mA = 6,nX = 5,nz = 2;  (small test) */
  PetscLogDouble    mem;
  Mat               A,X,Y;
  PetscErrorCode    ierr;
  PetscBool         flg = PETSC_FALSE;

  ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
  ierr = PetscOptionsGetBool(NULL,NULL,"-test_mpiderivedtype",&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    ierr = TestMPIDerivedDataType();CHKERRQ(ierr);
    ierr = PetscFinalize();
    return ierr;
  }

  ierr = PetscOptionsGetBool(NULL,NULL,"-mem_view",&flg,NULL);CHKERRQ(ierr);
  ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr);
  if (flg) {
    ierr = PetscPrintf(MPI_COMM_WORLD, "Before start,");CHKERRQ(ierr);
    ierr = Print_memory(mem);CHKERRQ(ierr);
  }

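  /* A: mA x mA sparse AIJ matrix, preallocated with at most nz nonzeros per row
     in both the diagonal and off-diagonal blocks of each process's local rows */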
  ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,mA,mA,nz,NULL,nz,NULL,&A);CHKERRQ(ierr);
  ierr = MatSetRandom(A,NULL);CHKERRQ(ierr);
  ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr);
  if (flg) {
    ierr = PetscPrintf(MPI_COMM_WORLD, "After creating A,");CHKERRQ(ierr);
    ierr = Print_memory(mem);CHKERRQ(ierr);
  }

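  /* X: mA x nX dense matrix; passing NULL for the data array lets PETSc
     allocate the storage */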
  ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,mA,nX,NULL,&X);CHKERRQ(ierr);
  ierr = MatSetRandom(X,NULL);CHKERRQ(ierr);
  ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr);
  if (flg) {
    ierr = PetscPrintf(MPI_COMM_WORLD, "After creating X,");CHKERRQ(ierr);
    ierr = Print_memory(mem);CHKERRQ(ierr);
  }

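  /* Y = A*X. The -matmatmult_Bbn <Bbn> option advertised in the help string is
     meant to bound how many columns of X are multiplied at a time, trading some
     speed for a smaller peak memory footprint */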
  ierr = MatMatMult(A,X,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr);
  ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr);
  if (flg) {
    ierr = PetscPrintf(MPI_COMM_WORLD, "After MatMatMult,");CHKERRQ(ierr);
    ierr = Print_memory(mem);CHKERRQ(ierr);
  }

  /* Test reuse: MAT_REUSE_MATRIX recomputes the product in place, reusing Y's
     existing symbolic data, so little additional memory should be allocated */
  ierr = MatMatMult(A,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr);
  ierr = PetscMemoryGetCurrentUsage(&mem);CHKERRQ(ierr);
  if (flg) {
    ierr = PetscPrintf(MPI_COMM_WORLD, "After reuse MatMatMult,");CHKERRQ(ierr);
    ierr = Print_memory(mem);CHKERRQ(ierr);
  }

  /* Check accuracy: compare Y with A*X applied to 10 random vectors */
  ierr = MatMatMultEqual(A,X,Y,10,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatMatMult()");

  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  ierr = MatDestroy(&Y);CHKERRQ(ierr);

  ierr = PetscFinalize();
  return ierr;
}

/*TEST

   test:
      suffix: 1
      nsize: 4
      output_file: output/ex33.out

   test:
      suffix: 2
      nsize: 8
      output_file: output/ex33.out

   test:
      suffix: 3
      nsize: 2
      args: -test_mpiderivedtype
      output_file: output/ex33_3.out

TEST*/