1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed! 28 29 #undef __FUNCT__ 30 #define __FUNCT__ "MatMult_SeqFFTW" 31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32 { 33 PetscErrorCode ierr; 34 Mat_FFT *fft = (Mat_FFT*)A->data; 35 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36 PetscScalar *x_array,*y_array; 37 PetscInt ndim=fft->ndim,*dim=fft->dim; 38 39 PetscFunctionBegin; 40 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 41 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 42 if (!fftw->p_forward){ /* create a plan, then excute it */ 43 switch (ndim){ 44 case 1: 45 #if defined(PETSC_USE_COMPLEX) 46 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 47 #else 48 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 49 #endif 50 break; 51 case 2: 52 #if defined(PETSC_USE_COMPLEX) 53 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 54 #else 55 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 56 #endif 57 break; 58 case 3: 59 #if defined(PETSC_USE_COMPLEX) 60 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 61 #else 62 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 63 #endif 64 break; 65 default: 66 #if defined(PETSC_USE_COMPLEX) 67 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 68 #else 69 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 70 #endif 71 break; 72 } 73 fftw->finarray = x_array; 74 fftw->foutarray = y_array; 75 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 76 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 77 fftw_execute(fftw->p_forward); 78 } else { /* use existing plan */ 79 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 80 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 81 } else { 82 fftw_execute(fftw->p_forward); 83 } 84 } 85 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 86 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 92 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 93 { 94 PetscErrorCode ierr; 95 Mat_FFT *fft = (Mat_FFT*)A->data; 96 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 97 PetscScalar *x_array,*y_array; 98 PetscInt ndim=fft->ndim,*dim=fft->dim; 99 100 PetscFunctionBegin; 101 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 102 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 103 if (!fftw->p_backward){ /* create a plan, then excute it */ 104 switch (ndim){ 105 case 1: 106 #if defined(PETSC_USE_COMPLEX) 107 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 108 #else 109 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 110 #endif 111 break; 112 case 2: 113 #if defined(PETSC_USE_COMPLEX) 114 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 115 #else 116 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 117 #endif 118 break; 119 case 3: 120 #if defined(PETSC_USE_COMPLEX) 121 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 122 #else 123 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 124 #endif 125 break; 126 default: 127 #if defined(PETSC_USE_COMPLEX) 128 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 129 #else 130 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 131 #endif 132 break; 133 } 134 fftw->binarray = x_array; 135 fftw->boutarray = y_array; 136 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 137 } else { /* use existing plan */ 138 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 139 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 140 } else { 141 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 142 } 143 } 144 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 145 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "MatMult_MPIFFTW" 151 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 152 { 153 PetscErrorCode ierr; 154 Mat_FFT *fft = (Mat_FFT*)A->data; 155 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 156 PetscScalar *x_array,*y_array; 157 PetscInt ndim=fft->ndim,*dim=fft->dim; 158 MPI_Comm comm=((PetscObject)A)->comm; 159 160 PetscFunctionBegin; 161 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 162 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163 if (!fftw->p_forward){ /* create a plan, then excute it */ 164 switch (ndim){ 165 case 1: 166 #if defined(PETSC_USE_COMPLEX) 167 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 168 #else 169 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 170 #endif 171 break; 172 case 2: 173 #if defined(PETSC_USE_COMPLEX) 174 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 175 #else 176 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 177 #endif 178 break; 179 case 3: 180 #if defined(PETSC_USE_COMPLEX) 181 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 182 #else 183 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 184 #endif 185 break; 186 default: 187 #if defined(PETSC_USE_COMPLEX) 188 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 189 #else 190 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 191 #endif 192 break; 193 } 194 fftw->finarray = x_array; 195 fftw->foutarray = y_array; 196 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 197 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 198 fftw_execute(fftw->p_forward); 199 } else { /* use existing plan */ 200 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 201 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 202 } else { 203 fftw_execute(fftw->p_forward); 204 } 205 } 206 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 207 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 213 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 214 { 215 PetscErrorCode ierr; 216 Mat_FFT *fft = (Mat_FFT*)A->data; 217 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 218 PetscScalar *x_array,*y_array; 219 PetscInt ndim=fft->ndim,*dim=fft->dim; 220 MPI_Comm comm=((PetscObject)A)->comm; 221 222 PetscFunctionBegin; 223 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 224 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 225 if (!fftw->p_backward){ /* create a plan, then excute it */ 226 switch (ndim){ 227 case 1: 228 #if defined(PETSC_USE_COMPLEX) 229 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 230 #else 231 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 232 #endif 233 break; 234 case 2: 235 #if defined(PETSC_USE_COMPLEX) 236 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 237 #else 238 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 239 #endif 240 break; 241 case 3: 242 #if defined(PETSC_USE_COMPLEX) 243 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 244 #else 245 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 246 #endif 247 break; 248 default: 249 #if defined(PETSC_USE_COMPLEX) 250 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 251 #else 252 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 253 #endif 254 break; 255 } 256 fftw->binarray = x_array; 257 fftw->boutarray = y_array; 258 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 259 } else { /* use existing plan */ 260 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 261 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 262 } else { 263 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 264 } 265 } 266 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 267 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatDestroy_FFTW" 273 PetscErrorCode MatDestroy_FFTW(Mat A) 274 { 275 Mat_FFT *fft = (Mat_FFT*)A->data; 276 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 277 PetscErrorCode ierr; 278 279 PetscFunctionBegin; 280 fftw_destroy_plan(fftw->p_forward); 281 fftw_destroy_plan(fftw->p_backward); 282 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 283 ierr = PetscFree(fft->data);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 288 #undef __FUNCT__ 289 #define __FUNCT__ "VecDestroy_MPIFFTW" 290 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 291 { 292 PetscErrorCode ierr; 293 PetscScalar *array; 294 295 PetscFunctionBegin; 296 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 297 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 298 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 299 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 304 #undef __FUNCT__ 305 #define __FUNCT__ "MatGetVecsFFTW" 306 /*@ 307 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 308 parallel layout determined by FFTW 309 310 Collective on Mat 311 312 Input Parameter: 313 . mat - the matrix 314 315 Output Parameter: 316 + fin - (optional) input vector of forward FFTW 317 - fout - (optional) output vector of forward FFTW 318 319 Level: advanced 320 321 .seealso: MatCreateFFTW() 322 @*/ 323 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 324 { 325 PetscErrorCode ierr; 326 PetscFunctionBegin; 327 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 EXTERN_C_BEGIN 332 #undef __FUNCT__ 333 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 334 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 335 { 336 PetscErrorCode ierr; 337 PetscMPIInt size,rank; 338 MPI_Comm comm=((PetscObject)A)->comm; 339 Mat_FFT *fft = (Mat_FFT*)A->data; 340 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 341 PetscInt N=fft->N; 342 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 343 344 PetscFunctionBegin; 345 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 346 PetscValidType(A,1); 347 348 ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 349 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 350 if (size == 1){ /* sequential case */ 351 #if defined(PETSC_USE_COMPLEX) 352 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 353 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 354 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 355 #else 356 // if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 357 // if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 358 // if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);} 359 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 360 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 361 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 362 #endif 363 } else { 364 ptrdiff_t alloc_local,local_n0,local_0_start; 365 ptrdiff_t local_n1; 366 fftw_complex *data_fout; 367 ptrdiff_t local_1_start; 368 #if defined(PETSC_USE_COMPLEX) 369 fftw_complex *data_fin,*data_bout; 370 #else 371 double *data_finr,*data_boutr; 372 PetscInt n1,N1,vsize; 373 ptrdiff_t temp; 374 #endif 375 376 switch (ndim){ 377 case 1: 378 #if !defined(PETSC_USE_COMPLEX) 379 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 380 #else 381 //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 382 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 383 if (fin) { 384 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 385 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 386 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 387 } 388 if (fout) { 389 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 390 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 391 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 392 } 393 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 394 if (bout) { 395 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 396 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 397 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 398 } 399 break; 400 #endif 401 case 2: 402 #if !defined(PETSC_USE_COMPLEX) 403 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 404 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 405 if (fin) { 406 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 407 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 408 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 409 } 410 if (bout) { 411 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 412 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 413 ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 414 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 415 } 416 //n1 = 2*local_n1*dim[0]; 417 if (fout) { 418 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 419 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 420 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 421 } 422 #else 423 /* Get local size */ 424 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 425 if (fin) { 426 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 427 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 428 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 429 } 430 if (fout) { 431 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 432 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 433 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 434 } 435 if (bout) { 436 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 437 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 438 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 439 } 440 #endif 441 break; 442 case 3: 443 #if !defined(PETSC_USE_COMPLEX) 444 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 445 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 446 if (fin) { 447 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 448 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 449 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 450 } 451 if (bout) { 452 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 453 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 454 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 455 } 456 //n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 457 if (fout) { 458 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 459 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 460 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 461 } 462 #else 463 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 464 if (fin) { 465 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 466 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 467 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 468 } 469 if (fout) { 470 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 471 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 472 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 473 } 474 if (bout) { 475 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 476 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 477 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 478 } 479 #endif 480 break; 481 default: 482 #if !defined(PETSC_USE_COMPLEX) 483 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 484 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 485 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 486 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 487 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 488 489 if (fin) { 490 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 491 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 492 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 493 } 494 if (bout) { 495 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 496 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 497 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 498 } 499 //temp = fftw->partial_dim; 500 //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 501 //n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 502 if (fout) { 503 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 504 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 505 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 506 } 507 #else 508 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 509 if (fin) { 510 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 511 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 512 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 513 } 514 if (fout) { 515 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 516 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 517 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 518 } 519 if (bout) { 520 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 521 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 522 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 523 } 524 #endif 525 break; 526 } 527 } 528 PetscFunctionReturn(0); 529 } 530 EXTERN_C_END 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatGetVecs_FFTW" 534 /* 535 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 536 parallel layout determined by FFTW 537 538 Collective on Mat 539 540 Input Parameter: 541 . mat - the matrix 542 543 Output Parameter: 544 + fin - (optional) input vector of forward FFTW 545 - fout - (optional) output vector of forward FFTW 546 547 Level: advanced 548 549 .seealso: MatCreateFFTW() 550 */ 551 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 552 { 553 PetscErrorCode ierr; 554 PetscMPIInt size,rank; 555 MPI_Comm comm=((PetscObject)A)->comm; 556 Mat_FFT *fft = (Mat_FFT*)A->data; 557 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 558 PetscInt N=fft->N; 559 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 560 561 PetscFunctionBegin; 562 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 563 PetscValidType(A,1); 564 565 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 566 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 567 if (size == 1){ /* sequential case */ 568 #if defined(PETSC_USE_COMPLEX) 569 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 570 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 571 #else 572 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 573 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 574 #endif 575 } else { /* mpi case */ 576 ptrdiff_t alloc_local,local_n0,local_0_start; 577 ptrdiff_t local_n1,local_1_end; 578 fftw_complex *data_fin,*data_fout; 579 #if !defined(PETSC_USE_COMPLEX) 580 double *data_finr ; 581 ptrdiff_t local_1_start,temp; 582 PetscInt vsize,n1,N1; 583 #endif 584 585 // PetscInt ctr; 586 // ptrdiff_t ndim1,*pdim; 587 // ndim1=(ptrdiff_t) ndim; 588 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 589 590 // for(ctr=0;ctr<ndim;ctr++) 591 // {k 592 // pdim[ctr] = dim[ctr]; 593 // } 594 595 596 597 switch (ndim){ 598 case 1: 599 /* Get local size */ 600 /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 601 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 602 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 603 printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1); 604 if (fin) { 605 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 606 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 607 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 608 } 609 if (fout) { 610 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 611 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 612 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 613 } 614 break; 615 case 2: 616 #if !defined(PETSC_USE_COMPLEX) 617 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 618 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 619 if (fin) { 620 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 621 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 622 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 623 //printf("The code comes here with vector size %d\n",vsize); 624 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 625 } 626 n1 = 2*local_n1*(dim[0]); 627 //n1 = 2*local_n1*dim[1]; 628 printf("The values are %ld\n",local_n1); 629 if (fout) { 630 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 631 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 632 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 633 } 634 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 635 636 #else 637 /* Get local size */ 638 //printf("Hope this does not come here"); 639 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 640 if (fin) { 641 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 642 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 643 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 644 } 645 if (fout) { 646 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 647 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 648 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 649 } 650 //printf("Hope this does not come here"); 651 #endif 652 break; 653 case 3: 654 /* Get local size */ 655 #if !defined(PETSC_USE_COMPLEX) 656 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 657 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 658 if (fin) { 659 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 660 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 661 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 662 //printf("The code comes here with vector size %d\n",vsize); 663 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 664 } 665 printf("The value is %ld",local_n1); 666 n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 667 if (fout) { 668 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 669 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 670 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 671 } 672 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 673 674 675 #else 676 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 677 // printf("The quantity n is %d",n); 678 if (fin) { 679 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 680 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 681 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 682 } 683 if (fout) { 684 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 685 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 686 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 687 } 688 #endif 689 break; 690 default: 691 /* Get local size */ 692 #if !defined(PETSC_USE_COMPLEX) 693 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 694 printf("The value of temp is %ld\n",temp); 695 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 696 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 697 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 698 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 699 if (fin) { 700 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 701 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 702 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 703 //printf("The code comes here with vector size %d\n",vsize); 704 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 705 } 706 printf("The value is %ld. Global length is %d \n",local_n1,N1); 707 temp = fftw->partial_dim; 708 fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 709 710 n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 711 if (fout) { 712 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 713 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 714 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 715 } 716 717 #else 718 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 719 // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 720 // printf("The value of alloc local is %d",alloc_local); 721 // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 722 // for(i=0;i<ndim;i++) 723 // { 724 // pdim[i]=dim[i];printf("%d",pdim[i]); 725 // } 726 // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 727 // printf("The quantity n is %d",n); 728 if (fin) { 729 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 730 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 731 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 732 } 733 if (fout) { 734 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 735 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 736 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 737 } 738 #endif 739 break; 740 } 741 } 742 // if (fin){ 743 // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 744 // } 745 // if (fout){ 746 // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 747 // } 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "InputTransformFFT" 753 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 754 { 755 PetscErrorCode ierr; 756 PetscFunctionBegin; 757 ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 758 PetscFunctionReturn(0); 759 } 760 761 /* 762 InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real 763 parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst 764 changing dimension. 765 Input A, x, y 766 A - FFTW matrix 767 x - user data 768 Options Database Keys: 769 + -mat_fftw_plannerflags - set FFTW planner flags 770 771 Level: intermediate 772 773 */ 774 775 EXTERN_C_BEGIN 776 #undef __FUNCT__ 777 #define __FUNCT__ "InputTransformFFT_FTTW" 778 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 779 { 780 PetscErrorCode ierr; 781 MPI_Comm comm=((PetscObject)A)->comm; 782 Mat_FFT *fft = (Mat_FFT*)A->data; 783 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 784 PetscInt N=fft->N; 785 PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 786 PetscInt low; 787 PetscInt rank,size,vsize,vsize1; 788 ptrdiff_t alloc_local,local_n0,local_0_start; 789 ptrdiff_t local_n1,local_1_start; 790 VecScatter vecscat; 791 IS list1,list2; 792 #if !defined(PETSC_USE_COMPLEX) 793 PetscInt i,j,k,partial_dim; 794 PetscInt *indx1, *indx2, tempindx, tempindx1; 795 PetscInt N1, n1 ,NM; 796 ptrdiff_t temp; 797 #endif 798 799 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 800 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 801 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 802 803 if (size==1) 804 { 805 /* switch (ndim){ 806 case 1: 807 ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 808 for (i=0;i<dim[0];i++) 809 { 810 indx1[i] = i; 811 } 812 ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 813 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 814 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 815 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 816 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 817 ierr = ISDestroy(&list1);CHKERRQ(ierr); 818 ierr = PetscFree(indx1);CHKERRQ(ierr); 819 break; 820 821 case 2: 822 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 823 for (i=0;i<dim[0];i++){ 824 for (j=0;j<dim[1];j++){ 825 indx1[i*dim[1]+j] = i*dim[1] + j; 826 } 827 } 828 ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 829 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 830 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 831 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 832 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 833 ierr = ISDestroy(&list1);CHKERRQ(ierr); 834 ierr = PetscFree(indx1);CHKERRQ(ierr); 835 break; 836 case 3: 837 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 838 for (i=0;i<dim[0];i++){ 839 for (j=0;j<dim[1];j++){ 840 for (k=0;k<dim[2];k++){ 841 indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 842 } 843 } 844 } 845 ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 846 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 847 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 848 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 849 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 850 ierr = ISDestroy(&list1);CHKERRQ(ierr); 851 ierr = PetscFree(indx1);CHKERRQ(ierr); 852 break; 853 default: 854 */ 855 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 856 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 857 printf("The values of sizes are %d and %d",vsize,vsize1); 858 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 859 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 860 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 861 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 863 ierr = ISDestroy(&list1);CHKERRQ(ierr); 864 // break; 865 // } 866 } 867 868 else{ 869 870 switch (ndim){ 871 case 1: 872 #if defined(PETSC_USE_COMPLEX) 873 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 874 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 875 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 876 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 877 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 878 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 879 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 880 ierr = ISDestroy(&list1);CHKERRQ(ierr); 881 ierr = ISDestroy(&list2);CHKERRQ(ierr); 882 #else 883 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 884 #endif 885 break; 886 case 2: 887 #if defined(PETSC_USE_COMPLEX) 888 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 889 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 890 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 891 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 892 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 893 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 895 ierr = ISDestroy(&list1);CHKERRQ(ierr); 896 ierr = ISDestroy(&list2);CHKERRQ(ierr); 897 #else 898 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 899 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 900 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 901 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 902 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 903 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 904 905 if (dim[1]%2==0) 906 NM = dim[1]+2; 907 else 908 NM = dim[1]+1; 909 910 for (i=0;i<local_n0;i++){ 911 for (j=0;j<dim[1];j++){ 912 tempindx = i*dim[1] + j; 913 tempindx1 = i*NM + j; 914 indx1[tempindx]=local_0_start*dim[1]+tempindx; 915 indx2[tempindx]=low+tempindx1; 916 } 917 } 918 919 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 920 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 921 922 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 923 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 924 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 925 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 926 ierr = ISDestroy(&list1);CHKERRQ(ierr); 927 ierr = ISDestroy(&list2);CHKERRQ(ierr); 928 ierr = PetscFree(indx1);CHKERRQ(ierr); 929 ierr = PetscFree(indx2);CHKERRQ(ierr); 930 #endif 931 break; 932 933 case 3: 934 #if defined(PETSC_USE_COMPLEX) 935 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 936 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 937 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 938 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 939 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 940 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 941 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 942 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 944 ierr = ISDestroy(&list1);CHKERRQ(ierr); 945 ierr = ISDestroy(&list2);CHKERRQ(ierr); 946 #else 947 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 948 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 949 950 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 951 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 952 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 953 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 954 955 if (dim[2]%2==0) 956 NM = dim[2]+2; 957 else 958 NM = dim[2]+1; 959 960 for (i=0;i<local_n0;i++){ 961 for (j=0;j<dim[1];j++){ 962 for (k=0;k<dim[2];k++){ 963 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 964 tempindx1 = i*dim[1]*NM + j*NM + k; 965 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 966 indx2[tempindx]=low+tempindx1; 967 } 968 } 969 } 970 971 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 972 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 973 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 974 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 977 ierr = ISDestroy(&list1);CHKERRQ(ierr); 978 ierr = ISDestroy(&list2);CHKERRQ(ierr); 979 ierr = PetscFree(indx1);CHKERRQ(ierr); 980 ierr = PetscFree(indx2);CHKERRQ(ierr); 981 #endif 982 break; 983 984 default: 985 #if defined(PETSC_USE_COMPLEX) 986 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 987 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 988 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 989 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 990 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 993 ierr = ISDestroy(&list1);CHKERRQ(ierr); 994 ierr = ISDestroy(&list2);CHKERRQ(ierr); 995 #else 996 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 997 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 998 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 999 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1000 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1001 1002 partial_dim = fftw->partial_dim; 1003 1004 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1005 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1006 1007 if (dim[ndim-1]%2==0) 1008 NM = 2; 1009 else 1010 NM = 1; 1011 1012 j = low; 1013 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1014 { 1015 indx1[i] = local_0_start*partial_dim + i; 1016 indx2[i] = j; 1017 if (k%dim[ndim-1]==0) 1018 { j+=NM;} 1019 j++; 1020 } 1021 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1022 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1023 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1024 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1025 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1027 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1028 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1029 ierr = PetscFree(indx1);CHKERRQ(ierr); 1030 ierr = PetscFree(indx2);CHKERRQ(ierr); 1031 #endif 1032 break; 1033 } 1034 } 1035 1036 return 0; 1037 } 1038 EXTERN_C_END 1039 1040 #undef __FUNCT__ 1041 #define __FUNCT__ "OutputTransformFFT" 1042 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 1043 { 1044 PetscErrorCode ierr; 1045 PetscFunctionBegin; 1046 ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 1047 PetscFunctionReturn(0); 1048 } 1049 1050 /* 1051 OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 1052 Input A, x, y 1053 A - FFTW matrix 1054 x - FFTW vector 1055 y - PETSc vector that user can read 1056 Options Database Keys: 1057 + -mat_fftw_plannerflags - set FFTW planner flags 1058 1059 Level: intermediate 1060 1061 */ 1062 1063 EXTERN_C_BEGIN 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "OutputTransformFFT_FTTW" 1066 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 1067 { 1068 PetscErrorCode ierr; 1069 MPI_Comm comm=((PetscObject)A)->comm; 1070 Mat_FFT *fft = (Mat_FFT*)A->data; 1071 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 1072 PetscInt N=fft->N; 1073 PetscInt ndim=fft->ndim,*dim=fft->dim; 1074 PetscInt low; 1075 PetscInt rank,size; 1076 ptrdiff_t alloc_local,local_n0,local_0_start; 1077 ptrdiff_t local_n1,local_1_start; 1078 VecScatter vecscat; 1079 IS list1,list2; 1080 #if !defined(PETSC_USE_COMPLEX) 1081 PetscInt i,j,k,partial_dim; 1082 PetscInt *indx1, *indx2, tempindx, tempindx1; 1083 PetscInt N1, n1 ,NM; 1084 ptrdiff_t temp; 1085 #endif 1086 1087 1088 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1089 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1090 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 1091 1092 if (size==1){ 1093 /* 1094 switch (ndim){ 1095 case 1: 1096 ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 1097 for (i=0;i<dim[0];i++) 1098 { 1099 indx1[i] = i; 1100 } 1101 ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1102 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1103 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1104 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1105 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1106 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1107 ierr = PetscFree(indx1);CHKERRQ(ierr); 1108 break; 1109 1110 case 2: 1111 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 1112 for (i=0;i<dim[0];i++){ 1113 for (j=0;j<dim[1];j++){ 1114 indx1[i*dim[1]+j] = i*dim[1] + j; 1115 } 1116 } 1117 ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1118 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1119 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1120 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1121 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1122 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1123 ierr = PetscFree(indx1);CHKERRQ(ierr); 1124 break; 1125 case 3: 1126 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1127 for (i=0;i<dim[0];i++){ 1128 for (j=0;j<dim[1];j++){ 1129 for (k=0;k<dim[2];k++){ 1130 indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 1131 } 1132 } 1133 } 1134 ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1135 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1136 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1137 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1138 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1139 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1140 ierr = PetscFree(indx1);CHKERRQ(ierr); 1141 break; 1142 default: 1143 */ 1144 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 1145 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 1146 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1147 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1148 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1149 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1150 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1151 // break; 1152 // } 1153 } 1154 else{ 1155 1156 switch (ndim){ 1157 case 1: 1158 #if defined(PETSC_USE_COMPLEX) 1159 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1160 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 1161 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 1162 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1163 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1164 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1165 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1166 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1167 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1168 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1169 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1170 #else 1171 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 1172 #endif 1173 break; 1174 case 2: 1175 #if defined(PETSC_USE_COMPLEX) 1176 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1177 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1178 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1179 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1180 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1181 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1182 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1183 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1184 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1185 #else 1186 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1187 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 1188 1189 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 1190 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 1191 1192 if (dim[1]%2==0) 1193 NM = dim[1]+2; 1194 else 1195 NM = dim[1]+1; 1196 1197 for (i=0;i<local_n0;i++){ 1198 for (j=0;j<dim[1];j++){ 1199 tempindx = i*dim[1] + j; 1200 tempindx1 = i*NM + j; 1201 indx1[tempindx]=local_0_start*dim[1]+tempindx; 1202 indx2[tempindx]=low+tempindx1; 1203 } 1204 } 1205 1206 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1207 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1208 1209 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1210 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1211 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1212 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1213 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1214 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1215 ierr = PetscFree(indx1);CHKERRQ(ierr); 1216 ierr = PetscFree(indx2);CHKERRQ(ierr); 1217 #endif 1218 break; 1219 case 3: 1220 #if defined(PETSC_USE_COMPLEX) 1221 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1222 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1223 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1224 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1225 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1226 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1227 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1228 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1229 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1230 #else 1231 1232 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1233 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1234 1235 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1236 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1237 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1238 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1239 1240 if (dim[2]%2==0) 1241 NM = dim[2]+2; 1242 else 1243 NM = dim[2]+1; 1244 1245 for (i=0;i<local_n0;i++){ 1246 for (j=0;j<dim[1];j++){ 1247 for (k=0;k<dim[2];k++){ 1248 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1249 tempindx1 = i*dim[1]*NM + j*NM + k; 1250 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1251 indx2[tempindx]=low+tempindx1; 1252 } 1253 // printf("Val tempindx1 = %d\n",tempindx1); 1254 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1255 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1256 // printf("-------------------------\n",indx2[tempindx],rank); 1257 } 1258 } 1259 1260 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1261 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1262 1263 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1264 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1267 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1268 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1269 ierr = PetscFree(indx1);CHKERRQ(ierr); 1270 ierr = PetscFree(indx2);CHKERRQ(ierr); 1271 #endif 1272 break; 1273 default: 1274 #if defined(PETSC_USE_COMPLEX) 1275 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1276 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1277 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1278 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1279 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1280 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1281 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1282 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1283 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1284 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1285 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1286 #else 1287 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1288 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1289 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1290 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1291 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1292 1293 partial_dim = fftw->partial_dim; 1294 1295 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1296 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1297 1298 if (dim[ndim-1]%2==0) 1299 NM = 2; 1300 else 1301 NM = 1; 1302 1303 j = low; 1304 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1305 { 1306 indx1[i] = local_0_start*partial_dim + i; 1307 indx2[i] = j; 1308 if (k%dim[ndim-1]==0) 1309 { j+=NM;} 1310 j++; 1311 } 1312 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1313 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1314 1315 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1316 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1317 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1318 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1319 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1320 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1321 ierr = PetscFree(indx1);CHKERRQ(ierr); 1322 ierr = PetscFree(indx2);CHKERRQ(ierr); 1323 #endif 1324 break; 1325 } 1326 } 1327 return 0; 1328 } 1329 EXTERN_C_END 1330 1331 EXTERN_C_BEGIN 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatCreate_FFTW" 1334 /* 1335 MatCreate_FFTW - Creates a matrix object that provides FFT 1336 via the external package FFTW 1337 Options Database Keys: 1338 + -mat_fftw_plannerflags - set FFTW planner flags 1339 1340 Level: intermediate 1341 1342 */ 1343 1344 PetscErrorCode MatCreate_FFTW(Mat A) 1345 { 1346 PetscErrorCode ierr; 1347 MPI_Comm comm=((PetscObject)A)->comm; 1348 Mat_FFT *fft=(Mat_FFT*)A->data; 1349 Mat_FFTW *fftw; 1350 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1351 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1352 PetscBool flg; 1353 PetscInt p_flag,partial_dim=1,ctr; 1354 PetscMPIInt size,rank; 1355 ptrdiff_t *pdim; 1356 ptrdiff_t local_n1,local_1_start; 1357 #if !defined(PETSC_USE_COMPLEX) 1358 ptrdiff_t temp; 1359 PetscInt N1,tot_dim; 1360 #else 1361 PetscInt n1; 1362 #endif 1363 1364 1365 PetscFunctionBegin; 1366 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1367 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1368 ierr = MPI_Barrier(PETSC_COMM_WORLD); 1369 1370 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 1371 pdim[0] = dim[0]; 1372 #if !defined(PETSC_USE_COMPLEX) 1373 tot_dim = 2*dim[0]; 1374 #endif 1375 for (ctr=1;ctr<ndim;ctr++) 1376 { 1377 partial_dim *= dim[ctr]; 1378 pdim[ctr] = dim[ctr]; 1379 #if !defined(PETSC_USE_COMPLEX) 1380 if (ctr==ndim-1) 1381 tot_dim *= (dim[ctr]/2+1); 1382 else 1383 tot_dim *= dim[ctr]; 1384 #endif 1385 } 1386 1387 if (size == 1) { 1388 #if defined(PETSC_USE_COMPLEX) 1389 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1390 n = N; 1391 #else 1392 //printf("The value of N is %d\n",N); 1393 //tot_dim = 2*N*(dim[ndim-1]/2+1); 1394 //tot_dim = ((long int)tot_dim)/dim[ndim-1]; 1395 //printf("The value of is %ld and %d\n",tot_dim,dim[ndim-1]); 1396 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1397 n = tot_dim; 1398 #endif 1399 1400 } else { 1401 ptrdiff_t alloc_local,local_n0,local_0_start;//local_1_end; 1402 switch (ndim){ 1403 case 1: 1404 #if !defined(PETSC_USE_COMPLEX) 1405 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1406 #else 1407 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1408 n = (PetscInt)local_n0; 1409 n1 = (PetscInt) local_n1; 1410 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1411 #endif 1412 break; 1413 case 2: 1414 #if defined(PETSC_USE_COMPLEX) 1415 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1416 /* 1417 PetscMPIInt rank; 1418 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1419 PetscSynchronizedFlush(comm); 1420 */ 1421 n = (PetscInt)local_n0*dim[1]; 1422 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1423 #else 1424 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1425 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1426 // n1 = 2*(PetscInt)local_n1*(dim[0]); 1427 // ierr = MatSetSizes(A,n1,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1428 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1429 #endif 1430 break; 1431 case 3: 1432 #if defined(PETSC_USE_COMPLEX) 1433 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1434 n = (PetscInt)local_n0*dim[1]*dim[2]; 1435 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1436 #else 1437 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1438 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1439 // n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 1440 // ierr = MatSetSizes(A,n1,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1441 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1442 #endif 1443 break; 1444 default: 1445 #if defined(PETSC_USE_COMPLEX) 1446 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1447 n = (PetscInt)local_n0*partial_dim; 1448 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1449 #else 1450 temp = pdim[ndim-1]; 1451 pdim[ndim-1] = temp/2 + 1; 1452 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1453 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1454 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1455 pdim[ndim-1] = temp; 1456 // temp = partial_dim*(dim[ndim-1]/2+1)*dim[0]/(dim[1]*dim[ndim-1]); 1457 // n1 = 2*local_n1*temp; 1458 // ierr = MatSetSizes(A,n1,n,N1,N1);CHKERRQ(ierr); 1459 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1460 #endif 1461 break; 1462 } 1463 } 1464 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1465 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1466 fft->data = (void*)fftw; 1467 1468 fft->n = n; 1469 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1470 fftw->partial_dim = partial_dim; 1471 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1472 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1473 1474 fftw->p_forward = 0; 1475 fftw->p_backward = 0; 1476 fftw->p_flag = FFTW_ESTIMATE; 1477 1478 if (size == 1){ 1479 A->ops->mult = MatMult_SeqFFTW; 1480 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1481 } else { 1482 A->ops->mult = MatMult_MPIFFTW; 1483 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1484 } 1485 fft->matdestroy = MatDestroy_FFTW; 1486 // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D 1487 //A->ops->getvecs = MatGetVecs_FFTW; 1488 A->assembled = PETSC_TRUE; 1489 PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 1490 PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 1491 PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1492 1493 /* get runtime options */ 1494 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1495 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1496 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1497 PetscOptionsEnd(); 1498 PetscFunctionReturn(0); 1499 } 1500 EXTERN_C_END 1501 1502 1503 1504 1505