xref: /petsc/src/sys/fileio/fretrieve.c (revision 09da24df01e50defd94bc4f7396f866a808ecea5)
1 
2 /*
3       Code for opening and closing files.
4 */
5 #include <petscsys.h>
6 #if defined(PETSC_HAVE_PWD_H)
7 #include <pwd.h>
8 #endif
9 #include <ctype.h>
10 #include <sys/stat.h>
11 #if defined(PETSC_HAVE_UNISTD_H)
12 #include <unistd.h>
13 #endif
14 #if defined(PETSC_HAVE_SYS_UTSNAME_H)
15 #include <sys/utsname.h>
16 #endif
17 #include <fcntl.h>
18 #include <time.h>
19 #if defined(PETSC_HAVE_SYS_SYSTEMINFO_H)
20 #include <sys/systeminfo.h>
21 #endif
22 
23 /*
24    Private routine to delete tmp/shared storage
25 
26    This is called by MPI, not by users.
27 
28    Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
29 
30 */
31 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_DelTmpShared(MPI_Comm comm,PetscMPIInt keyval,void *count_val,void *extra_state)
32 {
33   PetscErrorCode ierr;
34 
35   PetscFunctionBegin;
36   ierr = PetscInfo1(NULL,"Deleting tmp/shared data in an MPI_Comm %ld\n",(long)comm);CHKERRMPI(ierr);
37   ierr = PetscFree(count_val);CHKERRMPI(ierr);
38   PetscFunctionReturn(MPI_SUCCESS);
39 }
40 
41 /*@C
42    PetscGetTmp - Gets the name of the tmp directory
43 
44    Collective
45 
46    Input Parameters:
47 +  comm - MPI_Communicator that may share /tmp
48 -  len - length of string to hold name
49 
50    Output Parameters:
51 .  dir - directory name
52 
53    Options Database Keys:
54 +    -shared_tmp  - indicates the directory is shared among the MPI ranks
55 .    -not_shared_tmp - indicates the directory is not shared among the MPI ranks
56 -    -tmp tmpdir - name of the directory you wish to use as /tmp
57 
58    Environmental Variables:
59 +     PETSC_SHARED_TMP - indicates the directory is shared among the MPI ranks
60 .     PETSC_NOT_SHARED_TMP - indicates the directory is not shared among the MPI ranks
61 -     PETSC_TMP - name of the directory you wish to use as /tmp
62 
63    Level: developer
64 
65 
66 @*/
67 PetscErrorCode  PetscGetTmp(MPI_Comm comm,char dir[],size_t len)
68 {
69   PetscErrorCode ierr;
70   PetscBool      flg;
71 
72   PetscFunctionBegin;
73   ierr = PetscOptionsGetenv(comm,"PETSC_TMP",dir,len,&flg);CHKERRQ(ierr);
74   if (!flg) {
75     ierr = PetscStrncpy(dir,"/tmp",len);CHKERRQ(ierr);
76   }
77   PetscFunctionReturn(0);
78 }
79 
80 /*@C
81    PetscSharedTmp - Determines if all processors in a communicator share a
82          /tmp or have different ones.
83 
84    Collective
85 
86    Input Parameters:
87 .  comm - MPI_Communicator that may share /tmp
88 
89    Output Parameters:
90 .  shared - PETSC_TRUE or PETSC_FALSE
91 
92    Options Database Keys:
93 +    -shared_tmp  - indicates the directory is shared among the MPI ranks
94 .    -not_shared_tmp - indicates the directory is not shared among the MPI ranks
95 -    -tmp tmpdir - name of the directory you wish to use as /tmp
96 
97    Environmental Variables:
98 +     PETSC_SHARED_TMP  - indicates the directory is shared among the MPI ranks
99 .     PETSC_NOT_SHARED_TMP - indicates the directory is not shared among the MPI ranks
100 -     PETSC_TMP - name of the directory you wish to use as /tmp
101 
102    Level: developer
103 
104    Notes:
105    Stores the status as a MPI attribute so it does not have
106     to be redetermined each time.
107 
108       Assumes that all processors in a communicator either
109        1) have a common /tmp or
110        2) each has a separate /tmp
111       eventually we can write a fancier one that determines which processors
112       share a common /tmp.
113 
114    This will be very slow on runs with a large number of processors since
115    it requires O(p*p) file opens.
116 
117    If the environmental variable PETSC_TMP is set it will use this directory
118   as the "/tmp" directory.
119 
120 @*/
121 PetscErrorCode  PetscSharedTmp(MPI_Comm comm,PetscBool  *shared)
122 {
123   PetscErrorCode     ierr;
124   PetscMPIInt        size,rank,*tagvalp,sum,cnt,i;
125   PetscBool          flg,iflg;
126   FILE               *fd;
127   static PetscMPIInt Petsc_Tmp_keyval = MPI_KEYVAL_INVALID;
128   int                err;
129 
130   PetscFunctionBegin;
131   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
132   if (size == 1) {
133     *shared = PETSC_TRUE;
134     PetscFunctionReturn(0);
135   }
136 
137   ierr = PetscOptionsGetenv(comm,"PETSC_SHARED_TMP",NULL,0,&flg);CHKERRQ(ierr);
138   if (flg) {
139     *shared = PETSC_TRUE;
140     PetscFunctionReturn(0);
141   }
142 
143   ierr = PetscOptionsGetenv(comm,"PETSC_NOT_SHARED_TMP",NULL,0,&flg);CHKERRQ(ierr);
144   if (flg) {
145     *shared = PETSC_FALSE;
146     PetscFunctionReturn(0);
147   }
148 
149   if (Petsc_Tmp_keyval == MPI_KEYVAL_INVALID) {
150     ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_DelTmpShared,&Petsc_Tmp_keyval,NULL);CHKERRMPI(ierr);
151   }
152 
153   ierr = MPI_Comm_get_attr(comm,Petsc_Tmp_keyval,(void**)&tagvalp,(int*)&iflg);CHKERRMPI(ierr);
154   if (!iflg) {
155     char filename[PETSC_MAX_PATH_LEN],tmpname[PETSC_MAX_PATH_LEN];
156 
157     /* This communicator does not yet have a shared tmp attribute */
158     ierr = PetscMalloc1(1,&tagvalp);CHKERRQ(ierr);
159     ierr = MPI_Comm_set_attr(comm,Petsc_Tmp_keyval,tagvalp);CHKERRMPI(ierr);
160 
161     ierr = PetscOptionsGetenv(comm,"PETSC_TMP",tmpname,238,&iflg);CHKERRQ(ierr);
162     if (!iflg) {
163       ierr = PetscStrcpy(filename,"/tmp");CHKERRQ(ierr);
164     } else {
165       ierr = PetscStrcpy(filename,tmpname);CHKERRQ(ierr);
166     }
167 
168     ierr = PetscStrcat(filename,"/petsctestshared");CHKERRQ(ierr);
169     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
170 
171     /* each processor creates a /tmp file and all the later ones check */
172     /* this makes sure no subset of processors is shared */
173     *shared = PETSC_FALSE;
174     for (i=0; i<size-1; i++) {
175       if (rank == i) {
176         fd = fopen(filename,"w");
177         if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open test file %s",filename);
178         err = fclose(fd);
179         if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
180       }
181       ierr = MPI_Barrier(comm);CHKERRMPI(ierr);
182       if (rank >= i) {
183         fd = fopen(filename,"r");
184         if (fd) cnt = 1;
185         else cnt = 0;
186         if (fd) {
187           err = fclose(fd);
188           if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
189         }
190       } else cnt = 0;
191 
192       ierr = MPIU_Allreduce(&cnt,&sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
193       if (rank == i) unlink(filename);
194 
195       if (sum == size) {
196         *shared = PETSC_TRUE;
197         break;
198       } else if (sum != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Subset of processes share /tmp ");
199     }
200     *tagvalp = (int)*shared;
201     ierr = PetscInfo2(NULL,"processors %s %s\n",(*shared) ? "share":"do NOT share",(iflg ? tmpname:"/tmp"));CHKERRQ(ierr);
202   } else *shared = (PetscBool) *tagvalp;
203   PetscFunctionReturn(0);
204 }
205 
206 /*@C
207    PetscSharedWorkingDirectory - Determines if all processors in a communicator share a
208          working directory or have different ones.
209 
210    Collective
211 
212    Input Parameters:
213 .  comm - MPI_Communicator that may share working directory
214 
215    Output Parameters:
216 .  shared - PETSC_TRUE or PETSC_FALSE
217 
218    Options Database Keys:
219 +    -shared_working_directory - indicates the directory is shared among the MPI ranks
220 -    -not_shared_working_directory - indicates the directory is shared among the MPI ranks
221 
222    Environmental Variables:
223 +     PETSC_SHARED_WORKING_DIRECTORY - indicates the directory is shared among the MPI ranks
224 -     PETSC_NOT_SHARED_WORKING_DIRECTORY - indicates the directory is shared among the MPI ranks
225 
226    Level: developer
227 
228    Notes:
229    Stores the status as a MPI attribute so it does not have
230     to be redetermined each time.
231 
232       Assumes that all processors in a communicator either
233        1) have a common working directory or
234        2) each has a separate working directory
235       eventually we can write a fancier one that determines which processors
236       share a common working directory.
237 
238    This will be very slow on runs with a large number of processors since
239    it requires O(p*p) file opens.
240 
241 @*/
242 PetscErrorCode  PetscSharedWorkingDirectory(MPI_Comm comm,PetscBool  *shared)
243 {
244   PetscErrorCode     ierr;
245   PetscMPIInt        size,rank,*tagvalp,sum,cnt,i;
246   PetscBool          flg,iflg;
247   FILE               *fd;
248   static PetscMPIInt Petsc_WD_keyval = MPI_KEYVAL_INVALID;
249   int                err;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
253   if (size == 1) {
254     *shared = PETSC_TRUE;
255     PetscFunctionReturn(0);
256   }
257 
258   ierr = PetscOptionsGetenv(comm,"PETSC_SHARED_WORKING_DIRECTORY",NULL,0,&flg);CHKERRQ(ierr);
259   if (flg) {
260     *shared = PETSC_TRUE;
261     PetscFunctionReturn(0);
262   }
263 
264   ierr = PetscOptionsGetenv(comm,"PETSC_NOT_SHARED_WORKING_DIRECTORY",NULL,0,&flg);CHKERRQ(ierr);
265   if (flg) {
266     *shared = PETSC_FALSE;
267     PetscFunctionReturn(0);
268   }
269 
270   if (Petsc_WD_keyval == MPI_KEYVAL_INVALID) {
271     ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_DelTmpShared,&Petsc_WD_keyval,NULL);CHKERRMPI(ierr);
272   }
273 
274   ierr = MPI_Comm_get_attr(comm,Petsc_WD_keyval,(void**)&tagvalp,(int*)&iflg);CHKERRMPI(ierr);
275   if (!iflg) {
276     char filename[PETSC_MAX_PATH_LEN];
277 
278     /* This communicator does not yet have a shared  attribute */
279     ierr = PetscMalloc1(1,&tagvalp);CHKERRQ(ierr);
280     ierr = MPI_Comm_set_attr(comm,Petsc_WD_keyval,tagvalp);CHKERRMPI(ierr);
281 
282     ierr = PetscGetWorkingDirectory(filename,240);CHKERRQ(ierr);
283     ierr = PetscStrcat(filename,"/petsctestshared");CHKERRQ(ierr);
284     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
285 
286     /* each processor creates a  file and all the later ones check */
287     /* this makes sure no subset of processors is shared */
288     *shared = PETSC_FALSE;
289     for (i=0; i<size-1; i++) {
290       if (rank == i) {
291         fd = fopen(filename,"w");
292         if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN,"Unable to open test file %s",filename);
293         err = fclose(fd);
294         if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
295       }
296       ierr = MPI_Barrier(comm);CHKERRMPI(ierr);
297       if (rank >= i) {
298         fd = fopen(filename,"r");
299         if (fd) cnt = 1;
300         else cnt = 0;
301         if (fd) {
302           err = fclose(fd);
303           if (err) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SYS,"fclose() failed on file");
304         }
305       } else cnt = 0;
306 
307       ierr = MPIU_Allreduce(&cnt,&sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
308       if (rank == i) unlink(filename);
309 
310       if (sum == size) {
311         *shared = PETSC_TRUE;
312         break;
313       } else if (sum != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Subset of processes share working directory");
314     }
315     *tagvalp = (int)*shared;
316   } else *shared = (PetscBool) *tagvalp;
317   ierr = PetscInfo1(NULL,"processors %s working directory\n",(*shared) ? "shared" : "do NOT share");CHKERRQ(ierr);
318   PetscFunctionReturn(0);
319 }
320 
321 
322 /*@C
323     PetscFileRetrieve - Obtains a file from a URL or compressed
324         and copies into local disk space as uncompressed.
325 
326     Collective
327 
328     Input Parameter:
329 +   comm     - processors accessing the file
330 .   url      - name of file, including entire URL (with or without .gz)
331 -   llen     - length of localname
332 
333     Output Parameter:
334 +   localname - name of local copy of file - valid on only process zero
335 -   found - if found or retrieved the file - valid on all processes
336 
337     Notes:
338     if the file already exists local this function just returns without downloading it.
339 
340     Level: intermediate
341 @*/
342 PetscErrorCode  PetscFileRetrieve(MPI_Comm comm,const char url[],char localname[],size_t llen,PetscBool  *found)
343 {
344   char           buffer[PETSC_MAX_PATH_LEN],*par,*tlocalname,name[PETSC_MAX_PATH_LEN];
345   FILE           *fp;
346   PetscErrorCode ierr;
347   PetscMPIInt    rank;
348   size_t         len = 0;
349   PetscBool      flg1,flg2,flg3,flg4,download,compressed = PETSC_FALSE;
350 
351   PetscFunctionBegin;
352   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
353   if (!rank) {
354     *found = PETSC_FALSE;
355 
356     ierr = PetscStrstr(url,".gz",&par);CHKERRQ(ierr);
357     if (par) {
358       ierr = PetscStrlen(par,&len);CHKERRQ(ierr);
359       if (len == 3) compressed = PETSC_TRUE;
360     }
361 
362     ierr = PetscStrncmp(url,"ftp://",6,&flg1);CHKERRQ(ierr);
363     ierr = PetscStrncmp(url,"http://",7,&flg2);CHKERRQ(ierr);
364     ierr = PetscStrncmp(url,"file://",7,&flg3);CHKERRQ(ierr);
365     ierr = PetscStrncmp(url,"https://",8,&flg4);CHKERRQ(ierr);
366     download = (PetscBool) (flg1 || flg2 || flg3 || flg4);
367 
368     if (!download && !compressed) {
369       ierr = PetscStrncpy(localname,url,llen);CHKERRQ(ierr);
370       ierr = PetscTestFile(url,'r',found);CHKERRQ(ierr);
371       if (*found) {
372         ierr = PetscInfo1(NULL,"Found file %s\n",url);CHKERRQ(ierr);
373       } else {
374         ierr = PetscInfo1(NULL,"Did not find file %s\n",url);CHKERRQ(ierr);
375       }
376       goto done;
377     }
378 
379     /* look for uncompressed file in requested directory */
380     if (compressed) {
381       ierr = PetscStrncpy(localname,url,llen);CHKERRQ(ierr);
382       ierr = PetscStrstr(localname,".gz",&par);CHKERRQ(ierr);
383       *par = 0; /* remove .gz extension */
384       ierr = PetscTestFile(localname,'r',found);CHKERRQ(ierr);
385       if (*found) goto done;
386     }
387 
388     /* look for file in current directory */
389     ierr = PetscStrrchr(url,'/',&tlocalname);CHKERRQ(ierr);
390     ierr = PetscStrncpy(localname,tlocalname,llen);CHKERRQ(ierr);
391     if (compressed) {
392       ierr = PetscStrstr(localname,".gz",&par);CHKERRQ(ierr);
393       *par = 0; /* remove .gz extension */
394     }
395     ierr = PetscTestFile(localname,'r',found);CHKERRQ(ierr);
396     if (*found) goto done;
397 
398     if (download) {
399       /* local file is not already here so use curl to get it */
400       ierr = PetscStrncpy(localname,tlocalname,llen);CHKERRQ(ierr);
401       ierr = PetscStrcpy(buffer,"curl --fail --silent --show-error ");CHKERRQ(ierr);
402       ierr = PetscStrcat(buffer,url);CHKERRQ(ierr);
403       ierr = PetscStrcat(buffer," > ");CHKERRQ(ierr);
404       ierr = PetscStrcat(buffer,localname);CHKERRQ(ierr);
405 #if defined(PETSC_HAVE_POPEN)
406       ierr = PetscPOpen(PETSC_COMM_SELF,NULL,buffer,"r",&fp);CHKERRQ(ierr);
407       ierr = PetscPClose(PETSC_COMM_SELF,fp);CHKERRQ(ierr);
408 #else
409       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Cannot run external programs on this machine");
410 #endif
411       ierr = PetscTestFile(localname,'r',found);CHKERRQ(ierr);
412       if (*found) {
413         FILE      *fd;
414         char      buf[1024],*str,*substring;
415 
416         /* check if the file didn't exist so it downloaded an HTML message instead */
417         fd = fopen(localname,"r");
418         if (!fd) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PetscTestFile() indicates %s exists but fopen() cannot open it",localname);
419         str = fgets(buf,sizeof(buf)-1,fd);
420         while (str) {
421           ierr = PetscStrstr(buf,"<!DOCTYPE html>",&substring);CHKERRQ(ierr);
422           if (substring) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded",url);
423           ierr = PetscStrstr(buf,"Not Found",&substring);CHKERRQ(ierr);
424           if (substring) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded",url);
425           str = fgets(buf,sizeof(buf)-1,fd);
426         }
427         fclose(fd);
428       }
429     } else if (compressed) {
430       ierr = PetscTestFile(url,'r',found);CHKERRQ(ierr);
431       if (!*found) goto done;
432       ierr = PetscStrncpy(localname,url,llen);CHKERRQ(ierr);
433     }
434     if (compressed) {
435       ierr = PetscStrrchr(localname,'/',&tlocalname);CHKERRQ(ierr);
436       ierr = PetscStrncpy(name,tlocalname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
437       ierr = PetscStrstr(name,".gz",&par);CHKERRQ(ierr);
438       *par = 0; /* remove .gz extension */
439       /* uncompress file */
440       ierr = PetscStrcpy(buffer,"gzip -c -d ");CHKERRQ(ierr);
441       ierr = PetscStrcat(buffer,localname);CHKERRQ(ierr);
442       ierr = PetscStrcat(buffer," > ");CHKERRQ(ierr);
443       ierr = PetscStrcat(buffer,name);CHKERRQ(ierr);
444 #if defined(PETSC_HAVE_POPEN)
445       ierr = PetscPOpen(PETSC_COMM_SELF,NULL,buffer,"r",&fp);CHKERRQ(ierr);
446       ierr = PetscPClose(PETSC_COMM_SELF,fp);CHKERRQ(ierr);
447 #else
448       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Cannot run external programs on this machine");
449 #endif
450       ierr = PetscStrncpy(localname,name,llen);CHKERRQ(ierr);
451       ierr = PetscTestFile(localname,'r',found);CHKERRQ(ierr);
452     }
453   }
454   done:
455   ierr = MPI_Bcast(found,1,MPIU_BOOL,0,comm);CHKERRMPI(ierr);
456   ierr = MPI_Bcast(localname, llen, MPI_CHAR, 0, comm);CHKERRMPI(ierr);
457   PetscFunctionReturn(0);
458 }
459