xref: /petsc/lib/petsc/bin/PetscBinaryIO.py (revision 96fb840952472f0bde9edef16d0c2c1f359bff6a)
1"""PetscBinaryIO
2===============
3
4Provides
5  1. PETSc-named objects Vec, Mat, and IS that inherit numpy.ndarray
6  2. A class to read and write these objects from PETSc binary files.
7
8The standard usage of this module should look like:
9
10  >>> import PetscBinaryIO
11  >>> io = PetscBinaryIO.PetscBinaryIO()
12  >>> objects = io.readBinaryFile('file.dat')
13
14or
15
16  >>> import PetscBinaryIO
17  >>> import numpy
18  >>> vec = numpy.array([1., 2., 3.]).view(PetscBinaryIO.Vec)
19  >>> io = PetscBinaryIO.PetscBinaryIO()
20  >>> io.writeBinaryFile('file.dat', [vec,])
21
22to read in objects one at a time use such as
23
24  >>> import PetscBinaryIO
25  >>> io = PetscBinaryIO.PetscBinaryIO()
26  >>> fh = open('file.dat')
27  >>> objecttype = io.readObjectType(fh)
28  >>> if objecttype == 'Vec':
29  >>>   v = io.readVec(fh)
30
31   Note that one must read in the object type first and then call readVec(), readMat() etc.
32
33
34See also PetscBinaryIO.__doc__ and methods therein.
35
36This module can also be used as a command line tool for converting matrices.
37Run with --help to see options.
38"""
39
40import numpy as np
41import functools
42
43try:
44    basestring                  # Python-2 has basestring as a common parent of unicode and str
45except NameError:
46    basestring = str            # Python-3 is unicode through and through
47
48def update_wrapper_with_doc(wrapper, wrapped):
49    """Similar to functools.update_wrapper, but also gets the wrapper's __doc__ string"""
50    wdoc = wrapper.__doc__
51
52    functools.update_wrapper(wrapper, wrapped)
53    if wdoc is not None:
54        if wrapper.__doc__ is None:
55            wrapper.__doc__ = wdoc
56        else:
57            wrapper.__doc__ = wrapper.__doc__ + wdoc
58    return wrapper
59
60def wraps_with_doc(wrapped):
61    """Similar to functools.wraps, but also gets the wrapper's __doc__ string"""
62    return functools.partial(update_wrapper_with_doc, wrapped=wrapped)
63
64def decorate_with_conf(f):
65    """Decorates methods to take kwargs for precisions."""
66    @wraps_with_doc(f)
67    def decorated_f(self, *args, **kwargs):
68        """
69        Additional kwargs:
70          precision: 'single', 'double', '__float128' for scalars
71          indices: '32bit', '64-bit' integer size
72          complexscalars: True/False
73
74          Note these are set in order of preference:
75            1. kwargs if given here
76            2. PetscBinaryIO class __init__ arguments
77            3. PETSC_DIR/PETSC_ARCH defaults
78        """
79
80        changed = False
81        old_precision = self.precision
82        old_indices = self.indices
83        old_complexscalars = self.complexscalars
84
85        try:
86            self.precision = kwargs.pop('precision')
87        except KeyError:
88            pass
89        else:
90            changed = True
91
92        try:
93            self.indices = kwargs.pop('indices')
94        except KeyError:
95            pass
96        else:
97            changed = True
98
99        try:
100            self.complexscalars = kwargs.pop('complexscalars')
101        except KeyError:
102            pass
103        else:
104            changed = True
105
106        if changed:
107            self._update_dtypes()
108
109        result = f(self, *args, **kwargs)
110
111        if changed:
112            self.precision = old_precision
113            self.indices = old_indices
114            self.complexscalars = old_complexscalars
115            self._update_dtypes()
116
117        return result
118    return decorated_f
119
120
121class DoneWithFile(Exception): pass
122
123
124class Vec(np.ndarray):
125    """Vec represented as 1D numpy array
126
127    The best way to instantiate this class for use with writeBinaryFile()
128    is through the numpy view method:
129
130    vec = numpy.array([1,2,3]).view(Vec)
131    """
132    _classid = 1211214
133
134
135class MatDense(np.matrix):
136    """Mat represented as 2D numpy array
137
138    The best way to instantiate this class for use with writeBinaryFile()
139    is through the numpy view method:
140
141    mat = numpy.array([[1,0],[0,1]]).view(Mat)
142    """
143    _classid = 1211216
144
145
146class MatSparse(tuple):
147    """Mat represented as CSR tuple ((M, N), (rowindices, col, val))
148
149    This should be instantiated from a tuple:
150
151    mat = MatSparse( ((M,N), (rowindices,col,val)) )
152    """
153    _classid = 1211216
154    def __repr__(self):
155        return 'MatSparse: %s'%super(MatSparse, self).__repr__()
156
157
158class IS(np.ndarray):
159    """IS represented as 1D numpy array
160
161    The best way to instantiate this class for use with writeBinaryFile()
162    is through the numpy "view" method:
163
164    an_is = numpy.array([3,4,5]).view(IS)
165    """
166    _classid = 1211218
167
168
169class PetscBinaryIO(object):
170    """Reader/Writer class for PETSc binary files.
171
172    Note that by default, precisions for both scalars and indices, as well as
173    complex scalars, are picked up from the PETSC_DIR/PETSC_ARCH configuration
174    as set by environmental variables.
175
176    Alternatively, defaults can be overridden at class instantiation, or for
177    a given method call.
178    """
179
180    _classid = {1211216:'Mat',
181                1211214:'Vec',
182                1211218:'IS',
183                1211219:'Bag',
184                1211213:'Real'}
185
186    def __init__(self, precision=None, indices=None, complexscalars=None):
187        if (precision is None) or (indices is None) or (complexscalars is None):
188            import petsc_conf
189            defaultprecision, defaultindices, defaultcomplexscalars = petsc_conf.get_conf()
190            if precision is None:
191                if defaultprecision is None:
192                    precision = 'double'
193                else:
194                    precision = defaultprecision
195
196            if indices is None:
197                if defaultindices is None:
198                    indices = '32bit'
199                else:
200                    indices = defaultindices
201
202            if complexscalars is None:
203                if defaultcomplexscalars is None:
204                    complexscalars = False
205                else:
206                    complexscalars = defaultcomplexscalars
207
208        self.precision = precision
209        if self.precision == '__float128' :
210            raise RuntimeError('__float128 (quadruple) precision is not properly supported. One may use double precision by using -binary_write_double in PETSc and precision=\'double\' here')
211        self.indices = indices
212        self.complexscalars = complexscalars
213        self._update_dtypes()
214
215    def _update_dtypes(self):
216        if self.indices == '64bit':
217            self._inttype = np.dtype('>i8')
218        else:
219            self._inttype = np.dtype('>i4')
220
221        if self.precision == '__float128':
222            nbyte = 16
223        elif self.precision == 'single':
224            nbyte = 4
225        else:
226            nbyte = 8
227
228        if self.complexscalars:
229            name = 'c'
230            nbyte = nbyte * 2 # complex scalar takes twice as many bytes
231        else:
232            name = 'f'
233
234        self._scalartype = '>{0}{1}'.format(name, nbyte)
235
236    @decorate_with_conf
237    def readReal(self, fh):
238        """Reads a single real from a binary file handle, must be called after readObjectType()."""
239
240        try:
241            vals = np.fromfile(fh, dtype=self._scalartype, count=1)
242        except MemoryError:
243            raise IOError('Inconsistent or invalid real data in file')
244        if not len(vals) == 1:
245            raise IOError('Inconsistent or invalid real data in file')
246        return vals
247
248    @decorate_with_conf
249    def readVec(self, fh):
250        """Reads a PETSc Vec from a binary file handle, must be called after readObjectType()."""
251
252        nz = np.fromfile(fh, dtype=self._inttype, count=1)[0]
253        try:
254            vals = np.fromfile(fh, dtype=self._scalartype, count=nz)
255        except MemoryError:
256            raise IOError('Inconsistent or invalid Vec data in file')
257        if not len(vals) == nz:
258            raise IOError('Inconsistent or invalid Vec data in file')
259        return vals.view(Vec)
260
261    @decorate_with_conf
262    def writeVec(self, fh, vec):
263        """Writes a PETSc Vec to a binary file handle."""
264
265        metadata = np.array([Vec._classid, len(vec)], dtype=self._inttype)
266        metadata.tofile(fh)
267        vec.astype(self._scalartype).tofile(fh)
268        return
269
270    @decorate_with_conf
271    def readMatSparse(self, fh):
272        """Reads a PETSc Mat, returning a sparse representation of the data. Must be called after readObjectType()
273
274        (M,N), (I,J,V) = readMatSparse(fid)
275
276        Input:
277          fid : file handle to open binary file.
278        Output:
279          M,N : matrix size
280          I,J : arrays of row and column for each nonzero
281          V: nonzero value
282        """
283
284        try:
285            M,N,nz = np.fromfile(fh, dtype=self._inttype, count=3)
286            I = np.empty(M+1, dtype=self._inttype)
287            I[0] = 0
288            rownz = np.fromfile(fh, dtype=self._inttype, count=M)
289            np.cumsum(rownz, out=I[1:])
290            assert I[-1] == nz
291
292            J = np.fromfile(fh, dtype=self._inttype,    count=nz)
293            assert len(J) == nz
294            V = np.fromfile(fh, dtype=self._scalartype, count=nz)
295            assert len(V) == nz
296        except (AssertionError, MemoryError, IndexError):
297            raise IOError('Inconsistent or invalid Mat data in file')
298
299        return MatSparse(((M, N), (I, J, V)))
300
301    @decorate_with_conf
302    def writeMatSparse(self, fh, mat):
303        """Writes a Mat into a PETSc binary file handle"""
304
305        ((M,N), (I,J,V)) = mat
306        metadata = np.array([MatSparse._classid,M,N,I[-1]], dtype=self._inttype)
307        rownz = I[1:] - I[:-1]
308
309        assert len(J.shape) == len(V.shape) == len(I.shape) == 1
310        assert len(J) == len(V) == I[-1] == rownz.sum()
311        assert (rownz > -1).all()
312
313        metadata.tofile(fh)
314        rownz.astype(self._inttype).tofile(fh)
315        J.astype(self._inttype).tofile(fh)
316        V.astype(self._scalartype).tofile(fh)
317        return
318
319    @decorate_with_conf
320    def readMatDense(self, fh):
321        """Reads a PETSc Mat, returning a dense representation of the data, must be called after readObjectType()"""
322
323        try:
324            M,N,nz = np.fromfile(fh, dtype=self._inttype, count=3)
325            I = np.empty(M+1, dtype=self._inttype)
326            I[0] = 0
327            rownz = np.fromfile(fh, dtype=self._inttype, count=M)
328            np.cumsum(rownz, out=I[1:])
329            assert I[-1] == nz
330
331            J = np.fromfile(fh, dtype=self._inttype, count=nz)
332            assert len(J) == nz
333            V = np.fromfile(fh, dtype=self._scalartype, count=nz)
334            assert len(V) == nz
335
336        except (AssertionError, MemoryError, IndexError):
337            raise IOError('Inconsistent or invalid Mat data in file')
338
339        mat = np.zeros((M,N), dtype=self._scalartype)
340        for row in range(M):
341            rstart, rend = I[row:row+2]
342            mat[row, J[rstart:rend]] = V[rstart:rend]
343        return mat.view(MatDense)
344
345    @decorate_with_conf
346    def readMatSciPy(self, fh):
347        from scipy.sparse import csr_matrix
348        (M, N), (I, J, V) = self.readMatSparse(fh)
349        return csr_matrix((V, J, I), shape=(M, N))
350
351    @decorate_with_conf
352    def writeMatSciPy(self, fh, mat):
353        from scipy.sparse import csr_matrix
354        if hasattr(mat, 'tocsr'):
355            mat = mat.tocsr()
356        assert isinstance(mat, csr_matrix)
357        V = mat.data
358        M,N = mat.shape
359        J = mat.indices
360        I = mat.indptr
361        return self.writeMatSparse(fh, (mat.shape, (mat.indptr,mat.indices,mat.data)))
362
363    @decorate_with_conf
364    def readMat(self, fh, mattype='sparse'):
365        """Reads a PETSc Mat from binary file handle, must be called after readObjectType()
366
367        optional mattype: 'sparse" or 'dense'
368
369        See also: readMatSparse, readMatDense
370        """
371
372        if mattype == 'sparse':
373            return self.readMatSparse(fh)
374        elif mattype == 'dense':
375            return self.readMatDense(fh)
376        elif mattype == 'scipy.sparse':
377            return self.readMatSciPy(fh)
378        else:
379            raise RuntimeError('Invalid matrix type requested: choose sparse/dense/scipy.sparse')
380
381    @decorate_with_conf
382    def readIS(self, fh):
383        """Reads a PETSc Index Set from binary file handle, must be called after readObjectType()"""
384
385        try:
386            nz = np.fromfile(fh, dtype=self._inttype, count=1)[0]
387            v = np.fromfile(fh, dtype=self._inttype, count=nz)
388            assert len(v) == nz
389        except (MemoryError,IndexError):
390            raise IOError('Inconsistent or invalid IS data in file')
391        return v.view(IS)
392
393    @decorate_with_conf
394    def writeIS(self, fh, anis):
395        """Writes a PETSc IS to binary file handle."""
396
397        metadata = np.array([IS._classid, len(anis)], dtype=self._inttype)
398        metadata.tofile(fh)
399        anis.astype(self._inttype).tofile(fh)
400        return
401
402    @decorate_with_conf
403    def readObjectType(self, fid):
404        """Returns the next object type as a string in the file"""
405        try:
406              header = np.fromfile(fid, dtype=self._inttype, count=1)[0]
407        except (MemoryError, IndexError):
408              raise DoneWithFile
409        try:
410              objecttype = self._classid[header]
411        except KeyError:
412              raise IOError('Invalid PetscObject CLASSID or object not implemented for python')
413        return objecttype
414
415    @decorate_with_conf
416    def readBinaryFile(self, fid, mattype='sparse'):
417        """Reads a PETSc binary file, returning a tuple of the contained objects.
418
419        objects = self.readBinaryFile(fid, **kwargs)
420
421        Input:
422          fid : either file name or handle to an open binary file.
423
424        Output:
425          objects : tuple of objects representing the data in numpy arrays.
426
427        Optional:
428          mattype :
429            'sparse': Return matrices as raw CSR: (M, N), (row, col, val).
430            'dense': Return matrices as MxN numpy arrays.
431            'scipy.sparse': Return matrices as scipy.sparse objects.
432        """
433
434        close = False
435
436        if isinstance(fid, basestring):
437            fid = open(fid, 'rb')
438            close = True
439
440        objects = []
441        try:
442            while True:
443                objecttype = self.readObjectType(fid)
444
445                if objecttype == 'Vec':
446                    objects.append(self.readVec(fid))
447                elif objecttype == 'IS':
448                    objects.append(self.readIS(fid))
449                elif objecttype == 'Mat':
450                    objects.append(self.readMat(fid,mattype))
451                elif objecttype == 'Real':
452                    objects.append(self.readReal(fid))
453                elif objecttype == 'Bag':
454                    raise NotImplementedError('Bag Reader not yet implemented')
455        except DoneWithFile:
456            pass
457        finally:
458            if close:
459                fid.close()
460
461        return tuple(objects)
462
463    @decorate_with_conf
464    def writeBinaryFile(self, fid, objects):
465        """Writes a PETSc binary file containing the objects given.
466
467        readBinaryFile(fid, objects)
468
469        Input:
470          fid : either file handle to an open binary file, or filename.
471          objects : list of objects representing the data in numpy arrays,
472                    which must be of type Vec, IS, MatSparse, or MatSciPy.
473        """
474        close = False
475        if isinstance(fid, basestring):
476            fid = open(fid, 'wb')
477            close = True
478
479        for petscobj in objects:
480            if (isinstance(petscobj, Vec)):
481                self.writeVec(fid, petscobj)
482            elif (isinstance(petscobj, IS)):
483                self.writeIS(fid, petscobj)
484            elif (isinstance(petscobj, MatSparse)):
485                self.writeMatSparse(fid, petscobj)
486            elif (isinstance(petscobj, MatDense)):
487                if close:
488                    fid.close()
489                raise NotImplementedError('Writing a dense matrix is not yet supported')
490            else:
491                try:
492                    self.writeMatSciPy(fid, petscobj)
493                except AssertionError:
494                    if close:
495                        fid.close()
496                    raise TypeError('Object %s is not a valid PETSc object'%(petscobj.__repr__()))
497        if close:
498            fid.close()
499        return
500
501def _convert(infile, outfile, args):
502    ext = os.path.splitext(infile)[1]
503    if ext in ('.mtx', '.npz'):
504        if ext == '.mtx':
505            import scipy.io
506            mat = scipy.io.mmread(infile)
507        else:
508            import scipy.sparse
509            mat = scipy.sparse.load_npz(infile)
510        if args.symmetrize:
511            mat = (mat + mat.T)/2
512        with open(outfile, 'wb') as fd:
513            PetscBinaryIO().writeMatSciPy(fd, mat, precision=args.precision, complexscalars=args.complex, indices=args.indices)
514    else:
515        print('Unknown format: {}'.format(infile))
516        exit(1)
517
518if __name__ == "__main__":
519    import argparse
520    import os
521    parser = argparse.ArgumentParser('PetscBinaryIO')
522    subparsers = parser.add_subparsers(title='commands')
523    convert = subparsers.add_parser('convert', help='convert matrices to/from PETSc format')
524    convert.add_argument('infile', help='file to convert')
525    convert.add_argument('-o', '--outfile', help='name of output file (defaults to {infile}.petsc)')
526    convert.add_argument('--symmetrize', help='Symmetrize (A+A^T)/2 during conversion', action='store_true')
527    convert.add_argument('--precision', help='Precision of scalar values', choices=['single', 'double', '__float128'], default='double')
528    convert.add_argument('--complex', help='Use complex scalars', action='store_true')
529    convert.add_argument('--indices', help='Integer size for indices', choices=['32bit', '64bit'], default='32bit')
530    args = parser.parse_args()
531    if args.outfile is None:
532        args.outfile = os.path.splitext(args.infile)[0] + '.petsc'
533    _convert(args.infile, args.outfile, args)
534