! ***********************************************************************
!
!   Copyright (C) 2015  Bill Paxton
!
!   This file is part of MESA.
!
!   MESA is free software; you can redistribute it and/or modify
!   it under the terms of the GNU General Library Public License as published
!   by the Free Software Foundation; either version 2 of the License, or
!   (at your option) any later version.
!
!   MESA is distributed in the hope that it will be useful,
!   but WITHOUT ANY WARRANTY; without even the implied warranty of
!   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!   GNU Library General Public License for more details.
!
!   You should have received a copy of the GNU Library General Public License
!   along with this software; if not, write to the Free Software
!   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
!
! ***********************************************************************


! this is an old version of umfpack from back when it was written in fortran
! our copy is derived from the version on Frank Timmes' cococubed website.
! we disable the HSL routines mc21b_hsl and mc13e_hsl.




!#ifdef DBLE
      module mod_umf_dble
!#else
      module mod_umf_quad
!#endif
      
#ifdef DBLE
      use const_def, only: dp
      use utils_lib, only: is_bad_num
#define is_bad is_bad_num
#else
      use const_def, only: qp, dp
      use utils_lib, only: is_bad_quad
#define is_bad is_bad_quad
#endif
      
      implicit none
      
#ifdef DBLE
      integer, parameter :: fltp = dp
#else
      integer, parameter :: fltp = qp
#endif
      
      integer, parameter :: num_umf_ipar_decsol = 0
      integer, parameter :: num_umf_rpar_decsol = 1
      
      
!      contains




        subroutine umd21i (keep, cntl, icntl)
        integer icntl (20), keep (20)
        real(fltp) cntl (10)
        
!=== umd21i ============================================================
! 
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  hsl compatibility:  this routine has the same arguments as ma38i/id. 

!=======================================================================
!  user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  initialize user-controllable parameters to default values, and
!  non-user controllable parameters.  this routine is normally
!  called once prior to any call to umd2fa.
!
!  this routine sets the default control parameters.  we recommend
!  changing these defaults under certain circumstances:
!
!  (1) if you know that your matrix has nearly symmetric nonzero
!       pattern, then we recommend setting icntl (6) to 1 so that
!       diagonal pivoting is preferred.  this can have a significant
!       impact on the performance for matrices that are essentially
!       symmetric in pattern.
!
!   (2) if you know that your matrix is not reducible to block
!       triangular form, then we recommend setting icntl (4) to 0
!       so that umfpack does not try to permute the matrix to block
!       triangular form (it will not do any useful work and will
!       leave the matrix in its irreducible form).  the work saved
!       is typically small, however.
!
!   the other control parameters typically have less effect on overall
!   performance.

!=======================================================================
!  installation note:
!=======================================================================
!
!  this routine can be modified on installation to reflect the computer
!  or environment in which this package is installed (printing control,
!  maximum integer, block size, and machine epsilon in particular).  if
!  you, the installer, modify this routine, please comment out the
!  original code, and add comments (with date) to describe the
!  installation.  do not delete any original code.

!=======================================================================
!  arguments:
!=======================================================================

!               --------------------------------------------------------
!  icntl:       an integer array of size 20.  need not be set by
!               caller on input.  on output, it contains default
!               integer control parameters.
!
!  icntl (1):   fortran output unit for error messages.
!               default: 6
!
!  icntl (2):   fortran output unit for diagnostic messages.
!               default: 6
!
!  icntl (3):   printing-level.
!               0 or less: no output
!               1: error messages only
!               2: error messages and terse diagnostics
!               3: as 2, and print first few entries of all input and
!                       output arguments.  invalid and duplicate entries
!                       are printed.
!               4: as 2, and print all entries of all input and
!                       output arguments.  invalid and duplicate entries
!                       are printed.  the entire input matrix and its
!                       factors are printed.
!               5: as 4, and print out information on the data
!                       structures used to represent the lu factors,
!                       the assembly dag, etc.
!               default: 2
!
!  icntl (4):   whether or not to attempt a permutation to block
!               triangular form.  if equal to one, then attempt the
!               permutation.  if you know the matrix is not reducible
!               to block triangular form, then setting icntl (4) to
!               zero can save a small amount of computing time.
!               default: 1 (attempt the permutation)
!
!  icntl (5):   the number of columns to examine during the global
!               pivot search.  a value less than one is treated as one.
!               default: 4
!
!  icntl (6):   if not equal to zero, then pivots from the diagonal
!               of a (or the diagonal of the block-triangular form) are
!               preferred.  if the nonzero pattern of the matrix is
!               basically symmetric, we recommend that you change this
!               default value to 1 so that pivots on the diagonal
!               are preferred.
!               default: 0 (do not prefer the diagonal)
!
!  icntl (7):   block size for the blas, controlling the tradeoff
!               between the level-2 and level-3 blas.  values less than
!               one are treated as one.
!               default: 16, which is suitable for the cray ymp.
!
!  icntl (8):   number of steps of iterative refinement to perform.
!               values less than zero are treated as zero.  the matrix
!               must be preserved for iterative refinement to be done
!               (job=1 in umd2fa or umd2rf).
!               default: 0  (no iterative refinement)
!
!  icntl (9 ... 20):  set to zero.  reserved for future releases.

!               --------------------------------------------------------
!  cntl:        a real(fltp) array of size 10.
!               need not be set by caller on input.  on output, contains
!               default control parameters.
!
!  cntl (1):    pivoting tradeoff between sparsity-preservation
!               and numerical stability.  an entry a(k,k) is numerically
!               acceptable if:
!                  abs (a(k,k)) >= cntl (1) * max (abs (a(*,k)))
!               values less than zero are treated as zero (no numerical
!               constraints).  values greater than one are treated as
!               one (partial pivoting with row interchanges).
!               default: 0.1
!
!  cntl (2):    amalgamation parameter.  if the first pivot in a
!               frontal matrix has row degree r and column degree c,
!               then a working array of size
!                  (cntl (2) * c) - by - (cntl (2) * r)
!               is allocated for the frontal matrix.  subsequent pivots
!               within the same frontal matrix must fit within this
!               working array, or they are not selected for this frontal
!               matrix.  values less than one are treated as one (no
!               fill-in due to amalgamation).  some fill-in due to
!               amalgamation is necessary for efficient use of the blas
!               and to reduce the assembly operations required.
!               default: 2.0
!
!  cntl (3):    normally not modified by the user.
!               defines the smallest positive number,
!               epsilon = cntl (3), such that fl (1.0 + epsilon)
!               is greater than 1.0 (fl (x) is the floating-point
!               representation of x).  if the floating-point mantissa
!               is binary, then cntl (3) is 2 ** (-b+1), where b
!               is the number of bits in the mantissa (including the
!               implied bit, if applicable).
!
!               typical defaults:
!               for ieee real(fltp), cntl (3) = 2 ** (-53+1)
!               for ieee single precision, cntl (3) = 2 ** (-24+1)
!               for cray real(fltp), cntl (3) = 2 ** (-96+1)
!               for cray single precision, cntl (3) = 2 ** (-48+1)
!
!               a value of cntl (3) less than or equal to zero
!               or greater than 2 ** (-15) is treated as 2 ** (-15),
!               which assumes that any floating point representation
!               has at least a 16-bit mantissa.  cntl (3) is only
!               used in umd2s2 to compute the sparse backward error
!               estimates, rinfo (7) and rinfo (8), when
!               icntl (8) > 0 (the default is icntl (8) = 0,
!               so by default, cntl (3) is not used).
!
!  cntl (4 ... 10):  set to zero.  reserved for future releases.

!               --------------------------------------------------------
!  keep:        an integer array of size 20.
!               need not be set by the caller.  on output, contains
!               integer control parameters that are (normally) non-user
!               controllable (but can of course be modified by the
!               "expert" user or library installer).
!
!  keep (1 ... 5):  unmodified (see umd2fa or umd2rf for a description).
!
!  keep (6):    largest representable positive integer.  set to
!               2^31 - 1 = 2147483647 for 32-bit machines with 2's
!               complement arithmetic (the usual case).
!               default: 2147483647
!
!  keep (7) and keep (8): a column is treated as "dense" if
!               it has more than
!               max (0, keep(7), keep(8)*int(sqrt(float(n))))
!               original entries.  "dense" columns are treated
!               differently that "sparse" rows and columns.  dense
!               columns are transformed into a priori contribution
!               blocks of dimension cdeg-by-1, where cdeg is the number
!               of original entries in the column.  modifying these two
!               parameters can change the pivot order.
!               default:  keep (7) = 64
!               default:  keep (8) = 1
!
!  keep (9 ... 20):  set to zero.  reserved for future releases.

!## end of user documentation for umd21i ###############################

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   user routine

!=======================================================================
!  local scalars:
!=======================================================================

        integer i

!  i:       loop index

!=======================================================================
!  executable statments:
!=======================================================================

!       ----------------------------------------------------------------
!       integer control parameters:
!       ----------------------------------------------------------------

        icntl (1) = 6
        icntl (2) = 6
        icntl (3) = 2
        icntl (4) = 1
        icntl (5) = 4
        icntl (6) = 0
        icntl (7) = 16
        icntl (8) = 0

!       icntl (9 ... 20) is reserved for future releases:
        do 10 i = 9, 20 
           icntl (i) = 0
10      continue 

!       ----------------------------------------------------------------
!       control parameters:
!       ----------------------------------------------------------------

        cntl (1) = 0.1
        cntl (2) = 2

!       ieee real(fltp):  epsilon = 2 ** (-53)
        cntl (3) = 2.0 ** (-52)

!       cntl (4 ... 10) is reserved for future releases:
        do 30 i = 4, 10 
           cntl (i) = 0
30      continue 

!       ----------------------------------------------------------------
!       integer control parameters in keep:
!       ----------------------------------------------------------------

        keep (6) = 2147483647
        keep (7) = 64
        keep (8) = 1

!       keep (9 ... 20) is reserved for future releases:
        do 20 i = 9, 20 
           keep (i) = 0
20      continue 

        return
        end subroutine umd21i
        subroutine umd2co (n, nz, transa, xx, xsize, info, icntl,
     >          ii, isize, w, wp, who)
        integer isize, ii (isize), n, nz, w (n), wp (n+1), info (40),
     >          icntl (20), xsize, who
        real(fltp)
     >          xx (xsize)
        logical transa
  
!=== umd2co ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  convert input matrix (ii,xx,n,nz) into from triplet form to column-
!  oriented form, optionally transposing the input matrix (single and
!  real(fltp) versions only).  remove invalid entries and
!  duplicate entries.

!=======================================================================
!  input:
!=======================================================================
!
!       n:              size of the matrix
!       nz:             number of nonzeros in the input matrix

!       transa:         if true then transpose input matrix

!       xx (1..nz):     values of triplet form
!       xsize:          size of xx, must be >= 2*nz
!       ii (1..2*nz):   row and col indices of triplet form
!       isize:          size of ii, must be >= max (2*nz,n+1) + nz
!       icntl:          integer control parameters
!       who:            who called umd2co, 1: umd2fa, 2: umd2rf
!
!       ii must be at least of size (nz + max (2*nz, n+1))
!       xx must be at least of size (nz + max (  nz, n+1))
!
!       input triplet matrix:

!          if (transa) is false:
!               ii (p)          i, row index, for p = 1..nz
!               ii (nz+p)       j, col index
!               xx (p)          a_ij
!          if (transa) is true:
!               ii (p)          j, col index, for p = 1..nz
!               ii (nz+p)       i, row index
!               xx (p)          a_ij

!=======================================================================
!  workspace:
!=======================================================================
!
!       w (1..n)

!=======================================================================
!  output: 
!=======================================================================
!
!       nz:             number of nonzeros in the output matrix,
!                       after removing invalid entries, and summing up
!                       duplicate entries
!       ii (n+2..nz+n+1): row indices in column-form
!       xx (1..nz):     values in column-form.
!       info (1):       error flag
!       info (3):       invalid entries
!       info (2):       duplicate entries
!       info (5):       remaining valid entries
!       info (6):       remaining valid entries
!       info (7):       0
!       wp (1..n+1)     column pointers for column form
!       ii (1..n+1)     column pointers for column form

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutines:  umd2fa, umd2rf
!       subroutines called:     umd2er, umd2p2
!       functions called:       max
        intrinsic max

!=======================================================================
!  local scalars:
!=======================================================================

        integer row, col, pdest, p, nz1, pcol, ip, xp, io, prl, ninvld,
     >          ndupl, i
        logical pr3

!  row:     row index
!  col:     column index
!  pdest:   location of an entry in the column-form, for dupl. removal
!  p:       pointer
!  nz1:     number of entries after removing invalid or duplic. entries
!  pcol:    column col starts here after duplicates removed
!  ip:      column-form copy of matrix placed in ii (ip...ip+nz-1)
!  xp:      column-form copy of matrix placed in xx (xp...xp+nz-1)
!  ninvld:  number of invalid entries
!  ndupl:   number of duplicate entries
!  i:       a row index if transa is true, a column index otherwise
!  io:      i/o unit for warning messages (for invalid or dupl. entries)
!  prl:     printing level
!  pr3:     true if printing invalid and duplicate entries

!=======================================================================
!  executable statements:
!=======================================================================

!-----------------------------------------------------------------------
!  get arguments and check memory sizes
!-----------------------------------------------------------------------

        io = icntl (2)
        prl = icntl (3)
        pr3 = prl .ge. 3 .and. io .ge. 0

!-----------------------------------------------------------------------
!  count nonzeros in columns and check for invalid entries
!-----------------------------------------------------------------------

        ninvld = 0
        ndupl = 0
        do 10 col = 1, n 
           w (col) = 0
10      continue 
        nz1 = nz
        do 20 p = nz, 1, -1 
           row = ii (p)
           col = ii (nz+p)
           if (row.lt.1.or.row.gt.n.or.col.lt.1.or.col.gt.n) then 
!             this entry is invalid - delete it
              if (pr3) then 
!                print the offending entry on the diagnostic i/o unit
                 call umd2p2 (who, 99, row, col, xx(p), io)
              endif 
              ii (p)    = ii (nz1)
              ii (nz+p) = ii (nz+nz1)
              xx (p)    = xx (nz1)
              nz1 = nz1 - 1
           else 

              if (transa) then 
!                factorizing a transpose
                 w (row) = w (row) + 1
              else 
!                factorizing a
                 w (col) = w (col) + 1
              endif 

           endif 
20      continue 
        ninvld = nz - nz1
        if (ninvld .ne. 0) then 
!          invalid entries found - set warning flag and continue
           call umd2er (who, icntl, info, 1, ninvld)
        endif 

!-----------------------------------------------------------------------
!  convert triplet form to column-form
!-----------------------------------------------------------------------

        wp (1) = 1
        do 30 i = 1, n 
           wp (i+1) = wp (i) + w (i)
30      continue 
        do 40 i = 1, n 
           w (i) = wp (i)
40      continue 

!       ----------------------------------------------------------------
!       construct column-form in ii (2*nz+1..3*nz) and xx (nz+1..2*nz)
!       ----------------------------------------------------------------

        ip = max (2*nz, n+1)
        xp = nz

        if (transa) then 
           do 50 p = 1, nz1 
              row = ii (p)
              col = ii (nz+p)
              ii (ip + w (row)) = col
              xx (xp + w (row)) = xx (p)
              w (row) = w (row) + 1
50         continue 
        else 

           do 60 p = 1, nz1 
              row = ii (p)
              col = ii (nz+p)
              ii (ip + w (col)) = row
              xx (xp + w (col)) = xx (p)
              w (col) = w (col) + 1
60         continue 

        endif 

!       ----------------------------------------------------------------
!       shift the matrix back to ii (n+2..nz+n+1) and xx (n+2..nz+n+1)
!       ----------------------------------------------------------------

        nz = nz1
!fpp$ nodepchk l
        do 70 p = 1, nz 
           ii (n+1+p) = ii (ip+p)
           xx (p) = xx (xp+p)
70      continue 

!-----------------------------------------------------------------------
!  remove duplicate entries by adding them up
!-----------------------------------------------------------------------

        do 80 row = 1, n 
           w (row) = 0
80      continue 
        pdest = 1
        do 100 col = 1, n 
           pcol = pdest
           do 90 p = wp (col), wp (col+1)-1 
              row = ii (n+1+p)
              if (w (row) .ge. pcol) then 
!                this is a duplicate entry
                 xx (w (row)) = xx (w (row)) + xx (p)
                 if (pr3) then 
!                   print the duplicate entry on the diagnostic i/o
!                   unit.  the row and column indices printed reflect
!                   the input matrix.

                    if (transa) then 
                       call umd2p2 (who, 98, col, row, xx (p), io)
                    else 
                       call umd2p2 (who, 98, row, col, xx (p), io)
                    endif 

                 endif 
              else 
!                this is a new entry, store and record where it is
                 w (row) = pdest
                 if (pdest .ne. p) then 
                    ii (n+1+pdest) = row
                    xx (pdest) = xx (p)
                 endif 
                 pdest = pdest + 1
              endif 
90         continue 
           wp (col) = pcol
100     continue 
        wp (n+1) = pdest
        nz1 = pdest - 1
        ndupl = nz - nz1
        if (ndupl .ne. 0) then 
!          duplicate entries found - set warning flag and continue
           call umd2er (who, icntl, info, 2, ndupl)
        endif 
        nz = nz1

!-----------------------------------------------------------------------
!  save column pointers in ii (1..n+1)
!-----------------------------------------------------------------------

        do 110 col = 1, n+1 
           ii (col) = wp (col)
110     continue 

        info (2) = ndupl
        info (3) = ninvld
        info (5) = nz
        info (6) = nz
        info (7) = 0
        if (nz .eq. 0) then 
!          set error flag if all entries are invalid
           call umd2er (who, icntl, info, -2, -1)
        endif 
        return
        end subroutine umd2co
        subroutine umd2er (who, icntl, info, error, s)
        integer who, icntl (20), info (40), error, s
  
!=== umd2er ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  print error and warning messages, and set error flags.

!=======================================================================
!  input:
!=======================================================================
!
!       who             which user-callable routine called:
!                       1: umd2fa, 2: umd2rf, 3: umd2so
!       icntl (1):      i/o unit for error and warning messages
!       icntl (3):      printing level
!       info (1):       the error/warning status
!       error:          the applicable error (<0) or warning (>0).
!                       see umd2p2 for a description.
!       s:              the relevant offending value

!=======================================================================
!  output: 
!=======================================================================
!
!       info (1):       the error/warning status

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutines:  umd2co, umd2fa, umd2f0, umd2f1, umd2f2,
!                               umd2rf, umd2r0, umd2r2, umd2so, umd2s2
!       subroutines called:     umd2p2
!       functions called:       mod
        intrinsic mod

!=======================================================================
!  local scalars:
!=======================================================================

        logical both
        real(fltp)
     >          ignore_array(1), ignore
        integer ioerr, prl

!  ioerr:   i/o unit for error and warning messages
!  prl:     printing level
!  both:    if true, then combine errors -3 and -4 into error -5

!=======================================================================
!  executable statements:
!=======================================================================

        ignore = 0
        ioerr = icntl (1)
        prl = icntl (3)
        if (error .lt. 0) then 
!          this is an error message
           both = (info (1) .eq. -3 .and. error .eq. -4) .or.
     >            (info (1) .eq. -4 .and. error .eq. -3)
           if (both) then 
!             combine error -3 (out of integer memory) and error -4
!             (out of real memory)
              info (1) = -5
           else 
              info (1) = error
           endif 
           if (prl .ge. 1) then 
              call umd2p2 (who, error, s, 0, ignore, ioerr)
           endif 
        else if (error .gt. 0) then 
!          this is a warning message
           if (info (1) .ge. 0) then 
!             do not override a prior error setting, sum up warnings
              if (mod (info (1) / error, 2) .eq. 0) then 
                 info (1) = info (1) + error
              endif 
           endif 
           if (prl .ge. 2) then 
              call umd2p2 (who, error, s, 0, ignore, ioerr)
           endif 
        endif 
        return
        end subroutine umd2er
        subroutine umd2f0 (n, nz, cp, xx, xsize, ii, isize, xtail,
     >          itail, iuse, xuse, nzoff, nblks, icntl, cntl, info,
     >          rinfo, presrv, ap, ai, ax, an, anz, keep, ne)
        integer n, nz, isize, ii (isize), icntl (20), info (40),
     >          cp (n+1), xsize, xtail, itail, iuse, xuse, an, anz,
     >          ap (an+1), ai (anz), keep (20), nzoff, nblks, ne
        logical presrv
        real(fltp)
     >          xx (xsize), ax (anz)
        real(fltp)
     >          cntl (10), rinfo (20)
  
!=== umd2f0 ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  factorize an unsymmetric sparse matrix in column-form, optionally
!  permuting the matrix to upper block triangular form and factorizing
!  each diagonal block.

!=======================================================================
!  input:
!=======================================================================
!
!       n:              order of matrix
!       nz:             entries in matrix, after removing duplicates
!                       and invalid entries.
!       ne:             number of triplets, unchanged from umd2fa
!       cp (1..n+1):    column pointers of input matrix
!       presrv:         if true then preserve original matrix
!       xsize:          size of xx
!       isize:          size of ii
!       iuse:           memory usage in index on input
!       xuse:           memory usage in value on input
!       icntl:          integer control parameters, see umd21i
!       cntl:           real control parameters, see umd21i
!       keep (6..8):    integer control parameters, see umd21i
!
!       if presrv is true:
!           an:                 = n, order of preserved matrix
!           anz:                = anz, order of preserved matrix
!           ap (1..an+1):       column pointers of preserved matrix
!           ai (1..nz):         row indices of preserved matrix
!           ax (1..nz):         values of preserved matrix
!           ii:                 unused on input
!           xx:                 unused on input
!       else
!           an:                 1
!           anz:                1
!           ap:                 unused
!           ai:                 unused
!           ax:                 unused
!           ii (1..nz):         row indices of input matrix
!           xx (1..nz):         values of input matrix

!=======================================================================
!  output: 
!=======================================================================
!
!       xx (xtail ... xsize), xtail:
!
!                       lu factors are located in xx (xtail ... xsize),
!                       including values in off-diagonal part if matrix
!                       was permuted to block triangular form.
!
!       ii (itail ... isize), itail:
!
!                       lu factors are located in ii (itail ... isize),
!                       including pattern, row and column permutations,
!                       block triangular information, etc.  see umf2fa
!                       for more information.
!
!       info:           integer informational output, see umd2fa
!       rinfo:          real informational output, see umd2fa
!
!       iuse:           memory usage in index on output
!       xuse:           memory usage in value on output
!
!       nzoff:          entries in off-diagonal part (0 if btf not used)
!       nblks:          number of diagonal blocks (1 if btf not used)

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   umd2fa
!       subroutines called:     umd2er, umd2fb, umd2f1, umd2of
!       functions called:       max
        intrinsic max

!=======================================================================
!  local scalars:
!=======================================================================

        integer kn, nzdia, blkpp, lublpp, p, offip, xhead, row,
     >          offxp, offpp, ihead, k1, k2, blk, prp, p2, cpermp,
     >          rpermp, nsgltn, npiv, mnz, nsym, k, col, rmax, cmax,
     >          totnlu, xrmax, xruse, zero_array(1)
        logical trybtf, iout, xout
        real(fltp)
     >          a

!  allocated array pointers:
!  -------------------------
!  blkpp:   blkp (1..nblks+1) array located in ii (blkpp..blkp+nblks)
!  lublpp:  lublkp (1..nblks) array loc. in ii (lublpp..lublpp+nblks-1)
!  offip:   offi (1..nzoff) array located in ii (offip..offip+nzoff-1)
!  offxp:   offx (1..nzoff) array located in xx (offxp..offxp+nzoff-1)
!  offpp:   offp (1..n+1) array located in ii (offpp..offpp+n)
!  cpermp:  cperm (1..n) array located in ii (cpermp..cpermp+n-1)
!  rpermp:  rperm (1..n) array located in ii (rpermp..rpermp+n-1)
!  prp:     pr (1..n) work array located in ii (prp..prp+n-1)
!
!  btf information:
!  ----------------
!  k1:      starting index of diagonal block being factorized
!  k2:      ending index of diagonal block being factorized
!  kn:      the order of the diagonal block being factorized
!  blk:     block number of diagonal block being factorized
!  trybtf:  true if btf is to be attempted (= icntl (4) .eq. 1)
!  nzdia:   number of entries in diagonal blocks (= nz if btf not used)
!  nsgltn:  number of 1-by-1 diagonal blocks ("singletons")
!  npiv:    number of numerically valid singletons
!  a:       numerical value of a singleton
!  mnz:     nzoff
!
!  memory usage:
!  -------------
!  xhead:   xx (1..xhead-1) is in use, xx (xhead..xtail-1) is free
!  ihead:   ii (1..ihead-1) is in use, ii (ihead..itail-1) is free
!  iout:    true if umd2f1 ran out of integer memory, but did not
!           set error flag
!  xout:    true if umd2f2 ran out of integer memory, but did not
!           set error flag
!
!  estimated memory for umd2rf:
!  ----------------------------
!  rmax:    largest contribution block is cmax-by-rmax
!  cmax:       "         "         "    "   "   "  "
!  totnlu:  total number of lu arrowheads in all diagonal blocks
!  xrmax:   estimated maximum real memory usage for umd2rf
!  xruse:   estimated current real memory usage for umd2rf
!
!  other:
!  ------
!  k:       loop index (kth pivot)
!  row:     row index
!  col:     column index
!  p:       pointer
!  p2:      pointer
!  nsym:    number of symmetric pivots chosen

!=======================================================================
!  executable statements:
!=======================================================================

!-----------------------------------------------------------------------
!  get input parameters and initialize
!-----------------------------------------------------------------------

********************************************************************************
***  minor bug fix, that was in version 2.2 (and in ma38).  jan. 20, 1998. *****
***  xout and iout need to be initialized.  they are set in umd2fb, but that ***
***  routine is not called if the btf is turned on (default) and the matrix ****
***  is permutable to upper *triangular* form.  in this case, xout and iout ****
***  are uninitialized, but used at statement 9000.  fixed for version 2.2.1, **
***  by adding the following two lines: ****************************************
********************************************************************************
        xout = .false.
        iout = .false.
********************************************************************************
        zero_array(1) = 0
        blkpp = 0
        lublpp = 0
        p2 = 0
        nblks = 1
        nzoff = 0
        nzdia = nz
        nsgltn = 0
        npiv = 0
        rmax = 1
        cmax = 1
        totnlu = 0
        if (presrv) then 
!          original matrix is not in cp/ii/xx, but in ap/ai/ax:
           ihead = 1
           xhead = 1
        else 
           ihead = nz + 1
           xhead = nz + 1
        endif 
        itail = isize + 1
        xtail = xsize + 1

!-----------------------------------------------------------------------
!  allocate permutation arrays: cperm (1..n) and rperm (1..n), and
!  seven scalars:  transa, nzoff, nblks, presrv, nz, n, ne
!  (in that order) at tail of ii (in lu factors)
!-----------------------------------------------------------------------

        itail = itail - (2*n+7)
        iuse = iuse + (2*n+7)
        info (18) = max (info (18), iuse)
        info (19) = info (18)
        cpermp = itail
        rpermp = cpermp + n
        if (ihead .gt. itail) then 
!          error return, if not enough integer memory:
           go to 9000
        endif 

!-----------------------------------------------------------------------
!  find permutations to block upper triangular form, if requested.
!-----------------------------------------------------------------------

        trybtf = icntl (4) .eq. 1
        if (trybtf) then 

!          -------------------------------------------------------------
!          get workspace at tail of ii of size 6n+2
!          -------------------------------------------------------------

           itail = itail - (n+1)
           offpp = itail
           itail = itail - (5*n+1)
           p = itail
           iuse = iuse + (6*n+2)
           info (18) = max (info (18), iuse)
           info (19) = info (18)

!          -------------------------------------------------------------
           if (presrv) then 
!          find permutation, but do not convert to btf form
!          -------------------------------------------------------------

              if (ihead .gt. itail) then 
!                error return, if not enough integer memory:
                 go to 9000
              endif 
              call umd2fb (ax, anz, ai, anz, n, nz, nzdia, nzoff,
     >           nblks, cp, ii (cpermp), ii (rpermp), ii(p), ii(p+n),
     >           ii (p+2*n), ii (p+3*n), ii (p+4*n), ii (offpp),
     >           presrv, icntl)

!          -------------------------------------------------------------
           else 
!          find permutation, convert to btf form, and discard original
!          -------------------------------------------------------------

!             use additional size nz temporary workspace in ii and xx
              ihead = ihead + nz
              xhead = xhead + nz
              iuse = iuse + nz
              xuse = xuse + nz
              info (18) = max (info (18), iuse)
              info (20) = max (info (20), xuse)
              info (19) = info (18)
              info (21) = info (20)
              if (ihead .gt. itail .or. xhead .gt. xtail) then 
!                error return, if not enough integer and/or real memory:
                 go to 9000
              endif 
              call umd2fb (xx, 2*nz, ii, 2*nz, n, nz, nzdia, nzoff,
     >              nblks, cp, ii (cpermp), ii (rpermp), ii(p), ii(p+n),
     >              ii (p+2*n), ii (p+3*n), ii (p+4*n), ii (offpp),
     >              presrv, icntl)
!             deallocate extra workspace in ii and xx
              ihead = ihead - nz
              xhead = xhead - nz
              iuse = iuse - nz
              xuse = xuse - nz
           endif 

!          -------------------------------------------------------------
!          deallocate workspace, and allocate btf arrays if required
!          -------------------------------------------------------------

           if (nblks .gt. 1) then 
!             replace (6*n+2) workspace at tail of ii with
!             blkp (1..nblks+1) and lublkp (1..nblks), offp (1..n+1)
              blkpp = offpp - (nblks+1)
              lublpp = blkpp - (nblks)
              itail = lublpp
              iuse = iuse - (6*n+2) + (2*nblks+n+2)
           else 
!             the matrix is irreducible.  there is only one block. 
!             remove everything at tail of ii, except
!             for the 2*n permutation vectors and the 7 scalars.
!             (transa, nzoff, nblks, presrv, nz, n, ne).
              itail = (isize + 1) - (2*n+7)
              iuse = iuse - (6*n+2)
           endif 

        endif 

!-----------------------------------------------------------------------
! current memory usage:
!-----------------------------------------------------------------------

!       if .not. presrv then
!               input matrix is now in ii (1..nz) and xx (1..nz)
!               off-diagonal part: in ii/xx (1..nzoff)
!                       col pattern: ii (offp (col) ... offp (col+1))
!                       col values:  xx (offp (col) ... offp (col+1))
!               diagonal blocks: in ii/xx (nzoff+1..nz)
!                       col pattern: ii (cp (col) ... cp (col+1))
!                       col values:  xx (cp (col) ... cp (col+1))
!               total: nz+n+1 integers, nz reals
!       else
!               input matrix is now in ai (1..nz) and ax (1..nz),
!               in original (non-btf) order:
!                       col pattern: ai (ap (col) ... ap (col+1))
!                       col values:  ax (ap (col) ... ap (col+1))
!               cp is a size n+1 integer workspace
!               total: nz+2*(n+1) integers, nz reals
!
!       if (nblks > 1) then
!               at tail of ii (in order): 2*nblks+n+2
!                       lublkp (1..nblks)
!                       blkp (1..nblks+1)
!                       offp (1..n+1)
!               total: (2*nblks+n+2) integers
!
!       remainder at tail of ii:
!               cperm (1..n)
!               rperm (1..n)
!               seven scalars: transa, nzoff, nblks, presrv, nz, n, ne
!
!   grand total current memory usage (including ii,xx,cp,ai,ap,ax):
!
!       presrv  nblks>1         integers, iuse =
!       f       f               nz+  (n+1)+(2*n+7)
!       f       t               nz+  (n+1)+(2*n+7)+(2*nblks+n+2)
!       t       f               nz+2*(n+1)+(2*n+7)
!       t       t               nz+2*(n+1)+(2*n+7)+(2*nblks+n+2)
!
!   real usage is xuse = nz

!       ----------------------------------------------------------------
!       get memory usage for next call to umd2rf
!       ----------------------------------------------------------------

        xrmax = 2*ne
        xruse = nz

!-----------------------------------------------------------------------
! factorization
!-----------------------------------------------------------------------

        if (nblks .eq. 1) then 

!          -------------------------------------------------------------
!          factorize the matrix as a single block
!          -------------------------------------------------------------

           call umd2f1 (cp, n, ii (cpermp), ii (rpermp), nzoff,
     >          itail, xtail, xx, xsize, xuse, ii, itail-1, iuse,
     >          icntl, cntl, info, rinfo, nblks,
     >          ap, ai, ax, presrv, 1, an, anz, ii, keep,
     >          rmax, cmax, totnlu, xrmax, xruse, iout, xout)
           if (iout .or. xout) then 
!             error return, if not enough integer and/or real memory:
              go to 9000
           endif 
           if (info (1) .lt. 0) then 
!             error return, if error in umd2f2:
              go to 9010
           endif 
!          original matrix has been deallocated
           ihead = 1
           xhead = 1

!          -------------------------------------------------------------
!          make the index of the block relative to start of lu factors
!          -------------------------------------------------------------

           ii (itail) = 1

        else 

!          -------------------------------------------------------------
!          factorize the block-upper-triangular form of the matrix
!          -------------------------------------------------------------

           prp = offpp
           if (presrv) then 
!             count the off-diagonal entries during factorization
              nzoff = 0
!             compute temp inverse permutation in ii (prp..prp+n-1)
!fpp$ nodepchk l
              do 10 k = 1, n 
                 ii (prp + ii (rpermp+k-1) - 1) = k
10            continue 
           endif 

           do 30 blk = nblks, 1, -1 

!             ----------------------------------------------------------
!             factorize the kn-by-kn block, a (k1..k2, k1..k2)
!             ----------------------------------------------------------

!             get k1 and k2, the start and end of this block
              k1 = ii (blkpp+blk-1)
              k2 = ii (blkpp+blk) - 1
              kn = k2-k1+1
              if (.not. presrv) then 
                 p = cp (k1)
                 cp (k2+1) = ihead
              endif 

              if (kn .gt. 1) then 

!                -------------------------------------------------------
!                factor the block (the block is not a singleton)
!                -------------------------------------------------------

                 call umd2f1 (cp (k1), kn,
     >              ii (cpermp+k1-1), ii (rpermp+k1-1), nzoff,
     >              itail, xtail, xx, xtail-1, xuse, ii, itail-1,
     >              iuse, icntl, cntl, info, rinfo, nblks,
     >              ap, ai, ax, presrv, k1, an, anz, ii (prp), keep,
     >              rmax, cmax, totnlu, xrmax, xruse, iout, xout)
                 if (iout .or. xout) then 
!                   error return, if not enough int. and/or real memory:
                    go to 9000
                 endif 
                 if (info (1) .lt. 0) then 
!                   error return, if error in umd2f2:
                    go to 9010
                 endif 
                 if (presrv) then 
                    ihead = 1
                    xhead = 1
                 else 
                    ihead = p
                    xhead = p
                 endif 

!                -------------------------------------------------------
!                save the location of the lu factors in lubkp (blk)
!                -------------------------------------------------------

                 ii (lublpp+blk-1) = itail

              else 

!                -------------------------------------------------------
!                get the value of singleton at a (k1,k1), if it exists
!                -------------------------------------------------------

                 a = 0
                 if (presrv) then 
!                   find the diagonal entry in the unpermuted matrix
                    col = ii (cpermp + k1 - 1)
                    do 20 p2 = ap (col), ap (col + 1) - 1 
                       row = ii (prp + ai (p2) - 1)
                       if (row .lt. k1) then 
!                         entry in off-diagonal blocks
                          nzoff = nzoff + 1
                       else 
                          a = ax (p2)
                       endif 
20                  continue 
                    ihead = 1
                    xhead = 1
                 else if (p .ne. ihead) then 
                    a = xx (p)
                    ihead = p
                    xhead = p
                    iuse = iuse - 1
                    xuse = xuse - 1
                    xruse = xruse - 1
                 endif 

!                -------------------------------------------------------
!                store the 1-by-1 lu factors of a singleton
!                -------------------------------------------------------

                 nsgltn = nsgltn + 1
                 if (abs (a) .eq. 0) then 
!                   the diagonal entry is either not present, or present
!                   but numerically zero.  this is a singular matrix,
!                   replace with 1-by-1 identity matrix.
                    a = 1
                 else 
!                   increment pivot count
                    npiv = npiv + 1
                 endif 
                 xtail = xtail - 1
!                note: if the matrix is not preserved and nonsingular
!                then we will not run out of memory at this point.
                 xuse = xuse + 1
                 xruse = xruse + 1
                 xrmax = max (xrmax, xruse)
                 info (20) = max (info (20), xuse)
                 info (21) = max (info (21), xuse)
!                error return, if not enough real memory:
                 if (xhead .gt. xtail) then 
                    go to 9000
                 endif 
                 ii (lublpp+blk-1) = -xtail
                 xx (xtail) = a

              endif 

30         continue 

!          -------------------------------------------------------------
!          make the index of each block relative to start of lu factors
!          -------------------------------------------------------------

!fpp$ nodepchk l
           do 40 p = lublpp, lublpp + nblks - 1 
              if (ii (p) .gt. 0) then 
                 ii (ii (p)) = ii (ii (p)) - xtail + 1
                 ii (p) = ii (p) - itail + 1
              else 
!                this is a singleton
                 ii (p) = (-ii (p)) - xtail + 1
              endif 
40         continue 

!          -------------------------------------------------------------
!          allocate temporary workspace for pr (1..n) at head of ii
!          -------------------------------------------------------------

           prp = ihead
           ihead = ihead + n
           iuse = iuse + n

!          -------------------------------------------------------------
!          allocate a single entry in case the lu factors are empty
!          -------------------------------------------------------------

           if (nblks .eq. n) then 
!             otherwise, arrays in umd2rf and umd2so would have
!             zero size, which can cause an address fault later on
              itail = itail - 1
              iuse = iuse + 1
              p2 = itail
           endif 

!          -------------------------------------------------------------
!          allocate permanent copy of off-diagonal blocks
!          -------------------------------------------------------------

           itail = itail - nzoff
           offip = itail
           xtail = xtail - nzoff
           offxp = xtail
           iuse = iuse + nzoff
           xuse = xuse + nzoff
           xruse = xruse + nzoff
           xrmax = max (xrmax, xruse)
           info (18) = max (info (18), iuse)
           info (19) = max (info (19), iuse)
           info (20) = max (info (20), xuse)
           info (21) = max (info (21), xuse)
           if (ihead .gt. itail .or. xhead .gt. xtail) then 
!             error return, if not enough integer and/or real memory:
              go to 9000
           endif 

!          -------------------------------------------------------------
!          re-order the off-diagonal blocks according to pivot perm
!          -------------------------------------------------------------

!          use cp as temporary work array:
           mnz = nzoff
           if (presrv) then 
              call umd2of (cp, n, ii (rpermp), ii (cpermp), nzoff,
     >          ii (offpp), ii (offip), xx (offxp), ii (prp),
     >          icntl, ap, ai, ax, an, anz, presrv, nblks, ii (blkpp),
     >          mnz, 1, info, p)
           else 
              call umd2of (cp, n, ii (rpermp), ii (cpermp), nzoff,
     >          ii (offpp), ii (offip), xx (offxp), ii (prp),
     >          icntl, zero_array, ii, xx, 0, mnz, presrv, 0, zero_array,
     >          mnz, 1, info, p)
           endif 
           if (nblks .eq. n) then 
!             zero the only entry in the integer part of the lu factors
              ii (p2) = 0
           endif 

!          -------------------------------------------------------------
!          deallocate pr (1..n), and ii/xx (1..nzoff) if present
!          -------------------------------------------------------------

           ihead = 1
           xhead = 1
           iuse = iuse - n
           if (.not. presrv) then 
              iuse = iuse - nzoff
              xuse = xuse - nzoff
           endif 

        endif 

!-----------------------------------------------------------------------
!  normal and error return
!-----------------------------------------------------------------------

!       error return label:
9000    continue
        if (iout .or. ihead .gt. itail) then 
!          set error flag if not enough integer memory
           call umd2er (1, icntl, info, -3, info (19))
        endif 
        if (xout .or. xhead .gt. xtail) then 
!          set error flag if not enough real memory
           call umd2er (1, icntl, info, -4, info (21))
        endif 

!       error return label, for error from umd2f2:
9010    continue

        info (4) = 0
        nzdia = nz - nzoff
        info (5) = nz
        info (6) = nzdia
        info (7) = nzoff
        info (8) = nsgltn
        info (9) = nblks
        info (12) = info (10) + info (11) + n + info (7)

!       count the number of symmetric pivots chosen.  note that some of
!       these may have been numerically unacceptable.
        nsym = 0
        if (info (1) .ge. 0) then 
           do 50 k = 1, n 
              if (ii (cpermp+k-1) .eq. ii (rpermp+k-1)) then 
!                this kth pivot came from the diagonal of a
                 nsym = nsym + 1
              endif 
50         continue 
        endif 
        info (16) = nsym

        info (17) = info (17) + npiv
        rinfo (1) = rinfo (4) + rinfo (5) + rinfo (6)

        if (info (1) .ge. 0 .and. info (17) .lt. n) then 
!          set warning flag if matrix is singular
           call umd2er (1, icntl, info, 4, info (17))
        endif 

!       ----------------------------------------------------------------
!       determine an upper bound on the amount of integer memory needed
!       (lindex) for a subsequent call to umd2rf.  if block-upper-
!       triangular-form is not in use (info (9) is 1), then
!       this bound is exact.  if ne is higher in the call to umd2rf
!       than in the call to umd2fa, then add 3 integers for each
!       additional entry (including the 2 integers required for the
!       row and column indices of the additional triplet itself).
!       this estimate assumes that job and transa are the same in
!       umd2fa and umd2rf.
!       ----------------------------------------------------------------

!       (keep (5) - keep (4) + 1), is added to info (22)
!       in umd2fa, to complete the computation of the estimate.

        if (presrv) then 
           info (22) = max (3*ne+2*n+1, ne+3*n+2,
     >                           2*nz+4*n+10+rmax+3*cmax+4*totnlu)
        else 
           info (22) = max (3*ne+2*n+1, ne+3*n+2, 2*nz+3*n+2,
     >                             nz+3*n+ 9+rmax+3*cmax+4*totnlu)
        endif 

!       ----------------------------------------------------------------
!       approximate the amount of real memory needed (lvalue) for a
!       subsequent call to umd2rf.  the approximation is an upper bound
!       on the bare minimum amount needed.  some garbage collection may
!       occur, but umd2rf is guaranteed to finish if given an lvalue of
!       size info (23) and if the pattern is the same.  if ne is
!       higher in the call to umd2rf than in the call to umd2fa, then
!       add 2 reals for each additional entry (including the 1 real
!       required for the value of the additional triplet itself).
!       this estimate assumes that job and transa are the same in
!       umd2fa and umd2rf.
!       ----------------------------------------------------------------

        info (23) = xrmax
        return
        end subroutine umd2f0
        subroutine umd2f1 (cp, n, cperm, rperm, nzoff,
     >          itail, xtail, xx, xsize, xuse, ii, isize, iuse,
     >          icntl, cntl, info, rinfo, nblks,
     >          ap, ai, ax, presrv, k1, an, anz, pr, keep,
     >          rmax, cmax, totnlu, xrmax, xruse, iout, xout)
        integer xsize, isize, n, icntl (20), info (40), xuse, iuse,
     >          itail, xtail, ii (isize), cp (n+1), cperm (n), nzoff,
     >          an, anz, rperm (n), ai (anz), ap (an+1), k1, pr (an),
     >          nblks, keep (20), rmax, cmax, totnlu, xrmax, xruse
        logical presrv, iout, xout
        real(fltp)
     >          xx (xsize), ax (anz)
        real(fltp)
     >          cntl (10), rinfo (20)
  
!=== umd2f1 ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  umd2f1 factorizes the n-by-n column-form matrix at the head of ii/xx
!  or in ap/ai/ax, and places its lu factors at the tail of ii/xx.  the
!  input matrix overwritten if it is located in ii/xx on input.  if
!  block-triangular-form (btf) is in use, this routine factorizes a
!  single diagonal block.

!=======================================================================
!  input:
!=======================================================================
!
!       n:              order of matrix (or order of diagonal block
!                       if btf is in use).
!       cp (1..n+1):    column pointers for input matrix
!       nblks:          number of diagonal blocks in btf form
!       isize:          size of ii
!       xsize:          size of xx
!       k1:             first index of this matrix (1 if btf not used)
!       icntl:          integer control parameters, see umd21i
!       cntl:           real control parameters, see umd21i
!       keep (6..8):    integer control parameters, see umd21i
!       iuse:           memory usage in index
!       xuse:           memory usage in value
!       rmax:           maximum ludegr seen so far (see umd2f2 for info)
!       cmax:           maximum ludegc seen so far (see umd2f2 for info)
!       totnlu:         total number of lu arrowheads constructed so far
!
!       if nblks>1 then:
!          cperm (1..n):        col permutation to btf
!          rperm (1..n):        row permutation to btf
!       else
!          cperm (1..n):        undefined on input
!          rperm (1..n):        undefined on input
!
!
!       presrv:         if true then input matrix is preserved
!
!       if presrv is true then:
!           an:                 order of preserved matrix (all blocks)
!           anz:                entries in preserved matrix
!           ap (1..an+1):       column pointers for preserved matrix
!           ai (1..anz):        row indices of preserved matrix
!           ax (1..anz):        values of preserved matrix
!                               the preserved matrix is not in btf form;
!                               it is in the orginal order.
!           if nblks > 1:
!               pr (1..n):      inverse row permutations to btf form
!               nzoff           entries in off-diagonal blocks
!                               seen so far
!           else
!               pr (1..n):      undefined on input
!
!           ii (1..isize):      undefined on input
!           xx (1..xsize):      undefined on input
!           cp (1..n+1):        undefined on input
!
!       else, if presrv is false:
!           an:                         1
!           anz:                        1
!           ii (1..cp (1) - 1):         unused
!           ii (cp (1) ... cp (n+1)-1): row indices of matrix to factor,
!                                       will be overwritten on output
!           ii (cp (n+1) ... isize):    unused on input
!
!           xx (1..cp (1) - 1):         unused
!           xx (cp (1) ... cp (n+1)-1): values of matrix to factorize,
!                                       will be overwritten on output
!           xx (cp (n+1) ... xsize):    unused on input
!                       if btf is in use, then ii and xx contain a
!                       single diagonal block.

!=======================================================================
!  output:
!=======================================================================
!
!       xx (xtail ... xsize), xtail,  ii (itail ... isize), itail:
!
!                       the lu factors of a single diagonal block.
!                       see umd2f2 for a description.
!
!       ii (cp1 ... itail-1):   undefined on output
!       xx (cp1 ... xtail-1):   undefined on output,
!                       where cp1 is equal to the value of cp (1)
!                       if presrv is false, or cp1 = 1 if presrv is
!                       true.
!
!       info:           integer informational output, see umd2fa
!       rinfo:          real informational output, see umd2fa
!       cperm (1..n):   the final col permutations, including btf
!       rperm (1..n):   the final row permutations, including btf
!
!       iuse:           memory usage in index
!       xuse:           memory usage in value
!       rmax:           maximum ludegr seen so far (see umd2f2 for info)
!       cmax:           maximum ludegc seen so far (see umd2f2 for info)
!       totnlu:         total number of lu arrowheads constructed so far
!
!       if nblks>1 and presrv:
!           nzoff       entries in off-diagonal blocks seen so far
!
!       iout:           true if ran out of integer memory in umd2f1
!       xout:           true if ran out of real memory in umd2f1

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   umd2f0
!       subroutines called:     umd2f2
!       functions called:       max, sqrt
        intrinsic max, sqrt

!=======================================================================
!  local scalars:
!=======================================================================

        integer cp1, pc, pend, pcol, cdeg, col, csiz, nz, xp, ip, is, p,
     >          dn, dsiz, wrksiz, i, clen, row, cscal
        parameter (cscal = 9)
        
        integer one_array(1)

!  original and expanded column-form:
!  ----------------------------------
!  cp1:     = value of cp (1) on input
!  pc:      pointer to integer part of expanded column-form matrix
!  pend:    column col ends here in the input column-form matrix
!  pcol:    column col starts here in the input column-form matrix
!  cdeg:    degree (number of entries) in a column
!  clen:    number of original entries in a column (= degree, here,
!           but decreases in umd2f2)
!  csiz:    size of the integer part of an expanded column (cdeg+cscal)
!  cscal:   = 9, the number of scalars in column data structure
!  nz:      number of entries in the diagonal block being factorized
!  xp:      pointer to real part of the expanded column
!  ip:      pointer to integer part of the expanded column
!
!  memory usage:
!  -------------
!  wrksiz:  size of integer workspace needed by umd2f2
!
!  "dense" columns: (converted to a prior, or artificial, frontal mat.):
!  ----------------
!  dn:      number of "dense" columns
!  dsiz:    a column is "dense" if it has more than dsiz entries
!
!  other:
!  ------
!  row:     row index
!  col:     a column index
!  i:       loop index
!  p:       pointer

!=======================================================================
!  executable statements:
!=======================================================================
        one_array(1) = 1
        iout = .false.
        xout = .false.

!-----------------------------------------------------------------------
!  count "dense" columns (they are treated as a priori frontal matrices)
!-----------------------------------------------------------------------

!       a column is "dense" if it has more than dsiz entries
        dsiz = max (0, keep (7),
     >                 keep (8) * int (sqrt (real (n))))
        dn = 0
        if (presrv) then 
           if (nblks .eq. 1) then 
              do 10 col = 1, n 
                 if (ap (col+1) - ap (col) .gt. dsiz) then 
!                   this is a "dense" column
                    dn = dn + 1
                 endif 
10            continue 
           else 
              do 40 col = 1, n 
!                if col might be dense, check more carefully:
                 cdeg = ap (cperm (col) + 1)- ap (cperm (col))
                 if (cdeg .gt. dsiz) then 
                    cdeg = 0
                    do 20 p = ap (cperm (col)), ap (cperm (col) + 1) -1
                       row = pr (ai (p))
                       if (row .ge. k1) then 
                          cdeg = cdeg + 1
                          if (cdeg .gt. dsiz) then 
!                            this is a "dense" column, exit out of loop
                             dn = dn + 1
                             go to 30
                          endif 
                       endif 
20                  continue 
!                   loop exit label:
30                  continue
                 endif 
40            continue 
           endif 
        else 
           do 50 col = 1, n 
              if (cp (col+1) - cp (col) .gt. dsiz) then 
!                this is a "dense" column
                 dn = dn + 1
              endif 
50         continue 
        endif 

!-----------------------------------------------------------------------
!  get size of workspaces to allocate from ii
!-----------------------------------------------------------------------

!       workspaces: wir (n), wic (n), wpr (n), wpc (n),
!       wm (n), head (n), rp (n+dn), wc (n+dn), wr (n+dn), wj (n)
        if (nblks .eq. 1) then 
!          rperm (1..n) is used as wir (1..n), and
!          cperm (1..n) is used as wic (1..n) in umd2f2
           wrksiz = 8*n + 3*dn
        else 
           wrksiz = 10*n + 3*dn
        endif 

!-----------------------------------------------------------------------
!  construct the expanded column-form of the matrix or the diag. block
!-----------------------------------------------------------------------

        if (presrv) then 

!          -------------------------------------------------------------
!          allocate space for wrksiz workspace and nz+cscal*n
!          integers and nz reals for the expanded column-form matrix.
!          -------------------------------------------------------------

           cp1 = 1
           xp = 1
           ip = 1 + wrksiz
           if (nblks .eq. 1) then 

!             ----------------------------------------------------------
!             construct copy of entire matrix
!             ----------------------------------------------------------

              nz = anz
              is = nz + wrksiz + cscal*n
              iuse = iuse + is
              xuse = xuse + nz
              info (18) = max (info (18), iuse)
              info (19) = max (info (19), iuse)
              info (20) = max (info (20), xuse)
              info (21) = max (info (21), xuse)
              iout = is .gt. isize
              xout = nz .gt. xsize
              if (iout .or. xout) then 
!                error return, if not enough integer and/or real memory:
                 go to 9000
              endif 

              pc = ip
              do 70 col = 1, n 
                 cp (col) = pc - wrksiz
                 cdeg = ap (col+1) - ap (col)
                 clen = cdeg
                 csiz = cdeg + cscal
                 ii (pc) = csiz
                 ii (pc+1) = cdeg
                 ii (pc+5) = 0
                 ii (pc+6) = clen
                 ii (pc+7) = 0
                 ii (pc+8) = 0
                 ii (pc+2) = xp
                 xp = xp + cdeg
                 pc = pc + cscal
                 p = ap (col)
                 do 60 i = 0, cdeg - 1 
                    ii (pc + i) = ai (p + i)
60               continue 
                 pc = pc + cdeg
70            continue 
              do 80 p = 1, nz 
                 xx (p) = ax (p)
80            continue 

           else 

!             ----------------------------------------------------------
!             construct copy of a single block in btf form
!             ----------------------------------------------------------

!             check for memory usage during construction of block
              do 100 col = 1, n 
                 pc = ip
                 cp (col) = pc - wrksiz
                 ip = ip + cscal
                 iout = ip .gt. isize
                 if (iout) then 
!                   error return, if not enough integer memory:
                    go to 9000
                 endif 
                 ii (pc+2) = xp
                 cdeg = ip
                 do 90 p = ap (cperm (col)), ap (cperm (col)+1)-1 
                    row = pr (ai (p))
                    if (row .ge. k1) then 
                       iout = ip .gt. isize
                       xout = xp .gt. xsize
                       if (iout .or. xout) then 
!                         error return, if not enough memory
                          go to 9000
                       endif 
                       ii (ip) = row - k1 + 1
                       xx (xp) = ax (p)
                       ip = ip + 1
                       xp = xp + 1
                    else 
!                      entry in off-diagonal part
                       nzoff = nzoff + 1
                    endif 
90               continue 
                 cdeg = ip - cdeg
                 clen = cdeg
                 csiz = cdeg + cscal
                 ii (pc) = csiz
                 ii (pc+1) = cdeg
                 ii (pc+5) = 0
                 ii (pc+6) = clen
                 ii (pc+7) = 0
                 ii (pc+8) = 0
100           continue 

              nz = xp - 1
              is = nz + wrksiz + cscal*n
              iuse = iuse + is
              xuse = xuse + nz
              info (18) = max (info (18), iuse)
              info (19) = max (info (19), iuse)
              info (20) = max (info (20), xuse)
              info (21) = max (info (21), xuse)

           endif 

!          -------------------------------------------------------------
!          get memory usage for next call to umd2rf
!          -------------------------------------------------------------

           xruse = xruse + nz
           xrmax = max (xrmax, xruse)

        else 

!          -------------------------------------------------------------
!          allocate space for wrksiz workspace and additional cscal*n
!          space for the expanded column-form of the matrix.
!          -------------------------------------------------------------

           cp1 = cp (1)
           nz = cp (n+1) - cp1
           pc = cp1 + wrksiz + (nz+cscal*n)
           iuse = iuse + wrksiz + cscal*n
           info (18) = max (info (18), iuse)
           info (19) = max (info (19), iuse)
           iout = pc .gt. isize+1
           if (iout) then 
!             error return, if not enough integer memory:
              go to 9000
           endif 

!          -------------------------------------------------------------
!          expand the column form in place and make space for workspace
!          -------------------------------------------------------------

           xp = nz + 1
           ip = nz + cscal*n + 1
           pend = cp (n+1)
           do 120 col = n, 1, -1 
              pcol = cp (col)
              do 110 p = pend-1, pcol, -1 
                 pc = pc - 1
                 ii (pc) = ii (p)
110           continue 
              pc = pc - cscal
              cdeg = pend - pcol
              clen = cdeg
              pend = pcol
              csiz = cdeg + cscal
              ip = ip - csiz
              cp (col) = ip
              ii (pc) = csiz
              ii (pc+1) = cdeg
              ii (pc+5) = 0
              ii (pc+6) = clen
              ii (pc+7) = 0
              ii (pc+8) = 0
              xp = xp - cdeg
              ii (pc+2) = xp
120        continue 
        endif 

!-----------------------------------------------------------------------
!  factorize the expanded column-form, with allocated workspaces
!-----------------------------------------------------------------------

        xp = cp1
        ip = cp1 + wrksiz

        if (nblks .eq. 1) then 

!          pass rperm and cperm as the wir and wic arrays in umd2f2:
           call umd2f2 (cp, nz, n, 1, one_array, one_array, itail, xtail,
     >          xx (xp), xsize-xp+1, ii (ip), isize-ip+1, icntl, cntl,
     >          info, rinfo, .false., iuse, xuse,
     >          rperm, cperm, ii (cp1), ii (cp1+n),
     >          ii (cp1+2*n), ii (cp1+3*n), ii (cp1+4*n), ii (cp1+5*n),
     >          ii (cp1+6*n+dn), ii (cp1+7*n+2*dn),
     >          dn, dsiz, keep, rmax, cmax, totnlu, xrmax, xruse)

        else 

!          pass cperm, rperm, wic and wir as separate arrays, and
!          change cperm and rperm from the btf permutations to the
!          final permutations (including btf and numerical pivoting).
           call umd2f2 (cp, nz, n, n, cperm, rperm, itail, xtail,
     >          xx (xp), xsize-xp+1, ii (ip), isize-ip+1, icntl, cntl,
     >          info, rinfo, .true., iuse, xuse,
     >          ii (cp1), ii (cp1+n), ii (cp1+2*n), ii (cp1+3*n),
     >          ii (cp1+4*n), ii (cp1+5*n), ii (cp1+6*n), ii (cp1+7*n),
     >          ii (cp1+8*n+dn), ii (cp1+9*n+2*dn),
     >          dn, dsiz, keep, rmax, cmax, totnlu, xrmax, xruse)

        endif 

        if (info (1) .lt. 0) then 
!          error return, if error occured in umd2f2:
           return
        endif 

!-----------------------------------------------------------------------
!  adjust tail pointers, and save pointer to numerical part of lu
!-----------------------------------------------------------------------

!       head = cp1
        iuse = iuse - wrksiz
        itail = itail + ip - 1
        xtail = xtail + xp - 1
        ii (itail) = xtail
        return

!=======================================================================
!  error return
!=======================================================================

!       error return label:
9000    continue
        return
        end subroutine umd2f1
        subroutine umd2f2 (cp, nz, n, pn, cperm, rperm, itail, xtail,
     >          xx, xsize, ii, isize, icntl, cntl, info, rinfo, pgiven,
     >          iuse, xuse, wir, wic, wpr, wpc, wm, head,
     >          wj, rp, wc, wr, dn, dsiz, keep,
     >          rmax, cmax, totnlu, xrmax, xruse)
        integer xsize, isize, icntl (20), info (40), pn,
     >          itail, xtail, nz, n, ii (isize), cp (n+1), dn, dsiz,
     >          rperm (pn), cperm (pn), wir (n), wic (n), wpr (n),
     >          wpc (n), wm (n), head (n), rp (n+dn), wc (n+dn),
     >          wr (n+dn), iuse, xuse, wj (n), keep (20),
     >          rmax, cmax, totnlu, xrmax, xruse
        logical pgiven
        real(fltp)
     >          xx (xsize)
        real(fltp)
     >          cntl (10), rinfo (20)
  
!=== umd2f2 ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  umd2f2 factorizes the n-by-n input matrix at the head of ii/xx
!  (in expanded column-form) and places its lu factors at the tail of
!  ii/xx.  the input matrix is overwritten.   no btf information is
!  used in this routine, except that the btf permutation arrays are
!  modified to include the final permutations.

!=======================================================================
!  input:
!=======================================================================
!
!       cp (1..n+1):    column pointers of expanded column-form,
!                       undefined on output
!       n:              order of input matrix
!       nz:             entries in input matrix
!       isize:          size of ii
!       xsize:          size of xx
!       iuse:           memory usage in index
!       xuse:           memory usage in value
!       icntl:          integer control parameters, see umd21i
!       cntl:           real control parameters, see umd21i
!       keep (6)        integer control parameter, see umd21i
!       dn:             number of dense columns
!       dsiz:           entries required for col to be treated as dense
!       rmax:           maximum ludegr seen so far (see below)
!       cmax:           maximum ludegc seen so far (see below)
!       totnlu:         total number of lu arrowheads constructed so far
!       xrmax:          maximum real memory usage for umd2rf
!       xruse:          current real memory usage for umd2rf
!
!       pgiven:         true if cperm and rperm are defined on input
!       if pgiven then:
!          cperm (1..pn):       col permutation to btf, n = pn
!          rperm (1..pn):       row permutation to btf
!       else
!          cperm (1..pn):       unaccessed pn = 1
!          rperm (1..pn):       unaccessed
!
!       ii (1..nz+cscal*n):             expanded column-form, see below
!       ii (nz+cscal*n+1..isize):       undefined on input
!       xx (1..nz):                     expanded column-form, see below
!       xx (nz+1..xsize):               undefined on input

!=======================================================================
!  workspace:
!=======================================================================
!
!       wir (1..n)
!       wic (1..n)
!       wpr (1..n)
!       wpc (1..n)
!       wm (1..n)
!       head (n)
!       rp (1..n+dn)
!       wr (1..n+dn)
!       wc (1..n+dn)

!=======================================================================
!  output:
!=======================================================================
!
!       ii (1..itail-1):        undefined on output
!       ii (itail..isize):      lu factors of this matrix, see below
!       xx (1..xtail-1):        undefined on output
!       xx (xtail..xsize):      lu factors of this matrix, see below
!
!       info:           integer informational output, see umd2fa
!       rinfo:          real informational output, see umd2fa
!       if pgiven:
!          cperm (1..n): the final col permutations, including btf
!          rperm (1..n): the final row permutations, including btf
!
!       wic (1..n):     row permutations, not including btf
!       wir (1..n):     column permutations, not including btf
!
!       iuse:           memory usage in index
!       xuse:           memory usage in value
!       rmax:           maximum ludegr seen so far (see below)
!       cmax:           maximum ludegc seen so far (see below)
!       totnlu:         total number of lu arrowheads constructed so far
!       xrmax:          maximum real memory usage for umd2rf
!       xruse:          current real memory usage for umd2rf

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   umd2f1
!       subroutines called:     umd2er, umd2fg, dgemv, dgemm 
!       functions called:       idamax, abs, max, min
        integer idamax
        intrinsic abs, max, min

!=======================================================================
!  description of data structures:
!=======================================================================

!-----------------------------------------------------------------------
!  column/element/arrowhead pointers:
!-----------------------------------------------------------------------
!
!  the cp (1..n) array contains information about non-pivotal columns
!
!       p = cp (j)
!       if (p = 0) then j is a pivotal column
!       else i is a non-pivotal column
!
!  the rp (1..n) array contains information about non-pivotal rows,
!  unassembled frontal matrices (elements), and the lu arrowheads
!
!       p = rp (i)
!       if (i > n) then
!          i is an artificial frontal matrix (a dense column)
!          if (p = 0) then i is assembled, else unassembled
!       else if (p = 0) then i is pivotal but not element/arrowhead
!       else if (wc (i) >= 0 and wc (i) <= n) then
!          i is a non-pivotal row
!       else if (wc (i) = -(n+dn+2)) then
!          i is a pivotal row, an assembled element, and an lu arrowhead
!       else i an unassembled element

!-----------------------------------------------------------------------
!  matrix being factorized:
!-----------------------------------------------------------------------
!
!    each column is stored in ii and xx:
!    -----------------------------------
!
!       if j is a non-pivotal column, pc = cp (j):
!
!       csiz = ii (pc) size of the integer data structure for col j,
!                        including the cscal scalars
!       cdeg = ii (pc+1) degree of column j
!       cxp  = ii (pc+2) pointer into xx for numerical values
!       next = ii (pc+3) pointer to next block of memory in xx
!       prev = ii (pc+4) pointer to previous block of memory in xx
!       celn = ii (pc+5) number of elements in column j element list
!       clen = ii (pc+6) number of original entries in column j
!       cnxt = ii (pc+7) next column with same degree as col j
!       cprv = ii (pc+8) previous column with same degree as col j
!       cep = (pc+9) pointer to start of the element list
!       ii (cep ... cep + 2*celn - 1)
!                       element list (e,f) for the column
!       ii (cep + 2*celn ... pc + csiz - clen - 1)
!                       empty
!       ii (pc + csiz - clen ... pc + csiz - 1)
!                       row indices of original nonzeros in the column
!       xx (xp ... xp + clen - 1)
!                       numerical values of original nonzeros in the col
!
!       if cdeg = ii (pc+1) = -(n+2), then this is a singular column
!       if cdeg = -1, then this column is deallocated
!
!    each row is stored in ii only:
!    ------------------------------
!
!       if i is a non-pivotal row, pr = rp (i)
!
!       rsiz = ii (pr) size of the integer data structure for row i,
!                        including the rscal scalars
!       rdeg = ii (pr+1) degree of row i
!       reln = wr (i) number of elements in row i element list
!       rlen = wc (i) number of original entries in row i
!       rep  = (pr+2) pointer to start of the element list
!       ii (rep ... rep + 2*reln - 1)
!                       element list (e,f) for the row
!       ii (rep + 2*reln ... pr + rsiz - rlen - 1)
!                       empty
!       ii (pr + rsiz - rlen ... pr + rsiz - 1)
!                       column indices of original nonzeros in the row
!
!       if rdeg = -1, then this row is deallocated

!-----------------------------------------------------------------------
!  frontal matrices
!-----------------------------------------------------------------------
!
!   each unassembled frontal matrix (element) is stored as follows:
!       total size: fscal integers, (fdimr*fdimc) reals
!
!       if e is an unassembled element, ep = rp (e), and e is also
!       the first pivot row in the frontal matrix.
!
!       fluip  = ii (ep)        pointer to lu arrowhead in ii
!       fdimc  = ii (ep+1)      column dimension of contribution block
!       fxp    = ii (ep+2)      pointer to contribution block in xx
!       next   = ii (ep+3)      pointer to next block in xx
!       prev   = ii (ep+4)      pointer to previous block in xx
!       fleftr = ii (ep+5)      number of unassembled rows
!       fleftc = ii (ep+6)      number of unassembled columns
!       fextr = wr (e) - w0     external row degree of the frontal mtx
!       fextc = wc (e) - w0     external col degree of the frontal mtx
!       xx (fxp ... )
!               a 2-dimensional array, c (1..fdimc, 1..fdimr).
!               note that fdimr is not kept (it is not needed,
!               except for the current frontal).  if this is not the
!               current frontal matrix, then luip points to the
!               corresponding lu arrowhead, and the contribution block
!               is stored in c (1..ludegc, 1..ludegr) in the
!               c (1..fdimc, ...) array.
!
!               if memory is limited, garbage collection will occur.
!               in this case, the c (1..fdimc, 1..fdimr) array is
!               compressed to be just large enough to hold the
!               unassembled contribution block,
!               c (1..ludegc, 1..ludegr).

!-----------------------------------------------------------------------
!  artificial frontal matrices
!-----------------------------------------------------------------------
!
!   an artificial frontal matrix is an original column that is treated
!   as a c-by-1 frontal matrix, where c is the number of original
!   nonzeros in the column.  dense columns (c > dsiz) are treated this
!   way.  an artificial frontal matrix is just the same as a frontal
!   matrix created by the elimination of one or more pivots, except
!   that there is no corresponding lu arrowhead.  the row and column
!   patterns are stored in:
!
!       ep = rp (e), where e = n+1 .. n+dn, where there are dn
!                    artificial frontal matrices.
!         
!       lucp = (ep+9)   pointer to row pattern (just one column index)
!       lurp = (ep+8) pointer to column pattern (fdimc row indices)

!-----------------------------------------------------------------------
!  current frontal matrix
!-----------------------------------------------------------------------
!
!  ffxp points to current frontal matrix (contribution block and lu
!  factors).  for example, if fflefc = 4, fflefr = 6, k = 3, and
!  gro = 2.0, then "x" is a term in the contribution block, "l" in l1,
!  "u" in u1, "l" in l2, "u" in u2, and "." is unused.  xx (fxp) is "x".
!  the first 3 pivot values (diagonal entries in u1) are 1,2, and 3.
!  for this frontal matrix, ffdimr = 12 (the number of columns), and
!  ffdimc = 8 (the number of rows).  the frontal matrix is
!  ffdimc-by-ffdimr
!
!                             |----------- col 1 of l1 and l2, etc.
!                             v
!       x x x x x x . . . l l l
!       x x x x x x . . . l l l
!       x x x x x x . . . l l l
!       x x x x x x . . . l l l
!       . . . . . . . . . . . .
!       u u u u u u . . . 3 l l         <- row 3 of u1 and u2
!       u u u u u u . . . u 2 l         <- row 2 of u1 and u2
!       u u u u u u . . . u u 1         <- row 1 of u1 and u2

!-----------------------------------------------------------------------
!  lu factors
!-----------------------------------------------------------------------
!
!   the lu factors are placed at the tail of ii and xx.  if this routine
!   is factorizing a single block, then this decription is for the
!   factors of the single block:
!
!       ii (itail):             xtail = start of lu factors in xx
!       ii (itail+1):           nlu = number of lu arrowheads
!       ii (itail+2):           npiv = number of pivots
!       ii (itail+3):           maximum number of rows in any
!                               contribution block (max ludegc)
!       ii (itail+4):           maximum number of columns in any
!                               contribution block (max ludegr)
!       ii (itail+5..itail+nlu+4): lup (1..nlu) array, pointers to each
!                               lu arrowhead, in order of their
!                               factorization
!       ii (itail+nlu+5...isize):integer info. for lu factors
!       xx (xtail..xsize):      real values in lu factors
!
!   each lu arrowhead is stored as follows:
!   ---------------------------------------
!
!       total size: (7 + ludegc + ludegr + nsons) integers,
!                   (luk**2 + ludeg!*luk + luk*ludegc) reals
!
!       if e is an lu arrowhead, then luip = rp (e), and luip >= itail.
!       when umd2f2 returns, then luip is given by luip =
!       ii (itail+s+1), where s = 1..nlu is the position of the lu
!       arrowhead in the lu factors (s=1,2,.. refers to the first,
!       second,.. lu arrowhead)
!
!       luxp   = ii (luip) pointer to numerical lu arrowhead
!       luk    = ii (luip+1) number of pivots in lu arrowhead
!       ludegr = ii (luip+2) degree of last row of u (excl. diag)
!       ludegc = ii (luip+3) degree of last col of l (excl. diag)
!       nsons  = ii (luip+4) number of children in assembly dag
!       ludimr = ii (luip+5)
!       ludimc = ii (luip+5) max front size is ludimr-by-ludimc,
!                       or zero if this lu arrowhead factorized within
!                       the frontal matrix of a prior lu arrowhead.
!       lucp   = (luip + 7)
!                       pointer to pattern of column of l
!       lurp   = lucp + ludegc
!                       pointer to patter of row of u
!       lusonp = lurp + ludegr
!                       pointer to list of sons in the assembly dag
!       ii (lucp ... lucp + ludegc - 1)
!                       row indices of column of l
!       ii (lurp ... lurp + ludegr - 1)
!                       column indices of row of u
!       ii (lusonp ... lusonp + nsons - 1)
!                       list of sons
!       xx (luxp...luxp + luk**2 + ludeg!*luk + luk*ludegr - 1)
!                       pivot block (luk-by-luk) and the l block
!                       (ludegc-by-luk) in a single (luk+ludegc)-by-luk
!                       array, followed by the u block in a
!                       luk-by-ludegr array.
!
!   pivot column/row pattern (also columns/rows in contribution block):
!       if the column/row index is negated, the column/row has been
!       assembled out of the frontal matrix into a subsequent frontal
!       matrix.  after factorization, the negative flags are removed,
!       and the row/col indices are replaced with their corresponding
!       index in the permuted lu factors.
!
!   list of sons:
!       1 <= son <= n:           son an luson
!       n+1 <= son <= 2n:        son-n is an uson
!       2n+n <= son <= 3n:       son-2n is a lson
!       during factorzation, a son is referred to by its first
!       pivot column.  after factorization, they are numbered according
!       to their order in the lu factors.

!-----------------------------------------------------------------------
!  workspaces:
!-----------------------------------------------------------------------
!
!   wir (e):  link list of sons of the current element
!       wir (e) = -1 means that e is not in the list.
!       wir (e) = next+n+2 means that "next" is the element after e.
!       the end of the list is marked with wir (e) = -(n+2).
!       sonlst points to the first element in the list, or 0 if
!       the sonlst is empty.
!
!   wir (row), wic (col):  used for pivot row/col offsets:
!
!       if wir (row) >= 0 then the row is in the current
!       column pattern.  similarly for wic (col).
!
!       if wir (row) is set to "empty" (<= -1), then
!       the row is not in the current pivot column pattern.
!
!       similarly, if wic (col) is set to -2, then the column is
!       not in the current pivot row pattern.
!
!       if wic (col) = -1 then col is pivotal
!
!       after factorization, wir/c holds the pivot permutations.
!
!   wpr/c (1..n):  the first part is used for the current frontal
!           matrix pattern.  during factorization, the last part holds
!           a stack of the row and column permutations (wpr/c (n-k+1)
!           is the k-th pivot row/column).
!
!   head (1..n):        degree lists for columns.  head (d) is the
!                       first column in list d with degree d.
!                       the cnxt and cprv pointers are stored in the
!                       column data structure itself.
!                       mindeg is the least non-empty list
!
!   wm (1..n):          various uses
!   wj (1..degc) or wj (1..fdegc):      offset in pattern of a son 

!-----------------------------------------------------------------------
!  memory allocation in ii and xx:
!-----------------------------------------------------------------------
!
!   ii (1..ihead):      rows and columns of active submatrix, and
!                       integer information for frontal matrices.
!   xx (1..xhead):      values of original entries in columns of
!                       matrix, values of contribution blocks, followed
!                       by the current frontal matrix.
!
!   mhead:              a pointer to the first block in the head of
!                       xx.  each block (a column or frontal matrix)
!                       contains a next and prev pointer for this list.
!                       if the list is traversed starting at mhead,
!                       then the pointers to the reals (cxp or fxp)
!                       will appear in strictly increasing order.
!                       note that the next, prev, and real pointers
!                       are in ii.  next and prev point to the next
!                       and previous block in ii, and the real pointer
!                       points to the real part in xx.
!
!   mtail:              the end of the memory list.

!=======================================================================
!  local scalars:
!=======================================================================

        integer swpcol, swprow, fdimc, k0, colpos, rowpos, row2, rdeg2,
     >          p, i, j, ffrow, pivrow, pivcol, ludegr, ludegc, e1,
     >          fxp, lurp, lucp, ip, next, fflefr, pc, mnext, mprev,
     >          fflefc, fedegr, fedegc, k, xudp, xdp, xsp, xlp, s, col2,
     >          bestco, col, e, row, cost, srched, pr, f1, rscan, rep,
     >          kleft1, ffsize, ffxp, w0, ffdimr, ffdimc, kleft, xldp,
     >          ep, scan1, scan2, scan3, scan4, nzl, nzu, degc, cep
        integer mindeg, nsrch, npiv, eson, luip1, dnz, iworst, wxp,
     >          nb, lupp, nlu, nsons, ineed, xneed, ldimc, lxp, rlen2,
     >          rsiz, lsons, sonlst, xhead, ihead, deln, dlen,
     >          slist, xp, luip, rdeg, cdeg1, pfree, xfree, cdeg2,
     >          f, cdeg, mtail, mhead, rsiz2, csiz2, ip2, maxdr, maxdc,
     >          xs, is, luxp, fsp, flp, fdp, jj, usons, ndn, p2,
     >          csiz, celn, clen, reln, rlen, uxp, pc2, pr2
        integer cnxt, cprv, cxp, fluip, lusonp, fleftr, fleftc, maxint,
     >          fmaxr, fmaxc, slots, limit, rscal, cscal, fscal, extra,
     >          fmax, w0big, minmem, dummy1, dummy2, dummy3, dummy4
        logical symsrc, pfound, movelu, okcol, okrow, better
        real(fltp)
     >          temp, one
        real(fltp)
     >          toler, maxval, relpt, gro, apiv, tmp
        parameter (rscal = 2, cscal = 9, fscal = 7,
     >          minmem = 24)

!  current element and working array, c:
!  -------------------------------------
!  ffxp:    current working array is in xx (ffxp ... ffxp+ffsize-1)
!  ffsize:  size of current working array in xx
!  ffdimr:  row degree (number of columns) of current working array
!  ffdimc:  column degree (number of rows) of current working array
!  fflefr:  row degree (number of columns) of current contribution block
!  fflefc:  column degree (number of rows) of current contribution block
!  fmaxr:   max row degree (maximum front size is fmaxr-by-fmaxc)
!  fmaxc:   max col degree (maximum front size is fmaxr-by-fmaxc)
!  fedegr:  extended row degree
!  fedegc:  extended column degree
!  ffrow:   current element being factorized (a pivot row index)
!  pivrow:  current pivot row index
!  pivcol:  current pivot column index
!  e1:      first pivot row in the frontal matrix
!  gro:     frontal matrix amalgamation growth factor
!  usons:   pointer to a link list of usons, in wc, assembled this scan3
!  lsons:   pointer to a link list of lsons, in wr, assembled this scan4
!  sonlst:  pointer to a link list of sons, in wir, of current element
!  swpcol:  the non-pivotal column to be swapped with pivot column
!  swprow:  the non-pivotal row to be swapped with pivot row
!  colpos:  position in wpr of the pivot column
!  rowpos:  position in wpc of the pivot row
!  k:       current pivot is kth pivot of current element
!  k0:      contribution block, c, has been updated with pivots 1..k0
!
!  lu arrowhead (a factorized element):
!  ------------------------------------
!  movelu:  true if a new lu arrowhead is to be created
!  luip:    current element is in ii (luip ...)
!  luip1:   first element from current frontal matrix in ii (luip1...) 
!  ludegc:  degree of pivot column (excluding pivots themselves)
!  ludegr:  degree of pivot row (excluding pivots themselves)
!  lucp:    pattern of col(s) of current element in ii (lucp...)
!  lurp:    pattern of row(s) of current element in ii (lurp...)
!  lusonp:  list of sons of current element is in ii (lusonp...)
!  nsons:   number of sons of current element
!  ldimc:   column dimension (number of rows) of [l1\u1 l2] block
!  luxp:    numerical values of lu arrowhead stored in xx (luxp ...)
!  lxp:     l2 block is stored in xx (lxp ...) when computed
!  uxp:     u2 block is stored in xx (uxp ...) when computed
!  nzu:     nonzeros above diagonal in u in current lu arrowhead
!  nzl:     nonzeros below diagonal in l in current lu arrowhead
!
!  son, or element other than current element:
!  -------------------------------------------
!  e:       an element
!  eson:    an element
!  s:       a renumbered element (1..nlu) for umd2so and umd2rf
!  ep:      frontal matrix integer data struct. in ii (ep...ep+fscal-1)
!  fscal:   = 7, size of frontal matrix data structure
!  fluip:   lu arrowhead of e is in ii (fluip ...)
!  fxp:     contribution block of son is in xx (fxp ...)
!  fdimc:   leading dimension of contribution block of e
!  lucp:    pattern of col(s) of e in ii (lucp...)
!  lurp:    pattern of row(s) of e in ii (lurp...)
!  ludegr:  row degree of contribution block of e
!  ludegr:  column degree of contribution block of e
!  maxdr:   maximum ludegr for any lu arrowhead, for umd2rf
!  maxdc:   maximum ludegc for any lu arrowhead, for umd2rf
!  degc:    compressed column offset vector of son is in wj/wm (1..degc)
!  fleftr:  remaining row degree (number of columns) of a contrib. block
!  fleftc:  remaining column degree (number of rows) of a contrib. block
!  xudp:    pointer to a column of a prior contribution block
!  xldp:    pointer to a row of a prior contribution block
!
!  memory allocation:
!  ------------------
!  mhead:   head pointer for link list of blocks in xx
!  mtail:   tail pointer for link list of blocks in xx
!  mprev:   previous block, ii (p+4), of the block located at p
!  mnext:   next block, ii (p+3), of the block located at p
!  pfree:   ii (pfree+2) is the largest known free block in xx
!  xfree:   size of largest known free block in xx
!  xhead:   xx (1..xhead-1) is in use, xx (xhead ..xtail-1) is free
!  xtail:   xx (xtail..xsize) is in use, xx (xhead ..xtail-1) is free
!  xneed:   bare minimum memory currently needed in xx
!  ihead:   ii (1..ihead-1) is in use, ii (ihead ..itail-1) is free
!  itail:   ii (itail..isize) is in use, ii (ihead ..itail-1) is free
!  ineed:   bare minimum memory currently needed in ii
!  iworst:  worst possible current integer memory required
!  xs:      size of a block of memory in xx
!  is:      size of a block of memory in ii
!  wxp:     pointer to a temporary workspace in xx (wxp ... )
!  slots:   number of slots added to element lists during garbage coll.
!  minmem:  smallest isize allowed
!
!  wr and wc flag arrays:
!  ----------------------
!  w0:      marker value for wr (1..n) and wc (1..n) arrays 
!  w0big:   largest permissible value of w0 (w0+n must not overflow)
!  fmax:    largest row/col degree of an element seen so far
!
!  a column:
!  ---------
!  pc:      pointer to a column, in ii (p!...)
!  pc2:     pointer to a column, in ii (pc2...)
!  csiz:    size of integer data structure of a column
!  csiz2:   size of integer data structure of a column
!  cscal:   = 9, number of scalars in data structure of a column
!  cdeg:    degree of a column
!  cdeg1:   degree of a column
!  cdeg2:   degree of a column
!  celn:    number of elements in the element list of a column
!  clen:    number of original entries that remain in a column
!  cnxt:    next column with same degree as this column
!  cprv:    previous column with same degree as this column
!  cep:     pointer to the element list of a column
!  cxp:     pointer to the numerical values in a column
!  limit:   maximum size for row/col data structure (excl. scalars)
!
!  dense columns:
!  --------------
!  dnz:     number of original entries that reside in "dense" columns
!  dn:      number of "dense" columns
!  ndn:     n + dn
!  extra:   number of extra slots to add to reconstructed "dense" cols
!
!  a row:
!  ------
!  pr:      pointer to a row, in ii (pr...)
!  pr2:     pointer to a row, in ii (pr2...)
!  rsiz:    size of integer data structure of a row
!  rsiz2:   size of integer data structure of a row
!  rscal:   = 2, number of scalars in data structure of a row
!  rdeg:    degree of a row
!  rdeg2:   degree of a row
!  reln:    number of elements in the element list of a row
!  rlen:    number of original entries that remain in a row
!  rlen2:   number of original entries that remain in a row
!  rep:     pointer to the element list of a row
!
!  pivot search:
!  -------------
!  cost:    approximate markowitz-cost of the current candidate pivot
!  bestco:  best approximate markowitz-cost seen so far
!  srched:  number of non-singular candidates searched so far
!  mindeg:  minimum degree of columns in active submatrix
!  nsrch:   maximum number of columns to search
!  slist:   pointer to a link list of searched columns, in ii
!  symsrc:  true if attempting to preserve symmetry
!  pfound:  true if pivot found during local search
!  okcol:   true if candidate pivot column is acceptable, so far
!  okrow:   true if candidate pivot row is acceptable, so far
!  toler:   pivot tolerance; abs(pivot) must be >= toler
!  maxval:  maximum absolute value in a candidate pivot column
!  relpt:   relative pivot tolerance (cntl (1))
!  npiv:    number of pivots factorized so far, incl. current element
!  kleft:   number of rows/columns remaining in active submatrix
!  kleft1:  kleft - 1
!  better:  if true, then candidate is better than the prior candidate
!
!  assembly:
!  ---------
!  f1:      degree prior to assembly next item
!  f:       offset into an element
!  rscan:   skip row assembly if more than rscan original entries
!  scan1:   start scan1 at wpc (scan1 ... fflefc) 
!  scan2:   start scan2 at wpr (scan2 ... fflefr) 
!  scan3:   start scan3 at wpr (scan3 ... fflefr) 
!  scan4:   start scan4 at wpc (scan4 ... fflefc) 
!  deln:    number of (e,f) tuples to delete from an element list
!  dlen:    number of original entries to delete from a row/col
!
!  allocated arrays:
!  -----------------
!  lupp:    lup (1..nlu) array located in ii (lupp...lupp+nlu-1)
!  nlu:     number of lu arrowheads
!
!  other:
!  ------
!  xdp:     destination pointer, into xx
!  xsp:     source pointer, into xx
!  xlp:     pointer into xx of location of last row/col in c 
!  xp:      pointer into xx
!  ip:      pointer into ii
!  ip2:     pointer into ii
!  p2:      pointer into ii
!  fsp:     source pointer, into xx
!  fsp:     destination pointer, into xx
!  flp:     last row/column in current contribution is in xx (flp...)
!  col,col2: a column index
!  row,row2: a row index
!  nb:      block size for tradeoff between level-2 and level-3 blas  
!  p, i, j, k, x:  various uses
!  jj:      loop index
!  maxint:  largest representable positive integer
!  next:    next pointer, for a link list
!  dummy1:  dummy loop index for main factorization loop
!  dummy2:  dummy loop index for global pivot search loop
!  dummy3:  dummy loop index for outer frontal matrix factorization loop
!  dummy4:  dummy loop index for inner frontal matrix factorization loop

!=======================================================================
!  executable statements:
!=======================================================================

!       ----------------------------------------------------------------
!       get control parameters and initialize various scalars
!       ----------------------------------------------------------------
        fedegr = 0
        luip = 0
        luip1 = 0
        rowpos = 0
        pivrow = 0
        one = 1
        nsrch = max (1, icntl (5))
        symsrc = icntl (6) .ne. 0
        nb = max (1, icntl (7))
        relpt = cntl (1)
        if (relpt .lt. 0) relpt = 0
        if (relpt .gt. 1) relpt = 1
        gro = cntl (2)
        if (gro .lt. 1) gro = 1
        maxint = keep (6)
        ndn = n + dn
        w0big = maxint - n
        w0 = ndn + 2
!       currently: w0 = n+dn+2 < 2n+2 < w0big = maxint - n
!       2n+2 < maxint - n must hold, so n < (maxint - 2) / 3 is the
!       largest that n can be.  this condition is checked in umd2fa.
        kleft = n
        npiv = 0
        nlu = 0
        mindeg = 1
        fmax = 1
        ihead = nz + cscal*n + 1
        xhead = nz + 1
        itail = isize + 1
        xtail = xsize + 1
!       cp (1) must equal 1, the first block
        xfree = -1
        pfree = 0
!       make sure integer space is at least of size minmem (simplifies
!       link list management and memory management)
        info (19) = max (info (19), iuse+minmem)
        if (ihead.gt.itail.or.isize.lt.minmem.or.xhead.gt.xtail) then 
!          error return, if not enough integer and/or real memory:
           go to 9000
        endif 
        bestco = 0
        limit = n + 2*ndn
        lsons = ndn + 1
        usons = ndn + 1

!       ----------------------------------------------------------------
!       initialize workspaces
!       ----------------------------------------------------------------

        do 10 i = 1, n 
           wir (i) = -1
           wic (i) = -2
           head (i) = 0
           wc (i) = 0
           wr (i) = 0
10      continue 

!       ----------------------------------------------------------------
!       initialize the link list for keeping track of real memory usage
!       ----------------------------------------------------------------

        mhead = 0
        mtail = 0
        do 20 col = 1, n 
           pc = cp (col)
           clen = ii (pc+6)
           if (clen .gt. 0) then 
!             place the column in the link list of blocks in xx
              if (mhead .eq. 0) then 
                 mhead = pc
              endif 
              ii (pc+4) = mtail
              ii (pc+3) = 0
              if (mtail .ne. 0) then 
                 ii (mtail+3) = pc
              endif 
              mtail = pc
           else 
              ii (pc+2) = 0
              ii (pc+4) = 0
              ii (pc+3) = 0
           endif 
20      continue 

!       ----------------------------------------------------------------
!       convert dense columns to a-priori contribution blocks and
!       get the count of nonzeros in each row
!       ----------------------------------------------------------------

        e = n
        dnz = 0
        do 50 col = 1, n 
           pc = cp (col)
           clen = ii (pc+6)
           cep = (pc+9)
           if (clen .gt. dsiz) then 
!             this is a dense column - add to element list length
              dnz = dnz + clen
              do 30 ip = cep, cep + clen - 1 
                 row = ii (ip)
                 wr (row) = wr (row) + 1
30            continue 
!             convert dense column (in place) into a frontal matrix
              e = e + 1
              ep = pc
              rp (e) = ep
              fdimc = clen
              fleftc = clen
              fleftr = 1
              ii (ep+1) = fdimc
              ii (ep+5) = fleftr
              ii (ep+6) = fleftc
              wr (e) = w0-1
              wc (e) = w0-1
              lurp = (ep+8)
              ii (lurp) = col
              fmax = max (fmax, fleftc)
           else 
!             this is a sparse column - add to orig entry length
              do 40 ip = cep, cep + clen - 1 
                 row = ii (ip)
                 wc (row) = wc (row) + 1
40            continue 
           endif 
50      continue 

!       ----------------------------------------------------------------
!       get memory for row-oriented form, and dense column element lists
!       ----------------------------------------------------------------

        pr = ihead
        csiz = cscal + 2
        is = (nz + rscal*n + dnz) + (dn * csiz)
        ihead = ihead + is
        iuse = iuse + is
        ineed = iuse
        xneed = xuse
        info (18) = max (info (18), iuse)
        info (19) = max (info (19), ineed)
        if (ihead .gt. itail) then 
!          error return, if not enough integer memory:
           go to 9000
        endif 

!       ----------------------------------------------------------------
!       if memory is available, add up to dsiz+6 extra slots in the
!       reconstructed dense columns to allow for element list growth
!       ----------------------------------------------------------------

        if (dn .gt. 0) then 
           extra = min ((itail - ihead) / dn, dsiz + 6)
           csiz = csiz + extra
           is = dn * extra
           ihead = ihead + is
           iuse = iuse + is
           info (18) = max (info (18), iuse)
        endif 

!       ----------------------------------------------------------------
!       construct row pointers
!       ----------------------------------------------------------------

        do 60 row = 1, n 
           rp (row) = pr
           rep  = (pr+2)
           reln = wr (row)
           rlen = wc (row)
           rsiz = 2*reln + rlen + rscal
           ii (pr) = rsiz
           rdeg = reln + rlen
           ii (pr+1) = rdeg
           wm (row) = rep
           pr = pr + rsiz
60      continue 

!       ----------------------------------------------------------------
!       construct row element lists for dense columns
!       ----------------------------------------------------------------

        pc = pr
        do 80 e = n+1, n+dn 
           ep = rp (e)
           lucp = (ep+9)
           fdimc = ii (ep+1)
!fpp$ nodepchk l
           do 70 f = 0, fdimc - 1 
              row = ii (lucp+f)
              ii (wm (row)    ) = e
              ii (wm (row) + 1) = f
              wm (row) = wm (row) + 2
70         continue 
!          re-construct dense columns as just an element list,
!          containing a single element tuple (e,f), where f = 0
           lurp = (ep+8)
           col = ii (lurp)
           cp (col) = pc
           ii (pc) = csiz
           cdeg = fdimc
           ii (pc+1) = cdeg
           ii (pc+2) = 0
           ii (pc+4) = 0
           ii (pc+3) = 0
           ii (pc+5) = 1
           ii (pc+6) = 0
           ii (pc+7) = 0
           ii (pc+8) = 0
!          store the (e,0) tuple:
           cep = (pc+9)
           ii (cep  ) = e
           ii (cep+1) = 0
           pc = pc + csiz
80      continue 

!       ----------------------------------------------------------------
!       construct the nonzero pattern of the row-oriented form
!       ----------------------------------------------------------------

        do 100 col = 1, n 
           pc = cp (col)
           cep = (pc+9)
           clen = ii (pc+6)
!fpp$ nodepchk l
           do 90 p = cep, cep + clen - 1 
              row = ii (p)
              ii (wm (row)) = col
              wm (row) = wm (row) + 1
90         continue 
100     continue 

!       count the numerical assembly of the original matrix
        rinfo (2) = rinfo (2) + (nz)

!       ----------------------------------------------------------------
!       initialize the degree lists
!       ----------------------------------------------------------------

!       do so in reverse order to try to improve pivot tie-breaking
        do 110 col = n, 1, -1 
           pc = cp (col)
           cdeg = ii (pc+1)
           if (cdeg .le. 0) then 
!             empty column - remove from pivot search
              cdeg = -(n+2)
              ii (pc+1) = cdeg
           else 
              cnxt = head (cdeg)
              ii (pc+7) = cnxt
              ii (pc+8) = 0
              if (cnxt .ne. 0) then 
                 ii (cp (cnxt)+8) = col
              endif 
              head (cdeg) = col
           endif 
110     continue 

!=======================================================================
!=======================================================================
!  main factorization loop [
!=======================================================================
!=======================================================================

        do 1540 dummy1 = 1, n 
!       (this loop is not indented due to its length)

!       ----------------------------------------------------------------
!       factorization is done if n pivots have been found
!       ----------------------------------------------------------------

        if (npiv .ge. n) then 
           go to 2000
        endif 

!=======================================================================
!  global pivot search, and initialization of a new frontal matrix [
!=======================================================================

        if (mtail .ne. 0 .and. ii (mtail+6) .eq. 0) then 
!          tail block is free, delete it
           xp = ii (mtail+2)
           xuse = xuse - (xhead - xp)
           xhead = xp
           if (mtail .eq. pfree) then 
              pfree = 0
              xfree = -1
           endif 
           mtail = ii (mtail+4)
           if (mtail .ne. 0) then 
              ii (mtail+3) = 0
           else 
!             singular matrix.  no columns or contribution blocks left.
              mhead = 0
           endif 
        endif 

!=======================================================================
!  global pivot search:  find pivot row and column
!=======================================================================

        nsons = 0
        sonlst = 0
        srched = 0
        pivcol = 0
        slist = 0

        do 255 dummy2 = 1, n 

!          -------------------------------------------------------------
!          get col from column upper-bound degree list
!          -------------------------------------------------------------

           col = 0
           do 140 cdeg = mindeg, n 
              col = head (cdeg)
              if (col .ne. 0) then 
!                exit out of loop if column found:
                 go to 150
              endif 
140        continue 
           if (col .eq. 0) then 
!             exit out of loop if column not found (singular matrix):
              go to 260
           endif 
!          loop exit label:
150        continue
           pc = cp (col)
           cnxt = ii (pc+7)
           if (cnxt .ne. 0) then 
              ii (cp (cnxt)+8) = 0
           endif 
           head (cdeg) = cnxt
           mindeg = cdeg

!          -------------------------------------------------------------
!          construct candidate column in wm and xx (wxp..wxp+cdeg-1)
!          -------------------------------------------------------------

           xs = cdeg
!          use wm (1..cdeg) for pattern [
!          use xx (wxp..wxp+xs-1) as workspace for values [

           if (xs .gt. xtail-xhead) then 

              info (15) = info (15) + 1
              call umd2fg (xx, xsize, xhead, xtail, xuse,
     >                     ii, isize, ihead, itail, iuse,
     >                     cp, rp, dn, n, icntl, wir, wic, wr, wc,
     >                     0, 0, 0, 0, .false.,
     >                     pfree, xfree, mhead, mtail, slots)
!             at this point, iuse = ineed and xuse = xneed
              pc = cp (col)
           endif 

           wxp = xhead
           xhead = xhead + xs
           xuse = xuse + xs
           xneed = xneed + xs
           info (20) = max (info (20), xuse)
           info (21) = max (info (21), xneed)
           if (xhead .gt. xtail) then 
!             error return, if not enough real memory:
              go to 9000
           endif 

!          -------------------------------------------------------------
!          assemble the elements in the element list
!          -------------------------------------------------------------

           cdeg = 0
           cep = (pc+9)
           celn = ii (pc+5)
           do 190 ip = cep, cep + 2*celn - 2, 2 
              e = ii (ip)
              f = ii (ip+1)
              ep = rp (e)
              fdimc = ii (ep+1)
              fxp = ii (ep+2)
              if (e .le. n) then 
                 fluip = ii (ep)
                 ludegc = ii (fluip+3)
                 lucp = (fluip + 7)
              else 
                 ludegc = fdimc
                 lucp = (ep+9)
              endif 
              xp = fxp + f * fdimc
!             split into 3 loops so that they all vectorize on a cray
              cdeg1 = cdeg
              do 160 p = lucp, lucp + ludegc - 1 
                 row = ii (p)
                 if (row .gt. 0) then 
                    if (wir (row) .le. 0) then 
                       cdeg = cdeg + 1
                       wm (cdeg) = row
                    endif 
                 endif 
160           continue 
              do 170 i = cdeg1+1, cdeg 
                 row = wm (i)
                 wir (row) = i
                 xx (wxp+i-1) = 0
170           continue 
!fpp$ nodepchk l
              do 180 j = 0, ludegc - 1 
                 row = ii (lucp+j)
                 if (row .gt. 0) then 
                    xx (wxp + wir (row) - 1) =
     >              xx (wxp + wir (row) - 1) + xx (xp+j)
                 endif 
180           continue 
190        continue 

!          -------------------------------------------------------------
!          assemble the original entries in the column
!          -------------------------------------------------------------

           cdeg1 = cdeg
           clen = ii (pc+6)
           csiz = ii (pc)
           ip = pc + csiz - clen
           cxp = ii (pc+2)
!fpp$ nodepchk l
           do 200 i = 0, clen - 1 
              row = ii (ip+i)
              wm (cdeg+1+i) = row
              xx (wxp+cdeg+i) = xx (cxp+i)
200        continue 
           cdeg = cdeg + clen

!          -------------------------------------------------------------
!          update the degree of this column (exact, not upper bound)
!          -------------------------------------------------------------

           ii (pc+1) = cdeg

!          wm (1..cdeg) holds the pattern of col being searched.
!          xx (wxp..wxp+cdeg-1) holds the numerical values of col being
!          searched.  wir (wm (1..cdeg1)) is 1..cdeg1.

!          -------------------------------------------------------------
!          find the maximum absolute value in the column
!          -------------------------------------------------------------

           maxval = abs (xx (wxp - 1 + idamax (cdeg, xx (wxp), 1)))
           rinfo (3) = rinfo (3) + (cdeg)
           toler = relpt * maxval
           rdeg = n+1

!          -------------------------------------------------------------
!          look for the best possible pivot row in this column
!          -------------------------------------------------------------

           if (cdeg .ne. 0 .and. maxval .gt. 0) then 
              if (symsrc) then 
!                prefer symmetric pivots, if numerically acceptable
                 row = col
                 rowpos = wir (row)
                 if (rowpos .le. 0) then 
!                   diagonal may be in original entries
                    do 210 i = cdeg1 + 1, cdeg1 + clen 
                       if (wm (i) .eq. row) then 
                          rowpos = i
!                         exit out of loop if symmetric pivot found:
                          go to 220
                       endif 
210                 continue 
!                   loop exit label:
220                 continue
                 endif 
                 if (rowpos .gt. 0) then 
!                   diagonal entry exists in the column pattern
                    apiv = abs (xx (wxp-1+rowpos))
                    if (apiv .ge. toler .and. apiv .gt. 0) then 
!                      diagonal entry is numerically acceptable
                       pr = rp (row)
                       rdeg = ii (pr+1)
                    endif 
                 endif 
              endif 
              if (rdeg .eq. n+1) then 
!                continue searching - no diagonal found or sought for.
!                minimize row degree subject to abs(value) constraints.
                 row = n+1
                 do 230 i = 1, cdeg 
                    row2 = wm (i)
                    pr = rp (row2)
                    rdeg2 = ii (pr+1)
!                   among those numerically acceptable rows of least
!                   (upper bound) degree, select the row with the
!                   lowest row index
                    better = rdeg2 .lt. rdeg .or.
     >                      (rdeg2 .eq. rdeg .and. row2 .lt. row)
                    if (better) then 
                       apiv = abs (xx (wxp-1+i))
                       if (apiv .ge. toler .and. apiv .gt. 0) then 
                          row = row2
                          rdeg = rdeg2
                          rowpos = i
                       endif 
                    endif 
230              continue 
              endif 
           endif 

!          -------------------------------------------------------------
!          deallocate workspace
!          -------------------------------------------------------------

           xhead = xhead - xs
           xuse = xuse - xs
           xneed = xneed - xs
!          done using xx (wxp...wxp+xs-1) ]

!          -------------------------------------------------------------
!          reset work vector
!          -------------------------------------------------------------

           do 240 i = 1, cdeg1 
              wir (wm (i)) = -1
240        continue 

!          -------------------------------------------------------------
!          check to see if a pivot column was found
!          -------------------------------------------------------------

           if (rdeg .eq. n+1) then 

!             ----------------------------------------------------------
!             no pivot found, column is zero
!             ----------------------------------------------------------

!             remove this singular column from any further pivot search
              cdeg = -(n+2)
              ii (pc+1) = cdeg

           else 

!             ----------------------------------------------------------
!             save a list of the columns searched (with nonzero degrees)
!             ----------------------------------------------------------

              srched = srched + 1
              ii (pc+7) = slist
              slist = col

!             ----------------------------------------------------------
!             check if this is the best pivot seen so far
!             ----------------------------------------------------------

!             compute the true markowitz cost without scanning the row
!             wm (1..cdeg) holds pivot column, including pivot row index
!             wm (rowpos) contains the candidate pivot row index
              cost = (cdeg - 1) * (rdeg - 1)
              if (pivcol .eq. 0 .or. cost .lt. bestco) then 
                 fflefc = cdeg
                 do 250 i = 1, fflefc-1 
                    wpc (i) = wm (i)
250              continue 
!                remove the pivot row index from pivot column pattern
                 wpc (rowpos) = wm (fflefc)
                 pivcol = col
                 pivrow = row
                 bestco = cost
              endif 
           endif 

!          done using wm (1..cdeg) for pattern ]
!          wpc (1..fflefc-1) holds pivot column (excl. pivot row index)

!          -------------------------------------------------------------
!          exit global pivot search if nsrch pivots have been searched
!          -------------------------------------------------------------

           if (srched .ge. nsrch) then 
              go to 260
           endif 

255     continue 
!       exit label for loop 255:
260     continue

!=======================================================================
!  quit early if no pivot found (singular matrix detected)
!=======================================================================

        if (pivcol .eq. 0) then 
!          complete the column permutation vector in
!          wpc (n-npiv+1 ... n) in reverse order
           k = n - npiv + 1
           do 270 col = 1, n 
              if (cp (col) .ne. 0) then 
!                this is a non-pivotal column
                 k = k - 1
                 wpc (k) = col
                 cp (col) = 0
              endif 
270        continue 
!          complete the row permutation vector in
!          wpr (n-npiv+1 ... n) in reverse order
           k = n - npiv + 1
           do 280 row = 1, ndn 
              if (row .gt. n) then 
!                this is an artificial frontal matrix
                 e = row
                 rp (e) = 0
              else if (rp (row) .ne. 0) then 
                 rlen = wc (row)
                 if (rlen .ge. 0 .and. rlen .le. n) then 
!                   this is a non-pivotal row
                    k = k - 1
                    wpr (k) = row
                    rp (row) = 0
                 else if (rlen .ne. -(ndn+2)) then 
!                   this is an unassembled element: convert to lu
                    e = row
                    ep = rp (row)
                    wr (e) = -(ndn+2)
                    wc (e) = -(ndn+2)
                    fluip = ii (ep)
                    rp (e) = fluip
                 endif 
              endif 
280        continue 
!          factorization is done, exit the main factorization loop:
           go to 2000
        endif 

!=======================================================================
!  place the non-pivotal columns searched back in degree lists
!=======================================================================

        do 300 i = 1, srched 
           col = slist
           pc = cp (col)
           slist = ii (pc+7)
           if (col .ne. pivcol) then 
              cdeg = ii (pc+1)
              cnxt = head (cdeg)
              ii (pc+7) = cnxt
              ii (pc+8) = 0
              if (cnxt .ne. 0) then 
                 ii (cp (cnxt)+8) = col
              endif 
              head (cdeg) = col
              mindeg = min (mindeg, cdeg)
           endif 
300     continue 

!=======================================================================
!  construct pivot row pattern
!=======================================================================

!       at this point, wir (1..n) = -1 and wic (1..n) is -2 for
!       nonpivotal columns and -1 for pivotal columns.
!       wic (wpr (1..fflefr+1)) is set to zero in the code below.  it
!       will be set to the proper offsets in do 775, once ffdimc is
!       known (offsets are dependent on ffdimc, which is dependent on
!       fflefr calculated below, and the memory allocation).

!       ----------------------------------------------------------------
!       assemble the elements in the element list
!       ----------------------------------------------------------------

        pr = rp (pivrow)
        fflefr = 0
        rep = (pr+2)
        reln = wr (pivrow)
        do 330 ip = rep, rep + 2*reln - 2, 2 
           e = ii (ip)
           ep = rp (e)
           if (e .le. n) then 
              fluip = ii (ep)
              lucp = (fluip + 7)
              ludegr = ii (fluip+2)
              ludegc = ii (fluip+3)
              lurp = lucp + ludegc
!             split into two loops so that they both vectorize on a cray
              f1 = fflefr
              do 310 p = lurp, lurp + ludegr - 1 
                 col = ii (p)
                 if (col .gt. 0) then 
                    if (wic (col) .eq. -2) then 
                       fflefr = fflefr + 1
                       wpr (fflefr) = col
                    endif 
                 endif 
310           continue 
              do 320 i = f1+1, fflefr 
                 wic (wpr (i)) = 0
320           continue 
           else 
!             this is an artifical element (a dense column)
              lurp = (ep+8)
              col = ii (lurp)
              if (wic (col) .eq. -2) then 
                 fflefr = fflefr + 1
                 wpr (fflefr) = col
                 wic (col) = 0
              endif 
           endif 
330     continue 

!       ----------------------------------------------------------------
!       assemble the original entries in the pivot row
!       ----------------------------------------------------------------

        rsiz = ii (pr)
        rlen = wc (pivrow)
        do 340 p = pr + rsiz - rlen, pr + rsiz - 1 
           col = ii (p)
           if (wic (col) .eq. -2) then 
              fflefr = fflefr + 1
              wpr (fflefr) = col
           endif 
340     continue 
!       the exact degree of the pivot row is fflefr

!=======================================================================
!  initialize the new frontal matrix
!=======================================================================

!       ffrow is the name of current frontal matrix
        ffrow = pivrow
        e1 = pivrow
        k = 1
        k0 = 0

        ffdimr = min (kleft, int (gro * fflefr))
        ffdimc = min (kleft, int (gro * fflefc))

        fmaxr = fflefr
        fmaxc = fflefc
        ffsize = ffdimc * ffdimr
        rscan = max (dsiz, ffdimr)

!       ----------------------------------------------------------------
!       compute the offsets for rows in the pivot column
!       and the offsets for columns in the pivot row
!       ----------------------------------------------------------------

        do 350 i = 1, fflefc - 1 
           wir (wpc (i)) = i - 1
350     continue 
        do 360 i = 1, fflefr 
           wic (wpr (i)) = (i - 1) * ffdimc
360     continue 

!       ----------------------------------------------------------------
!       remove the pivot column index from the pivot row pattern
!       ----------------------------------------------------------------

        col = wpr (fflefr)
        colpos = (wic (pivcol)/ffdimc)+1
        wpr (colpos) = col
        wic (col) = wic (pivcol)
        wic (pivcol) = (ffdimr - 1) * ffdimc
        wir (pivrow) = ffdimc - 1

!       ----------------------------------------------------------------
!       remove the pivot row/col from the nonzero count
!       ----------------------------------------------------------------

        fflefr = fflefr - 1
        fflefc = fflefc - 1

!       ----------------------------------------------------------------
!       allocate the working array, doing garbage collection if needed
!       also allocate space for a work vector of size ffdimc
!       ----------------------------------------------------------------

        if (ffsize + ffdimc .gt. xtail-xhead) then 
           info (15) = info (15) + 1
           call umd2fg (xx, xsize, xhead, xtail, xuse,
     >                  ii, isize, ihead, itail, iuse,
     >                  cp, rp, dn, n, icntl, wir, wic, wr, wc,
     >                  0, 0, 0, 0, .false.,
     >                  pfree, xfree, mhead, mtail, slots)
!          at this point, iuse = ineed and xuse = xneed
        endif 

        ffxp = xhead
        xhead = xhead + ffsize
        wxp = xhead
        xhead = xhead + ffdimc
        xuse = xuse + ffsize + ffdimc
        xneed = xneed + ffsize + ffdimc
        info (20) = max (info (20), xuse)
        info (21) = max (info (21), xneed)
        if (xhead .gt. xtail) then 
!          error return, if not enough real memory:
           go to 9000
        endif 

!       ----------------------------------------------------------------
!       get memory usage for next call to umd2rf
!       ----------------------------------------------------------------

        xruse = xruse + ffsize
        xrmax = max (xrmax, xruse)

!       ----------------------------------------------------------------
!       zero the working array
!       ----------------------------------------------------------------

!       zero the contribution block:
        do 380 j = 0, fflefr - 1 
           do 370 i = 0, fflefc - 1 
              xx (ffxp + j*ffdimc + i) = 0
370        continue 
380     continue 

!       zero the pivot row:
        do 390 j = 0, fflefr - 1 
           xx (ffxp + j*ffdimc + ffdimc-1) = 0
390     continue 

!       zero the pivot column:
        do 400 i = 0, fflefc - 1 
           xx (ffxp + (ffdimr-1)*ffdimc + i) = 0
400     continue 

!       zero the pivot entry itself:
        xx (ffxp + (ffdimr-1)*ffdimc + ffdimc-1) = 0

!       ----------------------------------------------------------------
!       current workspace usage:
!       ----------------------------------------------------------------

!       wpc (1..fflefc):        holds the pivot column pattern
!                               (excluding the pivot row index)
!       wpc (fflefc+1 .. n-npiv):       unused
!       wpc (n-npiv+1 .. n):            pivot columns in reverse order
!
!       wpr (1..fflefr):        holds the pivot row pattern
!                               (excluding the pivot column index)
!       wpr (fflefr+1 .. n-npiv):       unused
!       wpr (n-npiv+1 .. n):            pivot rows in reverse order
!
!       c (1..ffdimr, 1..ffdimc):  space for the new frontal matrix.
!
!       c (i,j) is located at xx (ffxp+((i)-1)+((j)-1)*ffdimc)
!
!       wir (row) >= 0 for each row in pivot column pattern.
!               offset into pattern is given by:
!               wir (row) == offset - 1
!               also, wir (pivrow) is ffdimc-1, the offset in c of
!               the pivot row itself.
!               otherwise, wir (1..n) is -1
!
!       wic (col) >= 0 for each col in pivot row pattern.
!               wic (col) == (offset - 1) * ffdimc
!               also, wic (pivcol) is (ffdimr-1)*ffdimc,
!               the offset in c of the pivot column itself.
!               otherwise, wic (1..n) is -2 for nonpivotal columns,
!               and -1 for pivotal columns

!       ----------------------------------------------------------------
!       remove the columns affected by this element from degree lists
!       ----------------------------------------------------------------

        do 410 j = 1, fflefr 
           pc = cp (wpr (j))
           cdeg = ii (pc+1)
           if (cdeg .gt. 0) then 
              cnxt = ii (pc+7)
              cprv = ii (pc+8)
              if (cnxt .ne. 0) then 
                 ii (cp (cnxt)+8) = cprv
              endif 
              if (cprv .ne. 0) then 
                 ii (cp (cprv)+7) = cnxt
              else 
                 head (cdeg) = cnxt
              endif 
           endif 
410     continue 

!=======================================================================
!  initialization of new frontal matrix is complete ]
!=======================================================================

!=======================================================================
!  assemble and factorize the current frontal matrix [
!=======================================================================

!       for first pivot in frontal matrix, do all scans
        scan1 = 0
        scan2 = 0
        scan3 = 0
        scan4 = 0

        do 1395 dummy3 = 1, n 
!       (this loop is not indented due to its length)

!=======================================================================
!  degree update and numerical assembly [
!=======================================================================

        kleft1 = kleft - 1

!       ----------------------------------------------------------------
!       scan1:  scan the element lists of each row in the pivot col
!               and compute the external column degree for each frontal
!       ----------------------------------------------------------------

        row = pivrow
        do 440 j = scan1, fflefc 
           if (j .ne. 0) then 
!             get a row;  otherwise, scan the pivot row if j is zero.
              row = wpc (j)
           endif 
           pr = rp (row)
           rep = (pr+2)
           reln = wr (row)
!fpp$ nodepchk l
           do 430 p = rep, rep + 2*reln - 2, 2 
              e = ii (p)
              if (wc (e) .lt. w0) then 
!                this is the first time seen in either scan 1 or 2:
                 ep = rp (e)
                 fleftr = ii (ep+5)
                 fleftc = ii (ep+6)
                 wr (e) = fleftr + w0
                 wc (e) = fleftc + w0
              endif 
              wc (e) = wc (e) - 1
430        continue 
440     continue 

!       ----------------------------------------------------------------
!       scan2:  scan the element lists of each col in the pivot row
!               and compute the external row degree for each frontal
!       ----------------------------------------------------------------

        col = pivcol
        do 460 j = scan2, fflefr 
           if (j .ne. 0) then 
!             get a col;  otherwise, scan the pivot col if j is zero.
              col = wpr (j)
           endif 
           pc = cp (col)
           celn = ii (pc+5)
           cep = (pc+9)
!fpp$ nodepchk l
           do 450 p = cep, cep + 2*celn - 2, 2 
              e = ii (p)
              if (wr (e) .lt. w0) then 
!                this is the first time seen in either scan 1 or 2:
                 ep = rp (e)
                 fleftr = ii (ep+5)
                 fleftc = ii (ep+6)
                 wr (e) = fleftr + w0
                 wc (e) = fleftc + w0
              endif 
              wr (e) = wr (e) - 1
450        continue 
460     continue 

!       ----------------------------------------------------------------
!       scan3:  scan the element lists of each column in pivot row
!               do degree update for the columns
!               assemble effective usons and lu-sons
!       ----------------------------------------------------------------

!       flag usons in wc (e) as scanned (all now unflagged) [
!       uses wc (e) for the link list.  wc (e) <= 0
!       means that e is in the list, the external column
!       degree is zero, and -(wc (e)) is the next element in
!       the uson list.

        col = pivcol
        do 700 jj = scan3, fflefr 

!          -------------------------------------------------------------
!          assemble and update the degree of a column
!          -------------------------------------------------------------

           if (jj .ne. 0) then 
!             get a col;  otherwise, scan the pivot col if jj is zero
              col = wpr (jj)
           endif 

!          -------------------------------------------------------------
!          compute the degree, and partition the element list into
!          two parts.  the first part are not lusons or usons, and
!          are not assembled.  the second part is assembled.
!          -------------------------------------------------------------

           cdeg = 0
           deln = 0
           pc = cp (col)
           cep = (pc+9)
           celn = ii (pc+5)
           ip2 = cep + 2*celn - 2
           xudp = ffxp + wic (col)
!fpp$ nodepchk l
           do 470 ip = cep, ip2, 2 
              e = ii (ip)
              if (wc (e) .gt. w0) then 
!                this element cannot be assembled
                    cdeg = cdeg + (wc (e) - w0)
              else 
!                delete this tuple from the element list
                 deln = deln + 1
                 wm (deln) = ip
              endif 
470       continue 

          if (deln .ne. 0) then 

!             ----------------------------------------------------------
!             move the deleted tuples to the end of the element list
!             ----------------------------------------------------------

              p2 = ip2
              do 480 i = deln, 1, -1 
                 e = ii (wm (i)  )
                 f = ii (wm (i)+1)
                 ii (wm (i)  ) = ii (p2  )
                 ii (wm (i)+1) = ii (p2+1)
                 ii (p2  ) = e
                 ii (p2+1) = f
                 p2 = p2 - 2
480           continue 

!             ----------------------------------------------------------
!             assemble from lusons and usons (the deleted tuples) 
!             ----------------------------------------------------------

              do 670 ip = p2 + 2, ip2, 2 

!                -------------------------------------------------------
!                this is an luson or uson.  if fextc < 0 then this has
!                already been assembled.
!                -------------------------------------------------------

                 e = ii (ip)
                 if (wc (e) .lt. w0) then 
!                   go to next iteration if already assembled
                    goto 670
                 endif 

!                -------------------------------------------------------
!                get scalar info, add son to list if not already there
!                -------------------------------------------------------

                 ep = rp (e)
                 fdimc = ii (ep+1)
                 fxp = ii (ep+2)
                 fleftr = ii (ep+5)
                 fleftc = ii (ep+6)
                 if (e .le. n) then 
                    fluip = ii (ep)
                    ludegr = ii (fluip+2)
                    ludegc = ii (fluip+3)
                    lucp = (fluip + 7)
                    lurp = lucp + ludegc
                    if (wir (e) .eq. -1) then 
                       wir (e) = sonlst - n - 2
                       sonlst = e
                       nsons = nsons + 1
                    endif 
                 else 
!                   an artificial frontal matrix
                    ludegr = 1
                    ludegc = fdimc
                    lucp = (ep+9)
                    lurp = (ep+8)
                 endif 

!                -------------------------------------------------------
                 if (wr (e) .eq. w0) then 
!                this is an luson - assemble an entire frontal matrix
!                -------------------------------------------------------

!                   ----------------------------------------------------
                    if (ludegc .eq. fleftc) then 
!                   no rows assembled out of this frontal yet
!                   ----------------------------------------------------

!                      compute the compressed column offset vector
!                      use wm (1..ludegc for offsets) [
                       do 490 i = 0, ludegc-1 
                          row2 = ii (lucp+i)
                          wm (i+1) = wir (row2)
490                    continue 

!                      -------------------------------------------------
                       if (ludegr .eq. fleftr) then 
!                      no rows or cols assembled out of frontal yet
!                      -------------------------------------------------

                          do 510 j = 0, ludegr-1 
                             col2 = ii (lurp+j)
                             xdp = ffxp + wic (col2)
!fpp$ nodepchk l
                             do 500 i = 0, ludegc-1 
                                xx (xdp + wm (i+1)) =
     >                          xx (xdp + wm (i+1)) +
     >                          xx (fxp + j*fdimc + i)
500                          continue 
510                       continue 

!                      -------------------------------------------------
                       else 
!                      only cols have been assembled out of frontal
!                      -------------------------------------------------

                          do 530 j = 0, ludegr-1 
                             col2 = ii (lurp+j)
                             if (col2 .gt. 0) then 
                                xdp = ffxp + wic (col2)
!fpp$ nodepchk l
                                do 520 i = 0, ludegc-1 
                                   xx (xdp + wm (i+1)) =
     >                             xx (xdp + wm (i+1)) +
     >                             xx (fxp + j*fdimc + i)
520                             continue 
                             endif 
530                       continue 
                       endif 
!                      done using wm (1..ludegc for offsets) ]

!                   ----------------------------------------------------
                    else 
!                   some rows have been assembled out of this frontal
!                   ----------------------------------------------------

!                      compute the compressed column offset vector
!                      use wm (1..ludegc for offsets) [
                       degc = 0
                       do 540 i = 0, ludegc-1 
                          row2 = ii (lucp+i)
                          if (row2 .gt. 0) then 
                             degc = degc + 1
                             wj (degc) = i
                             wm (degc) = wir (row2)
                          endif 
540                    continue 

!                      -------------------------------------------------
                       if (ludegr .eq. fleftr) then 
!                      only rows assembled out of this frontal
!                      -------------------------------------------------

                          do 560 j = 0, ludegr-1 
                             col2 = ii (lurp+j)
                             xdp = ffxp + wic (col2)
!fpp$ nodepchk l
                             do 550 i = 1, degc 
                                xx (xdp + wm (i)) =
     >                          xx (xdp + wm (i)) +
     >                          xx (fxp + j*fdimc + wj (i))
550                          continue 
560                       continue 

!                      -------------------------------------------------
                       else 
!                      both rows and columns assembled out of frontal
!                      -------------------------------------------------

                          do 580 j = 0, ludegr-1 
                             col2 = ii (lurp+j)
                             if (col2 .gt. 0) then 
                                xdp = ffxp + wic (col2)
!fpp$ nodepchk l
                                do 570 i = 1, degc 
                                   xx (xdp + wm (i)) =
     >                             xx (xdp + wm (i)) +
     >                             xx (fxp + j*fdimc + wj (i))
570                             continue 
                             endif 
580                       continue 
                       endif 
!                      done using wm (1..ludegc for offsets) ]
                    endif 

!                   ----------------------------------------------------
!                   deallocate the luson frontal matrix
!                   ----------------------------------------------------

                    wr (e) = -(ndn+2)
                    wc (e) = -(ndn+2)
                    if (e .le. n) then 
                       rp (e) = fluip
                       ii (ep) = fscal
                       ineed = ineed - fscal
                    else 
                       rp (e) = 0
                       ii (ep) = fdimc + cscal
                       ineed = ineed - (fdimc + cscal)
                    endif 
                    ii (ep+1) = -1
                    ii (ep+6) = 0
                    mprev = ii (ep+4)
                    mnext = ii (ep+3)
                    xneed = xneed - ludegr*ludegc
                    if (mnext .ne. 0 .and. ii (mnext+6) .eq. 0) then 
!                      next block is free - delete it
                       mnext = ii (mnext+3)
                       ii (ep+3) = mnext
                       if (mnext .ne. 0) then 
                          ii (mnext+4) = ep
                       else 
                          mtail = ep
                       endif 
                    endif 
                    if (mprev .ne. 0 .and. ii (mprev+6) .eq. 0) then 
!                      previous block is free - delete it
                       ii (ep+2) = ii (mprev+2)
                       mprev = ii (mprev+4)
                       ii (ep+4) = mprev
                       if (mprev .ne. 0) then 
                          ii (mprev+3) = ep
                       else 
                          mhead = ep
                       endif 
                    endif 
!                   get the size of the freed block
                    if (mnext .ne. 0) then 
                       xs = ii (mnext+2) - ii (ep+2)
                    else 
                       xs = ffxp - ii (ep+2)
                    endif 
                    if (xs .gt. xfree) then 
!                      keep track of the largest free block
                       xfree = xs
                       pfree = ep
                    endif 

!                   ----------------------------------------------------
!                   get memory usage for next call to umd2rf
!                   ----------------------------------------------------

                    xruse = xruse - ludegr*ludegc

!                -------------------------------------------------------
                 else if (wr (e) - w0 .le. fleftr/2) then 
!                this is a uson - assemble all possible columns
!                -------------------------------------------------------

!                   ----------------------------------------------------
!                   add to uson list - to be cleared just after scan 3
!                   ----------------------------------------------------

                    wc (e) = -usons
                    usons = e

!                   ----------------------------------------------------
                    if (ludegc .eq. fleftc) then 
!                   no rows assembled out of this uson frontal yet
!                   ----------------------------------------------------

!                      compute the compressed column offset vector
!                      use wm (1..ludegc for offsets)
                       do 590 i = 0, ludegc-1 
                          row2 = ii (lucp+i)
                          wm (i+1) = wir (row2)
590                    continue 

                       do 610 j = 0, ludegr-1 
                          col2 = ii (lurp+j)
                          if (col2 .gt. 0) then 
                             if (wic (col2) .ge. 0) then 
                                xdp = ffxp + wic (col2)
!fpp$ nodepchk l
                                do 600 i = 0, ludegc-1 
                                   xx (xdp + wm (i+1)) =
     >                             xx (xdp + wm (i+1)) +
     >                             xx (fxp + j*fdimc + i)
600                             continue 
!                               flag this column as assembled from uson
                                ii (lurp+j) = -col2
                             endif 
                          endif 
610                    continue 

!                   ----------------------------------------------------
                    else 
!                   some rows already assembled out of this uson frontal
!                   ----------------------------------------------------

!                      compute the compressed column offset vector
!                      use wm (1..ludegc for offsets)
                       degc = 0
                       do 620 i = 0, ludegc-1 
                          row2 = ii (lucp+i)
                          if (row2 .gt. 0) then 
                             degc = degc + 1
                             wj (degc) = i
                             wm (degc) = wir (row2)
                          endif 
620                    continue 

                       do 640 j = 0, ludegr-1 
                          col2 = ii (lurp+j)
                          if (col2 .gt. 0) then 
                             if (wic (col2) .ge. 0) then 
                                xdp = ffxp + wic (col2)
!fpp$ nodepchk l
                                do 630 i = 1, degc 
                                   xx (xdp + wm (i)) =
     >                             xx (xdp + wm (i)) +
     >                             xx (fxp + j*fdimc + wj (i))
630                             continue 
!                               flag this column as assembled from uson
                                ii (lurp+j) = -col2
                             endif 
                          endif 
640                    continue 

                    endif 

                    fleftr = wr (e) - w0
                    ii (ep+5) = fleftr

!                -------------------------------------------------------
                 else 
!                this is a uson - assemble just one column
!                -------------------------------------------------------

!                   get the offset, f, from the (e,f) tuple
                    f = ii (ip+1)

!                   ----------------------------------------------------
                    if (ludegc .eq. fleftc) then 
!                   no rows assembled out of this uson yet
!                   ----------------------------------------------------

!fpp$ nodepchk l
                       do 650 i = 0, ludegc-1 
                          row2 = ii (lucp+i)
                          xx (xudp + wir (row2)) =
     >                    xx (xudp + wir (row2)) + 
     >                    xx (fxp + f*fdimc + i)
650                    continue 

!                   ----------------------------------------------------
                    else 
!                   some rows already assembled out of this uson
!                   ----------------------------------------------------

!fpp$ nodepchk l
                       do 660 i = 0, ludegc-1 
                          row2 = ii (lucp+i)
                          if (row2 .gt. 0) then 
                             xx (xudp + wir (row2)) =
     >                       xx (xudp + wir (row2)) +
     >                       xx (fxp + f*fdimc + i)
                          endif 
660                    continue 
                    endif 

!                   ----------------------------------------------------
!                   decrement count of unassembled cols in frontal
!                   ----------------------------------------------------

                    ii (ep+5) = fleftr - 1
!                   flag the column as assembled from the uson
                    ii (lurp+f) = -col
                 endif 

670           continue 

!             ----------------------------------------------------------
!             update the count of (e,f) tuples in the element list
!             ----------------------------------------------------------

              ii (pc+5) = ii (pc+5) - deln
              ineed = ineed - 2*deln
           endif 

!          -------------------------------------------------------------
!          assemble the original column and update count of entries
!          -------------------------------------------------------------

           clen = ii (pc+6)
           if (clen .gt. 0) then 
              csiz = ii (pc)
              ip = pc + csiz - clen
              dlen = 0
!fpp$ nodepchk l
              do 680 i = 0, clen - 1 
                 row = ii (ip+i)
                 if (wir (row) .ge. 0) then 
!                   this entry can be assembled and deleted
                    dlen = dlen + 1
                    wm (dlen) = i
                 endif 
680           continue 
              if (dlen .ne. 0) then 
                 cxp = ii (pc+2)
                 do 690 j = 1, dlen 
                    i = wm (j)
                    row = ii (ip+i)
!                   assemble the entry
                    xx (xudp + wir (row)) =
     >              xx (xudp + wir (row)) + xx (cxp+i)
!                   and delete the entry
                    ii (ip +i) = ii (ip +j-1)
                    xx (cxp+i) = xx (cxp+j-1)
690              continue 
                 clen = clen - dlen
                 cxp = cxp + dlen
                 ineed = ineed - dlen
                 xneed = xneed - dlen
                 ii (pc+6) = clen
                 if (clen .ne. 0) then 
                    ii (pc+2) = cxp
                 else 
!                   deallocate the real portion of the column:
                    mprev = ii (pc+4)
                    mnext = ii (pc+3)
                    if (mnext .ne. 0 .and. ii (mnext+6) .eq. 0) then
!                      next block is free - delete it
                       mnext = ii (mnext+3)
                       ii (pc+3) = mnext
                       if (mnext .ne. 0) then 
                          ii (mnext+4) = pc
                       else 
                          mtail = pc
                       endif 
                    endif 
                    if (mprev .ne. 0 .and. ii (mprev+6) .eq. 0) then
!                      previous block is free - delete it
                       ii (pc+2) = ii (mprev+2)
                       mprev = ii (mprev+4)
                       ii (pc+4) = mprev
                       if (mprev .ne. 0) then 
                          ii (mprev+3) = pc
                       else 
                          mhead = pc
                       endif 
                    endif 
                    if (pc .eq. mhead) then 
!                      adjust the start of the block if this is head
                       ii (pc+2) = 1
                    endif 
!                   get the size of the freed block
                    if (mnext .ne. 0) then 
                       xs = ii (mnext+2) - ii (pc+2)
                    else 
                       xs = ffxp - ii (pc+2)
                    endif 
                    if (xs .gt. xfree) then 
!                      keep track of the largest free block
                       xfree = xs
                       pfree = pc
                    endif 
                 endif 
              endif 
              cdeg = cdeg + clen
           endif 

!          -------------------------------------------------------------
!          compute the upper bound degree - excluding current front
!          -------------------------------------------------------------

           cdeg2 = ii (pc+1)
           cdeg = min (kleft1 - fflefc, cdeg2, cdeg)
           ii (pc+1) = cdeg

700     continue 

!       ----------------------------------------------------------------
!       scan-3 wrap-up:  remove flags from assembled usons
!       ----------------------------------------------------------------

!       while (usons .ne. ndn+1) do
710     continue
        if (usons .ne. ndn+1) then 
           next = -wc (usons)
           wc (usons) = w0
           usons = next
!       end while:
        goto 710
        endif 
!       done un-flagging usons, all now unflagged in wc (e) ]

!       ----------------------------------------------------------------
!       scan4:  scan element lists of each row in the pivot column
!               do degree update for the rows
!               assemble effective lsons
!       ----------------------------------------------------------------

!       flag lsons in wr (e) (all are now unflagged) [
!       uses wr (e) for the link list.  wr (e) <= 0 means
!       that e is in the list, the external row degree is zero, and
!       -(wr (e)) is the next element in the lson list.

        row = pivrow
        do 840 jj = scan4, fflefc 

!          -------------------------------------------------------------
!          assemble and update the degree of a row
!          -------------------------------------------------------------

           if (jj .ne. 0) then 
!             get a row;  otherwise, scan the pivot row if jj is zero
              row = wpc (jj)
           endif 

!          -------------------------------------------------------------
!          compute the degree, and partition the element list into
!          two parts.  the first part are not lusons or lsons, and
!          are not assembled.  the second part is assembled.
!          -------------------------------------------------------------

           rdeg = 0
           deln = 0
           pr = rp (row)
           rep = (pr+2)
           reln = wr (row)
           ip2 = rep + 2*reln - 2
!fpp$ nodepchk l
           do 720 ip = rep, ip2, 2 
              e = ii (ip)
              if (wr (e) .gt. w0) then 
                 rdeg = rdeg + (wr (e) - w0)
              else 
                 deln = deln + 1
                 wm (deln) = ip
              endif 
720        continue 
                    
           if (deln .ne. 0) then 

!             ----------------------------------------------------------
!             move the deleted tuples to the end of the element list
!             ----------------------------------------------------------

              p2 = ip2
              do 730 i = deln, 1, -1 
                 e = ii (wm (i)  )
                 f = ii (wm (i)+1)
                 ii (wm (i)  ) = ii (p2  )
                 ii (wm (i)+1) = ii (p2+1)
                 ii (p2  ) = e
                 ii (p2+1) = f
                 p2 = p2 - 2
730           continue 

!             ----------------------------------------------------------
!             assemble from lsons (the deleted tuples) 
!             ----------------------------------------------------------

              do 810 ip = p2 + 2, ip2, 2 

!                -------------------------------------------------------
!                this is an luson or lson.  if fextr < 0 then this has
!                already been assembled.  all lusons have already been
!                assembled (in scan3, above).
!                -------------------------------------------------------

                 e = ii (ip)
                 if (wr (e) .lt. w0) then 
!                   go to next iteration if already assembled
                    goto 810
                 endif 

!                -------------------------------------------------------
!                get scalar info, add to son list if not already there
!                -------------------------------------------------------

                 ep = rp (e)
                 fdimc = ii (ep+1)
                 fxp = ii (ep+2)
                 fleftr = ii (ep+5)
                 fleftc = ii (ep+6)
                 if (e .le. n) then 
                    fluip = ii (ep)
                    ludegr = ii (fluip+2)
                    ludegc = ii (fluip+3)
                    lucp = (fluip + 7)
                    lurp = lucp + ludegc
                    if (wir (e) .eq. -1) then 
                       wir (e) = sonlst - n - 2
                       sonlst = e
                       nsons = nsons + 1
                    endif 
                 else 
!                   an artificial frontal matrix
                    ludegr = 1
                    ludegc = fdimc
                    lucp = (ep+9)
                    lurp = (ep+8)
                 endif 

!                -------------------------------------------------------
                 if (wc (e) - w0 .le. fleftc/2) then 
!                this is an lson - assemble all possible rows
!                -------------------------------------------------------

!                   ----------------------------------------------------
!                   add to lson list - to be cleared just after scan 4
!                   ----------------------------------------------------

                    wr (e) = -lsons
                    lsons = e

!                   compute the compressed column offset vector
!                   use wm (1..ludegc for offsets) [
                    degc = 0
                    do 740 i = 0, ludegc-1 
                       row2 = ii (lucp+i)
                       if (row2 .gt. 0) then 
                          if (wir (row2) .ge. 0) then 
!                            this row will be assembled in loop below
                             degc = degc + 1
                             wj (degc) = i
                             wm (degc) = wir (row2)
!                            flag the row as assembled from the lson
                             ii (lucp+i) = -row2
                          endif 
                       endif 
740                 continue 

!                   ----------------------------------------------------
                    if (ludegr .eq. fleftr) then 
!                   no columns assembled out this lson yet
!                   ----------------------------------------------------

                       do 760 j = 0, ludegr-1 
                          col2 = ii (lurp+j)
                          xdp = ffxp + wic (col2)
!fpp$ nodepchk l
                          do 750 i = 1, degc 
                             xx (xdp + wm (i)) =
     >                       xx (xdp + wm (i)) +
     >                       xx (fxp + j*fdimc + wj (i))
750                       continue 
760                    continue 

!                   ----------------------------------------------------
                    else 
!                   some columns already assembled out of this lson
!                   ----------------------------------------------------

                       do 780 j = 0, ludegr-1 
                          col2 = ii (lurp+j)
                          if (col2 .gt. 0) then 
                             xdp = ffxp + wic (col2)
!fpp$ nodepchk l
                             do 770 i = 1, degc 
                                xx (xdp + wm (i)) =
     >                          xx (xdp + wm (i)) +
     >                          xx (fxp + j*fdimc + wj (i))
770                          continue 
                          endif 
780                    continue 
                    endif 

!                   done using wm (1..ludegc for offsets) ]
                    fleftc = wc (e) - w0
                    ii (ep+6) = fleftc

!                -------------------------------------------------------
                 else 
!                this is an lson - assemble just one row
!                -------------------------------------------------------

                    xldp = ffxp + wir (row)
!                   get the offset, f, from the (e,f) tuple
                    f = ii (ip+1)

!                   ----------------------------------------------------
                    if (ludegr .eq. fleftr) then 
!                   no columns assembled out this lson yet
!                   ----------------------------------------------------

!fpp$ nodepchk l
                       do 790 j = 0, ludegr-1 
                          col2 = ii (lurp+j)
                          xx (xldp + wic (col2)) =
     >                    xx (xldp + wic (col2)) +
     >                    xx (fxp + j*fdimc + f)
790                    continue 

!                   ----------------------------------------------------
                    else 
!                   some columns already assembled out of this lson
!                   ----------------------------------------------------

!fpp$ nodepchk l
                       do 800 j = 0, ludegr-1 
                          col2 = ii (lurp+j)
                          if (col2 .gt. 0) then 
                             xx (xldp + wic (col2)) =
     >                       xx (xldp + wic (col2)) +
     >                       xx (fxp + j*fdimc + f)
                          endif 
800                    continue 
                    endif 

                    ii (ep+6) = fleftc - 1
!                   flag the row as assembled from the lson
                    ii (lucp+f) = -row
                 endif 

810           continue 

!             ----------------------------------------------------------
!             update the count of (e,f) tuples in the element list
!             ----------------------------------------------------------

              wr (row) = wr (row) - deln
              ineed = ineed - 2*deln
           endif 

!          -------------------------------------------------------------
!          assemble the original row and update count of entries
!          -------------------------------------------------------------

           rlen = wc (row)
           if (rlen .gt. 0) then 
!             do not scan a very long row:
              if (rlen .le. rscan) then 
                 rsiz = ii (pr)
                 ip = pr + rsiz - rlen
                 dlen = 0
!fpp$ nodepchk l
                 do 820 p = ip, ip + rlen - 1 
                    col = ii (p)
                    if (wic (col) .ne. -2) then 
!                      this entry can be assembled and deleted
!                      if wic (col) = -1, it is an older pivot col,
!                      otherwise (>=0) it is in the current element
                       dlen = dlen + 1
                       wm (dlen) = p
                    endif 
820              continue 
                 if (dlen .ne. 0) then 
                    do 830 j = 1, dlen 
!                      delete the entry
                       ii (wm (j)) = ii (ip+j-1)
830                 continue 
                    rlen = rlen - dlen
                    ineed = ineed - dlen
                    wc (row) = rlen
                 endif 
              endif 
              rdeg = rdeg + rlen
           endif 

!          -------------------------------------------------------------
!          compute the upper bound degree - excluding current front
!          -------------------------------------------------------------

           rdeg2 = ii (pr+1)
           rdeg = min (kleft1 - fflefr, rdeg2, rdeg)
           ii (pr+1) = rdeg

840     continue 

!       ----------------------------------------------------------------
!       scan-4 wrap-up:  remove flags from assembled lsons
!       ----------------------------------------------------------------

!       while (lsons .ne. ndn+1) do
850     continue
        if (lsons .ne. ndn+1) then 
           next = -wr (lsons)
           wr (lsons) = w0
           lsons = next
!       end while:
        goto 850
        endif 
!       done un-flagging lsons, all now unflagged in wr (e) ]

!=======================================================================
!  degree update and numerical assemble is complete ]
!=======================================================================

!=======================================================================
!  factorize frontal matrix until next pivot extends it [
!=======================================================================

        do 1324 dummy4 = 1, n 
!       (this loop is not indented due to its length)

!       ----------------------------------------------------------------
!       wc (e) = fextc+w0, where fextc is the external column
!               degree for each element (ep = rp (e)) appearing in
!               the element lists for each row in the pivot column.
!               if wc (e) < w0, then fextc is defined as ii (ep+6)
!
!       wr (e) = fextr+w0, where fextr is the external row
!               degree for each element (ep = rp (e)) appearing in
!               the element lists for each column in the pivot row
!               if wr (e) < w0, then fextr is defined as ii (ep+5)
!
!       wir (row) >= 0 for each row in pivot column pattern.
!               offset into pattern is given by:
!               wir (row) == offset - 1
!               wir (pivrow) is the offset of the latest pivot row
!
!       wic (col) >= 0 for each col in pivot row pattern.
!               wic (col) == (offset - 1) * ffdimc
!               wic (pivcol) is the offset of the latest pivot column
!
!       wpr (1..fflefr) is the pivot row pattern (excl pivot cols)
!       wpc (1..fflefc) is the pivot col pattern (excl pivot rows)
!       ----------------------------------------------------------------

!=======================================================================
!  divide pivot column by pivot
!=======================================================================

!       k-th pivot in frontal matrix located in c(ffdimc-k+1,ffdimr-k+1)
        xdp = ffxp + (ffdimr - k) * ffdimc
        temp = xx (xdp + ffdimc - k)

!       divide c(1:fflefc,ffdimr-k+1) by pivot value
        temp = 1 / temp
        do 870 p = xdp, xdp + fflefc-1 
           xx (p) = xx (p) * temp
870     continue 
!       count this as a call to the level-1 blas:
        rinfo (4) = rinfo (4) + (fflefc)

!=======================================================================
!  a pivot step is complete
!=======================================================================

        kleft = kleft - 1
        npiv = npiv + 1
        info (17) = info (17) + 1

!       ----------------------------------------------------------------
!       the pivot column is fully assembled and scaled, and is now the
!       (npiv)-th column of l. the pivot row is the (npiv)-th row of u.
!       ----------------------------------------------------------------

        wpr (n-npiv+1) = pivrow
        wpc (n-npiv+1) = pivcol
        wir (pivrow) = -1
        wic (pivcol) = -1

!       ----------------------------------------------------------------
!       deallocate the pivot row and pivot column
!       ----------------------------------------------------------------

        rlen = wc (pivrow)
        ineed = ineed - cscal - rscal - rlen
        pr = rp (pivrow)
        pc = cp (pivcol)
        ii (pr+1) = -1
        ii (pc+1) = -1
        rp (pivrow) = 0
        cp (pivcol) = 0

!=======================================================================
!  local search for next pivot within current frontal matrix [
!=======================================================================

        fedegc = fflefc
        fedegr = fflefr
        pfound = .false.
        okcol = fflefc .gt. 0
        okrow = .false.

!       ----------------------------------------------------------------
!       find column of minimum degree in current frontal row pattern
!       ----------------------------------------------------------------

!       among those columns of least (upper bound) degree, select the
!       column with the lowest column index
        if (okcol) then 
           colpos = 0
           pivcol = n+1
           cdeg = n+1
!          can this be vectorized?  this is the most intensive
!          non-vector loop.
           do 880 j = 1, fflefr 
              col = wpr (j)
              pc = cp (col)
              cdeg2 = ii (pc+1)
              better = cdeg2 .ge. 0 .and.
     >                (cdeg2 .lt. cdeg .or.
     >                (cdeg2 .eq. cdeg .and. col .lt. pivcol))
              if (better) then 
                 cdeg = cdeg2
                 colpos = j
                 pivcol = col
              endif 
880        continue 
           okcol = colpos .ne. 0
        endif 

!=======================================================================
!  assemble candidate pivot column in temporary workspace
!=======================================================================

        if (okcol) then 
           pc = cp (pivcol)
           clen = ii (pc+6)
           okcol = fedegc + clen .le. ffdimc
        endif 

        if (okcol) then 

!          -------------------------------------------------------------
!          copy candidate column from current frontal matrix into
!          work vector xx (wxp ... wxp+ffdimc-1) [
!          -------------------------------------------------------------

           p = ffxp + (colpos - 1) * ffdimc - 1
!fpp$ nodepchk l
           do 890 i = 1, fflefc 
              xx (wxp-1+i) = xx (p+i)
890        continue 

!          -------------------------------------------------------------
!          update candidate column with previous pivots in this front
!          -------------------------------------------------------------

           if (k-k0 .gt. 0 .and. fflefc .ne. 0) then 
              call dgemv ('n', fflefc, k-k0,
     >          -one, xx (ffxp + (ffdimr - k) * ffdimc)        ,ffdimc,
     >                xx (ffxp + (colpos - 1) * ffdimc + ffdimc - k), 1,
     >           one, xx (wxp)                                      , 1)
              tmp = fflefc
              tmp = tmp * (k-k0)
              rinfo (3) = rinfo (3) + 2.0*(tmp)
           endif 

!          -------------------------------------------------------------
!          compute extended pivot column in xx (wxp..wxp-1+fedegc).
!          pattern of pivot column is placed in wpc (1..fedegc)
!          -------------------------------------------------------------

!          assemble the elements in the element list
           cep = (pc+9)
           celn = ii (pc+5)
           do 930 ip = cep, cep + 2*celn - 2, 2 
              e = ii (ip)
              f = ii (ip+1)
              ep = rp (e)
              fleftc = ii (ep+6)
              fdimc = ii (ep+1)
              fxp = ii (ep+2)
              if (e .le. n) then 
                 fluip = ii (ep)
                 lucp = (fluip + 7)
                 ludegc = ii (fluip+3)
              else 
                 lucp = (ep+9)
                 ludegc = fdimc
              endif 
              xp = fxp + f * fdimc
!             split into 3 loops so that they all vectorize on a cray
              f1 = fedegc
              do 900 p = lucp, lucp + ludegc - 1 
                 row = ii (p)
                 if (row .gt. 0) then 
                    if (wir (row) .lt. 0) then 
                       f1 = f1 + 1
                       wpc (f1) = row
                    endif 
                 endif 
900           continue 
              okcol = f1 + clen .le. ffdimc
              if (.not. okcol) then 
!                exit out of loop if column too long:
                 go to 940
              endif 
              do 910 i = fedegc+1, f1 
                 row = wpc (i)
                 wir (row) = i - 1
                 xx (wxp-1+i) = 0
910           continue 
              fedegc = f1
!fpp$ nodepchk l
              do 920 j = 0, ludegc - 1 
                 row = ii (lucp+j)
                 if (row .gt. 0) then 
                    xx (wxp + wir (row)) = 
     >              xx (wxp + wir (row)) + xx (xp+j)
                 endif 
920           continue 
930        continue 
!          loop exit label:
940        continue
        endif 

!=======================================================================
!  find candidate pivot row - unless candidate pivot column is too long
!=======================================================================

        if (okcol) then 

!          -------------------------------------------------------------
!          assemble the original entries in the column
!          -------------------------------------------------------------

           csiz = ii (pc)
           ip = pc + csiz - clen
           cxp = ii (pc+2)
!fpp$ nodepchk l
           do 950 i = 0, clen - 1 
              row = ii (ip+i)
              wir (row) = fedegc + i
              wpc (fedegc+1+i) = row
              xx  (wxp+fedegc+i) = xx (cxp+i)
950        continue 
           fedegc = fedegc + clen

!          -------------------------------------------------------------
!          update degree of candidate column - excluding current front
!          -------------------------------------------------------------

           cdeg = fedegc - fflefc
           ii (pc+1) = cdeg

!          -------------------------------------------------------------
!          find the maximum absolute value in the column
!          -------------------------------------------------------------

           maxval = abs (xx (wxp-1 + idamax (fedegc, xx (wxp), 1)))
           rinfo (3) = rinfo (3) + (fedegc)
           toler = relpt * maxval
           rdeg = n+1

!          -------------------------------------------------------------
!          look for the best possible pivot row in this column
!          -------------------------------------------------------------

           if (maxval .gt. 0) then 
              if (symsrc) then 
!                prefer symmetric pivots, if numerically acceptable
                 pivrow = pivcol
                 rowpos = wir (pivrow) + 1
                 if (rowpos .gt. 0 .and. rowpos .le. fflefc) then 
!                   diagonal entry exists in the column pattern
!                   also within the current frontal matrix
                    apiv = abs (xx (wxp-1+rowpos))
                    if (apiv .ge. toler .and. apiv .gt. 0) then 
!                      diagonal entry is numerically acceptable
                       pr = rp (pivrow)
                       rdeg = ii (pr+1)
                    endif 
                 endif 
              endif 
              if (rdeg .eq. n+1) then 
!                continue searching - no diagonal found or sought for.
!                minimize row degree subject to abs(value) constraints.
                 pivrow = n+1
                 do 960 i = 1, fflefc 
                    row2 = wpc (i)
                    pr = rp (row2)
                    rdeg2 = ii (pr+1)
!                   among those numerically acceptable rows of least
!                   (upper bound) degree, select the row with the
!                   lowest row index
                    better = rdeg2 .lt. rdeg .or.
     >                      (rdeg2 .eq. rdeg .and. row2 .lt. pivrow)
                    if (better) then 
                       apiv = abs (xx (wxp-1+i))
                       if (apiv .ge. toler .and. apiv .gt. 0) then 
                          pivrow = row2
                          rdeg = rdeg2
                          rowpos = i
                       endif 
                    endif 
960              continue 
              endif 
           else 
!             remove this column from any further pivot search
              cdeg = -(n+2)
              ii (pc+1) = cdeg
           endif 
           okrow = rdeg .ne. n+1
        endif 

!       done using xx (wxp...wxp+ffdimc-1) ]

!=======================================================================
!  if found, construct candidate pivot row pattern
!=======================================================================

        if (okrow) then 

!          -------------------------------------------------------------
!          assemble the elements in the element list
!          -------------------------------------------------------------

           pr = rp (pivrow)
           rep = (pr+2)
           reln = wr (pivrow)
           do 990 ip = rep, rep + 2*reln - 2, 2 
              e = ii (ip)
              ep = rp (e)
              if (e .le. n) then 
                 fluip = ii (ep)
                 lucp = (fluip + 7)
                 ludegr = ii (fluip+2)
                 ludegc = ii (fluip+3)
                 lurp = lucp + ludegc
                 fleftr = ii (ep+5)
                 okrow = fleftr .le. ffdimr
                 if (.not. okrow) then 
!                   exit out of loop if row too long:
                    go to 1000
                 endif 
!                split into two loops so that both vectorize on a cray
                 f1 = fedegr
                 do 970 p = lurp, lurp + ludegr - 1 
                    col = ii (p)
                    if (col .gt. 0) then 
                       if (wic (col) .eq. -2) then 
                          f1 = f1 + 1
                          wpr (f1) = col
                       endif 
                    endif 
970              continue 
                 okrow = f1 .le. ffdimr
                 if (.not. okrow) then 
!                   exit out of loop if row too long:
                    go to 1000
                 endif 
                 do 980 i = fedegr+1, f1 
                    wic (wpr (i)) = (i - 1) * ffdimc
980              continue 
                 fedegr = f1
              else 
!                this is an artificial element (a dense column)
                 lurp = (ep+8)
                 col = ii (lurp)
                 if (wic (col) .eq. -2) then 
                    wic (col) = fedegr * ffdimc
                    fedegr = fedegr + 1
                    wpr (fedegr) = col
                    okrow = fedegr .le. ffdimr
                    if (.not. okrow) then 
!                      exit out of loop if row too long:
                       go to 1000
                    endif 
                 endif 
              endif 
990        continue 
!          loop exit label:
1000       continue
        endif 

        if (okrow) then 

!          -------------------------------------------------------------
!          assemble the original entries in the row
!          -------------------------------------------------------------

           rlen = wc (pivrow)
           if (rlen .gt. 0) then 
              f1 = fedegr
              rsiz = ii (pr)
              p2 = pr + rsiz
!             split into two loops so that they both vectorize on a cray
              do 1010 p = p2 - rlen, p2 - 1 
                 col = ii (p)
                 if (wic (col) .eq. -2) then 
!                   this entry cannot be assembled, do not delete
                    f1 = f1 + 1
                    wpr (f1) = col
                 endif 
1010          continue 
              rlen2 = f1 - fedegr
              if (rlen2 .lt. rlen) then 
!                delete one or more entries in the row
                 do 1020 i = fedegr+1, f1 
                    ii (p2 - f1 + i - 1) = wpr (i)
1020             continue 
                 ineed = ineed - (rlen - rlen2)
                 wc (pivrow) = rlen2
              endif 

!             ----------------------------------------------------------
!             update the candidate row degree - excluding current front
!             ----------------------------------------------------------

              rdeg = f1 - fflefr
              ii (pr+1) = rdeg

!             ----------------------------------------------------------
!             pivot is found if candidate pivot row is not too long
!             ----------------------------------------------------------

              okrow = f1 .le. ffdimr
              if (okrow) then 
                 do 1030 i = fedegr+1, f1 
                    wic (wpr (i)) = (i - 1) * ffdimc
1030             continue 
                 fedegr = f1
              endif 

           else 

!             ----------------------------------------------------------
!             update the candidate row degree - excluding current front
!             ----------------------------------------------------------

              rdeg = fedegr - fflefr
              ii (pr+1) = rdeg
           endif 
        endif 

!       ----------------------------------------------------------------
!       if pivot not found: clear wir and wic
!       ----------------------------------------------------------------

        pfound = okrow .and. okcol
        if (.not. pfound) then 
           movelu = k .gt. 0
           do 1040 i = fflefr+1, fedegr 
              wic (wpr (i)) = -2
1040       continue 
           fedegr = fflefr
           do 1050 i = fflefc+1, fedegc 
              wir (wpc (i)) = -1
1050       continue 
           fedegc = fflefc
        else 
           movelu = fedegc .gt. ffdimc - k .or. fedegr .gt. ffdimr - k
        endif 

!       ----------------------------------------------------------------
!       wpr (1..fflefr)                 unextended pivot row pattern
!       wpr (fflefr+1 .. fedegr)        extended pattern, if pfound
!       wpr (fedegr+1 .. n-npiv)        empty space
!       wpr (n-npiv+1 .. n)             pivot row order
!
!       wpc (1..fflefc)                 unextended pivot column pattern
!       wpc (fflefc+1 .. fedegc)        extended pattern, if pfound
!       wpc (fedegc+1 .. n-npiv)        empty space
!       wpc (n-npiv+1 .. n)             pivot column order
!       ----------------------------------------------------------------

!=======================================================================
!  local pivot search complete ]
!=======================================================================

!=======================================================================
!  update contribution block: rank-nb, or if lu arrowhead to be moved
!=======================================================================

        if (k-k0 .ge. nb .or. movelu) then 
           call dgemm ('n', 'n', fflefc, fflefr, k-k0,
     >          -one, xx (ffxp + (ffdimr - k) * ffdimc), ffdimc,
     >                xx (ffxp +  ffdimc - k)          , ffdimc,
     >           one, xx (ffxp)                        , ffdimc)
           tmp = fflefc
           tmp = tmp * fflefr
           tmp = tmp * (k-k0)
           rinfo (6) = rinfo (6) + 2.0*(tmp)
           k0 = k
        endif 

!=======================================================================
!  move the lu arrowhead if no pivot found, or pivot needs room
!=======================================================================

        if (movelu) then 

!          allocate permanent space for the lu arrowhead
           ludegr = fflefr
           ludegc = fflefc
           xs = k*ludegc + k*ludegr + k*k
           is = 7 + ludegc + ludegr + nsons
           if (is .gt. itail-ihead .or. xs .gt. xtail-xhead) then 
              if (is .gt. itail-ihead) then 
!                garbage collection because we ran out of integer mem
                 info (14) = info (14) + 1
              endif 
              if (xs .gt. xtail-xhead) then 
!                garbage collection because we ran out of real mem
                 info (15) = info (15) + 1
              endif 
              call umd2fg (xx, xsize, xhead, xtail, xuse,
     >                     ii, isize, ihead, itail, iuse,
     >                     cp, rp, dn, n, icntl, wir, wic, wr, wc,
     >                     ffxp, ffsize, wxp, ffdimc, .false.,
     >                     pfree, xfree, mhead, mtail, slots)
!             at this point, iuse = ineed and xuse = xneed
           endif 

           itail = itail - is
           luip = itail
           iuse = iuse + is
           ineed = ineed + is
           xtail = xtail - xs
           luxp = xtail
           xuse = xuse + xs
           xneed = xneed + xs
           info (18) = max (info (18), iuse)
           info (19) = max (info (19), ineed)
           info (20) = max (info (20), xuse)
           info (21) = max (info (21), xneed)
           if (ihead .gt. itail .or. xhead .gt. xtail) then 
!             error return, if not enough integer and/or real memory:
              go to 9000
           endif 

!          -------------------------------------------------------------
!          get memory usage for next call to umd2rf
!          -------------------------------------------------------------

           xruse = xruse + xs
           xrmax = max (xrmax, xruse)

!          -------------------------------------------------------------
!          save the new lu arrowhead
!          -------------------------------------------------------------

!          save the scalar data of the lu arrowhead
           ii (luip) = luxp
           ii (luip+1) = k
           ii (luip+2) = ludegr
           ii (luip+3) = ludegc
           ii (luip+4) = nsons
           ii (luip+5) = 0
           ii (luip+6) = 0
           e = ffrow
           if (e .eq. e1) then 
!             this is the first lu arrowhead from this global pivot
              luip1 = luip
           endif 
           wr (e) = -(ndn+2)
           wc (e) = -(ndn+2)

!          save column pattern
           lucp = (luip + 7)
           do 1060 i = 0, ludegc-1 
              ii (lucp+i) = wpc (i+1)
1060       continue 

!          save row pattern
           lurp = lucp + ludegc
           do 1070 i = 0, ludegr-1 
              ii (lurp+i) = wpr (i+1)
1070       continue 

!          add list of sons after the end of the frontal matrix pattern
!          this list of sons is for the refactorization (umd2rf) only.
           lusonp = lurp + ludegr
           ip = lusonp
           e = sonlst
!          while (e > 0) do
1080       continue
           if (e .gt. 0) then 
              ep = rp (e)
              if (wc (e) .eq. -(ndn+2)) then 
!                luson
                 ii (ip) = e
              else if (wc (e) .eq. w0) then 
!                uson
                 ii (ip) = e + n
              else if (wr (e) .eq. w0) then 
!                lson
                 ii (ip) = e + 2*n
              endif 
              next = wir (e) + n + 2
              wir (e) = -1
              e = next
              ip = ip + 1
!          end while:
           goto 1080
           endif 
           nsons = 0
           sonlst = 0

!          move the l1,u1 matrix, compressing the dimension from
!          ffdimc to ldimc.  the lu arrowhead grows on top of stack.
           ldimc = k + ludegc
           xp = ffxp + (ffdimr-1)*ffdimc + ffdimc-1
           do 1100 j = 0, k-1 
!fpp$ nodepchk l
              do 1090 i = 0, k-1 
                 xx (luxp + j*ldimc + i) = xx (xp - j*ffdimc - i)
1090          continue 
1100       continue 

!          move l2 matrix, compressing dimension from ffdimc to ludegc+k
           if (ludegc .ne. 0) then 
              lxp = luxp + k
              xp = ffxp + (ffdimr-1)*ffdimc
              do 1120 j = 0, k-1 
!fpp$ nodepchk l
                 do 1110 i = 0, ludegc-1 
                    xx (lxp + j*ldimc + i) = xx (xp - j*ffdimc + i)
1110             continue 
1120          continue 
           endif 

!          move the u2 block.
           if (ludegr .ne. 0) then 
              uxp = luxp + k * ldimc
              xp = ffxp + ffdimc-1
              do 1140 j = 0, ludegr-1 
!fpp$ nodepchk l
                 do 1130 i = 0, k-1 
                    xx (uxp + j*k + i) = xx (xp + j*ffdimc - i)
1130             continue 
1140          continue 
           endif 

!          one more lu arrowhead has been created
           nlu = nlu + 1
           nzu = (k*(k-1)/2) + k*ludegc
           nzl = (k*(k-1)/2) + k*ludegr
           info (10) = info (10) + nzl
           info (11) = info (11) + nzu

!          no more rows of u or columns of l in current frontal array
           k = 0
           k0 = 0

           if (pfound) then 

!             ----------------------------------------------------------
!             place the old frontal matrix as the only item in the son
!             list, since the next "implied" frontal matrix will have
!             this as its son.
!             ----------------------------------------------------------

              nsons = 1
              e = ffrow
              wir (e) = - n - 2
              sonlst = e

!             ----------------------------------------------------------
!             the contribution block of the old frontal matrix is still
!             stored in the current frontal matrix, and continues (in a
!             unifrontal sense) as a "new" frontal matrix (same array
!             but with a new name, and the lu arrowhead is removed and
!             placed in the lu factors).  old name is "ffrow", new name
!             is "pivrow".
!             ----------------------------------------------------------

              rp (e) = luip
              ffrow = pivrow
           endif 
        endif 

!=======================================================================
!  stop the factorization of this frontal matrix if no pivot found
!=======================================================================

!       (this is the only way out of loop 1395)
        if (.not. pfound) then 
!          exit out of loop 1395 if pivot not found:
           go to 1400
        endif 

!=======================================================================
!  update the pivot column, and move into position as (k+1)-st col of l
!=======================================================================

        xsp = (colpos - 1) * ffdimc
        xdp = (ffdimr - k - 1) * ffdimc
        fsp = ffxp + xsp
        fdp = ffxp + xdp

        if (k-k0 .gt. 0 .and. fflefc .ne. 0) then 
           call dgemv ('n', fflefc, k-k0,
     >          -one, xx (fdp + ffdimc    ), ffdimc,
     >                xx (fsp + ffdimc - k), 1,
     >           one, xx (fsp             ), 1)
           tmp = fflefc
           tmp = tmp * (k-k0)
           rinfo (5) = rinfo (5) + 2.0*(tmp)
        endif 

        if (fflefr .lt. ffdimr - k) then 

           xlp = (fflefr - 1) * ffdimc
           if (fflefr .eq. colpos) then 

!             ----------------------------------------------------------
!             move c(:,colpos) => c(:,ffdimr-k)
!             ----------------------------------------------------------

!                column of the contribution block:
!fpp$ nodepchk l
                 do 1160 i = 0, fflefc - 1 
                    xx (fdp+i) = xx (fsp+i)
1160             continue 
!                column of the u2 block
!fpp$ nodepchk l
                 do 1170 i = ffdimc - k, ffdimc - 1 
                    xx (fdp+i) = xx (fsp+i)
1170             continue 

           else 

!             ----------------------------------------------------------
!             move c(:,colpos) => c(:,ffdimr-k)
!             move c(:,fflefr) => c(:,colpos)
!             ----------------------------------------------------------

              flp = ffxp + xlp

!                columns of the contribution block:
!fpp$ nodepchk l
                 do 1190 i = 0, fflefc - 1 
                    xx (fdp+i) = xx (fsp+i)
                    xx (fsp+i) = xx (flp+i)
1190             continue 
!                columns of the u2 block:
!fpp$ nodepchk l
                 do 1200 i = ffdimc - k, ffdimc - 1 
                    xx (fdp+i) = xx (fsp+i)
                    xx (fsp+i) = xx (flp+i)
1200             continue 

              swpcol = wpr (fflefr)
              wpr (colpos) = swpcol
              wic (swpcol) = xsp
           endif 

           if (fedegr .ne. fflefr) then 
!             move column fedegr to column fflefr (pattern only)
              swpcol = wpr (fedegr)
              wpr (fflefr) = swpcol
              wic (swpcol) = xlp
           endif 

        else if (colpos .ne. ffdimr - k) then 

!          -------------------------------------------------------------
!          swap c(:,colpos) <=> c (:,ffdimr-k)
!          -------------------------------------------------------------

!             swap only what needs to be swapped
!             columns of the contribution block:
!fpp$ nodepchk l
!fpp$ nolstval l
              do 1220 i = 0, fflefc - 1 
                 temp = xx (fdp+i)
                 xx (fdp+i) = xx (fsp+i)
                 xx (fsp+i) = temp
1220          continue 
!             columns of the u2 block:
!fpp$ nodepchk l
!fpp$ nolstval l
              do 1230 i = ffdimc - k, ffdimc - 1 
                 temp = xx (fdp+i)
                 xx (fdp+i) = xx (fsp+i)
                 xx (fsp+i) = temp
1230          continue 

           swpcol = wpr (ffdimr - k)
           wpr (colpos) = swpcol
           wic (swpcol) = xsp
        endif 

        wic (pivcol) = xdp
        fedegr = fedegr - 1
        scan2 = fflefr
        fflefr = fflefr - 1

!=======================================================================
!  move pivot row into position as (k+1)-st row of u, and update
!=======================================================================

        xsp = rowpos - 1
        xdp = ffdimc - k - 1
        fsp = ffxp + xsp
        fdp = ffxp + xdp

        if (fflefc .lt. ffdimc - k) then 

           xlp = fflefc - 1
           if (fflefc .eq. rowpos) then 

!             ----------------------------------------------------------
!             move c(rowpos,:) => c(ffdimc-k,:)
!             ----------------------------------------------------------

!                row of the contribution block:
!fpp$ nodepchk l
                 do 1250 j = 0, (fflefr - 1) * ffdimc, ffdimc 
                    xx (fdp+j) = xx (fsp+j)
1250             continue 
!                row of the l2 block:
!fpp$ nodepchk l
                 do 1260 j = (ffdimr - k - 1) * ffdimc,
     >                       (ffdimr - 1) * ffdimc, ffdimc 
                    xx (fdp+j) = xx (fsp+j)
1260             continue 

           else 

!             ----------------------------------------------------------
!             move c(rowpos,:) => c(ffdimc-k,:)
!             move c(fflefc,:) => c(rowpos,:)
!             ----------------------------------------------------------

              flp = ffxp + xlp

!                rows of the contribution block:
!fpp$ nodepchk l
                 do 1280 j = 0, (fflefr - 1) * ffdimc, ffdimc 
                    xx (fdp+j) = xx (fsp+j)
                    xx (fsp+j) = xx (flp+j)
1280             continue 
!                rows of the l2 block:
!fpp$ nodepchk l
                 do 1290 j = (ffdimr - k - 1) * ffdimc,
     >                       (ffdimr - 1) * ffdimc, ffdimc 
                    xx (fdp+j) = xx (fsp+j)
                    xx (fsp+j) = xx (flp+j)
1290             continue 

              swprow = wpc (fflefc)
              wpc (rowpos) = swprow
              wir (swprow) = xsp
           endif 

           if (fedegc .ne. fflefc) then 
!             move row fedegc to row fflefc (pattern only)
              swprow = wpc (fedegc)
              wpc (fflefc) = swprow
              wir (swprow) = xlp
           endif 

        else if (rowpos .ne. ffdimc - k) then 

!          -------------------------------------------------------------
!          swap c(rowpos,:) <=> c (ffdimc-k,:)
!          -------------------------------------------------------------

!             swap only what needs to be swapped
!             rows of the contribution block:
!fpp$ nodepchk l
!fpp$ nolstval l
              do 1310 j = 0, (fflefr - 1) * ffdimc, ffdimc 
                 temp = xx (fdp+j)
                 xx (fdp+j) = xx (fsp+j)
                 xx (fsp+j) = temp
1310          continue 
!             rows of the l2 block:
!fpp$ nodepchk l
!fpp$ nolstval l
              do 1320 j = (ffdimr - k - 1) * ffdimc,
     >                    (ffdimr - 1) * ffdimc, ffdimc 
                 temp = xx (fdp+j)
                 xx (fdp+j) = xx (fsp+j)
                 xx (fsp+j) = temp
1320          continue 

           swprow = wpc (ffdimc - k)
           wpc (rowpos) = swprow
           wir (swprow) = xsp
        endif 

        wir (pivrow) = xdp
        fedegc = fedegc - 1
        scan1 = fflefc
        fflefc = fflefc - 1

        if (k-k0 .gt. 0 .and. fflefr .gt. 0) then 
           call dgemv ('t', k-k0, fflefr,
     >       -one, xx (fdp + 1)                    , ffdimc,
     >             xx (fdp + (ffdimr - k) * ffdimc), ffdimc,
     >        one, xx (fdp)                        , ffdimc)
           tmp = k-k0
           tmp = tmp * fflefr
           rinfo (5) = rinfo (5) + 2.0*(tmp)
        endif 

!=======================================================================
!  prepare for degree update and next local pivot search
!=======================================================================

!       ----------------------------------------------------------------
!       if only column pattern has been extended:
!               scan1:  new rows only
!               scan2:  no columns scanned
!               scan3:  all columns
!               scan4:  new rows only
!
!       if only row pattern has been extended:
!               scan1:  no rows scanned
!               scan2:  new columns only
!               scan3:  new columns only
!               scan4:  all rows
!
!       if both row and column pattern have been extended:
!               scan1:  new rows only
!               scan2:  new columns only
!               scan3:  all columns
!               scan4:  all rows
!
!       if no patterns have been extended:
!               scan1-4: none
!       ----------------------------------------------------------------

        if (fedegc .eq. fflefc) then 
!          column pattern has not been extended
           scan3 = fflefr + 1
        else 
!          column pattern has been extended.
           scan3 = 0
        endif 

        if (fedegr .eq. fflefr) then 
!          row pattern has not been extended
           scan4 = fflefc + 1
        else 
!          row pattern has been extended
           scan4 = 0
        endif 

!=======================================================================
!  finished with step k (except for assembly and scaling of pivot col)
!=======================================================================

        k = k + 1

!       ----------------------------------------------------------------
!       exit loop if frontal matrix has been extended
!       ----------------------------------------------------------------

        if (fedegr .ne. fflefr .or. fedegc .ne. fflefc) then 
           go to 1325
        endif 

1324    continue 
!       exit label for loop 1324:
1325    continue

!=======================================================================
!  finished factorizing while frontal matrix is not extended ]
!=======================================================================

!=======================================================================
!  extend the frontal matrix [
!=======================================================================

!       ----------------------------------------------------------------
!       zero the newly extended frontal matrix
!       ----------------------------------------------------------------

!       fill-in due to amalgamation caused by this step is
!       k*(fedegr-fflefr+fedegc-fflefc)

        do 1350 j = fflefr, fedegr - 1 
!          zero the new columns in the contribution block:
           do 1330 i = 0, fedegc - 1 
              xx (ffxp + j*ffdimc + i) = 0
1330       continue 
!          zero the new columns in u block:
           do 1340 i = ffdimc - k, ffdimc - 1 
              xx (ffxp + j*ffdimc + i) = 0
1340       continue 
1350    continue 

!fpp$ nodepchk l
        do 1380 i = fflefc, fedegc - 1 
!          zero the new rows in the contribution block:
!fpp$ nodepchk l
           do 1360 j = 0, fflefr - 1 
              xx (ffxp + j*ffdimc + i) = 0
1360       continue 
!          zero the new rows in l block:
!fpp$ nodepchk l
           do 1370 j = ffdimr - k, ffdimr - 1 
              xx (ffxp + j*ffdimc + i) = 0
1370       continue 
1380    continue 

!       ----------------------------------------------------------------
!       remove the new columns from the degree lists
!       ----------------------------------------------------------------

        do 1390 j = fflefr+1, fedegr 
           pc = cp (wpr (j))
           cdeg = ii (pc+1)
           if (cdeg .gt. 0) then 
              cnxt = ii (pc+7)
              cprv = ii (pc+8)
              if (cnxt .ne. 0) then 
                 ii (cp (cnxt)+8) = cprv
              endif 
              if (cprv .ne. 0) then 
                 ii (cp (cprv)+7) = cnxt
              else 
                 head (cdeg) = cnxt
              endif 
           endif 
1390    continue 

!       ----------------------------------------------------------------
!       finalize extended row and column pattern of the frontal matrix
!       ----------------------------------------------------------------

        fflefc = fedegc
        fflefr = fedegr
        fmaxr = max (fmaxr, fflefr + k)
        fmaxc = max (fmaxc, fflefc + k)

!=======================================================================
!  done extending the current frontal matrix ]
!=======================================================================

1395    continue 
!       exit label for loop 1395:
1400    continue

!=======================================================================
!  done assembling and factorizing the current frontal matrix ]
!=======================================================================

!=======================================================================
!  wrap-up:  complete the current frontal matrix [
!=======================================================================

!       ----------------------------------------------------------------
!       store the maximum front size in the first lu arrowhead
!       ----------------------------------------------------------------

        ii (luip1+5) = fmaxr
        ii (luip1+6) = fmaxc

!       one more frontal matrix is finished
        info (13) = info (13) + 1

!       ----------------------------------------------------------------
!       add the current frontal matrix to the degrees of each column,
!       and place the modified columns back in the degree lists
!       ----------------------------------------------------------------

!       do so in reverse order to try to improve pivot tie-breaking
        do 1410 j = fflefr, 1, -1 
           col = wpr (j)
           pc = cp (col)
!          add the current frontal matrix to the degree
           cdeg = ii (pc+1)
           cdeg = min (kleft, cdeg + fflefc)
           if (cdeg .gt. 0) then 
              ii (pc+1) = cdeg
              cnxt = head (cdeg)
              ii (pc+7) = cnxt
              ii (pc+8) = 0
              if (cnxt .ne. 0) then 
                 ii (cp (cnxt)+8) = col
              endif 
              head (cdeg) = col
              mindeg = min (mindeg, cdeg)
           endif 
1410    continue 

!       ----------------------------------------------------------------
!       add the current frontal matrix to the degrees of each row
!       ----------------------------------------------------------------

!fpp$ nodepchk l
        do 1420 i = 1, fflefc 
           row = wpc (i)
           pr = rp (row)
           rdeg = ii (pr+1)
           rdeg = min (kleft, rdeg + fflefr)
           ii (pr+1) = rdeg
1420    continue 

!       ----------------------------------------------------------------
!       reset w0 so that wr (1..n) < w0 and wc (1..n) < w0.
!       also ensure that w0 + n would not cause integer overflow
!       ----------------------------------------------------------------

        w0 = w0 + fmax + 1
        if (w0 .ge. w0big) then 
           w0 = ndn+2
           do 1430 e = 1, n+dn 
              if (wr (e) .gt. ndn) then 
!                this is a frontal matrix
                 wr (e) = w0-1
                 wc (e) = w0-1
              endif 
1430       continue 
        endif 

!       ----------------------------------------------------------------
!       deallocate work vector
!       ----------------------------------------------------------------

        xuse = xuse - ffdimc
        xneed = xneed - ffdimc
        xhead = xhead - ffdimc

!       ----------------------------------------------------------------
!       get the name of this new frontal matrix, and size of
!       contribution block
!       ----------------------------------------------------------------

        e = ffrow
        xs = fflefr * fflefc
        fmax = max (fmax, fflefr, fflefc)

!       ----------------------------------------------------------------
!       get memory usage for next call to umd2rf
!       ----------------------------------------------------------------

        xruse = xruse - ffsize + xs

!       ----------------------------------------------------------------
!       if contribution block empty, deallocate and continue next step
!       ----------------------------------------------------------------

        if (fflefr .le. 0 .or. fflefc .le. 0) then 
           rp (e) = luip
           xuse = xuse - ffsize
           xneed = xneed - ffsize
           xhead = ffxp
           do 1440 i = 1, fflefr 
              wic (wpr (i)) = -2
1440       continue 
           do 1450 i = 1, fflefc 
              wir (wpc (i)) = -1
1450       continue 
!          next iteration of main factorization loop 1540:
           goto 1540
        endif 

!       ----------------------------------------------------------------
!       prepare the contribution block for later assembly
!       ----------------------------------------------------------------

        if (fscal .gt. itail-ihead) then 
           info (14) = info (14) + 1
           call umd2fg (xx, xsize, xhead, xtail, xuse,
     >                  ii, isize, ihead, itail, iuse,
     >                  cp, rp, dn, n, icntl, wir, wic, wr, wc,
     >                  ffxp, ffsize, 0, 0, .false.,
     >                  pfree, xfree, mhead, mtail, slots)
!          at this point, iuse = ineed and xuse = xneed
        endif 

        ep = ihead
        ihead = ihead + fscal
        iuse = iuse + fscal
        ineed = ineed + fscal
        info (18) = max (info (18), iuse)
        info (19) = max (info (19), ineed)
        if (ihead .gt. itail) then 
!          error return, if not enough integer memory:
!          (highly unlikely to run out of memory at this point)
           go to 9000
        endif 

        rp (e) = ep
        ii (ep) = luip
        ii (ep+5) = fflefr
        ii (ep+6) = fflefc
        wr (e) = w0-1
        wc (e) = w0-1

!       count the numerical assembly
        rinfo (2) = rinfo (2) + (xs)

        if (xs .le. xfree) then 

!          -------------------------------------------------------------
!          compress and store the contribution block in a freed block
!          -------------------------------------------------------------

!          place the new block in the list in front of the free block
           xdp = ii (pfree+2)
           ii (pfree+2) = ii (pfree+2) + xs
           xfree = xfree - xs
           mprev = ii (pfree+4)
           if (xfree .eq. 0) then 
!             delete the free block if its size is zero
              mnext = ii (pfree+3)
              pfree = 0
              xfree = -1
           else 
              mnext = pfree
           endif 
           if (mnext .ne. 0) then 
              ii (mnext+4) = ep
           else 
              mtail = ep
           endif 
           if (mprev .ne. 0) then 
              ii (mprev+3) = ep
           else 
              mhead = ep
           endif 
           do 1470 j = 0, fflefr - 1 
!fpp$ nodepchk l
              do 1460 i = 0, fflefc - 1 
                 xx (xdp + j*fflefc + i) = xx (ffxp + j*ffdimc + i)
1460          continue 
1470       continue 
           xhead = ffxp
           xuse = xuse - ffsize
           xneed = xneed - ffsize + xs
           ffdimc = fflefc
           ii (ep+1) = ffdimc
           ii (ep+2) = xdp
           ii (ep+3) = mnext
           ii (ep+4) = mprev

        else 

!          -------------------------------------------------------------
!          deallocate part of the unused portion of the frontal matrix
!          -------------------------------------------------------------

!          leave the contribution block c (1..fflefc, 1..fflefr) at the
!          head of xx, with column dimension of ffdimc and in space
!          of size (fflefr-1)*ffdimc for the first fflefr columns, and
!          fflefc for the last column.
           xneed = xneed - ffsize + xs
           xs = ffsize - (fflefc + (fflefr-1)*ffdimc)
           xhead = xhead - xs
           xuse = xuse - xs
           ii (ep+1) = ffdimc
           ii (ep+2) = ffxp
           ii (ep+3) = 0
           ii (ep+4) = mtail
           if (mtail .eq. 0) then 
              mhead = ep
           else 
              ii (mtail+3) = ep
           endif 
           mtail = ep
        endif 

!       ----------------------------------------------------------------
!       add tuples to the amount of integer space needed - and add
!       limit+cscal to maximum need to account for worst-case possible
!       reallocation of rows/columns.  required integer memory usage
!       is guaranteed not to exceed iworst during the placement of (e,f)
!       tuples in the two loops below.
!       ----------------------------------------------------------------

        ineed = ineed + 2*(fflefr+fflefc)
        iworst = ineed + limit + cscal
        info (19) = max (info (19), iworst)
        info (18) = max (info (18), iworst)

!       ----------------------------------------------------------------
!       place (e,f) in the element list of each column
!       ----------------------------------------------------------------

        do 1500 i = 1, fflefr 
           col = wpr (i)
           pc = cp (col)
           celn = ii (pc+5)
           csiz = ii (pc)
           clen = ii (pc+6)
!          clear the column offset
           wic (col) = -2

!          -------------------------------------------------------------
!          make sure an empty slot exists - if not, create one
!          -------------------------------------------------------------

           if (2*(celn+1) + clen + cscal .gt. csiz) then 

!             ----------------------------------------------------------
!             no room exists - reallocate elsewhere
!             ----------------------------------------------------------

!             at least this much space is needed:
              is = 2 * (celn + 1) + clen
!             add some slots for growth: at least 8 tuples,
!             or double the size - whichever is larger (but with a total
!             size not larger than limit+cscal)
              is = min (is + max (16, is), limit)
              csiz2 = is + cscal

!             ----------------------------------------------------------
!             make sure enough room exists: garbage collection if needed
!             ----------------------------------------------------------

              if (csiz2 .gt. itail-ihead) then 
!                garbage collection:
                 info (14) = info (14) + 1
                 call umd2fg (xx, xsize, xhead, xtail, xuse,
     >                        ii, isize, ihead, itail, iuse,
     >                        cp, rp, dn, n, icntl, wir, wic, wr, wc,
     >                        0, 0, 0, 0, .true.,
     >                        pfree, xfree, mhead, mtail, slots)
!                at this point, iuse+csiz2 <= iworst and xuse = xneed
                 pc = cp (col)
                 csiz = ii (pc)
              endif 

!             ----------------------------------------------------------
!             get space for the new copy
!             ----------------------------------------------------------

              pc2 = ihead
              ihead = ihead + csiz2
              iuse = iuse + csiz2
              info (18) = max (info (18), iuse)
              if (ihead .gt. itail) then 
!                error return, if not enough integer memory:
                 go to 9000
              endif 

!             ----------------------------------------------------------
!             make the copy, leaving hole in middle for element list
!             ----------------------------------------------------------

!             copy the cscal scalars, and the element list
!fpp$ nodepchk l
              do 1480 j = 0, cscal + 2*celn - 1 
                 ii (pc2+j) = ii (pc+j)
1480          continue 

!             copy column indices of original entries (xx is unchanged)
!fpp$ nodepchk l
              do 1490 j = 0, clen - 1 
                 ii (pc2+csiz2-clen+j) = ii (pc+csiz-clen+j)
1490          continue 

              if (clen .gt. 0) then 
!                place the new block in the memory-list
                 mnext = ii (pc2+3) 
                 mprev = ii (pc2+4)
                 if (mnext .ne. 0) then 
                    ii (mnext+4) = pc2
                 else 
                    mtail = pc2
                 endif 
                 if (mprev .ne. 0) then 
                    ii (mprev+3) = pc2
                 else 
                    mhead = pc2
                 endif 
              endif 

              cp (col) = pc2
              ii (pc2) = csiz2

!             ----------------------------------------------------------
!             deallocate the old copy of the column in ii (not in xx)
!             ----------------------------------------------------------

              ii (pc+1) = -1
              ii (pc+6) = 0
              pc = pc2
           endif 

!          -------------------------------------------------------------
!          place the new (e,f) tuple in the element list of the column
!          -------------------------------------------------------------

           cep = (pc+9)
           ii (cep + 2*celn  ) = e
           ii (cep + 2*celn+1) = i - 1
           ii (pc+5) = celn + 1
1500    continue 

!       ----------------------------------------------------------------
!       place (e,f) in the element list of each row
!       ----------------------------------------------------------------

        do 1530 i = 1, fflefc 
           row = wpc (i)
           pr = rp (row)
           rsiz = ii (pr)
           reln = wr (row)
           rlen = wc (row)
!          clear the row offset
           wir (row) = -1

!          -------------------------------------------------------------
!          make sure an empty slot exists - if not, create one
!          -------------------------------------------------------------

           if (2*(reln+1) + rlen + rscal .gt. rsiz) then 

!             ----------------------------------------------------------
!             no room exists - reallocate elsewhere
!             ----------------------------------------------------------

!             at least this much space is needed:
              is = 2 * (reln + 1) + rlen
!             add some extra slots for growth - for at least 8
!             tuples, or double the size (but with a total size not
!             larger than limit+rscal)
              is = min (is + max (16, is), limit)
              rsiz2 = is + rscal

!             ----------------------------------------------------------
!             make sure enough room exists: garbage collection if needed
!             ----------------------------------------------------------

              if (rsiz2 .gt. itail-ihead) then 
!                garbage collection:
                 info (14) = info (14) + 1
                 call umd2fg (xx, xsize, xhead, xtail, xuse,
     >                        ii, isize, ihead, itail, iuse,
     >                        cp, rp, dn, n, icntl, wir, wic, wr, wc,
     >                        0, 0, 0, 0, .true.,
     >                        pfree, xfree, mhead, mtail, slots)
!                at this point, iuse+rsiz2 <= iworst and xuse = xneed
                 pr = rp (row)
                 rsiz = ii (pr)
              endif 

!             ----------------------------------------------------------
!             get space for the new copy
!             ----------------------------------------------------------

              pr2 = ihead
              ihead = ihead + rsiz2
              iuse = iuse + rsiz2
              info (18) = max (info (18), iuse)
              if (ihead .gt. itail) then 
!                error return, if not enough integer memory:
                 go to 9000
              endif 

!             ----------------------------------------------------------
!             make the copy, leaving hole in middle for element list
!             ----------------------------------------------------------

!             copy the rscal scalars, and the element list
!fpp$ nodepchk l
              do 1510 j = 0, rscal + 2*reln - 1 
                 ii (pr2+j) = ii (pr+j)
1510          continue 

!             copy the original entries
!fpp$ nodepchk l
              do 1520 j = 0, rlen - 1 
                 ii (pr2+rsiz2-rlen+j) = ii (pr+rsiz-rlen+j)
1520          continue 

              rp (row) = pr2
              ii (pr2) = rsiz2

!             ----------------------------------------------------------
!             deallocate the old copy of the row
!             ----------------------------------------------------------

              ii (pr+1) = -1
              pr = pr2
           endif 

!          -------------------------------------------------------------
!          place the new (e,f) tuple in the element list of the row
!          -------------------------------------------------------------

           rep = (pr+2)
           ii (rep + 2*reln  ) = e
           ii (rep + 2*reln+1) = i - 1
           wr (row) = reln + 1
1530    continue 

!=======================================================================
!  wrap-up of factorized frontal matrix is complete ]
!=======================================================================

1540    continue 
!       exit label for loop 1540:
2000    continue

!=======================================================================
!=======================================================================
!  end of main factorization loop ]
!=======================================================================
!=======================================================================

!=======================================================================
!  wrap-up:  store lu factors in their final form [
!=======================================================================

!       ----------------------------------------------------------------
!       deallocate all remaining columns, rows, and frontal matrices
!       ----------------------------------------------------------------

        iuse = iuse - (ihead - 1)
        xuse = xuse - (xhead - 1)
        ineed = iuse
        xneed = xuse
        ihead = 1
        xhead = 1

        if (nlu .eq. 0) then 
!          lu factors are completely empty (a = 0).
!          add one integer and one real, to simplify rest of code.
!          otherwise, some arrays in umd2rf or umd2so would have
!          zero size, which can cause an address fault.
           itail = isize
           xtail = xsize
           iuse = iuse + 1
           xuse = xuse + 1
           ineed = iuse
           xneed = xuse
           ip = itail
           xp = xtail
        endif 

!       ----------------------------------------------------------------
!       compute permutation and inverse permutation vectors.
!       use wir/c for the row/col permutation, and wpr/c for the
!       inverse row/col permutation.
!       ----------------------------------------------------------------

        do 2010 k = 1, n 
!          the kth pivot row and column:
           row = wpr (n-k+1)
           col = wpc (n-k+1)
           wir (k) = row
           wic (k) = col
2010    continue 
!       replace wpr/c with the inversion permutations:
        do 2020 k = 1, n 
           row = wir (k)
           col = wic (k)
           wpr (row) = k
           wpc (col) = k
2020    continue 

        if (pgiven) then 
!          the input matrix had been permuted from the original ordering
!          according to rperm and cperm.  combine the initial
!          permutations (now in rperm and cperm) and the pivoting
!          permutations, and place them back into rperm and cperm.
           do 2030 row = 1, n 
              wm (wpr (row)) = rperm (row)
2030       continue 
           do 2040 row = 1, n 
              rperm (row) = wm (row)
2040       continue 
           do 2050 col = 1, n 
              wm (wpc (col)) = cperm (col)
2050       continue 
           do 2060 col = 1, n 
              cperm (col) = wm (col)
2060       continue 
!       else 
!          the input matrix was not permuted on input.  rperm and cperm
!          in umd2f1 have been passed to this routine as wir and wic,
!          which now contain the row and column permutations.  rperm and
!          cperm in this routine (umd2f2) are not defined.
        endif 

!       ----------------------------------------------------------------
!       allocate nlu+3 integers for xtail, nlu, npiv and lup (1..nlu)
!       ----------------------------------------------------------------

        is = nlu + 5
        luip1 = itail
        itail = itail - is
        iuse = iuse + is
        ineed = iuse
        info (18) = max (info (18), iuse)
        info (19) = max (info (19), ineed)
        if (ihead .le. itail) then 

!          -------------------------------------------------------------
!          sufficient memory exist to finish the factorization
!          -------------------------------------------------------------

           ii (itail+1) = nlu
           ii (itail+2) = npiv
           lupp = itail+5
           if (nlu .eq. 0) then 
!             zero the dummy entries, if lu factors are empty
              ii (ip) = 0
              xx (xp) = 0
           endif 

!          -------------------------------------------------------------
!          convert the lu factors into the new pivot order
!          -------------------------------------------------------------

           s = 0
           maxdr = 1
           maxdc = 1
           do 2100 k = 1, n 
              e = wir (k)
              luip = rp (e)
              if (luip .gt. 0) then 
!                this is an lu arrowhead - save a pointer in lup:
                 s = s + 1
!                update pointers to lu arrowhead relative to start of lu
                 ii (lupp+s-1) = luip - luip1 + 1
                 luxp = ii (luip)
                 ii (luip) = luxp - xtail + 1
!                convert the row and column indices to their final order
!                pattern of a column of l:
                 p = (luip + 7)
                 ludegc = ii (luip+3)
                 maxdc = max (maxdc, ludegc)
                 do 2070 j = 1, ludegc 
                    ii (p) = wpr (abs (ii (p)))
                    p = p + 1
2070             continue 
!                pattern of a row of u:
                 ludegr = ii (luip+2)
                 maxdr = max (maxdr, ludegr)
                 do 2080 j = 1, ludegr 
                    ii (p) = wpc (abs (ii (p)))
                    p = p + 1
2080             continue 
!                convert the lusons, usons, and lsons:
                 nsons = ii (luip+4)
                 do 2090 j = 1, nsons 
                    eson = ii (p)
                    if (eson .le. n) then 
!                      an luson
                       ii (p) = wm (eson)
                    else if (eson .le. 2*n) then 
!                      a uson
                       ii (p) = wm (eson-n) + n
                    else 
!                      an lson
                       ii (p) = wm (eson-2*n) + 2*n
                    endif 
                    p = p + 1
2090             continue 
!                renumber this lu arrowhead
                 wm (e) = s
              endif 
2100       continue 

           cmax = max (cmax, maxdc)
           rmax = max (rmax, maxdr)
           totnlu = totnlu + nlu

           ii (itail+3) = maxdc
           ii (itail+4) = maxdr

!          -------------------------------------------------------------
!          get memory usage for next call to umd2rf
!          -------------------------------------------------------------

           xruse = xruse - nz
           return
        endif 

!=======================================================================
!  lu factors are now stored in their final form ]
!=======================================================================

!=======================================================================
!  error conditions
!=======================================================================

!       error return label:
9000    continue
        if (ihead .gt. itail .or. isize .lt. minmem) then 
!          error return if out of integer memory
           call umd2er (1, icntl, info, -3, info (19))
        endif 
        if (xhead .gt. xtail) then 
!          error return if out of real memory
           call umd2er (1, icntl, info, -4, info (21))
        endif 
        return
        end subroutine umd2f2


        subroutine umd2fa (n, ne, job, transa, lvalue, lindex, value,
     >          index, keep, cntl, icntl, info, rinfo)
        integer n, ne, job, lvalue, lindex, index (lindex), keep (20),
     >          icntl (20), info (40)
        real(fltp)
     >          value (lvalue)
        real(fltp)
     >          cntl (10), rinfo (20)
        logical transa
  
!=== umd2fa ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  hsl compatibility:  this routine has the same arguments as ma38a/ad. 

!=======================================================================
!  user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  given a sparse matrix a, find a sparsity-preserving and numerically-
!  acceptable pivot order and compute the lu factors, paq = lu.  the
!  matrix is optionally preordered into a block upper triangular form
!  (btf).  pivoting is performed within each diagonal block to maintain
!  sparsity and numerical stability.  the method used to factorize the
!  matrix is an unsymmetric-pattern variant of the multifrontal method.
!  most of the floating-point work is done in the level-3 blas (dense
!  matrix multiply).  in addition, approximate degrees are used in the
!  markowitz-style pivot search to reduce the symbolic overhead.  for
!  best performance, be sure to use an optimized blas library.
!
!  this routine is normally preceded by a call to umd21i to
!  initialize the default control parameters.  umd21i need only be
!  called once.  a call to umd2fa can be followed by any number of
!  calls to umd2so, which solves a linear system using the lu factors
!  computed by this routine.  a call to umd2fa can also be followed by
!  any number of calls to umd2rf, which factorizes another matrix with
!  the same nonzero pattern as the matrix factorized by umd2fa (but with
!  different numerical values).
! 
!  for more information, see t. a. davis and i. s. duff, "an 
!  unsymmetric-pattern multifrontal method for sparse lu factorization",
!  siam j. matrix analysis and applications (to appear), also
!  technical report tr-94-038, cise dept., univ. of florida,
!  p.o. box 116120, gainesville, fl 32611-6120, usa.  the method used
!  here is a modification of that method, described in t. a. davis,
!  "a combined unifrontal/multifrontal method for unsymmetric sparse
!  matrices," tr-94-005.  (technical reports are available via www at
!  http://www.cis.ufl.edu/).  the appoximate degree update algorithm
!  used here has been incorporated into an approximate minimum degree
!  ordering algorithm, desribed in p. amestoy, t. a. davis, and i. s.
!  duff, "an approximate minimum degree ordering algorithm", siam j.
!  matrix analysis and applications (to appear, also tr-94-039).  the
!  approximate minimum degree ordering algorithm is implemented as mc47
!  in the harwell subroutine library (mc47 is not called by
!  umfpack).

!=======================================================================
!  installation note:
!=======================================================================
!
!  requires the blas (basic linear algebra subprograms) and two routines
!  from the harwell subroutine library.  ideally, you should use
!  vendor-optimized blas for your computer.  if you do not have them,
!  you may obtain the fortran blas from 1.  send email to 
!  netlib@ornl.gov with the two-line message:
!               send index from blas
!               send blas.shar from blas
!
!  to obtain the two harwell subroutine library (hsl) routines, send
!  email to netlib@ornl.gov with the message:
!               send mc21b_hsl.f mc13e_hsl.f from harwell
!  these two routines hsl contain additional licensing restrictions.
!  if you want to run umfpack without them, see the "installation
!  note:" comment in umd2fb.
!
!  to permamently disable any diagnostic and/or error printing, see
!  the "installation note:" comments in umd2p1 and umd2p2.
!
!  to change the default control parameters, see the
!  "installation note:" comments in umd21i

!=======================================================================
!  arguments:
!=======================================================================

!           ------------------------------------------------------------
!  n:       an integer variable.
!           must be set by caller on input (not modified).
!           order of the matrix.  restriction:  n >= 1.

!           ------------------------------------------------------------
!  ne:      an integer variable.
!           must be set by caller on input (not modified).
!           number of entries in input matrix.  restriction:  ne => 1.

!           ------------------------------------------------------------
!  job:     an integer variable.
!           must be set by caller on input (not modified).
!           if job=1, then a column-oriented form of the input matrix
!           is preserved, otherwise, the input matrix is overwritten
!           with its lu factors.  if iterative refinement is to done
!           in umd2so, (icntl (8) > 0), then job must be set to 1.

!           ------------------------------------------------------------
!  transa:  a logical variable.
!           must be set by caller on input (not modified).
!           if false then a is factorized: paq = lu.  otherwise, a
!           transpose is factorized:  pa'q = lu.

!           ------------------------------------------------------------
!  lvalue:  an integer variable.
!           must be set by caller on input (not modified).
!           size of the value array.  restriction:  lvalue >= 2*ne
!           is required to convert the input form of the matrix into
!           the internal represenation.  lvalue >= ne + axcopy is
!           required to start the factorization, where axcopy = ne if
!           job = 1, or axcopy = 0 otherwise.  during factorization,
!           additional memory is required to hold the frontal matrices.
!           the internal representation of the matrix is overwritten
!           with the lu factors, of size (keep (2) - keep (1) + 1
!           + axcopy), on output.

!           ------------------------------------------------------------
!  lindex:  an integer variable.
!           must be set by caller on input (not modified).
!           size of the index array.  restriction: lindex >= 3*ne+2*n+1,
!           is required to convert the input form of the matrix into
!           its internal representation.  lindex >= wlen + alen + acopy
!           is required to start the factorization, where
!           wlen <= 11*n + 3*dn + 8 is the size of the workspaces,
!           dn <= n is the number of columns with more than d
!           entries (d = max (64, sqrt (n)) is the default),
!           alen <= 2*ne + 11*n + 11*dn + dne is the size of the
!           internal representation of the matrix, dne <= ne is the
!           number of entries in such columns with more than d entries,
!           and acopy = ne+n+1 if job = 1, or acopy = 0 otherwize.
!           during factorization, the internal representation of size
!           alen is overwritten with the lu factors, of size
!           luilen = (keep (5) - keep (3) + 1 - acopy) on output.
!           additional memory is also required to hold the unsymmetric
!           quotient graph, but this also overwrites the input matrix.
!           usually about 7*n additional space is adequate for this
!           purpose.  just prior to the end of factorization,
!           lindex >= wlen + luilen + acopy is required.

!           ------------------------------------------------------------
!  value:   a real(fltp) array of size lvalue.
!           must be set by caller on input.  modified on output.  on
!           input, value (1..ne) holds the original matrix in triplet
!           form.  on output, value holds the lu factors, and
!           (optionally) a column-oriented form of the original matrix
!           - otherwise the input matrix is overwritten with the lu
!           factors.

!           ------------------------------------------------------------
!  index:   an integer array of size lindex.
!           must be set by caller on input.  modified on output.  on
!           input, index (1..2*ne) holds the original matrix in triplet
!           form.  on output, index holds the lu factors, and
!           (optionally) a column-oriented form of the original matrix
!           - otherwise the input matrix is overwritten with the lu
!           factors.
!
!           on input the kth triplet (for k = 1...ne) is stored as:
!                       a (row,col) = value (k)
!                       row         = index (k)
!                       col         = index (k+ne)
!           if there is more than one entry for a particular position,
!           the values are accumulated, and the number of such duplicate
!           entries is returned in info (2), and a warning flag is
!           set.  however, applications such as finite element methods
!           naturally generate duplicate entries which are then
!           assembled (added) together.  if this is the case, then
!           ignore the warning message.
!
!           on output, the lu factors and the column-oriented form
!           of a (if preserved) are stored in:
!               value (keep (1)...keep (2))
!               index (keep (3)...keep (5))
!           where keep (2) = lvalue, and keep (5) = lindex.

!           ------------------------------------------------------------
!  keep:    an integer array of size 20.
!
!           keep (1 ... 5):  need not be set by caller on input.
!               modified on output.
!               keep (1): lu factors start here in value
!               keep (2) = lvalue: lu factors end here in value
!               keep (3): lu factors start here in index
!               keep (4): lu factors needed for umd2rf start here
!                             in index
!               keep (5) = lindex: lu factors end here in index
!
!           keep (6 ... 8):  must be set by caller on input (not
!               modified).
!               integer control arguments not normally modified by the
!               user.  see umd21i for details, which sets the defaults.
!               keep (6) is the largest representable positive
!               integer.  keep (7) and keep (8) determine the
!               size of d, where columns with more than d original
!               entries are treated as a priori frontal matrices.
!
!           keep (9 ... 20): unused.  reserved for future releases.

!           ------------------------------------------------------------
!  cntl:    a real(fltp) array of size 10.
!           must be set by caller on input (not modified).
!           real control arguments, see umd21i for a description,
!           which sets the defaults. umd2fa uses cntl (1) and cntl (2).

!           ------------------------------------------------------------
!  icntl:   an integer array of size 20.
!           must be set by caller on input (not modified).
!           integer control arguments, see umd21i for a description,
!           which sets the defaults.  umd2fa uses icntl (1..7).

!           ------------------------------------------------------------
!  info:    an integer array of size 40.
!           need not be set by caller on input.  modified on output.
!           it contains information about the execution of umd2fa.
!
!           info (1): zero if no error occurred, negative if
!               an error occurred and the factorization was not
!               completed, positive if a warning occurred (the
!               factorization was completed). 
!
!               these errors cause the factorization to terminate:
!
!               error   description
!               -1      n < 1
!               -2      ne < 1
!               -3      lindex too small
!               -4      lvalue too small
!               -5      both lindex and lvalue are too small
!
!               with these warnings the factorization was able to 
!               complete:
!
!               error   description
!               1       invalid entries
!               2       duplicate entries
!               3       invalid and duplicate entries
!               4       singular matrix
!               5       invalid entries, singular matrix
!               6       duplicate entries, singular matrix
!               7       invalid and duplicate entries, singular matrix
!
!               subsequent calls to umd2rf and umd2so can only be made
!               if info (1) is zero or positive.  if info (1)
!               is negative, then some or all of the remaining
!               info and rinfo arrays may not be valid.
!
!           info (2): duplicate entries in a.  a warning is set
!               if info (2) > 0.  however, the duplicate entries
!               are summed and the factorization continues.  duplicate
!               entries are sometimes intentional - for finite element
!               codes, for example.
!
!           info (3): invalid entries in a, indices not in 1..n.
!               these entries are ignored and a warning is set
!               in info (1).
!
!           info (4): zero.  used by umd2rf only.
!
!           info (5): entries in a after adding duplicates and
!               removing invalid entries.
!
!           info (6): entries in diagonal blocks of a.
!
!           info (7): entries in off-diagonal blocks of a.  zero
!               if info (9) = 1.
!
!           info (8): 1-by-1 diagonal blocks.
!
!           info (9): blocks in block-triangular form.
!
!           info (10): entries below diagonal in l.
!
!           info (11): entries below diagonal in u.
!
!           info (12): entries in l+u+offdiagonal part.
!
!           info (13): frontal matrices.
!
!           info (14): garbage collections performed on index, when
!               memory is exhausted.  garbage collections are performed
!               to remove external fragmentation.  if info (14) is
!               excessively high, performance can be degraded.  try
!               increasing lindex if that occurs.  note that external
!               fragmentation in *both* index and value is removed when
!               either is exhausted.
!
!           info (15): garbage collections performed on value.
!
!           info (16): diagonal pivots chosen.
!
!           info (17): numerically acceptable pivots found in a.
!               if less than n, then a is singular (or nearly so).
!               the factorization still proceeds, and umd2so can still
!               be called.  the zero-rank active submatrix of order
!               n - info (17) is replaced with the identity matrix
!               (assuming btf is not in use).  if btf is in use, then
!               one or more of the diagonal blocks are singular. 
!
!           info (18): memory used in index.
!
!           info (19): minimum memory needed in index
!               (or minimum recommended).  if lindex is set to
!               info (19) on a subsequent call, then a moderate
!               number of garbage collections (info (14)) will
!               occur.
!
!           info (20): memory used in value.
!
!           info (21): minimum memory needed in value
!               (or minimum recommended).  if lvalue is set to
!               info (21) on a subsequent call, then a moderate
!               number of garbage collections (info (15)) will
!               occur.
!
!           info (22): memory needed in index for the next call to
!               umd2rf.
!
!           info (23): memory needed in value for the next call to
!               umd2rf.
!
!           info (24): zero.  used by umd2so only.
!
!           info (25 ... 40): reserved for future releases

!           ------------------------------------------------------------
!  rinfo:   a real(fltp) array of size 20.
!           need not be set by caller on input.  modified on output.
!           it contains information about the execution of umd2fa.
!
!           rinfo (1): total flop count in the blas
!
!           rinfo (2): total assembly flop count
!
!           rinfo (3): total flops during pivot search
!
!           rinfo (4): level-1 blas flops
!
!           rinfo (5): level-2 blas flops
!
!           rinfo (6): level-3 blas flops
!
!           rinfo (7): zero.  used by umd2so only.
!
!           rinfo (8): zero.  used by umd2so only.
!
!           rinfo (9 ... 20): reserved for future releases

!=======================================================================
!  to be preserved between calls to umd2fa, umd2rf, umd2so:
!=======================================================================
!
!  when calling umd2so to solve a linear system using the factors
!  computed by umd2fa or umd2rf, the following must be preserved:
!
!       n
!       value (keep (1)...keep (2))
!       index (keep (3)...keep (5))
!       keep (1 ... 20)
!
!  when calling umd2rf to factorize a subsequent matrix with a pattern
!  similar to that factorized by umd2fa, the following must be
!  preserved:
!
!       n
!       index (keep (4)...keep (5))
!       keep (4 ... 20)
!
!  note that the user may move the lu factors to a different position
!  in value and/or index, as long as keep (1 ... 5) are modified
!  correspondingly.

!## end of user documentation for umd2fa ###############################

!=======================================================================
!  coding conventions:
!=======================================================================
!
!  this package is written in ansi fortran 77.  to make the code more
!  understandable, the following coding conventions are followed for all
!  routines in this package:
!
!  1) large code blocks are delimited with [...] comments.
!
!  2) goto usage:
!       a) goto's used to return if an error condition is found are
!          written as "go to 9000" or "go to 9010".
!       b) goto's used to exit loops prematurely are written as "go to",
!          and have a target label of 2000 or less.
!       c) goto's used to jump to the next iteration of a do loop or
!          while loop (or to implement a while loop) are written as
!          "goto".
!       no other goto's are used in this package.
!
!  this package uses the following cray compiler directives to help
!  in the vectorization of loops.  each of them operate on the
!  do-loop immediately following the directive.  other compilers
!  normally treat these directives as ordinary comments.
!
!       cfpp$ nodepchk l        disables data dependency check, and
!                               asserts that no recursion exists.
!       cfpp$ nolstval l        disables the saving of last values of
!                               transformed scalars (indexes or promoted
!                               scalars, especially those in array
!                               subscripts).  asserts that values do not
!                               need to be the same as in the scalar
!                               version (for later use of the scalars).
!       cdir$ shortloop         asserts that the loop count is always
!                               64 or less.

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   user routine
!       subroutines called:     umd2er, umd2p1, umd2co, umd2f0
!       functions called:       max, min
        intrinsic max, min

!=======================================================================
!  local scalars:
!=======================================================================

        integer i, nz, lux1, lui1, iuse, xuse, luir1, nzoff, nblks,
     >          maxint, nmax, one_array(1)
        logical presrv
        real(fltp)
     >          ignore_array(1), ignore

!  location of lu factors:
!  -----------------------
!  lux1:    real part of lu factors placed in value (lux1 ... lvalue)
!  lui1:    integer part of lu factors placed in index (lui1 ... lindex)
!  luir1:   index (luir1 ... lindex) must be preserved for umd2rf
!
!  memory usage:
!  -------------
!  iuse:    current memory usage in index
!  xuse:    current memory usage in value
!
!  matrix to factorize:
!  --------------------
!  nblks:   number of diagonal blocks (1 if btf not used)
!  nzoff:   entries in off-diagonal part (0 if btf not used)
!  nz:      entries in matrix after removing invalid/duplicate entries
!
!  other:
!  ------
!  maxint:  largest representable positive integer
!  nmax:    largest permissible value of n
!  i:       general loop index
!  presrv:  true if original matrix to be preserved

!=======================================================================
!  executable statements:
!=======================================================================         
         one_array(1) = 1
!-----------------------------------------------------------------------
!  clear informational output, and keep array (except keep (6..8)):
!-----------------------------------------------------------------------

        do 10 i = 1, 40 
           info (i) = 0
10      continue 
        do 20 i = 1, 20 
           rinfo (i) = 0
20      continue 
        keep (1) = 0
        keep (2) = 0
        keep (3) = 0
        keep (4) = 0
        keep (5) = 0
        ignore = 0

!-----------------------------------------------------------------------
!  print input arguments if requested
!-----------------------------------------------------------------------

        call umd2p1 (1, 1,
     >          n, ne, job, transa, lvalue, lindex, value,
     >          index, keep, cntl, icntl, info, rinfo,
     >          ignore_array, ignore_array, 1, ignore_array, 1)

!-----------------------------------------------------------------------
!  initialize and check inputs
!-----------------------------------------------------------------------

        iuse = 0
        xuse = 0
        info (5) = ne
        info (6) = ne
        maxint = keep (6)
        nmax = (maxint - 2) / 3
        if (n .lt. 1) then 
!          n is too small
           call umd2er (1, icntl, info, -1, -1)
           go to 9000
        endif 
        if (ne .lt. 1) then 
!          ne is too small
           call umd2er (1, icntl, info, -2, -1)
           go to 9000
        endif 

!-----------------------------------------------------------------------
!  get memory for conversion to column form
!-----------------------------------------------------------------------

        nz = ne
        iuse = 2*n+1 + max (2*nz, n+1) + nz
        xuse = 2*nz
        info (18) = iuse
        info (20) = xuse
        info (19) = iuse
        info (21) = xuse
        if (lindex .lt. iuse) then 
!          set error flag if out of integer memory:
           call umd2er (1, icntl, info, -3, iuse)
        endif 
        if (lvalue .lt. xuse) then 
!          set error flag if out of real memory:
           call umd2er (1, icntl, info, -4, xuse)
        endif 
        if (info (1) .lt. 0) then 
!          error return, if not enough integer and/or real memory:
           go to 9000
        endif 

!-----------------------------------------------------------------------
!  convert to column-oriented form and remove duplicates
!-----------------------------------------------------------------------

        call umd2co (n, nz, transa, value, lvalue, info, icntl,
     >     index, lindex-(2*n+1), index(lindex-2*n), index(lindex-n), 1)
        if (info (1) .lt. 0) then 
!          error return, if all entries invalid (nz is now 0):
           go to 9000
        endif 

!-----------------------------------------------------------------------
!  current memory usage:
!-----------------------------------------------------------------------

!       index (1..n+1): column pointers.  input matrix is now in
!       index (1..nz+n+1) and value (1..nz)
!       col pattern: index (n+1+ index (col) ... n+1+ index (col+1))
!       col values:  value (     index (col) ...      index (col+1))
!       at this point, nz <= ne (nz = ne if there are no invalid or
!       duplicate entries; nz < ne otherwise).

        iuse = nz + (n+1)
        xuse = nz

!-----------------------------------------------------------------------
!  factorize
!-----------------------------------------------------------------------

        presrv = job .eq. 1
        if (presrv) then 

!          -------------------------------------------------------------
!          keep a copy of the original matrix in column-oriented form
!          -------------------------------------------------------------

!          copy column pointers (cp (1..n+1) = ap (1..n+1))
           iuse = iuse + (n+1)
!fpp$ nodepchk l
           do 30 i = 1, n+1 
              index (nz+n+1+i) = index (i)
30         continue 

           call umd2f0 (n, nz, index (nz+n+2),
     >          value (nz+1), lvalue-nz,
     >          index (nz+2*n+3), lindex-(nz+2*n+2),
     >          lux1, lui1, iuse, xuse, nzoff, nblks,
     >          icntl, cntl, info, rinfo,
     >          presrv, index, index (n+2), value, n, nz, keep, ne)
           if (info (1) .lt. 0) then 
!             error return, if umd2f0 fails
              go to 9000
           endif 
!          adjust pointers to reflect index/value, not ii/xx:
           lux1 = lux1 + nz
           lui1 = lui1 + (nz+2*n+2)

!          move preserved copy of a to permanent place
           lux1 = lux1 - nz
           lui1 = lui1 - (nz+n+1)
           do 40 i = nz+n+1, 1, -1 
              index (lui1+i-1) = index (i)
40         continue 
           do 50 i = nz, 1, -1 
              value (lux1+i-1) = value (i)
50         continue 

        else 

!          -------------------------------------------------------------
!          do not preserve the original matrix
!          -------------------------------------------------------------

           call umd2f0 (n, nz, index,
     >          value, lvalue,
     >          index (n+2), lindex-(n+1),
     >          lux1, lui1, iuse, xuse, nzoff, nblks,
     >          icntl, cntl, info, rinfo,
     >          presrv, one_array, one_array, ignore_array, 0, 1, keep, ne)
           if (info (1) .lt. 0) then 
!             error return, if umd2f0 fails
              go to 9000
           endif 
!          adjust pointers to reflect index/value, not ii/xx:
           lui1 = lui1 + (n+1)
        endif 

!-----------------------------------------------------------------------
!  wrap-up
!-----------------------------------------------------------------------

        if (transa) then 
           index (lindex-6) = 1
        else 
           index (lindex-6) = 0
        endif 

        index (lindex-5) = nzoff
        index (lindex-4) = nblks
        if (presrv) then 
           index (lindex-3) = 1
        else 
           index (lindex-3) = 0
        endif 
        index (lindex-2) = nz
        index (lindex-1) = n
        index (lindex) = ne

!       do not need preserved matrix (n+1+nz), or off-diagonal entries
!       (nzoff) for umd2rf:
        luir1 = lui1
        if (presrv) then 
!          do not need preserved matrix for umd2rf
           luir1 = luir1 + n+1 + nz
        endif 
        if (nblks .gt. 1) then 
!          do not need off-diagonal part for umd2rf
           luir1 = luir1 + nzoff
        endif 

!       save location of lu factors
        keep (1) = lux1
        keep (2) = lvalue
        keep (3) = lui1
        keep (4) = luir1
        keep (5) = lindex

!       update memory usage information
        iuse = lindex - lui1 + 1
        xuse = lvalue - lux1 + 1
        info (22) = info (22) + (lindex - luir1 + 1)

!-----------------------------------------------------------------------
!  print the output arguments if requested, and return
!-----------------------------------------------------------------------

!       error return label:
9000    continue
        if (info (1) .lt. 0) then 
           keep (1) = 0
           keep (2) = 0
           keep (3) = 0
           keep (4) = 0
           keep (5) = 0
        endif 

        info (18) = min (lindex, max (info (18), iuse))
        info (20) = min (lvalue, max (info (20), xuse))

        call umd2p1 (1, 2,
     >          n, ne, job, transa, lvalue, lindex, value,
     >          index, keep, cntl, icntl, info, rinfo,
     >          ignore_array, ignore_array, 1, ignore_array, 1)
        return
        end subroutine umd2fa
        subroutine umd2fb (xx, xsize, ii, isize, n, nz, nzdia, nzoff,
     >          nblks, cp, cperm, rperm, pr, pc,
     >          w, zperm, bp, offp,
     >          presrv, icntl)
        integer n, nz, isize, ii (isize), nzdia, nzoff, nblks, cp (n+1),
     >          cperm (n), rperm (n), pr (n), pc (n), w (n), zperm (n),
     >          bp (n+1), offp (n+1), icntl (20), xsize
        logical presrv
        real(fltp)
     >          xx (xsize)
  
!=== umd2fb ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  find permutations to block triangular form:
!       1) permute the matrix so that it has a zero-free diagonal.
!       2) find strongly-connected components of the corresponding
!          graph.  each diagonal block corresponds to exactly one
!          strongly-connected component.
!       3) convert the matrix to block triangular form, unless it is
!          to be preserved in its original form.

!  calls harwell ma28 routines mc21b_hsl and mc13e_hsl, which can be obtained
!  separately from netlib.  send email to netlib@ornl.gov with the
!  message:
!       send mc13e_hsl.f mc21b_hsl.f from harwell

!=======================================================================
!  installation note:
!=======================================================================
!
!  if the ma28 harwell subroutine library routines mc21b_hsl and mc13e_hsl
!  (which perform the permutation to block-triangular-form) are not
!  available, then you may comment out all executable code in this
!  routine, or place a "return" statement as the first executable
!  statement (see below).  if you do make this modification, please do
!  not delete any original code.  add a comment and date to your
!  modifications.

!=======================================================================
!  input:
!=======================================================================
!
!       presrv:         true if original matrix is to be preserved
!       n:              order of matrix
!       nz:             entries in matrix
!       isize:          size of ii
!       xsize:          size of xx
!       cp (1..n+1):    column pointers
!       xx (1..nz):     values
!       ii (1..nz):     row indices
!       icntl:          integer control arguments
!
!          input matrix in column form is in:
!          xx (1..nz), ii (1..nz), n, nz, cp (1..n+1), where
!               ii (cp(col) ... cp(col+1)-1): row indices
!               xx (cp(col) ... cp(col+1)-1): values
!          if presrv is false then xsize and isize must be >= 2*nz
!          otherwise, xsize and isize must be >= nz

!=======================================================================
!  workspace:
!=======================================================================
!
!       pr (1..n), pc (1..n), w (1..n), zperm (1..n)

!======================================================================
!  output: 
!=======================================================================
!
!       nblks: number of blocks
!       if (nblks > 1):
!
!           cperm (1..n), rperm (1..n): permutation to block form:
!               rperm (newrow) = oldrow
!               cperm (newcol) = oldcol
! 
!           bp (n-nblks+1...n+1) holds the start/end of blocks 1..nblks
!
!           if (presrv is false) then
!
!              input matrix is converted to block-upper-tri. form,
!              using ii/xx (nz+1..2*nz) as workspace.
!              nzdia: nonzeros in diagonal blocks
!              nzoff: nonzeros in off-diagonal blocks
!              (nz = nzdia + nzoff)
!
!              off-diagonal column-oriented form in xx/ii (1..nzoff)
!              col is located in
!              xx/ii (offp (col) ... offp (col+1)-1)
!
!              diagonal blocks now in xx/ii (nzoff+1 .. nzoff+nzdia)
!              col is located in
!              xx/ii (cp (col) ... cp (col+1)-1)
!
!       else, nblks=1: and no other output is generated.

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   umd2f0
!       subroutines called:     mc21b_hsl, mc13e_hsl (in ma28 hsl package)

!=======================================================================
!  local scalars:
!=======================================================================

        integer col, ndiag, i, po, pb, blk, p, row, k1, k

!  ndiag:   number of entries on the diagonal
!  po:      pointer into off-diagonal part
!  pb:      pointer into diagonal blocks
!  blk:     block number for current diagonal block
!  k1:      column col is in diagonal block a (k1.., k1...)
!  k:       kth row/col in btf form is rperm(k)/cperm(k) in input matrix
!  p:       pointer
!  row:     row index
!  col:     column index
!  i:       general loop index

!=======================================================================
!  executable statements:
!       if (mc21b_hsl and mc13e_hsl not available during installation) return
!=======================================================================

        nzdia = nz
        nzoff = 0

!-----------------------------------------------------------------------
! compute the length of each column
!-----------------------------------------------------------------------

        do 10 col = 1, n 
           w (col) = cp (col+1) - cp (col)
10      continue 

!-----------------------------------------------------------------------
! find a column permutation for a zero-free diagonal
!-----------------------------------------------------------------------

        call mc21b_hsl (n,ii,nz,cp,w,zperm,ndiag,offp,cperm,pr,pc)
!          mc21b_hsl calling interface:
!          input:       n, ii (1..nz), nz, cp (n), w (n):
!                       n-by-n matrix, col is of length w (col),
!                       and its pattern is located in
!                       ii (cp (col) ... cp (col)+w(col)-1)
!          output:      zperm (n), the permutation, such that
!                       colold = zperm (col), and ndiag (number of 
!                       structural nonzeros on the diagonal.    
!                       matrix is structurally singular if ndiag < n
!          workspace:   offp, cperm, pr, pc

!-----------------------------------------------------------------------
!  permute the columns of the temporary matrix to get zero-free diagonal
!-----------------------------------------------------------------------

        do 20 col = 1, n 
           offp (col) = cp (zperm (col))
           w (col) = cp (zperm (col)+1) - cp (zperm (col))
20      continue 

!-----------------------------------------------------------------------
!  find a symmetric permutation into upper block triangular form
!  (that is, find the strongly-connected components in the graph).
!-----------------------------------------------------------------------

        call mc13e_hsl(n,ii,nz,offp,w,rperm,bp,nblks,cperm,pr,pc)
!          mc13e_hsl calling interface:
!          input:       n, ii (1..nz), nz, offp (n), w (n)
!                       n-by-n matrix, col of length w(col),
!                       in ii (offp(col) ... offp(col)+w(col)-1), where
!                       this permuted matrix has a zero-free diagonal
!                       (unless the matrix is structurally singular).
!          output:      rperm (n), bp (n+1), nblks
!                       old = rperm (new) is the symmetric permutation,
!                       there are nblks diagonal blocks, bp (i) is
!                       the position in new order of the ith block.
!          workspace:   cperm, pr, pc

!-----------------------------------------------------------------------
!  if more than one block, get permutations and block pointers,
!  and convert to block-upper-triangular form (unless matrix preserved)
!-----------------------------------------------------------------------

        if (nblks .ne. 1) then 

!          -------------------------------------------------------------
!          find the composite column permutation vector (cperm):
!          -------------------------------------------------------------

           do 30 col = 1, n 
              cperm (col) = zperm (rperm (col))
30         continue 

!          -------------------------------------------------------------
!          convert to block-upper-triangular form, if not preserved
!          -------------------------------------------------------------

           if (.not. presrv) then 

!             ----------------------------------------------------------
!             find the inverse permutation vectors, pr and pc
!             ----------------------------------------------------------

              do 40 k = 1, n 
                 pc (cperm (k)) = k
                 pr (rperm (k)) = k
40            continue 

!             ----------------------------------------------------------
!             construct flag array to determine if entry in block or not
!             ----------------------------------------------------------

              bp (nblks+1) = n+1
              do 60 blk = 1, nblks 
                 do 50 i = bp (blk), bp (blk+1)-1 
                    w (i) = bp (blk)
50               continue 
60            continue 

!             ----------------------------------------------------------
!             construct block-diagonal form in xx/ii (nz+1..nz+nzdia)
!             ----------------------------------------------------------

!             these blocks are in a permuted order (according to rperm
!             and cperm).  the row indices in each block range from 1
!             to the size of the block.

              pb = nz + 1
              do 80 col = 1, n 
                 zperm (col) = pb
                 k1 = w (col)
!fpp$ nodepchk l
                 do 70 p = cp (cperm (col)), cp (cperm (col)+1)-1 
                    row = pr (ii (p)) 
                    if (w (row) .eq. k1) then 
!                      entry is in the diagonal block:
                       ii (pb) = row - k1 + 1
                       xx (pb) = xx (p)
                       pb = pb + 1
                    endif 
70               continue 
80            continue 
!             zperm (n+1) == pb  ( but zperm (n+1) does not exist )
              nzdia = pb - (nz + 1)
              nzoff = nz - nzdia

!             diagonal blocks now in xx/ii (nz+1..nz+nzdia)
!             col is located in xx/ii (zperm (col) ... zperm (col+1)-1)

!             ----------------------------------------------------------
!             compress original matrix to off-diagonal part, in place
!             ----------------------------------------------------------

!             the rows/cols of off-diagonal form correspond to rows/cols
!             in the original, unpermuted matrix.  they are permuted to
!             the final pivot order and stored in a row-oriented form,
!             after the factorization is complete (by umd2of).

              po = 1
              do 100 col = 1, n 
                 offp (col) = po
                 k1 = w (pc (col))
!fpp$ nodepchk l
                 do 90 p = cp (col), cp (col+1)-1 
                    row = pr (ii (p))
                    if (w (row) .ne. k1) then 
!                      offdiagonal entry
                       ii (po) = ii (p)
                       xx (po) = xx (p)
                       po = po + 1
                    endif 
90               continue 
100           continue 
              offp (n+1) = po

!             off-diagonal form now in xx/ii (1..nzoff)
!             col is located in xx/ii(offp(col)..offp(col+1)-1)

!             ----------------------------------------------------------
!             move block-diagonal part into place
!             ----------------------------------------------------------

              pb = nz + 1
!fpp$ nodepchk l
              do 110 i = 0, nzdia - 1 
                 ii (po+i) = ii (pb+i)
                 xx (po+i) = xx (pb+i)
110           continue 
              do 120 col = 1, n 
                 cp (col) = zperm (col) - nzdia
120           continue 
!             cp (n+1) == nz+1  ( this is unchanged )

!             diagonal blocks now in xx/ii (nzoff+1 .. nzoff+nzdia)
!             col is located in xx/ii (cp (col) ... cp (col+1)-1)

           endif 

!          -------------------------------------------------------------
!          shift bp (1 .. nblks+1) down to bp (1+n-nblks .. n+1), which
!          then becomes the blkp (1 .. nblks+1) array.
!          -------------------------------------------------------------

           bp (nblks+1) = n+1
!fpp$ nodepchk l
           do 130 blk = nblks + 1, 1, -1 
              bp (blk + (n-nblks)) = bp (blk)
130        continue 
        endif 

        return
        end subroutine umd2fb
        subroutine umd2fg (xx, xsize, xhead, xtail, xuse,
     >          ii, isize, ihead, itail, iuse,
     >          cp, rp, dn, n, icntl, wir, wic, wr, wc,
     >          ffxp, ffsize, wxp, ffdimc, doslot,
     >          pfree, xfree, mhead, mtail, slots)
        integer n, dn, isize, ii (isize), ihead, itail, rp (n+dn),
     >          cp (n+1), icntl (20), wir (n), wic (n), xsize, xuse,
     >          iuse, xhead, xtail, ffxp, ffsize, wxp,
     >          ffdimc, wr (n+dn), wc (n+dn), pfree, xfree, mhead,
     >          mtail, slots
        logical doslot
        real(fltp)
     >          xx (xsize)
  
!=== umd2fg ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  garbage collection for umd2f2.

!=======================================================================
!  input:
!=======================================================================
!
!       ii/xx:          integer/real workspace, containing matrix being
!                       factorized and partially-computed lu factors
!       isize:          size of ii
!       xsize:          size of xx
!       xhead:          xx (1..xhead) is in use (matrix, frontal mtc's)
!       xtail:          xx (xtail..xsize) is in use (lu factors)
!       xuse:           memory usage in value
!       ihead:          ii (1..ihead) is in use (matrix, frontal mtc's)
!       itail:          ii (itail..isize) is in use (lu factors)
!       iuse:           memory usage in index
!       cp (1..n+1):    pointers to columns
!       rp (1..n+dn):   pointers to rows, frontal matrices, and lu
!                       arrowheads
!       dn:             number of dense columns
!       n:              order of matrix
!       icntl:          integer control parameters, see umd21i
!       wr (1..n):      see umd2f2
!       wc (1..n):      see umd2f2
!       ffxp:           pointer to current contribution block
!       ffsize:         size of current contribution block
!       mhead:          pointer to first block in memory list
!       mtail:          pointer to last block in memory list
!       doslot:         true if adding slots
!       if doslot:
!           wir (1..n)  if wir (row) >= 0 then add (or keep) an extra
!                       slot in the row's element list
!           wic (1..n)  if wir (col) >= 0 then add (or keep) an extra
!                       slot in the col's element list

!=======================================================================
!  output:
!=======================================================================
!
!       ii/xx:          external fragmentation is removed at head 
!       xhead:          xx (1..xhead) is in use, reduced in size
!       xuse:           memory usage in value, reduced
!       ihead:          ii (1..ihead) is in use, reduced in size
!       iuse:           memory usage in index, reduced
!       pfree:          pointer to free block in memory list, set to 0
!       xfree:          size of free block in xx, set to -1
!       mhead:          pointer to first block in memory list
!       mtail:          pointer to last block in memory list
!       ffxp            current working array has been shifted
!       wxp             current work vector has been shifted

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   umd2f2

!=======================================================================
!  local scalars:
!=======================================================================

        integer what, fsiz, row, col, p, idp, xdp, i, e, ep, fdimc,
     >          ludegr, ludegc, j, pc, celn, clen, reln, rlen,
     >          csiz1, csiz2, rsiz1, rsiz2, fluip, cxp, fxp, rdeg,
     >          cdeg, cscal, rscal, fscal
        parameter (cscal = 9, rscal = 2, fscal = 7)
        logical slot

!  compression:
!  ------------
!  what:    what this block of memory is (a row, column, etc.)
!  idp:     int. destination pointer, current block moved to ii (idp...)
!  xdp:     real destination pointer, current block moved to xx (xdp...)
!  slot:    true if adding, or keeping, a size-2 hole in an element list
!
!  columns:
!  --------
!  cscal:   = 9, the number of scalars in column data structure
!  celn:    number of (e,f) tuples in element list of a column
!  clen:    number of unassembled original entries in a column
!  cdeg:    degree of a column (number of entries, including fill-in)
!  cxp:     a column is in xx (cxp...) prior to compression
!  pc:      column is in ii (pc ...) prior to compression
!  csiz1:   size of a column in ii, prior to compression
!  csiz2:   size of a column in ii, after compression
!  col:     column index
!
!  rows:
!  -----
!  rscal:   = 2, the number of scalars in row data structure
!  reln:    number of (e,f) tuples in element list of a row
!  rlen:    number of unassembled original entries in a row
!  rsiz1:   size of a row in ii, prior to compression
!  rsiz2:   size of a row in ii, after compression
!  rdeg:    degree of a row (number of entries, including fill-in)
!  row:     row index
!
!  frontal matrices:
!  -----------------
!  fscal:   = 7, the number of scalars in element data structure
!  fluip:   element is in ii (fluip...) prior to compression
!  fxp:     a frontal matrix is in xx (fxp...) prior to compression
!  e:       an element
!  fdimc:   column dimension (number of rows) of a frontal matrix
!  ludegr:  row degree (number of columns) of a contribution block
!  ludegc:  column degree (number of rows) of a contribution block
!  fsiz:    size of an artificial frontal matrix
!  ep:      an artificial frontal matrix is in ii (ep ...) prior to comp
!
!  other:
!  ------
!  p:       pointer
!  i:       general loop index
!  j:       general loop index

!=======================================================================
!  executable statments:
!=======================================================================

        slots = 0

!-----------------------------------------------------------------------
!   prepare the non-pivotal rows/cols and unassembled elements
!-----------------------------------------------------------------------

!       place the size of each block of memory at the beginning,
!       and mark the 2nd entry in each block with what it is

!       ----------------------------------------------------------------
!       mark the columns
!       ----------------------------------------------------------------

!fpp$ nodepchk l
        do 10 col = 1, n 
           pc = cp (col)
           if (pc .ne. 0) then 
!             this is a non-pivotal, non-null column
              cdeg = ii (pc+1)
              cp (col) = cdeg
              ii (pc+1) = col+n
           endif 
10      continue 

!       ----------------------------------------------------------------
!       mark the rows and frontal matrices
!       ----------------------------------------------------------------

!fpp$ nodepchk l
        do 20 row = 1, n 
           p = rp (row)
           rlen = wc (row)
           if (p .eq. 0) then 
!             a pivotal row
              continue
           else if (rlen .ge. 0 .and. rlen .le. n) then 
!             this is a non-pivotal, non-null row
              rdeg = ii (p+1)
              rp (row) = rdeg
              ii (p+1) = row+2*n
           else if (wr (row) .eq. -(n+dn+2)) then 
!             a pivotal row, and an assembled element
              continue
           else 
!             this is an unassembled frontal matrix
!             the size is implicitly fscal
              fdimc = ii (p+1)
              rp (row) = fdimc
              ii (p+1) = row
           endif 
20      continue 

!       ----------------------------------------------------------------
!       mark the artificial frontal matrices
!       ----------------------------------------------------------------

!fpp$ nodepchk l
        do 30 e = n+1, n+dn 
           ep = rp (e)
           if (ep .ne. 0) then 
!             this is an unassembled artificial frontal matrix
!             the size is ii (ep+1) + cscal
              fdimc = ii (ep+1)
              rp (e) = fdimc
              ii (ep+1) = e+2*n
           endif 
30      continue 

!-----------------------------------------------------------------------
!  scan the link list and compress the reals
!-----------------------------------------------------------------------

        xdp = 1
        p = mhead
!       while (p .ne. 0) do
40      continue
        if (p .ne. 0) then 

           what = ii (p+1)

!          -------------------------------------------------------------
           if (what .gt. 3*n) then 
!          -------------------------------------------------------------

!             this is an unassembled artificial frontal matrix
              e = what - 2*n
              fxp = ii (p+2)
              ii (p+2) = xdp
!fpp$ nodepchk l
              do 50 j = 0, rp (e) - 1 
                 xx (xdp+j) = xx (fxp+j)
50            continue 
              xdp = xdp + rp (e)

!          -------------------------------------------------------------
           else if (what .eq. -1 .or. ii (p+6) .eq. 0) then 
!          -------------------------------------------------------------

!             this is a real hole - delete it from the link list
              if (ii (p+4) .ne. 0) then 
                 ii (ii (p+4)+3) = ii (p+3)
              else 
                 mhead = ii (p+3)
              endif 
              if (ii (p+3) .ne. 0) then 
                 ii (ii (p+3)+4) = ii (p+4)
              else 
                 mtail = ii (p+4)
              endif 

!          -------------------------------------------------------------
           else if (what .le. n) then 
!          -------------------------------------------------------------

!             this is an unassembled frontal matrix
              e = what
              fxp = ii (p+2)
              ii (p+2) = xdp
              fluip = ii (p)
              ludegr = ii (fluip+2)
              ludegc = ii (fluip+3)
              fdimc = rp (e)
              if (fdimc .eq. ludegc) then 
!                contribution block is already compressed
!fpp$ nodepchk l
                 do 60 i = 0, (ludegr * ludegc) - 1 
                    xx (xdp+i) = xx (fxp+i)
60               continue 
              else 
!                contribution block is not compressed
!                compress xx (fxp..) to xx (xdp..xdp+(ludegr*ludegc)-1)
                 do 80 j = 0, ludegr - 1 
!fpp$ nodepchk l
                    do 70 i = 0, ludegc - 1 
                       xx (xdp + j*ludegc + i) = xx (fxp + j*fdimc + i)
70                  continue 
80               continue 
                 rp (e) = ludegc
              endif 
              xdp = xdp + ludegr*ludegc

!          -------------------------------------------------------------
           else if (what .le. 2*n) then 
!          -------------------------------------------------------------

!             this is a column
              cxp = ii (p+2)
              ii (p+2) = xdp
              clen = ii (p+6)
!fpp$ nodepchk l
              do 90 j = 0, clen - 1 
                 xx (xdp+j) = xx (cxp+j)
90            continue 
              xdp = xdp + clen

!          -------------------------------------------------------------
           endif 
!          -------------------------------------------------------------

!          -------------------------------------------------------------
!          get the next item in the link list
!          -------------------------------------------------------------

           p = ii (p+3)

!       end while:
        goto 40
        endif 

        pfree = 0
        xfree = -1

!       ----------------------------------------------------------------
!       shift the current working array (if it exists)
!       ----------------------------------------------------------------

        if (ffxp .ne. 0) then 
!fpp$ nodepchk l
           do 100 i = 0, ffsize - 1 
              xx (xdp+i) = xx (ffxp+i)
100        continue 
           ffxp = xdp
           xdp = xdp + ffsize
        endif 

!       ----------------------------------------------------------------
!       shift the current work vector (if it exists)
!       ----------------------------------------------------------------

        if (wxp .ne. 0) then 
           wxp = xdp
           xdp = xdp + ffdimc
        endif 

!-----------------------------------------------------------------------
!  scan from the top of integer memory (1) to bottom (ihead) and
!  compress the integers
!-----------------------------------------------------------------------

        p = 1
        idp = p
!       while (p .lt. ihead) do:
110     continue
        if (p .lt. ihead) then 

           what = ii (p+1)

!          -------------------------------------------------------------
           if (what .gt. 3*n) then 
!          -------------------------------------------------------------

!             this is an unassembled artificial frontal matrix
              e = what - 2*n
              fsiz = rp (e) + cscal
              ii (p+1) = rp (e)
              rp (e) = idp
!fpp$ nodepchk l
              do 120 i = 0, fsiz - 1 
                 ii (idp+i) = ii (p+i)
120           continue 
!             shift pointers in the link list
              if (ii (idp+4) .ne. 0) then 
                 ii (ii (idp+4)+3) = idp
              else 
                 mhead = idp
              endif 
              if (ii (idp+3) .ne. 0) then 
                 ii (ii (idp+3)+4) = idp
              else 
                 mtail = idp
              endif 
              p = p + fsiz
              idp = idp + fsiz

!          -------------------------------------------------------------
           else if (what .eq. -1) then 
!          -------------------------------------------------------------

!             this is a integer hole
              p = p + ii (p)

!          -------------------------------------------------------------
           else if (what .ge. 1 .and. what .le. n) then 
!          -------------------------------------------------------------

!             this is an unassembled frontal matrix (fscal integers)
              e = what
              fdimc = rp (e)
              ii (p+1) = fdimc
              rp (e) = idp
!fpp$ nodepchk l
              do 130 i = 0, fscal - 1 
                 ii (idp+i) = ii (p+i)
130           continue 
!             shift pointers in the link list
              if (ii (idp+4) .ne. 0) then 
                 ii (ii (idp+4)+3) = idp
              else 
                 mhead = idp
              endif 
              if (ii (idp+3) .ne. 0) then 
                 ii (ii (idp+3)+4) = idp
              else 
                 mtail = idp
              endif 
              p = p + fscal
              idp = idp + fscal

!          -------------------------------------------------------------
           else if (what .le. 2*n) then 
!          -------------------------------------------------------------

!             this is a non-pivotal column
              csiz1 = ii (p)
              col = what - n
              celn = ii (p+5)
              clen = ii (p+6)
              csiz2 = 2*celn + clen + cscal
              slot = doslot .and. wic (col) .ge. 0 .and. p .ge. idp+2
              if (slot) then 
!                keep (or make) one extra slot for element list growth
                 csiz2 = csiz2 + 2
                 slots = slots + 2
              endif 
              cdeg = cp (col)
              ii (p+1) = cdeg
              cp (col) = idp
              ii (p) = csiz2
!             copy the cscal scalars and the celn (e,f) tuples
!fpp$ nodepchk l
              do 140 i = 0, cscal + 2*celn - 1 
                 ii (idp+i) = ii (p+i)
140           continue 
              if (clen .gt. 0) then 
!                shift pointers in the link list
                 if (ii (idp+4) .ne. 0) then 
                    ii (ii (idp+4)+3) = idp
                 else 
                    mhead = idp
                 endif 
                 if (ii (idp+3) .ne. 0) then 
                    ii (ii (idp+3)+4) = idp
                 else 
                    mtail = idp
                 endif 
              endif 
              p = p + csiz1 - clen
              idp = idp + cscal + 2*celn
              if (slot) then 
!                skip past the slot
                 idp = idp + 2
              endif 
!             copy the clen original row indices
!fpp$ nodepchk l
              do 150 i = 0, clen - 1 
                 ii (idp+i) = ii (p+i)
150           continue 
              p = p + clen
              idp = idp + clen

!          -------------------------------------------------------------
           else 
!          -------------------------------------------------------------

!             this is a non-pivotal row
              rsiz1 = ii (p)
              row = what - 2*n
              reln = wr (row)
              rlen = wc (row)
              rsiz2 = 2*reln + rlen + rscal
              slot = doslot .and. wir (row) .ge. 0 .and. p .ge. idp+2
              if (slot) then 
!                keep (or make) one extra slot for element list growth
                 rsiz2 = rsiz2 + 2
                 slots = slots + 2
              endif 
              rdeg = rp (row)
              ii (p+1) = rdeg
              rp (row) = idp
              ii (p) = rsiz2
!             copy the rscal scalars, and the reln (e,f) tuples
!fpp$ nodepchk l
              do 160 i = 0, rscal + 2*reln - 1 
                 ii (idp+i) = ii (p+i)
160           continue 
              p = p + rsiz1 - rlen
              idp = idp + rscal + 2*reln
              if (slot) then 
!                skip past the slot
                 idp = idp + 2
              endif 
!             copy the rlen original column indices
!fpp$ nodepchk l
              do 170 i = 0, rlen - 1 
                 ii (idp+i) = ii (p+i)
170           continue 
              p = p + rlen
              idp = idp + rlen

!          -------------------------------------------------------------
           endif 
!          -------------------------------------------------------------

!          -------------------------------------------------------------
!          move to the next block
!          -------------------------------------------------------------

!       end while:
        goto 110
        endif 

!-----------------------------------------------------------------------
!  deallocate the unused space
!-----------------------------------------------------------------------

        iuse = iuse - (ihead - idp)
        ihead = idp
        xuse = xuse - (xhead - xdp)
        xhead = xdp
        return
        end subroutine umd2fg
        subroutine umd2in (icntl, cntl, keep)
        integer icntl (20), keep (20)
        real(fltp)
     >          cntl (10)
  
!=== umd2in ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  user-callable, but provided for backward compatibility with umfpack
!  version 2.0 only!  use umd21i instead.  this subroutine is *not*
!  compatible with the harwell subroutine library.

        call umd21i (keep, cntl, icntl)
        return
        end subroutine umd2in
        subroutine umd2lt (nlu, npiv, n, lup, lui, lux, x, w)
        integer nlu, npiv, n, lup (nlu), lui (*)
        real(fltp)
     >          lux (*), x (n), w (n)
  
!=== umd2lt ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  solves l'x = b, where l is the lower triangular factor of a matrix
!  (if btf not used) or a single diagonal block (if btf is used).
!  b is overwritten with the solution x.

!=======================================================================
!  input:
!=======================================================================
!
!       nlu:            number of lu arrowheads in the lu factors
!       npiv:           number of pivots found (normally n)
!       n:              order of matrix
!       lup (1..nlu):   pointer to lu arrowheads in lui
!       lui ( ... ):    integer values of lu arrowheads
!       lux ( ... ):    real values of lu arroheads
!       x (1..n):       the right-hand-side

!=======================================================================
!  workspace:
!=======================================================================
!
!       w (1..n):

!=======================================================================
!  output:
!=======================================================================
!
!       x (1..n):       the solution to l'x=b

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   umd2s2
!       subroutines called:     dtrsv, dgemv

!=======================================================================
!  local scalars:
!=======================================================================

        integer j, k, s, luip, luxp, luk, ludegc, lucp, lxp, col
        real(fltp)
     >          one

!  s:       an element, or lu arrowhead
!  k:       kth pivot
!  j:       jth column in l2' array in element s
!  luip:    s is in lui (luip...)
!  luxp:    real part of s is in lux (luxp...)
!  luk:     number of pivots in s
!  ludegc:  column degree of non-pivotal part of s
!  lucp:    pattern of column of s in lui (lucp...lucp+ludegc-1)
!  lxp:     the ludegc-by-luk l2 block of s is in lux (lxp...)
!  col:     column index

!=======================================================================
!  executable statments:
!=======================================================================

        one = 1
        k = npiv
        do 30 s = nlu, 1, -1 

!          -------------------------------------------------------------
!          get s-th lu arrowhead (s = nlu..1, in reverse pivotal order)
!          -------------------------------------------------------------

           luip   = lup (s)
           luxp   = lui (luip)
           luk    = lui (luip+1)
           ludegc = lui (luip+3)
           lucp   = (luip + 7)
           lxp    = luxp + luk

           if (luk .eq. 1) then 

!             ----------------------------------------------------------
!             only one pivot, stride-1 sparse dot product
!             ----------------------------------------------------------

!fpp$ nodepchk l
              do 10 j = 1, ludegc 
                 col = lui (lucp+j-1)
!                row: k, u (row,col): lux (lxp+j-1)
                 x (k) = x (k) - lux (lxp+j-1) * x (col)
10            continue 
!             l (k,k) is one
              k = k - 1

           else 

!             ----------------------------------------------------------
!             more than one pivot
!             ----------------------------------------------------------

              k = k - luk
              do 20 j = 1, ludegc 
                 col = lui (lucp+j-1)
                 w (j) = x (col)
20            continue 
              call dgemv ('t', ludegc, luk, -one,
     >           lux (lxp), ludegc + luk, w, 1, one, x (k+1), 1)
              call dtrsv ('l', 't', 'u', luk,
     >           lux (luxp), ludegc + luk, x (k+1), 1)

           endif 

30      continue 
        return
        end subroutine umd2lt
        subroutine umd2of (w, n, rperm, cperm, nzoff,
     >          offp, offi, offx, pr,
     >          icntl, mp, mi, mx, mn, mnz, presrv, nblks, blkp,
     >          onz, who, info, nbelow)
        integer n, nzoff, w (n+1), rperm (n), cperm (n), onz,
     >          offp (n+1), offi (onz), pr (n), icntl (20), mn, mnz,
     >          mp (mn+1), mi (mnz), nblks, blkp (nblks+1), who, nbelow,
     >          info (40)
        logical presrv
        real(fltp)
     >          offx (onz), mx (mnz)
  
!=== umd2of ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  permute the off-diagonal blocks according to final pivot permutation.
!  this routine is called only if the block-triangular-form (btf) is
!  used.

!=======================================================================
!  input:
!=======================================================================
!
!       n:              order of matrix
!       rperm (1..n):   the final row permutations, including btf
!                       if i is the k-th pivot row, then rperm (k) = i
!       cperm (1..n):   the final column permutations, including btf
!                       if j is the k-th pivot col, then cperm (k) = j
!       icntl:          integer control parameters, see umd21i
!       info:           integer informational parameters
!       who:            who called (1: umd2fa, 2: umd2rf)
!
!       if presrv is true then
!           mn:                 order of preserved matrix
!           mnz:                number of entries in preserved matrix
!           mp (1..mn+1):       column pointers of preserved matrix
!           mi (1..mnz):        row indices of preserved matrix
!           mx (1..mnz):        values of preserved matrix
!           blkp (1..nblks+1):  the index range of the blocks
!           nblks:              the number of diagonal blocks
!       else
!           mn:                 0
!           mnz:                nzoff
!           mp:                 unaccessed
!           offp (1..n+1):      column pointers for off-diagonal entries
!                               in original order
!           mi (1..mnz):        the row indices of off-diagonal entries,
!                               in original order
!           mx (1..mnz):        the values of off-diagonal entries,
!                               in original order
!           nblks:              0
!           blkp (1..nblks+1):  unaccessed
!           nzoff:              number of entries in off-diagonal blocks

!=======================================================================
!  workspace:
!=======================================================================
!
!       w (1..n)

!=======================================================================
!  output:
!=======================================================================
!
!       offp (1..n+1):          row pointers for off-diagonal part
!       offi (1..nzoff):        column indices in off-diagonal part
!       offx (1..nzoff):        values in off-diagonal part
!       nzoff:                  number of entries in off-diagonal blocks
!       pr (1..n):              inverse row permutation
!       nbelow:                 entries that are below the diagonal
!                               blocks (can only occur if who = 2)

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   umd2f0, umd2ra, umd2r0
!       subroutines called:     umd2p2

!=======================================================================
!  local scalars:
!=======================================================================

        integer row, col, p, blk, k, k1, k2, io, prl
        logical pr3

!  row:     row index
!  col:     column index
!  p:       pointer
!  blk:     current diagonal block
!  k:       kth pivot
!  k1,k2:   current diaogonal block is a (k1..k2, k1..k2)
!  io:      i/o unit for diagnostic messages
!  prl:     printing level
!  pr3:     true if printing entries below diagonal blocks (umd2rf)

!=======================================================================
!  executable statments:
!=======================================================================

        io = icntl (2)
        prl = icntl (3)
        pr3 = prl .ge. 3 .and. io .ge. 0

!-----------------------------------------------------------------------
!  compute inverse row permutation
!-----------------------------------------------------------------------

!       if original row i is the kth pivot row, then
!               rperm (k) = i
!               pr (i) = k
!       if original col j is the kth pivot col, then
!               cperm (k) = j
!fpp$ nodepchk l
        do 10 k = 1, n 
           pr (rperm (k)) = k
10      continue 

!-----------------------------------------------------------------------
!  construct row-oriented pointers for permuted row-form
!-----------------------------------------------------------------------

        w (1) = 1
        do 20 row = 2, n 
           w (row) = 0
20      continue 
        nbelow = 0
        if (presrv) then 
           do 50 blk = 1, nblks 
              k1 = blkp (blk)
              k2 = blkp (blk+1) - 1
              do 40 col = k1, k2 
!fpp$ nodepchk l
                 do 30 p = mp (cperm (col)), mp (cperm (col)+1)-1 
                    row = pr (mi (p))
                    if (row .lt. k1) then 
!                      offdiagonal entry
                       w (row) = w (row) + 1
                    else if (row .gt. k2 .and. who .eq. 2) then 
!                      this entry is below the diagonal block - invalid.
!                      this can only occur if who = 2 (umd2rf).
                       if (pr3) then 
!                         print the original row and column indices:
                          call umd2p2 (2, 96, mi (p), col, mx (p), io)
                       endif 
                       nbelow = nbelow + 1
                    endif 
30               continue 
40            continue 
50         continue 
        else 
           do 70 col = 1, n 
!fpp$ nodepchk l
              do 60 p = offp (col), offp (col+1) - 1 
                 row = pr (mi (p))
                 w (row) = w (row) + 1
60            continue 
70         continue 
        endif 
        do 80 row = 2, n 
           w (row) = w (row) + w (row-1)
80      continue 
        w (n+1) = w (n)
!       w (row) now points just past end of row in offi/x

!-----------------------------------------------------------------------
!  construct the row-oriented form of the off-diagonal values,
!  in the final pivot order.  the column indices in each row
!  are placed in ascending order (the access of offi/offx later on
!  does not require this, but it makes access more efficient).
!-----------------------------------------------------------------------

        if (presrv) then 
           do 110 blk = nblks, 1, -1 
              k1 = blkp (blk)
              k2 = blkp (blk+1) - 1
              do 100 col = k2, k1, - 1 
!fpp$ nodepchk l
                 do 90 p = mp (cperm (col)), mp (cperm (col)+1)-1 
                    row = pr (mi (p))
                    if (row .lt. k1) then 
!                      offdiagonal entry
                       w (row) = w (row) - 1
                       offi (w (row)) = col
                       offx (w (row)) = mx (p)
                    endif 
90               continue 
100           continue 
110        continue 
        else 
           do 130 col = n, 1, -1 
!fpp$ nodepchk l
              do 120 p = offp (cperm (col)), offp (cperm (col) + 1) - 1
                 row = pr (mi (p))
                 w (row) = w (row) - 1
                 offi (w (row)) = col
                 offx (w (row)) = mx (p)
120           continue 
130        continue 
        endif 

!-----------------------------------------------------------------------
!  save the new row pointers
!-----------------------------------------------------------------------

        do 140 row = 1, n+1 
           offp (row) = w (row)
140     continue 

        nzoff = offp (n+1) - 1

        return
        end subroutine umd2of
        subroutine umd2p1 (who, where,
     >          n, ne, job, trans, lvalue, lindex, value,
     >          index, keep, cntl, icntl, info, rinfo,
     >          b, x, lx, w, lw)
        integer who, where, n, ne, job, lvalue, lindex, index (lindex),
     >          keep (20), icntl (20), info (40), lx, lw
        real(fltp)
     >          value (lvalue), b (lx), x (lx), w (lw)
        real(fltp)
     >          cntl (10), rinfo (20)
        logical trans
  
!=== umd2p1 ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  print input/output arguments for umd2fa, umd2rf, and umd2so

!=======================================================================
!  installation note:
!=======================================================================
!
!  this routine can be deleted on installation (replaced with a dummy
!  routine that just returns without printing) in order to completely
!  disable the printing of all input/output parameters.  to completely
!  disable all i/o, you can also replace the umd2p2 routine with a
!  dummy subroutine.  if you make this modification, please do
!  not delete any original code - just comment it out instead.  add a
!  comment and date to your modifications.

!=======================================================================
!  input:
!=======================================================================
!
!       who:            what routine called umd2p1:
!                       1: umd2fa, 2: umd2rf, 3: umd2so
!       where:          called from where:
!                       1: entry of routine, else exit of routine
!       icntl (3):      if < 3 then print nothing, if 3 then print
!                       terse output, if 4 print info and matrix
!                       values.  if 5, print everything.
!       icntl (2):      i/o unit on which to print.  no printing
!                       occurs if < 0.
!
!       parameters to print, see umd2fa, umd2rf, or umd2so for
!       descriptions:
!
!           n, ne, job, trans, lvalue, lindex, value, index, keep,
!           icntl, info, rinfo, b, x, lx, w, lw

!=======================================================================
!  output:
!=======================================================================
!
!       on icntl (2) i/o unit only

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutines:  umd2fa, umd2rf, umd2so
!       functions called:       min
        intrinsic min

!=======================================================================
!  local scalars:
!=======================================================================

        logical transa, transc, prlu, badlu, sglton, presrv, symbol
        integer io, prl, prn, k, lui1, lui2, lux1, lux2, row, col,
     >          facne, facn, nz, facjob, nblks, nzoff, factra, cpermp,
     >          rpermp, app, axp, aip, offip, offxp, lublpp, offpp,
     >          blkpp, p1, p2, p, blk, k1, k2, kn, luiip, luxxp, npiv,
     >          nlu, e, luk, lupp, luip, luxp, ludegr, ludegc, lunson,
     >          lusonp, lucp, lurp, i, j, nzcol, nzrow, uxp, son,
     >          prmax, ludimr, ludimc, maxdr, maxdc, luir1, ip1, ip2,
     >          xp1
        real(fltp)
     >          one
        parameter (prmax = 10)

!  printing control:
!  -----------------
!  io:      i/o unit for diagnostic messages
!  prl:     printing level
!  prn:     number of entries printed so far
!  prmax:   maximum number of entries to print if prl = 3
!  prlu:    true if printing lu factors
!
!  location and status of lu factors:
!  ----------------------------------
!  transc:  transc argument in umd2so
!  transa:  transa argument in umd2fa or umd2rf when matrix factorized
!  badlu:   true if lu factors uncomputed or corrupted
!  presrv:  true if original matrix was preserved when factorized
!  symbol:  true if only symbolic part of lu factors needed on input
!  lui1:    integer part of lu factors start in index (lui1...)
!  luir1:   index (luir1 ... lui2) is needed for a call to umd2rf
!  lui2:    integer part of lu factors end in index (..lui2)
!  lux1:    real part of lu factors start in value (lux1...)
!  lux2:    real part of lu factors end in value (...lux1)
!  ip1:     pointer into leading part of lu factors in index
!  ip2:     pointer into trailing part of lu factors in index
!  xp1:     pointer into leading part of lu factors in value
!
!  arrays and scalars allocated in lu factors (in order):
!  ------------------------------------------------------
!  app:     ap (1..n+1) array located in index (app...app+n)
!  axp:     ax (1..nz) array located in value (axp...axp+nz-1)
!  aip:     ai (1..nz) array located in index (aip...aip+nz-1)
!  offip:   offi (1..nzoff) array loc. in index (offip...offip+nzoff-1)
!  offxp:   offx (1..nzoff) array loc. in value (offxp...offxp+nzoff-1)
!  ...      lu factors of each diagonal block located here
!  lublpp:  lublkp (1..nblks) array in index (lublpp..lublpp+nblks-1)
!  blkpp:   blkp (1..nblks+1) array loc. in index (blkpp...blkpp+nblks)
!  offpp:   offp (1..n+1) array located in index (offpp...offpp+n)
!  cpermp:  cperm (1..n) array located in index (cpermp...cpermp+n-1)
!  rpermp:  rperm (1..n) array located in index (rpermp...rpermp+n-1)
!  ...      seven scalars in index (lui2-6...lui2):
!  factra:  0/1 if transa argument was false/true in umd2fa or umd2rf
!  nzoff:   number of entries in off-diagonal part
!  nblks:   number of diagonal blocks
!  facjob:  job argument in umd2fa or umd2rf when matrix factorized 
!  nz:      entries in a
!  facn:    n argument in umd2fa or umd2rf when matrix factorized 
!  facne:   ne argument in umd2fa or umd2rf when matrix factorized 
!
!  a single diagonal block and its lu factors:
!  -------------------------------------------
!  blk:     current diagonal block
!  k1,k2:   current diagonal is a (k1..k2, k1..k2)
!  kn:      order of current diagonal block (= k2-k1+1)
!  sglton:  true if current diagonal block is 1-by-1 (a singleton)
!  luiip:   lu factors of a diagonal block start in index (luiip...)
!  luxxp:   lu factors of a diagonal block start in value (luxxp...)
!  npiv:    number of pivots in a diagonal block (0 <= npiv <= kn)
!  nlu:     number of elements in a diagonal block
!  lupp:    lup (1..nlu) array located in index (lupp...lupp+nlu-1)
!
!  an element in the lu factors of a single diagonal block:
!  --------------------------------------------------------
!  e:       element
!  luk:     number of pivots in element e
!  luip:    integer part of element is in index (luip...)
!  luxp:    real part of element e is in value (luxp...)
!  ludegr:  row degree (number of columns) of u2 block in element e
!  ludegc:  column degree (number of rows) of l2 block in element e
!  lunson:  number of sons of element e in the assembly dag
!  lusonp:  list of sons of element e in index(lusonp...lusonp+lunson-1)
!  lucp:    column pattern (row indices) of l2 block in index (lucp..)
!  lurp:    row pattern (column indices) of u2 block in index (lurp..)
!  nzcol:   entries in a column of l, including unit diagonal
!  nzrow:   entries in a row of u, including non-unit diagonal
!  uxp:     a row of the u2 block located in value (uxp...)
!  son:     a son of the element e
!  ludimr:  row dimension (number of columns) in frontal matrix
!  ludimc:  column dimension (number of rows) in frontal matrix
!  maxdr:   largest ludimr for this block
!  maxdc:   largest ludimc for this block
!
!  other:
!  ------
!  row:     row index
!  col:     column index
!  k:       kth pivot, and general loop index
!  i, j:    loop indices
!  p:       pointer
!  p1:      column of a starts ai/ax (p1...), or row offi/x (p1...)
!  p2:      column of a ends in ai/ax (...p2), or row offi/x (...p2)

!=======================================================================
!  executable statements:
!       if (printing disabled on installation) return
!=======================================================================

!-----------------------------------------------------------------------
!  get printing control parameters
!-----------------------------------------------------------------------
        transa = .false.
        prlu = .false.
        one = 1
        io = icntl (2)
        prl = icntl (3)
        if (prl .lt. 3 .or. io .lt. 0) then 
!          printing has not been requested
           return
        endif 

!-----------------------------------------------------------------------
!  who is this, and where.  determine if lu factors are to be printed
!-----------------------------------------------------------------------

        if (who .eq. 1) then 
           if (where .eq. 1) then 
              write (io, 6) 'umd2fa input:'
              prlu = .false.
           else 
              write (io, 6) 'umd2fa output:'
              prlu = .true.
           endif 
        else if (who .eq. 2) then 
           if (where .eq. 1) then 
              write (io, 6) 'umd2rf input:'
              prlu = .true.
           else 
              write (io, 6) 'umd2rf output:'
              prlu = .true.
           endif 
        else if (who .eq. 3) then 
           if (where .eq. 1) then 
              write (io, 6) 'umd2so input:'
              prlu = .true.
           else 
              write (io, 6) 'umd2so output:'
              prlu = .false.
           endif 
        endif 

!-----------------------------------------------------------------------
!  print scalar input arguments: n, ne, job, trans, lvalue, lindex
!-----------------------------------------------------------------------

        if (where .eq. 1) then 
           write (io, 1)  'scalar arguments:'
           write (io, 1)  '   n:         ', n, ' : order of matrix a'
           if (who .eq. 3) then 
!             umd2so:
              lui2 = keep (5)

!             was a or a^t factorized?
              transa = .false.
              if (lui2-6 .ge. 1 .and. lui2-6 .le. lindex) then 
                 transa = index (lui2-6) .ne. 0
              endif 
              transc = trans
              if (.not. transc) then 
                 if (job .eq. 1) then 
                    write (io, 1) '   job:       ', job,
     >              ' : solve p''lx=b'
                 else if (job .eq. 2) then 
                    write (io, 1) '   job:       ', job,
     >              ' : solve uq''x=b'
                 else if (.not. transa) then 
                    write (io, 1) '   job:       ', job,
     >              ' : solve ax=b (paq=lu was factorized)'
                 else 
                    write (io, 1) '   job:       ', job,
     >              ' : solve a''x=b (pa''q=lu was factorized)'
                 endif 
              else 
                 if (job .eq. 1) then 
                    write (io, 1) '   job:       ', job,
     >              ' : solve l''px=b'
                 else if (job .eq. 2) then 
                    write (io, 1) '   job:       ', job,
     >              ' : solve qu''x=b'
                 else if (.not. transa) then 
                    write (io, 1) '   job:       ', job,
     >              ' : solve a''x=b (paq=lu was factorized)'
                 else 
                    write (io, 1) '   job:       ', job,
     >              ' : solve ax=b (pa''q=lu was factorized)'
                 endif 
              endif 
              if (transc) then 
                 write (io, 1)
     >           '   transc:          .true. : see job above '
              else 
                 write (io, 1)
     >           '   transc:         .false. : see job above '
              endif 

           else 
!             umd2fa or umd2rf:
              write (io, 1) '   ne:        ', ne,
     >        ' : entries in matrix a'
              if (job .eq. 1) then 
                 write (io, 1) '   job:       ', job,
     >           ' : matrix a preserved'
              else 
                 write (io, 1) '   job:       ', job,
     >           ' : matrix a not preserved'
              endif 

              transa = trans
              if (transa) then 
                 write (io, 1)
     >           '   transa:          .true. : factorize a transpose'
              else 
                 write (io, 1)
     >           '   transa:         .false. : factorize a'
              endif 

           endif 
           write (io, 1) '   lvalue:    ',lvalue,
     >     ' : size of value array'
           write (io, 1) '   lindex:    ',lindex,
     >     ' : size of index array'
        endif 

!-----------------------------------------------------------------------
!  print control parameters:  icntl, cntl, and keep (6..8)
!-----------------------------------------------------------------------

        if (where .eq. 1) then 
           write (io, 1)
     >     'control parameters, normally initialized by umd21i:'
           write (io, 1) '   icntl (1): ', icntl (1),
     >     ' : i/o unit for error and warning messages'
           write (io, 1) '   icntl (2): ', io,
     >     ' : i/o unit for diagnostics'
           write (io, 1) '   icntl (3): ', prl,
     >     ' : printing control'
           if (who .eq. 1) then 
              if (icntl (4) .eq. 1) then 
                 write (io, 1) '   icntl (4): ', icntl (4),
     >           ' : use block triangular form (btf)'
              else 
                 write (io, 1) '   icntl (4): ', icntl (4),
     >           ' : do not permute to block triangular form (btf)'
              endif 
              write (io, 1) '   icntl (5): ', icntl (5),
     >        ' : columns examined during pivot search'
              if (icntl (6) .ne. 0) then 
                 write (io, 1) '   icntl (6): ', icntl (6),
     >           ' : preserve symmetry'
              else 
                 write (io, 1) '   icntl (6): ', icntl (6),
     >           ' : do not preserve symmetry'
              endif 
           endif 
           if (who .ne. 3) then 
              write (io, 1) '   icntl (7): ', icntl (7),
     >        ' : block size for dense matrix multiply'
           else 
              write (io, 1) '   icntl (8): ', icntl (8),
     >        ' : maximum number of iterative refinement steps'
           endif 
           if (who .eq. 1) then 
              write (io, 3) '   cntl (1):   ',cntl (1),
     >        ' : relative pivot tolerance'
              write (io, 3) '   cntl (2):   ',cntl (2),
     >        ' : frontal matrix growth factor'
              write (io, 1) '   keep (6):  ',keep(6),
     >        ' : largest positive integer'
              write (io, 1) '   keep (7):  ',keep(7),
     >        ' : dense row/col control, d1'
              write (io, 1) '   keep (8):  ',keep(8),
     >        ' : dense row/col control, d2'
           else if (who .eq. 3) then 
              write (io, 3) '   cntl (3):   ',cntl(3),
     >        ' : machine epsilon'
           endif 
        endif 

!-----------------------------------------------------------------------
!  print the informational output
!-----------------------------------------------------------------------

        if (where .ne. 1) then 
           write (io, 1) 'output information:'
           if (info (1) .lt. 0) then 
              write (io, 1) '   info (1):  ', info (1),
     >        ' : error occurred!'
           else if (info (1) .gt. 0) then 
              write (io, 1) '   info (1):  ', info (1),
     >        ' : warning occurred'
           else 
              write (io, 1) '   info (1):  ', info (1),
     >        ' : no error or warning occurred'
           endif 
           if (who .ne. 3) then 
              write (io, 1) '   info (2):  ', info (2),
     >        ' : duplicate entries in a'
              write (io, 1) '   info (3):  ', info (3),
     >        ' : invalid entries in a (indices not in 1..n)'
              write (io, 1) '   info (4):  ', info (4),
     >        ' : invalid entries in a (not in prior pattern)'
              write (io, 1) '   info (5):  ', info (5),
     >        ' : entries in a after summing duplicates'
              write (io, 1)
     >  '                             and removing invalid entries'
              write (io, 1) '   info (6):  ', info (6),
     >        ' : entries in diagonal blocks of a'
              write (io, 1) '   info (7):  ', info (7),
     >        ' : entries in off-diagonal blocks of a'
              write (io, 1) '   info (8):  ', info (8),
     >        ' : 1-by-1 diagonal blocks in a'
              write (io, 1) '   info (9):  ', info (9),
     >        ' : diagonal blocks in a (>1 only if btf used)'
              write (io, 1) '   info (10): ', info (10),
     >        ' : entries below diagonal in l'
              write (io, 1) '   info (11): ', info (11),
     >        ' : entries above diagonal in u'
              write (io, 1) '   info (12): ', info (12),
     >        ' : entries in l + u + offdiagonal blocks of a'
              write (io, 1) '   info (13): ', info (13),
     >        ' : frontal matrices'
              write (io, 1) '   info (14): ', info (14),
     >        ' : integer garbage collections'
              write (io, 1) '   info (15): ', info (15),
     >        ' : real garbage collections'
              write (io, 1) '   info (16): ', info (16),
     >        ' : diagonal pivots chosen'
              write (io, 1) '   info (17): ', info (17),
     >        ' : numerically valid pivots found in a'
              write (io, 1) '   info (18): ', info (18),
     >        ' : memory used in index'
              write (io, 1) '   info (19): ', info (19),
     >        ' : minimum memory needed in index'
              write (io, 1) '   info (20): ', info (20),
     >        ' : memory used in value'
              write (io, 1) '   info (21): ', info (21),
     >        ' : minimum memory needed in value'
              write (io, 1) '   info (22): ', info (22),
     >        ' : memory needed in index for next call to umd2rf'
              write (io, 1) '   info (23): ', info (23),
     >        ' : memory needed in value for next call to umd2rf'
           else 
              write (io, 1) '   info (24): ', info (24),
     >        ' : steps of iterative refinement taken'
           endif 
           if (who .ne. 3) then 
              write (io, 3) '   rinfo (1):  ', rinfo (1),
     >        ' : total blas flop count'
              write (io, 3) '   rinfo (2):  ', rinfo (2),
     >        ' : assembly flop count'
              write (io, 3) '   rinfo (3):  ', rinfo (3),
     >        ' : pivot search flop count'
              write (io, 3) '   rinfo (4):  ', rinfo (4),
     >        ' : level-1 blas flop count'
              write (io, 3) '   rinfo (5):  ', rinfo (5),
     >        ' : level-2 blas flop count'
              write (io, 3) '   rinfo (6):  ', rinfo (6),
     >        ' : level-3 blas flop count'
           else if (lw .eq. 4*n) then 
              write (io, 3) '   rinfo (7):  ', rinfo (7),
     >        ' : sparse error estimate omega1'
              write (io, 3) '   rinfo (8):  ', rinfo (8),
     >        ' : sparse error estimate omega2'
           endif 
        endif 

!-----------------------------------------------------------------------
!  print input matrix a, in triplet form, for umd2fa and umd2rf
!-----------------------------------------------------------------------

        if (where .eq. 1 .and. who .ne. 3) then 

           if (transa) then 
              if (prl .ge. 5) then 
                 write (io, 1) 'the input matrix a transpose:'
                 write (io, 1) '   value (1 ... ',ne,
     >           ' ): numerical values'
                 write (io, 1) '   index (1 ... ',ne,
     >           ' ): column indices'
                 write (io, 1) '   index (',ne+1,' ... ',2*ne,
     >           ' ): row indices'
              endif 
              write (io, 1)
     >        'input matrix a transpose (entry: row, column, value):'
           else 

              if (prl .ge. 5) then 
                 write (io, 1) 'the input matrix a:'
                 write (io, 1) '   value (1 ... ',ne,
     >           ' ): numerical values'
                 write (io, 1) '   index (1 ... ',ne,
     >           ' ): row indices'
                 write (io, 1) '   index (',ne+1,' ... ',2*ne,
     >           ' ): column indices'
              endif 
              write (io, 1)
     >        'input matrix a (entry: row, column, value):'

           endif 

           prn = min (prmax, ne)
           if (prl .ge. 4) then 
              prn = ne
           endif 
           do 20 k = 1, prn 

              if (transa) then 
                 row = index (k+ne)
                 col = index (k)
              else 
                 row = index (k)
                 col = index (k+ne)
              endif 

              write (io, 2) k, row, col, value (k)
20         continue 
           if (prn .lt. ne) then 
              write (io, 7)
           endif 
        endif 

!-----------------------------------------------------------------------
!  print the lu factors:  umd2fa output, umd2rf input/output,
!                         and umd2so input
!-----------------------------------------------------------------------

        if (prlu .and. info (1) .lt. 0) then 
           write (io, 1)
     >     'lu factors not printed because of error flag, info (1) ='
     >     , info (1)
           prlu = .false.
        endif 

        if (prlu) then 

!          -------------------------------------------------------------
!          description of what must be preserved between calls
!          -------------------------------------------------------------

           lux1 = keep (1)
           lux2 = keep (2)
           lui1 = keep (3)
           luir1 = keep (4)
           lui2 = keep (5)

           xp1 = lux1
           ip1 = lui1
           ip2 = lui2

!          -------------------------------------------------------------
!          on input to umd2rf, only the symbol information is used
!          -------------------------------------------------------------

           symbol = who .eq. 2 .and. where .eq. 1

           if (prl .ge. 5) then 
              if (symbol) then 
                 write (io, 1)
     >           'keep (4...5) gives the location of lu factors'
                 write (io, 1)
     >           '   which must be preserved for calls to umd2rf: '
              else 
                 write (io, 1)
     >           'keep (1...5) gives the location of lu factors'
                 write (io, 1)
     >           '   which must be preserved for calls to umd2so: '
                 write (io, 1) '      value ( keep (1): ', lux1,
     >           ' ... keep (2): ', lux2,' )'
                 write (io, 1) '      index ( keep (3): ', lui1,
     >           ' ... keep (5): ', lui2,' )'
                 write (io, 1) '   and for calls to umd2rf: '
              endif 
              write (io, 1) '      index ( keep (4): ',luir1,
     >        ' ... keep (5): ', lui2,' )'
           endif 

           badlu = luir1 .le. 0 .or. lui2-6 .lt. luir1 .or.
     >        lui2 .gt. lindex
           if (.not. symbol) then 
              badlu = badlu .or. lux1 .le. 0 .or.
     >        lux1 .gt. lux2 .or. lux2 .gt. lvalue .or. lui1 .le. 0 .or.
     >        luir1 .lt. lui1 .or. luir1 .gt. lui2
           endif 

!          -------------------------------------------------------------
!          get the 7 scalars, and location of permutation vectors
!          -------------------------------------------------------------

           if (badlu) then 
!             pointers are bad, so these values cannot be obtained
              facne  = 0
              facn   = 0
              nz     = 0
              facjob = 0
              nblks  = 0
              nzoff  = 0
              factra = 0
           else 
              facne  = index (lui2)
              facn   = index (lui2-1)
              nz     = index (lui2-2)
              facjob = index (lui2-3)
              nblks  = index (lui2-4)
              nzoff  = index (lui2-5)
              factra = index (lui2-6)
           endif 

           presrv = facjob .ne. 0
           transa = factra .ne. 0
           rpermp = (lui2-6) - (facn)
           cpermp = rpermp - (facn)
           ip2 = cpermp - 1

           if (prl .ge. 5) then 
              if (symbol) then 
                 write (io, 1) 'layout of lu factors in index:'
              else 
                 write (io, 1)
     >           'layout of lu factors in value and index:'
              endif 
           endif 

!          -------------------------------------------------------------
!          get location of preserved input matrix
!          -------------------------------------------------------------

           if (presrv) then 
!             preserved column-form of original matrix
              app = ip1
              aip = app + (facn+1)
              ip1 = aip + (nz)
              axp = xp1
              xp1 = xp1 + (nz)
              if (prl .ge. 5 .and. .not. symbol) then 
                 write (io, 1)'   preserved copy of original matrix:'
                 write (io, 1)'      index ( ',app,' ... ', aip-1,
     >           ' ): column pointers'
                 write (io, 1)'      index ( ',aip,' ... ', ip1-1,
     >           ' ): row indices'
                 write (io, 1)'      value ( ',axp,' ... ', xp1-1,
     >           ' ): numerical values'
              endif 
           else 
              if (prl .ge. 5 .and. .not. symbol) then 
                 write (io, 1) '   original matrix not preserved.'
              endif 
           endif 

           badlu = badlu .or.
     >          n .ne. facn .or. nz .le. 0 .or. luir1 .gt. ip2 .or.
     >          nblks .le. 0 .or. nblks .gt. n
           if (.not. symbol) then 
              badlu = badlu .or. xp1 .gt. lux2 .or. nzoff .lt. 0
           endif 
           if (badlu) then 
              nblks = 0
           endif 

           if (nblks .le. 1) then 

!             ----------------------------------------------------------
!             single block (or block triangular form not used),
!             or lu factors are corrupted
!             ----------------------------------------------------------

              if (prl .ge. 5) then 
                 write (io, 1)
     >           '   collection of elements in lu factors:'
                 write (io, 1) '      index ( ',luir1,' ... ', ip2,
     >           ' ): integer data'
                 if (.not. symbol) then 
                    write (io, 1) '      value ( ',xp1,' ... ', lux2,
     >              ' ): numerical values'
                 endif 
              endif 

           else 

!             ----------------------------------------------------------
!             block triangular form with more than one block
!             ----------------------------------------------------------

              offip = ip1
              ip1 = ip1 + (nzoff)
              offxp = xp1
              xp1 = xp1 + (nzoff)
              offpp = cpermp - (n+1)
              blkpp = offpp - (nblks+1)
              lublpp = blkpp - (nblks)
              ip2 = lublpp - 1
              badlu = badlu .or. luir1 .gt. ip2
              if (.not. symbol) then 
                 badlu = badlu .or. ip1 .gt. ip2 .or.
     >           xp1 .gt. lux2 .or. luir1 .ne. ip1
              endif 

              if (prl .ge. 5) then 
                 write (io, 1)
     >           '   matrix permuted to upper block triangular form.'
                 if (nzoff .ne. 0 .and. .not. symbol) then 
                    write (io, 1)'   entries not in diagonal blocks:'
                    write (io, 1)'      index ( ',offip,' ... ',
     >              luir1-1, ' ): row indices'
                    write (io, 1)'      value ( ',offxp,' ... ',
     >              xp1-1, ' ): numerical values'
                 endif 
                 write (io, 1)
     >  '   collection of elements in lu factors of diagonal blocks:'
                 if (luir1 .le. lublpp-1) then 
                    write (io, 1) '      index ( ',luir1,' ... ',
     >              ip2, ' ): integer data'
                 endif 
                 if (xp1 .le. lux2 .and. .not. symbol) then 
                    write (io, 1) '      value ( ',xp1,' ... ', lux2,
     >              ' ): numerical values'
                 endif 
                 write (io, 1) '   other block triangular data:'
                 write (io, 1) '      index ( ',lublpp,' ... ',
     >           blkpp-1, ' ): pointers to block factors' 
                 write (io, 1) '      index ( ', blkpp,' ... ',
     >           offpp-1, ' ): index range of blocks'
                 if (.not. symbol) then 
                    write (io, 1) '      index ( ', offpp,' ... ',
     >              lui2-7,' ): off-diagonal row pointers'
                 endif 
              endif 

           endif 

!          -------------------------------------------------------------
!          print location of permutation vectors and 7 scalars at tail
!          -------------------------------------------------------------

           if (prl .ge. 5) then 
              write (io, 1)
     >        '   permutation vectors (start at keep(4)-2*n-6):'
              write (io, 1) '      index ( ',cpermp,' ... ',rpermp-1,
     >        ' ): column permutations'
              write (io, 1) '      index ( ',rpermp,' ... ',lui2-7,
     >        ' ): row permutations'
              write (io, 1) '   other data in index: '
              write (io, 1) '      index ( ',lui2-6,' ): ', factra,
     >        ' : transa umd2fa/umd2rf argument'
              write (io, 1) '      index ( ',lui2-5,' ): ', nzoff,
     >        ' : entries in off-diagonal part'
              write (io, 1) '      index ( ',lui2-4,' ): ', nblks,
     >        ' : number of diagonal blocks'
              write (io, 1) '      index ( ',lui2-3,' ): ', facjob,
     >        ' : job umd2fa/umd2rf argument'
              write (io, 1) '      index ( ',lui2-2,' ): ', nz,
     >        ' : entries in original matrix'
              write (io, 1) '      index ( ',lui2-1,' ): ', facn,
     >        ' : n umd2fa/umd2rf argument'
              write (io, 1) '      index ( ',lui2  ,' ): ', facne,
     >        ' : ne umd2fa/umd2rf argument'
           endif 

           if (.not. symbol) then 
              badlu = badlu .or. ip1 .ne. luir1
           endif 
           ip1 = luir1
           if (badlu) then 
              write (io, 1) 'lu factors uncomputed or corrupted!'
              presrv = .false.
              nblks = 0
           endif 

!          -------------------------------------------------------------
!          copy of original matrix in column-oriented form
!          -------------------------------------------------------------

           if (presrv .and. .not. symbol) then 
              write (io, 8)
              write (io, 1)
     >        'preserved copy of original matrix (stored by column),'
              write (io, 1) 'one entry per line (row index, value):'
              do 40 col = 1, n 
                 p1 = index (app-1 + col)
                 p2 = index (app-1 + col+1) - 1
                 write (io, 1) '   col: ', col
                 if (prl .eq. 3) then 
                    p2 = min (prmax, p2)
                 endif 
                 do 30 p = p1, p2 
                    write (io, 5) index (aip-1 + p), value (axp-1 + p)
30               continue 
                 if (prl .eq. 3 .and. p2 .ge. prmax) then 
!                   exit out of loop if done printing:
                    write (io, 7)
                    go to 50
                 endif 
40            continue 
!             loop exit label:
50            continue
           endif 

!          -------------------------------------------------------------
!          entries in off-diagonal blocks, in row-oriented form
!          -------------------------------------------------------------

           if (nblks .gt. 1 .and. .not. symbol) then 
              write (io, 8)
              write (io, 1)
     >        'entries not in diagonal blocks (stored by row):'
              write (io, 1) 'one entry per line (column index, value):'
              if (nzoff .eq. 0) then 
                 write (io, 1) '   (none)'
              endif 
              do 70 row = 1, n 
                 p1 = index (offpp-1 + row)
                 p2 = index (offpp-1 + row+1) - 1
                 if (p2 .ge. p1) then 
                    write (io, 1) '   row: ', row
                    if (prl .eq. 3) then 
                       p2 = min (prmax, p2)
                    endif 
                    do 60 p = p1, p2 
                       write (io, 5)
     >                 index (offip-1 + p), value (offxp-1 + p)
60                  continue 
                 endif 
                 if (prl .eq. 3 .and. p2 .ge. prmax) then 
!                   exit out of loop if done printing:
                    write (io, 7)
                    go to 80
                 endif 
70            continue 
!             loop exit label:
80            continue
           endif 

!          -------------------------------------------------------------
!          lu factors of each diagonal block
!          -------------------------------------------------------------

           write (io, 8)
           if (nblks .gt. 0) then 
              if (symbol) then 
                 write (io, 1) 'nonzero pattern of prior lu factors:'
              else 
                 write (io, 1) 'lu factors:'
              endif 
           endif 
           prn = 0
           do 200 blk = 1, nblks 

!             ----------------------------------------------------------
!             print the factors of a single diagonal block
!             ----------------------------------------------------------

              if (nblks .gt. 1) then 
                 k1 = index (blkpp-1 + blk)
                 k2 = index (blkpp-1 + blk+1) - 1
                 kn = k2-k1+1
                 sglton = kn .eq. 1
                 if (sglton) then 
!                   this is a singleton
                    luxxp = xp1-1 + index (lublpp-1 + blk)
                 else 
                    luiip = ip1-1 + index (lublpp-1 + blk)
                 endif 
                 if (blk .gt. 1) then 
                    write (io, 9)
                 endif 
              else 
                 sglton = .false.
                 k1 = 1
                 k2 = n
                 kn = n
                 luiip = ip1
              endif 

              if (sglton) then 

!                -------------------------------------------------------
!                this is a singleton
!                -------------------------------------------------------

                 if (prl .eq. 3 .and. prn .ge. prmax) then 
!                   exit out of loop if done printing:
                    write (io, 7)
                    go to 210
                 endif 
                 prn = prn + 1
                 if (symbol) then 
                    write (io, 1) 'block: ', blk,
     >              ' (singleton) at index : ', k1
                 else 
                    write (io, 4) 'block: ', blk,
     >              ' (singleton) at index : ', k1,'       value: ',
     >              value (luxxp)
                 endif 
                 if (prl .ge. 5) then 
                    write (io, 1) 'located in value ( ', luxxp,' )'
                 endif 

              else 

!                -------------------------------------------------------
!                this block is larger than 1-by-1
!                -------------------------------------------------------

                 luxxp = xp1-1 + index (luiip)
                 nlu = index (luiip+1)
                 npiv = index (luiip+2)
                 maxdc = index (luiip+3)
                 maxdr = index (luiip+4)
                 lupp = luiip+5
                 if (nblks .gt. 1) then 
                    write (io, 1) 'block: ',blk,' first index: ',k1,
     >              ' last index: ',k2
                 endif 
                 if (prl .ge. 5) then 
                    write (io, 1) 'elements: ', nlu, ' pivots: ', npiv
                    write (io, 1) 'largest contribution block: ',
     >                         maxdc, ' by ', maxdr
                    write (io, 1)'located in index ( ',luiip,' ... )'
                    if (.not. symbol) then 
                       write (io, 1) 'and in value ( ',luxxp,' ... )'
                    endif 
                 endif 
                 luiip = lupp + nlu

!                note: the indices of the lu factors of the block range
!                from 1 to kn, even though the kn-by-kn block resides in
!                a (k1 ... k2, k1 ... k2).
                 k = 0

                 do 190 e = 1, nlu 

!                   ----------------------------------------------------
!                   print a single element
!                   ----------------------------------------------------

                    luip = luiip-1 + index (lupp-1 + e)
                    luxp = luxxp-1 + index (luip)
                    luk  = index (luip+1)
                    ludegr = index (luip+2)
                    ludegc = index (luip+3)
                    lunson = index (luip+4)
                    ludimr = index (luip+5)
                    ludimc = index (luip+6)
                    lucp = luip + 7
                    lurp = lucp + ludegc
                    lusonp = lurp + ludegr
                    if (prl .ge. 5) then 
                       write (io, 1) '   e: ', e, ' pivots: ', luk
                       write (io, 1) '   children in dag: ', lunson,
     >                 ' frontal matrix: ', ludimr, ' by ', ludimc
                       endif 

!                   ----------------------------------------------------
!                   print the columns of l
!                   ----------------------------------------------------

                    p = luxp
                    do 140 j = 1, luk 
                       col = k+j
                       nzcol = luk-j+1+ludegc
                       write (io, 1) '      l, col: ', col
                       prn = prn + 1
                       row = col
                       if (symbol) then 
                          write (io, 5) row
                       else 
!                         l is unit diagonal:
                          write (io, 5) row, one
                       endif 
                       p = p + 1
!                      pivot block
                       do 120 i = j+1, luk 
                          if (prl .eq. 3 .and. prn .ge. prmax) then 
!                            exit out of loop if done printing:
                             write (io, 7)
                             go to 210
                          endif 
                          prn = prn + 1
                          row = k+i
                          if (symbol) then 
                             write (io, 5) row
                          else 
                             write (io, 5) row, value (p)
                          endif 
                          p = p + 1
120                    continue 
!                      l block
                       do 130 i = 1, ludegc 
                          if (prl .eq. 3 .and. prn .ge. prmax) then  
!                            exit out of loop if done printing:
                             write (io, 7)
                             go to 210
                          endif 
                          prn = prn + 1
                          row = index (lucp-1+i)
                          if (symbol) then 
                             write (io, 5) row
                          else 
                             write (io, 5) row, value (p)
                          endif 
                          p = p + 1
130                    continue 
                       p = p + j
140                 continue 

!                   ----------------------------------------------------
!                   print the rows of u
!                   ----------------------------------------------------

                    uxp = luxp + luk*(ludegc+luk)
                    do 170 i = 1, luk 
                       row = k+i
                       nzrow = luk-i+1+ludegr
                       write (io, 1) '      u, row: ', row
                       p = luxp + (i-1) + (i-1) * (ludegc+luk)
!                      pivot block
                       do 150 j = i, luk 
                          if (prl .eq. 3 .and. prn .ge. prmax) then 
!                            exit out of loop if done printing:
                             write (io, 7)
                             go to 210
                          endif 
                          prn = prn + 1
                          col = k+j
                          if (symbol) then 
                             write (io, 5) col
                          else 
                             write (io, 5) col, value (p)
                          endif 
                          p = p + (ludegc+luk)
150                    continue 
                       p = uxp
!                      u block
                       do 160 j = 1, ludegr 
                          if (prl .eq. 3 .and. prn .ge. prmax) then 
!                            exit out of loop if done printing:
                             write (io, 7)
                             go to 210
                          endif 
                          prn = prn + 1
                          col = index (lurp-1+j)
                          if (symbol) then 
                             write (io, 5) col
                          else 
                             write (io, 5) col, value (p)
                          endif 
                          p = p + luk
160                    continue 
                       uxp = uxp + 1
170                 continue 

!                   ----------------------------------------------------
!                   print the sons of the element in the assembly dag
!                   ----------------------------------------------------

                    if (prl .ge. 5) then 
                       do 180 i = 1, lunson 
                          prn = prn + 1
                          son = index (lusonp-1+i)
                          if (son .le. kn) then 
!                            an luson
                             write (io, 1) '      luson: ', son
                          else if (son .le. 2*kn) then 
!                            a uson
                             write (io, 1) '      uson:  ', son-kn
                          else 
!                            an lson
                             write (io, 1) '      lson:  ', son-2*kn
                          endif 
180                    continue 
                    endif 

!                   ----------------------------------------------------
!                   increment count of pivots within this block
!                   ----------------------------------------------------

                    k = k + luk
190              continue 
              endif 
200        continue 
!          loop exit label:
210        continue

!          -------------------------------------------------------------
!          row and column permutations
!          -------------------------------------------------------------

           if (.not. badlu) then 
              prn = min (prmax, n)
              if (prl .ge. 4) then 
!                print all of cperm and rperm
                 prn = n
              endif 
              write (io, 8)
              write (io, 1) 'column permutations'
              do 220 i = 1, prn 
                 write (io, 5) index (cpermp+i-1)
220           continue 
              if (prn .lt. n) then 
                 write (io, 7)
              endif 
              write (io, 8)
              write (io, 1) 'row permutations'
              do 230 i = 1, prn 
                 write (io, 5) index (rpermp+i-1)
230           continue 
              if (prn .lt. n) then 
                 write (io, 7)
              endif 
           endif 

        endif 

!-----------------------------------------------------------------------
!  print b (on input) or w and x (on output) for umd2so
!-----------------------------------------------------------------------

        if (who .eq. 3) then 
           write (io, 8)
           prn = min (prmax, n)
           if (prl .ge. 4) then 
!             print all of b, or w and x
              prn = n
           endif 
           if (where .eq. 1) then 
              write (io, 1) 'w (1 ... ',lw,
     >        ' ), work vector: not printed'
              write (io, 1) 'b (1 ... ',n,' ), right-hand side: '
              do 240 i = 1, prn 
                 write (io, 5) i, b (i)
240           continue 
              if (prn .lt. n) then 
                 write (io, 7)
              endif 
           else 
              if (info (1) .lt. 0) then 
                 write (io, 1) 'w (1 ... ',lw,' ), work vector, and'
                 write (io, 1) 'x (1 ... ',n, ' ), solution,'
                 write (io, 1)
     >           '   not printed because of error flag, info (1) = ',
     >           info (1)
              else 
                 if (lw .eq. 4*n) then 
!                   umd2so did iterative refinement
                    write (io, 1) 'w (1 ... ',n,' ), residual: '
                    do 250 i = 1, prn 
                       write (io, 5) i, w (i)
250                 continue 
                    if (prn .lt. n) then 
                       write (io, 7)
                    endif 
                    write (io, 1) 'w (',n+1,' ... ',lw,
     >              ' ), work vector: not printed'
                 else 
!                   no iterative refinement
                    write (io, 1) 'w (1 ... ',lw,
     >              ' ), work vector: not printed'
                 endif 
                 write (io, 1) 'x (1 ... ',n,' ), solution: '
                 do 260 i = 1, prn 
                    write (io, 5) i, x (i)
260              continue 
                 if (prn .lt. n) then 
                    write (io, 7)
                 endif 
              endif 
           endif 
        endif 

!-----------------------------------------------------------------------
!  who is this, and where:
!-----------------------------------------------------------------------

        if (who .eq. 1) then 
           if (where .eq. 1) then 
              write (io, 6) 'end of umd2fa input '
           else 
              write (io, 6) 'end of umd2fa output'
           endif 
        else if (who .eq. 2) then 
           if (where .eq. 1) then 
              write (io, 6) 'end of umd2rf input '
           else 
              write (io, 6) 'end of umd2rf output'
           endif 
        else if (who .eq. 3) then 
           if (where .eq. 1) then 
              write (io, 6) 'end of umd2so input '
           else 
              write (io, 6) 'end of umd2so output'
           endif 
        endif 

        return

!=======================================================================
!  format statments
!=======================================================================

1       format (' ', a, :, i12, :, a, :, i12, :,
     >               a, :, i12, :, a, :, i12, :, a, :, i12)
2       format (' ', i12, ': ', i12, ' ', i12, ' ',
     >          d11.4)
3       format (' ', a, d11.4, a)
4       format (' ', a, i12, a, i12, /, a,
     >          d11.4)
5       format (' ', i12, :, ': ',
     >          d11.4)
6       format (' ', 59('='), a)
7       format ('    ...')
8       format (' ', 79 ('-'))
9       format (' ', 79 ('.'))
        end subroutine umd2p1
        subroutine umd2p2 (who, error, i, j, x, io)
        integer who, error, i, j, io
        real(fltp)
     >          x
  
!=== umd2p2 ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable

!=======================================================================
!  description:
!=======================================================================
!
!  print error and warning messages for for umd2fa, umd2rf, and umd2so.

!=======================================================================
!  installation note:
!=======================================================================
!
!  this routine can be deleted on installation (replaced with a dummy
!  routine that just returns without printing) in order to completely
!  disable the printing of all error and warning messages.  the error
!  and warning return flag (info (1)) will not be affected.  to
!  completely disable all i/o, you can also replace the umd2p1 routine
!  with a dummy subroutine.  if you make this modification, please do
!  not delete any original code - just comment it out instead.  add a
!  comment and date to your modifications.

!=======================================================================
!  input:
!=======================================================================
!
!       who:            what user-callable routine called umd2p2:
!                       1: umd2fa, 2: umd2rf, 3: umd2so
!       i, j, x:        the relevant offending value(s)
!       io:             i/o unit on which to print.  no printing
!                       occurs if < 0.
!       error:          the applicable error (<0) or warning (>0)
!                       errors (<0) cause the factorization/solve to
!                       be terminated.  if an error occurs, a prior
!                       warning status is overwritten with the error
!                       status.
!
!  the following error codes are returned in info (1) by umd2er.
!  these errors cause the factorization or solve to terminate:
!
!  where**      error   description
! 
!  fa rf  -     -1      n < 1
!  fa rf  -     -2      ne < 1 or ne > maximum value
!  fa rf  -     -3      lindex too small
!  fa rf  -     -4      lvalue too small
!  fa rf  -     -5      both lindex and lvalue are too small
!   - rf  -     -6      prior pivot ordering no longer acceptable
!   - rf so     -7      lu factors are uncomputed, or are corrupted

!
!  the following warning codes are returned in info (1) by umd2er.
!  the factorization or solve was able to complete:
!
!  fa rf  -     1       invalid entries
!  fa rf  -     2       duplicate entries
!  fa rf  -     3       invalid and duplicate entries
!  fa rf  -     4       singular matrix
!  fa rf  -     5       invalid entries, singular matrix
!  fa rf  -     6       duplicate entries, singular matrix
!  fa rf  -     7       invalid and duplicate entries, singular matrix
!   -  - so     8       iterative refinement cannot be done
!
!  the following are internal error codes (not returned in info (1))
!  for printing specific invalid or duplicate entries.  these codes are
!  for umd2co, umd2of, and umd2r2.  invalid entries are ignored, and
!  duplicate entries are added together (and the factorization
!  continues).  warning levels (1..7) will be set later by umd2er,
!  above.
!
!  fa rf  -     99      invalid entry, out of range 1..n
!  fa rf  -     98      duplicate entry
!   - rf  -     97      invalid entry:  within a diagonal block, but not
!                       in the pattern of the lu factors of that block.
!   - rf  -     96      invalid entry:  below the diagonal blocks.  can
!                       only occur if the matrix has been ordered into
!                       block-upper-triangular form.
!   - rf  -     95      invalid entry:  matrix is singular.  the
!                       remaining rank 0 submatrix yet to be factorized
!                       is replaced with the identity matrix in the lu
!                       factors.  any entry that remains is ignored.

! ** fa: umd2fa, rf: umd2rf, so: umd2so

!=======================================================================
!  output:
!=======================================================================
!
!  error or warning message printed on i/o unit

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutines:  umd2er, umd2co, umd2r0, umd2r2

!=======================================================================
!  executable statements:
!       if (printing disabled on installation) return
!=======================================================================

        if (io .lt. 0) then 
!          printing of error / warning messages has not been requested
           return
        endif 

        if (who .eq. 1) then 

!          -------------------------------------------------------------
!          umd2fa error messages
!          -------------------------------------------------------------

           if (error .eq. -1) then 
              write (io, 1) 'umd2fa: n less than one!'
           else if (error .eq. -2) then 
              write (io, 1) 'umd2fa: ne less than one!'
           else if (error .eq. -3) then 
              write (io, 1)
     >        'umd2fa: lindex too small!  must be greater than ', i
           else if (error .eq. -4) then 
              write (io, 1)
     >        'umd2fa: lvalue too small!  must be greater than ', i

!          -------------------------------------------------------------
!          umd2fa cumulative warning messages
!          -------------------------------------------------------------

           else if (error .eq. 1) then 
              write (io, 1) 'umd2fa: ', i,
     >        ' invalid entries ignored (out of range 1..n).'
           else if (error .eq. 2) then 
              write (io, 1) 'umd2fa: ', i,' duplicate entries summed.'
           else if (error .eq. 4) then 
              write (io, 1)
     >        'umd2fa: matrix is singular.  only ', i, ' pivots found.'

!          -------------------------------------------------------------
!          umd2fa non-cumulative warning messages (internal error codes)
!          -------------------------------------------------------------

           else if (error .eq. 99) then 
              write (io, 2)
     >        'umd2fa: invalid entry (out of range 1..n):', i, j, x
           else if (error .eq. 98) then 
              write (io, 2)
     >        'umd2fa: duplicate entry summed:', i, j, x

           endif 

        else if (who .eq. 2) then 

!          -------------------------------------------------------------
!          umd2rf error messages
!          -------------------------------------------------------------

           if (error .eq. -1) then 
              write (io, 1) 'umd2rf: n less than one!'
           else if (error .eq. -2) then 
              if (i .lt. 0) then 
                 write (io, 1) 'umd2rf: ne less than one!'
              else 
                 write (io, 1)
     >           'umd2rf: ne too large!  must be less than ', i
              endif 
           else if (error .eq. -3) then 
              write (io, 1)
     >        'umd2rf: lindex too small!  must be greater than ', i
           else if (error .eq. -4) then 
              write (io, 1)
     >        'umd2rf: lvalue too small!  must be greater than ', i
           else if (error .eq. -6) then 
              write (io, 1) 'umd2rf: pivot order from umd2fa failed!'
           else if (error .eq. -7) then 
              write (io, 1)
     >        'umd2rf: lu factors uncomputed or corrupted!'

!          -------------------------------------------------------------
!          umd2rf cumulative warning messages
!          -------------------------------------------------------------

           else if (error .eq. 1) then 
              if (i .gt. 0) then 
                 write (io, 1) 'umd2rf: ', i,
     >           ' invalid entries ignored (out of range 1..n).'
              else 
                 write (io, 1) 'umd2rf: ',-i,
     >           ' invalid entries ignored (not in prior pattern).'
              endif 
           else if (error .eq. 2) then 
              write (io, 1) 'umd2rf: ', i,' duplicate entries summed.'
           else if (error .eq. 4) then 
              write (io, 1) 'umd2rf: matrix is singular.  only ', i,
     >        ' pivots found.'

!          -------------------------------------------------------------
!          umd2rf non-cumulative warning messages (internal error codes)
!          -------------------------------------------------------------

           else if (error .eq. 99) then 
              write (io, 2)
     >        'umd2rf: invalid entry (out of range 1..n):', i, j, x
           else if (error .eq. 98) then 
              write (io, 2)
     >        'umd2rf: duplicate entry summed:', i, j, x
           else if (error .eq. 97) then 
              write (io, 2)
     >        'umd2rf: invalid entry (not in pattern of prior factors)',
     >        i, j, x
           else if (error .eq. 96) then 
              write (io, 2)
     >        'umd2rf: invalid entry (below diagonal blocks):', i, j, x
           else if (error .eq. 95) then 
              write (io, 2)
     >        'umd2rf: invalid entry (prior matrix singular):', i, j, x

           endif 

        else if (who .eq. 3) then 

!          -------------------------------------------------------------
!          umd2so error messages
!          -------------------------------------------------------------

           if (error .eq. -7) then 
              write (io, 1)
     >        'umd2so: lu factors uncomputed or corrupted!'

!          -------------------------------------------------------------
!          umd2so non-cumulative warning messages
!          -------------------------------------------------------------

           else if (error .eq. 8) then 
              if (i .eq. 0) then 
                 write (io, 1)
     >  'umd2so: no iterative refinement: original matrix not preserved'
              else 
                 write (io, 1)
     >  'umd2so: no iterative refinement: only for ax=b or a''x=b'
              endif 

           endif 

        endif 

        return

!=======================================================================
!  format statments
!=======================================================================

1       format (' ', a, :, i12, :, a)
2       format (' ', a,/, '    row: ', i12, ' col: ', i12, ' ',
     >          d11.4)
        end subroutine umd2p2
        subroutine umd2r0 (n, nz, cp, xx, xsize, ii, isize, xtail,
     >          itail, iuse, xuse, nzoff, nblks, icntl, cntl, info,
     >          rinfo, presrv, ap, ai, ax, an, anz, lui, luisiz,
     >          lublkp, blkp, offp, on, cperm, rperm, ne)
        integer n, nz, isize, ii (isize), icntl (20), info (40),
     >          cp (n+1), xsize, xtail, itail, iuse, xuse, an, anz,
     >          ap (an+1), ai (anz), luisiz, lui (luisiz), nblks,
     >          lublkp (nblks), blkp (nblks+1), on, offp (on+1),
     >          cperm (n), rperm (n), nzoff, ne
        logical presrv
        real(fltp)
     >          xx (xsize), ax (anz)
        real(fltp)
     >          cntl (10), rinfo (20)
  
!=== umd2r0 ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  refactorize an unsymmetric sparse matrix in column-form, optionally
!  permuting the matrix to upper block triangular form and factorizing
!  each diagonal block.

!=======================================================================
!  input:
!=======================================================================
!
!       n:              order of matrix
!       nz:             entries in matrix
!       cp (1..n+1):    column pointers of input matrix
!       presrv:         if true then preserve original matrix
!       xsize:          size of xx
!       isize:          size of ii
!       iuse:           memory usage in index on input
!       xuse:           memory usage in value on input
!       icntl:          integer control parameters, see umd21i
!       cntl:           real control parameters, see umd21i
!
!       if presrv is true:
!           an:                 = n, order of preserved matrix
!           anz:                = anz, order of preserved matrix
!           ap (1..an+1):       column pointers of preserved matrix
!           ai (1..nz):         row indices of preserved matrix
!           ax (1..nz):         values of preserved matrix
!           ii:                 unused on input
!           xx:                 unused on input
!       else
!           an:                 0
!           anz:                1
!           ap:                 unused
!           ai:                 unused
!           ax:                 unused
!           ii (1..nz):         row indices of input matrix
!           xx (1..nz):         values of input matrix
!
!       information from prior lu factorization:
!
!       luisiz:                 size of lui
!       lui (1..luisiz):        patterns of lu factors, excluding
!                               prior preserved matrix (if it existed)
!                               and prior off-diagonal part (if it
!                               existed)
!       cperm (1..n):           column permutations
!       rperm (1..n):           row permutations
!       nblks:                  number of diagonal blocks for btf
!       if nblks > 1:
!           lublkp (1..nblks):  pointers to each diagonal lu factors
!           blkp (1..nblks+1):  index range of diagonal blocks

!=======================================================================
!  output: 
!=======================================================================
!
!       xx (xtail ... xsize), xtail:
!
!                       lu factors are located in xx (xtail ... xsize),
!                       including values in off-diagonal part if matrix
!                       was permuted to block triangular form.
!
!       ii (itail ... isize), itail:
!
!                       the off-diagonal nonzeros, if nblks > 1
!
!       offp (1..n+1):  row pointers for off-diagonal part, if nblks > 1
!       info:           integer informational output, see umd2fa
!       rinfo:          real informational output, see umd2fa

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   umd2rf
!       subroutines called:     umd2er, umd2r2, umd2ra, umd2of
!       functions called:       max
        intrinsic max

!=======================================================================
!  local scalars:
!=======================================================================

        integer i, nzdia, p, ihead, nsgltn, nsym, wp, arip, arxp, npiv,
     >   wrksiz, nlu, prp, mc, mr, dummy1, dummy2, nz2, k, blk, k1, k2, kn,
     >   nzblk, col, row, prl, io, luip, mnz, arnz, zero_array(1),
     >   xhead, offip, offxp, noutsd, nbelow, nzorig, xrmax
        real(fltp)
     >          a

!  printing control:
!  -----------------
!  io:      i/o unit for diagnostic messages
!  prl:     printing level
!
!  allocated array pointers:
!  -------------------------
!  wp:      w (1..n+1), or w (1..kn+1), work array located in ii (wp...)
!  prp:     pr (1..n) work array located in ii (prp..prp+n-1)
!  arip:    ari (1..nz) array located in ii (arip..arip+nz-1)
!  arxp:    arx (1..nz) array located in xx (arxp..arxp+nz-1)
!  offip:   offi (1..nzoff) array located in ii (offip..offip+nzoff-1)
!  offxp:   offx (1..nzoff) array located in xx (offxp..offip+nzoff-1)
!
!  arrowhead-form matrix:
!  ----------------------
!  nz2:     number of entries in arrowhead matrix
!  nzblk:   number of entries in arrowhead matrix of a single block
!  arnz:    arrowhead form of blocks located in ii/xx (1..arnz)
!
!  btf information:
!  ----------------
!  k1:      starting index of diagonal block being factorized
!  k2:      ending index of diagonal block being factorized
!  kn:      the order of the diagonal block being factorized
!  blk:     block number of diagonal block being factorized
!  nsgltn:  number of 1-by-1 diagonal blocks ("singletons")
!  a:       numerical value of a singleton
!  mnz:     nzoff
!  noutsd:  entries in diagonal blocks, but not in lu (invalid)
!  nbelow:  entries below diagonal blocks (invalid)
!  nzoff:   entries above diagonal blocks (valid)
!  nzdia:   entries in diagonal blocks (valid)
!  nzorig:  total number of original entries
!
!  memory usage:
!  -------------
!  xhead:   xx (1..xhead-1) is in use, xx (xhead..xtail-1) is free
!  ihead:   ii (1..ihead-1) is in use, ii (ihead..itail-1) is free
!  wrksiz:  total size of work arrays need in ii for call to umd2r2
!  xrmax:   memory needed in value for next call to umd2rf
!
!  symbolic information and pattern of prior lu factors:
!  -----------------------------------------------------
!  nlu:     number of elements in a diagonal block
!  luip:    integer part of lu factors located in lui (luip...)
!  mr,mc:   largest frontal matrix for this diagonal block is mc-by-mr
!
!  other:
!  ------
!  k:       loop index (kth pivot)
!  i:       loop index
!  row:     row index
!  col:     column index
!  p:       pointer
!  nsym:    number of symmetric pivots chosen
!  dummy1:  argument returned by umd2ra, but not needed
!  dummy2:  argument returned by umd2ra, but not needed

!=======================================================================
!  executable statements:
!=======================================================================
        zero_array(1) = 0
        io = icntl (2)
        prl = icntl (3)
        nzorig = nz

        if (presrv) then 
!          original matrix is not in cp/ii/xx, but in ap/ai/ax:
           ihead = 1
           xhead = 1
        else 
           ihead = nz + 1
           xhead = nz + 1
        endif 

        nzoff = 0
        nzdia = nz
        nsgltn = 0
        npiv = 0
        noutsd = 0
        nbelow = 0
        itail = isize + 1
        xtail = xsize + 1

!-----------------------------------------------------------------------
! current memory usage:
!-----------------------------------------------------------------------

!       if .not. presrv then
!               input matrix is now in ii (1..nz) and xx (1..nz)
!                       col pattern: ii (cp (col) ... cp (col+1))
!                       col values:  xx (cp (col) ... cp (col+1))
!               total: nz+n+1 integers, nz reals
!       else
!               input matrix is now in ai (1..nz) and ax (1..nz)
!                       col pattern: ai (ap (col) ... ap (col+1))
!                       col values:  ax (ap (col) ... ap (col+1))
!               cp is a size n+1 integer workspace
!               total: nz+2*(n+1) integers, nz reals
!
!       if (nblks > 1) then
!                       lublkp (1..nblks)
!                       blkp (1..nblks+1)
!                       offp (1..n+1)
!               total: (2*nblks+n+2) integers
!
!       cperm (1..n) and rperm (1..n)
!               total: 2*n integers
!
!   grand total current memory usage (including ii,xx,cp,ai,ap,ax
!       and lui):
!
!       presrv  nblks>1 integers, iuse = 
!       f       f       luisiz + nz+  (n+1)+(2*n+7)
!       f       t       luisiz + nz+  (n+1)+(2*n+7)+(2*nblks+n+2)
!       t       f       luisiz + nz+2*(n+1)+(2*n+7)
!       t       t       luisiz + nz+2*(n+1)+(2*n+7)+(2*nblks+n+2)
!
!   real usage is xuse = nz

!-----------------------------------------------------------------------
!  get memory usage estimate for next call to umd2rf
!-----------------------------------------------------------------------

        xrmax = 2*ne

!-----------------------------------------------------------------------
!  convert matrix into arrowhead format (unless btf and preserved)
!-----------------------------------------------------------------------

        if (nblks .gt. 1 .and. presrv) then 

!          -------------------------------------------------------------
!          btf is to be used, and original matrix is to be preserved.
!          it is converted and factorized on a block-by-block basis,
!          using the inverse row permutation (computed and stored in
!          offp (1..n)
!          -------------------------------------------------------------

           do 10 k = 1, n 
              offp (rperm (k)) = k
10         continue 

        else 

!          -------------------------------------------------------------
!          convert the entire input matrix to arrowhead form
!          -------------------------------------------------------------

!          -------------------------------------------------------------
!          allocate workspace: w (n+1), pr (n), ari (nz), arx (nz)
!          -------------------------------------------------------------

           itail = itail - (2*n+1)
           iuse = iuse + 2*n+1
           prp = itail
           wp = prp + n
           iuse = iuse + nz
           xuse = xuse + nz
           arxp = xhead
           arip = ihead
           ihead = ihead + nz
           xhead = xhead + nz
           info (18) = max (info (18), iuse)
           info (20) = max (info (20), xuse)
           info (21) = max (info (21), xuse)
           if (ihead .gt. itail .or. xhead .gt. xtail) then 
!             error return, if not enough integer and/or real memory:
              go to 9000
           endif 

!          -------------------------------------------------------------
!          convert
!          -------------------------------------------------------------

           if (nblks .eq. 1) then 
              if (presrv) then 
                 call umd2ra (presrv, n, nz, cperm, rperm, ii (prp),
     >              ii (wp), nblks, xx (arxp), ii (arip), nzoff, nzdia,
     >              icntl, ap, blkp, ai, ax, info, offp, on, nz,
     >              0, n, nz2, i)
              else 
                 call umd2ra (presrv, n, nz, cperm, rperm, ii (prp),
     >              ii (wp), nblks, xx (arxp), ii (arip), nzoff, nzdia,
     >              icntl, cp, blkp, ii, xx, info, offp, on, nz,
     >              0, n, nz2, i)
              endif 
           else 
!             note that presrv is false in this case
              call umd2ra (presrv, n, nz, cperm, rperm, ii (prp),
     >           ii (wp), nblks, xx (arxp), ii (arip), nzoff, nzdia,
     >           icntl, cp, blkp, ii, xx, info, offp, on, nz,
     >           0, n, nz2, nbelow)
           endif 

!          -------------------------------------------------------------
!          copy the arrowhead pointers from w (1..n+1) to cp (1..n+1)
!          -------------------------------------------------------------

           do 20 i = 1, n+1 
              cp (i) = ii (wp+i-1)
20         continue 

!          -------------------------------------------------------------
!          deallocate w and pr.  if not presrv deallocate ari and arx
!          -------------------------------------------------------------

           iuse = iuse - (2*n+1)
           if (.not. presrv) then 
!             ari and arx have been deallocated.
              xuse = xuse - nz
              iuse = iuse - nz
           endif 
           itail = isize + 1
           xtail = xsize + 1
           nz = nz2
           ihead = nz + 1
           xhead = nz + 1

        endif 

        info (5) = nz
        info (6) = nzdia
        info (7) = nzoff
        info (4) = nbelow

!-----------------------------------------------------------------------
!  refactorization
!-----------------------------------------------------------------------

!       ----------------------------------------------------------------
!       if nblks=1
!          arrowhead form is now stored in ii (1..nz) and xx (1..nz)
!          in reverse pivotal order (arrowhead n, n-1, ..., 2, 1).
!          the arrowhead form will be overwritten.
!       else if not presrv
!          off-diagonal part is in ii (1..nzoff), xx (1..nzoff),
!          (with row pointers offp (1..n+1)) followed by each diagonal
!          block (block 1, 2, ... nblks) in ii/xx (nzoff+1...nz).
!          each diagonal block is in arrowhead form, and in
!          reverse pivotal order (arrowhead k2, k2-1, ..., k1-1, k1).
!          the arrowhead form will be overwritten.
!       else (nblks > 1 and presrv)
!          ii and xx are still empty.  original matrix is in ap, ai,
!          and ax.  inverse row permutation (pr) is in offp (1..n).
!          the arrowhead form is not yet computed.
!       ----------------------------------------------------------------

        if (nblks .eq. 1) then 

!          -------------------------------------------------------------
!          refactorize the matrix as a single block
!          -------------------------------------------------------------

           nlu = lui (2)
           mc = lui (4)
           mr = lui (5)
           wrksiz = 2*n + mr + 3*mc + 4*(nlu+2)
           itail = itail - wrksiz
           iuse = iuse + wrksiz
           p = itail
           info (18) = max (info (18), iuse)
           if (ihead .gt. itail) then 
!             error return, if not enough integer memory:
              go to 9000
           endif 

           call umd2r2 (cp, nz, n, xtail,
     >          xx, xsize, xuse, ii, cperm, rperm,
     >          icntl, cntl, info, rinfo, mc, mr,
     >          ii (p), ii (p+n), ii (p+2*n), ii (p+2*n+mr),
     >          ii (p+2*n+mr+mc), ii (p+2*n+mr+2*mc),
     >          ii (p+2*n+mr+3*mc), ii (p+2*n+mr+3*mc+(nlu+2)),
     >          ii (p+2*n+mr+3*mc+2*(nlu+2)),
     >          ii (p+2*n+mr+3*mc+3*(nlu+2)),
     >          nlu, lui (6), lui (nlu+6), noutsd,
     >          xrmax)

           if (info (1) .lt. 0) then 
!             error return, if not enough real memory or bad pivot found
              go to 9010
           endif 

!          -------------------------------------------------------------
!          deallocate workspace and original matrix (reals already done)
!          -------------------------------------------------------------

           iuse = iuse - wrksiz - nz
           itail = itail + wrksiz
           lui (1) = 1
           ihead = 1
           xhead = 1

        else 

!          -------------------------------------------------------------
!          refactorize the block-upper-triangular form of the matrix
!          -------------------------------------------------------------

           if (presrv) then 
!             count the entries in off-diagonal part
              nzoff = 0
           endif 

           do 70 blk = nblks, 1, -1 

!             ----------------------------------------------------------
!             factorize the kn-by-kn block, a (k1..k2, k1..k2)
!             ----------------------------------------------------------

!             get k1 and k2, the start and end of this block
              k1 = blkp (blk)
              k2 = blkp (blk+1) - 1
              kn = k2-k1+1
              a = 0

!             ----------------------------------------------------------
!             get pointers to, or place the block in, arrowhead form
!             ----------------------------------------------------------

              if (presrv) then 

                 if (kn .gt. 1) then 

!                   ----------------------------------------------------
!                   convert a single block to arrowhead format, using
!                   the inverse row permutation stored in offp
!                   ----------------------------------------------------

!                   ----------------------------------------------------
!                   compute nzblk, allocate ii/xx (1..nzblk), w(1..kn+1)
!                   ----------------------------------------------------

                    nzblk = 0
                    do 40 k = k1, k2 
                       col = cperm (k)
!fpp$ nodepchk l
                       do 30 p = ap (col), ap (col+1) - 1 
                          row = offp (ai (p))
                          if (row .lt. k1) then 
!                            entry in off-diagonal part
                             nzoff = nzoff + 1
                          else if (row .le. k2) then 
!                            entry in diagonal block
                             nzblk = nzblk + 1
                          endif 
30                     continue 
40                  continue 

                    itail = itail - (kn+1)
                    wp = itail
                    ihead = nzblk + 1
                    xhead = nzblk + 1
                    iuse = iuse + nzblk + kn+1
                    xuse = xuse + nzblk
                    xrmax = max (xrmax, xuse)
                    info (18) = max (info (18), iuse)
                    info (20) = max (info (20), xuse)
                    info (21) = max (info (21), xuse)
                    if (ihead .gt. itail .or. xhead .gt. xtail) then 
!                      error return, if not enough integer
!                      and/or real memory:
                       go to 9000
                    endif 

!                   ----------------------------------------------------
!                   convert blk from column-form in ai/ax to arrowhead
!                   form in ii/xx (1..nzblk)
!                   ----------------------------------------------------

                    call umd2ra (presrv, n, nz, cperm, rperm, offp,
     >                 ii (wp), nblks, xx, ii, dummy1, dummy2,
     >                 icntl, ap, blkp, ai, ax, info, zero_array, 0, nzblk,
     >                 blk, kn, nz2, i)

!                   ----------------------------------------------------
!                   copy the arrowhead pointers from w (1..kn+1)
!                   to cp (k1 ... k2+1)
!                   ----------------------------------------------------

                    do 50 i = 0, kn 
                       cp (k1+i) = ii (wp+i)
50                  continue 

!                   cp (k1) is nzblk + 1 and cp (k2+1) is 1

!                   ----------------------------------------------------
!                   deallocate w (1..kn+1)
!                   ----------------------------------------------------

                    iuse = iuse - (kn+1)
                    itail = itail + (kn+1)

                 else 

!                   ----------------------------------------------------
!                   get the value of singleton at a (k1,k1) if it exists
!                   ----------------------------------------------------

!                   find the diagonal entry in the unpermuted matrix,
!                   and count the entries in the diagonal and
!                   off-diagonal blocks.
                    col = cperm (k1)
                    do 60 p = ap (col), ap (col + 1) - 1 
!                      inverse row permutation is stored in offp
                       row = offp (ai (p))
                       if (row .lt. k1) then 
                          nzoff = nzoff + 1
                       else if (row .eq. k1) then 
                          a = ax (p)
!                      else 
!                         this is an invalid entry, below the diagonal
!                         block.  it will be detected (and optionally
!                         printed) in the call to umd2of below.
                       endif 
60                  continue 

                    ihead = 1
                    xhead = 1
                 endif 

              else 

!                -------------------------------------------------------
!                the block is located in ii/xx (cp (k2+1) ... cp (k1)-1)
!                and has already been converted to arrowhead form
!                -------------------------------------------------------

                 if (blk .eq. 1) then 
!                   this is the last block to factorize
                    cp (k2+1) = nzoff + 1
                 else 
                    cp (k2+1) = cp (blkp (blk-1))
                 endif 

                 ihead = cp (k1)
                 xhead = ihead

                 if (kn .eq. 1) then 
!                   singleton block in ii/xx (cp (k1+1) ... cp (k1)-1)
                    if (cp (k1) .gt. cp (k1+1)) then 
                       a = xx (cp (k1) - 1)
                       ihead = ihead - 1
                       xhead = xhead - 1
                       iuse = iuse - 1
                       xuse = xuse - 1
                    endif 
                 endif 
                    
              endif 

!             ----------------------------------------------------------
!             factor the block
!             ----------------------------------------------------------

              if (kn .gt. 1) then 

!                -------------------------------------------------------
!                the a (k1..k2, k1..k2) block is not a singleton.
!                block is now in ii/xx (cp (k2+1) ... cp (k1)-1), in
!                arrowhead form, and is to be overwritten with lu
!                -------------------------------------------------------

                 arnz = cp (k1) - 1

!                if (presrv) then 
!                   ii/xx (1..arnz) holds just the current block, blk
!                else 
!                   ii/xx (1..arnz) holds the off-diagonal part, and
!                   blocks 1..blk, in that order.
!                endif 

                 luip = lublkp (blk)
!                luxp = lui (luip), not needed for refactorization
                 nlu = lui (luip+1)
!                npiv = lui (luip+2), not needed for refactorization
                 mc = lui (luip+3)
                 mr = lui (luip+4)
                 wrksiz = 2*kn + mr + 3*mc + 4*(nlu+2)
                 itail = itail - wrksiz
                 iuse = iuse + wrksiz
                 p = itail
                 info (18) = max (info (18), iuse)
                 if (ihead .gt. itail) then 
!                   error return, if not enough integer memory:
                    go to 9000
                 endif 

                 call umd2r2 (cp (k1), arnz, kn, xtail,
     >                xx, xtail-1, xuse, ii, cperm (k1), rperm (k1),
     >                icntl, cntl, info, rinfo, mc, mr,
     >                ii (p), ii (p+kn), ii (p+2*kn), ii (p+2*kn+mr),
     >                ii (p+2*kn+mr+mc), ii (p+2*kn+mr+2*mc),
     >                ii (p+2*kn+mr+3*mc), ii (p+2*kn+mr+3*mc+(nlu+2)),
     >                ii (p+2*kn+mr+3*mc+2*(nlu+2)),
     >                ii (p+2*kn+mr+3*mc+3*(nlu+2)),
     >                nlu, lui (luip+5), lui (luip+nlu+5), noutsd,
     >                xrmax)

                 if (info (1) .lt. 0) then 
!                   error return, if not enough real memory or bad pivot
                    go to 9010
                 endif 

!                -------------------------------------------------------
!                deallocate workspace and original matrix (reals
!                already deallocated in umd2r2)
!                -------------------------------------------------------

                 iuse = iuse - wrksiz
                 itail = itail + wrksiz
                 lui (luip) = xtail
                 iuse = iuse - (ihead - cp (k2+1))
                 ihead = cp (k2+1)
                 xhead = ihead

              else 

!                -------------------------------------------------------
!                factor the singleton a (k1,k1) block, in a
!                -------------------------------------------------------

                 nsgltn = nsgltn + 1
                 if (abs (a) .eq. 0) then 
!                   this is a singular matrix, replace with 1-by-1
!                   identity matrix.
                    a = 1
                 else 
!                   increment pivot count
                    npiv = npiv + 1
                 endif 
                 xtail = xtail - 1
                 xuse = xuse + 1
                 xrmax = max (xrmax, xuse)
                 info (20) = max (info (20), xuse)
                 info (21) = max (info (21), xuse)
!                note: if the matrix is not preserved and nonsingular
!                then we will not run out of memory
                 if (xhead .gt. xtail) then 
!                   error return, if not enough real memory:
                    go to 9000
                 endif 

!                -------------------------------------------------------
!                store the 1-by-1 lu factors
!                -------------------------------------------------------

                 xx (xtail) = a
                 lublkp (blk) = -xtail

              endif 
70         continue 

!          -------------------------------------------------------------
!          make the index of each block relative to start of lu factors
!          -------------------------------------------------------------
!fpp$ nodepchk l
           do 80 blk = 1, nblks 
              if (lublkp (blk) .gt. 0) then 
                 lui (lublkp (blk)) = lui (lublkp (blk)) - xtail + 1
              else 
!                this is a singleton
                 lublkp (blk) = (-lublkp (blk)) - xtail + 1
              endif 
80         continue 

!          -------------------------------------------------------------
!          store the off-diagonal blocks
!          -------------------------------------------------------------

           if (presrv) then 

!             ----------------------------------------------------------
!             allocate temporary workspace for pr (1..n) at head of ii
!             ----------------------------------------------------------

              prp = ihead
              ihead = ihead + n
              iuse = iuse + n

!             ----------------------------------------------------------
!             allocate permanent copy of off-diagonal blocks
!             ----------------------------------------------------------

              itail = itail - nzoff
              offip = itail
              xtail = xtail - nzoff
              offxp = xtail
              iuse = iuse + nzoff
              xuse = xuse + nzoff
              xrmax = max (xrmax, xuse)
              info (18) = max (info (18), iuse)
              info (20) = max (info (20), xuse)
              info (21) = max (info (21), xuse)
              if (ihead .gt. itail .or. xhead .gt. xtail) then 
!                error return, if not enough integer and/or real memory:
                 go to 9000
              endif 

!             ----------------------------------------------------------
!             re-order the off-diagonal blocks according to pivot perm
!             ----------------------------------------------------------

!             use cp as temporary work array:
              mnz = nzoff
              if (nzoff .eq. 0) then 
!                offi and offx are not accessed in umd2of.  set offip
!                and offxp to 1 (since offip = itail = isize+1, which
!                can generate an address fault, otherwise).
                 offip = 1
                 offxp = 1
              endif 
              call umd2of (cp, n, rperm, cperm, nzoff,
     >             offp, ii (offip), xx (offxp), ii (prp),
     >             icntl, ap, ai, ax, an, anz, presrv, nblks, blkp,
     >             mnz, 2, info, nbelow)

!             ----------------------------------------------------------
!             deallocate pr (1..n)
!             ----------------------------------------------------------

              ihead = 1
              xhead = 1
              iuse = iuse - n

           else 

!             off-diagonal entries are in ii/xx (1..nzoff); shift down
!             to ii/xx ( ... itail/xtail).  no extra memory needed.
              do 90 i = nzoff, 1, -1 
                 ii (itail+i-nzoff-1) = ii (i)
                 xx (xtail+i-nzoff-1) = xx (i)
90            continue 
              ihead = 1
              xhead = 1
              itail = itail - nzoff
              xtail = xtail - nzoff
           endif 

        endif 

!       ----------------------------------------------------------------
!       clear the flags (negated row/col indices, and negated ludegr/c)
!       ----------------------------------------------------------------

        do 100 i = 1, luisiz 
           lui (i) = abs (lui (i))
100     continue 

!-----------------------------------------------------------------------
!  normal and error return
!-----------------------------------------------------------------------

!       error return label:
9000    continue
        if (ihead .gt. itail) then 
!          set error flag if not enough integer memory
           call umd2er (2, icntl, info, -3, info (18))
        endif 
        if (xhead .gt. xtail) then 
!          set error flag if not enough real memory
           call umd2er (2, icntl, info, -4, info (21))
        endif 

!       error return label, for error return from umd2r2:
9010    continue

!       cp can now be deallocated in umd2rf:
        iuse = iuse - (n+1)

        info (4) = noutsd + nbelow
        nzdia = nzorig - nzoff - noutsd - nbelow
        info (5) = nzoff + nzdia
        info (6) = nzdia
        info (7) = nzoff
        info (8) = nsgltn
        info (9) = nblks
        info (12) = info (10) + info (11) + n + info (7)

!       count the number of symmetric pivots chosen.  note that some
!       of these may have been numerically unacceptable.
        nsym = 0
        do 110 k = 1, n 
           if (cperm (k) .eq. rperm (k)) then 
!             this kth pivot came from the diagonal of a
              nsym = nsym + 1
           endif 
110     continue 
        info (16) = nsym

        info (17) = info (17) + npiv
        rinfo (1) = rinfo (4) + rinfo (5) + rinfo (6)

!       set warning flag if entries outside prior pattern are present
        if (info (4) .gt. 0) then 
           call umd2er (2, icntl, info, 1, -info (4))
        endif 

!       set warning flag if matrix is singular
        if (info (1) .ge. 0 .and. info (17) .lt. n) then 
           call umd2er (2, icntl, info, 4, info (17))
        endif 

!       ----------------------------------------------------------------
!       return memory usage estimate for next call to umd2rf
!       ----------------------------------------------------------------

        info (23) = xrmax

        return
        end subroutine umd2r0
        subroutine umd2r2 (cp, nz, n, xtail, xx, xsize, xuse, ari,
     >          cperm, rperm, icntl, cntl, info, rinfo, mc, mr,
     >          wir, wic, wpr, wpc, wm, wj, frdimc, frxp, frnext,
     >          frprev, nlu, lup, lui, noutsd, xrmax)
        integer nz, n, xsize, icntl (20), info (40), cperm (n), 
     >          rperm (n), xtail, ari (nz), cp (n+1), mr, mc, noutsd,
     >          wir (n), wic (n), wpr (mr), xrmax, wpc (mc), wm (mc),
     >          nlu, frdimc (nlu+2), frxp (nlu+2), xuse, wj (mc),
     >          frnext (nlu+2), frprev (nlu+2), lup (nlu), lui (*)
        real(fltp)
     >          xx (xsize)
        real(fltp)
     >          cntl (10), rinfo (20)
  
!=== umd2r2 ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  umd2r2 refactorizes the n-by-n input matrix at the head of xx
!  (in arrowhead form) and places its lu factors at the tail of
!  xx.  the input matrix is overwritten.   no btf information is
!  used in this routine.

!=======================================================================
!  input:
!=======================================================================
!
!       cp (1..n+1):    column pointers of arrowhead form
!       n:              order of input matrix
!       nz:             entries in input matrix
!       xsize:          size of xx
!       icntl:          integer control parameters, see umd21i
!       cntl:           real control parameters, see umd21i
!
!       ari (1..nz):            arrowhead format of a
!       xx (1..nz):             arrowhead format of a, see below
!       xx (nz+1..xsize):       undefined on input, used as workspace
!
!       nlu:            number of lu arrowheads
!       lup (1..nlu):   pointers to lu arrowheads in lui
!       lui (1.. ):     lu arrowheads
!
!       xuse:           memory usage in value
!
!       noutsd:         entries not in prior lu pattern
!
!       cperm (1..n):   column permutation
!       rperm (1..n):   row permutation

!=======================================================================
!  workspace:
!=======================================================================
!
!       wir (1..n)
!       wic (1..n)
!
!       wpr (1.. max ludegr)
!       wpc (1.. max ludegc)
!       wm  (1.. max ludegc)
!       wj  (1.. max ludegc)
!
!       frdimc (1..nlu+2)
!       frxp   (1..nlu+2)
!       frnext (1..nlu+2)
!       frprev (1..nlu+2)

!=======================================================================
!  output:
!=======================================================================
!
!       lui (1..):              lu arrowheads, modified luxp pointers
!       xx (1..xtail-1):        undefined on output
!       xx (xtail..xsize):      lu factors of this matrix, see below
!
!       info:           integer informational output, see umd2fa
!       rinfo:          real informational output, see umd2fa
!
!       xuse:           memory usage in value
!
!       noutsd:         entries not in prior lu pattern, incremented

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   umd2r0
!       subroutines called:     umd2er, umd2p2, umd2rg, dgemv,
!                               dgemm, dtrsv, dtrsm
!       functions called:       abs, max
        intrinsic abs, max

!=======================================================================
!  description of data structures:
!=======================================================================

!-----------------------------------------------------------------------
!  matrix being factorized:
!-----------------------------------------------------------------------
!
!  the input matrix is held in an arrowhead format.  for the kth pivot,
!  the nonzeros in the pivot row (a (k, k...n)) and pivot column
!  (a (k...n, k)) are stored in the kth arrowhead.  the kth arrowhead
!  is located in:
!       ari (cp (k+1) ... cp (k)-1):    pattern
!       xx  (cp (k+1) ... cp (k)-1):    values
!
!  suppose p is in the range cp (k+1) to cp (k)-1.  if ari (p) is
!  greater than zero, then the entry is in row ari (p), column k,
!  with value xx (p).  if ari (p) is less than zero, then the entry is
!  in row k, column -ari (p), with value xx (p).  the arrowheads are
!  stored in reverse order (arrowhead n, n-1, ... 2, 1) in ari and xx.
!  note that cp (n+1) = 1 unless btf is in use and the original matrix
!  is not preserved.   in all cases, the real part of the arrowhead
!  format (xx (cp (n+1) ... cp (1)-1)) is overwritten with the lu
!  factors.  the integer part (ari (cp (n+1) ... cp (1)-1)) is not
!  overwritten, since umd2r2 does not require dynamic allocation of
!  integer memory.

!-----------------------------------------------------------------------
!  frontal matrices
!-----------------------------------------------------------------------
!
!   each unassembled frontal matrix (element) is stored as follows:
!       total size: fscal integers, (fdimr*fdimc) reals
!
!       if e is an unassembled element, and not the current frontal
!       matrix:
!
!       fluip = lup (e) pointer to lu arrowhead in ii
!       fdimc = frdimc (e)      column dimension of contribution block
!       fxp   = frxp (e)        pointer to contribution block in xx
!       next  = frnext (e)      pointer to next block in xx
!       prev  = frprev (e)      pointer to previous block in xx
!       fdegr = abs (lui (fluip+2))
!       fdegc = abs (lui (fluip+2))
!       xx (fxp ... )
!               a 2-dimensional array, c (1..fdimc, 1..fdimr), where
!               fdimr = fdegr if the contribution block is compressed,
!               or fdimr = lui (fluip+5) if not.  note, however, that
!               fdimr is not needed.  the contribution block is stored
!               in c (1..fdegc, 1..fdegr) in the c (1..fdimc,...) array.
!
!               if memory is limited, garbage collection will occur.
!               in this case, the c (1..fdimc, 1..fdimr) array is
!               compressed to be just large enough to hold the
!               unassembled contribution block,
!               c (1..fdegc, 1..fdegr).

!-----------------------------------------------------------------------
!  current frontal matrix
!-----------------------------------------------------------------------
!
!  ffxp points to current frontal matrix (contribution block and lu
!  factors).  for example, if fflefc = 4, fflefr = 6, luk = 3,
!  ffdimc = 8, ffdimr = 12, then "x" is a term in the contribution
!  block, "l" in l1, "u" in u1, "l" in l2, "u" in u2, and "." is unused.
!  xx (fxp) is "x". the first 3 pivot values (diagonal entries in u1)
!  are labelled 1, 2, and 3.  the frontal matrix is ffdimc-by-ffdimr.
!
!                   |----------- col 1 of l1 and l2, etc.
!                   v
!       x x x x x x l l l . . .
!       x x x x x x l l l . . .
!       x x x x x x l l l . . .
!       x x x x x x l l l . . .
!       u u u u u u 3 l l . . .         <- row 3 of u1 and u2
!       u u u u u u u 2 l . . .         <- row 2 of u1 and u2
!       u u u u u u u u 1 . . .         <- row 1 of u1 and u2
!       . . . . . . . . . . . .

!-----------------------------------------------------------------------
!  lu factors
!-----------------------------------------------------------------------
!
!   the lu factors are placed at the tail of xx.  if this routine
!   is factorizing a single block, then this description is for the
!   factors of the single block:
!
!       lui (1..):      integer info. for lu factors
!       xx (xtail..xsize):      real values in lu factors
!
!   each lu arrowhead (or factorized element) is stored as follows:
!   ---------------------------------------------------------------
!
!       total size: (7 + ludegc + ludegr + lunson) integers,
!                   (luk**2 + ludeg!*luk + luk*ludegc) reals
!
!       if e is an lu arrowhead, then luip = lup (e).
!
!       luxp   = lui (luip) pointer to numerical lu arrowhead
!       luk    = lui (luip+1) number of pivots in lu arrowhead
!       ludegr = lui (luip+2) degree of last row of u (excl. diag)
!       ludegc = lui (luip+3) degree of last col of l (excl. diag)
!       lunson = lui (luip+4) number of children in assembly dag
!       ffdimr = lui (luip+5)
!       ffdimc = lui (luip+6)
!                       max front size for this lu arrowhead is
!                       ffdimr-by-ffdimc, or zero if this lu arrowhead
!                       factorized within the frontal matrix of a prior
!                       lu arrowhead.
!       lucp   = (luip + 7)
!                       pointer to pattern of column of l
!       lurp   = lucp + ludegc
!                       pointer to patter of row of u
!       lusonp = lurp + ludegr
!                       pointer to list of sons in the assembly dag
!       lui (lucp ... lucp + ludegc - 1)
!                       row indices of column of l
!       lui (lurp ... lurp + ludegr - 1)
!                       column indices of row of u
!       lui (lusonp ... lusonp + lunson - 1)
!                       list of sons
!       xx (luxp...luxp + luk**2 + ludeg!*luk + luk*ludegr - 1)
!                       pivot block (luk-by-luk) and the l block
!                       (ludegc-by-luk) in a single (luk+ludegc)-by-luk
!                       array, followed by the u block in a
!                       luk-by-ludegr array.
!
!   pivot column/row pattern (also columns/rows in contribution block):
!       if the column/row index is negated, the column/row has been
!       assembled out of the frontal matrix into a subsequent frontal
!       matrix.  after factorization, the negative flags are removed.
!
!   list of sons:
!       1 <= son <= n:           son an luson
!       n+1 <= son <= 2n:        son-n is an uson
!       2n+n <= son <= 3n:       son-2n is a lson

!-----------------------------------------------------------------------
!  workspaces:
!-----------------------------------------------------------------------
!
!  wpc (1..ludegr):     holds the pivot column pattern
!                       (excluding the pivot row indices)
!
!  wpr (1..ludegr):     holds the pivot row pattern
!                       (excluding the pivot column indices)
!
!  wir (row) >= 0 for each row in pivot column pattern.
!               offset into pattern is given by:
!               wir (row) == offset - 1
!               otherwise, wir (1..n) is < 0
!
!  wic (col) >= 0 for each col in pivot row pattern.
!               wic (col) == (offset - 1) * ffdimc
!               otherwise, wic (1..n) is < 0
!
!  wm (1..degc) or wm (1..fdegc):       a gathered copy of wir
!  wj (1..degc) or wj (1..fdegc):       offset in pattern of a son 

!-----------------------------------------------------------------------
!  memory allocation in xx:
!-----------------------------------------------------------------------
!
!   xx (1..xhead):      values of original entries in arrowheads of
!                       matrix, values of contribution blocks, followed
!                       by the current frontal matrix.
!
!   mtail = nlu+2
!   mhead = nlu+1:      frnext (mhead) points to the first contribution
!                       block in the head of xx.  the frnext and frprev
!                       arrays form a doubly-linked list.  traversing
!                       the list from mhead to mtail gives the
!                       contribution blocks in ascending ordering of
!                       address (frxp).  a block is free if frdimc <= 0.
!                       the largest known free block in xx is pfree,
!                       located in
!                       xx (frxp (pfree) ... frxp (pfree) + xfree -1),
!                       unless pfree = 0, in which case no largest free
!                       block is known.

!=======================================================================
!  local scalars:
!=======================================================================

        integer swpcol, swprow, fdimc, k0, colpos, rowpos, pivot, ffpp,
     >          p, i, j, ludegr, ludegc, kpos, sp, ffrp, ffcp, type,
     >          fxp, lurp, lucp, next, fflefr, prev, xhead, fdegr,
     >          fflefc, k, xcdp, xdp, xsp, s, fdegc, flurp, flucp,
     >          col, e, row, mhead, mtail, uxp, luk, io, fluip, lusonp,
     >          ffsize, ffxp, ffdimr, ffdimc, xrdp, npiv, nb, lunson,
     >          xneed, ldimr, ldimc, lxp, prl, xp, luip, pfree, xfree,
     >          xs, luxp, fsp, flp, fdp, degc, nzu, nzl, xruse
        logical pr3, allcol, allrow
        real(fltp)
     >          one, piv, temp
        real(fltp)
     >          tmp

!  printing control:
!  -----------------
!  prl:     invalid entries printed if prl >= 3
!  io:      i/o unit for warning messages (printing invalid entries)
!  pr3:     true if invalid entries are to be printed when found
!
!  current working array:
!  ----------------------
!  ffxp:    current working array is in xx (ffxp ... ffxp+ffsize-1)
!  ffsize:  size of current working array in xx
!  ffdimr:  row degree (number of columns) of current working array
!  ffdimc:  column degree (number of rows) of current working array
!  fflefr:  row degree (number of columns) of current contribution block
!  fflefc:  column degree (number of rows) of current contribution block
!  ffrp:     u2 block is in xx (ffrp ...)
!  ffcp:     l2 block is in xx (ffcp ...)
!  ffpp:     location in xx of the current pivot value
!
!  current element:
!  ----------------
!  s:       current element being factorized
!  luip:    current element is in lui (luip ...)
!  luk:     number of pivots in current element
!  ludegc:  degree of pivot column (excluding pivots themselves)
!  ludegr:  degree of pivot row (excluding pivots themselves)
!  ldimr:   row degree (number of columns) of current element
!  ldimc:   column degree (number of row) of current element
!  lucp:    pattern of col(s) of current element in lui (lucp...)
!  lurp:    pattern of row(s) of current element in lui (lurp...)
!  lusonp:  list of sons of current element is in lui (lusonp...)
!  lunson:  number of sons of current element
!  sp:      pointer into list of sons of current element
!  luxp:    numerical values of lu arrowhead stored in xx (luxp ...)
!  lxp:     l2 block is stored in xx (lxp ...) when computed
!  uxp:     u2 block is stored in xx (uxp ...) when computed
!  nzu:     nonzeros above diagonal in u in current lu arrowhead
!  nzl:     nonzeros below diagonal in l in current lu arrowhead
!  swpcol:  the non-pivotal column to be swapped with pivot column
!  swprow:  the non-pivotal row to be swapped with pivot row
!  colpos:  position in wpr of the pivot column
!  rowpos:  position in wpc of the pivot row
!  kpos:    position in c to place pivot row/column
!  k:       current pivot is kth pivot of current element, k=1..luk
!  k0:      contribution block, c, has been updated with pivots 1..k0
!  npiv:    number of pivots factorized so far, excl. current element
!  pivot:   current pivot entry is a (pivot, pivot)
!  xcdp:    current pivot column is in xx (xcdp ...)
!  xrdp:    current pivot row is in xx (xrdp ...)
!
!  son, or element other than current element:
!  -------------------------------------------
!  e:       an element other than s (a son of s, for example)
!  fluip:   lu arrowhead of e is in lui (fluip ...)
!  fxp:     contribution block of son is in xx (fxp ...)
!  fdimc:   leading dimension of contribution block of a son
!  fdegr:   row degree of contribution block of son (number of columns)
!  fdegc:   column degree of contribution block of son (number of rows)
!  allcol:  true if all columns are present in son
!  allrow:  true if all rows are present in son
!  flucp:   pattern of col(s) of son in lui (flucp...)
!  flurp:   pattern of row(s) of son in lui (flurp...)
!  type:    an luson (type = 1), uson (type = 2) or lson (type = 3)
!  degc:    compressed column offset vector of son is in wj/wm (1..degc)
!
!  memory allocation:
!  ------------------
!  mhead:   nlu+1, head pointer for contribution block link list
!  mtail:   nlu+2, tail pointer for contribution block link list
!  prev:    frprev (e) of the element e
!  next:    frnext (e) of the element e
!  pfree:   frxp (pfree) is the largest known free block in xx
!  xfree:   size of largest known free block in xx
!  xneed:   bare minimum memory currently needed in xx
!  xhead:   xx (1..xhead-1) is in use, xx (xhead ..) is free
!  xruse:   estimated memory needed in xx for next call to umd2rf,
!           assuming a modest number of garbage collections
!  xs:      size of a block of memory in xx
!
!  other:
!  ------
!  xdp:     destination pointer, into xx
!  xsp:     source pointer, into xx
!  xp:      a pointer into xx
!  fsp:     source pointer, into xx
!  fsp:     destination pointer, into xx
!  flp:     last row/column in current contribution is in xx (flp...)
!  col:     a column index
!  row:     a row index
!  nb:      block size for tradeoff between level-2 and level-3 blas  
!  p, i, j, x:  various uses

!=======================================================================
!  executable statements:
!=======================================================================

!       ----------------------------------------------------------------
!       get control parameters and initialize various scalars
!       ----------------------------------------------------------------

        one = 1
        io = icntl (2)
        prl = icntl (3)
        nb = max (1, icntl (7))
        npiv = 0
        xhead = cp (1)
        xtail = xsize + 1
        xneed = xuse
        xruse = xuse
        xrmax = max (xrmax, xruse)
        mhead = nlu+1
        mtail = nlu+2
        xfree = -1
        pfree = 0
        pr3 = prl .ge. 3 .and. io .ge. 0

!       ----------------------------------------------------------------
!       initialize workspaces
!       ----------------------------------------------------------------

        do 10 i = 1, n 
           wir (i) = -1
           wic (i) = -1
10      continue 

        do 20 e = 1, nlu+2 
           frdimc (e) = 0
           frxp (e) = 0
           frnext (e) = 0
           frprev (e) = 0
20      continue 
        frnext (mhead) = mtail
        frprev (mtail) = mhead
        frxp (mhead) = xhead
        frxp (mtail) = xhead

!       count the numerical assembly of the original matrix
        rinfo (2) = rinfo (2) + (nz)

!       current working array is empty:
        fflefr = 0
        fflefc = 0
        ffsize = 0
        ffxp = xhead

!=======================================================================
!  factorization [
!=======================================================================

        do 600 s = 1, nlu 

!=======================================================================
!  get the next element to factorize
!=======================================================================

           luip = lup (s)
           luk = lui (luip+1)
           ludegc = lui (luip+3)
           ludegr = lui (luip+2)
           lunson = lui (luip+4)
           lucp = (luip + 7)
           lurp = lucp + ludegc
           lusonp = lurp + ludegr
           ldimc = luk + ludegc
           ldimr = luk + ludegr

!=======================================================================
!  start new frontal matrix or merge with prior contribution block [
!=======================================================================

!          =============================================================
           if (lui (luip+6) .ne. 0) then 
!          start new contribution block
!          =============================================================

!             ----------------------------------------------------------
!             clear the prior offsets
!             ----------------------------------------------------------

              do 30 i = 1, fflefr 
                 wic (wpr (i)) = -1
30            continue 
              do 40 i = 1, fflefc 
                 wir (wpc (i)) = -1
40            continue 

!             ----------------------------------------------------------
!             save prior contribution block (s-1), if it exists
!             ----------------------------------------------------------

              xs = fflefr * fflefc
              if (ffsize .ne. 0) then 
!                one more frontal matrix is finished
                 xneed = xneed - (ffsize - xs)
                 xruse = xruse - (ffsize - xs)
                 info (13) = info (13) + 1
!             else 
!                prior contribution block does not exist
              endif 

              if (fflefr .le. 0 .or. fflefc .le. 0) then 

!                -------------------------------------------------------
!                if prior contribution block nonexistent or empty
!                -------------------------------------------------------

                 xuse = xuse - (xhead - frxp (mtail))
                 xhead = frxp (mtail)

              else 

!                -------------------------------------------------------
!                prepare the prior contribution block for later assembly
!                -------------------------------------------------------

                 e = s - 1

!                count the numerical assembly
                 rinfo (2) = rinfo (2) + (xs)

                 if (xs .le. xfree) then 

!                   ----------------------------------------------------
!                   compress and store in a freed block
!                   ----------------------------------------------------

!                   place the new block in the list
                    xfree = xfree - xs
                    if (pfree .eq. mtail) then 
!                      place the new block at start of tail block
                       prev = frprev (mtail)
                       next = mtail
                       xdp = frxp (mtail)
                       frxp (mtail) = xdp + xs
                    else 
!                      place the new block at end of block
                       prev = pfree
                       next = frnext (pfree)
                       xdp = frxp (next) - xs
                       if (xfree .eq. 0 .and. pfree .ne. mhead) then 
!                         delete the free block if its size is zero
                          prev = frprev (prev)
                          pfree = 0
                          xfree = -1
                       endif 
                    endif 
                    do 60 j = 0, fflefr - 1 
!fpp$ nodepchk l
                       do 50 i = 0, fflefc - 1 
                          xx (xdp+j*fflefc+i) = xx (ffxp+j*ffdimc+i)
50                     continue 
60                  continue 
                    xuse = xuse - (xhead - frxp (mtail))
                    xhead = frxp (mtail)
                    frxp (e) = xdp
                    frdimc (e) = fflefc

                 else 

!                   ----------------------------------------------------
!                   deallocate part of unused portion of frontal matrix
!                   ----------------------------------------------------

!                   leave the contribution block c (1:fflefc, 1:fflefr)
!                   at head of xx, with column dimension of ffdimc and
!                   space of size (fflefr-1)*ffdimc for the first
!                   fflefr columns, and fflefc for the last column.
                    xs = ffsize - (fflefc + (fflefr-1)*ffdimc)
                    xhead = xhead - xs
                    xuse = xuse - xs
                    prev = frprev (mtail)
                    next = mtail
                    frxp (mtail) = xhead
                    frxp (e) = ffxp
                    frdimc (e) = ffdimc
                 endif 

                 frnext (prev) = e
                 frprev (next) = e
                 frnext (e) = next
                 frprev (e) = prev

              endif 

              if (pfree .eq. mtail) then 
                 pfree = 0
                 xfree = -1
              endif 

!             ----------------------------------------------------------
!             allocate a new ffdimr-by-ffdimc frontal matrix
!             ----------------------------------------------------------

              ffdimc = lui (luip+6)
              ffdimr = lui (luip+5)
              ffsize = ffdimr * ffdimc
              ffxp = 0

!             ----------------------------------------------------------
!             allocate and zero the space, garbage collection if needed
!             ----------------------------------------------------------

              if (ffsize .gt. xtail-xhead) then 
                 info (15) = info (15) + 1
                 call umd2rg (xx, xsize, xhead, xtail, xuse,
     >              lui, frdimc, frxp, frnext, frprev, nlu, lup,
     >              icntl, ffxp, ffsize, pfree, xfree)
              endif 

              ffxp = xhead
              xhead = xhead + ffsize
              xuse = xuse + ffsize
              xneed = xneed + ffsize
              xruse = xruse + ffsize
              xrmax = max (xrmax, xruse)
              info (20) = max (info (20), xuse)
              info (21) = max (info (21), xneed)
              if (xhead .gt. xtail) then 
!                error return, if not enough real memory:
                 go to 9000
              endif 

!             ----------------------------------------------------------
!             zero the frontal matrix
!             ----------------------------------------------------------

              do 70 p = ffxp, ffxp + ffsize - 1 
                 xx (p) = 0
70            continue 

!             ----------------------------------------------------------
!             place pivot rows and columns in correct position
!             ----------------------------------------------------------

              do 80 k = 1, luk 
                 wic (npiv + k) = (ldimr - k) * ffdimc
                 wir (npiv + k) =  ldimc - k
80            continue 

!             ----------------------------------------------------------
!             get the pivot row pattern of the new lu arrowhead
!             ----------------------------------------------------------

              do 90 i = 0, ludegr - 1 
                 col = lui (lurp+i)
                 wic (col) = i * ffdimc
                 wpr (i+1) = col
90            continue 

!             ----------------------------------------------------------
!             get the pivot column pattern of the new lu arrowhead
!             ----------------------------------------------------------

              do 100 i = 0, ludegc - 1 
                 row = lui (lucp+i)
                 wir (row) = i
                 wpc (i+1) = row
100           continue 

!          =============================================================
           else 
!          merge with prior contribution block
!          =============================================================

!             ----------------------------------------------------------
!             prior block is located at xx (ffxp ... ffxp + ffsize - 1).
!             it holds a working array c (1..ffdimc, 1..ffdimr), with a
!             prior contribution block in c (1..fflefc, 1..fflefr).
!             the last pivot column pattern is wpc (1..fflefc), and
!             the last pivot row pattern is wpr (1..fflefr).  the
!             offsets wir and wic are:
!             wir (wpc (i)) = i-1, for i = 1..fflefc, and -1 otherwise.
!             wic (wpr (i)) = (i-1)*ffdimc, for i = 1..fflefr, else -1.
!             the prior lu arrowhead is an implicit luson of the current
!             element (and is implicitly assembled into the same
!             frontal matrix).
!             ----------------------------------------------------------

!             ----------------------------------------------------------
!             zero the newly extended frontal matrix
!             ----------------------------------------------------------

!             zero the new columns in the contribution and lu blocks
!             c (1..ldimc, fflefr+1..ldimr) = 0
              do 120 j = fflefr, ldimr - 1 
                 do 110 i = 0, ldimc - 1 
                    xx (ffxp + j*ffdimc + i) = 0
110              continue 
120           continue 

!             c (fflefc+1..ldimc, 1..fflefr) = 0
!             zero the new rows in the contribution and u blocks
              do 140 i = fflefc, ldimc - 1 
!fpp$ nodepchk l
                 do 130 j = 0, fflefr - 1 
                    xx (ffxp + j*ffdimc + i) = 0
130              continue 
140           continue 

!             ----------------------------------------------------------
!             move pivot rows and columns into correct position
!             ----------------------------------------------------------

              do 220 k = 1, luk 

!                -------------------------------------------------------
!                kth pivot of frontal matrix, (npiv+k)th pivot of lu
!                -------------------------------------------------------

                 pivot = npiv + k

!                -------------------------------------------------------
!                move the kth pivot column into position
!                -------------------------------------------------------

                 xsp = wic (pivot)
                 kpos = ldimr - k + 1
                 xdp = (kpos - 1) * ffdimc
                 wic (pivot) = xdp

                 if (xsp .ge. 0) then 
!                   pivot column is already in current frontal matrix,
!                   shift into proper position
                    colpos = (xsp / ffdimc) + 1
                    fsp = ffxp + xsp
                    fdp = ffxp + xdp

                    if (fflefr .lt. kpos) then 

                       if (fflefr .eq. colpos) then 

!                         ----------------------------------------------
!                         move c(:,colpos) => c (:,kpos)
!                         c (:,colpos) = 0
!                         ----------------------------------------------
!fpp$ nodepchk l
                          do 150 i = 0, ldimc - 1 
                             xx (fdp+i) = xx (fsp+i)
                             xx (fsp+i) = 0
150                       continue 

                       else 

!                         ----------------------------------------------
!                         move c(:,colpos) => c (:,kpos)
!                         move c(:,fflefr) => c (:,colpos)
!                         c (:,fflefr) = 0
!                         ----------------------------------------------

                          flp = ffxp + (fflefr - 1) * ffdimc
!fpp$ nodepchk l
                          do 160 i = 0, ldimc - 1 
                             xx (fdp+i) = xx (fsp+i)
                             xx (fsp+i) = xx (flp+i)
                             xx (flp+i) = 0
160                       continue 

                          swpcol = wpr (fflefr)
                          wpr (colpos) = swpcol
                          wic (swpcol) = xsp
                       endif 

                    else if (colpos .ne. kpos) then 

!                      -------------------------------------------------
!                      swap c (:,colpos) <=> c (:,kpos)
!                      -------------------------------------------------
!fpp$ nodepchk l
                       do 180 i = 0, ldimc - 1 
                          temp = xx (fdp+i)
                          xx (fdp+i) = xx (fsp+i)
                          xx (fsp+i) = temp
180                    continue 

                       swpcol = wpr (kpos)
                       wpr (colpos) = swpcol
                       wic (swpcol) = xsp
                    endif 

                    fflefr = fflefr - 1
                 endif 

!                -------------------------------------------------------
!                move the kth pivot row into position
!                -------------------------------------------------------

                 xsp = wir (pivot)
                 kpos = ldimc - k + 1
                 xdp = (kpos - 1)
                 wir (pivot) = xdp

                 if (xsp .ge. 0) then 
!                   pivot row is already in current frontal matrix,
!                   shift into proper position
                    rowpos = xsp + 1
                    fsp = ffxp + xsp
                    fdp = ffxp + xdp

                    if (fflefc .lt. kpos) then 

                       if (fflefc .eq. rowpos) then 

!                         ----------------------------------------------
!                         move c(rowpos,:) => c (kpos,:)
!                         c (rowpos,:) = 0
!                         ----------------------------------------------
!fpp$ nodepchk l
                          do 190 j = 0, (ldimr - 1) * ffdimc, ffdimc 
                             xx (fdp+j) = xx (fsp+j)
                             xx (fsp+j) = 0
190                       continue 

                       else 

!                         ----------------------------------------------
!                         move c(rowpos,:) => c (kpos,:)
!                         move c(fflefc,:) => c (rowpos,:)
!                         c (fflefc,:) = 0
!                         ----------------------------------------------

                          flp = ffxp + (fflefc - 1)
!fpp$ nodepchk l
                          do 200 j = 0, (ldimr - 1) * ffdimc, ffdimc 
                             xx (fdp+j) = xx (fsp+j)
                             xx (fsp+j) = xx (flp+j)
                             xx (flp+j) = 0
200                       continue 

                          swprow = wpc (fflefc)
                          wpc (rowpos) = swprow
                          wir (swprow) = xsp
                       endif 

                    else if (rowpos .ne. kpos) then 

!                      -------------------------------------------------
!                      swap c (rowpos,:) <=> c (kpos,:)
!                      -------------------------------------------------
!fpp$ nodepchk l
                       do 210 j = 0, (ldimr - 1) * ffdimc, ffdimc 
                          temp = xx (fdp+j)
                          xx (fdp+j) = xx (fsp+j)
                          xx (fsp+j) = temp
210                    continue 

                       swprow = wpc (kpos)
                       wpc (rowpos) = swprow
                       wir (swprow) = xsp
                    endif 

                    fflefc = fflefc - 1
                 endif 

220           continue 

!             ----------------------------------------------------------
!             merge with pivot row pattern of new lu arrowhead
!             ----------------------------------------------------------

              i = fflefr
              do 230 p = lurp, lurp + ludegr - 1 
                 col = lui (p)
                 if (wic (col) .lt. 0) then 
                    wic (col) = i * ffdimc
                    i = i + 1
                    wpr (i) = col
                 endif 
230           continue 

!             ----------------------------------------------------------
!             merge with pivot column pattern of new lu arrowhead
!             ----------------------------------------------------------

              i = fflefc
              do 240 p = lucp, lucp + ludegc - 1 
                 row = lui (p)
                 if (wir (row) .lt. 0) then 
                    wir (row) = i
                    i = i + 1
                    wpc (i) = row
                 endif 
240           continue 

           endif 

!=======================================================================
!  done initializing frontal matrix ]
!=======================================================================

!=======================================================================
!  assemble original arrowheads into the frontal matrix, and deallocate
!=======================================================================

!          -------------------------------------------------------------
!          current workspace usage:
!          -------------------------------------------------------------

!          wpc (1..ludegr):     holds the pivot column pattern
!                               (excluding the pivot row indices)
!
!          wpr (1..ludegr):     holds the pivot row pattern
!                               (excluding the pivot column indices)
!
!          c (1..ffdimr, 1..ffdimc):  space for the frontal matrix,
!               in xx (ffxp ... ffxp + ffsize - 1)
!
!          c (i,j) is located at xx (ffxp+((i)-1)+((j)-1)*ffdimc)
!
!          c (1..ludegc, 1..ludegr):            contribution block
!          c (ludegc+1..ludegc+luk, 1..ludegr):             u2 block
!          c (1..ludegc, ludegr+1..ludegr+luk):             l2 block
!          c (ludegc+1..ludegc+luk, ludegr+1..ludegr+luk):  l1\u1 block
!
!          wir (row) >= 0 for each row in pivot column pattern.
!               offset into pattern is given by:
!               wir (row) == offset - 1
!               also, wir (npiv+1 ... npiv+luk) is
!               ludegc+luk-1 ... ludegc, the offsets of the pivot rows.
!
!               otherwise, wir (1..n) is < 0
!
!          wic (col) >= 0 for each col in pivot row pattern.
!               wic (col) == (offset - 1) * ffdimc
!               also, wic (npiv+1 ... npiv+luk) is
!               ludegr+luk-1 ... ludegr, the offsets of the pivot rows.
!
!               otherwise, wic (1..n) is < 0

           do 260 k = 1, luk 
              i = npiv + k
              xcdp = ffxp + wic (i)
              xrdp = ffxp + wir (i)
              do 250 p = cp (i+1), cp (i) - 1 
                 j = ari (p)
                 if (j .gt. 0) then 
!                   a diagonal entry, or lower triangular entry
!                   row = j, col = i
                    xp = xcdp + wir (j)
                    if (xp .lt. xcdp) then 
!                      invalid entry - not in prior lu pattern
                       noutsd = noutsd + 1
                       if (pr3) then 
!                         get original row and column index and print it
                          row = rperm (j)
                          col = cperm (i)
                          call umd2p2 (2, 97, row, col, xx (p), io)
                       endif 
                    else 
                       xx (xp) = xx (xp) + xx (p)
                    endif 
                 else 
!                   an upper triangular entry
!                   row = i, col = -j
                    xp = xrdp + wic (-j)
                    if (xp .lt. xrdp) then 
!                      invalid entry - not in prior lu pattern
                       noutsd = noutsd + 1
                       if (pr3) then 
!                         get original row and column index and print it
                          row = rperm (i)
                          col = cperm (-j)
                          call umd2p2 (2, 97, row, col, xx (p), io)
                       endif 
                    else 
                       xx (xp) = xx (xp) + xx (p)
                    endif 
                 endif 
250           continue 
260        continue 

!          deallocate the original arrowheads
           p = cp (npiv + luk + 1)
           xs = cp (npiv + 1) - p
           frxp (mhead) = p
           xneed = xneed - xs
           if (xs .gt. xfree) then 
              xfree = xs
              pfree = mhead
           endif 

!=======================================================================
!  assemble lusons, usons, and lsons into the frontal matrix [
!=======================================================================

           do 480 sp = lusonp, lusonp + lunson - 1 

!             ----------------------------------------------------------
!             get the son and determine its type (luson, uson, or lson)
!             ----------------------------------------------------------

              e = lui (sp)
              if (e .le. n) then 
!                luson
                 type = 1
              else if (e .le. 2*n) then 
!                uson
                 e = e - n
                 type = 2
              else 
!                lson
                 e = e - 2*n
                 type = 3
              endif 

!             ----------------------------------------------------------
!             if fdimc=0 this is the implicit luson (already assembled)
!             ----------------------------------------------------------

              fdimc = frdimc (e)
              if (fdimc .ne. 0) then 

!                -------------------------------------------------------
!                get scalar info of the son (it needs assembling)
!                -------------------------------------------------------

                 fxp = frxp (e)
                 fluip = lup (e)
                 fdegr = lui (fluip+2)
                 fdegc = lui (fluip+3)
                 allcol = fdegr .gt. 0
                 allrow = fdegc .gt. 0
                 fdegr = abs (fdegr)
                 fdegc = abs (fdegc)
                 flucp = (fluip + 7)
                 flurp = flucp + fdegc

!                use wm (1..fdegc) for offsets:

!                -------------------------------------------------------
                 if (type .eq. 1) then 
!                this is an luson - assemble an entire frontal matrix
!                -------------------------------------------------------

!                   ----------------------------------------------------
                    if (allrow) then 
!                   no rows assembled out of this luson yet
!                   ----------------------------------------------------

!                      compute the compressed column offset vector
                       do 270 i = 0, fdegc-1 
                          row = lui (flucp+i)
                          wm (i+1) = wir (row)
270                    continue 

!                      -------------------------------------------------
                       if (allcol) then 
!                      no rows or cols assembled out of luson yet
!                      -------------------------------------------------

                          do 290 j = 0, fdegr-1 
                             col = lui (flurp+j)
                             xdp = ffxp + wic (col)
!fpp$ nodepchk l
                             do 280 i = 0, fdegc-1 
                                xx (xdp + wm (i+1)) =
     >                          xx (xdp + wm (i+1)) +
     >                          xx (fxp + j*fdimc + i)
280                          continue 
290                       continue 

!                      -------------------------------------------------
                       else 
!                      some columns already assembled out of luson
!                      -------------------------------------------------

                          do 310 j = 0, fdegr-1 
                             col = lui (flurp+j)
                             if (col .gt. 0) then 
                                xdp = ffxp + wic (col)
!fpp$ nodepchk l
                                do 300 i = 0, fdegc-1 
                                   xx (xdp + wm (i+1)) =
     >                             xx (xdp + wm (i+1)) +
     >                             xx (fxp + j*fdimc + i)
300                             continue 
                             endif 
310                       continue 

                       endif 

!                   ----------------------------------------------------
                    else 
!                   some rows already assembled out of luson
!                   ----------------------------------------------------

!                      compute the compressed column offset vector
                       degc = 0
                       do 320 i = 0, fdegc-1 
                          row = lui (flucp+i)
                          if (row .gt. 0) then 
                             degc = degc + 1
                             wj (degc) = i
                             wm (degc) = wir (row)
                          endif 
320                    continue 

!                      -------------------------------------------------
                       if (allcol) then 
!                      some rows already assembled out of luson
!                      -------------------------------------------------

                          do 340 j = 0, fdegr-1 
                             col = lui (flurp+j)
                             xdp = ffxp + wic (col)
!fpp$ nodepchk l
                             do 330 i = 1, degc 
                                xx (xdp + wm (i)) =
     >                          xx (xdp + wm (i)) +
     >                          xx (fxp + j*fdimc + wj (i))
330                          continue 
340                       continue 

!                      -------------------------------------------------
                       else 
!                      rows and columns already assembled out of luson
!                      -------------------------------------------------

                          do 360 j = 0, fdegr-1 
                             col = lui (flurp+j)
                             if (col .gt. 0) then 
                                xdp = ffxp + wic (col)
!fpp$ nodepchk l
                                do 350 i = 1, degc 
                                   xx (xdp + wm (i)) =
     >                             xx (xdp + wm (i)) +
     >                             xx (fxp + j*fdimc + wj (i))
350                             continue 
                             endif 
360                       continue 

                       endif 
                    endif 

!                   ----------------------------------------------------
!                   deallocate the luson frontal matrix
!                   ----------------------------------------------------

                    frdimc (e) = 0
                    prev = frprev (e)
                    next = frnext (e)
                    xneed = xneed - fdegr*fdegc
                    xruse = xruse - fdegr*fdegc

                    if (frdimc (prev) .le. 0) then 
!                      previous block is free - delete this block
                       frnext (prev) = next
                       frprev (next) = prev
                       e = prev
                       prev = frprev (e)
                    endif 

                    if (frdimc (next) .le. 0) then 
!                      next block is free - delete this block
                       frxp (next) = frxp (e)
                       if (e .le. nlu) then 
                          frnext (prev) = next
                          frprev (next) = prev
                       endif 
                       e = next
                       next = frnext (e)
                       if (frnext (mhead) .eq. mtail) then 
!                         no blocks left except mhead and mtail
                          frxp (mtail) = frxp (mhead)
                       endif 
                    endif 

!                   get the size of the freed block
                    if (next .eq. 0) then 
!                      this is the mtail block
                       xs = ffxp - frxp (e)
                    else 
                       xs = frxp (next) - frxp (e)
                    endif 
                    if (xs .gt. xfree) then 
!                      keep track of the largest free block
                       xfree = xs
                       pfree = e
                    endif 

!                -------------------------------------------------------
                 else if (type .eq. 2) then 
!                uson:  assemble all possible columns
!                -------------------------------------------------------

!                   ----------------------------------------------------
                    if (allrow) then 
!                   no rows assembled out of this uson yet
!                   ----------------------------------------------------

!                      compute the compressed column offset vector
                       do 370 i = 0, fdegc-1 
                          row = lui (flucp+i)
                          wm (i+1) = wir (row)
370                    continue 

                       do 390 j = 0, fdegr-1 
                          col = lui (flurp+j)
                          if (col .gt. 0) then 
                             if (wic (col) .ge. 0) then 
                                xdp = ffxp + wic (col)
!fpp$ nodepchk l
                                do 380 i = 0, fdegc-1 
                                   xx (xdp + wm (i+1)) =
     >                             xx (xdp + wm (i+1)) +
     >                             xx (fxp + j*fdimc + i)
380                             continue 
!                               flag this column as assembled
                                lui (flurp+j) = -col
                             endif 
                          endif 
390                    continue 

!                   ----------------------------------------------------
                    else 
!                   some rows already assembled out of this uson
!                   ----------------------------------------------------

!                      compute the compressed column offset vector
                       degc = 0
                       do 400 i = 0, fdegc-1 
                          row = lui (flucp+i)
                          if (row .gt. 0) then 
                             degc = degc + 1
                             wj (degc) = i
                             wm (degc) = wir (row)
                          endif 
400                    continue 

                       do 420 j = 0, fdegr-1 
                          col = lui (flurp+j)
                          if (col .gt. 0) then 
                             if (wic (col) .ge. 0) then 
                                xdp = ffxp + wic (col)
!fpp$ nodepchk l
                                do 410 i = 1, degc 
                                   xx (xdp + wm (i)) =
     >                             xx (xdp + wm (i)) +
     >                             xx (fxp + j*fdimc + wj (i))
410                             continue 
!                               flag this column as assembled
                                lui (flurp+j) = -col
                             endif 
                          endif 
420                    continue 

                    endif 

!                   flag this element as missing some columns
                    lui (fluip+2) = -fdegr

!                -------------------------------------------------------
                 else 
!                lson:  assemble all possible rows
!                -------------------------------------------------------

!                   compute the compressed column offset vector
                    degc = 0
                    do 430 i = 0, fdegc-1 
                       row = lui (flucp+i)
                       if (row .gt. 0) then 
                          if (wir (row) .ge. 0) then 
!                            this row will be assembled in loop below
                             degc = degc + 1
                             wj (degc) = i
                             wm (degc) = wir (row)
!                            flag this row as assembled
                             lui (flucp+i) = -row
                          endif 
                       endif 
430                 continue 

!                   ----------------------------------------------------
                    if (allcol) then 
!                   no columns assembled out of this lson yet
!                   ----------------------------------------------------

                       do 450 j = 0, fdegr-1 
                          col = lui (flurp+j)
                          xdp = ffxp + wic (col)
!fpp$ nodepchk l
                          do 440 i = 1, degc 
                             xx (xdp + wm (i)) =
     >                       xx (xdp + wm (i)) +
     >                       xx (fxp + j*fdimc + wj (i))
440                       continue 
450                    continue 

!                   ----------------------------------------------------
                    else 
!                   some columns already assembled out of this lson
!                   ----------------------------------------------------

                       do 470 j = 0, fdegr-1 
                          col = lui (flurp+j)
                          if (col .gt. 0) then 
                             xdp = ffxp + wic (col)
!fpp$ nodepchk l
                             do 460 i = 1, degc 
                                xx (xdp + wm (i)) =
     >                          xx (xdp + wm (i)) +
     >                          xx (fxp + j*fdimc + wj (i))
460                          continue 
                          endif 
470                    continue 

                    endif 

!                   flag this element as missing some rows
                    lui (fluip+3) = -fdegc

                 endif 

              endif 

480        continue 

!=======================================================================
!  done assemblying sons into the frontal matrix ]
!=======================================================================

!=======================================================================
!  factorize the frontal matrix [
!=======================================================================

           k0 = 0
           fflefr = ldimr
           fflefc = ldimc
           ffcp = ffxp + fflefr * ffdimc
           ffrp = ffxp + fflefc
           ffpp = ffxp + fflefc + fflefr * ffdimc

           do 500 k = 1, luk 

!             ----------------------------------------------------------
!             compute kth column of u1, and update pivot column
!             ----------------------------------------------------------

              if (k-k0-2 .gt. 0) then 
!                u1 = l1 \ u1.  note that l1 transpose is stored, and
!                that u1 is stored with rows in reverse order.
                 call dtrsv ('u', 'n', 'u', k-k0-1,
     >                         xx (ffpp         ), ffdimc,
     >                         xx (ffpp - ffdimc), 1)
                 tmp = k-k0-2
                 tmp = tmp * (k-k0-1)
                 rinfo (5) = rinfo (5) + 2.0*(tmp) / 2.0
              endif 
              if (k-k0-1 .gt. 0) then 
!                l1 = l1 - l2*u1
                 call dgemv ('n', fflefc, k-k0-1,
     >                   -one, xx (ffcp         ), ffdimc,
     >                         xx (ffpp - ffdimc), 1,
     >                    one, xx (ffcp - ffdimc), 1)
                 tmp = fflefc
                 tmp = tmp * (k-k0-1)
                 rinfo (5) = rinfo (5) + 2.0*(tmp)
              endif 

              ffcp = ffcp - ffdimc
              ffrp = ffrp - 1
              ffpp = ffpp - ffdimc - 1
              fflefr = fflefr - 1
              fflefc = fflefc - 1

!             ----------------------------------------------------------
!             divide pivot column by pivot
!             ----------------------------------------------------------

!             k-th pivot in frontal matrix located in xx (ffpp)
              piv = xx (ffpp)
              if (abs (piv) .eq. 0) then 
!                error return, if pivot order from umd2fa not acceptable
                 go to 9010
              endif 
              piv = 1 / piv
              do 490 p = ffcp, ffcp + fflefc - 1 
                 xx (p) = xx (p) * piv
490           continue 
!             count this as a call to the level-1 blas:
              rinfo (4) = rinfo (4) + (fflefc)
              info (17) = info (17) + 1

!             ----------------------------------------------------------
!             compute u1 (k0+1..k, k..ldimc) and
!             update contribution block: rank-nb, or if last pivot
!             ----------------------------------------------------------

              if (k-k0 .ge. nb .or. k .eq. luk) then 
                 call dtrsm ('l', 'u', 'n', 'u', k-k0, fflefr, one,
     >                      xx (ffpp), ffdimc,
     >                      xx (ffrp), ffdimc)
                 tmp = fflefr
                 tmp = tmp * (k-k0-1)
                 tmp = tmp * (k-k0)
                 rinfo (6) = rinfo (6) + 2.0*(tmp) / 2.0
                 call dgemm ('n', 'n', fflefc, fflefr, k-k0,
     >                -one, xx (ffcp ), ffdimc,
     >                      xx (ffrp ), ffdimc,
     >                 one, xx (ffxp), ffdimc)
                 tmp = fflefc
                 tmp = tmp * fflefr
                 tmp = tmp * (k-k0)
                 rinfo (6) = rinfo (6) + 2.0*(tmp)
                 k0 = k
              endif 

500        continue 

!=======================================================================
!  done factorizing the frontal matrix ]
!=======================================================================

!=======================================================================
!  save the new lu arrowhead [
!=======================================================================

!          allocate permanent space for the lu arrowhead
           xs = luk*ludegc + luk*ludegr + luk*luk

           if (xs .gt. xtail-xhead) then 
              info (15) = info (15) + 1
              call umd2rg (xx, xsize, xhead, xtail, xuse,
     >              lui, frdimc, frxp, frnext, frprev, nlu, lup,
     >              icntl, ffxp, ffsize, pfree, xfree)
           endif 

           xtail = xtail - xs
           luxp = xtail
           xuse = xuse + xs
           xneed = xneed + xs
           xruse = xruse + xs
           xrmax = max (xrmax, xruse)
           info (20) = max (info (20), xuse)
           info (21) = max (info (21), xneed)
           if (xhead .gt. xtail) then 
!             error return, if not enough real memory:
              go to 9000
           endif 

!          save the scalar data of the lu arrowhead
           lui (luip) = luxp

!          save column pattern (it may have been rearranged)
           do 510 i = 0, ludegc-1 
              lui (lucp+i) = wpc (i+1)
510        continue 

!          save row pattern (it may have been rearranged)
           do 520 i = 0, ludegr-1 
              lui (lurp+i) = wpr (i+1)
520        continue 

!          move the l1,u1 matrix, compressing the dimension from
!          ffdimc to ldimc.  the lu arrowhead grows on top of stack.
           xp = ffxp + (ldimr-1)*ffdimc + ldimc-1
           do 540 j = 0, luk-1 
!fpp$ nodepchk l
              do 530 i = 0, luk-1 
                 xx (luxp + j*ldimc + i) = xx (xp - j*ffdimc - i)
530           continue 
540        continue 

!          move l2 matrix, compressing dimension from ffdimc to ldimc
           if (ludegc .ne. 0) then 
              lxp = luxp + luk
              xp = ffxp + (ldimr-1)*ffdimc
              do 560 j = 0, luk-1 
!fpp$ nodepchk l
                 do 550 i = 0, ludegc-1 
                    xx (lxp + j*ldimc + i) = xx (xp - j*ffdimc + i)
550              continue 
560           continue 
           endif 

!          move the u2 block.
           if (ludegr .ne. 0) then 
              uxp = luxp + luk * ldimc
              xp = ffxp + ldimc-1
              do 580 j = 0, ludegr-1 
!fpp$ nodepchk l
                 do 570 i = 0, luk-1 
                    xx (uxp + j*luk + i) = xx (xp + j*ffdimc - i)
570              continue 
580           continue 
           endif 

!          one more lu arrowhead has been refactorized
           nzu = (luk*(luk-1)/2) + luk*ludegc
           nzl = (luk*(luk-1)/2) + luk*ludegr
           info (10) = info (10) + nzl
           info (11) = info (11) + nzu

!          -------------------------------------------------------------
!          clear the pivot row and column offsets
!          -------------------------------------------------------------

           do 590 pivot = npiv + 1, npiv + luk 
              wir (pivot) = -1
              wic (pivot) = -1
590        continue 
           npiv = npiv + luk

!=======================================================================
!  done saving the new lu arrowhead ]
!=======================================================================

600     continue 

!=======================================================================
!  factorization complete ]
!=======================================================================

!=======================================================================
!  wrap-up:  store lu factors in their final form
!=======================================================================

!       ----------------------------------------------------------------
!       flag remaining arrowheads as invalid entries, if prior matrix
!       was singular.  print them if requested.
!       ----------------------------------------------------------------

        if (npiv .lt. n) then 
           if (pr3) then 
              do 620 i = npiv+1, n 
                 do 610 p = cp (i+1), cp (i) - 1 
                    j = ari (p)
                    if (j .gt. 0) then 
!                      a diagonal entry, or lower triangular entry
!                      get original row and column index
                       row = rperm (j)
                       col = cperm (i)
                    else 
!                      an upper triangular entry
!                      get original row and column index
                       row = rperm (i)
                       col = cperm (-j)
                    endif 
                    call umd2p2 (2, 95, row, col, xx(p), io)
610              continue 
620           continue 
           endif 
           noutsd = noutsd + (cp (npiv+1) - cp (n+1))
        endif 

!       ----------------------------------------------------------------
!       deallocate all remaining input arrowheads and frontal matrices
!       ----------------------------------------------------------------

        if (ffsize .ne. 0) then 
           info (13) = info (13) + 1
        endif 
        xuse = xuse - (xhead - cp (n+1))
        xneed = xuse
        xhead = cp (n+1)

        if (nlu .eq. 0) then 
!          lu factors are completely empty (a = 0).
!          add one real, to simplify rest of code.
!          otherwise, some arrays in umd2rf or umd2so would have
!          zero size, which can cause an address fault.
           xtail = xsize
           xuse = xuse + 1
           xruse = xuse
           xneed = xuse
           info (20) = max (info (20), xuse)
           info (21) = max (info (21), xneed)
        endif 

        if (xhead .le. xtail) then 

!          -------------------------------------------------------------
!          sufficient memory to complete the factorization
!          -------------------------------------------------------------

           if (nlu .eq. 0) then 
!             zero the dummy entry, although it won't be accessed:
              xx (xtail) = 0
           endif 

!          -------------------------------------------------------------
!          update pointers in lu factors
!          -------------------------------------------------------------

           do 630 s = 1, nlu 
              luip = lup (s)
              luxp = lui (luip)
              lui (luip) = luxp - xtail + 1
630        continue 

!          -------------------------------------------------------------
!          get memory usage estimate for next call to umd2rf
!          -------------------------------------------------------------

           xruse = xuse
           xrmax = max (xrmax, xruse)
           return

        endif 

!=======================================================================
!  error conditions
!=======================================================================

!       error return label:
9000    continue
!       out of real memory
        call umd2er (2, icntl, info, -4, info (21))
        return

!       error return label:
9010    continue
!       original pivot order computed by umd2fa is no longer acceptable
        call umd2er (2, icntl, info, -6, 0)
        return
        end subroutine umd2r2
        subroutine umd2ra (presrv, n, nz, cperm, rperm, pr,
     >          w, nblks, arx, ari, nzoff, nzdia,
     >          icntl, mp, blkp, mi, mx, info, offp, on, nzblk,
     >          cblk, kn, nz2, nbelow)
        integer n, nz, cperm (n), rperm (n), pr (n), kn, w (kn+1),
     >          nblks, nzblk, ari (nzblk), nzoff, nzdia, mp (n+1),
     >          mi (nz), on, icntl (20), blkp (nblks+1), nz2,
     >          info (40), offp (on+1), cblk, nbelow
        logical presrv
        real(fltp)
     >          arx (nzblk), mx (nz)
  
!=== umd2ra ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  convert a column-oriented matrix into an arrowhead format.

!=======================================================================
!  input:
!=======================================================================
!
!       n               size of entire matrix
!       mi (1..nz):     row indices of column form of entire matrix
!       mx (1..nz):     values of column form of entire matrix
!       mp (1..n+1)     column pointers for entire matrix
!       cperm (1..n):   column permutations
!       rperm (1..n):   row permutations
!
!       if nblks > 1 and presrv
!           cblk:               the block to convert
!           kn:                 the size of the block to convert
!       else
!           cblk:               0
!           kn                  n, size of input matrix

!=======================================================================
!  output: 
!=======================================================================
!
!       if nblks = 1 and not presrv
!
!           nzoff               0
!           nzdia               nz - (entries below in diagonal blocks)
!           nz2                 nzdia
!
!           mi (1..nz2)         arrowheads for the diagonal block
!           mx (1..nz2)
!           ari, arx            used as workspace
!           w (1..n+1)          pointer to each arrowhead in mi/mx
!
!           offp                not accessed
!
!       if nblks = 1 and presrv
!
!           nzoff               0
!           nzdia               nz - (entries below in diagonal blocks)
!           nz2                 nzdia
!
!           mi, mx              not modified
!           ari (1..nz2)        arrowheads for the diagonal block
!           arx (1..nz2)
!           w (1..n+1)          pointer to each arrowhead in ari/arx
!
!           offp                not accessed
!
!       else if nblks > 1 and not presrv
!
!           nzoff               number of entries in off-diagonal part
!           nzdia               number of entries in diagonal blocks
!                               (nz = nzoff + nzdia + entries below
!                               diagonal blocks)
!           nz2                 nzoff + nzdia
!
!           mi (nzoff+1..nz2)   arrowheads for each diagonal block
!           mx (nzoff+1..nz2)
!           ari, arx            used as workspace
!           w (1..n+1)          pointer to each arrowhead in mi/mx
!
!           offp (1..n+1)       row pointers for off-diagonal part
!           mi (1..nzoff)       col indices for off-diagonal part
!           mx (1..nzoff)       values for off-diagonal part
!
!       else (nblks > 1 and presrv)
!
!           nzoff               0
!           nzdia               nonzeros in the diagonal block, cblk
!           nz2                 nzdia
!
!           mi, mx              not modified
!           ari (1..nz2)        arrowheads for the diagonal block, cblk
!           arx (1..nz2)
!           w (1..kn+1)         pointer to each arrowhead in ari/arx
!
!           offp                not accessed

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   umd2r0
!       subroutines called:     umd2of
!       functions called:       min
        intrinsic min

!=======================================================================
!  local scalars:
!=======================================================================

        integer i, p, row, col, blk, base, k1, k2, k, b1, b2, k0

!  i:       loop index, arrowhead index
!  p:       pointer into column-form input matrix
!  row:     row index
!  col:     column index
!  blk:     current diagonal block
!  base:    where to start the construction of the arrowhead form
!  k1,k2:   current diagonal block is a (k1..k2, k1..k2)
!  k:       loop index, kth pivot
!  b1,b2:   convert blocks b1...b2 from column-form to arrowhead form
!  k0:      convert a (k0+1..., k0+1...) to arrowhead form

!=======================================================================
!  executable statements:
!=======================================================================

!-----------------------------------------------------------------------
!  if entire matrix is to be converted, then create the off-diagonal
!  part in row-oriented form in ari (1..nzoff) and arx (1..nzoff) and
!  compute inverse row permutation.  otherwise, the inverse row
!  permutation has already been computed.
!-----------------------------------------------------------------------

        nzoff = 0
        nbelow = 0
        if (nblks .eq. 1) then 
           do 10 k = 1, n 
              pr (rperm (k)) = k
10         continue 
        else if (nblks .gt. 1 .and. .not. presrv) then 
           call umd2of (w, n, rperm, cperm, nzoff,
     >        offp, ari, arx, pr,
     >        icntl, mp, mi, mx, n, nz, .true., nblks, blkp,
     >        nz, 2, info, nbelow)
        endif 

!-----------------------------------------------------------------------
!  construct the arrowhead form for the diagonal block(s)
!-----------------------------------------------------------------------

        do 20 i = 1, kn+1 
           w (i) = 0
20      continue 

        base = nzoff + 1

        if (cblk .ne. 0) then 
!          convert just cblk
           k0 = blkp (cblk) - 1
           b1 = cblk
           b2 = cblk
        else 
!          convert all the block(s)
           k0 = 0
           b1 = 1
           b2 = nblks
        endif 

        do 80 blk = b1, b2 

!          -------------------------------------------------------------
!          get the starting and ending indices of this diagonal block
!          -------------------------------------------------------------

           if (nblks .gt. 1) then 
              k1 = blkp (blk)
              k2 = blkp (blk+1) - 1
           else 
              k1 = 1
              k2 = n
           endif 

!          -------------------------------------------------------------
!          count the number of entries in each arrowhead
!          -------------------------------------------------------------

           do 40 col = k1, k2 
              do 30 p = mp (cperm (col)), mp (cperm (col) + 1) - 1 
                 row = pr (mi (p))
                 if (row .ge. k1 .and. row .le. k2) then 
!                   this is in a diagonal block, arrowhead i
                    i = min (row, col) - k0
                    w (i) = w (i) + 1
                 endif 
30            continue 
40         continue 

!          -------------------------------------------------------------
!          set pointers to point just past end of each arrowhead
!          -------------------------------------------------------------

           w (k2-k0+1) = w (k2-k0) + base
           do 50 i = k2-k0, k1-k0+1, -1 
              w (i) = w (i+1) + w (i-1)
50         continue 
           w (k1-k0) = w (k1-k0+1)
!          w (i+1-k0) points just past end of arrowhead i in ari/arx

!          -------------------------------------------------------------
!          construct arrowhead form, leaving pointers in final state
!          -------------------------------------------------------------

           do 70 col = k1, k2 
              do 60 p = mp (cperm (col)), mp (cperm (col) + 1) - 1 
                 row = pr (mi (p))
                 if (row .ge. k1 .and. row .le. k2) then 
                    if (row .ge. col) then 
!                      diagonal, or lower triangular part
                       i = col - k0 + 1
                       w (i) = w (i) - 1
                       ari (w (i)) = row - k1 + 1
                       arx (w (i)) = mx (p)
                    else 
!                      upper triangular part, flag by negating col
                       i = row - k0 + 1
                       w (i) = w (i) - 1
                       ari (w (i)) = -(col - k1 + 1)
                       arx (w (i)) = mx (p)
                    endif 
                 endif 
60            continue 
70         continue 

           base = w (k1-k0)
           w (k2-k0+1) = 0
80      continue 

        w (kn+1) = nzoff + 1
        nzdia = base - nzoff - 1
        nz2 = nzoff + nzdia

!       ----------------------------------------------------------------
!       if cblk = 0, the entire matrix has been converted:
!
!          w (i) now points just past end of arrowhead i in ari/arx
!          arrowhead i is located in ari/arx (w (i+1) ... w (i)-1),
!          except for the k2-th arrowhead in each block.  those are
!          located in ari/arx (base ... w (k2) - 1), where base is
!          w (blkp (blk-1)) if blk>1 or w (n+1) = nzoff + 1 otherwise.
!
!       otherwise, just one block has been converted:
!
!          w (i) now points just past end of arrowhead i in ari/arx,
!          where i = 1 is the first arrowhead of this block (not the
!          first arrowhead of the entire matrix).  arrowhead i is
!          located in ari/arx (w (i+1) ... w (i)-1).
!          this option is used only if nblks>1 and presrv is true.
!       ----------------------------------------------------------------

!-----------------------------------------------------------------------
!  if not preserved, overwrite column-form with arrowhead form
!-----------------------------------------------------------------------

        if (.not. presrv) then 
           do 90 i = 1, nz 
              mi (i) = ari (i)
              mx (i) = arx (i)
90         continue 
        endif 

        return
        end subroutine umd2ra
!..
!..
!..
!..
!..
        subroutine umd2rf (n, ne, job, transa, lvalue, lindex, value,
     >          index, keep, cntl, icntl, info, rinfo)
        integer n, ne, job, lvalue, lindex, index (lindex), keep (20),
     >          icntl (20), info (40)
        real(fltp)
     >          value (lvalue)
        real(fltp)
     >          cntl (10), rinfo (20)
        logical transa

!=== umd2rf ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  hsl compatibility:  this routine has the same arguments as ma38b/bd. 

!=======================================================================
!  user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  given a sparse matrix a, and a sparsity-preserving and numerically-
!  acceptable pivot order and symbolic factorization, compute the lu
!  factors, paq = lu.  uses the sparsity pattern and permutations from
!  a prior factorization by umd2fa or umd2rf.  the matrix a should have
!  the same nonzero pattern as the matrix factorized by umd2fa or
!  umd2rf.  the matrix can have different numerical values.  no
!  variations are made in the pivot order computed by umd2fa.  if a
!  zero pivot is encountered, an error flag is set and the
!  factorization terminates.
!
!  this routine can actually handle any matrix a such that (paq)_ij can
!  be nonzero only if (lu)_ij is be nonzero, where l and u are the lu
!  factors of the matrix factorized by umd2fa.  if btf (block triangular
!  form) is used, entries above the diagonal blocks of (paq)_ij can have
!  an arbitrary sparsity pattern.  entries for which (lu)_ij is not
!  present, or those below the diagonal blocks are invalid and ignored
!  (a warning flag is set and the factorization proceeds without the
!  invalid entries).  a listing of the invalid entries can be printed.
!
!  this routine must be preceded by a call to umd2fa or umd2rf.
!  a call to umd2rf can be followed by any number of calls to umd2so,
!  which solves a linear system using the lu factors computed by this
!  routine or by umd2fa.  a call to umd2rf can also be followed by any
!  number of calls to umd2rf.

!=======================================================================
!  arguments:
!=======================================================================

!           ------------------------------------------------------------
!  n:       an integer variable.
!           must be set by caller on input (not modified).
!           order of the matrix.  must be identical to the value of n
!           in the last call to umd2fa.

!           ------------------------------------------------------------
!  ne:      an integer variable.
!           must be set by caller on input (not modified).
!           number of entries in input matrix.  normally not modified
!           since the last call to umd2fa.
!           restriction:  1 <= ne < (keep (4)) / 2

!           ------------------------------------------------------------
!  job:     an integer variable.
!           must be set by caller on input (not modified).
!           if job=1, then a column-oriented form of the input matrix
!           is preserved, otherwise, the input matrix is overwritten
!           with its lu factors.  if iterative refinement is to done
!           (icntl (8) > 0), then job must be set to 1.  can be
!           the same, or different, as the last call to umd2fa.

!           ------------------------------------------------------------
!  transa:  a logical variable.
!           must be set by caller on input (not modified).
!           if false then a is factorized: paq = lu.  otherwise, a
!           transpose is factorized:  pa'q = lu.  normally the same as
!           the last call to umd2fa.

!           ------------------------------------------------------------
!  lvalue:  an integer variable.
!           must be set by caller on input (not modified).
!           size of the value array.  restriction:  lvalue >= 2*ne,
!           although a larger will typically be required to complete
!           the factorization.  the exact value required is computed
!           by the last call to umd2fa or umd2rf (info (23)).
!           this value assumes that the ne, job, and transa parameters
!           are the same as the last call.  some garbage collection may
!           occur if lvalue is set to info (23), but usually not
!           much.  we recommend lvalue => 1.2 * info (23).  the
!           lvalue parameter is usually the same as in the last call to
!           umd2fa, however.

!           ------------------------------------------------------------
!  lindex:  an integer variable.
!           must be set by caller on input (not modified).
!           size of the index array.  restriction:
!           lindex >= 3*ne+2*n+1 + (keep (5) - keep (4) + 1),
!           although a larger will typically be required to complete
!           the factorization.  the exact value required is computed
!           by the last call to umd2fa or umd2rf (info (22)).
!           this value assumes that the ne, job, and transa parameters
!           are the same as the last call.  no garbage collection ever
!           occurs in the index array, since umd2rf does not create
!           external fragmentation in index.  the lindex parameter is
!           usually the same as in the last call to umd2fa, however.
!           note that lindex >= keep (5) is also required, since
!           the pattern of the prior lu factors reside in
!           index (keep (4) ... keep (5)).

!           ------------------------------------------------------------
!  value:   a real(fltp) array of size lvalue.
!           must be set by caller on input (normally from the last call
!           to umd2fa or umd2rf).  modified on output.  on input,
!           value (1..ne) holds the original matrix in triplet form.
!           on output, value holds the lu factors, and (optionally) a
!           column-oriented form of the original matrix - otherwise
!           the input matrix is overwritten with the lu factors.

!           ------------------------------------------------------------
!  index:   an integer array of size lindex.
!           must be set by caller on input (normally from the last call
!           to umd2fa or umd2rf).  modified on output.  on input,
!           index (1..2*ne) holds the original matrix in triplet form,
!           and index (keep (4) ... keep (5)) holds the pattern
!           of the prior lu factors.  on output, index holds the lu
!           factors, and (optionally) a column-oriented form of the
!           original matrix - otherwise the input matrix is overwritten
!           with the lu factors.
!
!           on input the kth triplet (for k = 1...ne) is stored as:
!                       a (row,col) = value (k)
!                       row         = index (k)
!                       col         = index (k+ne)
!           if there is more than one entry for a particular position,
!           the values are accumulated, and the number of such duplicate
!           entries is returned in info (2), and a warning flag is
!           set.  however, applications such as finite element methods
!           naturally generate duplicate entries which are then
!           assembled (added) together.  if this is the case, then
!           ignore the warning message.
!
!           on input, and the pattern of the prior lu factors is in
!               index (keep (4) ... keep (5))
!
!           on output, the lu factors and the column-oriented form
!           of a (if preserved) are stored in:
!               value (keep (1)...keep (2))
!               index (keep (3)...keep (5))
!           where keep (2) = lvalue, and keep (5) = lindex.

!           ------------------------------------------------------------
!  keep:    an integer array of size 20.
!
!           keep (1 ... 3):  need not be set by caller on input.
!               modified on output.
!               keep (1): new lu factors start here in value
!               keep (2) = lvalue: new lu factors end here in value
!               keep (3): new lu factors start here in index
!
!           keep (4 ... 5): must be set by caller on input (normally
!               from the last call to umd2fa or umd2rf). modified on
!               output.
!               keep (4):  on input, the prior lu factors start here
!               in index, not including the prior (optionally) preserved
!               input matrix, nor the off-diagonal pattern (if btf was
!               used in the last call to umd2fa).  on output, the new
!               lu factors needed for umd2rf start here in index.
!               keep (5):  on input, the prior lu factors end here in
!               index.  on output, keep (5) is set to lindex, which
!               is where the new lu factors end in index
!
!           keep (6 ... 8):  unused.  these are used by umd2fa only.
!               future releases may make use of them, however.
!
!           keep (9 ... 20): unused.  reserved for future releases.

!           ------------------------------------------------------------
!  cntl:    a real(fltp) array of size 10.
!           must be set by caller on input (not modified).
!           control arguments, see umd21i for a
!           description, which sets the default values.  the current
!           version of umd2rf does not actually use cntl.  it is
!           included to make the argument list of umd2rf the same as
!           umd2fa.  umd2rf may use cntl in future releases.

!           ------------------------------------------------------------
!  icntl:   an integer array of size 20.
!           must be set by caller on input (not modified).
!           integer control arguments, see umd21i for a description,
!           which sets the default values.  umd2rf uses icntl (1),
!           icntl (2), icntl (3), and icntl (7).

!           ------------------------------------------------------------
!  info:    an integer array of size 40.
!           need not be set by caller on input.  modified on output.
!           it contains information about the execution of umd2rf.
!
!           info (1): zero if no error occurred, negative if
!               an error occurred and the factorization was not
!               completed, positive if a warning occurred (the
!               factorization was completed).
!
!               these errors cause the factorization to terminate:
!
!               error   description
!               -1      n < 1 or n > maximum value
!               -2      ne < 1 or ne > maximum value
!               -3      lindex too small
!               -4      lvalue too small
!               -5      both lindex and lvalue are too small
!               -6      prior pivot ordering no longer acceptable
!               -7      lu factors are uncomputed, or are corrupted
!
!               with these warnings the factorization was able to 
!               complete:
!
!               error   description
!               1       invalid entries
!               2       duplicate entries
!               3       invalid and duplicate entries
!               4       singular matrix
!               5       invalid entries, singular matrix
!               6       duplicate entries, singular matrix
!               7       invalid and duplicate entries, singular matrix
!
!               subsequent calls to umd2rf and umd2so can only be made
!               if info (1) is zero or positive.  if info (1)
!               is negative, then some or all of the remaining
!               info and rinfo arrays may not be valid.
!
!           info (2): duplicate entries in a.  a warning is set
!               if info (2) > 0.  however, the duplicate entries
!               are summed and the factorization continues.  duplicate
!               entries are sometimes intentional - for finite element
!               codes, for example.
!
!           info (3): invalid entries in a, indices not in 1..n.
!               these entries are ignored and a warning is set in
!               info (1).
!
!           info (4): invalid entries in a, not in prior lu
!               factors.  these entries are ignored and a warning is
!               set in info (1).
!
!           info (5): entries in a after adding duplicates and
!               removing invalid entries.
!
!           info (6): entries in diagonal blocks of a.
!
!           info (7): entries in off-diagonal blocks of a.  zero
!               if info (9) = 1.
!
!           info (8): 1-by-1 diagonal blocks.
!
!           info (9): blocks in block-triangular form.
!
!           info (10): entries below diagonal in l.
!
!           info (11): entries below diagonal in u.
!
!           info (12): entries in l+u+offdiagonal part.
!
!           info (13): frontal matrices.
!
!           info (14): zero.  used by umd2fa only.
!
!           info (15): garbage collections performed on value.
!
!           info (16): diagonal pivots chosen.
!
!           info (17): numerically acceptable pivots found in a.
!               if less than n, then a is singular (or nearly so).
!               the factorization still proceeds, and umd2so can still
!               be called.  the zero-rank active submatrix of order
!               n - info (17) is replaced with the identity matrix
!               (assuming btf is not in use).  if btf is in use, then
!               one or more of the diagonal blocks are singular. 
!               umd2rf can be called if the value of info (17) 
!               returned by umd2fa was less than n, but the order
!               (n - info (17)) active submatrix is still replaced
!               with the identity matrix.  entries residing in this
!               submatrix are ignored, their number is included in
!               info (4), and a warning is set in info (1).
!
!           info (18): memory used in index.
!
!           info (19): memory needed in index (same as info (18)).
!
!           info (20): memory used in value.
!
!           info (21): minimum memory needed in value
!               (or minimum recommended).  if lvalue is set to
!               info (21) on a subsequent call, then a moderate
!               number of garbage collections (info (15)) will
!               occur.
!
!           info (22): memory needed in index for the next call to
!               umd2rf.
!
!           info (23): memory needed in value for the next call to
!               umd2rf.
!
!           info (24): zero.  used by umd2so only.
!
!           info (25 ... 40): reserved for future releases

!           ------------------------------------------------------------
!  rinfo:   a real(fltp) array of size 20.
!           need not be set by caller on input.  modified on output.
!           it contains information about the execution of umd2rf.
!
!           rinfo (1): total flop count in the blas
!
!           rinfo (2): total assembly flop count
!
!           rinfo (3): zero.  used by umd2fa only.
!
!           rinfo (4): level-1 blas flops
!
!           rinfo (5): level-2 blas flops
!
!           rinfo (6): level-3 blas flops
!
!           rinfo (7): zero.  used by umd2so only.
!
!           rinfo (8): zero.  used by umd2so only.
!
!           rinfo (9 ... 20): reserved for future releases

!=======================================================================
!  to be preserved between calls to umd2fa, umd2rf, umd2so:
!=======================================================================
!
!  when calling umd2so to solve a linear system using the factors
!  computed by umd2fa or umd2rf, the following must be preserved:
!
!       n
!       value (keep (1)...keep (2))
!       index (keep (3)...keep (5))
!       keep (1 ... 20)
!
!  when calling umd2rf to factorize a subsequent matrix with a pattern
!  similar to that factorized by umd2fa, the following must be
!  preserved:
!
!       n
!       index (keep (4)...keep (5))
!       keep (4 ... 20)
!
!  note that the user may move the lu factors to a different position
!  in value and/or index, as long as keep (1 ... 5) are modified
!  correspondingly.

!## end of user documentation for umd2rf ###############################

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   user routine
!       subroutines called:     umd2er, umd2p1, umd2co, umd2r0
!       functions called:       max, min
        intrinsic max, min

!=======================================================================
!  local scalars:
!=======================================================================

        integer i, nz, lux1, lui1, iuse, xuse, n1, nz1, nblks,
     >   lind2, luir1, lusiz, lui2, rpermp, cpermp, offpp, 
     >   lublpp, blkpp, on, nzoff, ip2, io, prl, one_array(1)
        logical presrv, badlu
        real(fltp)
     >          ignore_array(1), ignore

!  printing control:
!  -----------------
!  io:      i/o unit for diagnostic messages
!  prl:     printing level
!
!  matrix to factorize:
!  --------------------
!  nz:      number of entries, after removing invalid/duplicate entries
!  presrv:  true if original matrix to be preserved
!
!  memory usage:
!  -------------
!  iuse:    current memory usage in index
!  xuse:    current memory usage in value
!  lind2:   allocatable part of index is (1..lind2)
!
!  location and status of lu factors:
!  ----------------------------------
!  lui1:    integer part of lu factors start in index (lui1...)
!  luir1:   index (luir1 ... lui2) is needed for this call to umd2rf
!  lusiz:   size of index (luir1..lui2), needed from prior lu factors
!  lui2:    integer part of lu factors end in index (..lui2)
!  lux1:    real part of lu factors in value (lux1...lvalue)
!  ip2:     pointer into trailing part of lu factors in index
!  badlu:   if true, then lu factors are corrupted or not computed
!
!  arrays and scalars allocated in lu factors (in order):
!  ------------------------------------------------------
!  ...      lu factors of each diagonal block located here
!  lublpp:  lublkp (1..nblks) array in index (lublpp..lublpp+nblks-1)
!  blkpp:   blkp (1..nblks+1) array loc. in index (blkpp...blkpp+nblks)
!  offpp:   offp (1..n+1) array located in index (offpp...offpp+n)
!  on:      size of offp array
!  cpermp:  cperm (1..n) array located in index (cpermp...cpermp+n-1)
!  rpermp:  rperm (1..n) array located in index (rpermp...rpermp+n-1)
!  nblks:   number of diagonal blocks
!  nz1:     number of entries when prior matrix factorize
!  n1:      n argument in umd2fa or umd2rf when prior matrix factorized 
!
!  other:
!  ------
!  i:       loop index

!=======================================================================
!  executable statements:
!=======================================================================
        one_array(1) = 1
        ignore = 0
        io = icntl (2)
        prl = icntl (3)

!-----------------------------------------------------------------------
!  clear informational output, and the unneeded part of the keep array
!-----------------------------------------------------------------------

        do 10 i = 1, 40 
           info (i) = 0
10      continue 
        do 20 i = 1, 20 
           rinfo (i) = 0
20      continue 
        keep (1) = 0
        keep (2) = 0
        keep (3) = 0

!-----------------------------------------------------------------------
!  print input arguments if requested
!-----------------------------------------------------------------------

        call umd2p1 (2, 1,
     >          n, ne, job, transa, lvalue, lindex, value,
     >          index, keep, cntl, icntl, info, rinfo,
     >          ignore_array, ignore_array, 1, ignore_array, 1)

!-----------------------------------------------------------------------
!  check input arguments
!-----------------------------------------------------------------------

        iuse = 0
        xuse = 0
        info (5) = ne
        info (6) = ne
        if (n .lt. 1) then 
!          n is too small
           call umd2er (2, icntl, info, -1, -1)
           go to 9000
        endif 
        if (ne .lt. 1) then 
!          ne is too small
           call umd2er (2, icntl, info, -2, -1)
           go to 9000
        endif 

!-----------------------------------------------------------------------
!  get pointers to integer part of prior lu factors
!-----------------------------------------------------------------------

        luir1 = keep (4)
        lui2 = keep (5)
        lusiz = lui2 - luir1 + 1

        badlu = luir1 .le. 0 .or. lui2-6 .lt. luir1 .or. lui2.gt.lindex
        if (badlu) then 
           call umd2er (2, icntl, info, -7, 0)
!          error return, lu factors are corrupted:
           go to 9000
        endif 
        if (2*ne .gt. luir1) then 
           call umd2er (2, icntl, info, -2, luir1/2)
!          error return, ne is too large:
           go to 9000
        endif 

!-----------------------------------------------------------------------
!  shift the prior lu factors down to the end of index.  if keep and
!  lindex are unmodified from the prior call to umd2fa, then
!  keep (5) = lindex, and this shift is not performed.
!-----------------------------------------------------------------------

        if (lui2 .lt. lindex) then 
           do 30 i = lindex, lindex - lusiz + 1, -1 
              index (i) = index (i - lindex + lui2)
30         continue 
           luir1 = lindex - lusiz + 1
           keep (5) = lindex
           keep (4) = luir1
        endif 

!-----------------------------------------------------------------------
!  get seven scalars (transa, nzoff, nblks, presrv, nz, n, ne) from lu
!-----------------------------------------------------------------------

!       ne1 = index (lindex), not required for umd2rf
        n1 = index (lindex-1)
        nz1 = index (lindex-2)
!       presr1 = index (lindex-3) .ne. 0, not required for umd2rf
        nblks = index (lindex-4)
!       nzoff1 = index (lindex-5), not required for umd2rf
!       trans1 = index (lindex-6) .ne. 0, not required for umd2rf

!-----------------------------------------------------------------------
!  get pointers to permutation vectors
!-----------------------------------------------------------------------

        rpermp = (lindex-6) - n
        cpermp = rpermp - n
        ip2 = cpermp - 1

!-----------------------------------------------------------------------
!  get pointers to block-triangular information, if btf was used
!-----------------------------------------------------------------------

        if (nblks .gt. 1) then 

!          -------------------------------------------------------------
!          get pointers to btf arrays
!          -------------------------------------------------------------

           offpp = cpermp - (n+1)
           blkpp = offpp - (nblks+1)
           lublpp = blkpp - (nblks)
           ip2 = lublpp - 1
           on = n

        else 

!          -------------------------------------------------------------
!          matrix was factorized as a single block, pass dummy arg.
!          -------------------------------------------------------------

           offpp = 1
           blkpp = 1
           lublpp = 1
           on = 0

        endif 

        badlu = n .ne. n1 .or. nz1 .le. 0 .or. luir1 .gt. ip2 .or.
     >          nblks .le. 0 .or. nblks .gt. n
        if (badlu) then 
           call umd2er (2, icntl, info, -7, 0)
!          error return, lu factors are corrupted:
           go to 9000
        endif 

!-----------------------------------------------------------------------
!  get memory for conversion to column form
!-----------------------------------------------------------------------

        nz = ne
        iuse = 2*n+1 + max (2*nz,n+1) + nz + lusiz
        xuse = 2*nz
        info (18) = iuse
        info (20) = xuse
        info (19) = iuse
        info (21) = xuse
        info (23) = xuse
        lind2 = luir1 - 1
        if (lindex .lt. iuse) then 
!          set error flag if out of integer memory
           call umd2er (2, icntl, info, -3, iuse)
        endif 
        if (lvalue .lt. xuse) then 
!          set error flag if out of real memory
           call umd2er (2, icntl, info, -4, xuse)
        endif 
        if (info (1) .lt. 0) then 
!          error return, if not enough integer and/or real memory
           go to 9000
        endif 

!-----------------------------------------------------------------------
!  convert to column-oriented form and remove duplicates
!-----------------------------------------------------------------------

        call umd2co (n, nz, transa, value, lvalue, info, icntl,
     >     index, lind2-(2*n+1), index (lind2-2*n), index (lind2-n), 2)
        if (info (1) .lt. 0) then 
!          error return, if all entries are invalid (nz is now 0)
           go to 9000
        endif 

!-----------------------------------------------------------------------
!  current memory usage:
!-----------------------------------------------------------------------

!       index (1..n+1): column pointers.  input matrix is now in
!       index (1..nz+n+1) and value (1..nz)
!       col pattern: index (n+1+ index (col) ... n+1+ index (col+1))
!       col values:  value (     index (col) ...      index (col+1))
!       at this point, nz <= ne (nz = ne if there are no invalid or
!       duplicate entries; nz < ne otherwise).
!       pattern of prior lu factors and btf arrays are in
!       index (keep (4) ... keep (5))

        iuse = nz + (n+1) + lusiz
        xuse = nz

!-----------------------------------------------------------------------
!  refactorize
!-----------------------------------------------------------------------

        presrv = job .eq. 1
        if (presrv) then 

!          -------------------------------------------------------------
!          keep a copy of the original matrix in column-oriented form
!          -------------------------------------------------------------

!          copy column pointers (cp (1..n+1) = ap (1..n+1))
           iuse = iuse + (n+1)
!fpp$ nodepchk l
           do 40 i = 1, n+1 
              index (nz+n+1+i) = index (i)
40         continue 

           call umd2r0 (n, nz, index (nz+n+2),
     >          value (nz+1), lvalue-nz,
     >          index (nz+2*n+3), lind2-(nz+2*n+2),
     >          lux1, lui1, iuse, xuse, nzoff, nblks,
     >          icntl, cntl, info, rinfo,
     >          presrv, index, index (n+2), value, n, nz,
     >          index (luir1), ip2 - luir1 + 1,
     >          index (lublpp), index (blkpp), index (offpp), on,
     >          index (cpermp), index (rpermp), ne)
           if (info (1) .lt. 0) then 
!             error return, if umd2r0 fails
              go to 9000
           endif 
!          adjust pointers to reflect index/value, not ii/xx:
           lux1 = lux1 + nz
           lui1 = lui1 + (nz+2*n+2)

!          move preserved copy of a to permanent place
           lux1 = lux1 - (nz)
           lui1 = lui1 - (nz+n+1)
           do 50 i = nz+n+1, 1, -1 
              index (lui1+i-1) = index (i)
50         continue 
           do 60 i = nz, 1, -1 
              value (lux1+i-1) = value (i)
60         continue 

        else 

!          -------------------------------------------------------------
!          do not preserve the original matrix
!          -------------------------------------------------------------

           call umd2r0 (n, nz, index,
     >          value, lvalue,
     >          index (n+2), lind2-(n+1),
     >          lux1, lui1, iuse, xuse, nzoff, nblks,
     >          icntl, cntl, info, rinfo,
     >          presrv, one_array, one_array, ignore_array, 0, 1,
     >          index (luir1), ip2 - luir1 + 1,
     >          index (lublpp), index (blkpp), index (offpp), on,
     >          index (cpermp), index (rpermp), ne)
           if (info (1) .lt. 0) then 
!             error return, if umd2r0 fails
              go to 9000
           endif 
!          adjust pointers to reflect index/value, not ii/xx:
           lui1 = lui1 + (n+1)

        endif 

!-----------------------------------------------------------------------
!  wrap-up
!-----------------------------------------------------------------------

        if (transa) then 
           index (lindex-6) = 1
        else 
           index (lindex-6) = 0
        endif 

        index (lindex-5) = nzoff
        index (lindex-4) = nblks
        if (presrv) then 
           index (lindex-3) = 1
        else 
           index (lindex-3) = 0
        endif 
        index (lindex-2) = nz
        index (lindex-1) = n
        index (lindex) = ne

!       save location of lu factors
        keep (1) = lux1
        keep (2) = lvalue
        keep (3) = lui1
        keep (4) = luir1
        keep (5) = lindex

!       update memory usage information
        iuse = lindex - lui1 + 1
        xuse = lvalue - lux1 + 1

!-----------------------------------------------------------------------
!  print the output arguments if requested, and return
!-----------------------------------------------------------------------

!       error return label:
9000    continue
        if (info (1) .lt. 0) then 
           keep (1) = 0
           keep (2) = 0
           keep (3) = 0
           keep (4) = 0
           keep (5) = 0
        endif 

        info (18) = min (lindex, max (info (18), iuse))
        info (19) = info (18)
        info (22) = info (19)
        info (20) = min (lvalue, max (info (20), xuse))

        call umd2p1 (2, 2,
     >          n, ne, job, transa, lvalue, lindex, value,
     >          index, keep, cntl, icntl, info, rinfo,
     >          ignore_array, ignore_array, 1, ignore_array, 1)
        return
        end subroutine umd2rf
        subroutine umd2rg (xx, xsize, xhead, xtail, xuse,
     >          lui, frdimc, frxp, frnext, frprev, nlu, lup,
     >          icntl, ffxp, ffsize, pfree, xfree)
        integer lui (*), nlu, frdimc (nlu+2), frxp (nlu+2),
     >          frnext (nlu+2), frprev (nlu+2), lup (nlu),
     >          icntl (20), xsize, xuse, xhead, xtail, ffxp, ffsize,
     >          pfree, xfree
        real(fltp)
     >          xx (xsize)
  
!=== umd2rg ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  garbage collection for umd2r2.

!=======================================================================
!  input:
!=======================================================================
!
!       xx:             real workspace, containing matrix being
!                       factorized and partially-computed lu factors
!       xsize:          size of xx
!       xhead:          xx (1..xhead) is in use (matrix, frontal mtc's)
!       xtail:          xx (xtail..xsize) is in use (lu factors)
!       xuse:           memory usage in value
!       icntl:          integer control parameters, see umd21i
!       ffxp:           pointer to current contribution block
!       ffsize:         size of current contribution block
!       nlu:            number of lu arrowheads
!
!       frdimc (1..nlu+2)       leading dimension of frontal matrices
!       frxp (1..nlu+2)         pointer to frontal matrices in xx
!       frnext (1..nlu+2)       pointer to next block in xx
!       frprev (1..nlu+2)       pointer to previous block in xx
!       lup (1..nlu)            pointer to lu arrowhead patters in lui
!       lui (*)                 pattern of lu factors

!=======================================================================
!  output:
!=======================================================================
!
!       xx:             external fragmentation is removed at head 
!       xhead:          xx (1..xhead) is in use, reduced in size
!       xuse:           memory usage in value, reduced
!       pfree:          pointer to free block in memory list, set to 0
!       xfree:          size of free block in xx, set to -1
!       frdimc          arrays for frontal matrices are compressed
!       frxp            frontal matrices have been shifted
!       ffxp            current working array has been shifted

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   umd2r2
!       functions called:       abs
        intrinsic abs

!=======================================================================
!  local scalars:
!=======================================================================

        integer xdp, i, e, fdimc, ludegr, ludegc, j, fluip, fxp,
     >          mhead, mtail

!  xdp:     real destination pointer, current block moved to xx (xdp...)
!  e:       an element
!  fdimc:   column dimension (number of rows) of a frontal matrix
!  ludegr:  row degree (number of columns) of a contribution block
!  ludegc:  column degree (number of rows) of a contribution block
!  fluip:   element is in lui (fluip...)
!  fxp:     element is in xx (fxp...) prior to compression
!  mhead:   nlu+1, head pointer for contribution block link list
!  mtail:   nlu+2, tail pointer for contribution block link list
!  i:       general loop index
!  j:       general loop index

!=======================================================================
!  executable statments:
!=======================================================================

!-----------------------------------------------------------------------
!  scan the link list and compress the reals
!-----------------------------------------------------------------------

        mhead = nlu+1
        mtail = nlu+2
        xdp = frxp (mhead)
        e = frnext (mhead)

!       while (e .ne. mtail) do
10      continue
        if (e .ne. mtail) then 

           fdimc = frdimc (e)

!          -------------------------------------------------------------
           if (fdimc .eq. 0) then 
!          -------------------------------------------------------------

!             this is a real hole - delete it from the link list

              frnext (frprev (e)) = frnext (e)
              frprev (frnext (e)) = frprev (e)

!          -------------------------------------------------------------
           else 
!          -------------------------------------------------------------

!             this is an unassembled frontal matrix
              fxp = frxp (e)
              frxp (e) = xdp
              fluip = lup (e)
              ludegr = abs (lui (fluip+2))
              ludegc = abs (lui (fluip+3))
              if (fdimc .eq. ludegc) then 
!                contribution block is already compressed
!fpp$ nodepchk l
                 do 20 i = 0, (ludegr * ludegc) - 1 
                    xx (xdp+i) = xx (fxp+i)
20               continue 
              else 
!                contribution block is not compressed
!                compress xx (fxp..) to xx (xdp..xdp+(ludegr*ludegc)-1)
                 do 40 j = 0, ludegr - 1 
!fpp$ nodepchk l
                    do 30 i = 0, ludegc - 1 
                       xx (xdp + j*ludegc + i) = xx (fxp + j*fdimc + i)
30                  continue 
40               continue 
                 frdimc (e) = ludegc
              endif 
              xdp = xdp + ludegr*ludegc

           endif 

!          -------------------------------------------------------------
!          get the next item in the link list
!          -------------------------------------------------------------

           e = frnext (e)

!       end while:
        goto 10
        endif 

        frxp (mtail) = xdp
        pfree = 0
        xfree = -1

!       ----------------------------------------------------------------
!       shift the current working array (if it exists)
!       ----------------------------------------------------------------

        if (ffxp .ne. 0) then 
!fpp$ nodepchk l
           do 50 i = 0, ffsize - 1 
              xx (xdp+i) = xx (ffxp+i)
50         continue 
           ffxp = xdp
           xdp = xdp + ffsize
        endif 

!-----------------------------------------------------------------------
!  deallocate the unused space
!-----------------------------------------------------------------------

        xuse = xuse - (xhead - xdp)
        xhead = xdp
        return
        end subroutine umd2rg
        subroutine umd2s2 (n, job, transc, luxsiz, lux,
     >          luisiz, lui, b, x, r, z, ly, y, s, cntl, icntl, info,
     >          rinfo, cperm, rperm, presrv, an, anz, ap, ai, ax, on,
     >          nzoff, offp, offi, offx, nblks, lublkp, blkp, irstep)
        integer n, job, luxsiz, luisiz, lui (luisiz), ly, irstep,
     >          icntl (20), info (40), cperm (n), rperm (n), an,
     >          anz, ap (an+1), ai (anz), on, nzoff, offp (on+1),
     >          offi (nzoff), nblks, lublkp (nblks), blkp (nblks+1)
        logical transc, presrv
        real(fltp)
     >          lux (luxsiz), b (n), x (n), r (n), z (n), y (ly),
     >          s (ly), ax (anz), offx (nzoff)
        real(fltp)
     >          cntl (10), rinfo (20)
  
!=== umd2s2 ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  solve a system, given lu factors, permutation arrays, original
!  matrix (if preserved), and off-diagonal blocks (if btf was used).

!=======================================================================
!  input:
!=======================================================================
!
!       n:              order of matrix
!       job:            0: solve ax=b, 1: solve lx=b, 2: solve ux=b

!       transc:         if true, solve with transposed factors instead

!       luxsiz:         size of lux
!       lux (1..luxsiz) real values in lu factors for each block
!       luisiz:         size of lui
!       lui (1..luisiz) integers in lu factors for each block
!       b (1..n):       right-hand-side
!       ly:             size of y and s, ly=n if y and s are used
!       cntl:           control parameters, see umd21i
!       icntl:          integer control parameters, see umd21i
!       cperm (1..n):   q, column permutation array
!       rperm (1..n):   p, row permutation array
!       presrv:         if true, then original matrix was preserved
!       nblks:          number of diagonoal blocks (1 if no btf)
!       irstep:         maximum number of steps of iterative refinement
!
!       if presrv then
!           an:                 order of preserved matrix, n
!           anz:                number of entries in preserved matrix
!           ap (1..an+1):       column pointers of preserved matrix
!           ai (1..anz):        row indices of preserved matrix
!           ax (1..anz):        values of preserved matrix
!           an, anz, ap, ai, ax:        not accessed
!
!       if nblks > 1 then
!           on:                 n
!           nzoff:              number of off-diagonoal entries
!           offp (1..n+1)       row pointers for off-diagonal part
!           offi (1..nzoff):    column indices for off-diagonal part
!           offx (1..nzoff):    values of off-diagonal part
!           lublkp (1..nblks):  pointers to lu factors of each block
!           blkp (1..nblks+1):  index range of each block
!       else
!           on, nzoff, offp, offi, offx, lublkp, blkp:  not accessed

!=======================================================================
!  workspace:
!=======================================================================
!
!       r (1..n), z (1..n)
!       y (1..ly), s (1..ly):   unaccessed if no iterative refinement

!=======================================================================
!  output:
!=======================================================================
!
!       x (1..n):       solution
!       info:           integer informational output, see umd21i
!       rinfo:          real informational output, see umd21i
!
!       if irsteps > 0 and presrv is true then
!           w (1..n):           residual
!           rinfo (7):  sparse error estimate, omega1
!           rinfo (8):  sparse error estimate, omega2

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   umd2so
!       subroutines called:     umd2er, umd2sl, umd2lt, umd2su, umd2ut
!       functions called:       idamax, abs, max
        intrinsic abs, max
        integer idamax

!=======================================================================
!  local scalars:
!=======================================================================

        integer nlu, i, blk, k1, k2, kn, p, step, npiv, j
        real(fltp)
     >          a, axx, r2, x2, y2, z2
        real(fltp)
     >          xnorm, tau, nctau, omega1, omega2, d1,
     >          d2, omega, omlast, om1lst, om2lst, eps, maxeps
        parameter (maxeps = 2.0 ** (-15))

!  lu factors:
!  -----------
!  blk:     current diagonal block
!  k1,k2:   current diagonal block is a (k1..k2, k1..k2)
!  kn:      size of diagonal block (= k2-k1+1)
!  nlu:     number of elements in the lu factors of a single diag block
!  npiv:    number of pivots in the lu factors of a single diag block
!
!  iterative refinement and sparse backward error:
!  -----------------------------------------------
!  step:    number of steps of iterative refinement taken
!  xnorm:   ||x|| maxnorm of solution vector, x
!  tau:     threshold for selecting which estimate to use (1 or 2)
!  nctau:   1000*n*eps
!  eps:     largest positive value such that fl (1.0 + eps) = 1.0
!  omega1:  current sparse backward error estimate 1
!  omega2:  current sparse backward error estimate 2
!  d1:      divisor for omega1
!  d2:      divisor for omega2
!  omega:   omega1 + omega2
!  omlast:  value of omega from previous step
!  om1lst:  value of omega1 from previous step
!  om2lst:  value of omega2 from previous step
!  maxeps:  2**(-15), maximum value that eps is allowed to be
!  a:       value of an entry in a, a_ij
!  axx:     a_ij * x_j
!
!  other:
!  ------
!  i,j:     loop indices
!  p:       pointer
!  r2:      r (i)
!  x2:      x (i)
!  y2:      y (i)
!  z2:      z (i)

!=======================================================================
!  executable statements:
!=======================================================================

!-----------------------------------------------------------------------
!  initializations for sparse backward error
!-----------------------------------------------------------------------

        omega = 0
        omega1 = 0
        omega2 = 0
        eps = cntl (3)
        if (eps .le. 0 .or. eps .gt. maxeps) then 
!          eps is too small or too big: set to a large default value
           eps = maxeps
        endif 
        nctau = 1000 * n * eps

!-----------------------------------------------------------------------
!  get information on lu factorization if btf was not used  
!-----------------------------------------------------------------------

        if (nblks .eq. 1) then 
!          p is 1, and lui (p) is 1
           nlu = lui (2)
           npiv = lui (3)
        endif 

!-----------------------------------------------------------------------
        if (job .eq. 1) then 
!-----------------------------------------------------------------------

!          -------------------------------------------------------------
           if (.not. transc) then 
!          -------------------------------------------------------------

!             ----------------------------------------------------------
!             solve p'lx=b:  x = l \ pb
!             ----------------------------------------------------------

              do 10 i = 1, n 
                 x (i) = b (rperm (i))
10            continue 
              if (nblks .eq. 1) then 
                 call umd2sl (nlu, npiv, n, lui(6), lui(6+nlu), lux,x,z)
              else 
                 do 20 blk = 1, nblks 
                    k1 = blkp (blk)
                    k2 = blkp (blk+1) - 1
                    kn = k2-k1+1
                    if (kn .gt. 1) then 
                       p = lublkp (blk)
                       nlu = lui (p+1)
                       npiv = lui (p+2)
                       call umd2sl (nlu, npiv, kn, lui (p+5),
     >                    lui (p+5+nlu), lux (lui (p)), x (k1), z)
                    endif 
20               continue 
              endif 

!          -------------------------------------------------------------
           else 
!          -------------------------------------------------------------

!             ----------------------------------------------------------
!             solve l'px=b:  x = p' (l' \ b)
!             ----------------------------------------------------------

              do 30 i = 1, n 
                 r (i) = b (i)
30            continue 
              if (nblks .eq. 1) then 
                 call umd2lt (nlu, npiv, n, lui(6), lui(6+nlu), lux,r,z)
              else 
                 do 40 blk = 1, nblks 
                    k1 = blkp (blk)
                    k2 = blkp (blk+1) - 1
                    kn = k2-k1+1
                    if (kn .gt. 1) then 
                       p = lublkp (blk)
                       nlu = lui (p+1)
                       npiv = lui (p+2)
                       call umd2lt (nlu, npiv, kn, lui (p+5),
     >                    lui (p+5+nlu), lux (lui (p)), r (k1), z)
                    endif 
40               continue 
              endif 
              do 50 i = 1, n 
                 x (rperm (i)) = r (i)
50            continue 

!          -------------------------------------------------------------
           endif 
!          -------------------------------------------------------------

!-----------------------------------------------------------------------
        else if (job .eq. 2) then 
!-----------------------------------------------------------------------

!          -------------------------------------------------------------
           if (transc) then 
!          -------------------------------------------------------------

!             ----------------------------------------------------------
!             solve qu'x=b:  x = u' \ q'b
!             ----------------------------------------------------------

              do 60 i = 1, n 
                 x (i) = b (cperm (i))
60            continue 
              if (nblks .eq. 1) then 
                 call umd2ut (nlu, npiv, n, lui(6), lui(6+nlu), lux,x,z)
              else 
                 do 100 blk = 1, nblks 
                    k1 = blkp (blk)
                    k2 = blkp (blk+1) - 1
                    kn = k2-k1+1
                    if (kn .eq. 1) then 
                       x (k1) = x (k1) / lux (lublkp (blk))
                       r (k1) = x (k1)
                    else 
                       p = lublkp (blk)
                       nlu = lui (p+1)
                       npiv = lui (p+2)
                       call umd2ut (nlu, npiv, kn, lui (p+5),
     >                    lui (p+5+nlu), lux (lui (p)), x (k1), z)
                       do 70 i = k1, k2 
                          r (i) = x (i)
70                     continue 
                       call umd2lt (nlu, npiv, kn, lui (p+5),
     >                    lui (p+5+nlu), lux (lui (p)), r (k1), z)
                    endif 
                    do 90 i = k1, k2 
                       r2 = r (i)
                       do 80 p = offp (i), offp (i+1)-1 
                          x (offi (p)) = x (offi (p)) - offx (p) * r2
80                     continue 
90                  continue 
100              continue 
              endif 

!          -------------------------------------------------------------
           else 
!          -------------------------------------------------------------

!             ----------------------------------------------------------
!             solve uq'x=b:  x = q (u \ b)
!             ----------------------------------------------------------

              if (nblks .eq. 1) then 
                 do 110 i = 1, n 
                    r (i) = b (i)
110              continue 
                 call umd2su (nlu, npiv, n, lui(6), lui(6+nlu), lux,r,z)
              else 
                 do 150 blk = nblks, 1, -1 
                    k1 = blkp (blk)
                    k2 = blkp (blk+1) - 1
                    kn = k2-k1+1
                    do 130 i = k1, k2 
                       x2 = 0
                       do 120 p = offp (i), offp (i+1)-1 
                          x2 = x2 + offx (p) * r (offi (p))
120                    continue 
                       x (i) = x2
130                 continue 
                    if (kn .eq. 1) then 
                       r (k1) = (b (k1) - x (k1)) / lux (lublkp (blk))
                    else 
                       p = lublkp (blk)
                       nlu = lui (p+1)
                       npiv = lui (p+2)
                       call umd2sl (nlu, npiv, kn, lui (p+5),
     >                    lui (p+5+nlu), lux (lui (p)), x (k1), z)
                       do 140 i = k1, k2 
                          r (i) = b (i) - x (i)
140                    continue 
                       call umd2su (nlu, npiv, kn, lui (p+5),
     >                    lui (p+5+nlu), lux (lui (p)), r (k1), z)
                    endif 
150              continue 
              endif 
              do 160 i = 1, n 
                 x (cperm (i)) = r (i)
160           continue 

!          -------------------------------------------------------------
           endif 
!          -------------------------------------------------------------

!-----------------------------------------------------------------------
        else 
!-----------------------------------------------------------------------

           do 450 step = 0, irstep 

!             ----------------------------------------------------------
!             if transa was true in umd2fa or umd2rf, then c = a'.
!             otherwise c = a.  in both cases, the factorization is
!             pcq = lu, and c is stored in column-form in ai,ax,ap if
!             it is preserved.
!             ----------------------------------------------------------

!             ----------------------------------------------------------
              if (.not. transc) then 
!             ----------------------------------------------------------

!                -------------------------------------------------------
!                solve cx=b (step 0):
!                   x = q (u \ l \ pb)
!                and then perform iterative refinement (step > 0):
!                   x = x + q (u \ l \ p (b-cx))
!                -------------------------------------------------------

                 if (step .eq. 0) then 
                    do 170 i = 1, n 
                       r (i) = b (rperm (i))
170                 continue 
                 else 
                    do 180 i = 1, n 
                       z (i) = b (i)
180                 continue 
                    do 200 i = 1, n 
                       x2 = x (i)
                       do 190 p = ap (i), ap (i+1) - 1 
                          z (ai (p)) = z (ai (p)) - ax (p) * x2
190                    continue 
200                 continue 
                    do 210 i = 1, n 
                       r (i) = z (rperm (i))
210                 continue 
                 endif 
                 if (nblks .eq. 1) then 
                    call umd2sl (nlu, npiv, n,lui(6),lui(6+nlu),lux,r,z)
                    call umd2su (nlu, npiv, n,lui(6),lui(6+nlu),lux,r,z)
                 else 
                    do 240 blk = nblks, 1, -1 
                       k1 = blkp (blk)
                       k2 = blkp (blk+1) - 1
                       kn = k2-k1+1
                       do 230 i = k1, k2 
                          r2 = r (i)
                          do 220 p = offp (i), offp (i+1)-1 
                             r2 = r2 - offx (p) * r (offi (p))
220                       continue 
                          r (i) = r2
230                    continue 
                       if (kn .eq. 1) then 
                          r (k1) = r (k1) / lux (lublkp (blk))
                       else 
                          p = lublkp (blk)
                          nlu = lui (p+1)
                          npiv = lui (p+2)
                          call umd2sl (nlu, npiv, kn, lui (p+5),
     >                       lui (p+5+nlu), lux (lui (p)), r (k1), z)
                          call umd2su (nlu, npiv, kn, lui (p+5),
     >                       lui (p+5+nlu), lux (lui (p)), r (k1), z)
                       endif 
240                 continue 
                 endif 
                 if (step .eq. 0) then 
                    do 250 i = 1, n 
                       x (cperm (i)) = r (i)
250                 continue 
                 else 
                    do 260 i = 1, n 
                       x (cperm (i)) = x (cperm (i)) + r (i)
260                 continue 
                 endif 

!             ----------------------------------------------------------
              else 
!             ----------------------------------------------------------

!                -------------------------------------------------------
!                solve c'x=b (step 0):
!                   x = p' (l' \ u' \ q'b)
!                and then perform iterative refinement (step > 0):
!                   x = x + p' (l' \ u' \ q' (b-c'x))
!                -------------------------------------------------------

                 if (step .eq. 0) then 
                    do 270 i = 1, n 
                       r (i) = b (cperm (i))
270                 continue 
                 else 
                    do 280 i = 1, n 
                       z (i) = b (i)
280                 continue 
                    do 300 i = 1, n 
                       z2 = z (i)
                       do 290 p = ap (i), ap (i+1) - 1 
                          z2 = z2 - ax (p) * x (ai (p))
290                    continue 
                       z (i) = z2
300                 continue 
                    do 310 i = 1, n 
                       r (i) = z (cperm (i))
310                 continue 
                 endif 
                 if (nblks .eq. 1) then 
                    call umd2ut (nlu, npiv, n,lui(6),lui(6+nlu),lux,r,z)
                    call umd2lt (nlu, npiv, n,lui(6),lui(6+nlu),lux,r,z)
                 else 
                    do 340 blk = 1, nblks 
                       k1 = blkp (blk)
                       k2 = blkp (blk+1) - 1
                       kn = k2-k1+1
                       if (kn .eq. 1) then 
                          r (k1) = r (k1) / lux (lublkp (blk))
                       else 
                          p = lublkp (blk)
                          nlu = lui (p+1)
                          npiv = lui (p+2)
                          call umd2ut (nlu, npiv, kn, lui (p+5),
     >                       lui (p+5+nlu), lux (lui (p)), r (k1), z)
                          call umd2lt (nlu, npiv, kn, lui (p+5),
     >                       lui (p+5+nlu), lux (lui (p)), r (k1), z)
                       endif 
                       do 330 i = k1, k2 
                          r2 = r (i)
                          do 320 p = offp (i), offp (i+1)-1 
                             r (offi (p)) = r (offi (p)) - offx (p) * r2
320                       continue 
330                    continue 
340                 continue 
                 endif 
                 if (step .eq. 0) then 
                    do 350 i = 1, n 
                       x (rperm (i)) = r (i)
350                 continue 
                 else 
                    do 360 i = 1, n 
                       x (rperm (i)) = x (rperm (i)) + r (i)
360                 continue 
                 endif 

!             ----------------------------------------------------------
              endif 
!             ----------------------------------------------------------

!             ----------------------------------------------------------
!             sparse backward error estimate
!             ----------------------------------------------------------

              if (irstep .gt. 0) then 

!                xnorm = ||x|| maxnorm
                 xnorm = abs (x (idamax (n, x, 1)))

!                r (i) = (b-ax)_i, residual (or a')
!                z (i) = (|a||x|)_i
!                y (i) = ||a_i||, maxnorm of row i of a (or a')
                 do 370 i = 1, n 
                    r (i) = b (i)
                    z (i) = 0
                    y (i) = 0
370              continue 

                 if (.not. transc) then 

!                   ----------------------------------------------------
!                   sparse backward error for cx=b, c stored by column
!                   ----------------------------------------------------

                    do 390 j = 1, n 
                       x2 = x (j)
!fpp$ nodepchk l
                       do 380 p = ap (j), ap (j+1) - 1 
                          i = ai (p)
                          a = ax (p)
                          axx = a * x2
                          r (i) = r (i) -     (axx)
                          z (i) = z (i) + abs (axx)
                          y (i) = y (i) + abs (a)
380                    continue 
390                 continue 

                 else 

!                   ----------------------------------------------------
!                   sparse backward error for c'x=b, c' stored by row
!                   ----------------------------------------------------

                    do 410 i = 1, n 
                       r2 = r (i)
                       z2 = z (i)
                       y2 = y (i)
!fpp$ nodepchk l
                       do 400 p = ap (i), ap (i+1) - 1 
                          j = ai (p)
                          a = ax (p)
                          axx = a * x (j)
                          r2 = r2 -     (axx)
                          z2 = z2 + abs (axx)
                          y2 = y2 + abs (a)
400                    continue 
                       r (i) = r2
                       z (i) = z2
                       y (i) = y2
410                 continue 

                 endif 

!                -------------------------------------------------------
!                save the last iteration in case we need to reinstate it
!                -------------------------------------------------------

                 omlast = omega
                 om1lst = omega1
                 om2lst = omega2

!                -------------------------------------------------------
!                compute sparse backward errors: omega1 and omega2
!                -------------------------------------------------------

                 omega1 = 0
                 omega2 = 0
                 do 420 i = 1, n 
                    tau = (y (i) * xnorm + abs (b (i))) * nctau
                    d1 = z (i) + abs (b (i))
                    if (d1 .gt. tau) then 
                       omega1 = max (omega1, abs (r (i)) / d1)
                    else if (tau .gt. 0) then 
                       d2 = z (i) + y (i) * xnorm
                       omega2 = max (omega2, abs (r (i)) / d2)
                    endif 
420              continue 
                 omega = omega1 + omega2
                 rinfo (7) = omega1
                 rinfo (8) = omega2

!                -------------------------------------------------------
!                stop the iterations if the backward error is small
!                -------------------------------------------------------

                 info (24) = step
                 if (1 + omega .le. 1) then 
!                   further iterative refinement will no longer improve
!                   the solution
                    return
                 endif 

!                -------------------------------------------------------
!                stop if insufficient decrease in omega
!                -------------------------------------------------------

                 if (step .gt. 0 .and. omega .gt. omlast / 2) then 
                    if (omega .gt. omlast) then 
!                      last iteration better than this one, reinstate it
                       do 430 i = 1, n 
                          x (i) = s (i)
                          rinfo (7) = om1lst
                          rinfo (8) = om2lst
430                    continue 
                    endif 
                    info (24) = step - 1
                    return
                 endif 

!                -------------------------------------------------------
!                save current solution in case we need to reinstate
!                -------------------------------------------------------

                 do 440 i = 1, n 
                    s (i) = x (i)
440              continue 

              endif 

450        continue 

!-----------------------------------------------------------------------
        endif 
!-----------------------------------------------------------------------

        return
        end subroutine umd2s2
        subroutine umd2sl (nlu, npiv, n, lup, lui, lux, x, w)
        integer nlu, npiv, n, lup (nlu), lui (*)
        real(fltp)
     >          lux (*), x (n), w (n)
  
!=== umd2sl ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  solves lx = b, where l is the lower triangular factor of a matrix
!  (if btf not used) or a single diagonal block (if btf is used).
!  b is overwritten with the solution x.

!=======================================================================
!  input:
!=======================================================================
!
!       nlu:            number of lu arrowheads in the lu factors
!       npiv:           number of pivots found (normally n)
!       n:              order of matrix
!       lup (1..nlu):   pointer to lu arrowheads in lui
!       lui ( ... ):    integer values of lu arrowheads
!       lux ( ... ):    real values of lu arroheads
!       x (1..n):       the right-hand-side

!=======================================================================
!  workspace:
!=======================================================================
!
!       w (1..n)

!=======================================================================
!  output:
!=======================================================================
!
!       x (1..n):       the solution to lx=b

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   umd2s2
!       subroutines called:     dtrsv, dgemv

!=======================================================================
!  local scalars:
!=======================================================================

        integer i, k, s, luip, luxp, luk, ludegc, lucp, lxp, row
        real(fltp)
     >          one

!  s:       an element, or lu arrowhead
!  k:       kth pivot
!  i:       ith row in l2 array in element s
!  luip:    integer part of s is in lui (luip...)
!  luxp:    real part of s is in lux (luxp...)
!  luk:     number of pivots in s
!  ludegc:  column degree of non-pivotal part of s
!  lucp:    pattern of column of s in lui (lucp...lucp+ludegc-1)
!  lxp:     the ludegc-by-luk l2 block of s is in lux (lxp...)
!  row:     row index

!=======================================================================
!  executable statments:
!=======================================================================

        one = 1
        k = 0
        do 40 s = 1, nlu 

!          -------------------------------------------------------------
!          get the s-th lu arrowhead (s = 1..nlu, in pivotal order)
!          -------------------------------------------------------------

           luip   = lup (s)
           luxp   = lui (luip)
           luk    = lui (luip+1)
           ludegc = lui (luip+3)
           lucp   = (luip + 7)
           lxp    = luxp + luk

           if (luk .eq. 1) then 

!             ----------------------------------------------------------
!             only one pivot, stride-1 sparse saxpy
!             ----------------------------------------------------------

              k = k + 1
!             l (k,k) is one
!fpp$ nodepchk l
              do 10 i = 1, ludegc 
                 row = lui (lucp+i-1)
!                col: k, l (row,col): lux (lxp+i-1)
                 x (row) = x (row) - lux (lxp+i-1) * x (k)
10            continue 

           else 

!             ----------------------------------------------------------
!             more than one pivot
!             ----------------------------------------------------------

              call dtrsv ('l', 'n', 'u', luk,
     >           lux (luxp), ludegc + luk, x (k+1), 1)
              do 20 i = 1, ludegc 
                 row = lui (lucp+i-1)
                 w (i) = x (row)
20            continue 
              call dgemv ('n', ludegc, luk, -one,
     >           lux (lxp), ludegc + luk, x (k+1), 1, one, w, 1)
              do 30 i = 1, ludegc 
                 row = lui (lucp+i-1)
                 x (row) = w (i)
30            continue 
              k = k + luk
           endif 
40      continue 
        return
        end subroutine umd2sl
        subroutine umd2so (n, job, transc, lvalue, lindex, value,
     >          index, keep, b, x, w, cntl, icntl, info, rinfo)
        integer n, job, lvalue, lindex, index (lindex), keep (20),
     >          icntl (20), info (40)
        real(fltp)
     >          value (lvalue), b (n), x (n), w (*)
        real(fltp)
     >          cntl (10), rinfo (20)
        logical transc
  
!=== umd2so ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  hsl compatibility:  this routine has the same arguments as ma38c/cd. 

!=======================================================================
!  user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  given lu factors computed by umd2fa or umd2rf, and the
!  right-hand-side, b, solve a linear system for the solution x.
!
!  this routine handles all permutations, so that b and x are in terms
!  of the original order of the matrix, a, and not in terms of the
!  permuted matrix.
!
!  if iterative refinement is done, then the residual is returned in w,
!  and the sparse backward error estimates are returned in
!  rinfo (7) and rinfo (8).  the computed solution x is the
!  exact solution of the equation (a + da)x = (b + db), where
!    da (i,j)  <= max (rinfo (7), rinfo (8)) * abs (a(i,j))
!  and
!    db (i) <= max (rinfo (7) * abs (b (i)),
!                   rinfo (8) * maxnorm (a) * maxnorm (x computed))
!  note that da has the same sparsity pattern as a.
!  the method used to compute the sparse backward error estimate is
!  described in m. arioli, j. w. demmel, and i. s. duff, "solving
!  sparse linear systems with sparse backward error," siam j. matrix
!  analysis and applications, vol 10, 1989, pp. 165-190.

!=======================================================================
!  arguments:
!=======================================================================

!           ------------------------------------------------------------
!  n:       an integer variable.
!           must be set by caller on input (not modified).
!           must be the same as passed to umd2fa or umd2rf.

!           ------------------------------------------------------------
!  job:     an integer variable.
!           must be set by caller on input (not modified).
!           what system to solve (see the transc argument below).
!           iterative refinement is only performed if job = 0,
!           icntl (8) > 0, and only if the original matrix was
!           preserved (job = 1 in umd2fa or umd2rf).

!           ------------------------------------------------------------
!  transc:  a logical variable.
!           must be set by caller on input (not modified).
!           solve with l and u factors or with l' and u', where
!           transa was passed to umd2fa or umd2rf.
!
!           if transa = false, then paq = lu was performed,
!           and the following systems are solved:
!
!                               transc = false          transc = true
!                               ----------------        ----------------
!                  job = 0      solve ax = b            solve a'x = b
!                  job = 1      solve p'lx = b          solve l'px = b
!                  job = 2      solve uq'x = b          solve qu'x = b
!
!           if transa = true, then a was transformed prior to lu
!           factorization, and p(a')q = lu
!
!                               transc = false          transc = true
!                               ----------------        ----------------
!                  job = 0      solve a'x = b           solve ax = b
!                  job = 1      solve p'lx = b          solve l'px = b
!                  job = 2      solve uq'x = b          solve qu'x = b
!
!           other values of job are treated as zero.  iterative
!           refinement can be done only when solving ax=b or a'x=b.
!
!           the comments below use matlab notation, where
!           x = l \ b means x = (l^(-1)) * b, premultiplication by
!           the inverse of l.

!           ------------------------------------------------------------
!  lvalue:  an integer variable.
!           must be set by caller on input (not modified).
!           the size of value.

!           ------------------------------------------------------------
!  lindex:  an integer variable.
!           must be set by caller on input (not modified).
!           the size of index.

!           ------------------------------------------------------------
!  value:   a real(fltp) array of size lvalue.
!           must be set by caller on input (normally from last call to
!           umd2fa or umd2rf) (not modified).
!           the lu factors, in value (keep (1) ... keep (2)).
!           the entries in value (1 ... keep (1) - 1) and in
!           value (keep (2) + 1 ... lvalue) are not accessed.

!           ------------------------------------------------------------
!  index:   an integer array of size lindex.
!           must be set by caller on input (normally from last call to
!           umd2fa or umd2rf) (not modified).
!           the lu factors, in index (keep (3) ... keep (5)).
!           the entries in index (1 ... keep (3) - 1) and in
!           index (keep (5) + 1 ... lindex) are not accessed.

!           ------------------------------------------------------------
!  keep:    an integer array of size 20.
!
!           keep (1..5): must be set by caller on input (normally from
!               last call to umd2fa or umd2rf) (not modified).
!               layout of the lu factors in value and index

!           ------------------------------------------------------------
!  b:       a real(fltp) array of size n.
!           must be set by caller on input (not modified).
!           the right hand side, b, of the system to solve.

!           ------------------------------------------------------------
!  w:       a real(fltp) array of size 2*n or 4*n.
!           need not be set by caller on input.  modified on output.
!           workspace of size w (1..2*n) if icntl (8) = 0, which
!           is the default value.  if iterative refinement is
!           performed, and w must be of size w (1..4*n) and the
!           residual b-ax (or b-a'x) is returned in w (1..n).

!           ------------------------------------------------------------
!  x:       a real(fltp) array of size n.
!           need not be set by caller on input.  modified on output.
!           the solution, x, of the system that was solved.  valid only
!           if info (1) is greater than or equal to 0.

!           ------------------------------------------------------------
!  cntl:    a real(fltp) array of size 10.
!           must be set by caller on input (not modified).
!           real control parameters, see umd21i for a description,
!           which sets the defaults.  

!           ------------------------------------------------------------
!  icntl:   an integer array of size 20.
!           must be set by caller on input (not modified).
!           integer control parameters, see umd21i for a description,
!           which sets the defaults.  in particular, icntl (8) is
!           the maximum number of steps of iterative refinement to be
!           performed.

!           ------------------------------------------------------------
!  info:    an integer array of size 40.
!           need not be set by caller on input.  modified on output.
!           it contains information about the execution of umd2so.
!
!           info (1) is the error flag.  if info (1) is -7, then
!           the lu factors are uncomputed, or have been corrupted since
!           the last call to umd2fa or umd2rf.  no system is solved,
!           and x (1..n) is not valid on output.  if info (1) is 8,
!           then iterative refinement was requested but cannot be done.
!           to perform iterative refinement, the original matrix must be
!           preserved (job = 1 in umd2fa or umd2rf) and ax=b or a'x=b
!           must be solved (job = 0 in umd2so).  info (24) is the
!           steps of iterative refinement actually taken.

!           ------------------------------------------------------------
!  rinfo:   a real(fltp) array of size 20.
!           need not be set by caller on input.  modified on output.
!           it contains information about the execution of umd2so.
!
!           if iterative refinement was performed then
!           rinfo (7) is the sparse error estimate, omega1, and
!           rinfo (8) is the sparse error estimate, omega2.

!=======================================================================
!  to be preserved between calls to umd2fa, umd2rf, umd2so:
!=======================================================================
!
!  the following must be unchanged since the call to umd2fa or umd2rf
!  that computed the lu factors:
!
!       n
!       value (keep (1) ... keep (2))
!       index (keep (3) ... keep (5))
!       keep (1 ... 20)

!## end of user documentation for umd2so ###############################

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   user routine
!       subroutines called:     umd2er, umd2p1, umd2s2
!       functions called:       max
        intrinsic max

!=======================================================================
!  local scalars:
!=======================================================================

        integer nblks, offip, offxp, n1, nz, ne, offpp, blkpp, lublpp,
     >          app, an, anz, on, lui1, lui2, lux1, lux2, aip, axp,
     >          cpermp, rpermp, nzoff, irstep, yp, ly, lw, sp, ip1, ip2,
     >          xp1, luir1, io, prl
        logical presrv, badlu

!  printing control:
!  -----------------
!  io:      i/o unit for diagnostic messages
!  prl:     printing level
!
!  location and status of lu factors:
!  ----------------------------------
!  lui1:    integer part of lu factors start in index (lui1...)
!  luir1:   index (luir1 ... lui2) is needed for a call to umd2rf
!  lui2:    integer part of lu factors end in index (..lui2)
!  lux1:    real part of lu factors start in value (lux1...)
!  lux2:    real part of lu factors end in value (...lux1)
!  ip1:     pointer into leading part of lu factors in index
!  ip2:     pointer into trailing part of lu factors in index
!  xp1:     pointer into leading part of lu factors in value
!  badlu:   if true, then lu factors are corrupted or not computed
!
!  arrays and scalars allocated in lu factors (in order):
!  ------------------------------------------------------
!  app:     ap (1..n+1) array located in index (app...app+n)
!  axp:     ax (1..nz) array located in value (axp...axp+nz-1)
!  aip:     ai (1..nz) array located in index (aip...aip+nz-1)
!  an:      n if a is preserved, 1 otherwise
!  anz:     nz if a is preserved, 1 otherwise
!  offip:   offi (1..nzoff) array loc. in index (offip...offip+nzoff-1)
!  offxp:   offx (1..nzoff) array loc. in value (offxp...offxp+nzoff-1)
!  ...      lu factors of each diagonal block located here
!  lublpp:  lublkp (1..nblks) array in index (lublpp..lublpp+nblks-1)
!  blkpp:   blkp (1..nblks+1) array loc. in index (blkpp...blkpp+nblks)
!  offpp:   offp (1..n+1) array located in index (offpp...offpp+n)
!  on:      size of offp (1..n+1):  n if nblks > 1, 1 otherwise
!  cpermp:  cperm (1..n) array located in index (cpermp...cpermp+n-1)
!  rpermp:  rperm (1..n) array located in index (rpermp...rpermp+n-1)
!  ...      seven scalars in index (lui2-6...lui2):
!  nzoff:   number of entries in off-diagonal part
!  nblks:   number of diagonal blocks
!  presrv:  true if original matrix was preserved when factorized
!  nz:      entries in a
!  n1:      n argument in umd2fa or umd2rf when matrix factorized
!  ne:      ne argument in umd2fa or umd2rf when matrix factorized
!
!  arrays allocated from w work array:
!  -----------------------------------
!  lw:      size of w
!  yp:      y (1..n) located in w (yp...yp+n-1) 
!  sp:      s (1..n) located in w (sp...sp+n-1) 
!  ly:      size of y and s
!
!  other:
!  ------
!  irstep:  maximum number of iterative refinement steps to take

!=======================================================================
!  executable statements:
!=======================================================================

        io = icntl (2)
        prl = icntl (3)

!-----------------------------------------------------------------------
!  clear informational output
!-----------------------------------------------------------------------

        info (1) = 0
        info (24) = 0
        rinfo (7) = 0
        rinfo (8) = 0

!-----------------------------------------------------------------------
!  print input arguments if requested
!-----------------------------------------------------------------------

        irstep = max (0, icntl (8))
        if (irstep .eq. 0) then 
           lw = 2*n
        else 
           lw = 4*n
        endif 
        call umd2p1 (3, 1,
     >          n, ne, job, transc, lvalue, lindex, value,
     >          index, keep, cntl, icntl, info, rinfo,
     >          b, x, n, w, lw)

!-----------------------------------------------------------------------
!  get pointers to lu factors
!-----------------------------------------------------------------------

        lux1 = keep (1)
        lux2 = keep (2)
        lui1 = keep (3)
        luir1 = keep (4)
        lui2 = keep (5)
        badlu = luir1 .le. 0 .or. lui2-6 .lt. luir1
     >     .or. lui2 .gt. lindex
     >     .or. lux1 .le. 0 .or. lux1 .gt. lux2 .or. lux2 .gt. lvalue
     >     .or. lui1 .le. 0 .or. luir1 .lt. lui1 .or. luir1 .gt. lui2
        if (badlu) then 
           call umd2er (3, icntl, info, -7, 0)
!          error return, lu factors are corrupted:
           go to 9000
        endif 

!-----------------------------------------------------------------------
!  get seven scalars (transa, nzoff, nblks, presrv, nz, n, ne) from lu
!-----------------------------------------------------------------------

        ne = index (lui2)
        n1 = index (lui2-1)
        nz = index (lui2-2)
        presrv = index (lui2-3) .ne. 0
        nblks = index (lui2-4)
        nzoff = index (lui2-5)
!       transa = index (lui2-6) .ne. 0, we don't actually need this here

!-----------------------------------------------------------------------
!  get pointers to permutation vectors
!-----------------------------------------------------------------------

        rpermp = (lui2-6) - n
        cpermp = rpermp - n
        ip2 = cpermp - 1
        xp1 = lux1
        ip1 = lui1

!-----------------------------------------------------------------------
!  get pointers to preserved column-oriented copy of input matrix
!-----------------------------------------------------------------------

        if (presrv) then 

!          -------------------------------------------------------------
!          original matrix preserved in index (lui1..lui1+nz+n) and
!          value (lux1..lux1+nz-1)
!          -------------------------------------------------------------

           app = ip1
           aip = app + n+1
           ip1 = aip + nz
           axp = xp1
           xp1 = axp + nz
           an = n
           anz = nz

        else 

!          -------------------------------------------------------------
!          original matrix not preserved, pass dummy argument to umd2s2
!          -------------------------------------------------------------

           app = 1
           aip = 1
           axp = 1
           an = 1
           anz = 1

        endif 

!-----------------------------------------------------------------------
!  get pointers to block-triangular information, if btf was used
!-----------------------------------------------------------------------

        if (nblks .gt. 1) then 

!          -------------------------------------------------------------
!          get pointers to off-diagonal nonzeros, and btf arrays
!          -------------------------------------------------------------

           offip = ip1
           ip1 = ip1 + nzoff
           offxp = xp1
           xp1 = xp1 + nzoff
           offpp = cpermp - (n+1)
           blkpp = offpp - (nblks+1)
           lublpp = blkpp - (nblks)
           ip2 = lublpp - 1
           on = n

        else 

!          -------------------------------------------------------------
!          matrix was factorized as a single block, pass dummy arg.
!          -------------------------------------------------------------

           offip = 1
           offxp = 1
           offpp = 1
           blkpp = 1
           lublpp = 1
           on = 1

        endif 

        badlu = n .ne. n1 .or. nz .le. 0 .or. luir1 .gt. ip2 .or.
     >     nblks .le. 0 .or. nblks .gt. n .or.
     >     xp1 .gt. lux2 .or. nzoff .lt. 0 .or. ip1 .ne. luir1
        if (badlu) then 
           call umd2er (3, icntl, info, -7, 0)
!          error return, lu factors are corrupted:
           go to 9000
        endif 

!-----------------------------------------------------------------------
!  get the number of steps of iterative refinement
!-----------------------------------------------------------------------

        if (irstep .gt. 0 .and. .not. presrv) then 
!          original matrix not preserved (umd2fa/umd2rf job .ne. 1)
           call umd2er (3, icntl, info, 8, 0)
           irstep = 0
        endif 
        if (irstep .gt. 0 .and. (job .eq. 1 .or. job .eq. 2)) then 
!          iterative refinement for ax=b and a'x=b only (job = 0)
           call umd2er (3, icntl, info, 8, 1)
           irstep = 0
        endif 
        if (irstep .eq. 0) then 
!          pass a dummy argument as y, which is not accessed in umd2s2
           yp = 1
           ly = 1
           sp = 1
           lw = 2*n
        else 
!          pass w (yp ... yp+n-1) as y (1..n) to umd2s2
           yp = 2*n+1
           ly = n
           sp = 3*n+1
           lw = 4*n
        endif 

!-----------------------------------------------------------------------
!  solve; optional iterative refinement and sparse backward error
!-----------------------------------------------------------------------

        call umd2s2 (n, job, transc, lux2-xp1+1, value (xp1),
     >     ip2-luir1+1, index (luir1), b, x,
     >     w, w (n+1), ly, w (yp), w (sp),
     >     cntl, icntl, info, rinfo, index (cpermp), index (rpermp),
     >     presrv, an, anz, index (app), index (aip), value (axp),
     >     on, max (1, nzoff), index (offpp), index (offip),
     >     value (offxp), nblks, index (lublpp), index (blkpp), irstep)

!-----------------------------------------------------------------------
!  print output arguments if requested
!-----------------------------------------------------------------------

!       error return label:
9000    continue

        call umd2p1 (3, 2,
     >          n, ne, job, transc, lvalue, lindex, value,
     >          index, keep, cntl, icntl, info, rinfo,
     >          b, x, n, w, lw)
        return
        end subroutine umd2so
        subroutine umd2su (nlu, npiv, n, lup, lui, lux, x, w)
        integer nlu, npiv, n, lup (nlu), lui (*)
        real(fltp)
     >          lux (*), x (n), w (n)
  
!=== umd2su ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  solves ux = b, where u is the upper triangular factor of a matrix
!  (if btf not used) or a single diagonal block (if btf is used).
!  b is overwritten with the solution x.

!=======================================================================
!  input:
!=======================================================================
!
!       nlu:            number of lu arrowheads in the lu factors
!       npiv:           number of pivots found (normally n)
!       n:              order of matrix
!       lup (1..nlu):   pointer to lu arrowheads in lui
!       lui ( ... ):    integer values of lu arrowheads
!       lux ( ... ):    real values of lu arroheads
!       x (1..n):       the right-hand-side

!=======================================================================
!  workspace:
!=======================================================================
!
!       w (1..n)

!=======================================================================
!  output:
!=======================================================================
!
!       x (1..n):       the solution to ux=b

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   umd2s2
!       subroutines called:     dtrsv, dgemv

!=======================================================================
!  local scalars:
!=======================================================================

        integer j, k, s, luip, luxp, luk, ludegr, ludegc, lurp, uxp,
     >          lucp, col
        real(fltp)
     >          one

!  s:       an element, or lu arrowhead
!  k:       kth pivot
!  j:       jth column in u2 array in element s
!  luip:    s is in lui (luip...)
!  luxp:    real part of s is in lux (luxp...)
!  luk:     number of pivots in s
!  ludegc:  column degree of non-pivotal part of s
!  ludegr:  row degree of non-pivotal part of s
!  lucp:    pattern of column of s in lui (lucp...lucp+ludegc-1)
!  lurp:    pattern of row of s in lui (lurp...lurp+ludegr-1)
!  uxp:     the luk-by-ludegr u2 block of s is in lux (uxp...)
!  col:     column index

!=======================================================================
!  executable statments:
!=======================================================================

        one = 1
        k = npiv
        do 30 s = nlu, 1, -1 

!          -------------------------------------------------------------
!          get s-th lu arrowhead (s = nlu..1, in reverse pivotal order)
!          -------------------------------------------------------------

           luip   = lup (s)
           luxp   = lui (luip)
           luk    = lui (luip+1)
           ludegr = lui (luip+2)
           ludegc = lui (luip+3)
           lucp   = (luip + 7)
           lurp   = lucp + ludegc
           uxp    = luxp + luk * (ludegc + luk)

           if (luk .eq. 1) then 

!             ----------------------------------------------------------
!             only one pivot, stride-1 sparse dot product
!             ----------------------------------------------------------

!fpp$ nodepchk l
              do 10 j = 1, ludegr 
                 col = lui (lurp+j-1)
!                row: k, u (row,col): lux (uxp+j-1)
                 x (k) = x (k) - lux (uxp+j-1) * x (col)
10            continue 
!             divide by pivot, u (k,k): lux (luxp)
              x (k) = x (k) / lux (luxp)
              k = k - 1

           else 

!             ----------------------------------------------------------
!             more than one pivot
!             ----------------------------------------------------------

              k = k - luk
              do 20 j = 1, ludegr 
                 col = lui (lurp+j-1)
                 w (j) = x (col)
20            continue 
              call dgemv ('n', luk, ludegr, -one,
     >           lux (uxp), luk, w, 1, one, x (k+1), 1)
              call dtrsv ('u', 'n', 'n', luk,
     >           lux (luxp), ludegc + luk, x (k+1), 1)

           endif 

30      continue 
        return
        end subroutine umd2su
        subroutine umd2ut (nlu, npiv, n, lup, lui, lux, x, w)
        integer nlu, npiv, n, lup (nlu), lui (*)
        real(fltp)
     >          lux (*), x (n), w (n)
  
!=== umd2ut ============================================================
!
!  unsymmetric-pattern multifrontal package (umfpack). version 2.2d
!  copyright (c) 1997, timothy a. davis, university of florida, usa.
!  all rights reserved.
!  joint work with iain s. duff, rutherford appleton laboratory, uk.
!  july 7, 1997. work supported by the national science foundation
!  (dms-9223088 and dms-9504974) and the state of florida; and by cray
!  research inc. through the allocation of supercomputing resources.

!***********************************************************************
!* notice:  "the umfpack package may be used solely for educational,   *
!* research, and benchmarking purposes by non-profit organizations and *
!* the u.s. government.  commercial and other organizations may make   *
!* use of umfpack solely for benchmarking purposes only.  umfpack may  *
!* be modified by or on behalf of the user for such use but at no time *
!* shall umfpack or any such modified version of umfpack become the    *
!* property of the user.  umfpack is provided without warranty of any  *
!* kind, either expressed or implied.  neither the authors nor their   *
!* employers shall be liable for any direct or consequential loss or   *
!* damage whatsoever arising out of the use or misuse of umfpack by    *
!* the user.  umfpack must not be sold.  you may make copies of        *
!* umfpack, but this notice and the copyright notice must appear in    *
!* all copies.  any other use of umfpack requires written permission.  *
!* your use of umfpack is an implicit agreement to these conditions."  *
!*                                                                     *
!* the ma38 package in release 12 of the harwell subroutine library    *
!* (hsl) has equivalent functionality (and identical calling interface)*
!* as umfpack (the hsl has single and real(fltp) versions only,  *
!* however).  it is available for commercial use.   technical reports, *
!* information on hsl, and matrices are available via the world wide   *
!* web at http://www.cis.rl.ac.uk/struct/arcd/num.html, or by          *
!* anonymous ftp at seamus.cc.rl.ac.uk/pub.  also contact dr. scott    *
!* roberts, harwell subroutine library, b 552, aea technology,         *
!* harwell, didcot, oxon ox11 0ra, england.                            *
!* telephone (44) 1235 434988, fax (44) 1235 434136                    *
!* email scott.roberts@aeat.co.uk, who will provide details of price   *
!* and conditions of use.                                              *
!***********************************************************************

!=======================================================================
!  not user-callable.

!=======================================================================
!  description:
!=======================================================================
!
!  solves u'x = b, where u is the upper triangular factor of a matrix
!  (if btf not used) or a single diagonal block (if btf is used).
!  b is overwritten with the solution x.

!=======================================================================
!  input:
!=======================================================================
!
!       nlu:            number of lu arrowheads in the lu factors
!       npiv:           number of pivots found (normally n)
!       n:              order of matrix
!       lup (1..nlu):   pointer to lu arrowheads in lui
!       lui ( ... ):    integer values of lu arrowheads
!       lux ( ... ):    real values of lu arroheads
!       x (1..n):       the right-hand-side

!=======================================================================
!  workspace:
!=======================================================================
!
!       w (1..n)

!=======================================================================
!  output:
!=======================================================================
!
!       x (1..n):       the solution to u'x=b

!=======================================================================
!  subroutines and functions called / called by:
!=======================================================================
!
!       called by subroutine:   umd2s2
!       subroutines called:     dtrsv, dgemv

!=======================================================================
!  local scalars:
!=======================================================================

        integer i, k, s, luip, luxp, luk, ludegr, ludegc, lurp, uxp,
     >          lucp, row
        real(fltp)
     >          one

!  s:       an element, or lu arrowhead
!  k:       kth pivot
!  i:       ith column in u2' array in element s
!  luip:    s is in lui (luip...)
!  luxp:    real part of s is in lux (luxp...)
!  luk:     number of pivots in s
!  ludegc:  column degree of non-pivotal part of s
!  ludegr:  row degree of non-pivotal part of s
!  lucp:    pattern of column of s in lui (lucp...lucp+ludegc-1)
!  lurp:    pattern of row of s in lui (lurp...lurp+ludegr-1)
!  uxp:     the luk-by-ludegr u2 block of s is in lux (uxp...)
!  row:     row index

!=======================================================================
!  executable statments:
!=======================================================================

        one = 1
        k = 0
        do 40 s = 1, nlu 

!          -------------------------------------------------------------
!          get s-th lu arrowhead (s = 1..nlu, in pivotal order)
!          -------------------------------------------------------------

           luip   = lup (s)
           luxp   = lui (luip)
           luk    = lui (luip+1)
           ludegr = lui (luip+2)
           ludegc = lui (luip+3)
           lucp   = (luip + 7)
           lurp   = lucp + ludegc

           if (luk .eq. 1) then 

!             ----------------------------------------------------------
!             only one pivot, stride-1 sparse saxpy
!             ----------------------------------------------------------

              k = k + 1
!             divide by pivot, u (k,k): lux (luxp)
              x (k) = x (k) / lux (luxp)
              uxp = luxp + ludegc + 1
!fpp$ nodepchk l
              do 10 i = 1, ludegr 
                 row = lui (lurp+i-1)
!                col: k, u (row,col): lux (uxp+i-1)
                 x (row) = x (row) - lux (uxp+i-1) * x (k)
10            continue 

           else 

!             ----------------------------------------------------------
!             more than one pivot
!             ----------------------------------------------------------

              uxp = luxp + luk * (ludegc + luk)
              call dtrsv ('u', 't', 'n', luk,
     >           lux (luxp), ludegc + luk, x (k+1), 1)
              do 20 i = 1, ludegr 
                 row = lui (lurp+i-1)
                 w (i) = x (row)
20            continue 
              call dgemv ('t', luk, ludegr, -one,
     >           lux (uxp), luk, x (k+1), 1, one, w, 1)
              do 30 i = 1, ludegr 
                 row = lui (lurp+i-1)
                 x (row) = w (i)
30            continue 
              k = k + luk

           endif 

40      continue 
        return
        end subroutine umd2ut
!..
!..
!..
!..
!..
      subroutine mc13e_hsl(n,icn,licn,ip,lenr,arp,ib,num,lowl,numb,prev)
      !implicit none
      !save
!..
!.. arp(i) is one less than the number of unsearched edges leaving
!..        node i.  at the end of the algorithm it is set to a
!..        permutation which puts the matrix in block lower
!..        triangular form.
!..ib(i)   is the position in the ordering of the start of the ith
!..        block.  ib(n+1-i) holds the node number of the ith node
!..        on the stack.
!..lowl(i) is the smallest stack position of any node to which a path
!..        from node i has been found.  it is set to n+1 when node i
!..        is removed from the stack.
!..numb(i) is the position of node i in the stack if it is on
!..        it, is the permuted order of node i for those nodes
!..        whose final position has been found and is otherwise zero.
!..prev(i) is the node at the end of the path when node i was
!..        placed on the stack.
!..
!..declare
      integer n,licn,stp,dummy,ip(n),icn(licn),lenr(n),arp(n),ib(n),
     1        lowl(n),numb(n),prev(n),icnt,num,nnm1,j,iv,ist,i1,i2,
     2        ii,iw,ist1,lcnt,i,isn,k



      return  ! skip this since it is not thread safe.  BP 7/15/2014
      
      
      
      



!..
!..
!..icnt is number of nodes whose positions in final ordering have been found.
!..num is the number of blocks that have been found.
      icnt = 0
      num  = 0
      nnm1 = n + n-1
!..
!..initialization of arrays.
      do 20 j=1,n
       numb(j) = 0
       arp(j)  = lenr(j)-1
20    continue
!..
!..look for a starting node
!..ist is the number of nodes on the stack ... it is the stack pointer.
      do 120 isn=1,n
       if (numb(isn) .ne. 0) go to 120
       iv  = isn
       ist = 1
!..
!..put node iv at beginning of stack.
       lowl(iv) = 1
       numb(iv) = 1
       ib(n)    = iv
!..
!..the body of this loop puts a new node on the stack or backtracks.
       do 110 dummy=1,nnm1
        i1 = arp(iv)
!..
!..have all edges leaving node iv been searched.
        if (i1 .lt. 0) go to 60
        i2 = ip(iv) + lenr(iv) - 1
        i1 = i2 - i1
!..
!..look at edges leaving node iv until one enters a new node or all edges are 
!..exhausted.
        do 50 ii=i1,i2
         iw = icn(ii)
         if (numb(iw) .eq. 0) go to 100
         lowl(iv) = min0(lowl(iv),lowl(iw))
50      continue
!..
!..there are no more edges leaving node iv.
        arp(iv) = -1
!..
!..is node iv the root of a block.
60      if (lowl(iv) .lt. numb(iv)) go to 90
!..
!..order nodes in a block.
        num  = num + 1
        ist1 = n + 1 - ist
        lcnt = icnt + 1
!..
!..peel block off the top of the stack starting at the top and working down to 
!..the root of the block.
        do 70 stp=ist1,n
         iw       = ib(stp)
         lowl(iw) = n + 1
         icnt     = icnt + 1
         numb(iw) = icnt
         if (iw .eq. iv) go to 80
70      continue
80      ist     = n - stp
        ib(num) = lcnt
!..
!..are there any nodes left on the stack.
        if (ist .ne. 0) go to 90
!..
!..have all the nodes been ordered.
        if (icnt. lt. n) go to 120
        go to 130
!..
!..backtrack to previous node on path.
90      iw = iv
        iv = prev(iv)
!..
!..update value of lowl(iv) if necessary.
        lowl(iv) = min0(lowl(iv),lowl(iw))
        go to 110
!..
!..put new node on the stack.
100     arp(iv)  = i2 - ii - 1
        prev(iw) = iv
        iv       = iw
        ist      = ist+1
        lowl(iv) = ist
        numb(iv) = ist
        k        = n+1-ist
        ib(k)    = iv
110    continue
120   continue
!..
!..put permutation in the required form.
130   do 140 i=1,n
       ii      = numb(i)
       arp(ii) = i
140   continue
      return
      end subroutine mc13e_hsl
!..
!..
!..
!..
!..
!..
      subroutine mc21b_hsl(n,icn,licn,ip,lenr,iperm,numnz,pr,arp,cv,out)
      !implicit none
      !save
!..
!..does a row permutation to make the diagonal zero free
!..
!..pr(i) is the previous row to i in the depth first search.
!..     it is used as a work array in the sorting algorithm.
!..     elements (iperm(i),i) i=1, ... n  are non-zero at the end of the
!..     algorithm unless n assignments have not been made.  in which case
!..(iperm(i),i) will be zero for n-numnz entries.
!..cv(i)  is the most recent row extension at which column i was visited.
!..arp(i) is one less than the number of non-zeros in row i
!..       which have not been scanned when looking for a cheap assignment.
!..out(i) is one less than the number of non-zeros in row i
!..       which have not been scanned during one pass through the main loop.
!..
!..declare
      integer n,licn,ip(n),icn(licn),lenr(n),iperm(n),pr(n),cv(n),
     1        arp(n),out(n),i,jord,j,in1,in2,k,ii,ioutk,j1,kk,numnz
!..

      return  ! skip this since it is not thread safe.  BP 7/15/2014
      
      
      
      
      
!..initialization of arrays.
      do 10 i=1,n
       arp(i)   = lenr(i)-1
       cv(i)    = 0
       iperm(i) = 0
10    continue
      numnz=0
!..
!..main loop. each pass round this loop either results in a new assignment
!..or gives a row with no assignment.
      do 130 jord=1,n
       j     = jord
       pr(j) = -1
       do 100 k=1,jord
!..
!..look for a cheap assignment
        in1 = arp(j)
        if (in1 .lt. 0) go to 60
        in2 = ip(j) + lenr(j)-1
        in1 = in2 - in1
        do 50 ii=in1,in2
         i = icn(ii)
         if (iperm(i) .eq. 0) go to 110
50      continue
!..
!..no cheap assignment in row.
!..begin looking for assignment chain starting with row j.
        arp(j) = -1
60      out(j) = lenr(j)-1
!..
!..c inner loop.  extends chain by one or backtracks.
        do 90 kk=1,jord
         in1 = out(j)
         if (in1 .lt. 0) go to 80
         in2 = ip(j)+lenr(j)-1
         in1 = in2 - in1
!..
!..forward scan.
         do 70 ii=in1,in2
          i = icn(ii)
          if (cv(i) .eq. jord) go to 70
!..
!..column i has not yet been accessed during this pass.
          j1      = j
          j       = iperm(i)
          cv(i)   = jord
          pr(j)   = j1
          out(j1) = in2-ii-1
          go to 100
70       continue
!..
!..backtracking step.
80       j = pr(j)
         if (j .eq. -1) go to 130
90      continue
100    continue
!..
!..new assignment is made.
110    iperm(i) = j
       arp(j)   = in2 - ii - 1
       numnz    = numnz + 1
       do 120 k=1,jord
        j = pr(j)
        if (j .eq. -1) go to 130
        ii       = ip(j) + lenr(j) - out(j) - 2
        i        = icn(ii)
        iperm(i) = j
120    continue
130   continue
!..
!..if matrix is structurally singular, we now complete the permutation iperm.
      if (numnz .eq. n) return
      do 140 i=1,n
       arp(i) = 0
140   continue
      k = 0
      do 160 i=1,n
       if (iperm(i) .ne. 0) go to 150
       k      = k + 1
       out(k) = i
       go to 160
150    j      = iperm(i)
       arp(j) = i
160   continue
      k = 0
      do 170 i=1,n
       if (arp(i) .ne. 0) go to 170
       k            = k+1
       ioutk        = out(k)
       iperm(ioutk) = i
170   continue
      return
      end subroutine mc21b_hsl

!#ifdef DBLE
      end module mod_umf_dble
!#else
      end module mod_umf_quad
!#endif
