/* ---------------------------------------------------------------------
 *
 * -- Automatically Tuned Linear Algebra Software (ATLAS)
 *    (C) Copyright 1999 All Rights Reserved
 *
 * -- ATLAS routine -- Version 2.0 -- December 25, 1999
 *
 * -- Suggestions,  comments,  bugs reports should be sent to the follo-
 *    wing e-mail address: atlas@cs.utk.edu
 *
 *  Author         : Antoine P. Petitet
 * University of Tennessee - Innovative Computing Laboratory
 * Knoxville TN, 37996-1301, USA.
 *
 * ---------------------------------------------------------------------
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1) Redistributions  of  source  code  must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2) Redistributions in binary form must reproduce  the above copyright
 *    notice,  this list of  conditions and the  following disclaimer in
 *    the documentation and/or other materials provided with the distri-
 *    bution.
 * 3) All advertising materials mentioning features or use of this soft-
 *    ware must display the folowing acknowledgement:
 *    This product includes software developed by the ATLAS group of the
 *    University of Tennesee, Knoxville and its contributors.
 * 4) The names of the  University of Tennessee,  Knoxville,  the  ATLAS
 *    group, or the names of its contributors may not be used to endorse
 *    or  promote products derived  from  this software without specific
 *    prior written permission.
 *
 * -- Disclaimer:
 *
 * The University of Tennessee, Knoxville,  the ATLAS group,  or the au-
 * thors make  no representations about the suitability of this software
 * for any purpose.  This software is provided ``as is'' without express
 * or implied warranty.
 *
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include <stddef.h>
#include "atlas_refmisc.h"
#include "atlas_reflevel3.h"

void ATL_zrefhemm
(
   const enum ATLAS_SIDE      SIDE,
   const enum ATLAS_UPLO      UPLO,
   const int                  M,
   const int                  N,
   const double               * ALPHA,
   const double               * A,
   const int                  LDA,
   const double               * B,
   const int                  LDB,
   const double               * BETA,
   double                     * C,
   const int                  LDC
)
{
/*
 * Purpose
 * =======
 *
 * ATL_zrefhemm  performs one of the matrix-matrix operations
 *
 *    C := alpha * A * B + beta * C,
 *
 * or
 *
 *    C := alpha * B * A + beta * C,
 *
 * where alpha and beta are scalars,  A is a Hermitian matrix and B and
 * C are m by n matrices.
 *
 * Complex values are stored as consecutive  (real, imaginary)  pairs of
 * doubles;  matrices are addressed  column by column  with a stride  of
 * twice the leading dimension.
 *
 * The routine returns immediately, without touching C, when M or N is
 * not positive,  or when alpha is zero and beta is one.  Otherwise  C
 * is first overwritten by beta * C (set to zero when beta is zero, left
 * alone when beta is one),  and,  unless alpha is zero, the product is
 * then accumulated into C.
 *
 * Arguments
 * =========
 *
 * SIDE    (input)                       const enum ATLAS_SIDE
 *         On entry,  SIDE  specifies  whether the  Hermitian  matrix  A
 *         appears  on  the left or right in the operation as follows:
 *
 *            SIDE = AtlasLeft     C := alpha * A * B + beta * C,
 *
 *            SIDE = AtlasRight    C := alpha * B * A + beta * C.
 *
 *         Any value other than AtlasLeft is handled as AtlasRight.
 *         Unchanged on exit.
 *
 * UPLO    (input)                       const enum ATLAS_UPLO
 *         On entry, UPLO  specifies whether the upper or lower triangu-
 *         lar part of the array A is to be referenced as follows:
 *
 *             UPLO = AtlasUpper   Only the upper triangular part of A
 *                                 is to be referenced.
 *
 *             UPLO = AtlasLower   Only the lower triangular part of A
 *                                 is to be referenced.
 *
 *         Any value other than AtlasUpper is handled as AtlasLower.
 *         Unchanged on exit.
 *
 * M       (input)                       const int
 *         On entry,  M  specifies  the number  of rows of the matrix C.
 *         M  must be at least zero. Unchanged on exit.
 *
 * N       (input)                       const int
 *         On entry, N  specifies the number of columns of the matrix C.
 *         N must be at least zero. Unchanged on exit.
 *
 * ALPHA   (input)                       const double *
 *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
 *         supplied  as  zero  then the elements of the matrices A and B
 *         need not be set on input. Unchanged on exit.
 *
 * A       (input)                       const double *
 *         On entry,  A  points  to an array of size equal to or greater
 *         than   LDA * ka * sizeof( double[2] ),  where ka  is  m  when
 *         SIDE = AtlasLeft   and is  n  otherwise.  Before  entry  with
 *         SIDE = AtlasLeft, the  m by m  part of the  array A must con-
 *         tain the Hermitian matrix, such that when  UPLO = AtlasUpper,
 *         the leading m by m upper triangular part of the array A  must
 *         contain the upper triangular part of the Hermitian matrix and
 *         the  strictly lower triangular part of  A  is not referenced,
 *         and when  UPLO = AtlasLower, the  leading m by m lower trian-
 *         gular part of the array A must contain the  lower  triangular
 *         part of the Hermitian matrix and the  strictly upper triangu-
 *         lar part of  A  is not referenced.
 *         Before  entry  with  SIDE = AtlasRight,  the  n by n  part of
 *         the  array  A  must contain the  Hermitian matrix,  such that
 *         when  UPLO = AtlasUpper, the  leading n by n upper triangular
 *         part of the array  A  must  contain the upper triangular part
 *         of the  Hermitian matrix  and the  strictly  lower triangular
 *         part of  A  is not  referenced,  and when  UPLO = AtlasLower,
 *         the leading  n by n  lower  triangular part  of the  array  A
 *         must  contain  the  lower  triangular part  of the  Hermitian
 *         matrix and the  strictly  upper triangular part of  A  is not
 *         referenced.
 *         Note that the imaginary parts of the local entries correspon-
 *         ding to the diagonal  elements of A need not be set and assu-
 *         med to be zero. Unchanged on exit.
 *
 * LDA     (input)                       const int
 *         On entry, LDA  specifies the leading dimension of A as decla-
 *         red  in  the  calling  (sub) program.  LDA  must be  at least
 *         MAX( 1, m ) when SIDE = AtlasLeft, and MAX( 1, n ) otherwise.
 *         Unchanged on exit.
 *
 * B       (input)                       const double *
 *         On entry,  B  points  to an array of size equal to or greater
 *         than   LDB * n * sizeof( double[2] ).  Before entry, the lea-
 *         ding m by n  part of the array  B  must contain the matrix B.
 *         Unchanged on exit.
 *
 * LDB     (input)                       const int
 *         On entry, LDB  specifies the leading dimension of B as decla-
 *         red  in  the  calling  (sub) program.  LDB  must be  at least
 *         MAX( 1, m ). Unchanged on exit.
 *
 * BETA    (input)                       const double *
 *         On entry,  BETA  specifies the scalar  beta.   When  BETA  is
 *         supplied  as  zero  then  the  elements of the matrix C  need
 *         not be set on input. Unchanged on exit.
 *
 * C       (input/output)                double *
 *         On entry,  C  points  to an array of size equal to or greater
 *         than   LDC * n * sizeof( double[2] ).  Before entry, the lea-
 *         ding m by n part of the array  C  must contain the matrix  C,
 *         except when beta is zero,  in which case C need not be set on
 *         entry.  On exit, the array C is overwritten by the m by n up-
 *         dated matrix.
 *
 * LDC     (input)                       const int
 *         On entry, LDC  specifies the leading dimension of C as decla-
 *         red  in  the  calling  (sub) program.  LDC  must be  at least
 *         MAX( 1, m ). Unchanged on exit.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 *
 * Element offsets are kept in size_t rather than int so that products
 * such as column * ( 2 * leading dimension ) cannot overflow a signed
 * int for large arrays.  The loop counters i, j, k stay int, matching
 * the types of M and N.
 */
   const size_t               lda2 = ( (size_t)(LDA) << 1 ),
                              ldb2 = ( (size_t)(LDB) << 1 ),
                              ldc2 = ( (size_t)(LDC) << 1 );
   size_t                     iaii, iaj, iajj, iajk, iaki, iakj, ibij,
                              ibik, ibkj, icij, ickj, jai, jaj, jbj,
                              jbk, jcj;
   int                        i, j, k;
   register double            t0_i, t0_r, t1_i, t1_r;
/* ..
 * .. Executable Statements ..
 *
 */
/*
 * Quick return: nothing to do for an empty (or invalid, negative-sized)
 * C, or when the operation reduces to C := C.
 */
   if( ( M <= 0 ) || ( N <= 0 ) ||
       ( Mdzero( ALPHA[0], ALPHA[1] ) &&
         Mdone ( BETA [0], BETA [1] ) ) ) return;
/*
 * C := beta * C.  A zero beta stores exact zeros instead of multiplying,
 * so the incoming contents of C are never read in that case.
 */
   if( Mdzero( BETA[0], BETA[1] ) )
   {
      for( j = 0, jcj = 0; j < N; j++, jcj += ldc2 )
      {
         for( i = 0, icij = jcj; i < M; i++, icij += 2 )
         {
            Mset( ATL_dZERO, ATL_dZERO, C[icij], C[icij+1] );
         }
      }
   }
   else if( !Mdone( BETA[0], BETA[1] ) )
   {
      for( j = 0, jcj = 0; j < N; j++, jcj += ldc2 )
      {
         for( i = 0, icij = jcj; i < M; i++, icij += 2 )
         {
            Mdscl( BETA[0], BETA[1], C[icij], C[icij+1] );
         }
      }
   }
/*
 * With alpha zero only the scaling of C above was required.
 */
   if( Mdzero( ALPHA[0], ALPHA[1] ) ) return;

   if( SIDE == AtlasLeft )
   {
      if( UPLO == AtlasUpper )
      {
/*
 * C += alpha * A * B, upper triangle of A stored.  For each (i,j):
 *    t0 = alpha * B(i,j)
 *    for k < i:  C(k,j) += A(k,i) * t0,  t1 += B(k,j) * conj( A(k,i) )
 *    C(i,j) += t0 * real( A(i,i) ) + alpha * t1
 */
         for( j = 0,      jbj  = 0,    jcj  = 0;
              j < N; j++, jbj += ldb2, jcj += ldc2 )
         {
            for( i = 0,      jai  = 0,    ibij  = jbj, icij  = jcj;
                 i < M; i++, jai += lda2, ibij += 2,   icij += 2 )
            {
               Mmul( ALPHA[0], ALPHA[1], B[ibij], B[ibij+1], t0_r, t0_i );
               Mset( ATL_dZERO, ATL_dZERO, t1_r, t1_i );
               for( k = 0,      iaki  = jai, ibkj  = jbj, ickj  = jcj;
                    k < i; k++, iaki += 2,   ibkj += 2,   ickj += 2 )
               {
                  Mmla( A[iaki], A[iaki+1], t0_r, t0_i, C[ickj], C[ickj+1] );
                  Mmla( B[ibkj], B[ibkj+1], A[iaki], -A[iaki+1], t1_r, t1_i );
               }
/*
 * Diagonal entry: only its real part is read.
 */
               iaii       = ( (size_t)(i) << 1 ) + jai;
               Mset( C[icij] + t0_r * A[iaii], C[icij+1] + t0_i * A[iaii],
                     C[icij], C[icij+1] );
               Mmla( ALPHA[0], ALPHA[1], t1_r, t1_i, C[icij], C[icij+1] );
            }
         }
      }
      else
      {
/*
 * C += alpha * A * B, lower triangle of A stored.  Same scheme as the
 * upper case, but rows are visited from the last to the first and the
 * inner loop runs over k > i.  Offsets are derived from i directly so
 * that no offset is ever decremented below zero.
 */
         for( j = 0,      jbj  = 0,    jcj  = 0;
              j < N; j++, jbj += ldb2, jcj += ldc2 )
         {
            for( i = M-1; i >= 0; i-- )
            {
               jai        = (size_t)(i) * lda2;
               ibij       = ( (size_t)(i) << 1 ) + jbj;
               icij       = ( (size_t)(i) << 1 ) + jcj;
               Mmul( ALPHA[0], ALPHA[1], B[ibij], B[ibij+1], t0_r, t0_i );
               Mset( ATL_dZERO, ATL_dZERO, t1_r, t1_i );
               for( k = i+1,    iaki  = ( (size_t)(i+1) << 1 )+jai,
                                ibkj  = ( (size_t)(i+1) << 1 )+jbj,
                                ickj  = ( (size_t)(i+1) << 1 )+jcj;
                    k < M; k++, iaki += 2,       ibkj += 2,       ickj += 2 )
               {
                  Mmla( A[iaki], A[iaki+1], t0_r, t0_i, C[ickj], C[ickj+1] );
                  Mmla( B[ibkj], B[ibkj+1], A[iaki], -A[iaki+1], t1_r, t1_i );
               }
/*
 * Diagonal entry: only its real part is read.
 */
               iaii       = ( (size_t)(i) << 1 ) + jai;
               Mset( C[icij] + t0_r * A[iaii], C[icij+1] + t0_i * A[iaii],
                     C[icij], C[icij+1] );
               Mmla( ALPHA[0], ALPHA[1], t1_r, t1_i, C[icij], C[icij+1] );
            }
         }
      }
   }
   else
   {
      if( UPLO == AtlasUpper )
      {
/*
 * C += alpha * B * A, upper triangle of A stored.  Column j of C gets
 *    alpha * real( A(j,j) ) * B(:,j)
 *  + sum over k < j of alpha * A(k,j)         * B(:,k)   (column j of A)
 *  + sum over k > j of alpha * conj( A(j,k) ) * B(:,k)   (row j of A)
 */
         for( j = 0,      iaj  = 0, jaj  = 0,    jbj  = 0,    jcj  = 0;
              j < N; j++, iaj += 2, jaj += lda2, jbj += ldb2, jcj += ldc2 )
         {
            iajj = ( (size_t)(j) << 1 ) + jaj;
            Mset( ALPHA[0] * A[iajj], ALPHA[1] * A[iajj], t0_r, t0_i );
            for( i = 0,      ibij  = jbj, icij  = jcj;
                 i < M; i++, ibij += 2,   icij += 2 )
            {
               Mmla( t0_r, t0_i, B[ibij], B[ibij+1], C[icij], C[icij+1] );
            }
            for( k = 0,      iakj  = jaj, jbk  = 0;
                 k < j; k++, iakj += 2,   jbk += ldb2 )
            {
               Mmul( ALPHA[0], ALPHA[1], A[iakj], A[iakj+1], t0_r, t0_i );
               for( i = 0,      ibik  = jbk, icij  = jcj;
                    i < M; i++, ibik += 2,   icij += 2 )
               {
                  Mmla( t0_r, t0_i, B[ibik], B[ibik+1], C[icij], C[icij+1] );
               }
            }
            for( k = j+1,    iajk  = iaj+(size_t)(j+1)*lda2,
                             jbk   = (size_t)(j+1)*ldb2;
                 k < N; k++, iajk += lda2,           jbk += ldb2 )
            {
               Mmul( ALPHA[0], ALPHA[1], A[iajk], -A[iajk+1], t0_r, t0_i );
               for( i = 0,      ibik  = jbk, icij = jcj;
                    i < M; i++, ibik += 2,   icij += 2 )
               {
                  Mmla( t0_r, t0_i, B[ibik], B[ibik+1], C[icij], C[icij+1] );
               }
            }
         }
      }
      else
      {
/*
 * C += alpha * B * A, lower triangle of A stored.  Column j of C gets
 *    alpha * real( A(j,j) ) * B(:,j)
 *  + sum over k < j of alpha * conj( A(j,k) ) * B(:,k)   (row j of A)
 *  + sum over k > j of alpha * A(k,j)         * B(:,k)   (column j of A)
 */
         for( j = 0,      iaj  = 0, jaj  = 0,    jbj  = 0,    jcj  = 0;
              j < N; j++, iaj += 2, jaj += lda2, jbj += ldb2, jcj += ldc2 )
         {
            iajj = ( (size_t)(j) << 1 ) + jaj;
            Mset( ALPHA[0] * A[iajj], ALPHA[1] * A[iajj], t0_r, t0_i );
            for( i = 0,      ibij  = jbj, icij  = jcj;
                 i < M; i++, ibij += 2,   icij += 2 )
            {
               Mmla( t0_r, t0_i, B[ibij], B[ibij+1], C[icij], C[icij+1] );
            }
            for( k = 0,      iajk  = iaj,  jbk  = 0;
                 k < j; k++, iajk += lda2, jbk += ldb2 )
            {
               Mmul( ALPHA[0], ALPHA[1], A[iajk], -A[iajk+1], t0_r, t0_i );
               for( i = 0,      ibik  = jbk, icij  = jcj;
                    i < M; i++, ibik += 2,   icij += 2 )
               {
                  Mmla( t0_r, t0_i, B[ibik], B[ibik+1], C[icij], C[icij+1] );
               }
            }
            for( k = j+1,    iakj  = ( (size_t)(j+1) << 1 )+jaj,
                             jbk   = (size_t)(j+1)*ldb2;
                 k < N; k++, iakj += 2,                jbk += ldb2 )
            {
               Mmul( ALPHA[0], ALPHA[1], A[iakj], A[iakj+1], t0_r, t0_i );
               for( i = 0,      ibik  = jbk, icij  = jcj;
                    i < M; i++, ibik += 2,   icij += 2 )
               {
                  Mmla( t0_r, t0_i, B[ibik], B[ibik+1], C[icij], C[icij+1] );
               }
            }
         }
      }
   }
/*
 * End of ATL_zrefhemm
 */
}
