#include "atlas_threads.h"
#include <atlas_lvl3.h>
/*
 * Argument bundle describing one TRSM sub-problem.  The threaded driver
 * fills in one of these per worker thread and hands its address to the
 * thread entry point, which forwards every field unchanged to the serial
 * TRSM routine.
 */
typedef struct ATL_TTRSMt
{
   enum ATLAS_SIDE Side;      /* 1st argument of the serial TRSM call */
   enum ATLAS_UPLO Uplo;      /* 2nd argument of the serial TRSM call */
   enum ATLAS_TRANS TransA;   /* 3rd argument of the serial TRSM call */
   enum ATLAS_DIAG Diag;      /* 4th argument of the serial TRSM call */
   int m;                     /* M dimension handed to the serial call */
   int n;                     /* N dimension handed to the serial call */
   int lda;                   /* leading dimension passed along with A */
   int ldb;                   /* leading dimension passed along with B */
   const TYPE *A;             /* read-only matrix operand */
   TYPE *B;                   /* start of this sub-problem's part of B */
   SCALAR alpha;              /* scalar forwarded to the serial call */
} ATL_TTRSMt;

/*
 * Local shorthand for the routine names used in this file.  Each name is
 * assembled by the tname macro from ATL_C, PRE and a suffix; tname and PRE
 * are defined outside this file (presumably PRE is the precision/type
 * prefix -- confirm against the project headers).
 *   Cpttrsm  : threaded TRSM driver defined below
 *   Cpttrsm2 : thread entry point defined below
 *   Ctrsm    : serial TRSM routine that does the actual work
 */
#define Cpttrsm  tname(tname(ATL_C,PRE),pttrsm)
#define Cpttrsm2 tname(tname(ATL_C,PRE),pttrsm2)
#define Ctrsm    tname(tname(ATL_C,PRE),trsm)

/*
 * Thread entry point.  vp must point to an ATL_TTRSMt describing one
 * sub-problem; every field is forwarded unchanged to the serial TRSM
 * routine.  Always returns NULL.
 */
void *Cpttrsm2(void *vp)
{
   ATL_TTRSMt *job = vp;

   Ctrsm(job->Side, job->Uplo, job->TransA, job->Diag,
         job->m, job->n, job->alpha,
         job->A, job->lda,
         job->B, job->ldb);

   return NULL;
}

/*
 * Threaded TRSM driver.
 *
 * Splits the N dimension of B into Np contiguous slices, where Np is the
 * smaller of N/8 and ATL_NTHREADS.  Np-1 slices are handed to worker
 * threads (each running Cpttrsm2) and the last slice is solved by the
 * calling thread.  Every slice is a multiple of nb=8 wide; the leftover
 * N - n*Np columns are added to the first worker's slice.  If fewer than
 * two slices result, the serial routine is called directly.
 *
 * All arguments are forwarded to the serial TRSM routine unchanged, except
 * that each slice gets its own N and its own starting pointer into B.
 *
 * Failure handling: if the thread attribute object cannot be initialised,
 * the whole problem is solved serially.  If an individual worker cannot be
 * created, its slice is solved by the calling thread instead, so B is
 * always fully processed and only threads that really exist are joined.
 *
 * NOTE(review): partitioning over N treats the slices of B as independent,
 * which looks valid only when A is applied from the left; confirm that
 * callers never reach this path with a right-side solve.
 * NOTE(review): the slice offset n*ldb is in units of TYPE; if TYPE is the
 * real component of a complex type the offset would need scaling -- verify.
 */
void Cpttrsm(enum ATLAS_SIDE side, enum ATLAS_UPLO uplo, enum ATLAS_TRANS ta,
             enum ATLAS_DIAG diag, const int M, const int N, const SCALAR alpha,
             const TYPE *A, const int lda, TYPE *B, int ldb)
{
   const int nb=8;
   const int Np=Mmin(N>>3, ATL_NTHREADS), Np_1=Np-1;
   int nblock, nbpt, n, i;
   ATL_TTRSMt MAT[ATL_NTHREADS];
   pthread_t tp[ATL_NTHREADS];
   int started[ATL_NTHREADS];   /* nonzero iff tp[i] is a joinable thread */
   pthread_attr_t attr;

   if (Np <= 1)
   {
      Ctrsm(side, uplo, ta, diag, M, N, alpha, A, lda, B, ldb);
      return;
   }
   nblock = N >> 3;     /* number of complete nb-wide blocks */
   nbpt = nblock / Np;  /* blocks per slice */
   n = nbpt*nb;         /* columns per slice */
/*
 * Without a usable attribute object no worker can be started safely, so
 * fall back to the serial routine for the whole problem.
 */
   if (pthread_attr_init(&attr))
   {
      Ctrsm(side, uplo, ta, diag, M, N, alpha, A, lda, B, ldb);
      return;
   }
   #ifdef IBM_PT_ERROR
      pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_UNDETACHED);
   #endif
   #ifdef UseSystemScope
      pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
   #endif
   for (i=0; i != Np_1; i++)
   {
      MAT[i].Side = side;
      MAT[i].Uplo = uplo;
      MAT[i].Diag = diag;
      MAT[i].TransA = ta;
      MAT[i].m = M;
      MAT[i].n = n;
      MAT[i].lda = lda;
      MAT[i].ldb = ldb;
      MAT[i].A = A;
      MAT[i].B = B;
      MAT[i].alpha = alpha;
      if (!i) MAT[i].n += N - n*Np;  /* first slice absorbs the remainder */
      started[i] = !pthread_create(&tp[i], &attr, Cpttrsm2, MAT+i);
      B += MAT[i].n * ldb;           /* advance to the next slice of B */
   }
/*
 * The attribute object is only needed while creating threads; release it
 * now so it is not leaked.
 */
   pthread_attr_destroy(&attr);
/*
 * Calling thread solves the final slice while the workers run
 */
   Ctrsm(side, uplo, ta, diag, M, n, alpha, A, lda, B, ldb);
/*
 * Wait for the workers; any slice whose thread could not be created is
 * solved here instead so that no part of B is left untouched.
 */
   for (i=0; i != Np_1; i++)
   {
      if (started[i]) pthread_join(tp[i], NULL);
      else Cpttrsm2(MAT+i);
   }
}

/*
 * Remove the file-local shorthand names so they do not leak into any code
 * that follows this file's contents.
 */
#undef  Cpttrsm 
#undef  Cpttrsm2
#undef  Ctrsm   
