Actual source code: bvtensor.c
 
   slepc-3.18.3 2023-03-24
   
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Tensor BV that is represented in compact form as V = (I otimes U) S
 12: */
 14: #include <slepc/private/bvimpl.h>
 15: #include <slepcblaslapack.h>
 17: typedef struct {
 18:   BV          U;        /* first factor */
 19:   Mat         S;        /* second factor */
 20:   PetscScalar *qB;      /* auxiliary matrix used in non-standard inner products */
 21:   PetscScalar *sw;      /* work space */
 22:   PetscInt    d;        /* degree of the tensor BV */
 23:   PetscInt    ld;       /* leading dimension of a single block in S */
 24:   PetscInt    puk;      /* copy of the k value */
 25:   Vec         u;        /* auxiliary work vector */
 26: } BV_TENSOR;
 28: PetscErrorCode BVMultInPlace_Tensor(BV V,Mat Q,PetscInt s,PetscInt e)
 29: {
 30:   BV_TENSOR         *ctx = (BV_TENSOR*)V->data;
 31:   PetscScalar       *pS;
 32:   const PetscScalar *q;
 33:   PetscInt          ldq,lds = ctx->ld*ctx->d;
 35:   MatDenseGetLDA(Q,&ldq);
 36:   MatDenseGetArray(ctx->S,&pS);
 37:   MatDenseGetArrayRead(Q,&q);
 38:   BVMultInPlace_BLAS_Private(V,lds,V->k-V->l,ldq,s-V->l,e-V->l,pS+(V->nc+V->l)*lds,q+V->l*ldq+V->l,PETSC_FALSE);
 39:   MatDenseRestoreArrayRead(Q,&q);
 40:   MatDenseRestoreArray(ctx->S,&pS);
 41:   return 0;
 42: }
 44: PetscErrorCode BVMultInPlaceHermitianTranspose_Tensor(BV V,Mat Q,PetscInt s,PetscInt e)
 45: {
 46:   BV_TENSOR         *ctx = (BV_TENSOR*)V->data;
 47:   PetscScalar       *pS;
 48:   const PetscScalar *q;
 49:   PetscInt          ldq,lds = ctx->ld*ctx->d;
 51:   MatDenseGetLDA(Q,&ldq);
 52:   MatDenseGetArray(ctx->S,&pS);
 53:   MatDenseGetArrayRead(Q,&q);
 54:   BVMultInPlace_BLAS_Private(V,lds,V->k-V->l,ldq,s-V->l,e-V->l,pS+(V->nc+V->l)*lds,q+V->l*ldq+V->l,PETSC_TRUE);
 55:   MatDenseRestoreArrayRead(Q,&q);
 56:   MatDenseRestoreArray(ctx->S,&pS);
 57:   return 0;
 58: }
 60: PetscErrorCode BVDot_Tensor(BV X,BV Y,Mat M)
 61: {
 62:   BV_TENSOR         *x = (BV_TENSOR*)X->data,*y = (BV_TENSOR*)Y->data;
 63:   PetscScalar       *m;
 64:   const PetscScalar *px,*py;
 65:   PetscInt          ldm,lds = x->ld*x->d;
 69:   MatDenseGetLDA(M,&ldm);
 70:   MatDenseGetArrayRead(x->S,&px);
 71:   MatDenseGetArrayRead(y->S,&py);
 72:   MatDenseGetArray(M,&m);
 73:   BVDot_BLAS_Private(X,Y->k-Y->l,X->k-X->l,lds,ldm,py+(Y->nc+Y->l)*lds,px+(X->nc+X->l)*lds,m+X->l*ldm+Y->l,PETSC_FALSE);
 74:   MatDenseRestoreArray(M,&m);
 75:   MatDenseRestoreArrayRead(x->S,&px);
 76:   MatDenseRestoreArrayRead(y->S,&py);
 77:   return 0;
 78: }
 80: PetscErrorCode BVScale_Tensor(BV bv,PetscInt j,PetscScalar alpha)
 81: {
 82:   BV_TENSOR      *ctx = (BV_TENSOR*)bv->data;
 83:   PetscScalar    *pS;
 84:   PetscInt       lds = ctx->ld*ctx->d;
 86:   MatDenseGetArray(ctx->S,&pS);
 87:   if (PetscUnlikely(j<0)) BVScale_BLAS_Private(bv,(bv->k-bv->l)*lds,pS+(bv->nc+bv->l)*lds,alpha);
 88:   else BVScale_BLAS_Private(bv,lds,pS+(bv->nc+j)*lds,alpha);
 89:   MatDenseRestoreArray(ctx->S,&pS);
 90:   return 0;
 91: }
 93: PetscErrorCode BVNorm_Tensor(BV bv,PetscInt j,NormType type,PetscReal *val)
 94: {
 95:   BV_TENSOR         *ctx = (BV_TENSOR*)bv->data;
 96:   const PetscScalar *pS;
 97:   PetscInt          lds = ctx->ld*ctx->d;
 99:   MatDenseGetArrayRead(ctx->S,&pS);
100:   if (j<0) BVNorm_LAPACK_Private(bv,lds,bv->k-bv->l,pS+(bv->nc+bv->l)*lds,type,val,PETSC_FALSE);
101:   else BVNorm_LAPACK_Private(bv,lds,1,pS+(bv->nc+j)*lds,type,val,PETSC_FALSE);
102:   MatDenseRestoreArrayRead(ctx->S,&pS);
103:   return 0;
104: }
106: PetscErrorCode BVCopyColumn_Tensor(BV V,PetscInt j,PetscInt i)
107: {
108:   BV_TENSOR      *ctx = (BV_TENSOR*)V->data;
109:   PetscScalar    *pS;
110:   PetscInt       lds = ctx->ld*ctx->d;
112:   MatDenseGetArray(ctx->S,&pS);
113:   PetscArraycpy(pS+(V->nc+i)*lds,pS+(V->nc+j)*lds,lds);
114:   MatDenseRestoreArray(ctx->S,&pS);
115:   return 0;
116: }
118: static PetscErrorCode BVTensorNormColumn(BV bv,PetscInt j,PetscReal *norm)
119: {
120:   BV_TENSOR         *ctx = (BV_TENSOR*)bv->data;
121:   PetscBLASInt      one=1,lds_;
122:   PetscScalar       sone=1.0,szero=0.0,*x,dot;
123:   const PetscScalar *S;
124:   PetscReal         alpha=1.0,scale=0.0,aval;
125:   PetscInt          i,lds,ld=ctx->ld;
127:   lds = ld*ctx->d;
128:   MatDenseGetArrayRead(ctx->S,&S);
129:   PetscBLASIntCast(lds,&lds_);
130:   if (PetscUnlikely(ctx->qB)) {
131:     x = ctx->sw;
132:     PetscCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&lds_,&sone,ctx->qB,&lds_,S+j*lds,&one,&szero,x,&one));
133:     dot = PetscRealPart(BLASdot_(&lds_,S+j*lds,&one,x,&one));
134:     BV_SafeSqrt(bv,dot,norm);
135:   } else {
136:     /* Compute *norm = BLASnrm2_(&lds_,S+j*lds,&one); */
137:     if (lds==1) *norm = PetscAbsScalar(S[j*lds]);
138:     else {
139:       for (i=0;i<lds;i++) {
140:         aval = PetscAbsScalar(S[i+j*lds]);
141:         if (aval!=0.0) {
142:           if (PetscUnlikely(scale<aval)) {
143:             alpha = 1.0 + alpha*PetscSqr(scale/aval);
144:             scale = aval;
145:           } else alpha += PetscSqr(aval/scale);
146:         }
147:       }
148:       *norm = scale*PetscSqrtReal(alpha);
149:     }
150:   }
151:   MatDenseRestoreArrayRead(ctx->S,&S);
152:   return 0;
153: }
155: PetscErrorCode BVOrthogonalizeGS1_Tensor(BV bv,PetscInt k,Vec v,PetscBool *which,PetscScalar *h,PetscScalar *c,PetscReal *onorm,PetscReal *norm)
156: {
157:   BV_TENSOR         *ctx = (BV_TENSOR*)bv->data;
158:   PetscScalar       *pS,*cc,*x,dot,sonem=-1.0,sone=1.0,szero=0.0;
159:   PetscInt          i,lds = ctx->ld*ctx->d;
160:   PetscBLASInt      lds_,k_,one=1;
161:   const PetscScalar *omega;
164:   MatDenseGetArray(ctx->S,&pS);
165:   if (!c) VecGetArray(bv->buffer,&cc);
166:   else cc = c;
167:   PetscBLASIntCast(lds,&lds_);
168:   PetscBLASIntCast(k,&k_);
170:   if (onorm) BVTensorNormColumn(bv,k,onorm);
172:   if (ctx->qB) x = ctx->sw;
173:   else x = pS+k*lds;
175:   if (PetscUnlikely(bv->orthog_type==BV_ORTHOG_MGS)) {  /* modified Gram-Schmidt */
177:     if (PetscUnlikely(bv->indef)) { /* signature */
178:       VecGetArrayRead(bv->omega,&omega);
179:     }
180:     for (i=-bv->nc;i<k;i++) {
181:       if (which && i>=0 && !which[i]) continue;
182:       if (ctx->qB) PetscCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&lds_,&sone,ctx->qB,&lds_,pS+k*lds,&one,&szero,x,&one));
183:       /* c_i = (s_k, s_i) */
184:       dot = PetscRealPart(BLASdot_(&lds_,pS+i*lds,&one,x,&one));
185:       if (bv->indef) dot /= PetscRealPart(omega[i]);
186:       BV_SetValue(bv,i,0,cc,dot);
187:       /* s_k = s_k - c_i s_i */
188:       dot = -dot;
189:       PetscCallBLAS("BLASaxpy",BLASaxpy_(&lds_,&dot,pS+i*lds,&one,pS+k*lds,&one));
190:     }
191:     if (PetscUnlikely(bv->indef)) VecRestoreArrayRead(bv->omega,&omega);
193:   } else {  /* classical Gram-Schmidt */
194:     if (ctx->qB) PetscCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&lds_,&sone,ctx->qB,&lds_,pS+k*lds,&one,&szero,x,&one));
196:     /* cc = S_{0:k-1}^* s_k */
197:     PetscCallBLAS("BLASgemv",BLASgemv_("C",&lds_,&k_,&sone,pS,&lds_,x,&one,&szero,cc,&one));
199:     /* s_k = s_k - S_{0:k-1} cc */
200:     if (PetscUnlikely(bv->indef)) BV_ApplySignature(bv,k,cc,PETSC_TRUE);
201:     PetscCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&k_,&sonem,pS,&lds_,cc,&one,&sone,pS+k*lds,&one));
202:     if (PetscUnlikely(bv->indef)) BV_ApplySignature(bv,k,cc,PETSC_FALSE);
203:   }
205:   if (norm) BVTensorNormColumn(bv,k,norm);
206:   BV_AddCoefficients(bv,k,h,cc);
207:   MatDenseRestoreArray(ctx->S,&pS);
208:   VecRestoreArray(bv->buffer,&cc);
209:   return 0;
210: }
212: PetscErrorCode BVView_Tensor(BV bv,PetscViewer viewer)
213: {
214:   BV_TENSOR         *ctx = (BV_TENSOR*)bv->data;
215:   PetscViewerFormat format;
216:   PetscBool         isascii;
217:   const char        *bvname,*uname,*sname;
219:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
220:   if (isascii) {
221:     PetscViewerGetFormat(viewer,&format);
222:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
223:       PetscViewerASCIIPrintf(viewer,"number of tensor blocks (degree): %" PetscInt_FMT "\n",ctx->d);
224:       PetscViewerASCIIPrintf(viewer,"number of columns of U factor: %" PetscInt_FMT "\n",ctx->ld);
225:       return 0;
226:     }
227:     BVView(ctx->U,viewer);
228:     MatView(ctx->S,viewer);
229:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
230:       PetscObjectGetName((PetscObject)bv,&bvname);
231:       PetscObjectGetName((PetscObject)ctx->U,&uname);
232:       PetscObjectGetName((PetscObject)ctx->S,&sname);
233:       PetscViewerASCIIPrintf(viewer,"%s=kron(eye(%" PetscInt_FMT "),%s)*%s(:,1:%" PetscInt_FMT ");\n",bvname,ctx->d,uname,sname,bv->k);
234:     }
235:   } else {
236:     BVView(ctx->U,viewer);
237:     MatView(ctx->S,viewer);
238:   }
239:   return 0;
240: }
242: static PetscErrorCode BVTensorUpdateMatrix(BV V,PetscInt ini,PetscInt end)
243: {
244:   BV_TENSOR      *ctx = (BV_TENSOR*)V->data;
245:   PetscInt       i,j,r,c,l,k,ld=ctx->ld,lds=ctx->d*ctx->ld;
246:   PetscScalar    *qB,*sqB;
247:   Vec            u;
248:   Mat            A;
250:   if (!V->matrix) return 0;
251:   l = ctx->U->l; k = ctx->U->k;
252:   /* update inner product matrix */
253:   if (!ctx->qB) {
254:     PetscCalloc2(lds*lds,&ctx->qB,lds,&ctx->sw);
255:     VecDuplicate(ctx->U->t,&ctx->u);
256:   }
257:   ctx->U->l = 0;
258:   for (r=0;r<ctx->d;r++) {
259:     for (c=0;c<=r;c++) {
260:       MatNestGetSubMat(V->matrix,r,c,&A);
261:       if (A) {
262:         qB = ctx->qB+c*ld*lds+r*ld;
263:         for (i=ini;i<end;i++) {
264:           BVGetColumn(ctx->U,i,&u);
265:           MatMult(A,u,ctx->u);
266:           ctx->U->k = i+1;
267:           BVDotVec(ctx->U,ctx->u,qB+i*lds);
268:           BVRestoreColumn(ctx->U,i,&u);
269:           for (j=0;j<i;j++) qB[i+j*lds] = PetscConj(qB[j+i*lds]);
270:           qB[i*lds+i] = PetscRealPart(qB[i+i*lds]);
271:         }
272:         if (PetscUnlikely(c!=r)) {
273:           sqB = ctx->qB+r*ld*lds+c*ld;
274:           for (i=ini;i<end;i++) for (j=0;j<=i;j++) {
275:             sqB[i+j*lds] = PetscConj(qB[j+i*lds]);
276:             sqB[j+i*lds] = qB[j+i*lds];
277:           }
278:         }
279:       }
280:     }
281:   }
282:   ctx->U->l = l; ctx->U->k = k;
283:   return 0;
284: }
286: static PetscErrorCode BVTensorBuildFirstColumn_Tensor(BV V,PetscInt k)
287: {
288:   BV_TENSOR      *ctx = (BV_TENSOR*)V->data;
289:   PetscInt       i,nq=0;
290:   PetscScalar    *pS,*omega;
291:   PetscReal      norm;
292:   PetscBool      breakdown=PETSC_FALSE;
294:   MatDenseGetArray(ctx->S,&pS);
295:   for (i=0;i<ctx->d;i++) {
296:     if (i>=k) BVSetRandomColumn(ctx->U,nq);
297:     else BVCopyColumn(ctx->U,i,nq);
298:     BVOrthogonalizeColumn(ctx->U,nq,pS+i*ctx->ld,&norm,&breakdown);
299:     if (!breakdown) {
300:       BVScaleColumn(ctx->U,nq,1.0/norm);
301:       pS[nq+i*ctx->ld] = norm;
302:       nq++;
303:     }
304:   }
305:   MatDenseRestoreArray(ctx->S,&pS);
307:   BVTensorUpdateMatrix(V,0,nq);
308:   BVTensorNormColumn(V,0,&norm);
309:   BVScale_Tensor(V,0,1.0/norm);
310:   if (V->indef) {
311:     BV_AllocateSignature(V);
312:     VecGetArray(V->omega,&omega);
313:     omega[0] = (norm<0.0)? -1.0: 1.0;
314:     VecRestoreArray(V->omega,&omega);
315:   }
316:   /* set active columns */
317:   ctx->U->l = 0;
318:   ctx->U->k = nq;
319:   return 0;
320: }
322: /*@
323:    BVTensorBuildFirstColumn - Builds the first column of the tensor basis vectors
324:    V from the data contained in the first k columns of U.
326:    Collective on V
328:    Input Parameters:
329: +  V - the basis vectors context
330: -  k - the number of columns of U with relevant information
332:    Notes:
333:    At most d columns are considered, where d is the degree of the tensor BV.
334:    Given V = (I otimes U) S, this function computes the first column of V, that
335:    is, it computes the coefficients of the first column of S by orthogonalizing
336:    the first d columns of U. If k is less than d (or linearly dependent columns
337:    are found) then additional random columns are used.
339:    The computed column has unit norm.
341:    Level: advanced
343: .seealso: BVCreateTensor()
344: @*/
345: PetscErrorCode BVTensorBuildFirstColumn(BV V,PetscInt k)
346: {
349:   PetscUseMethod(V,"BVTensorBuildFirstColumn_C",(BV,PetscInt),(V,k));
350:   return 0;
351: }
353: static PetscErrorCode BVTensorCompress_Tensor(BV V,PetscInt newc)
354: {
355:   BV_TENSOR      *ctx = (BV_TENSOR*)V->data;
356:   PetscInt       nwu=0,nnc,nrow,lwa,r,c;
357:   PetscInt       i,j,k,n,lds=ctx->ld*ctx->d,deg=ctx->d,lock,cs1=V->k,rs1=ctx->U->k,rk=0,offu;
358:   PetscScalar    *S,*M,*Z,*pQ,*SS,*SS2,t,sone=1.0,zero=0.0,mone=-1.0,*p,*tau,*work,*qB,*sqB;
359:   PetscReal      *sg,tol,*rwork;
360:   PetscBLASInt   ld_,cs1_,rs1_,cs1tdeg,n_,info,lw_,newc_,newctdeg,nnc_,nrow_,nnctdeg,lds_,rk_;
361:   Mat            Q,A;
363:   if (!cs1) return 0;
364:   lwa = 6*ctx->ld*lds+2*cs1;
365:   n = PetscMin(rs1,deg*cs1);
366:   lock = ctx->U->l;
367:   nnc = cs1-lock-newc;
368:   nrow = rs1-lock;
369:   PetscCalloc6(deg*newc*nnc,&SS,newc*nnc,&SS2,(rs1+lock+newc)*n,&pQ,deg*rs1,&tau,lwa,&work,6*n,&rwork);
370:   offu = lock*(rs1+1);
371:   M = work+nwu;
372:   nwu += rs1*cs1*deg;
373:   sg = rwork;
374:   Z = work+nwu;
375:   nwu += deg*cs1*n;
376:   PetscBLASIntCast(n,&n_);
377:   PetscBLASIntCast(nnc,&nnc_);
378:   PetscBLASIntCast(cs1,&cs1_);
379:   PetscBLASIntCast(rs1,&rs1_);
380:   PetscBLASIntCast(newc,&newc_);
381:   PetscBLASIntCast(newc*deg,&newctdeg);
382:   PetscBLASIntCast(nnc*deg,&nnctdeg);
383:   PetscBLASIntCast(cs1*deg,&cs1tdeg);
384:   PetscBLASIntCast(lwa-nwu,&lw_);
385:   PetscBLASIntCast(nrow,&nrow_);
386:   PetscBLASIntCast(lds,&lds_);
387:   MatDenseGetArray(ctx->S,&S);
389:   if (newc>0) {
390:     /* truncate columns associated with new converged eigenpairs */
391:     for (j=0;j<deg;j++) {
392:       for (i=lock;i<lock+newc;i++) PetscArraycpy(M+(i-lock+j*newc)*nrow,S+i*lds+j*ctx->ld+lock,nrow);
393:     }
394:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
395: #if !defined (PETSC_USE_COMPLEX)
396:     PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pQ+offu,&rs1_,Z,&n_,work+nwu,&lw_,&info));
397: #else
398:     PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pQ+offu,&rs1_,Z,&n_,work+nwu,&lw_,rwork+n,&info));
399: #endif
400:     SlepcCheckLapackInfo("gesvd",info);
401:     PetscFPTrapPop();
402:     /* SVD has rank min(newc,nrow) */
403:     rk = PetscMin(newc,nrow);
404:     for (i=0;i<rk;i++) {
405:       t = sg[i];
406:       PetscCallBLAS("BLASscal",BLASscal_(&newctdeg,&t,Z+i,&n_));
407:     }
408:     for (i=0;i<deg;i++) {
409:       for (j=lock;j<lock+newc;j++) {
410:         PetscArraycpy(S+j*lds+i*ctx->ld+lock,Z+(newc*i+j-lock)*n,rk);
411:         PetscArrayzero(S+j*lds+i*ctx->ld+lock+rk,(ctx->ld-lock-rk));
412:       }
413:     }
414:     /*
415:       update columns associated with non-converged vectors, orthogonalize
416:       against pQ so that next M has rank nnc+d-1 instead of nrow+d-1
417:     */
418:     for (i=0;i<deg;i++) {
419:       PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pQ+offu,&rs1_,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_,&zero,SS+i*newc*nnc,&newc_));
420:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pQ+offu,&rs1_,SS+i*newc*nnc,&newc_,&sone,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_));
421:       /* repeat orthogonalization step */
422:       PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pQ+offu,&rs1_,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_,&zero,SS2,&newc_));
423:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pQ+offu,&rs1_,SS2,&newc_,&sone,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_));
424:       for (j=0;j<newc*nnc;j++) *(SS+i*newc*nnc+j) += SS2[j];
425:     }
426:   }
428:   /* truncate columns associated with non-converged eigenpairs */
429:   for (j=0;j<deg;j++) {
430:     for (i=lock+newc;i<cs1;i++) PetscArraycpy(M+(i-lock-newc+j*nnc)*nrow,S+i*lds+j*ctx->ld+lock,nrow);
431:   }
432:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
433: #if !defined (PETSC_USE_COMPLEX)
434:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pQ+offu+newc*rs1,&rs1_,Z,&n_,work+nwu,&lw_,&info));
435: #else
436:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pQ+offu+newc*rs1,&rs1_,Z,&n_,work+nwu,&lw_,rwork+n,&info));
437: #endif
438:   SlepcCheckLapackInfo("gesvd",info);
439:   PetscFPTrapPop();
440:   tol = PetscMax(rs1,deg*cs1)*PETSC_MACHINE_EPSILON*sg[0];
441:   rk = 0;
442:   for (i=0;i<PetscMin(nrow,nnctdeg);i++) if (sg[i]>tol) rk++;
443:   rk = PetscMin(nnc+deg-1,rk);
444:   /* the SVD has rank (at most) nnc+deg-1 */
445:   for (i=0;i<rk;i++) {
446:     t = sg[i];
447:     PetscCallBLAS("BLASscal",BLASscal_(&nnctdeg,&t,Z+i,&n_));
448:   }
449:   /* update S */
450:   PetscArrayzero(S+cs1*lds,(V->m-cs1)*lds);
451:   k = ctx->ld-lock-newc-rk;
452:   for (i=0;i<deg;i++) {
453:     for (j=lock+newc;j<cs1;j++) {
454:       PetscArraycpy(S+j*lds+i*ctx->ld+lock+newc,Z+(nnc*i+j-lock-newc)*n,rk);
455:       PetscArrayzero(S+j*lds+i*ctx->ld+lock+newc+rk,k);
456:     }
457:   }
458:   if (newc>0) {
459:     for (i=0;i<deg;i++) {
460:       p = SS+nnc*newc*i;
461:       for (j=lock+newc;j<cs1;j++) {
462:         for (k=0;k<newc;k++) S[j*lds+i*ctx->ld+lock+k] = *(p++);
463:       }
464:     }
465:   }
467:   /* orthogonalize pQ */
468:   rk = rk+newc;
469:   PetscBLASIntCast(rk,&rk_);
470:   PetscBLASIntCast(cs1-lock,&nnc_);
471:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
472:   PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&nrow_,&rk_,pQ+offu,&rs1_,tau,work+nwu,&lw_,&info));
473:   SlepcCheckLapackInfo("geqrf",info);
474:   for (i=0;i<deg;i++) {
475:     PetscCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&rk_,&nnc_,&sone,pQ+offu,&rs1_,S+lock*lds+lock+i*ctx->ld,&lds_));
476:   }
477:   PetscCallBLAS("LAPACKorgqr",LAPACKorgqr_(&nrow_,&rk_,&rk_,pQ+offu,&rs1_,tau,work+nwu,&lw_,&info));
478:   SlepcCheckLapackInfo("orgqr",info);
479:   PetscFPTrapPop();
481:   /* update vectors U(:,idx) = U*Q(:,idx) */
482:   rk = rk+lock;
483:   for (i=0;i<lock;i++) pQ[i*(1+rs1)] = 1.0;
484:   MatCreateSeqDense(PETSC_COMM_SELF,rs1,rk,pQ,&Q);
485:   ctx->U->k = rs1;
486:   BVMultInPlace(ctx->U,Q,lock,rk);
487:   MatDestroy(&Q);
489:   if (ctx->qB) {
490:    /* update matrix qB */
491:     PetscBLASIntCast(ctx->ld,&ld_);
492:     PetscBLASIntCast(rk,&rk_);
493:     for (r=0;r<ctx->d;r++) {
494:       for (c=0;c<=r;c++) {
495:         MatNestGetSubMat(V->matrix,r,c,&A);
496:         if (A) {
497:           qB = ctx->qB+r*ctx->ld+c*ctx->ld*lds;
498:           PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&rs1_,&rk_,&rs1_,&sone,qB,&lds_,pQ,&rs1_,&zero,work+nwu,&rs1_));
499:           PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&rk_,&rk_,&rs1_,&sone,pQ,&rs1_,work+nwu,&rs1_,&zero,qB,&lds_));
500:           for (i=0;i<rk;i++) {
501:             for (j=0;j<i;j++) qB[i+j*lds] = PetscConj(qB[j+i*lds]);
502:             qB[i+i*lds] = PetscRealPart(qB[i+i*lds]);
503:           }
504:           for (i=rk;i<ctx->ld;i++) PetscArrayzero(qB+i*lds,ctx->ld);
505:           for (i=0;i<rk;i++) PetscArrayzero(qB+i*lds+rk,(ctx->ld-rk));
506:           if (c!=r) {
507:             sqB = ctx->qB+r*ctx->ld*lds+c*ctx->ld;
508:             for (i=0;i<ctx->ld;i++) for (j=0;j<ctx->ld;j++) sqB[i+j*lds] = PetscConj(qB[j+i*lds]);
509:           }
510:         }
511:       }
512:     }
513:   }
515:   /* free work space */
516:   PetscFree6(SS,SS2,pQ,tau,work,rwork);
517:   MatDenseRestoreArray(ctx->S,&S);
519:   /* set active columns */
520:   if (newc) ctx->U->l += newc;
521:   ctx->U->k = rk;
522:   return 0;
523: }
525: /*@
526:    BVTensorCompress - Updates the U and S factors of the tensor basis vectors
527:    object V by means of an SVD, removing redundant information.
529:    Collective on V
531:    Input Parameters:
532: +  V - the tensor basis vectors context
533: -  newc - additional columns to be locked
535:    Notes:
536:    This function is typically used when restarting Krylov solvers. Truncating a
537:    tensor BV V = (I otimes U) S to its leading columns amounts to keeping the
538:    leading columns of S. However, to effectively reduce the size of the
539:    decomposition, it is necessary to compress it in a way that fewer columns of
540:    U are employed. This can be achieved by means of an update that involves the
541:    SVD of the low-rank matrix [S_0 S_1 ... S_{d-1}], where S_i are the pieces of S.
543:    If newc is nonzero, then newc columns are added to the leading columns of V.
544:    This means that the corresponding columns of the U and S factors will remain
545:    invariant in subsequent operations.
547:    Level: advanced
549: .seealso: BVCreateTensor(), BVSetActiveColumns()
550: @*/
551: PetscErrorCode BVTensorCompress(BV V,PetscInt newc)
552: {
555:   PetscUseMethod(V,"BVTensorCompress_C",(BV,PetscInt),(V,newc));
556:   return 0;
557: }
559: static PetscErrorCode BVTensorGetDegree_Tensor(BV bv,PetscInt *d)
560: {
561:   BV_TENSOR *ctx = (BV_TENSOR*)bv->data;
563:   *d = ctx->d;
564:   return 0;
565: }
567: /*@
568:    BVTensorGetDegree - Returns the number of blocks (degree) of the tensor BV.
570:    Not collective
572:    Input Parameter:
573: .  bv - the basis vectors context
575:    Output Parameter:
576: .  d - the degree
578:    Level: advanced
580: .seealso: BVCreateTensor()
581: @*/
582: PetscErrorCode BVTensorGetDegree(BV bv,PetscInt *d)
583: {
586:   PetscUseMethod(bv,"BVTensorGetDegree_C",(BV,PetscInt*),(bv,d));
587:   return 0;
588: }
590: static PetscErrorCode BVTensorGetFactors_Tensor(BV V,BV *U,Mat *S)
591: {
592:   BV_TENSOR *ctx = (BV_TENSOR*)V->data;
595:   ctx->puk = ctx->U->k;
596:   if (U) *U = ctx->U;
597:   if (S) *S = ctx->S;
598:   return 0;
599: }
601: /*@C
602:    BVTensorGetFactors - Returns the two factors involved in the definition of the
603:    tensor basis vectors object, V = (I otimes U) S.
605:    Logically Collective on V
607:    Input Parameter:
608: .  V - the basis vectors context
610:    Output Parameters:
611: +  U - the BV factor
612: -  S - the Mat factor
614:    Notes:
615:    The returned factors are references (not copies) of the internal factors,
616:    so modifying them will change the tensor BV as well. Some operations of the
617:    tensor BV assume that U has orthonormal columns, so if the user modifies U
618:    this restriction must be taken into account.
620:    The returned factors must not be destroyed. BVTensorRestoreFactors() must
621:    be called when they are no longer needed.
623:    Pass a NULL vector for any of the arguments that is not needed.
625:    Level: advanced
627: .seealso: BVTensorRestoreFactors()
628: @*/
629: PetscErrorCode BVTensorGetFactors(BV V,BV *U,Mat *S)
630: {
632:   PetscUseMethod(V,"BVTensorGetFactors_C",(BV,BV*,Mat*),(V,U,S));
633:   return 0;
634: }
636: static PetscErrorCode BVTensorRestoreFactors_Tensor(BV V,BV *U,Mat *S)
637: {
638:   BV_TENSOR      *ctx = (BV_TENSOR*)V->data;
640:   PetscObjectStateIncrease((PetscObject)V);
641:   if (U) *U = NULL;
642:   if (S) *S = NULL;
643:   BVTensorUpdateMatrix(V,ctx->puk,ctx->U->k);
644:   ctx->puk = -1;
645:   return 0;
646: }
648: /*@C
649:    BVTensorRestoreFactors - Restore the two factors that were obtained with
650:    BVTensorGetFactors().
652:    Logically Collective on V
654:    Input Parameters:
655: +  V - the basis vectors context
656: .  U - the BV factor (or NULL)
657: -  S - the Mat factor (or NULL)
659:    Notes:
660:    The arguments must match the corresponding call to BVTensorGetFactors().
662:    Level: advanced
664: .seealso: BVTensorGetFactors()
665: @*/
666: PetscErrorCode BVTensorRestoreFactors(BV V,BV *U,Mat *S)
667: {
671:   PetscUseMethod(V,"BVTensorRestoreFactors_C",(BV,BV*,Mat*),(V,U,S));
672:   return 0;
673: }
675: PetscErrorCode BVDestroy_Tensor(BV bv)
676: {
677:   BV_TENSOR      *ctx = (BV_TENSOR*)bv->data;
679:   BVDestroy(&ctx->U);
680:   MatDestroy(&ctx->S);
681:   if (ctx->u) {
682:     PetscFree2(ctx->qB,ctx->sw);
683:     VecDestroy(&ctx->u);
684:   }
685:   PetscFree(bv->data);
686:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorBuildFirstColumn_C",NULL);
687:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorCompress_C",NULL);
688:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetDegree_C",NULL);
689:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetFactors_C",NULL);
690:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorRestoreFactors_C",NULL);
691:   return 0;
692: }
694: SLEPC_EXTERN PetscErrorCode BVCreate_Tensor(BV bv)
695: {
696:   BV_TENSOR      *ctx;
698:   PetscNew(&ctx);
699:   bv->data = (void*)ctx;
700:   ctx->puk = -1;
702:   bv->ops->multinplace      = BVMultInPlace_Tensor;
703:   bv->ops->multinplacetrans = BVMultInPlaceHermitianTranspose_Tensor;
704:   bv->ops->dot              = BVDot_Tensor;
705:   bv->ops->scale            = BVScale_Tensor;
706:   bv->ops->norm             = BVNorm_Tensor;
707:   bv->ops->copycolumn       = BVCopyColumn_Tensor;
708:   bv->ops->gramschmidt      = BVOrthogonalizeGS1_Tensor;
709:   bv->ops->destroy          = BVDestroy_Tensor;
710:   bv->ops->view             = BVView_Tensor;
712:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorBuildFirstColumn_C",BVTensorBuildFirstColumn_Tensor);
713:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorCompress_C",BVTensorCompress_Tensor);
714:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetDegree_C",BVTensorGetDegree_Tensor);
715:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetFactors_C",BVTensorGetFactors_Tensor);
716:   PetscObjectComposeFunction((PetscObject)bv,"BVTensorRestoreFactors_C",BVTensorRestoreFactors_Tensor);
717:   return 0;
718: }
720: /*@
721:    BVCreateTensor - Creates a tensor BV that is represented in compact form
722:    as V = (I otimes U) S, where U has orthonormal columns.
724:    Collective on U
726:    Input Parameters:
727: +  U - a basis vectors object
728: -  d - the number of blocks (degree) of the tensor BV
730:    Output Parameter:
731: .  V - the new basis vectors context
733:    Notes:
734:    The new basis vectors object is V = (I otimes U) S, where otimes denotes
735:    the Kronecker product, I is the identity matrix of order d, and S is a
736:    sequential matrix allocated internally. This compact representation is
737:    used e.g. to represent the Krylov basis generated with the linearization
738:    of a matrix polynomial of degree d.
740:    The size of V (number of rows) is equal to d times n, where n is the size
741:    of U. The dimensions of S are d times m rows and m-d+1 columns, where m is
742:    the number of columns of U, so m should be at least d.
744:    The communicator of V will be the same as U.
746:    On input, the content of U is irrelevant. Alternatively, it may contain
747:    some nonzero columns that will be used by BVTensorBuildFirstColumn().
749:    Level: advanced
751: .seealso: BVTensorGetDegree(), BVTensorGetFactors(), BVTensorBuildFirstColumn()
752: @*/
753: PetscErrorCode BVCreateTensor(BV U,PetscInt d,BV *V)
754: {
755:   PetscBool      match;
756:   PetscInt       n,N,m;
757:   BV_TENSOR      *ctx;
761:   PetscObjectTypeCompare((PetscObject)U,BVTENSOR,&match);
764:   BVCreate(PetscObjectComm((PetscObject)U),V);
765:   BVGetSizes(U,&n,&N,&m);
767:   BVSetSizes(*V,d*n,d*N,m-d+1);
768:   PetscObjectChangeTypeName((PetscObject)*V,BVTENSOR);
769:   PetscLogEventBegin(BV_Create,*V,0,0,0);
770:   BVCreate_Tensor(*V);
771:   PetscLogEventEnd(BV_Create,*V,0,0,0);
773:   ctx = (BV_TENSOR*)(*V)->data;
774:   ctx->U  = U;
775:   ctx->d  = d;
776:   ctx->ld = m;
777:   PetscObjectReference((PetscObject)U);
778:   MatCreateSeqDense(PETSC_COMM_SELF,d*m,m-d+1,NULL,&ctx->S);
779:   PetscObjectSetName((PetscObject)ctx->S,"S");
781:   /* Copy user-provided attributes of U */
782:   (*V)->orthog_type  = U->orthog_type;
783:   (*V)->orthog_ref   = U->orthog_ref;
784:   (*V)->orthog_eta   = U->orthog_eta;
785:   (*V)->orthog_block = U->orthog_block;
786:   (*V)->vmm          = U->vmm;
787:   (*V)->rrandom      = U->rrandom;
788:   return 0;
789: }