LoAdSG
MatrixVectorHomogen.h
1//
2// Created by to35jepo on 12/7/22.
3//
4
5#ifndef SGRUN_MATRIXVECTORHOMOGENH_H
6#define SGRUN_MATRIXVECTORHOMOGENH_H
7
8#include "../extemp/vector.h"
9#include "../tests/old_versions/MatrixVectorMultiplicationPrewavelets.h"
10#include "../sgrid/multiDepthHashGrid.h"
11#include "../BasisTransformations/BasisTransformations.h"
12#include "../stencils/PoissonStencil.h"
13#include "../stencils/Stencil.h"
14#include "../iterator/depthIterator.h"
15#include "../cells/CellStructure.h"
16#include "../applications/norms.h"
17#include "../mycomm.h"
18#include "../localStiffnessMatrices/LocalStiffnessMatrices.h"
19
20
21
22#include <omp.h>
23#include <thread>
24#include <sstream>
25#include <sys/time.h>
26
27
28
32class MatrixVectorHomogen {
33public:
34 std::vector<Depth> depths;
35
36
42 MatrixVectorHomogen(AdaptiveSparseGrid& grid, MultiLevelAdaptiveSparseGrid& mgrid) : gM(mgrid), g(grid), z(grid), u(grid),
43 nodal(mgrid),
44 u_new(grid),
45 u_old(grid), P(mgrid), Ax_neu(grid), nodal_test(mgrid), Ax_solution(grid),
46 depthList(grid), cellData(grid){};
47
48
49 bool option_symmetry = false;
50 bool option_mpi = false;
51
52
59 MatrixVectorHomogen(AdaptiveSparseGrid& grid, MultiLevelAdaptiveSparseGrid& mgrid, bool symmetry) : gM(mgrid), g(grid), z(grid), u(grid),
60 nodal(mgrid),
61 u_new(grid),
62 u_old(grid), P(mgrid), Ax_neu(grid), nodal_test(mgrid), Ax_solution(grid),
63 depthList(grid), option_symmetry(
64 symmetry),cellData(grid){};
72 MatrixVectorHomogen(AdaptiveSparseGrid& grid, MultiLevelAdaptiveSparseGrid& mgrid, bool symmetry, bool mpi_on) : gM(mgrid), g(grid), z(grid), u(grid),
73 nodal(mgrid),
74 u_new(grid),
75 u_old(grid), P(mgrid), Ax_neu(grid), nodal_test(mgrid), Ax_solution(grid),
76 depthList(grid), option_symmetry(
77 symmetry),option_mpi(mpi_on),cellData(grid){};
78
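 // A minimal usage sketch (not part of the class): the operator is bound to a
 // sparse grid and its multilevel companion, then applied once per solver
 // iteration. The names grid, mgrid, stencil, x and Ax below are placeholders,
 // and the constructor arguments of the grid/stencil classes are elided.
 //
 //     AdaptiveSparseGrid grid(/* ... */);
 //     MultiLevelAdaptiveSparseGrid mgrid(/* ... */);
 //     PoissonStencil stencil(/* ... */);
 //     VectorSparseG x(grid), Ax(grid);
 //     MatrixVectorHomogen mv(grid, mgrid, /*symmetry=*/true, /*mpi_on=*/false);
 //     mv.multiplication(x, Ax, stencil);   // Ax = A * x in the prewavelet basis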
79
80 int stencil_count;
81
91 template<class StencilClass>
92 void multiplication(VectorSparseG &prew, VectorSparseG &Ax, StencilClass &stencilClass);
93
94
95 void multiplication_LocalStiffnessMatrix(VectorSparseG &prew, VectorSparseG& Ax, LocalStiffnessMatrices& localStiffnessMatrixAllDepths_){
96
97
98 Ax = 0.0;
99 nodal = 0.0;
100
101 AdaptiveSparseGrid_Base *grid = prew.getSparseGrid();
102 int world_rank = 0;
103
104
105
106
107 grid->WorkOnHangingNodes = true;
108
109
110
111
112#pragma omp parallel
113 {
114
115 MultiDepthHashGrid MultiDepthHashGrid = *grid->getMultiDepthHashGrid();
116 VectorSparseG u_nodal(u.getSparseGrid());
117 int i = 0;
118
119 for (auto it = depthList.begin_all(); it != depthList.end_all(); ++it) {
120
121 if (i % omp_get_num_threads() == omp_get_thread_num()) {
122
123 Depth T = *it;
124 u_nodal = 0.0;
125 calcNodal(u_nodal, prew, T);
126 nodal.setMultiLevelValuesOMP(u_nodal, T, MultiDepthHashGrid);
127 }
128 i++;
129 }
130
131
132 }
133
134
135
136 grid->WorkOnHangingNodes = false;
137
138
139
140
141 stencil_count = 0;
142
143 for (CasesIterator iter2; iter2.goon(); ++iter2) {
144
145 Ax_neu = 0.0;
146
147 bool *restrictions = iter2.getcase();
148 grid->WorkOnHangingNodes = true;
149
150
151 Ax_neu = 0.0;
152 gM = 0.0;
153
154
155 stencil_count = 0;
156
157 caseFunction_LocalStiffnessMatrix(prew, restrictions, localStiffnessMatrixAllDepths_);
158
159 grid->WorkOnHangingNodes = false;
160 Ax = Ax + Ax_neu;
161
162 }
163
164
165
166
167};
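 // Background for the loop above: CasesIterator enumerates all
 // 2^DimensionSparseGrid combinations of "restriction yes/no" per dimension,
 // and every case contributes one partial result Ax_neu that is summed into Ax.
 // For DimensionSparseGrid == 2 the four patterns are (0,0), (0,1), (1,0),
 // (1,1) -- the same set of patterns that intToBinary() produces from the task
 // indices 0..3 in the MPI variants below, e.g. (sketch):
 //
 //     for (int c = 0; c < (1 << DimensionSparseGrid); ++c) {
 //         bool restrictions[DimensionSparseGrid];
 //         intToBinary(c, restrictions);
 //         // ... process this case ...
 //     }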
168
169 template<class Problem>
170 void multiplication_LocalStiffnessMatrix_mpi(VectorSparseG &prew, VectorSparseG& Ax, Problem& localStiffnessMatrixAllDepths_){
171
172 localStiffnessMatrixAllDepths_.resetActiveWorkers();
173
174#ifdef MY_MPI_ON
175 MPI_Barrier(MPI_COMM_WORLD);
176#endif
177 Ax = 0.0;
178 nodal = 0.0;
179
180 AdaptiveSparseGrid_Base *grid = prew.getSparseGrid();
181
182
183
184
185
186 grid->WorkOnHangingNodes = true;
187
188
189
190
191#pragma omp parallel
192 {
193
194 MultiDepthHashGrid MultiDepthHashGrid = *grid->getMultiDepthHashGrid();
195 VectorSparseG u_nodal(u.getSparseGrid());
196 int i = 0;
197
198 for (auto it = depthList.begin_all(); it != depthList.end_all(); ++it) {
199
200 if (i % omp_get_num_threads() == omp_get_thread_num()) {
201
202 Depth T = *it;
203 u_nodal = 0.0;
204 calcNodal(u_nodal, prew, T);
205 nodal.setMultiLevelValuesOMP(u_nodal, T, MultiDepthHashGrid);
206 }
207 i++;
208 }
209
210
211 }
212
213
214
215 grid->WorkOnHangingNodes = false;
216
217
218
219#ifdef MY_MPI_ON
220
221 MPI_Barrier(MPI_COMM_WORLD);
222 casefunction_mpi_onlyCases_LocalStiffnessMatrix(prew,Ax,localStiffnessMatrixAllDepths_);
223
224
225 MPI_Barrier(MPI_COMM_WORLD);
226#else
227
228 stencil_count = 0;
229
230 for (CasesIterator iter2; iter2.goon(); ++iter2) {
231
232 Ax_neu = 0.0;
233
234 bool *restrictions = iter2.getcase();
235 grid->WorkOnHangingNodes = true;
236
237
238 Ax_neu = 0.0;
239 gM = 0.0;
240
241
242 stencil_count = 0;
243
244 caseFunction_LocalStiffnessMatrix(prew, restrictions, localStiffnessMatrixAllDepths_);
245
246 grid->WorkOnHangingNodes = false;
247 Ax = Ax + Ax_neu;
248
249 }
250#endif
251
252
253#ifdef MY_MPI_ON
254 MPI_Barrier(MPI_COMM_WORLD);
255#endif
256
257 };
258
259
260 int mpi_per_worker=1;
261 template<class StencilClass>
262 void casefunction_mpi(VectorSparseG &prew, VectorSparseG &Ax, StencilClass &stencilClass);
263
264
265
266 template<class StencilClass>
267 void casefunction_mpi_onlyCases(VectorSparseG &prew, VectorSparseG &Ax, StencilClass &stencilClass);
268
269
270 template<class Problem>
271 void casefunction_mpi_onlyCases_LocalStiffnessMatrix(VectorSparseG &prew, VectorSparseG &Ax, Problem& localStiffnessMatrices);
272
273
274
275
276
277
278
279
280
281 DepthList depthList;
282
283private:
284
285 MultiLevelVector gM;
286 MultiLevelVector nodal;
287 MultiLevelVector P;
288 VectorSparseG Ax_neu;
289 VectorSparseG g;
290 VectorSparseG z;
291 VectorSparseG u;
292 VectorSparseG u_new;
293 VectorSparseG u_old;
294 MultiLevelVector nodal_test;
295 VectorSparseG Ax_solution;
296
297 CellData cellData;
298
299
300
301
302
308 void calcNodal(VectorSparseG &prew, Depth &depth);
309
315 void calcNodal(VectorSparseG &nodal_u,VectorSparseG &prew, Depth &depth);
316
317
318 double time_sortdepth;
319 double time_plus;
320
330 template<class StencilClass>
331 void caseFunction(VectorSparseG &prew, bool *restrictions, StencilClass &stencil);
332
333
334
344 template<class Problem>
345 void caseFunction_LocalStiffnessMatrix(VectorSparseG &prew, bool *restrictions, Problem& localStiffnessMatrixAllDepths);
346
347
348
359 template<class StencilClass>
360 void applyStencilGlobal(const VectorSparseG &input, VectorSparseG &output, StencilClass &stencil, Depth &T);
361
372 template<class StencilClass>
373 void applyStencilGlobal_symmetry(VectorSparseG &input, VectorSparseG &output, StencilClass &stencil, Depth &T);
374
385 template<class StencilClass>
386 void applyStencilGlobal_MPI(VectorSparseG &input, VectorSparseG &output, StencilClass &stencil, Depth &T);
387
388
399 template<class StencilClass>
400 void applyStencilGlobal_MPI_2(VectorSparseG &input, VectorSparseG &output, StencilClass &stencil, Depth &T);
401
402
413 template<class StencilClass>
414 void applyStencilGlobal_MPI_3(VectorSparseG &input, VectorSparseG &output, StencilClass &stencil, Depth &T);
415
416
417
418
419
420
432 template<class StencilClass>
433 double applyStencilLocal(const IndexDimension &Index, const VectorSparseG &input, StencilClass &stencil, Depth &T);
434
435
444 inline void prolongation1D_inplace(VectorSparseG &values, int &t, int &d) {
445
446
447
448 AdaptiveSparseGrid_Base *grid = values.getSparseGrid();
449
450 for (auto it = depthList.begin_all(); it != depthList.end_all(); ++it){
451 Depth Tlocal = *it;
452 if (Tlocal.at(d) == t) {
453 SingleDepthHashGrid& depthGrid = grid->getMultiDepthHashGrid()->getGridForDepth(Tlocal);
454 const auto& mapping = depthGrid._mapPosToGridPos;
455 if(depthGrid.getNumberOfEntries()>0) {
456 #pragma omp parallel for
457 for (size_t i = 0; i < mapping.size(); i++) {
458
459 IndexDimension Index = depthGrid._map.getIndexOfTable(i);
460
461 unsigned long j = mapping[i];
462 double value = values.getValue(j);
463 if ((!Index.isAtRightBoundary(d)) && (!Index.isAtLeftBoundary(d)))
464 value = value + 0.5 * (values.getValue(Index.nextRight(d)) + values.getValue(Index.nextLeft(d)));
465
466 values.setValue(j, value);
467
468
469 }
470 }
471 }
472
473 }
474 }
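 // Worked 1D example of the interpolation above (dimension d, target level t):
 // a node x of level t lies between its coarser neighbours l = nextLeft(d) and
 // r = nextRight(d). For interior nodes the loop performs
 //
 //     values[x] += 0.5 * (values[l] + values[r]);
 //
 // e.g. values[l] = 2, values[r] = 4, values[x] = 1  ->  values[x] = 4.
 // Boundary nodes are left unchanged (homogeneous setting).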
475
485 inline void restriction1D_inverted_inplace(const VectorSparseG &values, int t, int d) {
486
487 AdaptiveSparseGrid_Base *grid = values.getSparseGrid();
488
489 //double value;
490 double valueL;
491 double valueR;
492
493 for (auto it = depthList.begin_all(); it != depthList.end_all(); ++it) {
494 Depth Tlocal = *it;
495
496 // SingleDepthHashGrid& coarseDepthGrid = grid->getMultiDepthHashGrid()->getGridForDepth(Tlocal);
497 if (Tlocal.at(d) == t + 1 && Tlocal > 0) {
498 SingleDepthHashGrid &depthGrid = grid->getMultiDepthHashGrid()->getGridForDepth(Tlocal);
499 const auto &mapping = depthGrid._mapPosToGridPos;
500 if (depthGrid.getNumberOfEntries() > 0) {
501
502 for (size_t i = 0; i < mapping.size(); i++) {
503 IndexDimension Index = depthGrid._map.getIndexOfTable(i);
504
505 unsigned long j = mapping[i];
506 double value = 0.5 * values.getValue(j);
507
508 if (t >= 0) {
509 IndexDimension rightIndex = Index.nextRight(d);
510 if (!rightIndex.isAtRightBoundary(d)) {
511 // values.getValue(rightIndex,value,singleDepthHashGrid)
512//#pragma omp critical
513 values.addToValue(rightIndex, value);
514 // value += values.getValue(rightIndex);
515 }
516
517 IndexDimension leftIndex = Index.nextLeft(d);
518 if (!leftIndex.isAtLeftBoundary(d)) {
519 // values.getValue(rightIndex,value,singleDepthHashGrid)
520//#pragma omp critical
521 values.addToValue(leftIndex, value);
522 // value += values.getValue(rightIndex);
523 }
524 }
525 }
526 }
527 }
528
529 }
530 };
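 // This routine is the transpose of prolongation1D_inplace: instead of
 // gathering from the coarse neighbours, every node x of depth t+1 in
 // direction d scatters half of its value to both neighbours l and r,
 //
 //     values[l] += 0.5 * values[x];
 //     values[r] += 0.5 * values[x];
 //
 // which realises the restriction used whenever a dimension is marked as
 // "restriction" in the current case.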
531
540 inline void ConvertToPrewavelet2(VectorSparseG &Ax_nodal, VectorSparseG &Ax, Depth &T) {
541 SingleDepthHashGrid& depthGrid = Ax_nodal.getSparseGrid()->getMultiDepthHashGrid()->getGridForDepth(T);
542 const auto& mapping = depthGrid._mapPosToGridPos;
543
544#pragma omp parallel for schedule(runtime)
545 for (size_t i = 0; i < mapping.size(); i++)
546 {
547 if(Ax_nodal.getSparseGrid()->getActiveTable()[mapping[i]]) {
548 IndexDimension I = depthGrid._map.getIndexOfTable(i);
549 double coeff = 0.0;
550 IndexDimension J;
551
552 for (MultiDimFiveCompass mc; mc.goon(); ++mc) {
553 double basis_coeff = 1.0;
554 J = I.nextFiveP(&mc, T, &basis_coeff);
555 if (mc.goon())
556 coeff = coeff + Ax_nodal.getValue(J) * basis_coeff;
557 }
558
559 Ax.setValue(mapping[i], coeff);
560 }
561 }
562 }
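 // Note on ConvertToPrewavelet2: for every active point I of depth T the loop
 // accumulates  Ax[I] = sum_J mask(I,J) * Ax_nodal[J],  where J runs over the
 // (up to five per dimension) support points returned by nextFiveP() and
 // mask(I,J) is the weight delivered through basis_coeff. This maps the nodal
 // representation of the result back to prewavelet coefficients on depth T.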
563
571 void master(int num_workers,int total_tasks);
572
573
574 int MPI_task_index=0;
575 int MPI_total_tasks=PowerOfTwo<DimensionSparseGrid>::value;
576 int MPI_num_workers=0;
577
578
586 void master_start(int num_workers);
587
588 void master_start_onlyCases(int num_workers);
589
590 template<class Problem>
591 void master_start_onlyCases_LS(int num_workers,Problem &localStiffnessMatrices);
592
593
594
595
601 void master_distribute();
602
603
604 void master_distribute_onlyCases();
605
606 template<class Problem>
607 void master_distribute_onlyCases_LS(Problem& localStiffnessMatrices);
608
615
616 template<class LS_Matrix>
617 void master_end_distribute_LocalStiffnessMatrix(LS_Matrix& localStiffnessMatrices);
618
619
620 void distributeAndApplyLocalStiffnessMatrix(LocalStiffnessMatrices &localStiffnessMatrices);
621
631 template<class StencilClass>
632 void worker(VectorSparseG &prew, VectorSparseG &Ax,StencilClass &stencilClass);
633
634 template<class Problem>
635 void worker_localStiffnessMatrices(VectorSparseG &prew, VectorSparseG &Ax,Problem &localStiffnessMatrices);
636
637
638
645 inline void intToBinary(int num, bool binary[]) {
646 for (int i = DimensionSparseGrid - 1; i >= 0; --i) {
647 binary[i] = num & 1;
648 num >>= 1;
649 }
650 }
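 // Worked example: with DimensionSparseGrid == 3 and num == 5 (binary 101) the
 // loop fills binary[] most-significant bit first:
 //
 //     bool b[3];
 //     intToBinary(5, b);   // b == {1, 0, 1}
 //
 // so each of the 2^DimensionSparseGrid task indices maps to one
 // restriction/prolongation pattern per dimension.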
651
652};
653
654
655
657
658
659template<class Problem>
660void MatrixVectorHomogen::multiplication(VectorSparseG &prew, VectorSparseG &Ax, Problem &matrixProblem) {
661
662
663if(matrixProblem.getTypeMatrixVectorMultiplication()==StencilOnTheFly) {
664
665
666
667
668 Ax = 0.0;
669 nodal = 0.0;
670
671 AdaptiveSparseGrid_Base *grid = prew.getSparseGrid();
672 int world_rank = 0;
673
674
675#ifdef MY_MPI_ON
676 // Process *mpi = grid->mpi;
677 int k = 0;
678 int num_cases = POW(2,DimensionSparseGrid);
679 // Get the rank of the process
680
681 MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); // TODO use mpi->getMyRank() instead. But is not correctly implemented
682 // Get the number of processes
683 int world_size;
684 MPI_Comm_size(MPI_COMM_WORLD, &world_size); // TODO could be done better with own defined COMM but maybe not needed
685
686 MPI_Barrier(MPI_COMM_WORLD); // TODO do we need this barrier?
687
688#endif
689
690
691 grid->WorkOnHangingNodes = true;
692
693
694#pragma omp parallel
695 {
696
697 MultiDepthHashGrid MultiDepthHashGrid = *grid->getMultiDepthHashGrid();
698 VectorSparseG u_nodal(u.getSparseGrid());
699 int i = 0;
700
701 for (auto it = depthList.begin_all(); it != depthList.end_all(); ++it) {
702
703 if (i % omp_get_num_threads() == omp_get_thread_num()) {
704
705 Depth T = *it;
706 u_nodal = 0.0;
707 calcNodal(u_nodal, prew, T);
708 nodal.setMultiLevelValuesOMP(u_nodal, T, MultiDepthHashGrid);
709 }
710 i++;
711 }
712
713
714 }
715
716
717 grid->WorkOnHangingNodes = false;
718
719
720#ifdef MY_MPI_ON
721
722 casefunction_mpi_onlyCases(prew,Ax,matrixProblem);
723
724
725#else
726
727
728 stencil_count = 0;
729
730 for (CasesIterator iter2; iter2.goon(); ++iter2) {
731
732 Ax_neu = 0.0;
733
734 bool *restrictions = iter2.getcase();
735 grid->WorkOnHangingNodes = true;
736
737
738 Ax_neu = 0.0;
739 gM = 0.0;
740
741
742 stencil_count = 0;
743
744 caseFunction(prew, restrictions, matrixProblem);
745
746 grid->WorkOnHangingNodes = false;
747 Ax = Ax + Ax_neu;
748
749 }
750
751
752#endif
753}else{
754
 755 multiplication_LocalStiffnessMatrix_mpi(prew, Ax, matrixProblem);
#ifdef MY_MPI_ON
 756 MPI_Barrier(MPI_COMM_WORLD);
#endif
757}
758
759}
760
761
771template<class Problem>
772void MatrixVectorHomogen::casefunction_mpi(VectorSparseG &prew, VectorSparseG &Ax, Problem &stencilClass) {
773
774#ifdef MY_MPI_ON
775 int rank, num_procs;
776
777 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
778
779 MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
780 MPI_Barrier(MPI_COMM_WORLD);
781 MPI_Status status;
782 MPI_task_index=0;
783
784 if (rank < mpi_per_worker) {
785
786
787 //distribute the first tasks to the workers
788 if(rank ==0) {
789
790 master_start(num_procs);
791
792
793 //start working
794 while(MPI_task_index<MPI_total_tasks) {
795
796
797 for(int j=1; j < mpi_per_worker; j++){
798 MPI_Send(&MPI_task_index, 1, MPI_INT, j, TAG_TASK, MPI_COMM_WORLD);
799 }
800
801
802 prew.getSparseGrid()->WorkOnHangingNodes = true;
803 Ax_neu = 0.0;
804 gM = 0.0;
805 bool restrictions[DimensionSparseGrid];
806 intToBinary(MPI_task_index, restrictions);
807 MPI_task_index++;
808
809 caseFunction(prew, restrictions, stencilClass);
810 prew.getSparseGrid()->WorkOnHangingNodes = false;
811 Ax = Ax + Ax_neu;
812
813
814 for(int j=1; j < mpi_per_worker; j++){
815 int old_index;
816
817 MPI_Recv(&old_index, 1, MPI_INT, j, TAG_TASK, MPI_COMM_WORLD,&status);
818 }
819 }
820
821
822
823 for(int j=1; j < mpi_per_worker; j++){
824 int old_index;
825 MPI_Send(&MPI_task_index, 1, MPI_INT, j, TAG_TERMINATE, MPI_COMM_WORLD);
826 }
828
829 }else{
830
831 while(1){
832
833 MPI_Recv(&MPI_task_index, 1, MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
834
835
836 if (TAG_TERMINATE == status.MPI_TAG) {
837 break;
838 }
839
840
841
842 prew.getSparseGrid()->WorkOnHangingNodes = true;
843 Ax_neu = 0.0;
844 gM = 0.0;
845 bool restrictions[DimensionSparseGrid];
846 intToBinary(MPI_task_index, restrictions);
847
848 caseFunction(prew, restrictions, stencilClass);
849 prew.getSparseGrid()->WorkOnHangingNodes = false;
850 Ax = Ax + Ax_neu;
851
852 MPI_Send(&MPI_task_index, 1, MPI_INT, 0, TAG_TASK, MPI_COMM_WORLD);
853
854 }
855
856
857
858
859
860 }
861
862
863
864
865
866 } else {
867
868 // Worker processes
869 worker(prew, Ax, stencilClass);
870 }
871
872
873
874
875 if(rank%mpi_per_worker==0 && rank > 0 ){
876 int tag=0;
877 MPI_Send(Ax.getDatatableVector(), Ax.getSparseGrid()->getMaximalOccupiedSecondTable(),MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
878 }
879
880 if(rank==0){
881
882 for(int rank_j=mpi_per_worker; rank_j< num_procs; rank_j+=mpi_per_worker){
883 Ax_neu=0.0;
884 MPI_Recv(Ax_neu.getDatatableVector(), Ax_neu.getSparseGrid()->getMaximalOccupiedSecondTable(),MPI_DOUBLE, rank_j, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
885 Ax = Ax+Ax_neu;
886 }
887 }
888
889#endif
890
891
892
893};
894
895
896template<class Problem>
897void MatrixVectorHomogen::casefunction_mpi_onlyCases(VectorSparseG &prew, VectorSparseG &Ax, Problem &stencilClass) {
898
899#ifdef MY_MPI_ON
900 int rank, num_procs;
901
902 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
903
904 MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
905 MPI_Barrier(MPI_COMM_WORLD);
906 MPI_Status status;
907 MPI_task_index=0;
908 MPI_num_workers=min(num_procs,MPI_total_tasks);
909
910
911
912
913
914 //distribute the first tasks to the workers
915 if(rank ==0) {
916
917
918 master_start_onlyCases(num_procs);
919
920
921
922 //start working
923 while(MPI_task_index<MPI_total_tasks) {
924
925 prew.getSparseGrid()->WorkOnHangingNodes = true;
926 Ax_neu = 0.0;
927 gM = 0.0;
928 bool restrictions[DimensionSparseGrid];
929 intToBinary(MPI_task_index, restrictions);
930 MPI_task_index++;
931
932 caseFunction(prew, restrictions, stencilClass);
933
934 prew.getSparseGrid()->WorkOnHangingNodes = false;
935 Ax = Ax + Ax_neu;
936
937 }
938
939
940
942
943 }else if(rank<MPI_num_workers){
944
945 // Worker processes
946
947 worker(prew, Ax, stencilClass);
948
949 }
950
951
952 MPI_Barrier(MPI_COMM_WORLD);
953 MPI_Allreduce(MPI_IN_PLACE, Ax.getDatatableVector(),Ax.getSparseGrid()->getMaximalOccupiedSecondTable(), MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
954
955
956
957#endif
958
959
960
961};
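// Sketch of the task distribution above (MPI build only): rank 0 first hands one
// case index to every worker (master_start_onlyCases), then alternates between
// processing indices itself and handing new ones to idle workers
// (master_distribute_onlyCases, called from inside caseFunction). Workers loop in
// worker(): receive an index, run caseFunction(), report back, until the master
// signals TAG_TERMINATE. Since every rank only accumulates its own partial Ax,
// the final
//
//     MPI_Allreduce(MPI_IN_PLACE, Ax.getDatatableVector(), ..., MPI_SUM, ...);
//
// sums the contributions so that all ranks hold the complete product.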
962
963
964template<class StencilClass>
965void MatrixVectorHomogen::caseFunction(VectorSparseG &prew, bool *restrictions, StencilClass &stencil) {
966
967
968
969 gM= 0.0;
970 AdaptiveSparseGrid_Base *grid = prew.getSparseGrid();
971
972
973 grid->WorkOnHangingNodes = true;
974
975
976
977
978
979
980
981 // Sort the depths according to restrictions/prolongations
982 depthList.sortDepths(restrictions);
983
984
985
986 std::list<Depth>* sortedDepths = depthList.getSortierteTiefen();
987 if(depthList.getSortierteTiefen()->size()==0) return;
988
989
990
991
992 // Prolongate the nodal values
993 P += nodal;
994
995
996
997 for(int d=0; d < DimensionSparseGrid; d++) {
998 if(restrictions[d]==0){
999 for (Depth T: *sortedDepths) {
1000
1001 int t = T.at(d) + 1;
1002 Depth T_fine = T;
1003 T_fine.set(t, d);
1004
1005
1006 if(depthList.isIncluded(T_fine)){
1007
1008
1009 u_new = 0.0;
1010 u_new.setMultiLevelValues2(P,T);
1011 prolongation1D_inplace(u_new, t, d);
1012
1013
1014 P.addMultiLevelValues(u_new, T_fine);
1015
1016 }
1017 }
1018 }
1019 }
1020
1021
1022
1023 for (Depth T: *sortedDepths){
1024
1025 grid->WorkOnHangingNodes = true;
1026
1027
1028 u= 0.0;
1029 u.setMultiLevelValues2(P,T);
1030
1031 z = 0.0;
1032
1033
1034 stencil.initialize(T);
1035
1036
1037
1038
1039 if(option_mpi){
1040 applyStencilGlobal_MPI_3(u,z,stencil,T);
1041
1042 }else {
1043 if (option_symmetry){
1044 applyStencilGlobal_symmetry(u, z, stencil, T);
1045
1046 }
1047 else applyStencilGlobal(u, z, stencil, T);
1048 }
1049
1050#ifdef MY_MPI_ON
1051
1052 int rank;
1053 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
1054 if(rank==0){
1055 master_distribute_onlyCases();
1056 }
1057
1058#endif
1059
1060 z.addMultiLevelValues2(gM,T);
1061
1062
1063
1064 Depth Tcoarse(T);
1065 for (int d = 0; d < DimensionSparseGrid; d++) {
1066 if (restrictions[d]) {
1067 Tcoarse.set(T.at(d) - 1, d);
1068 if(depthList.isIncluded(Tcoarse)){
1069 restriction1D_inverted_inplace(z,Tcoarse.at(d),d);
1070 g = z;
1071 z.addMultiLevelValues2(gM,Tcoarse);
1072 gM.setMultiLevelValues2(g, Tcoarse);
1073 }
1074 }
1075 }
1076
1077
1078
1079 grid->WorkOnHangingNodes=false;
1080
1081 if(depthList.isIncluded(Tcoarse)) ConvertToPrewavelet2(z, Ax_neu, Tcoarse);
1082
1083
1084
1085 }
1086
1087
1088
1089
1090};
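// Summary of caseFunction for one restriction pattern:
//   1. sort the depths of the grid according to the pattern (depthList.sortDepths),
//   2. prolongate the nodal values upward in every non-restricted dimension
//      (P is filled level by level via prolongation1D_inplace),
//   3. apply the stencil on each depth T (applyStencilGlobal*) and add the
//      contributions already collected in gM,
//   4. restrict the result in every restricted dimension
//      (restriction1D_inverted_inplace) and store it in gM for the coarser depth,
//   5. convert the nodal values of the final depth back to prewavelet
//      coefficients (ConvertToPrewavelet2) and write them into Ax_neu.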
1091
1092
1093
1094
1095
1096
1097template<class Problem>
1098void MatrixVectorHomogen::applyStencilGlobal(const VectorSparseG &input, VectorSparseG &output, Problem &matrixProblem, Depth &T) {
1099
1100
1101 AdaptiveSparseGrid_Base *grid = input.getSparseGrid();
1102
1103 for(unsigned long i=0;i<grid->getMaximalOccupiedSecondTable();i++){
1104 IndexDimension I = grid->getIndexOfTable(i);
1105 Depth T_local(I);
1106
1107 if(T_local<=T) {
1108 double val;
1109 val = applyStencilLocal(I, input, matrixProblem, T);
1110 output.setValue(i, val);
1111 }
1112 }
1113
1114
1115/*
1116 #pragma omp parallel
1117 {
1118 auto iter = GetNextSmallerDepthsIteratorInner(T);
1119 do {
1120 Depth Tlocal = *iter;
1121
1122 const SingleDepthHashGrid &depthGrid = grid->getMultiDepthHashGrid()->getGridForDepth(Tlocal);
1123 const auto &mapping = depthGrid._mapPosToGridPos;
1124 const auto &map = depthGrid._map;
1125 const size_t end = mapping.size();
1126
1127
1128 if (depthGrid.getNumberOfEntries() > 0) {
1129
1130 #pragma omp for schedule(runtime)
1131 for (size_t i = 0; i < mapping.size(); i++) {
1132
1133 double val;
1134 IndexDimension I = map.getIndexOfTable(i);
1135
1136 val = applyStencilLocal(I, input, matrixProblem, T);
1137
1138
1139
1140 output.setValue(mapping[i], val);
1141
1142 }
1143 }
1144
1145
1146 } while (iter.next());
1147 }
1148*/
1149
1150
1151
1152
1153
1154};
1155
1156template<class Problem>
1157void MatrixVectorHomogen::applyStencilGlobal_symmetry(VectorSparseG &input, VectorSparseG &output, Problem &matrixProblem, Depth &T) {
1158 depths.push_back(T);
1159
1160
1161
1162
1163
1164 AdaptiveSparseGrid_Base *grid = input.getSparseGrid();
1165 int rank = 0;
1166
1167 Depth Tlocal = T;
1168
1169 ++Tlocal;
1170
1171
1172 const SingleDepthHashCellStructure &depthGrid = cellData.getGridForDepth(Tlocal);
1173 const auto &map = depthGrid._map;
1174 const auto end = depthGrid.getNumberOfEntries();
1175
1176
1177#pragma omp parallel for schedule(dynamic)
1178 for (size_t i = 0; i < end; i++) {
1179
1180 CellDimension cellDimension = map.getIndexOfTable(i);
1181
1182 matrixProblem.applyStencilOnCell(cellDimension, input, output);
1183 //matrixProblem.applyStencilOnCell_MPI_OMP(cellDimension,input,output);
1184
1185
1186 }
1187
1188
1189};
1190
1191template<class Problem>
1192void MatrixVectorHomogen::applyStencilGlobal_MPI(VectorSparseG &input, VectorSparseG &output, Problem &matrixProblem, Depth &T) {
1193
1194 Depth Tlocal = T;
1195 ++Tlocal;
1196#ifdef MY_MPI_ON
1197 int rank; int size;
1198 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
1199
1200
1201
1202
1203 int case_size = mpi_per_worker;
1204 int case_rank = rank%mpi_per_worker;
1205
1206#endif
1207
1208
1209 const SingleDepthHashCellStructure &depthGrid = cellData.getGridForDepth(Tlocal);
1210 const auto &map = depthGrid._map;
1211 const auto end_grid = depthGrid.getNumberOfEntries();
1212
1213 for (size_t i = 0; i < end_grid; i++) {
1214
1215
1216#ifdef MY_MPI_ON
1217 if (i % case_size == case_rank) {
1218
1219#endif
1220
1221 CellDimension cellDimension = map.getIndexOfTable(i);
1222 matrixProblem.applyStencilOnCell_MPI_OMP(cellDimension, input, output);
1223#ifdef MY_MPI_ON
1224 }
1225#endif
1226
1227 }
1228
1229
1230
1231#ifdef MY_MPI_ON
1232
1233
1234
1235 //this should be done with MPI communicators
1236 if(case_rank==0){
1237 //case_rank == 0 means master
1238
1239 VectorSparseG output_sum(output.getSparseGrid());
1240
1241
1242 for(int j=1;j<case_size;j++){
1243
1244 MPI_Status status;
1245
1246
1247 MPI_Recv(output_sum.getDatatableVector(), output.getSparseGrid()->getMaximalOccupiedSecondTable(), MPI_DOUBLE, rank+j, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
1248
1249 output = output+output_sum;
1250
1251 }
1252
1253 for(int j=1;j<case_size;j++){
1254
1255
1256 int tag=0;
1257 MPI_Send(output.getDatatableVector(), output.getSparseGrid()->getMaximalOccupiedSecondTable(), MPI_DOUBLE,rank+j, tag, MPI_COMM_WORLD);
1258
1259 }
1260
1261
1262
1263
1264 }else{
1265
1266
1267
1268 int sendTo = rank-case_rank;
1269
1270 int tag=0;
1271 MPI_Status status;
1272
1273 MPI_Send(output.getDatatableVector(), output.getSparseGrid()->getMaximalOccupiedSecondTable(), MPI_DOUBLE,sendTo, tag, MPI_COMM_WORLD);
1274
1275 MPI_Recv(output.getDatatableVector(), output.getSparseGrid()->getMaximalOccupiedSecondTable(), MPI_DOUBLE, sendTo, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
1276
1277 }
1278
1279
1280
1281
1282#endif
1283
1284
1285
1286
1287};
1288
1289
1290
1291
1292template<class Problem>
1293void MatrixVectorHomogen::applyStencilGlobal_MPI_2(VectorSparseG &input, VectorSparseG &output, Problem &matrixProblem, Depth &T) {
1294
1295 Depth Tlocal = T;
1296 ++Tlocal;
1297#ifdef MY_MPI_ON
1298 int rank; int size;
1299 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
1300
1301
1302
1303
1304 int case_size = mpi_per_worker;
1305 if(mpi_per_worker!=2){
1306 cout << "applyStencilGlobal_MPI_2 requires mpi_per_worker == 2" << endl;
1307 exit(1);
1308 }
1309 int case_rank = rank%mpi_per_worker;
1310#else
1311 cout << "THIS CAN ONLY BE USED WITH MPI!" << endl;
1312
1313
1314#endif
1315
1316
1317 const SingleDepthHashCellStructure &depthGrid = cellData.getGridForDepth(Tlocal);
1318 const auto &map = depthGrid._map;
1319 const auto end_grid = depthGrid.getNumberOfEntries();
1320
1321 if(end_grid%2==0){
1322#ifdef MY_MPI_ON
1323 if (case_rank==0) {
1324 #pragma omp parallel for schedule(runtime)
1325 for (size_t i = 0; i < end_grid/2; i++) {
1326 CellDimension cellDimension = map.getIndexOfTable(i);
1327 matrixProblem.applyStencilOnCell(cellDimension, input, output);
1328 }
1329 }
1330 if (case_rank==1){
1331 #pragma omp parallel for schedule(runtime)
1332 for(size_t i=end_grid/2; i < end_grid; i++){
1333 CellDimension cellDimension = map.getIndexOfTable(i);
1334 matrixProblem.applyStencilOnCell(cellDimension, input, output);
1335 }
1336 }
1337#endif
1338 }else{
1339#ifdef MY_MPI_ON
1340 if (case_rank==0) {
1341 #pragma omp parallel for schedule(runtime)
1342 for (size_t i = 0; i < (end_grid-1)/2; i++) {
1343 CellDimension cellDimension = map.getIndexOfTable(i);
1344 matrixProblem.applyStencilOnCell(cellDimension, input, output);
1345 }
1346 }
1347 if (case_rank==1){
1348 #pragma omp parallel for schedule(runtime)
1349 for(size_t i=(end_grid-1)/2; i < end_grid; i++){
1350 CellDimension cellDimension = map.getIndexOfTable(i);
1351 matrixProblem.applyStencilOnCell(cellDimension, input, output);
1352 }
1353 }
1354#endif
1355
1356
1357 }
1358
1359
1360
1361
1362#ifdef MY_MPI_ON
1363
1364
1365
1366
1367 if(case_rank==0){
1368
1369
1370 VectorSparseG output_sum(output.getSparseGrid());
1371 MPI_Status status;
1372 MPI_Recv(output_sum.getDatatableVector(), output.getSparseGrid()->getMaximalOccupiedSecondTable(), MPI_DOUBLE, rank+1, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
1373 output = output+output_sum;
1374 int tag=0;
1375 MPI_Send(output.getDatatableVector(), output.getSparseGrid()->getMaximalOccupiedSecondTable(), MPI_DOUBLE,rank+1, tag, MPI_COMM_WORLD);
1376
1377 }else{
1378 int sendTo = rank-1;
1379 int tag=0;
1380 MPI_Status status;
1381 MPI_Send(output.getDatatableVector(), output.getSparseGrid()->getMaximalOccupiedSecondTable(), MPI_DOUBLE,sendTo, tag, MPI_COMM_WORLD);
1382 MPI_Recv(output.getDatatableVector(), output.getSparseGrid()->getMaximalOccupiedSecondTable(), MPI_DOUBLE, sendTo, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
1383
1384 }
1385
1386
1387
1388
1389#endif
1390
1391
1392
1393
1394};
1395
1396
1397
1398
1399template<class Problem>
1400void MatrixVectorHomogen::applyStencilGlobal_MPI_3(VectorSparseG &input, VectorSparseG &output, Problem &matrixProblem, Depth &T) {
1401
1402 Depth Tlocal = T;
1403 ++Tlocal;
1404#ifdef MY_MPI_ON
1405 int rank; int size;
1406 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
1407
1408
1409
1410
1411 int case_size = mpi_per_worker;
1412
1413 int case_rank = rank%mpi_per_worker;
1414#else
1415 cout << "THIS CAN ONLY BE USED WITH MPI!" << endl;
1416
1417
1418#endif
1419
1420
1421 const SingleDepthHashCellStructure &depthGrid = cellData.getGridForDepth(Tlocal);
1422 const auto &map = depthGrid._map;
1423 const auto end_grid = depthGrid.getNumberOfEntries();
1424
1425 if(end_grid%mpi_per_worker==0) {
1426#ifdef MY_MPI_ON
1427
1428 for(int j=0; j<mpi_per_worker; j++){
1429 if (case_rank==j){
1430#pragma omp parallel for schedule(runtime)
1431 for (size_t i = j*(end_grid/mpi_per_worker); i < (j+1)*(end_grid/mpi_per_worker); i++) {
1432 CellDimension cellDimension = map.getIndexOfTable(i);
1433 matrixProblem.applyStencilOnCell(cellDimension, input, output);
1434 }
1435 }
1436 }
1437
1438#endif
1439 }else{
1440#ifdef MY_MPI_ON
1441 int modulo = end_grid%mpi_per_worker;
1442 int end_grid_new = end_grid-modulo;
1443 for(int j=0; j<mpi_per_worker; j++){
1444 if (case_rank==j){
1445 #pragma omp parallel for schedule(runtime)
1446 for (size_t i = j*(end_grid_new/mpi_per_worker); i < (j+1)*(end_grid_new/mpi_per_worker); i++) {
1447 CellDimension cellDimension = map.getIndexOfTable(i);
1448 matrixProblem.applyStencilOnCell(cellDimension, input, output);
1449 }
1450 }
1451 }
1452
1453 for(int j=0; j<modulo; j++){
1454 if (case_rank==j){
1455 CellDimension cellDimension = map.getIndexOfTable(j+end_grid_new);
1456 matrixProblem.applyStencilOnCell(cellDimension, input, output);
1457 }
1458 }
1459
1460
1461
1462#endif
1463
1464
1465 }
1466
1467
1468
1469
1470#ifdef MY_MPI_ON
1471
1472
1473
1474
1475 //this should be done with MPI communicators
1476 if(case_rank==0){
1477 //case_rank == 0 means master
1478
1479 VectorSparseG output_sum(output.getSparseGrid());
1480
1481
1482 for(int j=1;j<case_size;j++){
1483
1484 MPI_Status status;
1485
1486
1487 MPI_Recv(output_sum.getDatatableVector(), output.getSparseGrid()->getMaximalOccupiedSecondTable(), MPI_DOUBLE, rank+j, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
1488
1489 output = output+output_sum;
1490
1491 }
1492
1493 for(int j=1;j<case_size;j++){
1494
1495
1496 int tag=0;
1497 MPI_Send(output.getDatatableVector(), output.getSparseGrid()->getMaximalOccupiedSecondTable(), MPI_DOUBLE,rank+j, tag, MPI_COMM_WORLD);
1498
1499 }
1500
1501
1502
1503
1504 }else{
1505
1506
1507
1508 int sendTo = rank-case_rank;
1509
1510 int tag=0;
1511 MPI_Status status;
1512
1513 MPI_Send(output.getDatatableVector(), output.getSparseGrid()->getMaximalOccupiedSecondTable(), MPI_DOUBLE,sendTo, tag, MPI_COMM_WORLD);
1514
1515 MPI_Recv(output.getDatatableVector(), output.getSparseGrid()->getMaximalOccupiedSecondTable(), MPI_DOUBLE, sendTo, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
1516
1517 }
1518
1519
1520
1521
1522
1523#endif
1524
1525
1526
1527
1528};
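// Intended work split in applyStencilGlobal_MPI_3, illustrated for end_grid = 10
// cells and mpi_per_worker = 3: the divisible part (9 cells) is split into chunks
// of 3, so case-rank 0 handles cells 0..2, rank 1 cells 3..5, rank 2 cells 6..8,
// and the single leftover cell 9 is taken by case-rank 0
// (j < end_grid % mpi_per_worker). Afterwards case-rank 0 gathers the partial
// outputs, sums them and sends the total back to the other ranks of its group.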
1529
1530
1531
1532
1533template<class Problem>
1534double MatrixVectorHomogen::applyStencilLocal(const IndexDimension &Index, const VectorSparseG &input, Problem &matrixProblem, Depth &T) {
1535 double val = 0.0;
1536 IndexDimension NextIndex;
1537
1538
1539
1540 for (MultiDimCompass mc; mc.goon(); ++mc) {
1541 NextIndex = Index.nextThree2(&mc, T.returnTiefen());
1542 unsigned long k;
1543
1544
1545 if(mc.goon()&&input.getSparseGrid()->occupied(k,NextIndex)) {
1546 double value = 0.0;
1547
1548
1549
1550 value = matrixProblem.returnValue(const_cast<IndexDimension &>(Index), mc);
1551
1552
1553
1554
1555
1556 val = val + value * input.getValue(k);
1557
1558
1559
1560
1561
1562 }
1563
1564
1565 }
1566 return val;
1567
1568};
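// applyStencilLocal computes one row of the discretisation on depth T:
//
//     val(I) = sum over the 3^DimensionSparseGrid neighbours J of I of
//              stencil(I, direction) * input(J),
//
// where MultiDimCompass enumerates the neighbour directions, nextThree2()
// resolves the neighbour index on depth T, and neighbours that are not present
// in the grid's hash table are simply skipped.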
1569
1570
1571
1572template<class Problem>
1573void MatrixVectorHomogen::worker(VectorSparseG &prew, VectorSparseG &Ax,Problem &matrixProblem) {
1574#ifdef MY_MPI_ON
1575 int rank;
1576 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
1577 int task_index;
1578 MPI_Status status;
1579
1580 while (1) {
1581
1582 MPI_Recv(&task_index, 1, MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
1583
1584
1585 if (TAG_TERMINATE == status.MPI_TAG) {
1586
1587 break; // Terminate worker
1588 }
1589
1590 prew.getSparseGrid()->WorkOnHangingNodes = true;
1591
1592 Ax_neu = 0.0;
1593 gM = 0.0;
1594 bool restrictions[DimensionSparseGrid];
1595 intToBinary(task_index, restrictions);
1596
1597
1598 caseFunction(prew, restrictions, matrixProblem);
1599
1600 prew.getSparseGrid()->WorkOnHangingNodes = false;
1601 Ax = Ax + Ax_neu;
1602
1603
1604 MPI_Send(&task_index, 1, MPI_INT, 0, TAG_TASK, MPI_COMM_WORLD);
1605
1606
1607 }
1608#endif
1609
1610}
1611
1612
1613
1614
1615
1616
1618template <class Problem>
1619void MatrixVectorHomogen::caseFunction_LocalStiffnessMatrix(VectorSparseG &prew, bool *restrictions, Problem& localStiffnessMatrixAllDepths) {
1620 int rank;
1621
1622
1623
1624 gM= 0.0;
1625 AdaptiveSparseGrid_Base *grid = prew.getSparseGrid();
1626
1627
1628 grid->WorkOnHangingNodes = true;
1629
1630
1631
1632
1633
1634
1635
1636 // Sort the depths according to restrictions/prolongations
1637 depthList.sortDepths(restrictions);
1638
1639
1640
1641 std::list<Depth>* sortedDepths = depthList.getSortierteTiefen();
1642 if(depthList.getSortierteTiefen()->size()==0) return;
1643
1644
1645
1646
1647 // Prolongate the nodal values
1648 P += nodal;
1649
1650
1651
1652
1653
1654 for(int d=0; d < DimensionSparseGrid; d++) {
1655 if(restrictions[d]==0){
1656 for (Depth T: *sortedDepths) {
1657
1658 int t = T.at(d) + 1;
1659 Depth T_fine = T;
1660 T_fine.set(t, d);
1661
1662 if(depthList.isIncluded(T_fine)){
1663
1664 u_new = 0.0;
1665 u_new.setMultiLevelValues2(P,T);
1666 prolongation1D_inplace(u_new, t, d);
1667
1668
1669 P.addMultiLevelValues(u_new, T_fine);
1670
1671 }
1672 }
1673 }
1674 }
1675
1676 for (Depth T: *sortedDepths) {
1677
1678 grid->WorkOnHangingNodes = true;
1679
1680
1681 u= 0.0;
1682 u.setMultiLevelValues2(P,T);
1683
1684 z = 0.0;
1685
1686
1687
1688
1689 localStiffnessMatrixAllDepths.applyLocalStiffnessMatricesDepth(u,z,T);
1690
1691
1692#ifdef MY_MPI_ON
1693
1694 int rank;
1695 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
1696 if(rank==0){
1697
1698
1699 master_distribute_onlyCases_LS(localStiffnessMatrixAllDepths);
1700 }
1701
1702#endif
1703
1704
1705
1706
1707 z.addMultiLevelValues2(gM,T);
1708
1709
1710
1711 Depth Tcoarse(T);
1712 for (int d = 0; d < DimensionSparseGrid; d++) {
1713 if (restrictions[d]) {
1714 Tcoarse.set(T.at(d) - 1, d);
1715 if(depthList.isIncluded(Tcoarse)){
1716 restriction1D_inverted_inplace(z,Tcoarse.at(d),d);
1717 g = z;
1718 z.addMultiLevelValues2(gM,Tcoarse);
1719 gM.setMultiLevelValues2(g, Tcoarse);
1720 }
1721 }
1722 }
1723
1724
1725
1726 grid->WorkOnHangingNodes=false;
1727
1728
1729 if(depthList.isIncluded(Tcoarse)) ConvertToPrewavelet2(z, Ax_neu, Tcoarse);
1730
1731
1732
1733 }
1734
1735
1736
1737
1738}
1739
1740
1741template <class Problem>
1742void MatrixVectorHomogen::master_start_onlyCases_LS(int num_workers,Problem &localStiffnessMatrices) {
1743#ifdef MY_MPI_ON
1744
1745
1746
1747
1748 MPI_Status status;
1749 MPI_task_index=0;
1750
1751 int num_tasks;
1752 MPI_Comm_size(MPI_COMM_WORLD, &num_tasks);
1753
1754
1755
1756
1757
1758 for (int worker_rank = 1; (worker_rank < num_workers-localStiffnessMatrices.getNumberProcesses()) && (MPI_task_index < MPI_total_tasks); worker_rank++) {
1759
1760 MPI_Send(&MPI_task_index, 1, MPI_INT, worker_rank, TAG_TASK, MPI_COMM_WORLD);
1761
1762
1763 MPI_task_index++;
1764 }
1765
1766
1767
1768#endif
1769
1770}
1771
1772
1773template <class Problem>
1774void MatrixVectorHomogen::worker_localStiffnessMatrices(VectorSparseG &prew, VectorSparseG &Ax,
1775 Problem &localStiffnessMatrices) {
1776#ifdef MY_MPI_ON
1777
1778 int rank;
1779 int num_tasks;
1780 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
1781 MPI_Comm_size(MPI_COMM_WORLD, &num_tasks);
1782
1783
1784
1785 int task_index;
1786 MPI_Status status;
1787 int flag=0;
1788 if(rank<num_tasks-localStiffnessMatrices.getNumberProcesses()){
1789
1790 while (1){
1791
1792 flag =0;
1793 MPI_Iprobe(0,MPI_ANY_TAG,MPI_COMM_WORLD,&flag, &status);
1794 if(flag){
1795
1796 MPI_Recv(&task_index, 1, MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
1797
1798 if (TAG_TERMINATE == status.MPI_TAG) {
1799 break; // Terminate worker
1800 }
1801
1802
1803
1804 prew.getSparseGrid()->WorkOnHangingNodes = true;
1805
1806 Ax_neu = 0.0;
1807 gM = 0.0;
1808 bool restrictions[DimensionSparseGrid];
1809 intToBinary(task_index, restrictions);
1810
1811
1812 caseFunction_LocalStiffnessMatrix(prew, restrictions, localStiffnessMatrices);
1813
1814 prew.getSparseGrid()->WorkOnHangingNodes = false;
1815 Ax = Ax + Ax_neu;
1816
1817
1818 MPI_Send(&task_index, 1, MPI_INT, 0, TAG_TASK, MPI_COMM_WORLD);
1819 }
1820
1821 }
1822
1823
1824 for (int n = num_tasks-localStiffnessMatrices.getNumberProcesses(); n < num_tasks; n++) {
1825
1826 MPI_Send(&MPI_task_index, 1, MPI_INT, n, TAG_LOCALSTIFFNESS_END, MPI_COMM_WORLD);
1827 }
1828 }else {
1829
1830
1831 while(localStiffnessMatrices.active_worker.size()>0){
1832
1833 localStiffnessMatrices.receiveApplySendOnActiveWorkers();
1834 }
1835 }
1836
1837
1838#endif
1839
1840}
1841
1842
1843
1844template <class Problem>
1845void MatrixVectorHomogen::casefunction_mpi_onlyCases_LocalStiffnessMatrix(VectorSparseG &prew, VectorSparseG &Ax, Problem &localStiffnessMatrices) {
1846
1847#ifdef MY_MPI_ON
1848 int rank, num_procs;
1849
1850 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
1851
1852
1853 MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
1854 MPI_Barrier(MPI_COMM_WORLD);
1855 MPI_Status status;
1856 MPI_task_index=0;
1857 MPI_num_workers=num_procs;
1858 Ax = 0.0;
1859
1860
1861
1862
1863
1864 //distribute the first tasks to the workers
1865 if(rank ==0){
1866
1867
1868
1869 master_start_onlyCases_LS(num_procs,localStiffnessMatrices);
1870
1871
1872
1873 //start working
1874
1875 while(MPI_task_index<MPI_total_tasks) {
1876
1877
1878 prew.getSparseGrid()->WorkOnHangingNodes = true;
1879 Ax_neu = 0.0;
1880 gM = 0.0;
1881 bool restrictions[DimensionSparseGrid];
1882 intToBinary(MPI_task_index, restrictions);
1883 MPI_task_index++;
1884
1885 caseFunction_LocalStiffnessMatrix(prew, restrictions, localStiffnessMatrices);
1886
1887 prew.getSparseGrid()->WorkOnHangingNodes = false;
1888 Ax = Ax + Ax_neu;
1889
1890 }
1891
1892
1893 master_end_distribute_LocalStiffnessMatrix(localStiffnessMatrices);
1894
1895 }else{
1896
1897 // Worker processes
1898
1899
1900 worker_localStiffnessMatrices(prew, Ax, localStiffnessMatrices);
1901
1902
1903
1904
1905
1906 }
1907
1908
1909 MPI_Barrier(MPI_COMM_WORLD);
1910
1911 MPI_Allreduce(MPI_IN_PLACE, Ax.getDatatableVector(),Ax.getSparseGrid()->getMaximalOccupiedSecondTable(), MPI_DOUBLE, MPI_SUM,MPI_COMM_WORLD);
1912/*
1913 int n=num_procs-localStiffnessMatrices.getNumberProcesses();
1914 int ranks_mv[n];
1915 for(int i=0;i<n;i++)ranks_mv[i]=i;
1916
1917 MPI_Group world_group;
1918 MPI_Comm_group(MPI_COMM_WORLD, &world_group);
1919 MPI_Group group1;
1920 MPI_Group_incl(world_group, n, ranks_mv, &group1);
1921 MPI_Comm comm1;
1922 MPI_Comm_create(MPI_COMM_WORLD, group1, &comm1);
1923
1924
1925 if (comm1 != MPI_COMM_NULL)MPI_Allreduce(MPI_IN_PLACE, Ax.getDatatableVector(),Ax.getSparseGrid()->getMaximalOccupiedSecondTable(), MPI_DOUBLE, MPI_SUM,comm1);
1926
1927 MPI_Group_free(&world_group);
1928 MPI_Group_free(&group1);
1929
1930 if (comm1 != MPI_COMM_NULL) {
1931 MPI_Comm_free(&comm1);
1932 }
1933*/
1934 MPI_Barrier(MPI_COMM_WORLD);
1935#endif
1936
1937
1938
1939};
1940
1941
1942template <class Problem>
1943void MatrixVectorHomogen::master_distribute_onlyCases_LS(Problem& localStiffnessMatrices) {
1944#ifdef MY_MPI_ON
1945
1946 int rank;
1947 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
1948
1949
1950 MPI_Status status;
1951 int flag=0;
1952 int old_index;
1953
1954
1955
1956
1957
1958 for (int worker =1; worker < MPI_num_workers-localStiffnessMatrices.getNumberProcesses() && MPI_task_index < MPI_total_tasks; worker++) {
1959
1960 MPI_Iprobe(worker, TAG_TASK, MPI_COMM_WORLD, &flag, &status);
1961
1962 if (flag){
1963
1964 MPI_Recv(&old_index, 1, MPI_INT, worker, TAG_TASK, MPI_COMM_WORLD, &status);
1965
1966 MPI_Send(&MPI_task_index, 1, MPI_INT, worker, TAG_TASK, MPI_COMM_WORLD);
1967 MPI_task_index++;
1968 }
1969
1970
1971 }
1972
1973
1974
1975
1976
1977
1978#endif
1979
1980};
1981
1982
1983
1984
1985template <class LS_Matrix>
1986void MatrixVectorHomogen::master_end_distribute_LocalStiffnessMatrix(LS_Matrix &localStiffnessMatrices){
1987
1988#ifdef MY_MPI_ON
1989
1990
1991
1992 MPI_Status status;
1993 int flag=0;
1994 int old_index;
1995
1996
1997
1998 for (int n = MPI_num_workers-localStiffnessMatrices.getNumberProcesses(); n < MPI_num_workers; n++) {
1999
2000 MPI_Send(&MPI_task_index, 1, MPI_INT, n, TAG_LOCALSTIFFNESS_END, MPI_COMM_WORLD);
2001
2002 }
2003
2004
2005 // Send termination signal to workers
2006 for (int worker_rank = 1; worker_rank < MPI_num_workers-localStiffnessMatrices.getNumberProcesses(); worker_rank++) {
2007
2008 //receive message, that worker has completed all of its tasks
2009 MPI_Recv(&old_index, 1, MPI_INT, worker_rank, TAG_TASK, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
2010
2011 //send termination signal to worker
2012
2013 MPI_Send(&MPI_task_index, 1, MPI_INT, worker_rank, TAG_TERMINATE, MPI_COMM_WORLD);
2014
2015
2016 }
2017
2018
2019
2020
2021
2022
2023#endif
2024
2025}
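// Shutdown order used above: the ranks that own local stiffness matrices are
// notified first (TAG_LOCALSTIFFNESS_END) so they stop serving apply-requests;
// then each remaining case worker gets TAG_TERMINATE, but only after its last
// result message has been received, so a worker is never terminated while a task
// of the current multiplication is still in flight.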
2026
2027
2028
2029
2030
2031#endif //SGRUN_MATRIXVECTORHOMOGENH_H
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2050
2051/* inline void prolongation1D(VectorSparseG &coarse, VectorSparseG &fine, int &t, int &d) {
2052 AdaptiveSparseGrid_Base *grid = fine.getSparseGrid();
2053 unsigned long maxocc = grid->getMaximalOccupiedSecondTable();
2054
2055 ListOfDepthOrderedSubgrids::iterator iter(list);
2056 iter.gotoBegin();
2057 do {
2058 Depth Tlocal = iter.getDepth();
2059 if (Tlocal.at(d) == t) {
2060 SubgridFixedDepth::iterator inneriter(*iter.getSubgrid());
2061 inneriter.gotobegin();
2062 do {
2063 IndexDimension Index = inneriter.getPoint();
2064 unsigned long i = inneriter.geti();
2065 double value = fine.getValue(i);
2066 if ((!Index.isAtRightBoundary(d)) && (!Index.isAtLeftBoundary(d)))
2067 value = value + 0.5 * (fine.getValue(Index.nextRight(d)) + fine.getValue(Index.nextLeft(d)));
2068 //coarse = value | Index;
2069 coarse.setValue(i, value);
2070
2071 } while (inneriter.next());
2072 }
2073
2074 } while (iter.next());
2075
2076*//*
2077
2078 for (unsigned long i = 0; i < maxocc; i++) {
2079 IndexDimension Index = grid->getIndexOfTable(i);
2080
2081 if (Index.getDepth(d) == t) {
2082 double value = fine.getValue(i);
2083*//*
2084*//*
2085 if (Index.isAtLeftBoundary(d))
2086 value = fine.getValue(Index);
2087
2088 if (Index.isAtRightBoundary(d))
2089 value = fine.getValue(Index);
2090*//*
2091
2092
2093 if ((!Index.isAtRightBoundary(d)) && (!Index.isAtLeftBoundary(d)))
2094 value = value + 0.5 *( fine.getValue(Index.nextRight(d))+fine.getValue(Index.nextLeft(d)));
2095 //coarse.setValue(Index, value);
2096 coarse = value | Index;
2097
2098
2099 }
2100
2101
2102 }
2103*//*
2104
2105
2106 }*/
2107
2108
2109/* void prolongation1D_inplace_inverted(VectorSparseG &vec,const int &t,const int &d) {
2110 AdaptiveSparseGrid_Base *grid = vec.getSparseGrid();
2111 unsigned long maxocc = grid->getMaximalOccupiedSecondTable();
2112
2113
2114 ListOfDepthOrderedSubgrids::iterator iter(list);
2115 iter.gotoBegin();
2116 do {
2117
2118 Depth Tlocal = iter.getDepth();
2119 // Tlocal.Print();
2120 // cout << "Nodes: " <<endl;
2121
2122 if (Tlocal.at(d) < t) {
2123 Depth searchDepth = Tlocal;
2124 searchDepth.set(t,d);
2125 //SingleDepthHashGrid& singleDepthHashGrid = grid->getMultiDepthHashGrid()->getGridForDepth(searchDepth);
2126 // Tlocal.Print();
2127 SubgridFixedDepth::iterator inneriter(*iter.getSubgrid());
2128 inneriter.gotobegin();
2129
2130 // int i = 0;
2131 do {
2132 // i++;
2133 IndexDimension Index = inneriter.getPoint();
2134 unsigned long i = inneriter.geti();
2135 double value = vec.getValue(i) * 0.5;
2136 IndexDimension rightIndex = Index.nextRight(d,t);
2137 if (!rightIndex.isAtRightBoundary(d)){
2138 // vec.getValue(rightIndex,value,singleDepthHashGrid)
2139 vec.addToValue(rightIndex,value);
2140 // value += vec.getValue(rightIndex);
2141 }
2142 IndexDimension leftIndex = Index.nextLeft(d,t);
2143 if (!leftIndex.isAtLeftBoundary(d)){
2144 vec.addToValue(leftIndex,value);
2145 // value += vec.getValue(leftIndex);
2146 }
2147 } while (inneriter.next());
2148 // cout << i << ", " << (1<<Tlocal.LoneNorm()-3)<< endl;
2149 }
2150
2151 // cout << "iter" << endl;
2152 } while (iter.next());
2153 // cout << "end" << endl;
2154
2155
2156 }*/
2157
2158/* void prolongation1D_inplace_singleHash(VectorSparseG &vec,const int &t,const int &d) {
2159 AdaptiveSparseGrid_Base *grid = vec.getSparseGrid();
2160 unsigned long maxocc = grid->getMaximalOccupiedSecondTable();
2161
2162
2163
2164 ListOfDepthOrderedSubgrids::iterator iter(list);
2165 iter.gotoBegin();
2166 do {
2167
2168 Depth Tlocal = iter.getDepth();
2169 // Tlocal.Print();
2170 // cout << "Nodes: " <<endl;
2171
2172 if (Tlocal.at(d) == t) {
2173
2174
2175 vector<SingleDepthHashGrid*> fineDepthGrids = grid->getMultiDepthHashGrid()->getGridsForDepthInDirection(Tlocal,d);
2176 SubgridFixedDepth::iterator inneriter(*iter.getSubgrid());
2177 inneriter.gotobegin();
2178 do {
2179 IndexDimension Index = inneriter.getPoint();
2180 unsigned long i = inneriter.geti();
2181 double value = vec.getValue(i);
2182 if ((!Index.isAtRightBoundary(d)) && (!Index.isAtLeftBoundary(d))){
2183 auto right = Index.nextRight(d);
2184 auto left = Index.nextLeft(d);
2185 // auto* gridRight = fineDepthGrids[right.getDepth(d)];
2186 // auto* gridLeft = fineDepthGrids[left.getDepth(d)];
2187 // auto valRight = vec.getValue(right,gridRight);
2188 // auto valLeft = vec.getValue(left,gridLeft);
2189 value = value + 0.5 * (vec.getValue(right,fineDepthGrids[right.getDepth(d)]) + vec.getValue(left,fineDepthGrids[left.getDepth(d)]));
2190 // value = value + 0.5 * (vec.getValue(Index.nextRight(d)) + vec.getValue(Index.nextLeft(d)));
2191 // auto p= vec.getValue(right,fineDepthGrids[right.getDepth(d)]);
2192 }
2193 //coarse = value | Index;
2194 vec.setValue(i, value);
2195 // cout << Depth(Index.nextRight(d)).at(d)<<", \t" <<Depth(Index.nextLeft(d)).at(d) << endl;
2196
2197 // cout << "Left: " ;
2198 // Depth(Index.nextLeft(d)).Print();
2199 // cout << "Right: " ;
2200 // Depth(Index.nextRight(d)).Print();
2201 } while (inneriter.next());
2202
2203
2204 }
2205
2206 // cout << "iter" << endl;
2207 } while (iter.next());
2208 // cout << "end" << endl;
2209
2210
2211 }*/
2212
2213/* inline void prolongation(VectorSparseG &coarse, VectorSparseG &fine, Depth &Tcoarse, Depth &Tfine) {
2214 fine = coarse;
2215 int tcoarse;
2216 int tfine;
2217
2218 for (int d = 0; d < DimensionSparseGrid; d++) {
2219
2220 tcoarse = Tcoarse.at(d) + 1;
2221 tfine = Tfine.at(d);
2222 for (int t = tcoarse; t <= tfine; ++t) {
2223 prolongation1D(fine, fine, t, d);
2224 }
2225 }
2226 }*/
2227
2228
2229
2230/* inline void restriction1D_inverted_inplace( const VectorSparseG &fine,const VectorSparseG &coarse,int t, int d) {
2231
2232
2233 double value=0.0;
2234
2235 AdaptiveSparseGrid_Base* grid = fine.getSparseGrid();
2236
2237 ListOfDepthOrderedSubgrids::iterator iter(list);
2238 iter.gotoBegin();
2239 do {
2240 Depth Tlocal = iter.getDepth();
2241 // SingleDepthHashGrid& coarseDepthGrid = grid->getMultiDepthHashGrid()->getGridForDepth(Tlocal);
2242 if(Tlocal.at(d)<=t){
2243
2244 SubgridFixedDepth::iterator inneriter(*iter.getSubgrid());
2245 inneriter.gotobegin();
2246 // Depth fineDepth = Depth(inneriter.getPoint());
2247 // SingleDepthHashGrid& fineDepthGrid = grid->getMultiDepthHashGrid()->getGridForDepth(Depth(inneriter.getPoint().nextRight(d,t+1)));
2248 do {
2249
2250 unsigned long i = inneriter.geti();
2251 IndexDimension I = inneriter.getPoint();
2252 double value = fine.getValue(i);
2253 unsigned long k;
2254
2255 coarse.setValue(i,value);
2256
2257 } while (inneriter.next());
2258
2259 }
2260
2261 if (Tlocal.at(d) == t + 1 && Tlocal>0) {
2262
2263 SubgridFixedDepth::iterator inneriter(*iter.getSubgrid());
2264 inneriter.gotobegin();
2265 // Depth fineDepth = Depth(inneriter.getPoint());
2266 SingleDepthHashGrid& fineDepthGrid = grid->getMultiDepthHashGrid()->getGridForDepth(Depth(inneriter.getPoint().nextRight(d,t+1)));
2267 do {
2268
2269 IndexDimension Index = inneriter.getPoint();
2270 unsigned long i = inneriter.geti();
2271 value = 0.5 * fine.getValue(i);
2272
2273 if (t >= 0) {
2274
2275 IndexDimension rightIndex = Index.nextRight(d);
2276 if (!Index.isAtRightBoundary(d)) {
2277 // vec.getValue(rightIndex,value,singleDepthHashGrid)
2278 coarse.addToValue(rightIndex, value);
2279 // value += vec.getValue(rightIndex);
2280 }
2281
2282
2283 IndexDimension leftIndex = Index.nextLeft(d);
2284 if (!Index.isAtLeftBoundary(d)) {
2285 // vec.getValue(rightIndex,value,singleDepthHashGrid)
2286 coarse.addToValue(leftIndex, value);
2287 // value += vec.getValue(rightIndex);
2288 }
2289 }
2290
2291 } while (inneriter.next());
2292 }
2293 } while (iter.next());
2294
2295 };*/
2296
2297/* inline void restriction1D_inverted2( const VectorSparseG &fine, VectorSparseG &coarse, int t, int d) {
2298
2299 AdaptiveSparseGrid_Base *grid = fine.getSparseGrid();
2300
2301 double value;
2302 double valueL;
2303 double valueR;
2304
2305 ListOfDepthOrderedSubgrids::iterator iter(list);
2306 iter.gotoBegin();
2307 do {
2308 Depth Tlocal = iter.getDepth();
2309 // SingleDepthHashGrid& coarseDepthGrid = grid->getMultiDepthHashGrid()->getGridForDepth(Tlocal);
2310 if (Tlocal.at(d) == t + 1 && Tlocal > 0) {
2311 SubgridFixedDepth::iterator inneriter(*iter.getSubgrid());
2312 inneriter.gotobegin();
2313 // Depth fineDepth = Depth(inneriter.getPoint());
2314 // SingleDepthHashGrid& fineDepthGrid = grid->getMultiDepthHashGrid()->getGridForDepth(Depth(inneriter.getPoint().nextRight(d,t+1)));
2315 do {
2316
2317 IndexDimension Index = inneriter.getPoint();
2318 unsigned long i = inneriter.geti();
2319 value = 0.5 * fine.getValue(i);
2320
2321 if (t >= 0) {
2322 IndexDimension rightIndex = Index.nextRight(d);
2323 if (!rightIndex.isAtRightBoundary(d)){
2324 // vec.getValue(rightIndex,value,singleDepthHashGrid)
2325 coarse.addToValue(rightIndex,value);
2326 // value += vec.getValue(rightIndex);
2327 }
2328
2329 IndexDimension leftIndex = Index.nextLeft(d);
2330 if (!leftIndex.isAtRightBoundary(d)){
2331 // vec.getValue(rightIndex,value,singleDepthHashGrid)
2332 coarse.addToValue(leftIndex,value);
2333 // value += vec.getValue(rightIndex);
2334 }
2335 }
2336 } while (inneriter.next());
2337 }
2338 } while (iter.next());
2339
2340 };*/
2341
2342
2343
2344/* inline void restriction1D(const VectorSparseG &fine, VectorSparseG &coarse, int t, int d) {
2345
2346 AdaptiveSparseGrid_Base *grid = fine.getSparseGrid();
2347
2348 double value;
2349 double valueL;
2350 double valueR;
2351
2352 ListOfDepthOrderedSubgrids::iterator iter(list);
2353 iter.gotoBegin();
2354 do {
2355 Depth Tlocal = iter.getDepth();
2356 // SingleDepthHashGrid& coarseDepthGrid = grid->getMultiDepthHashGrid()->getGridForDepth(Tlocal);
2357 if (Tlocal.at(d) <= t && Tlocal > 0) {
2358 SubgridFixedDepth::iterator inneriter(*iter.getSubgrid());
2359 inneriter.gotobegin();
2360 // Depth fineDepth = Depth(inneriter.getPoint());
2361 SingleDepthHashGrid& fineDepthGrid = grid->getMultiDepthHashGrid()->getGridForDepth(Depth(inneriter.getPoint().nextRight(d,t+1)));
2362 do {
2363
2364 IndexDimension Index = inneriter.getPoint();
2365
2366
2367
2368 unsigned long i = inneriter.geti();
2369 value = fine.getValue(i);
2370 valueL = 0.0;
2371 valueR = 0.0;
2372 if (t >= 0) {
2373 if (!(Index.isAtRightBoundary(d))) valueL = 0.5 * fine.getValue(Index.nextRight(d, t + 1));
2374 if (!(Index.isAtLeftBoundary(d))) valueR = 0.5 * fine.getValue(Index.nextLeft(d, t + 1));
2375 }
2376
2377
2378 value = value + valueL + valueR;
2379 //coarse.setValue(Index, value);
2380 coarse.setValue(i, value);
2381
2382
2383
2384 } while (inneriter.next());
2385 }
2386 } while (iter.next());
2387
2388 };*/
2389
2390
2391/*
2392void prolongation_inplace(VectorSparseG &vec, const Depth &Tcoarse, const Depth &Tfine) {
2393 int tcoarse;
2394 int tfine;
2395
2396
2397
2398 for (int d = 0; d < DimensionSparseGrid; d++) {
2399
2400 tcoarse = Tcoarse.at(d) + 1;
2401 tfine = Tfine.at(d);
2402 for (int t = tcoarse; t <= tfine; ++t) {
2403 // if(t = tfine)
2404 // prolongation1D_inplace_inverted(vec, t, d);
2405 // else
2406 prolongation1D_inplace(vec, t, d);
2407 }
2408 }
2409}*/