diff --git a/AMDiS/src/parallel/PetscHelper.cc b/AMDiS/src/parallel/PetscHelper.cc
index eaf175b8f2c8a347164a5a4389db92828821acec..800709040db4ce7896f4671efabbd50191fe830d 100644
--- a/AMDiS/src/parallel/PetscHelper.cc
+++ b/AMDiS/src/parallel/PetscHelper.cc
@@ -146,23 +146,101 @@ namespace AMDiS {
       int nOverallRows = nOverallRows0 + nOverallRows1;
       int firstRow = firstRow0 + firstRow1;
 
+      int mpiSize = MPI::COMM_WORLD.Get_size();
+      int mpiRank = MPI::COMM_WORLD.Get_rank();
+      vector<int> allFirstRow0(mpiSize + 1, 0);
+      vector<int> allFirstRow1(mpiSize + 1, 0);
+      MPI::COMM_WORLD.Allgather(&nRankRows0, 1, MPI_INT, &(allFirstRow0[1]), 1, MPI_INT);
+      MPI::COMM_WORLD.Allgather(&nRankRows1, 1, MPI_INT, &(allFirstRow1[1]), 1, MPI_INT);
+
+      for (int i = 1; i < mpiSize + 1; i++) {
+	allFirstRow0[i] += allFirstRow0[i - 1];
+	allFirstRow1[i] += allFirstRow1[i - 1];
+      }
+
+      TEST_EXIT_DBG(allFirstRow0[mpiRank] == firstRow0)("Should not happen!\n");
+      TEST_EXIT_DBG(allFirstRow1[mpiRank] == firstRow1)("Should not happen!\n");
+
       MatCreateAIJ(PETSC_COMM_WORLD, 
 		   nRankRows, nRankRows,
 		   nOverallRows, nOverallRows,
-		   10, PETSC_NULL, 10, PETSC_NULL, &mat);
+		   25, PETSC_NULL, 25, PETSC_NULL, &mat);
+      MatSetOption(mat, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
+
+      PetscInt nCols;
+      const PetscInt *cols;
+      const PetscScalar *vals;
 
       for (int i = 0; i < nRankRows0; i++) {
-	PetscInt nCols;
-	const PetscInt *cols;
-	const PetscScalar *vals;
+	PetscInt newRowIndex = firstRow + i;
 
-	MatGetRow(mat00, i + firstRow0, &nCols, &cols, &vals);
+	MatGetRow(mat00, firstRow0 + i, &nCols, &cols, &vals);
+	for (int j = 0; j < nCols; j++) {
+	  int rank = -1;
+	  for (int k = 0; k < mpiSize; k++) {
+	    if (cols[j] >= allFirstRow0[k] && cols[j] < allFirstRow0[k + 1]) {
+	      rank = k;
+	      break;
+	    }
+	  }
+	  TEST_EXIT_DBG(rank != -1)("Should not happen!\n");
+
+	  PetscInt newColIndex = cols[j] + allFirstRow1[rank];
+	  MatSetValue(mat, newRowIndex, newColIndex, vals[j], INSERT_VALUES);
+	}
+	MatRestoreRow(mat00, firstRow0 + i, &nCols, &cols, &vals);
 
+	MatGetRow(mat01, firstRow0 + i, &nCols, &cols, &vals);
 	for (int j = 0; j < nCols; j++) {
-	  MatSetValue(mat, firstRow + i, cols[j], vals[j], INSERT_VALUES);
+	  int rank = -1;
+	  for (int k = 0; k < mpiSize; k++) {
+	    if (cols[j] >= allFirstRow1[k] && cols[j] < allFirstRow1[k + 1]) {
+	      rank = k;
+	      break;
+	    }
+	  }
+	  TEST_EXIT_DBG(rank != -1)("Should not happen!\n");
+
+	  PetscInt newColIndex = cols[j] + allFirstRow0[rank + 1];
+	  MatSetValue(mat, newRowIndex, newColIndex, vals[j], INSERT_VALUES);
 	}
+	MatRestoreRow(mat01, firstRow0 + i, &nCols, &cols, &vals);	
+      }
+
+      for (int i = 0; i < nRankRows1; i++) {
+	PetscInt newRowIndex = firstRow + nRankRows0 + i;
+
+	MatGetRow(mat10, firstRow1 + i, &nCols, &cols, &vals);
+	for (int j = 0; j < nCols; j++) {
+	  int rank = -1;
+	  for (int k = 0; k < mpiSize; k++) {
+	    if (cols[j] >= allFirstRow0[k] && cols[j] < allFirstRow0[k + 1]) {
+	      rank = k;
+	      break;
+	    }
+	  }
+	  TEST_EXIT_DBG(rank != -1)("Should not happen!\n");
 
-	MatRestoreRow(mat00, i + firstRow0, &nCols, &cols, &vals);
+	  PetscInt newColIndex = cols[j] + allFirstRow1[rank];
+	  MatSetValue(mat, newRowIndex, newColIndex, vals[j], INSERT_VALUES);
+	}
+	MatRestoreRow(mat10, firstRow1 + i, &nCols, &cols, &vals);
+
+	MatGetRow(mat11, firstRow1 + i, &nCols, &cols, &vals);
+	for (int j = 0; j < nCols; j++) {
+	  int rank = -1;
+	  for (int k = 0; k < mpiSize; k++) {
+	    if (cols[j] >= allFirstRow1[k] && cols[j] < allFirstRow1[k + 1]) {
+	      rank = k;
+	      break;
+	    }
+	  }
+	  TEST_EXIT_DBG(rank != -1)("Should not happen!\n");
+
+	  PetscInt newColIndex = cols[j] + allFirstRow0[rank + 1];
+	  MatSetValue(mat, newRowIndex, newColIndex, vals[j], INSERT_VALUES);
+	}
+	MatRestoreRow(mat11, firstRow1 + i, &nCols, &cols, &vals);
       }
 
       MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY);
diff --git a/AMDiS/src/parallel/PetscSolverFeti.cc b/AMDiS/src/parallel/PetscSolverFeti.cc
index 583a1f63e26ca6497f5a47dfeea4323250e30216..d07d20f16a713138a831d52f6cc6b25841b1d3c7 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.cc
+++ b/AMDiS/src/parallel/PetscSolverFeti.cc
@@ -960,7 +960,7 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverFeti::createMatExplicitAugmentedSchurPrimal()");
 
-    int creationMode = 1;
+    int creationMode = 0;
     Parameters::get("parallel->feti->schur primal creation mode", creationMode);
     if (creationMode == 0) {
       // qj = -Q J
@@ -1018,7 +1018,7 @@ namespace AMDiS {
       Mat nestMat[4] = {mat00, mat01, mat10, mat11};
       MatCreateNest(PETSC_COMM_WORLD, 2, PETSC_NULL, 2, PETSC_NULL, nestMat, &matTmp);
       petsc_helper::matNestConvert(matTmp, mat_schur_primal);
-      
+
       MatDestroy(&mat00);
       MatDestroy(&mat01);
       MatDestroy(&mat10);
@@ -1047,7 +1047,7 @@ namespace AMDiS {
 		     &tmp);
       MatShellSetOperation(tmp, MATOP_MULT, 
 			   (void(*)(void))petscMultMatSchurPrimalAugmented);
-
+      
       MatComputeExplicitOperator(tmp, &mat_schur_primal);
 
       MatDestroy(&tmp);