p***@gmail.com
2007-12-18 21:06:53 UTC
I have written the simple code below in an attempt to figure out a bug
in a larger code. This code attempts to complete a ghost cell exchange
between at least 3 processes (assumes non-periodic boundary
conditions). I believe that the problem lies in including a
std::vector object in an MPI derived datatype. I usually get two types
of errors when I run this code. The first is a crash with the SIGSEGV
signal and the other actually runs through the code but there ends up
being no transfer of data between the processes. When I run it with
DDT (Distributed Debugging Tool) it appears to hang on the message
transfer between node=0 and node=1.
OS -> RedHat Enterprise 4.0
MPI and compiler -> mpich-ch_p4-gcc-1.2.7 (standard with OSCAR which
is what our cluster is built with)
HEADER FILE
//std_vector_web_question.h
#ifndef STD_VECTOR_WEB_QUESTION_H
#define STD_VECTOR_WEB_QUESTION_H
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
using namespace std;
class vect
{
private:
std::vector<int> a;
std::vector<int>::iterator it;
ofstream output;
public:
vect(){}
~vect(){}
void add(const int n){
int i;
for(i=0;i<n;i++)
a.push_back(0);
}
void fill(const int start){
int j=start;
for(it=a.begin();it<a.end();it++){
*it=j;
j++;
}
}
void display(string mes,int node){
output.open("data.txt", ios::out | ios::app);
output << endl << mes << " a vector on process " << node
<< endl;
for(it=a.begin();it<a.end();it++){
output << *it << " ";
}
output.close();
}
};
#endif
SOURCE FILE
//std_vector_web_question.cc
#include <iostream>
#include <vector>
#include <string>
#include "std_vector_web_question.h"
#include "mpi.h"
using namespace std;
int main(int argc,char **argv)
{
int numnodes,mynode;
const int N=5;
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numnodes);
MPI_Comm_rank(MPI_COMM_WORLD,&mynode);
vect wsend,esend,wrecv,erecv; //four vectors for passing data
between processes
int eastbor,westbor;
//Get the neighbor process id's
if(mynode==0){
eastbor=9999; //placeholder
westbor=mynode+1;
}
else if(mynode==numnodes-1){
eastbor=mynode-1;
westbor=9999; //placeholder
}
else{
eastbor=mynode-1;
westbor=mynode+1;
}
//set up the vectors in the object and fill in the data
wsend.add(N);
wrecv.add(N);
esend.add(N);
erecv.add(N);
wsend.fill(N);
wrecv.fill(0);
esend.fill(N);
erecv.fill(0);
//now set up the MPI Datatype information
MPI_Datatype onevec;
MPI_Type_contiguous(N,MPI_INT,&onevec);
MPI_Type_commit(&onevec);
//now send data to the neighbors looking at present and after send
values
if(mynode==0){
wsend.display("wsend before",mynode);
wrecv.display("wrecv before",mynode);
}
else if(mynode==(numnodes-1)){
esend.display("esend before",mynode);
erecv.display("erecv before",mynode);
}
else{
wsend.display("wsend before",mynode);
wrecv.display("wrecv before",mynode);
esend.display("esend before",mynode);
erecv.display("erecv before",mynode);
}
MPI_Status status;
if(mynode!=0 && mynode!=(numnodes-1)){
MPI_Sendrecv(&wsend,1,onevec,westbor,2,&wrecv,1,onevec,westbor,
1,MPI_COMM_WORLD,&status);
MPI_Sendrecv(&esend,1,onevec,eastbor,3,&erecv,1,onevec,eastbor,
4,MPI_COMM_WORLD,&status);
}
else if(mynode==0){
MPI_Sendrecv(&wsend,1,onevec,westbor,4,&wrecv,1,onevec,westbor,
3,MPI_COMM_WORLD,&status);
}
else if(mynode==(numnodes-1)){
MPI_Sendrecv(&esend,1,onevec,eastbor,1,&erecv,1,onevec,eastbor,
2,MPI_COMM_WORLD,&status);
}
if(mynode==0){
wsend.display("wsend after",mynode);
wrecv.display("wrecv after",mynode);
}
else if(mynode==(numnodes-1)){
esend.display("esend after",mynode);
erecv.display("erecv after",mynode);
}
else{
wsend.display("wsend after",mynode);
wrecv.display("wrecv after",mynode);
esend.display("esend after",mynode);
erecv.display("erecv after",mynode);
}
//now clean up
MPI_Type_free(&onevec);
MPI_Finalize();
cout << "\nAll done!\n";
return 0;
}
MAKEFILE
FINALOBJECTS=std_vector_web_question.o
vec : $(FINALOBJECTS)
mpiCC -g $(FINALOBJECTS) -lm -o vec
std_vector_web_question.o : std_vector_web_question.cc
std_vector_web_question.h
mpiCC -Wall -g -c std_vector_web_question.cc
Does this work for anyone else? If not can you explain where it is
going wrong? Thanks.
-Darcy
in a larger code. This code attempts to complete a ghost cell exchange
between at least 3 processes (assumes non-periodic boundary
conditions). I believe that the problem lies in including a
std::vector object in an MPI derived datatype. I usually get two types
of errors when I run this code. The first is a crash with the SIGSEGV
signal and the other actually runs through the code but there ends up
being no transfer of data between the processes. When I run it with
DDT (Distributed Debugging Tool) it appears to hang on the message
transfer between node=0 and node=1.
OS -> RedHat Enterprise 4.0
MPI and compiler -> mpich-ch_p4-gcc-1.2.7 (standard with OSCAR which
is what our cluster is built with)
HEADER FILE
//std_vector_web_question.h
#ifndef STD_VECTOR_WEB_QUESTION_H
#define STD_VECTOR_WEB_QUESTION_H
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
using namespace std;
class vect
{
private:
std::vector<int> a;
std::vector<int>::iterator it;
ofstream output;
public:
vect(){}
~vect(){}
void add(const int n){
int i;
for(i=0;i<n;i++)
a.push_back(0);
}
void fill(const int start){
int j=start;
for(it=a.begin();it<a.end();it++){
*it=j;
j++;
}
}
void display(string mes,int node){
output.open("data.txt", ios::out | ios::app);
output << endl << mes << " a vector on process " << node
<< endl;
for(it=a.begin();it<a.end();it++){
output << *it << " ";
}
output.close();
}
};
#endif
SOURCE FILE
//std_vector_web_question.cc
#include <iostream>
#include <vector>
#include <string>
#include "std_vector_web_question.h"
#include "mpi.h"
using namespace std;
int main(int argc,char **argv)
{
int numnodes,mynode;
const int N=5;
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numnodes);
MPI_Comm_rank(MPI_COMM_WORLD,&mynode);
vect wsend,esend,wrecv,erecv; //four vectors for passing data
between processes
int eastbor,westbor;
//Get the neighbor process id's
if(mynode==0){
eastbor=9999; //placeholder
westbor=mynode+1;
}
else if(mynode==numnodes-1){
eastbor=mynode-1;
westbor=9999; //placeholder
}
else{
eastbor=mynode-1;
westbor=mynode+1;
}
//set up the vectors in the object and fill in the data
wsend.add(N);
wrecv.add(N);
esend.add(N);
erecv.add(N);
wsend.fill(N);
wrecv.fill(0);
esend.fill(N);
erecv.fill(0);
//now set up the MPI Datatype information
MPI_Datatype onevec;
MPI_Type_contiguous(N,MPI_INT,&onevec);
MPI_Type_commit(&onevec);
//now send data to the neighbors looking at present and after send
values
if(mynode==0){
wsend.display("wsend before",mynode);
wrecv.display("wrecv before",mynode);
}
else if(mynode==(numnodes-1)){
esend.display("esend before",mynode);
erecv.display("erecv before",mynode);
}
else{
wsend.display("wsend before",mynode);
wrecv.display("wrecv before",mynode);
esend.display("esend before",mynode);
erecv.display("erecv before",mynode);
}
MPI_Status status;
if(mynode!=0 && mynode!=(numnodes-1)){
MPI_Sendrecv(&wsend,1,onevec,westbor,2,&wrecv,1,onevec,westbor,
1,MPI_COMM_WORLD,&status);
MPI_Sendrecv(&esend,1,onevec,eastbor,3,&erecv,1,onevec,eastbor,
4,MPI_COMM_WORLD,&status);
}
else if(mynode==0){
MPI_Sendrecv(&wsend,1,onevec,westbor,4,&wrecv,1,onevec,westbor,
3,MPI_COMM_WORLD,&status);
}
else if(mynode==(numnodes-1)){
MPI_Sendrecv(&esend,1,onevec,eastbor,1,&erecv,1,onevec,eastbor,
2,MPI_COMM_WORLD,&status);
}
if(mynode==0){
wsend.display("wsend after",mynode);
wrecv.display("wrecv after",mynode);
}
else if(mynode==(numnodes-1)){
esend.display("esend after",mynode);
erecv.display("erecv after",mynode);
}
else{
wsend.display("wsend after",mynode);
wrecv.display("wrecv after",mynode);
esend.display("esend after",mynode);
erecv.display("erecv after",mynode);
}
//now clean up
MPI_Type_free(&onevec);
MPI_Finalize();
cout << "\nAll done!\n";
return 0;
}
MAKEFILE
FINALOBJECTS=std_vector_web_question.o
vec : $(FINALOBJECTS)
mpiCC -g $(FINALOBJECTS) -lm -o vec
std_vector_web_question.o : std_vector_web_question.cc
std_vector_web_question.h
mpiCC -Wall -g -c std_vector_web_question.cc
Does this work for anyone else? If not can you explain where it is
going wrong? Thanks.
-Darcy