Implement a parallel odd-even sort algorithm on a GPU (or an ARM equivalent).

OddEven.cu
1:  #include"iostream"  
2:  using namespace std;  
/*
 * Odd-even transposition sort: sorts in[0..n) into ascending order, in place.
 *
 * Parameters
 *   in : device pointer to n ints.
 *   n  : number of elements; values below 2 leave the array untouched.
 *
 * Launch requirements
 *   - Only block 0 does any work, because the phases are separated by
 *     block-level barriers. Any additional blocks return immediately.
 *   - Any blockDim.x >= 1 is valid: each thread walks the pairs of the current
 *     phase with a stride of blockDim.x, so the block does not have to supply
 *     one thread per pair.
 *   - No shared memory is used.
 */
__global__ void sort(int *in, int n)
{
    // Both conditions are uniform across a block, so every thread of a block
    // takes the same path and none is left waiting at a barrier below.
    if (blockIdx.x != 0 || n < 2)
        return;

    const int evenPairs = n / 2;        // pairs (0,1), (2,3), ...
    const int oddPairs  = (n - 1) / 2;  // pairs (1,2), (3,4), ... always in range

    bool evenPhase = true;
    // Treated as "true" before the first pass so that the loop cannot stop
    // until one even phase and one odd phase have both been executed.
    bool previousPhaseSwapped = true;

    while (true)
    {
        const int firstIndex = evenPhase ? 0 : 1;
        const int pairCount  = evenPhase ? evenPairs : oddPairs;

        // Pairs within one phase are disjoint, so threads never touch the
        // same element during a phase.
        bool swappedHere = false;
        for (int pair = threadIdx.x; pair < pairCount; pair += blockDim.x)
        {
            const int lo = firstIndex + 2 * pair;
            const int left  = in[lo];
            const int right = in[lo + 1];
            if (left > right)
            {
                in[lo]     = right;
                in[lo + 1] = left;
                swappedHere = true;
            }
        }

        // Block barrier that also ORs the per-thread flags: every thread gets
        // the same answer, and the swaps of this phase are visible to the
        // whole block before the next phase reads the array.
        const bool phaseSwapped = __syncthreads_or(swappedHere) != 0;

        // Two consecutive phases without a swap mean every adjacent pair is
        // in order, i.e. the array is sorted.
        if (!phaseSwapped && !previousPhaseSwapped)
            break;

        previousPhaseSwapped = phaseSwapped;
        evenPhase = !evenPhase;
    }
}
// Prints a diagnostic on stderr when a CUDA runtime call failed.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    std::cerr << "CUDA error during " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

/*
 * Reads an element count and that many integers from standard input, sorts
 * them on the GPU with the `sort` kernel, and prints the sorted sequence.
 *
 * Returns 0 on success and 1 on invalid input or any CUDA failure.
 */
int main()
{
    int n = 0;
    std::cout << "\nEnter no. of elements you want to sort: ";
    if (!(std::cin >> n) || n <= 0)
    {
        std::cerr << "Element count must be a positive integer." << std::endl;
        return 1;
    }

    // The kernel is launched as a single block with one thread per pair, so
    // n/2 has to fit into one block on the current device.
    const int threads = n / 2;
    if (threads > 0)
    {
        int device = 0;
        cudaDeviceProp prop;
        if (!cudaOk(cudaGetDevice(&device), "cudaGetDevice") ||
            !cudaOk(cudaGetDeviceProperties(&prop, device), "cudaGetDeviceProperties"))
            return 1;
        if (threads > prop.maxThreadsPerBlock)
        {
            std::cerr << "Too many elements: at most "
                      << 2 * prop.maxThreadsPerBlock + 1
                      << " can be sorted by a single block on this device." << std::endl;
            return 1;
        }
    }

    const size_t size = sizeof(int) * static_cast<size_t>(n);
    int *a = new int[n];   // host copy of the data
    int *d_a = 0;          // device copy; stays null until cudaMalloc succeeds

    std::cout << "\n\nEnter no.s to be sorted: \n";
    bool ok = true;
    for (int i = 0; i < n && ok; i++)
    {
        if (!(std::cin >> a[i]))
        {
            std::cerr << "Failed to read element " << i << "." << std::endl;
            ok = false;
        }
    }

    if (ok)
        ok = cudaOk(cudaMalloc((void**)&d_a, size), "cudaMalloc");
    if (ok)
        ok = cudaOk(cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice), "host-to-device copy");

    // A single element is already sorted, and a launch with 0 threads would
    // be rejected, so the kernel only runs when there is at least one pair.
    if (ok && threads > 0)
    {
        sort<<<1, threads>>>(d_a, n);
        // Launch-configuration errors show up in cudaGetLastError; faults
        // inside the kernel show up at the next synchronising call.
        ok = cudaOk(cudaGetLastError(), "kernel launch") &&
             cudaOk(cudaDeviceSynchronize(), "kernel execution");
    }

    if (ok)
        ok = cudaOk(cudaMemcpy(a, d_a, size, cudaMemcpyDeviceToHost), "device-to-host copy");

    if (ok)
    {
        std::cout << "\nSorted array is: \n";
        for (int i = 0; i < n; i++)
        {
            std::cout << a[i] << " ";
        }
    }

    delete[] a;       // array form: the buffer came from new int[n]
    cudaFree(d_a);    // cudaFree on a null pointer performs no operation
    return ok ? 0 : 1;
}

Post a Comment

Previous Post Next Post