位相情報を考慮したMarching Cube法†
CUDAでMC法で解説したMC法のCUDA実装は,頂点の重複を許しており,データの形式としては,
頂点座標の並びでポリゴンを表している.
しかし,この方式ではポリゴン間の接続などは分からない.
一般的には頂点座標と頂点の接続関係の2つに分けてメッシュデータを記述する.
例えば,以下に示す図のメッシュの場合,頂点座標を表す配列が,
頂点座標 |
0 : (x0, y0, z0) |
1 : (x1, y1, z1) |
2 : (x2, y2, z2) |
3 : (x3, y3, z3) |
4 : (x4, y4, z4) |
となり,メッシュとしては,
接続情報 |
a : 0 → 3 → 1 |
b : 1 → 3 → 4 |
c : 1 → 4 → 2 |
となる.
CUDA実装†
CUDAサンプルのMC法で得られたデータを解析して形式を変換しても良いが,
ここでは,CUDA実装そのものを変更してみる.
手順としては,
- 立方体グリッド(ボクセル)を空間に配置
- 各ボクセルの8頂点の内外を判定し,テーブルから頂点数,ポリゴン数を計算
- 頂点数配列をScan(prefix sum)
- ボクセルのエッジごとに頂点座標を計算
- 頂点座標配列をScanし,要素を詰める
- 点の配置と数から三角形メッシュの接続情報を作成
- (必要に応じて法線も計算)
カーネル呼び出しのコードは,
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
| | void CuMCCreateMesh(float threshold, unsigned int &nvrts, unsigned int &ntris)
{
uint lastElement, lastScanElement;
int threads = 128;
dim3 grid(m_uNumVoxels/threads, 1, 1);
if(grid.x > 65535){
grid.y = grid.x/32768;
grid.x = 32768;
}
ClassifyVoxel2<<<grid, threads>>>(m_duVoxelCubeIdx, m_duVoxelVerts, m_duVoxelTriNum, m_duVoxelOccupied,
g_dfVolume, m_u3GridSize, m_uNumVoxels, m_f3VoxelH, threshold);
CUT_CHECK_ERROR("classifyVoxel2 failed");
m_uNumActiveVoxels = m_uNumVoxels;
#if SKIP_EMPTY_VOXELS
cudppScan(m_cudppScanPlan, m_duVoxelOccupiedScan, m_duVoxelOccupied, m_uNumVoxels);
cutilSafeCall(cudaMemcpy((void*)&lastElement, (void*)(m_duVoxelOccupied+m_uNumVoxels-1),
sizeof(uint), cudaMemcpyDeviceToHost));
cutilSafeCall(cudaMemcpy((void*)&lastScanElement, (void*)(m_duVoxelOccupiedScan+m_uNumVoxels-1),
sizeof(uint), cudaMemcpyDeviceToHost));
m_uNumActiveVoxels = lastElement+lastScanElement;
if(!m_uNumActiveVoxels){
nvrts = 0; ntris = 0;
return;
}
CompactVoxels<<<grid, threads>>>(m_duCompactedVoxelArray, m_duVoxelOccupied, m_duVoxelOccupiedScan, m_uNumVoxels);
cutilCheckMsg("CompactVoxels failed");
#endif
cudppScan(m_cudppScanPlan, m_duVoxelVertsScan, m_duVoxelVerts, m_uNumVoxels);
cudppScan(m_cudppScanPlan, m_duVoxelTriNumScan, m_duVoxelTriNum, m_uNumVoxels);
cutilSafeCall(cudaMemcpy((void*)&lastElement, (void*)(m_duVoxelTriNum+m_uNumVoxels-1),
sizeof(uint), cudaMemcpyDeviceToHost));
cutilSafeCall(cudaMemcpy((void*)&lastScanElement, (void*)(m_duVoxelTriNumScan+m_uNumVoxels-1),
sizeof(uint), cudaMemcpyDeviceToHost));
ntris = lastElement+lastScanElement;
uint3 dir[3];
dir[0] = make_uint3(1, 0, 0);
dir[1] = make_uint3(0, 1, 0);
dir[2] = make_uint3(0, 0, 1);
uint cpos = 0;
for(int i = 0; i < 3; ++i){
dim3 grid2(m_uNumEdges[i]/threads, 1, 1);
if(grid2.x > 65535){
grid2.y = grid2.x/32768;
grid2.x = 32768;
}
CalVertexEdge<<<grid2, threads>>>(m_dfEdgeVrts+cpos, m_duEdgeOccupied+cpos,
g_dfVolume, dir[i], m_u3EdgeSize[i], m_u3GridSize,
m_uNumVoxels, m_f3VoxelH, m_f3VoxelMin, threshold);
cpos += m_uNumEdges[i];
}
CUT_CHECK_ERROR("calVertexEdge failed");
cutilSafeCall(cudaThreadSynchronize());
cudppScan(m_cudppScanPlan2, m_duEdgeOccupiedScan, m_duEdgeOccupied, m_uNumEdges[3]);
cutilSafeCall(cudaMemcpy((void *) &lastElement,
(void *) (m_duEdgeOccupied+m_uNumEdges[3]-1),
sizeof(uint), cudaMemcpyDeviceToHost));
cutilSafeCall(cudaMemcpy((void *) &lastScanElement,
(void *) (m_duEdgeOccupiedScan+m_uNumEdges[3]-1),
sizeof(uint), cudaMemcpyDeviceToHost));
m_uNumTotalVerts = lastElement + lastScanElement;
nvrts = m_uNumTotalVerts;
if(m_uNumTotalVerts==0){
nvrts = 0;
return;
}
dim3 grid3(m_uNumEdges[3]/threads, 1, 1);
if(grid3.x > 65535){
grid3.y = grid3.x/32768;
grid3.x = 32768;
}
CompactEdges<<<grid3, threads>>>(m_dfCompactedEdgeVrts,
m_duEdgeOccupied, m_duEdgeOccupiedScan,
m_dfEdgeVrts, m_uNumEdges[3]);
CUT_CHECK_ERROR("compactEdges failed");
cutilSafeCall(cudaThreadSynchronize());
dim3 grid4(m_uNumActiveVoxels/NTHREADS, 1, 1);
if(grid4.x > 65535){
grid4.y = grid4.x/32768;
grid4.x = 32768;
}
uint3 numEdge = make_uint3(m_uNumEdges[0], m_uNumEdges[1], m_uNumEdges[2]);
GenerateTriangles3<<<grid4, NTHREADS>>>(m_du3IndexArray, m_duVoxelTriNumScan, m_duEdgeOccupiedScan, m_duVoxelCubeIdx,
m_u3EdgeSize[0], m_u3EdgeSize[1], m_u3EdgeSize[2], numEdge,
m_duCompactedVoxelArray, m_u3GridSize,
m_uNumActiveVoxels, m_f3VoxelH, threshold, m_uNumTotalVerts, ntris);
CUT_CHECK_ERROR("GenerateTriangles3 failed");
cutilSafeCall(cudaThreadSynchronize());
}
|
ボクセル毎の頂点数,ポリゴン数のカウントのカーネルは,
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
| |
__device__
float sampleVolume2(float *data, uint3 p, uint3 gridSize)
{
p.x = min(p.x, gridSize.x-1);
p.y = min(p.y, gridSize.y-1);
p.z = min(p.z, gridSize.z-1);
uint i = (p.z*gridSize.x*gridSize.y)+(p.y*gridSize.x)+p.x;
return data[i];
}
__global__
void ClassifyVoxel2(uint* voxelCubeIdx, uint *voxelVerts, uint *voxelTris, uint *voxelOccupied, float *volume,
uint3 ngrids, uint nvoxels, float3 voxel_h, float threshold)
{
uint blockId = __mul24(blockIdx.y, gridDim.x)+blockIdx.x;
uint i = __mul24(blockId, blockDim.x)+threadIdx.x;
if(i < nvoxels){
uint3 gridPos = calcGridIdxU(i, ngrids);
float field[8];
field[0] = sampleVolume2(volume, gridPos, ngrids);
field[1] = sampleVolume2(volume, gridPos+make_uint3(1, 0, 0), ngrids);
field[2] = sampleVolume2(volume, gridPos+make_uint3(1, 1, 0), ngrids);
field[3] = sampleVolume2(volume, gridPos+make_uint3(0, 1, 0), ngrids);
field[4] = sampleVolume2(volume, gridPos+make_uint3(0, 0, 1), ngrids);
field[5] = sampleVolume2(volume, gridPos+make_uint3(1, 0, 1), ngrids);
field[6] = sampleVolume2(volume, gridPos+make_uint3(1, 1, 1), ngrids);
field[7] = sampleVolume2(volume, gridPos+make_uint3(0, 1, 1), ngrids);
uint cubeindex;
cubeindex = uint(field[0] < threshold);
cubeindex += uint(field[1] < threshold)*2;
cubeindex += uint(field[2] < threshold)*4;
cubeindex += uint(field[3] < threshold)*8;
cubeindex += uint(field[4] < threshold)*16;
cubeindex += uint(field[5] < threshold)*32;
cubeindex += uint(field[6] < threshold)*64;
cubeindex += uint(field[7] < threshold)*128;
uint numVerts = tex1Dfetch(numVertsTex, cubeindex);
voxelCubeIdx[i] = cubeindex; voxelVerts[i] = numVerts; voxelTris[i] = numVerts/3;
#if SKIP_EMPTY_VOXELS
voxelOccupied[i] = (numVerts > 0); #endif
}
}
|
陰関数値の参照にはサンプルボリュームを用いている.
エッジごとに頂点座標を算出するカーネルは,
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
| |
__device__
float3 vertexInterp(float isolevel, float3 p0, float3 p1, float f0, float f1)
{
float t = (isolevel-f0)/(f1-f0);
return lerp(p0, p1, t);
}
__global__
void CalVertexEdge(float4* edgeVrts, uint *edgeOccupied, float *volume, uint3 dir,
uint3 edgeSize, uint3 ngrids, uint nvoxels,
float3 voxel_h, float3 voxel_min, float threshold)
{
uint blockId = __mul24(blockIdx.y, gridDim.x)+blockIdx.x;
uint i = __mul24(blockId, blockDim.x)+threadIdx.x;
uint3 gridPos = calcGridIdxU(i, edgeSize);
float3 p;
p.x = voxel_min.x+(gridPos.x*voxel_h.x);
p.y = voxel_min.y+(gridPos.y*voxel_h.y);
p.z = voxel_min.z+(gridPos.z*voxel_h.z);
float3 v[2];
v[0] = p;
v[1] = p+make_float3(dir.x*voxel_h.x, dir.y*voxel_h.y, dir.z*voxel_h.z);
float field[2];
field[0] = sampleVolume2(volume, gridPos, ngrids);
field[1] = sampleVolume2(volume, gridPos+dir, ngrids);
uint cubeindex;
cubeindex = uint(field[0] < threshold);
cubeindex += uint(field[1] < threshold)*2;
if(cubeindex == 1 || cubeindex == 2){
float3 vertex, normal = make_float3(0.0);
vertex = vertexInterp(threshold, v[0], v[1], field[0], field[1]);
edgeVrts[i] = make_float4(vertex, 1.0f);
edgeOccupied[i] = 1;
}
else{
edgeOccupied[i] = 0;
}
}
|
頂点情報を詰めるカーネルは,
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
| |
__global__
void CompactEdges(float4 *compactedVrts, uint *occupied, uint *occupiedScan, float4 *vrts, uint num)
{
uint blockId = __mul24(blockIdx.y, gridDim.x)+blockIdx.x;
uint i = __mul24(blockId, blockDim.x)+threadIdx.x;
if(occupied[i] && (i < num)) {
compactedVrts[occupiedScan[i]] = vrts[i];
}
}
|
最終的に頂点の接続関係を計算するカーネルは,
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
| |
__global__
void GenerateTriangles3(uint3 *vertIdx, uint *voxelTrisScan, uint *edgeOccupiedScan, uint *voxelCubeIdx,
uint3 edgeSizeX, uint3 edgeSizeY, uint3 edgeSizeZ, uint3 edgeNum,
uint *compactedVoxelArray, uint3 ngrids, uint activeVoxels,
float3 voxelSize, float isoValue, uint maxVerts, uint numMesh)
{
uint blockId = __mul24(blockIdx.y, gridDim.x)+blockIdx.x;
uint idx = __mul24(blockId, blockDim.x)+threadIdx.x;
if(idx > activeVoxels-1){
idx = activeVoxels-1;
}
#if SKIP_EMPTY_VOXELS
uint voxel = compactedVoxelArray[idx];
#else
uint voxel = idx;
#endif
uint3 gpos = calcGridIdxU(voxel, ngrids);
uint cubeindex = voxelCubeIdx[voxel];
__shared__ uint vertlist[12*NTHREADS];
vertlist[12*threadIdx.x+0] = edgeOccupiedScan[calcGridIdx3(make_uint3(gpos.x, gpos.y, gpos.z), edgeSizeX)];
vertlist[12*threadIdx.x+1] = edgeOccupiedScan[calcGridIdx3(make_uint3(gpos.x+1, gpos.y, gpos.z), edgeSizeY)+edgeNum.x];
vertlist[12*threadIdx.x+2] = edgeOccupiedScan[calcGridIdx3(make_uint3(gpos.x, gpos.y+1, gpos.z), edgeSizeX)];
vertlist[12*threadIdx.x+3] = edgeOccupiedScan[calcGridIdx3(make_uint3(gpos.x, gpos.y, gpos.z), edgeSizeY)+edgeNum.x];
vertlist[12*threadIdx.x+4] = edgeOccupiedScan[calcGridIdx3(make_uint3(gpos.x, gpos.y, gpos.z+1), edgeSizeX)];
vertlist[12*threadIdx.x+5] = edgeOccupiedScan[calcGridIdx3(make_uint3(gpos.x+1, gpos.y, gpos.z+1), edgeSizeY)+edgeNum.x];
vertlist[12*threadIdx.x+6] = edgeOccupiedScan[calcGridIdx3(make_uint3(gpos.x, gpos.y+1, gpos.z+1), edgeSizeX)];
vertlist[12*threadIdx.x+7] = edgeOccupiedScan[calcGridIdx3(make_uint3(gpos.x, gpos.y, gpos.z+1), edgeSizeY)+edgeNum.x];
vertlist[12*threadIdx.x+8] = edgeOccupiedScan[calcGridIdx3(make_uint3(gpos.x, gpos.y, gpos.z), edgeSizeZ)+edgeNum.x+edgeNum.y];
vertlist[12*threadIdx.x+9] = edgeOccupiedScan[calcGridIdx3(make_uint3(gpos.x+1, gpos.y, gpos.z), edgeSizeZ)+edgeNum.x+edgeNum.y];
vertlist[12*threadIdx.x+10] = edgeOccupiedScan[calcGridIdx3(make_uint3(gpos.x+1, gpos.y+1, gpos.z), edgeSizeZ)+edgeNum.x+edgeNum.y];
vertlist[12*threadIdx.x+11] = edgeOccupiedScan[calcGridIdx3(make_uint3(gpos.x, gpos.y+1, gpos.z), edgeSizeZ)+edgeNum.x+edgeNum.y];
__syncthreads();
uint numTri = tex1Dfetch(numVertsTex, cubeindex)/3;
for(int i = 0; i < numTri; ++i){
uint index = voxelTrisScan[voxel]+i;
uint vidx[3];
uint edge[3];
edge[0] = tex1Dfetch(triTex, (cubeindex*16)+3*i);
edge[1] = tex1Dfetch(triTex, (cubeindex*16)+3*i+1);
edge[2] = tex1Dfetch(triTex, (cubeindex*16)+3*i+2);
vidx[0] = min(vertlist[12*threadIdx.x+edge[0]], maxVerts-1);
vidx[2] = min(vertlist[12*threadIdx.x+edge[1]], maxVerts-1);
vidx[1] = min(vertlist[12*threadIdx.x+edge[2]], maxVerts-1);
if(index < numMesh){
vertIdx[index] = make_uint3(vidx[2], vidx[1], vidx[0]);
}
}
}
|
|