デバイスプロパティの取得

デバイスの情報は，cudaGetDeviceProperties 関数を呼び出すことで，デバイスプロパティを格納する構造体 cudaDeviceProp を介して取得できます．

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
    // Enumerate the CUDA devices reported by the runtime and print the
    // properties of each one.
    //
    // NOTE(review): cutilSafeCall comes from the legacy CUDA SDK "cutil"
    // helpers and is not defined in this snippet; presumably it reports the
    // error and terminates when the wrapped call fails -- confirm against the
    // SDK version in use.
    int n = 0;
    cutilSafeCall(cudaGetDeviceCount(&n));

    for(int i = 0; i < n; ++i){
        cudaDeviceProp dev;

        // Fill `dev` with the properties of device i.
        cutilSafeCall(cudaGetDeviceProperties(&dev, i));

        printf("device %d\n", i);
        printf(" device name : %s\n", dev.name);

        // totalGlobalMem, sharedMemPerBlock, memPitch, totalConstMem and
        // textureAlignment are size_t members of cudaDeviceProp, so they are
        // printed with %zu. Passing a size_t to %d is undefined behaviour and
        // prints wrong values on 64-bit builds.
        printf(" total global memory : %zu (MB)\n", dev.totalGlobalMem/1024/1024);
        printf(" shared memory / block : %zu (KB)\n", dev.sharedMemPerBlock/1024);
        printf(" register / block : %d\n", dev.regsPerBlock);
        printf(" warp size : %d\n", dev.warpSize);
        printf(" max pitch : %zu (B)\n", dev.memPitch);
        printf(" max threads / block : %d\n", dev.maxThreadsPerBlock);
        printf(" max size of each dim. of block : (%d, %d, %d)\n", dev.maxThreadsDim[0], dev.maxThreadsDim[1], dev.maxThreadsDim[2]);
        printf(" max size of each dim. of grid  : (%d, %d, %d)\n", dev.maxGridSize[0], dev.maxGridSize[1], dev.maxGridSize[2]);
        // clockRate is divided by 1000 so the value is shown in MHz.
        printf(" clock rate : %d (MHz)\n", dev.clockRate/1000);
        printf(" total constant memory : %zu (KB)\n", dev.totalConstMem/1024);
        printf(" compute capability : %d.%d\n", dev.major, dev.minor);
        printf(" alignment requirement for texture : %zu\n", dev.textureAlignment);
        printf(" device overlap : %s\n", (dev.deviceOverlap ? "ok" : "not"));
        printf(" num. of multiprocessors : %d\n", dev.multiProcessorCount);
        printf(" kernel execution timeout : %s\n", (dev.kernelExecTimeoutEnabled ? "on" : "off"));
        printf(" integrated : %s\n", (dev.integrated ? "on" : "off"));
        printf(" host memory mapping : %s\n", (dev.canMapHostMemory ? "on" : "off"));

        printf(" compute mode : ");
        if(dev.computeMode == cudaComputeModeDefault) printf("default mode (multiple threads can use) \n");
        else if(dev.computeMode == cudaComputeModeExclusive) printf("exclusive mode (only one thread will be able to use)\n");
        else if(dev.computeMode == cudaComputeModeProhibited) printf("prohibited mode (no threads can use)\n");
        // Fallback for any mode not listed above: always terminate the line so
        // the next device's output does not run into this one.
        else printf("unknown mode (%d)\n", (int)dev.computeMode);

    }

GeForce GTX 285を搭載したビデオカード上での実行例

device 0
 device name : GeForce GTX 285
 total global memory : 1024 (MB)
 shared memory / block : 16 (KB)
 register / block : 16384
 warp size : 32
 max pitch : 262144 (B)
 max threads / block : 512
 max size of each dim. of block : (512, 512, 64)
 max size of each dim. of grid  : (65535, 65535, 1)
 clock rate : 1476 (MHz)
 total constant memory : 64 (KB)
 compute capability : 1.3
 alignment requirement for texture : 256
 device overlap : ok
 num. of multiprocessors : 30
 kernel execution timeout : off
 integrated : off
 host memory mapping : off
 compute mode : default mode (multiple threads can use)

GeForce GTX 580を搭載したビデオカード上での実行例

device 0
 device name : GeForce GTX 580
 total global memory : 1503 (MB)
 shared memory / block : 48 (KB)
 register / block : 32768
 warp size : 32
 max pitch : 2147483647 (B)
 max threads / block : 1024
 max size of each dim. of block : (1024, 1024, 64)
 max size of each dim. of grid  : (65535, 65535, 65535)
 clock rate : 1544 (MHz)
 total constant memory : 64 (KB)
 compute capability : 2.0
 alignment requirement for texture : 512
 device overlap : ok
 num. of multiprocessors : 16
 kernel execution timeout : on
 integrated : off
 host memory mapping : on
 compute mode : default mode (multiple threads can use)

GeForce GTX TITANを搭載したビデオカード上での実行例

device 0
 device name : GeForce GTX TITAN
 total global memory : 4095 (MB)
 shared memory / block : 48 (KB)
 register / block : 65536
 warp size : 32
 max pitch : 2147483647 (B)
 max threads / block : 1024
 max size of each dim. of block : (1024, 1024, 64)
 max size of each dim. of grid  : (2147483647, 65535, 65535)
 clock rate : 875 (MHz)
 total constant memory : 64 (KB)
 compute capability : 3.5
 alignment requirement for texture : 512
 device overlap : ok
 num. of multiprocessors : 14
 kernel execution timeout : on
 integrated : off
 host memory mapping : on
 compute mode : default mode (multiple threads can use)

トップ   編集 凍結 差分 履歴 添付 複製 名前変更 リロード   新規 一覧 検索 最終更新   ヘルプ   最終更新のRSS
Last-modified: 2022-11-30 (水) 13:48:11