{ "cells": [ { "cell_type": "markdown", "id": "7c76a62f", "metadata": {}, "source": [ "# GPU check\n", "Check NVIDIA driver" ] }, { "cell_type": "code", "execution_count": 1, "id": "5c993d00", "metadata": { "ExecuteTime": { "start_time": "2023-04-13T17:18:49.989079Z", "end_time": "2023-04-13T17:18:50.855038Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Thu Apr 13 17:18:51 2023 \r\n", "+-----------------------------------------------------------------------------+\r\n", "| NVIDIA-SMI 470.82.01 Driver Version: 470.82.01 CUDA Version: 11.4 |\r\n", "|-------------------------------+----------------------+----------------------+\r\n", "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\r\n", "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\r\n", "| | | MIG M. |\r\n", "|===============================+======================+======================|\r\n", "| 0 NVIDIA A10-4Q On | 00000000:00:07.0 Off | N/A |\r\n", "| N/A N/A P0 N/A / N/A | 128MiB / 3932MiB | 0% Default |\r\n", "| | | N/A |\r\n", "+-------------------------------+----------------------+----------------------+\r\n", " \r\n", "+-----------------------------------------------------------------------------+\r\n", "| Processes: |\r\n", "| GPU GI CI PID Type Process name GPU Memory |\r\n", "| ID ID Usage |\r\n", "|=============================================================================|\r\n", "| No running processes found |\r\n", "+-----------------------------------------------------------------------------+\r\n" ] } ], "source": [ "!nvidia-smi" ] }, { "cell_type": "markdown", "source": [ "Import PyTorch to check GPU devices" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 2, "id": "7bfa0d38", "metadata": { "ExecuteTime": { "start_time": "2023-04-13T17:18:50.852002Z", "end_time": "2023-04-13T17:18:51.794978Z" } }, "outputs": [], "source": [ "import torch" ] }, { "cell_type": "markdown", "source": [ "Check the 
version of PyTorch" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 3, "outputs": [ { "data": { "text/plain": "'2.0.0+cu117'" }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.__version__" ], "metadata": { "collapsed": false, "ExecuteTime": { "start_time": "2023-04-13T17:18:51.799271Z", "end_time": "2023-04-13T17:18:51.827270Z" } } }, { "cell_type": "markdown", "source": [ "Check if PyTorch can call GPUs" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 4, "id": "84e571e1", "metadata": { "ExecuteTime": { "start_time": "2023-04-13T17:18:51.824271Z", "end_time": "2023-04-13T17:18:51.890823Z" } }, "outputs": [ { "data": { "text/plain": "True" }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.cuda.is_available()" ] }, { "attachments": {}, "cell_type": "markdown", "id": "46e03918", "metadata": {}, "source": [ "Check the number of GPUs visible to PyTorch" ] }, { "cell_type": "code", "execution_count": 5, "id": "b37ff8de", "metadata": { "ExecuteTime": { "start_time": "2023-04-13T17:18:51.854727Z", "end_time": "2023-04-13T17:18:51.897822Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n" ] } ], "source": [ "gpu_num = torch.cuda.device_count()\n", "print(gpu_num)" ] }, { "attachments": {}, "cell_type": "markdown", "id": "ea613451", "metadata": {}, "source": [ "List the name of each GPU as reported by PyTorch" ] }, { "cell_type": "code", "execution_count": 6, "id": "b8b9a570", "metadata": { "ExecuteTime": { "start_time": "2023-04-13T17:18:51.882725Z", "end_time": "2023-04-13T17:18:51.897822Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GPU 0.: NVIDIA A10-4Q\n" ] } ], "source": [ "# One line per CUDA device index counted in the previous cell.\n", "for gpu_index in range(gpu_num):\n", "    gpu_name = torch.cuda.get_device_name(gpu_index)\n", "    print(f'GPU {gpu_index}.: {gpu_name}')" ] }, { "cell_type": "markdown", "source": [ "Import TensorFlow to check GPU 
devices" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 7, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2023-04-13 17:18:52.308723: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2023-04-13 17:18:52.422989: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2023-04-13 17:18:52.453216: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2023-04-13 17:18:52.918465: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvrtc.so.11.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda-11.2/lib64:/home/limingbo/anaconda3/envs/tensorflow/lib:/home/limingbo/TensorRT-7.2.3.4/lib\n", "2023-04-13 17:18:52.918653: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvrtc.so.11.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda-11.2/lib64:/home/limingbo/anaconda3/envs/tensorflow/lib:/home/limingbo/TensorRT-7.2.3.4/lib\n", "2023-04-13 17:18:52.918661: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" ] } ], "source": [ "import tensorflow as tf" ], "metadata": { "collapsed": false, "ExecuteTime": { "start_time": "2023-04-13T17:18:51.882725Z", "end_time": "2023-04-13T17:18:53.523525Z" } } }, { "cell_type": "markdown", "source": [ "Check the version of TensorFlow" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 8, "outputs": [ { "data": { "text/plain": "'2.10.1'" }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tf.__version__" ], "metadata": { "collapsed": false, "ExecuteTime": { "start_time": "2023-04-13T17:18:53.526524Z", "end_time": "2023-04-13T17:18:53.579778Z" } } }, { "cell_type": "markdown", "source": [ "Check if TensorFlow can call GPUs" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 9, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From /tmp/ipykernel_345917/2294581100.py:1: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "Use `tf.config.list_physical_devices('GPU')` instead.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2023-04-13 17:18:54.000431: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2023-04-13 17:18:54.001471: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-04-13 17:18:54.012039: I 
tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-04-13 17:18:54.012246: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-04-13 17:18:56.755971: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-04-13 17:18:56.756159: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-04-13 17:18:56.756317: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-04-13 17:18:56.756460: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /device:GPU:0 with 1415 MB memory: -> device: 0, name: NVIDIA A10-4Q, pci bus id: 0000:00:07.0, compute capability: 8.6\n" ] }, { "data": { "text/plain": "True" }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tf.test.is_gpu_available()" ], "metadata": { "collapsed": false, "ExecuteTime": { "start_time": "2023-04-13T17:18:53.578771Z", "end_time": "2023-04-13T17:18:56.347645Z" } } }, { "cell_type": "markdown", "source": [ "Check physical GPUs in TensorFlow" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 10, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2023-04-13 17:18:56.761259: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value 
(-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-04-13 17:18:56.761449: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-04-13 17:18:56.761587: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n" ] }, { "data": { "text/plain": "[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]" }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tf.config.list_physical_devices('GPU')" ], "metadata": { "collapsed": false, "ExecuteTime": { "start_time": "2023-04-13T17:18:56.363645Z", "end_time": "2023-04-13T17:18:56.401645Z" } } }, { "cell_type": "markdown", "source": [ "Check the GPU type of the computer in TensorFlow" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 11, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "device: 0, name: NVIDIA A10-4Q, pci bus id: 0000:00:07.0, compute capability: 8.6\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2023-04-13 17:18:56.765979: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-04-13 17:18:56.766168: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-04-13 17:18:56.766340: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-04-13 
17:18:56.766507: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-04-13 17:18:56.766647: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "2023-04-13 17:18:56.766791: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /device:GPU:0 with 1415 MB memory: -> device: 0, name: NVIDIA A10-4Q, pci bus id: 0000:00:07.0, compute capability: 8.6\n" ] } ], "source": [ "from tensorflow.python.client import device_lib\n", "\n", "local_device_protos = device_lib.list_local_devices()\n", "for x in local_device_protos:\n", " if x.device_type == 'GPU':\n", " print(x.physical_device_desc)" ], "metadata": { "collapsed": false, "ExecuteTime": { "start_time": "2023-04-13T17:18:56.389649Z", "end_time": "2023-04-13T17:18:56.457653Z" } } }, { "cell_type": "markdown", "source": [ "Import Jax and jaxlib to check GPU devices" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 12, "outputs": [], "source": [ "import jax\n", "import jaxlib" ], "metadata": { "collapsed": false, "ExecuteTime": { "start_time": "2023-04-13T17:18:56.419645Z", "end_time": "2023-04-13T17:18:56.505645Z" } } }, { "cell_type": "markdown", "source": [ "Check the version of jax and jaxlib" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 13, "outputs": [ { "data": { "text/plain": "'0.4.8'" }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "jax.__version__" ], "metadata": { "collapsed": false, "ExecuteTime": { "start_time": "2023-04-13T17:18:56.505645Z", "end_time": "2023-04-13T17:18:56.505645Z" } } }, { "cell_type": "code", "execution_count": 14, "outputs": [ { "data": { "text/plain": "'0.4.7'" }, 
"execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "jaxlib.__version__" ], "metadata": { "collapsed": false, "ExecuteTime": { "start_time": "2023-04-13T17:18:56.505645Z", "end_time": "2023-04-13T17:18:56.506646Z" } } }, { "cell_type": "markdown", "source": [ "Check all the devices from the default backend of jax" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 15, "outputs": [ { "data": { "text/plain": "[StreamExecutorGpuDevice(id=0, process_index=0, slice_index=0)]" }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "jax.devices()" ], "metadata": { "collapsed": false, "ExecuteTime": { "start_time": "2023-04-13T17:18:56.505645Z", "end_time": "2023-04-13T17:18:56.506646Z" } } }, { "cell_type": "markdown", "source": [ "Check the total number of devices of jax" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 16, "outputs": [ { "data": { "text/plain": "1" }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "jax.device_count()" ], "metadata": { "collapsed": false, "ExecuteTime": { "start_time": "2023-04-13T17:18:56.505645Z", "end_time": "2023-04-13T17:18:56.506646Z" } } }, { "cell_type": "markdown", "source": [ "Check the number of JAX processes associated with the backend of jax" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 17, "outputs": [ { "data": { "text/plain": "1" }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "jax.process_count()" ], "metadata": { "collapsed": false, "ExecuteTime": { "start_time": "2023-04-13T17:18:56.505645Z", "end_time": "2023-04-13T17:18:56.523880Z" } } }, { "cell_type": "markdown", "source": [ "Check the backend of jaxlib" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 18, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"gpu\n" ] } ], "source": [ "from jax.lib import xla_bridge\n", "print(xla_bridge.get_backend().platform)" ], "metadata": { "collapsed": false, "ExecuteTime": { "start_time": "2023-04-13T17:18:56.523880Z", "end_time": "2023-04-13T17:18:56.524833Z" } } }, { "attachments": {}, "cell_type": "markdown", "id": "3f57b47e", "metadata": {}, "source": [ "Import SecretFlow to verify that no errors are reported in this environemnt" ] }, { "cell_type": "code", "execution_count": 19, "id": "c8a443d6", "metadata": { "ExecuteTime": { "start_time": "2023-04-13T17:18:56.523880Z", "end_time": "2023-04-13T17:18:56.927007Z" } }, "outputs": [], "source": [ "import secretflow" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 5 }