def forward(self, x):
    """Forward pass: two conv→ReLU→pool stages, then a 3-layer FC head.

    Assumes ``x`` is an image batch whose conv/pool output flattens to
    16 * 5 * 5 features (the classic CIFAR-10 LeNet layout) — TODO
    confirm against the layer definitions in ``__init__``.

    Fix: original read ``defforward`` (missing space), a syntax error.
    """
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    # Flatten everything except the batch dimension for the FC layers.
    x = x.view(-1, 16 * 5 * 5)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    # Final layer has no activation: returns raw class scores (logits).
    return self.fc3(x)
# Build the model, then lazily select only the trainable parameters
# (requires_grad=True) — e.g. for handing to an optimizer/engine.
net = Net()
parameters = (p for p in net.parameters() if p.requires_grad)
# Evaluate classification accuracy on the held-out test set.
# Inference only: torch.no_grad() disables autograd bookkeeping.
correct = 0
total = 0
device = model_engine.local_rank  # hoisted: constant across the loop
with torch.no_grad():
    for images, labels in testloader:
        outputs = net(images.to(device))
        # argmax over the class dim replaces the deprecated
        # `torch.max(outputs.data, 1)` pattern (whose max values were
        # unused anyway); resulting indices are identical.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels.to(device)).sum().item()
# NOTE(review): assumes a non-empty test set (total > 0); %d truncates.
print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
[2023-08-11 15:28:13,128] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect) [2023-08-11 15:28:14,693] [WARNING] [runner.py:196:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only. [2023-08-11 15:28:14,694] [INFO] [runner.py:555:main] cmd = /home/wangyh/miniconda3/envs/llava2/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbN119 --master_addr=127.0.0.1 --master_port=29500 --enable_each_rank_log=None test.py --deepspeed_config config.json [2023-08-11 15:28:15,862] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect) [2023-08-11 15:28:17,418] [INFO] [launch.py:145:main] WORLD INFO DICT: {'localhost': [7]} [2023-08-11 15:28:17,418] [INFO] [launch.py:151:main] nnodes=1, num_local_procs=1, node_rank=0 [2023-08-11 15:28:17,418] [INFO] [launch.py:162:main] global_rank_mapping=defaultdict(<class 'list'>, {'localhost': [0]}) [2023-08-11 15:28:17,418] [INFO] [launch.py:163:main] dist_world_size=1 [2023-08-11 15:28:17,418] [INFO] [launch.py:165:main] Setting CUDA_VISIBLE_DEVICES=7 [2023-08-11 15:28:18,729] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect) Files already downloaded and verified Files already downloaded and verified [2023-08-11 15:28:21,691] [INFO] [logging.py:96:log_dist] [Rank -1] DeepSpeed info: version=0.9.5, git-hash=unknown, git-branch=unknown [2023-08-11 15:28:21,691] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented [2023-08-11 15:28:21,691] [INFO] [comm.py:594:init_distributed] cdb=None [2023-08-11 15:28:21,691] [INFO] [comm.py:625:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl [2023-08-11 15:28:22,465] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False Installed CUDA version 11.3 does not match the version torch was compiled with 11.7 but since the APIs are 
compatible, accepting this combination Using /home/wangyh/.cache/torch_extensions/py310_cu117 as PyTorch extensions root... Detected CUDA files, patching ldflags Emitting ninja build file /home/wangyh/.cache/torch_extensions/py310_cu117/fused_adam/build.ninja... Building extension module fused_adam... Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) ninja: no work to do. Loading extension module fused_adam... Time to load fused_adam op: 0.0886220932006836 seconds [2023-08-11 15:28:22,849] [INFO] [logging.py:96:log_dist] [Rank 0] Using DeepSpeed Optimizer param name adam as basic optimizer ......