testtwogpu.py

import os
# GPU test: resource script with GPU workers.
import gym
import ray
from gym.spaces import Discrete, Box
from ray import tune

class SimpleCorridor(gym.Env):
    """Minimal corridor environment: walk right until reaching end_pos."""

    def __init__(self, config):
        self.end_pos = config['corridor_length']
        self.cur_pos = 0
        self.action_space = Discrete(2)
        self.observation_space = Box(0.0, self.end_pos, shape=(1,))

    def reset(self):
        self.cur_pos = 0
        return [self.cur_pos]

    def step(self, action):
        # Action 0 moves left (never past the start), action 1 moves right.
        if action == 0 and self.cur_pos > 0:
            self.cur_pos -= 1
        elif action == 1:
            self.cur_pos += 1
        done = self.cur_pos >= self.end_pos
        # Reward of 1 only when the end of the corridor is reached.
        return [self.cur_pos], 1 if done else 0, done, {}

if __name__ == '__main__':
    from datetime import datetime
    start_time = datetime.utcnow()
    print('Python start time: {} UTC'.format(start_time))

    # Use the platform-specific initializer on Agit; otherwise start Ray locally.
    if os.environ.get('CLOUD_PROVIDER') == 'Agit':
        from agit import ray_init
        ray_init()
    else:
        ray.init()
    print('Ray Cluster Resources: {}'.format(ray.cluster_resources()))

    # Confirm that both TensorFlow and PyTorch can see the GPUs.
    import tensorflow as tf
    print('TensorFlow CUDA is available: {}'.format(tf.config.list_physical_devices('GPU')))
    import torch
    print('PyTorch CUDA is available: {}'.format(torch.cuda.is_available()))

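    # Note: per-trial GPU demand in RLlib is num_gpus (for the trainer process)
    # plus num_workers * num_gpus_per_worker (for the rollout workers);
    # queue_trials=True keeps the trial queued until the cluster can satisfy
    # that request instead of failing immediately.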
    tune.run(
        'PPO',
        queue_trials=True,  # Don't use this parameter unless you know what you are doing.
        stop={'training_iteration': 10},
        config={
            'env': SimpleCorridor,
            'env_config': {'corridor_length': 5},
            'num_gpus': 1,             # GPU reserved for the trainer (driver) process
            'num_gpus_per_worker': 1,  # GPU reserved for each rollout worker
        },
    )

    complete_time = datetime.utcnow()
    print('Python complete time: {} UTC'.format(complete_time))
    print('Python elapsed time: {}'.format(complete_time - start_time))