# hongdeng98 / mao

import os

import gym
import ray
from gym.spaces import Discrete, Box
from ray import tune


class SimpleCorridor(gym.Env):
    """Corridor environment in which the agent walks from position 0 to the end.

    The position is a single number starting at 0. Action 1 moves one step
    forward; action 0 moves one step back unless the agent is already at 0.
    Any other action leaves the position unchanged. The episode is done once
    the position reaches ``config['corridor_length']``.
    """

    def __init__(self, config):
        """Build the spaces from ``config['corridor_length']``."""
        corridor_length = config['corridor_length']
        self.end_pos = corridor_length
        self.cur_pos = 0
        # Two discrete actions: 0 (step back) and 1 (step forward).
        self.action_space = Discrete(2)
        # A single value bounded by 0.0 and the corridor length.
        self.observation_space = Box(0.0, self.end_pos, shape=(1,))

    def reset(self):
        """Move the agent back to position 0 and return the observation."""
        self.cur_pos = 0
        observation = [self.cur_pos]
        return observation

    def step(self, action):
        """Apply ``action`` and return ``(observation, reward, done, info)``.

        The reward is 1 on the step that reaches the end of the corridor and
        0 otherwise; ``info`` is always an empty dict.
        """
        if action == 1:
            self.cur_pos += 1
        elif action == 0 and self.cur_pos > 0:
            # Stepping back is ignored at position 0.
            self.cur_pos -= 1

        reached_end = self.cur_pos >= self.end_pos
        reward = 1 if reached_end else 0
        return [self.cur_pos], reward, reached_end, {}


if __name__ == '__main__':
    # NOTE: this body intentionally stays at module level. The conditional
    # `from agit import open` below rebinds the name `open`; inside a function
    # that import would make `open` a local and break the non-Agit branch.
    from datetime import datetime

    # Naive timestamps taken with utcnow() bracket the whole run.
    started_at = datetime.utcnow()
    print('Python start time: {} UTC'.format(started_at))

    running_on_agit = os.environ.get('CLOUD_PROVIDER') == 'Agit'
    if running_on_agit:
        # The agit package supplies its own Ray initialiser and its own
        # `open`, which replaces the builtin for the rest of this script
        # (presumably so 'agit://' paths can be read -- confirm with agit docs).
        from agit import ray_init
        ray_init()

        from agit import open
        data_root = 'agit://'
    else:
        ray.init()
        data_root = './'

    print('Ray Cluster Resources: {}'.format(ray.cluster_resources()))

    # The frameworks are imported only after Ray has been initialised,
    # matching the original statement order.
    import tensorflow as tf
    tf_gpus = tf.config.list_physical_devices('GPU')
    print('TensorFlow CUDA is available: {}'.format(tf_gpus))

    import torch
    torch_has_cuda = torch.cuda.is_available()
    print('pyTorch CUDA is available: {}'.format(torch_has_cuda))

    # Read the CSV as raw bytes and echo it.
    with open(data_root + 'expert_data.csv', 'rb') as csv_file:
        csv_bytes = csv_file.read()

        print(csv_bytes)

    # Train PPO on the corridor environment for 10 training iterations.
    trainer_config = {
        'env': SimpleCorridor,
        'env_config': {'corridor_length': 5},
        'num_gpus': 1,
    }
    tune.run(
        'PPO',
        queue_trials=True,  # Don't use this parameter unless you know what you are doing.
        stop={'training_iteration': 10},
        config=trainer_config,
    )

    finished_at = datetime.utcnow()
    print('Python complete time: {} UTC'.format(finished_at))

    # The value printed here is the elapsed duration (a timedelta).
    print('Python resource time: {} UTC'.format(finished_at - started_at))