(06-01-2023, 11:50 AM)Sam Wrote: You can directly interface with RoboDK using our Python API to retrieve the required inputs for your model.
https://robodk.com/doc/en/PythonAPI/index.html
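For reference, connecting and reading back the robot state through that API is straightforward. A minimal sketch, assuming RoboDK is running locally, the pip "robodk" package is installed, and the robot item in my station is named 'Kuka iiwa':
Code:
from robodk.robolink import Robolink

RDK = Robolink()                 # connect to the RoboDK instance running on this machine
robot = RDK.Item('Kuka iiwa')    # look up the robot by its station item name
print(robot.Joints())            # current joint values
print(robot.Pose())              # current robot pose as a 4x4 Mat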
OK, building on that, I am planning to try the following script. Please let me know if anything is wrong.
Code:
import os
import numpy as np
import gym
from gym import spaces
from robodk.robolink import Robolink  # RoboDK API (pip package "robodk")
from robodk.robomath import transl    # pose helpers (translation matrices)
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
import mlflow
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from ray.rllib.agents.ppo import PPOTrainer  # pre-2.0 RLlib API
# Define the RoboDK environment
class RoboDKEnv(gym.Env):
    def __init__(self, config):
        super().__init__()
        self.robodk = Robolink()                    # connect to the running RoboDK instance
        self.robot = self.robodk.Item('Kuka iiwa')  # item names must match the station
        self.camera = self.robodk.Item('Camera')
        self.target = self.robodk.Item('Target')
        self.action_space = spaces.Discrete(6)
        self.observation_space = spaces.Box(low=0, high=255, shape=(3, 224, 224), dtype=np.uint8)

    def reset(self):
        self.robot.MoveJ(self.target)
        return self.capture_image()

    def step(self, action):
        # Discrete actions: +/-10 mm translations along X, Y and Z
        moves = [
            (10, 0, 0),
            (-10, 0, 0),
            (0, 10, 0),
            (0, -10, 0),
            (0, 0, 10),
            (0, 0, -10),
        ]
        dx, dy, dz = moves[action]
        # Execute the selected action relative to the current pose
        self.robot.MoveJ(self.robot.Pose() * transl(dx, dy, dz))
        # Capture the image after the action
        img = self.capture_image()
        # Reward is the negative distance (mm) between the robot and target positions
        robot_xyz = np.array(self.robot.Pose().Pos())
        target_xyz = np.array(self.target.Pose().Pos())
        distance_to_target = float(np.linalg.norm(robot_xyz - target_xyz))
        reward = -distance_to_target
        # The episode ends once the robot is within 10 mm of the target
        done = distance_to_target < 10
        return img, reward, done, {}

    def render(self, mode='human'):
        # Update the RoboDK simulator view
        self.robodk.Render()

    def capture_image(self):
        # Grab a snapshot from the simulated camera via a temporary PNG.
        # Newer RoboDK versions accept the camera item directly in Cam2D_Snapshot;
        # older ones need the handle returned by Cam2D_Add.
        snapshot_path = f"robodk_obs_{os.getpid()}.png"
        self.robodk.Cam2D_Snapshot(snapshot_path, self.camera)
        img = Image.open(snapshot_path).convert('RGB').resize((224, 224))
        # Return a (3, 224, 224) uint8 array to match observation_space
        return np.transpose(np.array(img, dtype=np.uint8), (2, 0, 1))
# Main function
if __name__ == "__main__":
    ray.init()

    config = {
        "env": RoboDKEnv,
        "num_workers": 31,
        "num_gpus": 2,
        "num_cpus_per_worker": 1,
        "framework": "torch",
        "lr": tune.loguniform(1e-4, 1e-1),
        "train_batch_size": 1000,
        "sgd_minibatch_size": 128,
        "num_sgd_iter": 10,
        "rollout_fragment_length": 200,
        "model": {
            # "ppo_model" must be registered with ModelCatalog before tune.run()
            # is called; see the registration sketch below the script
            "custom_model": "ppo_model",
            "custom_model_config": {
                "num_actions": 6,
            },
        },
    }

    scheduler = ASHAScheduler(
        metric="episode_reward_mean",
        mode="max",
        max_t=100,
        grace_period=10,
        reduction_factor=2,
    )

    reporter = CLIReporter(metric_columns=["episode_reward_mean", "training_iteration"])

    result = tune.run(
        PPOTrainer,
        resources_per_trial={"cpu": 32, "gpu": 2},
        config=config,
        num_samples=32,
        scheduler=scheduler,
        progress_reporter=reporter,
    )

    best_trial = result.get_best_trial("episode_reward_mean", "max", "last")
    print(f"Best trial config: {best_trial.config}")
    print(f"Best trial final episode reward mean: {best_trial.last_result['episode_reward_mean']}")

    best_checkpoint_dir = best_trial.checkpoint.value
    best_agent = PPOTrainer(config=best_trial.config, env=RoboDKEnv)
    best_agent.restore(best_checkpoint_dir)

    # Save the trained policy weights
    torch.save(best_agent.get_policy().model.state_dict(), "best_model.pth")
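One thing to note: the config above references a custom model called "ppo_model", so that name has to be registered with RLlib's ModelCatalog before tune.run() is called, otherwise the trainer cannot build the policy. Below is a rough sketch of what I have in mind; the PPOModel class and its CNN layout are my own placeholders, not anything from the RoboDK or RLlib docs.
Code:
import torch
import torch.nn as nn
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2

class PPOModel(TorchModelV2, nn.Module):
    """Minimal CNN policy/value network for (3, 224, 224) uint8 observations."""
    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=4, stride=2), nn.ReLU(),
            nn.Flatten(),
        )
        with torch.no_grad():
            n_features = self.backbone(torch.zeros(1, 3, 224, 224)).shape[1]
        self.policy_head = nn.Linear(n_features, num_outputs)
        self.value_head = nn.Linear(n_features, 1)
        self._features = None

    def forward(self, input_dict, state, seq_lens):
        obs = input_dict["obs"].float() / 255.0  # normalize uint8 pixels
        self._features = self.backbone(obs)
        return self.policy_head(self._features), state

    def value_function(self):
        return self.value_head(self._features).squeeze(1)

# Register the model before tune.run() so RLlib can resolve the "ppo_model" name
ModelCatalog.register_custom_model("ppo_model", PPOModel)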
This script sets up a reinforcement learning environment using the RoboDK API to control a Kuka iiwa multi-axis robot. The eventual goal is to train the robot to grasp an object detected by the hand-mounted camera and place it at a target point; this first version only rewards moving the end effector toward the target. The script uses the following components:
- RoboDKEnv: A custom gym environment class that interfaces with the RoboDK simulator. It defines the action and observation spaces, as well as the step, reset, render, and capture_image functions (a quick standalone check is sketched after this list).
- step function: The robot control logic is implemented in the step function. It maps the chosen discrete action to a translation, executes the move, captures an image afterwards, calculates the reward from the distance to the target, and checks whether the robot has reached it.
- capture_image function: Captures an image from the camera attached to the robot's hand and converts it to a (3, 224, 224) uint8 array that matches the observation space.
- render function: Updates the RoboDK simulator view to visualize the robot's movements.
- PPOTrainer: The script uses the Proximal Policy Optimization (PPO) algorithm from the Ray RLlib library to train the reinforcement learning agent.
- Training configuration: The configuration for the PPOTrainer includes the number of workers, GPUs, CPUs per worker, learning rate, batch sizes, and model configuration; the custom model name "ppo_model" has to be registered with ModelCatalog first, as sketched above.
- ASHAScheduler: The Asynchronous Successive Halving Algorithm (ASHA) scheduler is used to optimize the training process by early stopping of low-performing trials.
- CLIReporter: A command-line reporter is used to display the training progress, including the episode reward mean and training iteration.
- Training loop: The script runs the PPOTrainer with the specified configuration, number of samples, scheduler, and progress reporter, then restores the best checkpoint and saves the trained policy weights. mlflow is imported but not wired in yet; the callback sketch after this list shows how I plan to add the logging.
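Before launching the full 32-trial run, I plan to sanity-check the environment on its own, roughly like this (with RoboDK open and the station loaded):
Code:
env = RoboDKEnv({})
obs = env.reset()
print(obs.shape)                                   # expect (3, 224, 224)
obs, reward, done, info = env.step(env.action_space.sample())
print(reward, done)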
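For the mlflow side, my understanding is that Tune ships an MLflow logger callback that can be passed to tune.run(); something along these lines, where the experiment name and tracking URI are placeholders and the import path may differ between Ray versions:
Code:
from ray.tune.integration.mlflow import MLflowLoggerCallback

result = tune.run(
    PPOTrainer,
    resources_per_trial={"cpu": 32, "gpu": 2},
    config=config,
    num_samples=32,
    scheduler=scheduler,
    progress_reporter=reporter,
    callbacks=[MLflowLoggerCallback(
        tracking_uri="file:./mlruns",   # placeholder: local tracking store
        experiment_name="robodk_ppo",   # placeholder experiment name
        save_artifact=True,
    )],
)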