(07-14-2023, 08:21 AM)Albert Wrote:You can have multiple instances of the API connection running in parallel. But one instance of RoboDK can process only one request at a time.
To speed up processing on RoboDK's end you can open multiple instances of RoboDK, using different API ports, and have different connections from the RoboDK API.
I was able to run it with the code below.
The code below is an excerpt.
But I will ask again because the initialization of the camera does not go well.
Code:
# Initialize Robolink with the specified API port
自我。RDK = Robolink(args=["-PORT=" + str(self.api_port)])
自我。project_directory = os.path.abspath(os.path.dirname(__file__))
自我。RDK.AddFile(os.path.join(self.project_directory, f"./rdk/Conveying{self.env_no}.rdk"))
----------------------------------------------------------------------------
class RoboDKEnv(gym.Env):
def __init__(self, config):
自我。env_no = config.worker_index
自我。api_port = 20500 + self.env_no
try:
自我。controller = RoboController.remote(env_no=self.env_no, api_port=self.api_port)
except Exception as e:
raise Exception(f"Failed to create controller: {e}")
自我。controller_initialized = True
print(f"Controller initialized for worker {self.env_no} on port {self.api_port}")
(07-17-2023, 07:10 PM)dylangouw Wrote:That script is meant as a summary. I didn't create a script based on it.I am modifying the RoboDK sample. It's not finished yet, but I think the script below will be helpful for you.(06-04-2023, 02:15 PM)SAT0001 Wrote:(06-01-2023, 11:50 AM)Sam Wrote:You can directly interface with RoboDK using our Python API to retrieive the required inputs for your model.OK,I am planning to try the following script. Please let me know if anything is wrong.
//www.sinclairbody.com/doc/en/PythonAPI/index.html
Code:import os
import numpy as np
import gym
from gym import spaces
import robodk
from robodk.robodk import Mat
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
import mlflow
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torchvision import transforms
from PIL import Image
from ray.rllib.agents.ppo import PPOTrainer
# Define the RoboDK environment
class RoboDKEnv(gym.Env):
def __init__(self, config):
自我。robodk = robodk.Robolink()
自我。robot = self.robodk.Item('Kuka iiwa')
自我。camera = self.robodk.Item('Camera')
自我。target = self.robodk.Item('Target')
自我。action_space = spaces.Discrete(6)
自我。observation_space =空间。箱(低= 0,高= 255, shape=(3, 224, 224), dtype=np.uint8)
def reset(self):
自我。robot.MoveJ(self.target)
img = self.capture_image()
return img
def step(self, action):
# Define the action space for the robot
action_space = [
(10, 0, 0, 0, 0, 0),
(-10, 0, 0, 0, 0, 0),
(0, 10, 0, 0, 0, 0),
(0, -10, 0, 0, 0, 0),
(0, 0, 10, 0, 0, 0),
(0, 0, -10, 0, 0, 0),
]
# Execute the selected action
自我。robot.MoveJ(self.robot.Pose() * robodk.transl(*action_space[action]))
# Capture the image after the action
img = self.capture_image()
# Calculate the reward based on the distance to the target
distance_to_target = self.robot.Pose().dist(self.target.Pose())
reward = -distance_to_target
# Check if the robot has reached the target
done = distance_to_target < 10
return img, reward, done, {}
def render(self, mode='human'):
# RoboDK模拟器更新视图
自我。robodk.Render()
def capture_image(self):
img = self.camera.CaptureImage()
img = Image.fromarray(img)
img = transforms.ToTensor()(img)
return img
# Main function
if __name__ == "__main__":
ray.init()
config = {
"env": RoboDKEnv,
"num_workers": 31,
"num_gpus": 2,
"num_cpus_per_worker": 1,
"framework": "torch",
"lr": tune.loguniform(1e-4, 1e-1),
"train_batch_size": 1000,
"sgd_minibatch_size": 128,
"num_sgd_iter": 10,
"rollout_fragment_length": 200,
"model": {
"custom_model": "ppo_model",
"custom_model_config": {
"num_actions": 6,
},
},
}
scheduler = ASHAScheduler(
metric="episode_reward_mean",
mode="max",
max_t=100,
grace_period=10,
reduction_factor=2,
)
reporter = CLIReporter(metric_columns=["episode_reward_mean", "training_iteration"])
result = tune.run(
PPOTrainer,
resources_per_trial={"cpu": 32, "gpu": 2},
config=config,
num_samples=32,
scheduler=scheduler,
progress_reporter=reporter,
)
best_trial = result.get_best_trial("episode_reward_mean", "max", "last")
print(f"Best trial config: {best_trial.config}")
print(f"Best trial final episode reward mean: {best_trial.last_result['episode_reward_mean']}")
best_checkpoint_dir = best_trial.checkpoint.value
best_agent = PPOTrainer(config=best_trial.config, env=RoboDKEnv)
best_agent.restore(best_checkpoint_dir)
# Save the trained model
torch.save(best_agent.get_policy().model.state_dict(), "best_model.pth")
This script sets up a reinforcement learning environment using the RoboDK API to control a Kuka iiwa multi-axis robot. The goal is to train the robot to efficiently grasp a target object detected by a camera attached to the robot's hand, move it to a target point, and place it on the target point. The script uses the following components:
- RoboDKEnv: A custom gym environment class that interfaces with the RoboDK simulator. It defines the action and observation spaces, as well as the step,reset,render, and capture_image functions.
- step function: The robot control logic is implemented in the step function. It defines the action space for the robot, executes the selected action, captures the image after the action, calculates the reward based on the distance to the target, and checks if the robot has reached the target.
- capture_image function: Captures an image using the camera attached to the robot's hand and converts it to a PyTorch tensor.
- render function: Updates the RoboDK simulator view to visualize the robot's movements.
- PPOTrainer: The script uses the Proximal Policy Optimization (PPO) algorithm from the Ray RLlib library to train the reinforcement learning agent.
- Training configuration: The configuration for the PPOTrainer includes the number of workers, GPUs, CPUs per worker, learning rate, batch sizes, and model configuration.
- ASHAScheduler: The Asynchronous Successive Halving Algorithm (ASHA) scheduler is used to optimize the training process by early stopping of low-performing trials.
- CLIReporter: A command-line reporter is used to display the training progress, including the episode reward mean and training iteration.
- Training loop: The script runs the PPOTrainer with the specified configuration, number of samples, scheduler, and progress reporter. It logs the training progress using mlflow and saves the best model.
Did you get this code to run? I am looking for a gym environment for RoboDK to run a NAF-algorithm on. I guess I could use your code to do so by replacing the PPOtrainer with the NAF-agent
Kind regards,
Dylan
Code:
# this script name is "train.py"
import os
import sys
import random
import argparse
import time
import pickle
import gymnasium as gym
from gymnasium.spaces import Discrete, Box
from gymnasium.spaces import Dict as GymDict
from collections import OrderedDict
import numpy as np
import cv2 as cv
import ray
from ray import air, tune
from ray.tune import Callback
from ray.rllib.env.env_context import EnvContext
from ray.rllib.utils.framework import try_import_torch
from ray.rllib.utils.test_utils import check_learning_achieved
from ray.tune.logger import pretty_print
from ray.tune.registry import get_trainable_cls
from ray.tune.registry import register_env
from ray.air.integrations.mlflow import MLflowLoggerCallback
from ray.air import RunConfig
from ray.air import ScalingConfig
from controller import RoboController, IMAGE_H, IMAGE_W
from torch_model_handler import ModelHandler
from MoveConveyor import RoboDKMoveConveyor
from PartsToConveyor import RoboDKToConveyor, NUM_PARTS
from PartsToPallet import RoboDKToPallet
import mlflow
DEBUG_MODE = False
DISPLAY_IMAGE = True
NUM_WORKERS = 3
NUM_CPUS_PER_WORKER = 1
NUM_GPUS = 1
TIME_LIMIT = 16.0
WINDOW_NAME = 'SICK VSPP-5F2113'
MAX_W, MAX_H = IMAGE_W * 3.0, IMAGE_H * 3.0 # mm
APPROACH = 100 # define default approach distance
SENSOR_VARIABLE = 'SENSOR' # station variable
torch, nn = try_import_torch()
class RoboDKEnv(gym.Env):
def __init__(self, config):
自我。env_no = config.worker_index
自我。api_port = 20500 + self.env_no
try:
自我。controller = RoboController.remote(env_no=self.env_no, api_port=self.api_port)
except Exception as e:
raise Exception(f"Failed to create controller: {e}")
自我。controller_initialized = True
print(f"Controller initialized for worker {self.env_no} on port {self.api_port}")
sys.stdout.flush()
ray.get(self.controller.initialize_conv.remote())
ray.get(self.controller.initialize_robotA.remote())
ray.get(self.controller.initialize_robotB.remote())
ray.get(self.controller.initialize_parts.remote())
ray.get(self.controller.initialize_camera.remote())
自我。conveyor = RoboDKMoveConveyor.remote(self.controller)
自我。robot_conv = RoboDKToConveyor.remote(self.controller)
自我。robot_pallet = RoboDKToPallet.remote (self.controller)
自我。conveyor_run_task = None
自我。robot_conv_run_task = None
# self.projects_info = ray.get(self.controller.get_projects_info.remote())
自我。image = None
self.background_img = None
自我。action_space = GymDict({
"pred_CAM_TX": Box(low=0, high=MAX_W, shape=(1,)),
"pred_CAM_TY": Box(low=0, high=MAX_H, shape=(1,)),
"pred_CAM_RZ": Box(low=-181, high=181, shape=(1,)),
})
自我。observation_space = GymDict({
"image": Box(low=0, high=255, shape=(IMAGE_H, IMAGE_W, 3), dtype=np.uint8),
})
自我。part_position = 0, 0, 0
自我。target_no = NUM_PARTS
自我。reward = 0.0
自我。reward_position = 0.0
自我。reward_posture = 0.0
自我。terminated = False
自我。truncated = False
自我。random_policy_data = []
自我。start_time = time.time()
自我。time_limit = TIME_LIMIT
自我。episode_time = 0.0
自我。keys = []
自我。step_called = False
自我。reset(seed=config.worker_index * config.num_workers)
def reset(self, *, seed=None, options=None):
print("Reset method called")
random.seed(seed)
自我。conveyor_run_task = None
自我。robot_conv_run_task = None
# ray.get(self.robot_conv.reset_object.remote())
自我。start_time = time.time() # Start time for the episode
自我。terminated = False
自我。truncated = False
自我。target_no = NUM_PARTS
自我。episode_time = 0.0
自我。random_policy_data = []
自我。keys = []
ray.get(self.controller.initialize_conv.remote())
ray.get(self.controller.initialize_robotA.remote())
ray.get(self.controller.initialize_robotB.remote())
ray.get(self.controller.initialize_parts.remote())
try:
# Move the robot to the target_inspect position
ray.get(self.controller.move_inspect.remote('robotB'))
self.background_img = ray.get(self.controller.get_image.remote())
if self.background_img is None:
self.background_img = np.zeros((IMAGE_H, IMAGE_W, 3))
自我。observation = OrderedDict([
('image', self.background_img),
])
return self.observation, {}
except Exception as e:
print(f"Error in reset(): {e}")
raise
def step(self, action):
# Run the conveyor
if self.conveyor_run_task==None:
if self.conveyor is None:
print("conveyor is not initialized")
else:
try:
自我。conveyor_run_task = self.conveyor.run.remote()
print("conveyor run task is running")
except Exception as e:
print(f"Error when trying to start conveyor: {e}")
if self.robot_conv_run_task==None:
if self.robot_conv is None:
print("robot_conv is not initialized")
else:
try:
自我。robot_conv_run_task = self.robot_conv.run.remote()
print("Robot conv run task is running")
except Exception as e:
print(f"Error when trying to start conveyor: {e}")
# Define weights for position and orientation errors
weight_position = 1.0
weight_orientation = 0.1
# Capture the target data
gt_CAM_TX, gt_CAM_TY, gt_CAM_RZ, self.image = ray.get(self.controller.get_target.remote(self.target_no))
# self.episode_time = time.time() - self.start_time
# self.terminated = self.check_terminated()
if self.image is None:
自我。image = np.zeros((IMAGE_H, IMAGE_W, 3))
else:
自我。target_no += -1
# Use the CNN model to predict CAM_TX, CAM_TY, and CAM_RZ from the image
pred_CAM_TX, pred_CAM_TY, pred_CAM_RZ = action["pred_CAM_TX"], action["pred_CAM_TY"], action["pred_CAM_RZ"]
# Calculate the reward based on the difference between predicted and ground truth values
position_diff = np.linalg.norm(np.array([pred_CAM_TX, pred_CAM_TY]) - np.array([gt_CAM_TX, gt_CAM_TY])).item()
posture_diff = np.abs(pred_CAM_RZ - gt_CAM_RZ).item()
# Set the desired accuracy for position and orientation
position_accuracy = 15 # +/- 15mm
posture_accuracy = 10 * np.pi / 180 # +/- 10 degrees (converted to radians)
# Calculate the reward based on the accuracy
自我。reward_position = float(self.reward_function(position_diff, 0, position_accuracy))
自我。reward_posture = float(self.reward_function(posture_diff, 0, posture_accuracy))
自我。reward = weight_position * self.reward_position + weight_orientation * self.reward_posture
# Check if the episode is terminated (you can define your own termination condition)
自我。terminated = self.check_terminated()
自我。truncated = self.terminated
自我。observation = OrderedDict([
("image", self.image), # 4D array for image
])
info = {}
# Return the next state, reward, terminated, truncated and any additional information
return self.observation, self.reward, self.terminated, self.truncated, info
def reward_function(self, pred, gt, threshold):
diff = np.abs(pred - gt)
if diff <= threshold:
return 1.0
else:
return -np.square(diff - threshold) / 100.0
def render(self):
cv.imshow(WINDOW_NAME + f"_{self.env_no}", self.image)
cv.waitKey(0)
def predict_values(self, image):
# Convert the image to grayscale
gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
# Apply a threshold to separate the objects from the background
_, thresholded = cv.threshold(gray, 127, 255, cv.THRESH_BINARY)
# Find contours in the thresholded image
contours, _ = cv.findContours(thresholded, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
# Calculate the area of each contour and find the largest one
largest_contour = max(contours, key=cv.contourArea)
# Get the bounding rectangle of the largest contour
x, y, w, h = cv.boundingRect(largest_contour)
# Crop the image to the bounding rectangle
cropped_image = image[y:y + h, x:x + w]
# Convert the cropped image to a PyTorch tensor
image_tensor = torch.from_numpy(cropped_image).float().unsqueeze(0).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
# Predict the position and orientation data using the trained model
with torch.no_grad():
output = self.model(image_tensor)
# Extract the predicted position and orientation data
CAM_TX, CAM_TY, CAM_RZ = output[0].item(), output[1].item(), output[2].item()
return CAM_TX, CAM_TY, CAM_RZ
def check_terminated(self):
# Check if all objects have been captured
if self.target_no == 0:
print("check_terminated 1")
return True
# Check if both reward_position and reward_posture are 1
elif self.reward_position==1 and self.reward_posture==1:
print("check_terminated 2")
return True
elif self.robot_conv.get_status.remote()=="COMPLETED":
print("check_terminated 4 - PartsToConveyor completed")
return True
else:
return False
def random_policy(self, num_steps):
for _ in range(num_steps):
action = self.action_space.sample()
observation, reward, terminated, info = self.step(action)
自我。random_policy_data.append((observation, action, reward, terminated))
if terminated:
自我。reset()
return self.random_policy_data
@staticmethod
def arg_parser():
parser = argparse.ArgumentParser()
parser.add_argument("--run", type=str, default="PPO", help="The RLlib-registered algorithm to use.")
parser.add_argument("--framework", choices=["tf", "tf2", "torch"], default="torch",
help="The DL framework specifier.")
parser.add_argument("--as-test", action="store_true",
help="Whether this script should be run as a test: --stop-reward must be achieved within --stop-timesteps AND --stop-iters.")
parser.add_argument("--stop-iters", type=int, default=5, help="Number of iterations to train.")
parser.add_argument("--stop-timesteps", type=int, default=100000, help="Number of timesteps to train.")
parser.add_argument("--stop-reward", type=float, default=0.1, help="Reward at which we stop training.")
parser.add_argument("--no-tune", action="store_true",
help="Run without Tune using a manual train loop instead. In this case, use PPO without grid search and no TensorBoard.")
parser.add_argument("--local-mode", action="store_true", help="Init Ray in local mode for easier debugging.")
return parser
def get_config(args):
config = (
get_trainable_cls(args.run)
.get_default_config()
.environment("RoboDKEnv")
.framework(args.framework)
.rollouts(num_rollout_workers=NUM_WORKERS)
.training(
model={
“conv_filters”:(
[32, [3, 3], 1],
[64, [3, 3], 1],
[128, [3, 3], 1],
],
}
)
)
return config
def env_creator(config: EnvContext) -> RoboDKEnv:
print(f"---------------Creating RoboDKEnv instance for worker {config.worker_index}")
return RoboDKEnv(config)
if __name__ == "__main__":
with open('status.txt', 'w') as status_file:
status_file.write('RUNNING')
try:
parser = RoboDKEnv.arg_parser()
args = parser.parse_args()
print(f"Running with following CLI options: {args}")
os.environ["RAY_PDB"] = "1"
ray.init(ignore_reinit_error=True, local_mode=DEBUG_MODE)
config = get_config(args)
register_env("RoboDKEnv", lambda config: env_creator(config))
stop = {
"training_iteration": args.stop_iters,
"timesteps_total": args.stop_timesteps,
"episode_reward_mean": args.stop_reward,
}
results = None
if args.no_tune:
if args.run != "PPO":
raise ValueError("Only support --run PPO with --no-tune.")
print("Running manual train loop without Ray Tune.")
config.lr = 1e-3
algo = config.build()
for _ in range(args.stop_iters):
result = algo.train()
print(pretty_print(result))
if (
result["timesteps_total"] >= args.stop_timesteps
or result["episode_reward_mean"] >= args.stop_reward
):
break
algo.stop()
else:
print("Training automatically with Ray Tune")
run_config = RunConfig( # Use RunConfig from ray.air
stop=stop,
name="mlflow_example",
callbacks=[
MLflowLoggerCallback(
tracking_uri="./mlruns",
experiment_name="example",
save_artifact=True,
)
],
)
tuner = tune.Tuner(
args.run,
param_space=config.to_dict(),
run_config=run_config,
)
results = tuner.fit()
print("tuner.fit")
if args.as_test:
print("Checking if learning goals were achieved")
check_learning_achieved(results, args.stop_reward)
if results:
with open('tuning_results.pkl', 'wb') as f:
pickle.dump(results, f)
ray.shutdown()
except Exception as e:
print(f"Exception occurred: {e}")
with open('status.txt', 'w') as status_file:
status_file.write('FAILED')
finally:
with open('status.txt', 'w') as status_file:
status_file.write('COMPLETED')
I used the following as a reference.
https://github.com/ray-project/ray/blob/...tom_env.py