기타 샘플
Contents
기타 샘플#
direction-1#
import math
def reward_function(params):
    """Score how well the car's heading lines up with the local track segment.

    Args:
        params: Mapping that provides 'waypoints' (indexable points whose
            [0] and [1] entries are used as x and y), 'closest_waypoints'
            (indices of the waypoint behind and ahead of the car) and
            'heading' (converted with math.radians, i.e. given in degrees).

    Returns:
        float: cos(3 * angle difference), a value in [-1, 1]. Because of the
        factor 3 the value is 1 when aligned and falls to -1 at a 60 degree
        offset; it also repeats every 120 degrees.
    """
    track_points = params['waypoints']
    nearest = params['closest_waypoints']
    heading_deg = params['heading']

    # The segment between the two nearest waypoints stands in for the track.
    start = track_points[nearest[0]]
    end = track_points[nearest[1]]

    # Direction of that segment, in radians.
    delta_x = end[0] - start[0]
    delta_y = end[1] - start[1]
    segment_angle = math.atan2(delta_y, delta_x)

    # Absolute angular gap between track direction and car heading.
    misalignment = abs(segment_angle - math.radians(heading_deg))

    # Reward peaks when the gap is zero.
    return float(math.cos(misalignment * 3))
direction-2 (더 멀리보기)#
import math
# How many waypoints past the "next" one the direction target is taken from.
next_interval = 5


def reward_function(params):
    """Score heading alignment against a look-ahead track direction.

    The track direction is measured from the waypoint behind the car to the
    waypoint `next_interval` positions past the one ahead of it.

    Args:
        params: Mapping that provides 'waypoints' (indexable points whose
            [0] and [1] entries are used as x and y), 'closest_waypoints'
            (indices behind/ahead of the car) and 'heading' (degrees).

    Returns:
        float: cos(3 * angle difference), a value in [-1, 1].
    """
    track_points = params['waypoints']
    nearest = params['closest_waypoints']
    heading_deg = params['heading']

    # Look-ahead indices wrap at len - 1; presumably the waypoint list
    # repeats its first point at the end -- confirm against the track data.
    wrap = len(track_points) - 1

    start = track_points[nearest[0]]
    lookahead_index = (nearest[1] + next_interval) % wrap
    end = track_points[lookahead_index]

    # Direction from the waypoint behind the car to the look-ahead target.
    delta_x = end[0] - start[0]
    delta_y = end[1] - start[1]
    segment_angle = math.atan2(delta_y, delta_x)

    # Absolute angular gap between that direction and the car heading.
    misalignment = abs(segment_angle - math.radians(heading_deg))

    return float(math.cos(misalignment * 3))
Speed - 1#
import math
def reward_function(params):
    """Reward the component of the car's velocity along the track segment.

    The reward is the dot product of the (unnormalised) vector between the
    two nearest waypoints and the car's velocity vector, so it grows with
    speed and with the spacing of those two waypoints.

    Args:
        params: Mapping that provides 'waypoints', 'closest_waypoints',
            'heading' (degrees) and 'speed'.

    Returns:
        float: The dot product; negative when the car points against the
        segment direction.
    """
    track_points = params['waypoints']
    nearest = params['closest_waypoints']
    heading_deg = params['heading']
    velocity = params['speed']

    # Vector from the waypoint behind the car to the one ahead of it.
    start = track_points[nearest[0]]
    end = track_points[nearest[1]]
    segment_x = end[0] - start[0]
    segment_y = end[1] - start[1]

    # Velocity vector: speed projected onto the heading direction.
    yaw = math.radians(heading_deg)
    velocity_x = math.cos(yaw) * velocity
    velocity_y = math.sin(yaw) * velocity

    return float(segment_x * velocity_x + segment_y * velocity_y)
Speed - 2#
import math
# How many waypoints past the "next" one the direction target is taken from.
next_interval = 5


def reward_function(params):
    """Reward velocity toward a look-ahead waypoint, cubed.

    The car's direction of travel is taken as heading plus steering angle.
    The dot product of the look-ahead track vector and the velocity vector
    is raised to the third power, which keeps its sign.

    Args:
        params: Mapping that provides 'waypoints', 'closest_waypoints',
            'heading' (degrees), 'speed' and 'steering_angle' (degrees).

    Returns:
        float: The cubed dot product.
    """
    track_points = params['waypoints']
    nearest = params['closest_waypoints']
    heading_deg = params['heading']
    velocity = params['speed']
    steering_deg = params['steering_angle']

    # Look-ahead indices wrap at len - 1; presumably the waypoint list
    # repeats its first point at the end -- confirm against the track data.
    wrap = len(track_points) - 1

    # Vector from the waypoint behind the car to the look-ahead target.
    start = track_points[nearest[0]]
    end = track_points[(nearest[1] + next_interval) % wrap]
    segment_x = end[0] - start[0]
    segment_y = end[1] - start[1]

    # Velocity vector along heading + steering.
    yaw = math.radians(heading_deg)
    steer = math.radians(steering_deg)
    velocity_x = math.cos(yaw + steer) * velocity
    velocity_y = math.sin(yaw + steer) * velocity

    dot = segment_x * velocity_x + segment_y * velocity_y
    return float(dot ** 3)
Progress - 1#
참고: steps 값은 2부터 시작됩니다.
def reward_function(params):
    """Reward the distance advanced along the track since the previous call.

    The previous progress value is kept in the module-level global
    `progress_prev` between calls.

    Args:
        params: Mapping that provides 'track_length', 'steps' and
            'progress' (divided by 100 below, i.e. a percentage).

    Returns:
        float: The advance since the last call (progress delta scaled by
        track length), clamped to [-1, 1] and cubed. 0.0 on a call that
        (re)establishes the baseline.
    """
    global progress_prev

    track_length = params['track_length']
    steps = params['steps']
    progress = params['progress']

    # Re-baseline at the start of an episode (the sample's accompanying
    # note says steps starts at 2). Also re-baseline when no previous value
    # exists yet, so a first call with a larger step count yields a zero
    # reward instead of raising NameError.
    if steps <= 2 or 'progress_prev' not in globals():
        progress_prev = progress

    delta_progress = progress - progress_prev

    # Convert the percentage delta into track-length units, then clamp so a
    # single large jump cannot dominate the reward.
    delta_projection = track_length * (delta_progress / 100)
    delta_projection = max(min(delta_projection, 1), -1)

    # Cubing keeps the sign while shrinking small advances.
    reward = delta_projection ** 3

    progress_prev = progress
    return float(reward)
Progress - 2 (최단거리)#
confusion - 1#
import math
def reward_function(params):
    """Combine on-track, speed and heading-alignment terms into one reward.

    Args:
        params: Mapping that provides 'all_wheels_on_track', 'speed',
            'waypoints', 'closest_waypoints' and 'heading' (degrees).

    Returns:
        float: wheels_term + 2 * (speed / 2) + 2 * cos(angle difference),
        where wheels_term is 1 when all wheels are on track and 0 otherwise.
    """
    # --- wheels term: 1 while every wheel is on the track, else 0 ---
    wheels_term = 1 if params['all_wheels_on_track'] else 0

    # --- speed term: half of the current speed ---
    speed_term = params['speed'] / 2

    # --- direction term: cosine of the gap between track and heading ---
    track_points = params['waypoints']
    nearest = params['closest_waypoints']
    heading_deg = params['heading']

    ahead = track_points[nearest[1]]
    behind = track_points[nearest[0]]
    segment_angle = math.atan2(ahead[1] - behind[1], ahead[0] - behind[0])
    direction_term = math.cos(segment_angle - math.radians(heading_deg))

    # --- weighted sum with weights 1 / 2 / 2 ---
    total = 1 * wheels_term + 2 * speed_term + 2 * direction_term
    return float(total)
Confusion - 2#
import math
def distance_from_line(x, y, x1, y1, x2, y2):
    """Return the distance from (x, y) to the line through two points.

    Args:
        x, y: The query point.
        x1, y1, x2, y2: Two points defining the (infinite) line.

    Returns:
        The perpendicular distance. When the two defining points coincide
        no line exists, so the distance to that single point is returned
        instead of dividing by zero.
    """
    dx = x2 - x1
    dy = y2 - y1
    length = math.sqrt(dy ** 2 + dx ** 2)
    if length == 0:
        return math.sqrt((x - x1) ** 2 + (y - y1) ** 2)
    return abs(dy * x - dx * y + (x2 * y1 - x1 * y2)) / length


def reward_function(params, waypoints2=None):
    """Reward closeness to a reference line in position, heading, speed and steering.

    Each of the four terms is clamped to be non-negative and the terms are
    multiplied together, so any single term at zero zeroes the reward.

    Args:
        params: Mapping that provides 'closest_waypoints', 'heading'
            (degrees), 'x', 'y', 'track_length', 'speed' and
            'steering_angle' (degrees).
        waypoints2: Optional reference table; each row unpacks as
            (x, y, target speed, target steering angle in degrees) and is
            indexed by the 'closest_waypoints' indices. When omitted, the
            table embedded below is used.

    Returns:
        float: Product of the four terms.

    Raises:
        ValueError: If no reference table is available.
    """
    if waypoints2 is None:
        # Capston: the reference table was not included with this sample.
        # Paste it here, one (x, y, speed, steering) row per waypoint index.
        waypoints2 = []
    if len(waypoints2) == 0:
        raise ValueError(
            "waypoints2 is empty: supply the (x, y, speed, steering) reference table"
        )

    closest_previous, closest_next = params['closest_waypoints']
    heading_rad = math.radians(params['heading'])

    # Reference rows behind and ahead of the car. Assumes waypoints2 uses
    # the same indexing as params['waypoints'] -- TODO confirm.
    x_target_prev, y_target_prev, speed_target_prev, steering_target_prev = waypoints2[closest_previous]
    x_target_front, y_target_front, speed_target_front, steering_target_front = waypoints2[closest_next]

    speed_target_mean = (speed_target_prev + speed_target_front) / 2
    steering_target_mean = (steering_target_prev + steering_target_front) / 2
    steering_target_mean_rad = math.radians(steering_target_mean)

    # atan2 already returns radians; converting it again with math.radians
    # would shrink the target heading by a factor of pi / 180.
    target_heading_rad = math.atan2(
        y_target_front - y_target_prev, x_target_front - x_target_prev
    )

    x = params['x']
    y = params['y']
    speed = params['speed']
    steering_rad = math.radians(params['steering_angle'])

    # NOTE(review): the position term is normalised by half the track
    # *length*; 'track_width' may have been intended -- confirm.
    track_length_half = params['track_length'] / 2

    distance_from_wp2_line = distance_from_line(
        x, y, x_target_prev, y_target_prev, x_target_front, y_target_front
    )

    reward_position = max(0, 1 - (distance_from_wp2_line / track_length_half))
    # Highest when aligned with the reference direction, matching the
    # steering term below (the earlier 1 - cos form gave zero when aligned).
    reward_heading = max(0, math.cos((target_heading_rad - heading_rad) * 3))
    reward_speed = max(0, 1 - ((speed_target_mean - speed) / speed_target_mean) ** 2)
    reward_steering = max(0, math.cos((steering_target_mean_rad - steering_rad) * 3))

    reward = reward_position * reward_heading * reward_speed * reward_steering
    return float(reward)
similar to supervised learning - 1#
import math
# How many waypoints past the "next" one the steering target is taken from.
next_interval = 5


def reward_function(params):
    """Reward steering that points the car at a look-ahead waypoint.

    The "correct" steering angle is the bearing from the car to the target
    waypoint minus the car's heading; the loss is the squared sine of the
    gap between that angle and the actual steering angle.

    Args:
        params: Mapping that provides 'x', 'y', 'steering_angle' (degrees),
            'heading' (degrees), 'closest_waypoints' (a pair of indices)
            and 'waypoints' (a sequence of (x, y) pairs).

    Returns:
        float: 1 - sin(error) ** 2, a value in [0, 1].
    """
    car_x = params['x']
    car_y = params['y']
    steer_rad = math.radians(params['steering_angle'])
    yaw_rad = math.radians(params['heading'])
    nearest = params['closest_waypoints']
    track_points = params['waypoints']

    # Look-ahead indices wrap at len - 1; presumably the waypoint list
    # repeats its first point at the end -- confirm against the track data.
    wrap = len(track_points) - 1

    # Coordinates of the target waypoint.
    _behind_index, ahead_index = nearest
    goal_x, goal_y = track_points[(ahead_index + next_interval) % wrap]

    # Bearing (radians) of the vector from the car to the target waypoint.
    bearing = math.atan2(goal_y - car_y, goal_x - car_x)

    # Label: the steering angle that would aim the car at the target.
    ideal_steer = bearing - yaw_rad

    # Loss and reward.
    error = math.sin(ideal_steer - steer_rad) ** 2
    return float(1 - error)