기타 샘플#

direction-1#


import math

def reward_function(params):
    """Reward the car for pointing along the local track direction.

    Args:
        params: dict providing
            'waypoints': sequence of points indexable as [0] = x, [1] = y,
            'closest_waypoints': pair of indices into 'waypoints'
                (index 0 is used as the previous point, index 1 as the next),
            'heading': car heading in degrees.

    Returns:
        float in [-1, 1]: cos(3 * diff), where diff is the absolute angle
        between the track direction and the heading. The reward is 1 when
        aligned, 0 at 30 degrees, and stays at -1 from 60 degrees onwards.
    """
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']
    heading = params['heading']

    # Positions of the waypoints closest to the agent.
    prev_point = waypoints[closest_waypoints[0]]
    next_point = waypoints[closest_waypoints[1]]

    # Direction of the track segment, in radians.
    track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])

    # Smallest absolute angle between track and heading, wrapped to [0, pi].
    direction_diff = abs(track_direction - math.radians(heading)) % (2 * math.pi)
    if direction_diff > math.pi:
        direction_diff = 2 * math.pi - direction_diff

    # cos(3x) repeats every 120 degrees; without the cap a car heading
    # 120 degrees off the track would score as if perfectly aligned.
    reward = math.cos(min(direction_diff * 3, math.pi))

    return float(reward)

direction-2 (더 멀리보기)#

import math

# How many waypoints beyond the closest "next" waypoint to look ahead.
next_interval = 5


def reward_function(params):
    """Reward the car for pointing towards a waypoint further down the track.

    Works like the plain direction reward, but the track direction is taken
    from the closest previous waypoint to the waypoint `next_interval`
    positions after the closest next one.

    Args:
        params: dict providing
            'waypoints': sequence of points indexable as [0] = x, [1] = y,
            'closest_waypoints': pair of indices into 'waypoints',
            'heading': car heading in degrees.

    Returns:
        float in [-1, 1]: cos(3 * diff), where diff is the absolute angle
        between the look-ahead direction and the heading. The reward is 1
        when aligned and stays at -1 from 60 degrees onwards.
    """
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']
    heading = params['heading']

    # Look-ahead indices wrap modulo len - 1.
    # NOTE(review): presumably the last waypoint of a closed track repeats
    # the first one - confirm against the track data.
    waypoints_count = len(waypoints) - 1

    # Closest previous waypoint and the look-ahead target waypoint.
    prev_point = waypoints[closest_waypoints[0]]
    next_point = waypoints[(closest_waypoints[1] + next_interval) % waypoints_count]

    # Direction towards the look-ahead waypoint, in radians.
    track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])

    # Smallest absolute angle between that direction and the heading, in [0, pi].
    direction_diff = abs(track_direction - math.radians(heading)) % (2 * math.pi)
    if direction_diff > math.pi:
        direction_diff = 2 * math.pi - direction_diff

    # cos(3x) repeats every 120 degrees; without the cap a car heading
    # 120 degrees off the target would score as if perfectly aligned.
    reward = math.cos(min(direction_diff * 3, math.pi))

    return float(reward)

Speed - 1#

import math

def reward_function(params):
    """Reward the car's velocity component along the local track segment.

    The reward is the dot product of the vector from the closest previous
    waypoint to the closest next waypoint (not normalised) and the car's
    velocity vector built from its heading and speed.

    Args:
        params: dict providing 'waypoints' (points indexable as [0] = x,
            [1] = y), 'closest_waypoints' (pair of indices), 'heading'
            (degrees) and 'speed'.

    Returns:
        float: the dot product; negative when the car moves against the
        segment direction.
    """
    route = params['waypoints']
    nearest = params['closest_waypoints']
    heading_deg = params['heading']
    velocity = params['speed']

    # Segment between the two waypoints closest to the agent.
    behind = route[nearest[0]]
    ahead = route[nearest[1]]
    seg_dx = ahead[0] - behind[0]
    seg_dy = ahead[1] - behind[1]

    # Velocity vector of the car along its heading.
    yaw = math.radians(heading_deg)
    vel_x = math.cos(yaw) * velocity
    vel_y = math.sin(yaw) * velocity

    # The reward is the plain inner product of the two vectors.
    return float(seg_dx * vel_x + seg_dy * vel_y)

Speed - 2#

import math

# How many waypoints beyond the closest "next" waypoint to look ahead.
next_interval = 5


def reward_function(params):
    """Reward velocity towards a look-ahead waypoint, cubed.

    The travel direction is taken as heading plus steering angle. The dot
    product of the (not normalised) vector from the closest previous
    waypoint to the look-ahead waypoint and the car's velocity vector is
    raised to the third power, which keeps its sign.

    Args:
        params: dict providing 'waypoints' (points indexable as [0] = x,
            [1] = y), 'closest_waypoints' (pair of indices), 'heading'
            (degrees), 'speed' and 'steering_angle' (degrees).

    Returns:
        float: the cubed dot product.
    """
    route = params['waypoints']
    nearest = params['closest_waypoints']
    heading_deg = params['heading']
    velocity = params['speed']
    steering_deg = params['steering_angle']

    # Look-ahead indices wrap modulo len - 1.
    wrap = len(route) - 1

    # Closest previous waypoint and the look-ahead target waypoint.
    behind = route[nearest[0]]
    ahead = route[(nearest[1] + next_interval) % wrap]
    seg_dx = ahead[0] - behind[0]
    seg_dy = ahead[1] - behind[1]

    # Velocity vector along heading + steering.
    travel_angle = math.radians(heading_deg) + math.radians(steering_deg)
    vel_x = math.cos(travel_angle) * velocity
    vel_y = math.sin(travel_angle) * velocity

    alignment = seg_dx * vel_x + seg_dy * vel_y

    # Cubing preserves the sign while amplifying large values.
    return float(alignment ** 3)

Progress - 1#

  • 참고: steps 값은 2부터 시작됨

# Progress value seen on the previous call; None until the first call.
progress_prev = None


def reward_function(params):
    """Reward the distance gained along the track since the previous call.

    The progress delta (in percent) is converted to a distance using the
    track length, clamped to [-1, 1] and cubed, so the reward keeps the sign
    of the movement.

    Args:
        params: dict providing 'track_length', 'steps' and 'progress'
            (progress is treated as a percentage: it is divided by 100).

    Returns:
        float in [-1, 1].

    Side effects:
        Stores the current progress in the module-level `progress_prev`.
    """
    global progress_prev

    track_length = params['track_length']
    steps = params['steps']
    progress = params['progress']

    # Re-seed at the start of an episode, and also when there is no stored
    # value yet: otherwise a first call with steps > 2 would fail on an
    # unbound global.
    if steps <= 2 or progress_prev is None:
        progress_prev = progress

    delta_progress = progress - progress_prev
    delta_projection = track_length * (delta_progress / 100)
    # Clamp so a single large jump cannot dominate the reward.
    delta_projection = max(min(delta_projection, 1), -1)

    reward = delta_projection ** 3
    progress_prev = progress
    return float(reward)

Progress - 2 (최단거리)#

confusion - 1#

import math

def reward_function(params):
    """Combine on-track, speed and direction terms into one weighted sum.

    Terms:
        reward_wheels: 1 when 'all_wheels_on_track' is truthy, else 0.
        reward_speed: 'speed' divided by 2.
        reward_direction: cosine of the angle between the direction of the
            closest waypoint segment and the car heading.

    Args:
        params: dict providing 'all_wheels_on_track', 'speed', 'waypoints'
            (points indexable as [0] = x, [1] = y), 'closest_waypoints'
            (pair of indices) and 'heading' (degrees).

    Returns:
        float: 1 * reward_wheels + 2 * reward_speed + 2 * reward_direction.
    """
    # [reward_wheels] all wheels on track
    reward_wheels = 1 if params['all_wheels_on_track'] else 0

    # [reward_speed] speed
    reward_speed = params['speed'] / 2

    # [reward_direction] heading versus track direction
    route = params['waypoints']
    nearest = params['closest_waypoints']
    heading_deg = params['heading']

    # Waypoints closest to the agent.
    ahead = route[nearest[1]]
    behind = route[nearest[0]]

    # Direction of the segment between them, in radians.
    track_direction = math.atan2(ahead[1] - behind[1], ahead[0] - behind[0])
    reward_direction = math.cos(track_direction - math.radians(heading_deg))

    # Weighted sum of the three terms.
    total = 1 * reward_wheels + 2 * reward_speed + 2 * reward_direction
    return float(total)

confusion - 2#

import math

def distance_from_line(x, y, x1, y1, x2, y2):
    """Return the distance from point (x, y) to the line through two points.

    The line is the infinite line through (x1, y1) and (x2, y2). When the two
    points coincide no line is defined, so the distance from (x, y) to that
    single point is returned instead of dividing by zero.
    """
    dx = x2 - x1
    dy = y2 - y1
    length = math.hypot(dx, dy)
    if length == 0:
        # Degenerate segment: fall back to the point-to-point distance.
        return math.hypot(x - x1, y - y1)
    return abs(dy * x - dx * y + (x2 * y1 - x1 * y2)) / length

# Racing-line table, indexed with the same indices as
# params['closest_waypoints']: one row per waypoint, unpacked as
# (x, y, target_speed, target_steering_angle_in_degrees).
# The original sample left this value out (placeholder comment "Capston");
# paste the table for the track being trained here.
RACING_LINE = []


def reward_function(params):
    """Reward following a precomputed racing line in position, heading,
    speed and steering.

    Four factors are multiplied together, so the reward is zero as soon as
    any one of them is zero:
        reward_position: 1 - distance to the racing-line segment divided by
            half the track length, floored at 0.
        reward_heading: cos(3 * heading error), floored at 0; the error is
            wrapped so that only headings within 30 degrees of the segment
            direction score.
        reward_speed: 1 - squared relative error to the mean target speed of
            the two closest racing-line rows, floored at 0.
        reward_steering: cos(3 * steering error) against the mean target
            steering angle, floored at 0.

    Args:
        params: dict providing 'closest_waypoints' (pair of indices),
            'heading' (degrees), 'x', 'y', 'track_length', 'speed' and
            'steering_angle' (degrees).

    Returns:
        float in [0, 1].

    Raises:
        ValueError: if RACING_LINE has not been filled in.
    """
    if not RACING_LINE:
        raise ValueError("RACING_LINE is empty; fill in the racing-line table")

    closest_previous, closest_next = params['closest_waypoints']
    heading_rad = math.radians(params['heading'])

    x_target_prev, y_target_prev, speed_target_prev, steering_target_prev = RACING_LINE[closest_previous]
    x_target_front, y_target_front, speed_target_front, steering_target_front = RACING_LINE[closest_next]
    speed_target_mean = (speed_target_prev + speed_target_front) / 2
    steering_target_mean = (steering_target_prev + steering_target_front) / 2
    steering_target_mean_rad = math.radians(steering_target_mean)
    # atan2 already returns radians; it must not be converted a second time.
    target_heading_rad = math.atan2(y_target_front - y_target_prev, x_target_front - x_target_prev)

    x = params['x']
    y = params['y']

    # NOTE(review): the distance is normalised by half the track *length*,
    # which keeps reward_position close to 1 for small offsets; the track
    # width may have been intended - confirm before changing.
    track_length_half = params['track_length'] / 2

    speed = params['speed']
    steering_rad = math.radians(params['steering_angle'])

    distance_from_wp2_line = distance_from_line(x, y, x_target_prev, y_target_prev, x_target_front, y_target_front)
    reward_position = max(0, 1 - (distance_from_wp2_line / track_length_half))

    # Smallest absolute heading error, wrapped to [0, pi].
    heading_error = abs(target_heading_rad - heading_rad) % (2 * math.pi)
    if heading_error > math.pi:
        heading_error = 2 * math.pi - heading_error
    # Highest when aligned (the original "1 - cos" gave 0 there and zeroed
    # the whole product); capped at pi so cos(3x) cannot wrap around and
    # reward a heading that is 120 degrees off.
    reward_heading = max(0, math.cos(min(heading_error * 3, math.pi)))

    reward_speed = max(0, 1 - ((speed_target_mean - speed) / speed_target_mean) ** 2)

    reward_steering = max(0, math.cos((steering_target_mean_rad - steering_rad) * 3))

    reward = reward_position * reward_heading * reward_speed * reward_steering

    return float(reward)

similar to supervised learning - 1#

import math

# How many waypoints beyond the closest "next" waypoint to aim at.
next_interval = 5


def reward_function(params):
    """Reward steering that points the car at a look-ahead waypoint.

    The "label" is the steering angle that would aim the car from its
    current position straight at the target waypoint (bearing to the target
    minus heading). The loss is sin^2 of the difference between that label
    and the actual steering angle, and the reward is 1 - loss. Because of
    the sin^2 form the reward repeats every 180 degrees of error.

    Args:
        params: dict providing 'x', 'y', 'steering_angle' (degrees),
            'heading' (degrees), 'closest_waypoints' (pair of indices) and
            'waypoints' (sequence of (x, y) pairs).

    Returns:
        float in [0, 1].
    """
    car_x = params['x']
    car_y = params['y']
    steer_rad = math.radians(params['steering_angle'])
    yaw_rad = math.radians(params['heading'])
    nearest = params['closest_waypoints']
    route = params['waypoints']
    # Look-ahead indices wrap modulo len - 1.
    wrap = len(route) - 1

    # Coordinates of the target waypoint.
    _, ahead_idx = nearest
    goal_x, goal_y = route[(ahead_idx + next_interval) % wrap]

    # Angle (radians) of the vector from the car to the target waypoint.
    bearing = math.atan2(goal_y - car_y, goal_x - car_x)

    # Label: steering angle that would point the car at the target.
    ideal_steer = bearing - yaw_rad

    # Loss and reward.
    error = math.sin(ideal_steer - steer_rad) ** 2
    return float(1 - error)