역전파 알고리즘

코드와 함께 간단하게 정리

import numpy as np
from data_prep import features, targets, features_test, targets_test

# 재현성을 위한 랜덤 시드 설정
np.random.seed(21)

def sigmoid(x):
    """
    시그모이드 활성화 함수
    입력값을 0~1 사이의 값으로 변환
    Parameters:
        x: 입력 값
    Returns:
        sigmoid 함수를 적용한 결과 값
    """
    return 1 / (1 + np.exp(-x))

# 하이퍼파라미터 설정
n_hidden = 2      # 은닉층의 뉴런 개수
epochs = 900      # 전체 데이터셋을 학습할 횟수
learnrate = 0.005 # 학습률 - 가중치 업데이트 스텝 사이즈

# 데이터 차원 정보
n_records, n_features = features.shape
last_loss = None

# 가중치 초기화 - Xavier/Glorot 초기화 방법 사용
weights_input_hidden = np.random.normal(scale=1 / n_features ** .5,
                                      size=(n_features, n_hidden))
weights_hidden_output = np.random.normal(scale=1 / n_features ** .5,
                                       size=n_hidden)

# 학습 시작
for e in range(epochs):
    # 가중치 변화량을 저장할 행렬 초기화
    del_w_input_hidden = np.zeros(weights_input_hidden.shape)
    del_w_hidden_output = np.zeros(weights_hidden_output.shape)
    
    # 각 훈련 데이터에 대해 순전파와 역전파 수행
    for x, y in zip(features.values, targets):
        ### 순전파(Forward Pass) ###
        # 입력층 -> 은닉층
        hidden_input = np.dot(x, weights_input_hidden)    # 은닉층 입력 계산
        hidden_output = sigmoid(hidden_input)             # 은닉층 활성화
        
        # 은닉층 -> 출력층
        output = sigmoid(np.dot(hidden_output, weights_hidden_output))  # 최종 출력
        
        ### 역전파(Backward Pass) ###
        # 출력층 오차 계산
        error = y - output  # 실제값과 예측값의 차이
        
        # 출력층의 델타 계산
        # δ = error * output * (1 - output)
        output_error_term = error * output * (1 - output)
        
        # 은닉층으로 오차 전파
        # 은닉층이 출력층 오차에 기여한 정도 계산
        hidden_error = np.dot(output_error_term, weights_hidden_output)
        
        # 은닉층의 델타 계산
        # δ = hidden_error * hidden_output * (1 - hidden_output)
        hidden_error_term = hidden_error * hidden_output * (1 - hidden_output)
        
        # 가중치 변화량 누적
        del_w_hidden_output += output_error_term * hidden_output  # 은닉층->출력층 가중치
        del_w_input_hidden += hidden_error_term * x[:, None]     # 입력층->은닉층 가중치
    
    # 미니배치의 평균으로 가중치 업데이트
    weights_input_hidden += learnrate * del_w_input_hidden / n_records
    weights_hidden_output += learnrate * del_w_hidden_output / n_records
    
    # 훈련 과정 모니터링 - 매 10%마다 손실 출력
    if e % (epochs / 10) == 0:
        hidden_output = sigmoid(np.dot(x, weights_input_hidden))
        out = sigmoid(np.dot(hidden_output, weights_hidden_output))
        loss = np.mean((out - targets) ** 2)  # MSE 손실 계산
        
        # 손실이 증가하는지 확인 (경고 출력)
        if last_loss and last_loss < loss:
            print("Train loss: ", loss, "  WARNING - Loss Increasing")
        else:
            print("Train loss: ", loss)
        last_loss = loss

# 최종 테스트 세트 성능 평가
hidden = sigmoid(np.dot(features_test, weights_input_hidden))
out = sigmoid(np.dot(hidden, weights_hidden_output))
predictions = out > 0.5  # 0.5를 임계값으로 이진 분류
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))
코드와 함께 간단하게 정리​

코드와 함께 간단하게 정리