# network.py
# GRU time-series regression: load CSV data, standardize it, window it into
# sequences, train a GRU, then evaluate and plot the results.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader, TensorDataset
  9. # 1. 加载和预处理数据
  10. def load_data(file_path):
  11. df = pd.read_csv(file_path, header=None)
  12. # 分离特征和目标列(最后1列是目标值)
  13. features = df.iloc[:, 1:-1].values # 忽略第一列索引
  14. target = df.iloc[:, -1].values.reshape(-1, 1)
  15. return features, target
  16. # 2. 数据标准化
  17. def scale_data(features, target):
  18. feature_scaler = StandardScaler()
  19. target_scaler = StandardScaler()
  20. scaled_features = feature_scaler.fit_transform(features)
  21. scaled_target = target_scaler.fit_transform(target)
  22. return scaled_features, scaled_target, feature_scaler, target_scaler
  23. # 3. 创建时间序列数据集
  24. def create_sequences(features, target, seq_length):
  25. xs, ys = [], []
  26. for i in range(len(features) - seq_length):
  27. x = features[i:i + seq_length]
  28. y = target[i + seq_length - 1]
  29. xs.append(x)
  30. ys.append(y)
  31. return np.array(xs), np.array(ys)
  32. # 4. 定义GRU模型
  33. class GRUModel(nn.Module):
  34. def __init__(self, input_size, hidden_size, num_layers, output_size):
  35. super(GRUModel, self).__init__()
  36. self.gru = nn.GRU(
  37. input_size=input_size,
  38. hidden_size=hidden_size,
  39. num_layers=num_layers,
  40. batch_first=True
  41. )
  42. self.fc = nn.Linear(hidden_size, output_size)
  43. def forward(self, x):
  44. out, _ = self.gru(x) # out: (batch, seq, hidden)
  45. out = out[:, -1, :] # 取序列最后一个输出
  46. out = self.fc(out)
  47. return out
  48. # 5. 训练函数
  49. def train_model(model, train_loader, val_loader, criterion, optimizer, epochs):
  50. train_losses, val_losses = [], []
  51. for epoch in range(epochs):
  52. # 训练阶段
  53. model.train()
  54. train_loss = 0
  55. for X_batch, y_batch in train_loader:
  56. optimizer.zero_grad()
  57. outputs = model(X_batch)
  58. loss = criterion(outputs, y_batch)
  59. loss.backward()
  60. optimizer.step()
  61. train_loss += loss.item()
  62. # 验证阶段
  63. model.eval()
  64. val_loss = 0
  65. with torch.no_grad():
  66. for X_val, y_val in val_loader:
  67. outputs = model(X_val)
  68. loss = criterion(outputs, y_val)
  69. val_loss += loss.item()
  70. # 记录损失
  71. avg_train_loss = train_loss / len(train_loader)
  72. avg_val_loss = val_loss / len(val_loader)
  73. train_losses.append(avg_train_loss)
  74. val_losses.append(avg_val_loss)
  75. print(f'Epoch [{epoch + 1}/{epochs}], '
  76. f'Train Loss: {avg_train_loss:.6f}, '
  77. f'Val Loss: {avg_val_loss:.6f}')
  78. return train_losses, val_losses
  79. # 主程序
  80. if __name__ == "__main__":
  81. # 参数设置
  82. SEQ_LENGTH = 10
  83. BATCH_SIZE = 16
  84. HIDDEN_SIZE = 64
  85. NUM_LAYERS = 2
  86. EPOCHS = 100
  87. LR = 0.001
  88. # 1. 加载数据
  89. features, target = load_data('output.csv')
  90. # 2. 数据标准化
  91. scaled_features, scaled_target, feature_scaler, target_scaler = scale_data(features, target)
  92. # 3. 创建序列
  93. X, y = create_sequences(scaled_features, scaled_target, SEQ_LENGTH)
  94. # 4. 划分数据集
  95. X_train, X_temp, y_train, y_temp = train_test_split(
  96. X, y, test_size=0.3, shuffle=False
  97. )
  98. X_val, X_test, y_val, y_test = train_test_split(
  99. X_temp, y_temp, test_size=0.5, shuffle=False
  100. )
  101. # 转换为PyTorch张量
  102. X_train = torch.tensor(X_train, dtype=torch.float32)
  103. y_train = torch.tensor(y_train, dtype=torch.float32)
  104. X_val = torch.tensor(X_val, dtype=torch.float32)
  105. y_val = torch.tensor(y_val, dtype=torch.float32)
  106. X_test = torch.tensor(X_test, dtype=torch.float32)
  107. y_test = torch.tensor(y_test, dtype=torch.float32)
  108. # 创建数据加载器
  109. train_dataset = TensorDataset(X_train, y_train)
  110. val_dataset = TensorDataset(X_val, y_val)
  111. test_dataset = TensorDataset(X_test, y_test)
  112. train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
  113. val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
  114. test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
  115. # 5. 初始化模型
  116. input_size = X_train.shape[2] # 特征数量
  117. output_size = 1
  118. model = GRUModel(input_size, HIDDEN_SIZE, NUM_LAYERS, output_size)
  119. # 损失函数和优化器
  120. criterion = nn.MSELoss()
  121. optimizer = torch.optim.Adam(model.parameters(), lr=LR)
  122. # 6. 训练模型
  123. train_losses, val_losses = train_model(
  124. model, train_loader, val_loader, criterion, optimizer, EPOCHS
  125. )
  126. # 7. 评估测试集
  127. model.eval()
  128. test_loss = 0
  129. predictions = []
  130. actuals = []
  131. with torch.no_grad():
  132. for X_test_batch, y_test_batch in test_loader:
  133. outputs = model(X_test_batch)
  134. loss = criterion(outputs, y_test_batch)
  135. test_loss += loss.item()
  136. # 保存预测结果
  137. predictions.extend(outputs.numpy())
  138. actuals.extend(y_test_batch.numpy())
  139. avg_test_loss = test_loss / len(test_loader)
  140. print(f'Test Loss: {avg_test_loss:.6f}')
  141. # 8. 反标准化结果
  142. predictions = np.array(predictions).reshape(-1, 1)
  143. actuals = np.array(actuals).reshape(-1, 1)
  144. pred_inverse = target_scaler.inverse_transform(predictions)
  145. actual_inverse = target_scaler.inverse_transform(actuals)
  146. # 9. 可视化结果
  147. plt.figure(figsize=(15, 6))
  148. plt.subplot(1, 2, 1)
  149. plt.plot(train_losses, label='Train Loss')
  150. plt.plot(val_losses, label='Validation Loss')
  151. plt.title('Training and Validation Loss')
  152. plt.xlabel('Epochs')
  153. plt.ylabel('MSE Loss')
  154. plt.legend()
  155. plt.grid(True)
  156. plt.subplot(1, 2, 2)
  157. plt.plot(actual_inverse, label='Actual Values', alpha=0.7)
  158. plt.plot(pred_inverse, label='Predicted Values', linestyle='--')
  159. plt.title('Actual vs Predicted Values')
  160. plt.xlabel('Time Steps')
  161. plt.ylabel('Weight Values')
  162. plt.legend()
  163. plt.grid(True)
  164. plt.tight_layout()
  165. plt.savefig('gru_results.png')
  166. # 10. 计算性能指标
  167. from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
  168. mae = mean_absolute_error(actual_inverse, pred_inverse)
  169. rmse = np.sqrt(mean_squared_error(actual_inverse, pred_inverse))
  170. r2 = r2_score(actual_inverse, pred_inverse)
  171. print(f'Performance Metrics:')
  172. print(f'MAE: {mae:.4f}')
  173. print(f'RMSE: {rmse:.4f}')
  174. print(f'R²: {r2:.4f}')