基于逻辑回归-决策树的TE过程故障诊断及可解释性（Python）

大家好，欢迎来到IT知识分享网。

import pyreadr
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

# Only the training data is used.
df_FaultFree = pyreadr.read_r(r'The TEP Dataset\TEP_FaultFree_Training.RData')['fault_free_training']
df_Faulty = pyreadr.read_r(r'The TEP Dataset\TEP_Faulty_Training.RData')['faulty_training']

# Join the fault-free and faulty datasets into a single frame.
DF = pd.concat([df_FaultFree, df_Faulty])
DF.head()

Scale the dataset

from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Fit the scaler on the fault-free data only, skipping the first three
# columns (presumably faultNumber / simulationRun / sample -- verify against
# the dataset), so faulty samples are later scaled relative to normal operation.
sc = StandardScaler()
sc.fit(df_FaultFree.iloc[:, 3:])

StandardScaler()

Select a subset of the Entire Dataset

# Select a subset of the data from simulation runs 1 to 10, and filter out
# fault numbers 9 and 15.
reduced_data = DF.loc[(DF['simulationRun'] >= 1) & (DF['simulationRun'] <= 10)]
reduced_data = reduced_data[reduced_data['faultNumber'] != 9]
reduced_data = reduced_data[reduced_data['faultNumber'] != 15]

# Apply the scaling transformation to the feature columns (column 3 onward)
# of the filtered dataset, keeping only rows after the 20th sample.
X = sc.transform(reduced_data[reduced_data['sample'] > 20].iloc[:, 3:])

# Extract the fault numbers of the same rows (after the 20th sample) as labels.
Y = reduced_data[reduced_data['sample'] > 20]['faultNumber'].values

Add a random number column to X as a control feature

from sklearn.model_selection import train_test_split

# Append a column of uniform random numbers as a control feature: it carries
# no information about the fault, so any real feature ranked below it is
# effectively noise. A seeded generator keeps the column reproducible, in line
# with the fixed random_state used for the split and the models.
rng = np.random.default_rng(42)
control_col = rng.random((len(X), 1))
X = np.hstack((X, control_col))

# Hold out 20% of the samples for testing.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

x_train.shape

(72960, 53)

Train the Logistic Regression

from sklearn.linear_model import LogisticRegression

# One-vs-rest logistic regression: one binary classifier (one coefficient row)
# per fault class.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument -- confirm against the installed version before upgrading.
LogRegCls = LogisticRegression(max_iter=1000, random_state=0, multi_class='ovr').fit(x_train, y_train)

# Mean accuracy on the held-out test set.
LogRegCls.score(x_test, y_test)

0.5965

LogRegCls.coef_.shape  # one row of coefficients per fault class, one column per feature (incl. the control column)

(19, 53)

# Use the fitted coefficients as per-class feature importances.
feature_importance = LogRegCls.coef_

# Column names: the original feature columns plus the appended control column.
feature_name = np.array(reduced_data.columns[3:])
feature_name = np.append(feature_name, "control_feature")

# One row per fault class, one column per feature.
Coeff_df = pd.DataFrame(data=feature_importance, columns=feature_name)

Feature Importance for each fault type

# Create one bar plot per row of the DataFrame, i.e. per fault class.
# Rows of Coeff_df are in the same order as LogRegCls.classes_, so the two
# can be walked in lockstep.
for fault_class, (_, class_coeffs) in zip(LogRegCls.classes_, Coeff_df.iterrows()):
    plt.figure(figsize=(25, 5))
    sns.barplot(x=class_coeffs.index, y=class_coeffs.values)
    plt.title(f"Fault class: {fault_class}")
    plt.xticks(rotation=90)
    plt.show()

Combined feature Importance of all the Fault classes

# Combine the classes by averaging the absolute coefficient of each feature
# across all fault classes, then rank features from most to least important.
row_data = Coeff_df.abs().mean()
row_data = row_data.sort_values(ascending=False)

plt.figure(figsize=(25, 5))
sns.barplot(x=row_data.index, y=row_data.values)
plt.title("Combine feature Importance")
plt.xticks(rotation=90)
plt.show()

Train The Decision Tree

from sklearn.tree import DecisionTreeClassifier

# Decision tree limited to a depth of 50, with a fixed seed for reproducibility.
DT = DecisionTreeClassifier(random_state=0, max_depth=50)

DT.fit(x_train, y_train)

DecisionTreeClassifier(max_depth=50, random_state=0)

DT.score(x_train,y_train)  # mean accuracy on the training split

0.10526

DT.score(x_test,y_test)  # mean accuracy on the held-out test split

0.03509

# Get the feature importances computed by the fitted tree.
importances = DT.feature_importances_

# Feature names: the original feature columns plus the appended control column.
feature_name = np.array(reduced_data.columns[3:])
feature_name = np.append(feature_name, "control_feature")

# Create a DataFrame from the two arrays.
feature_importance = pd.DataFrame({'feature_name': feature_name, 'importances': importances})

# Sort the DataFrame by importances in descending order.
feature_importance = feature_importance.sort_values('importances', ascending=False)

plt.figure(figsize=(25, 5))
sns.barplot(x=feature_importance.feature_name, y=feature_importance.importances)
plt.title("Decision Tree feature Importance")
plt.xticks(rotation=90)
plt.show()

Visualize the Decision Tree as a bunch of If… Else rule structure

from sklearn import tree

# The tree was trained on X with the random control column appended, so it has
# one more feature than reduced_data has feature columns. The name list must
# include "control_feature"; otherwise any node that splits on the control
# column indexes past the end of feature_names.
tree_feature_names = list(reduced_data.columns[3:]) + ["control_feature"]

fig = plt.figure(figsize=(25, 20))
_ = tree.plot_tree(DT, feature_names=tree_feature_names, filled=True)

知乎学术咨询：

https://www.zhihu.com/consult/people/?isMe=1

担任《Mechanical System and Signal Processing》《中国电机工程学报》等期刊审稿专家，擅长领域：信号滤波/降噪，机器学习/深度学习，时间序列预分析/预测，设备故障诊断/缺陷检测/异常检测。

分割线分割线

心电信号的傅里叶变换滤波（L1和L2傅里叶变换）、小波处理和PQRST波检测等（Python,Ipynb文件）

import numpy as np
import matplotlib.pyplot as plt
import scipy
from scipy.io import loadmat
from scipy.optimize import minimize
from scipy.optimize import linprog


def _plot_ecg(signal, title):
    """Plot a single ECG trace against its sample index under the given title."""
    plt.figure(figsize=(12, 6))
    plt.plot(signal, label='ECG')
    plt.title(title)
    plt.xlabel('Sample')
    plt.ylabel('Amplitude')
    plt.legend()
    plt.tight_layout()
    plt.show()


# First recording: take the first row of the 'val' array.
mat_data = loadmat('s0017lrem.mat')
ecg_signal = mat_data['val'][0].squeeze()
_plot_ecg(ecg_signal, 'S0017lrem Signal')

# Second recording. The title now names the file actually plotted; it was
# previously a copy-paste of the first recording's title.
mat_data = loadmat('08378m.mat')
ecg_signal = mat_data['val'][0].squeeze()
_plot_ecg(ecg_signal, '08378m Signal')

showing original signal and L1 and L2

wavelet + butterworth PQRST Locating

QRS finding for 08378m

wavelets

def plot_wavelet_level_3(signal, fs, level=4, wavelet='sym4'):
    """Plot the wavelet approximation coefficients of *signal*.

    Despite the historical name, the decomposition depth defaults to 4,
    which is what the code and plot labels have always used.

    Args:
        signal: 1-D sequence of samples to decompose.
        fs: Sampling frequency. Currently unused; kept so existing callers
            keep working.
        level: Decomposition depth passed to ``pywt.wavedec``.
        wavelet: Wavelet name passed to ``pywt.wavedec``.
    """
    # Local import: the snippet uses pywt but no import of it is visible.
    import pywt

    # wavedec returns the coefficient list with the approximation
    # coefficients first, so coeffs[0] is what gets plotted.
    coeffs = pywt.wavedec(signal, wavelet, level=level)

    plt.figure(figsize=(15, 6))
    plt.plot(coeffs[0], label=f"Approximation Coefficients (Level {level})")
    plt.title(f"Wavelet Approximation at Level {level}")
    plt.xlabel('Samples')
    plt.ylabel('Amplitude')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()


# Load ECG data from a .mat file.
mat_data = loadmat('s0017lrem.mat')
fs = 1000  # Sampling frequency
ecg_signal = mat_data['val'][0].squeeze()

# Plot the wavelet transform of the ECG signal.
plot_wavelet_level_3(ecg_signal, fs)

fast and slow components example

完整代码：

https://mbd.pub/o/bread/mbd-Z52blZlr

基于维纳滤波和卡尔曼滤波的ECG信号降噪（MATLAB）

完整代码：

https://mbd.pub/o/bread/mbd-Z52blpxq

应用机器学习算法，包括SES、ARIMA、ANN和LSTM，以预测以太坊加密货币的价格（Python,Ipynb文件）

研究分析了不同模型在预测未来1天、10天和30天价格方面的表现，旨在提高预测的准确性和可靠性，同时为金融分析和决策提供见解和指导。

import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, r2_score, mean_squared_error, root_mean_squared_error
import matplotlib.pylab as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasRegressor
import tensorflow as tf
from keras._tf_keras.keras.models import Model
from keras._tf_keras.keras.layers import Dense, Input, Dropout
from keras._tf_keras.keras.regularizers import l2

# Seed both NumPy and TensorFlow so runs are repeatable.
np.random.seed(123)
tf.random.set_seed(123)

完整代码：

https://mbd.pub/o/bread/mbd-Z52blp1w

采用XGBoost集成方案提高UHPC的抗压强度预测（Python）

import pandas as pd import numpy as np import seaborn as sns from xgboost import XGBRegressor import matplotlib.pyplot as plt import shap from sklearn.ensemble import AdaBoostRegressor from sklearn.ensemble import BaggingRegressor from sklearn.ensemble import VotingRegressor from sklearn.ensemble import StackingRegressor from sklearn.linear_model import LinearRegression from sklearn.model_selection import train_test_split from sklearn.model_selection import KFold, cross_validate from sklearn.metrics import mean_squared_error, mean_absolute_error from sklearn.inspection import PartialDependenceDisplay

压缩包=数据＋代码＋参考文献

完整代码：

https://mbd.pub/o/bread/mbd-Z52bl5lp

免责声明：本站所有文章内容,图片，视频等均是来源于用户投稿和互联网及文摘转载整编而成，不代表本站观点，不承担相关法律责任。其著作权各归其原作者或其出版社所有。如发现本站有涉嫌抄袭侵权/违法违规的内容,侵犯到您的权益，请在线联系站长,一经查实,本站将立刻删除。本文来自网络,若有侵权，请联系删除，如若转载，请注明出处：https://haidsoft.com/173385.html

基于逻辑回归-决策树的TE过程故障诊断及可解释性（Python）

Scale the dataset

Select a subset of the Entire Dataset

Add a random number column to X as a control feature

Train the Logistic Regression

Feature Importance for each fault type

Combined feature Importance of all the Fault classes

Train The Decision Tree

Visualize the Decision Tree as a bunch of If… Else rule structure

showing original signal and L1 and L2

wavelet + butterworth PQRST Locating

QRS finding for 08378m

wavelets

fast and slow components example

相关推荐

发表回复