删除 2.py

This commit is contained in:
ljy 2025-06-13 10:53:29 +08:00
parent 23fa046560
commit bd9bc1998f

145
2.py
View File

@ -1,145 +0,0 @@
import numpy as np
from scipy import stats
from collections import defaultdict
def detect_outliers_3sigma(series, threshold=3):
    """Flag values lying more than ``threshold`` standard deviations from the mean.

    Args:
        series: One-dimensional data exposing ``.name`` and boolean-mask
            indexing (presumably a pandas Series; the code relies on both).
        threshold: Multiplier applied to the standard deviation to get the
            half-width of the accepted band.

    Returns:
        dict containing the input series, its display name (``'序列'`` when
        the name is falsy), the mean, the standard deviation, both bounds,
        the outlying values, their positional indices and the threshold.
    """
    centre = np.mean(series)
    spread = np.std(series)
    half_width = threshold * spread
    low = centre - half_width
    high = centre + half_width

    # A point is an outlier when it falls strictly outside [low, high].
    is_outlier = (series < low) | (series > high)
    positions = np.where(is_outlier)[0]

    return dict(
        series=series,
        series_name=series.name or '序列',
        mean=centre,
        std=spread,
        upper_bound=high,
        lower_bound=low,
        outliers=series[is_outlier],
        outlier_indices=positions,
        threshold=threshold,
    )
def detect_outliers_iqr(series, k=1.5):
    """Flag values outside the interquartile-range fences.

    Args:
        series: One-dimensional data exposing ``.quantile``, ``.name`` and
            boolean-mask indexing (presumably a pandas Series).
        k: Multiplier applied to the interquartile range to place the fences.

    Returns:
        dict containing the input series, its display name (``'序列'`` when
        the name is falsy), the first and third quartiles, the IQR, both
        fences, the outlying values, their positional indices and ``k``.
    """
    first_quartile = series.quantile(0.25)
    third_quartile = series.quantile(0.75)
    spread = third_quartile - first_quartile

    # Fences sit k IQRs beyond the quartiles; anything strictly outside is flagged.
    margin = k * spread
    low_fence = first_quartile - margin
    high_fence = third_quartile + margin
    is_outlier = (series < low_fence) | (series > high_fence)
    positions = np.where(is_outlier)[0]

    return dict(
        series=series,
        series_name=series.name or '序列',
        q1=first_quartile,
        q3=third_quartile,
        iqr=spread,
        upper_bound=high_fence,
        lower_bound=low_fence,
        outliers=series[is_outlier],
        outlier_indices=positions,
        k=k,
    )
def detect_outliers_grubbs(series, alpha=0.05):
    """Detect outliers by applying the two-sided Grubbs test repeatedly.

    On each pass the value farthest from the mean of the remaining sample is
    tested; if its statistic exceeds the critical value it is recorded as an
    outlier, removed, and the test is repeated on what is left.

    Args:
        series: One-dimensional data exposing ``.values``, ``.name`` and
            ``.iloc`` (a pandas Series).
        alpha: Significance level of each individual test.

    Returns:
        dict with keys ``series``, ``series_name`` (``'序列'`` when the name
        is falsy), ``mean``, ``std``, ``upper_bound``, ``lower_bound``,
        ``outliers``, ``outlier_indices`` (positions in the ORIGINAL series,
        in order of removal) and ``alpha``. ``mean`` and ``std`` describe the
        sample left after removing the detected outliers; the bounds are
        ``mean ± critical * std`` for that sample and are NaN when fewer than
        three values remain, because the test is undefined there.
    """
    nan = float('nan')
    values = series.values
    # positions[j] is the original position of values[j]; both shrink together
    # so a removal never shifts the positions reported for later outliers.
    positions = np.arange(len(values))
    outlier_indices = []

    while True:
        n = len(values)
        mean = np.mean(values) if n > 0 else nan
        # The Grubbs statistic and its critical value are defined for the
        # sample standard deviation, hence ddof=1.
        std = np.std(values, ddof=1) if n > 1 else nan
        critical = nan
        if n <= 2:
            break

        t = stats.t.ppf(1 - alpha / (2 * n), n - 2)
        critical = (n - 1) / np.sqrt(n) * np.sqrt(t**2 / (n - 2 + t**2))

        # Zero (or NaN) spread: no value can be extreme, and g would be 0/0.
        if not std > 0:
            break

        abs_dev = np.abs(values - mean)
        max_idx = int(np.argmax(abs_dev))
        g = abs_dev[max_idx] / std
        # Written as "not >" so that a NaN statistic stops the loop.
        if not g > critical:
            break

        outlier_indices.append(int(positions[max_idx]))
        values = np.delete(values, max_idx)
        positions = np.delete(positions, max_idx)

    upper_bound = mean + critical * std
    lower_bound = mean - critical * std
    return {
        'series': series,
        'series_name': series.name if series.name else '序列',
        'mean': mean,
        'std': std,
        'upper_bound': upper_bound,
        'lower_bound': lower_bound,
        # Positional selection: outlier_indices holds positions, not labels.
        'outliers': series.iloc[outlier_indices],
        'outlier_indices': outlier_indices,
        'alpha': alpha,
    }
def detect_outliers_gesd(series, alpha=0.05, max_outliers=None):
    """Detect outliers with the generalized ESD (extreme Studentized deviate) test.

    Up to ``max_outliers`` steps are run. Step ``i`` computes the statistic
    ``R_i`` (largest absolute deviation from the mean divided by the sample
    standard deviation of the values still present) and the critical value
    ``lambda_i``, then removes the most extreme value. The number of outliers
    is the LARGEST ``i`` with ``R_i > lambda_i``; the first that many removed
    values are reported. Stopping at the first non-significant step would
    reintroduce the masking problem the procedure is designed to avoid.

    Args:
        series: pandas Series of numeric observations.
        alpha: Significance level.
        max_outliers: Upper bound on the number of outliers to look for.
            Defaults to ``len(series) // 10``.

    Returns:
        dict with keys ``series``, ``series_name`` (``'序列'`` when the name
        is falsy), ``mean`` and ``std`` of the full series, ``upper_bound``,
        ``lower_bound``, ``outliers``, ``outlier_indices`` (positions in the
        original series, in order of removal), ``alpha``, ``max_outliers``,
        and the per-step ``r_values`` and ``lambda_values``.
    """
    n = len(series)
    if max_outliers is None:
        max_outliers = n // 10  # by default treat at most 10% of points as outliers

    # Relabel by position: every label handled below IS the original position,
    # stays valid after drops, and is unique even if the input index is not.
    remaining = series.reset_index(drop=True)

    candidates = []
    r_values = []
    lambda_values = []
    n_outliers = 0

    # Step i uses a t distribution with n - i - 1 degrees of freedom, which
    # must stay positive, so at most n - 2 steps can be evaluated.
    n_steps = min(max_outliers, n - 2)
    for i in range(1, n_steps + 1):
        mean = remaining.mean()
        std = remaining.std()  # sample standard deviation (ddof=1)
        # Zero (or NaN) spread: nothing left that could be extreme.
        if not std > 0:
            break

        abs_dev = (remaining - mean).abs()
        worst = abs_dev.idxmax()
        r = abs_dev.loc[worst] / std

        p = 1 - alpha / (2 * (n - i + 1))
        t = stats.t.ppf(p, n - i - 1)
        lambda_val = (n - i) * t / np.sqrt((n - i - 1 + t**2) * (n - i + 1))

        r_values.append(r)
        lambda_values.append(lambda_val)
        candidates.append(int(worst))
        if r > lambda_val:
            n_outliers = i
        remaining = remaining.drop(worst)

    outlier_indices = candidates[:n_outliers]

    if outlier_indices:
        flagged = series.iloc[outlier_indices]
        upper_bound = flagged.max() + 0.1 * series.std()
        lower_bound = flagged.min() - 0.1 * series.std()
    else:
        # Nothing flagged: fall back to a 3-standard-deviation band.
        upper_bound = series.mean() + 3 * series.std()
        lower_bound = series.mean() - 3 * series.std()

    return {
        'series': series,
        'series_name': series.name if series.name else '序列',
        'mean': np.mean(series),
        'std': np.std(series),
        'upper_bound': upper_bound,
        'lower_bound': lower_bound,
        # Positional selection: outlier_indices holds positions, not labels.
        'outliers': series.iloc[outlier_indices],
        'outlier_indices': outlier_indices,
        'alpha': alpha,
        'max_outliers': max_outliers,
        'r_values': r_values,
        'lambda_values': lambda_values,
    }