seaborn-analyzer:回归、分类和分布的数据可视化
项目描述
seaborn-analyzer
使用 Seaborn 库的数据分析和可视化工具。
用法
使用 CustomPairPlot 类的示例
from seaborn_analyzer import CustomPairPlot
import seaborn as sns
titanic = sns.load_dataset("titanic")
cp = CustomPairPlot()
cp.pairanalyzer(titanic, hue='survived')
要求
seaborn-analyzer 0.2.13 需要
Python >=3.6
Numpy >=1.20.3
Pandas >=1.2.4
Matplotlib >=3.1.3
Seaborn >=0.11.1
Scipy >=1.6.3
Scikit-learn >=0.24.2
LightGBM >=3.3.2
安装 seaborn-analyzer
使用 pip 从 PyPI 安装二进制 wheel 包
$ pip install seaborn-analyzer
支持
可以在 https://github.com/c60evaporator/seaborn-analyzer/issues 报告错误
API 参考
seaborn-analyzer 中包含以下类和方法
CustomPairPlot 类
方法名称 |
概括 |
API 文档 |
例子 |
---|---|---|---|
pairanalyzer |
绘制同时包含散点图和相关系数矩阵的成对图(pair plot)。 |
hist 类
方法名称 |
概括 |
API 文档 |
例子 |
---|---|---|---|
plot_normality |
绘制正态性检验结果和 QQ 图。 |
||
fit_dist |
通过最大似然估计拟合分布并计算拟合分数。 |
classplot 类
方法名称 |
概括 |
API 文档 |
例子 |
---|---|---|---|
class_separator_plot |
绘制任何 scikit-learn 分类器的类分隔线。 |
||
class_proba_plot |
绘制任何 scikit-learn 分类器的类预测概率。 |
||
plot_roc_curve_multiclass |
在多类分类器中绘制 ROC 曲线。 |
||
roc_plot |
绘制具有交叉验证的 ROC 曲线。 |
regplot 类
方法名称 |
概括 |
API 文档 |
例子 |
---|---|---|---|
linear_plot |
绘制线性回归线并计算皮尔逊相关系数。 |
||
regression_pred_true |
绘制任何 scikit-learn 回归器的预测值与真实值的散点图。 |
||
regression_plot_1d |
用一维解释变量绘制任何 scikit-learn 回归器的回归线。 |
||
regression_heat_plot |
绘制具有 2 到 4 维解释变量的任何 scikit-learn 回归器的回归热图。 |
||
average_plot |
通过折线图绘制一个解释变量与预测值之间的关系。 |
例子
CustomPairPlot.pairanalyzer
from seaborn_analyzer import CustomPairPlot
import seaborn as sns
titanic = sns.load_dataset("titanic")
cp = CustomPairPlot()
cp.pairanalyzer(titanic, hue='survived')
hist.plot_normality
from seaborn_analyzer import hist
from sklearn.datasets import load_boston
import pandas as pd
df = pd.DataFrame(load_boston().data, columns= load_boston().feature_names)
hist.plot_normality(df, x='LSTAT', norm_hist=False, rounddigit=5)
hist.fit_dist
from seaborn_analyzer import hist
from sklearn.datasets import load_boston
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
df = pd.DataFrame(load_boston().data, columns= load_boston().feature_names)
all_params, all_scores = hist.fit_dist(df, x='LSTAT', dist=['norm', 'gamma', 'lognorm', 'uniform'])
df_scores = pd.DataFrame(all_scores).T
df_scores
classplot.class_separator_plot
import seaborn as sns
from sklearn.svm import SVC
from seaborn_analyzer import classplot
iris = sns.load_dataset("iris")
clf = SVC()
classplot.class_separator_plot(clf, ['petal_width', 'petal_length'], 'species', iris)
classplot.class_proba_plot
import seaborn as sns
from sklearn.svm import SVC
from seaborn_analyzer import classplot
iris = sns.load_dataset("iris")
clf = SVC()
classplot.class_proba_plot(clf, ['petal_width', 'petal_length'], 'species', iris,
proba_type='imshow')
classplot.plot_roc_curve_multiclass
import seaborn as sns
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from seaborn_analyzer import classplot
# Load dataset
iris = sns.load_dataset("iris")
OBJECTIVE_VARIALBLE = 'species' # Objective variable
USE_EXPLANATORY = ['petal_width', 'petal_length', 'sepal_width', 'sepal_length'] # Explantory variables
y = iris[OBJECTIVE_VARIALBLE].values
X = iris[USE_EXPLANATORY].values
# Add random noise features
random_state = np.random.RandomState(0)
n_samples, n_features = X.shape
X = np.c_[X, random_state.randn(n_samples, 10 * n_features)]
# Plot ROC curve in multiclass classification
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, random_state=42)
estimator = SVC(probability=True, random_state=42)
classplot.plot_roc_curve_multiclass(estimator, X_train, y_train,
X_test=X_test, y_test=y_test)
plt.plot([0, 1], [0, 1], label='Chance', alpha=0.8,
lw=2, color='red', linestyle='--')
plt.legend(loc='lower right')
classplot.roc_plot
from lightgbm import LGBMClassifier
import seaborn as sns
import matplotlib.pyplot as plt
from seaborn_analyzer import classplot
# Load dataset
iris = sns.load_dataset("iris")
OBJECTIVE_VARIALBLE = 'species' # Objective variable
USE_EXPLANATORY = ['petal_width', 'petal_length', 'sepal_width', 'sepal_length'] # Explantory variables
y = iris[OBJECTIVE_VARIALBLE].values
X = iris[USE_EXPLANATORY].values
fit_params = {'verbose': 0,
'early_stopping_rounds': 10,
'eval_metric': 'rmse',
'eval_set': [(X, y)]
}
# Plot ROC curve with cross validation in multiclass classification
estimator = LGBMClassifier(random_state=42, n_estimators=10000)
fig, axes = plt.subplots(4, 1, figsize=(6, 24))
classplot.roc_plot(estimator, X, y, ax=axes, cv=3, fit_params=fit_params)
regplot.linear_plot
from seaborn_analyzer import regplot
import seaborn as sns
iris = sns.load_dataset("iris")
regplot.linear_plot(x='petal_length', y='sepal_length', data=iris)
regplot.regression_pred_true
import pandas as pd
from seaborn_analyzer import regplot
import seaborn as sns
from sklearn.linear_model import LinearRegression
df_temp = pd.read_csv(f'./sample_data/temp_pressure.csv')
regplot.regression_pred_true(LinearRegression(), x=['altitude', 'latitude'], y='temperature', data=df_temp)
regplot.regression_plot_1d
from seaborn_analyzer import regplot
import seaborn as sns
from sklearn.svm import SVR
iris = sns.load_dataset("iris")
regplot.regression_plot_1d(SVR(), x='petal_length', y='sepal_length', data=iris)
regplot.regression_heat_plot
import pandas as pd
from sklearn.linear_model import LinearRegression
from seaborn_analyzer import regplot
df_temp = pd.read_csv(f'./sample_data/temp_pressure.csv')
regplot.regression_heat_plot(LinearRegression(), x=['altitude', 'latitude'], y='temperature', data=df_temp)
regplot.average_plot
import seaborn as sns
from sklearn.svm import SVR
from seaborn_analyzer import regplot
iris = sns.load_dataset("iris")
svr = SVR()
features = ['petal_width', 'petal_length', 'sepal_width']
X = iris[features].values
y = iris['sepal_length'].values
regplot.average_plot(svr, X, y, x_colnames=features, cv=3)
联系方式
如果您对 seaborn-analyzer 有任何疑问或意见,请随时通过电子邮件与我联系:c60evaporator@gmail.com,或通过 Twitter:https://twitter.com/c60evaporator 。该项目托管在 https://github.com/c60evaporator/seaborn-analyzer