notes/pl/py/libfws/ml/pandas/docs/samples.txt
Ihar Hancharenka 5dff80e88e first
2023-03-27 16:52:17 +03:00

20 строки
706 B
Plaintext

import pandas as pd
import numpy as np
# Load the input table. process() and process_agg() below read its 'name'
# column plus the value column passed as val_col ('all' in the calls below).
df0 = pd.read_csv('data/stat0.csv')
def process(df, val_col, trim=2):
    """Trim the extreme ``val_col`` values of every ``name`` group and save them.

    Rows are ordered by ``name`` and then by ``val_col``; within each
    ``name`` group the ``trim`` smallest and the ``trim`` largest rows are
    discarded.  The surviving rows are written to
    ``data/<val_col>-percentiled.csv`` (index omitted) and returned.

    Args:
        df: DataFrame containing a ``name`` column and a ``val_col`` column.
        val_col: Label of the value column to keep and trim.
        trim: Non-negative number of rows to drop from each end of every
            group.  Defaults to 2.

    Returns:
        DataFrame with columns ``['name', val_col]``, sorted by ``name`` then
        ``val_col``, keeping the original row labels.  Groups holding at
        most ``2 * trim`` rows vanish from the result.
    """
    ordered = df.loc[:, ['name', val_col]].sort_values(by=['name', val_col])
    by_name = ordered.groupby('name')

    # Rank every row from the front and from the back of its group and keep
    # the rows that are at least `trim` away from both ends.  This replaces
    # groupby.apply(lambda grp: grp.iloc[2:-2]), which depended on apply()
    # seeing the grouping column -- deprecated in pandas 2.2.
    from_start = by_name.cumcount()
    from_end = by_name.cumcount(ascending=False)

    # Rows with a missing name belong to no group (groupby drops them by
    # default), so exclude them explicitly.
    keep = (
        ordered['name'].notna() & (from_start >= trim) & (from_end >= trim)
    ).to_numpy()

    dfr = ordered.loc[keep]
    dfr.to_csv('data/{}-percentiled.csv'.format(val_col), index=False)
    return dfr
# Box-plot the trimmed 'all' values grouped by 'name'. As a side effect,
# process() also writes data/all-percentiled.csv.
process(df0, 'all').boxplot(by='name')
def process_agg(df, val_col, trim=2):
    """Return min / median / max of ``val_col`` per ``name`` after trimming.

    Rows are ordered by ``name`` and then by ``val_col``; within each
    ``name`` group the ``trim`` smallest and the ``trim`` largest rows are
    discarded before the statistics are computed, so the summary is not
    dominated by outliers.

    Args:
        df: DataFrame containing a ``name`` column and a ``val_col`` column.
        val_col: Label of the value column to summarise.
        trim: Non-negative number of rows to drop from each end of every
            group.  Defaults to 2.

    Returns:
        DataFrame indexed by ``name`` with the columns ``min``, ``median``
        and ``max``.  Groups holding at most ``2 * trim`` rows are absent.
    """
    # Sort first so that the rows removed below are the extremes of each
    # group rather than whichever rows happen to come first/last in `df`.
    ordered = df.loc[:, ['name', val_col]].sort_values(by=['name', val_col])
    by_name = ordered.groupby('name')

    # Rank every row from the front and from the back of its group and keep
    # the rows that are at least `trim` away from both ends (avoids
    # groupby.apply on the grouping column, deprecated in pandas 2.2).
    from_start = by_name.cumcount()
    from_end = by_name.cumcount(ascending=False)

    # Rows with a missing name belong to no group; exclude them explicitly.
    keep = (
        ordered['name'].notna() & (from_start >= trim) & (from_end >= trim)
    ).to_numpy()
    trimmed = ordered.loc[keep]

    # String aggregation names give the labels 'min'/'median'/'max' directly
    # on every pandas/NumPy version, so no column-level juggling or
    # 'amin'/'amax' renaming is needed.
    return trimmed.groupby('name')[val_col].agg(['min', 'median', 'max'])
# Plot the per-'name' min / median / max of the 'all' column, computed
# after two rows have been trimmed from each end of every 'name' group.
process_agg(df0, 'all').plot()