Pandas
Mass setting some rows in pandas
df.iloc[start_row:end_row,col_index] = 0Reset index
# when groupby doesn't work (or when in doubt), reset index:
df.reset_index(inplace=True)Aggregate features
# aggregate features (mainly count)
def add_aggregate_features(df, groupby_cols, aggregate_on, rename=None):
'''
groupby_cols is a list to group by
aggregate_on dictionary, each entry is the kv pair column_name: aggregation (string, function, or list)
rename is a dictionary of old names to new names
'''
all_cols = groupby_cols + list(aggregate_on.keys())
group = df[all_cols].groupby([cols]).agg(aggregate_on).reset_index()
group.columns = ['_'.join(col).strip() for col in group.columns.values]
if rename:
group.rename(index=str, columns=rename)
df = df.merge(group, on=[groupby_cols], how='left')
def add_count(df, cols, target):
colname = '_'.join(a) + '_count'
add_aggregate_features(df, cols, {target: 'count'}, {target+"_count": colname})Another way to add count
Print a groupby object
groupby objectLast updated
Was this helpful?