Pandas

Setting many rows at once in pandas

# Positional assignment: sets column `col_index` to 0 for rows start_row up to,
# but not including, end_row (iloc slices exclude the end position).
df.iloc[start_row:end_row,col_index] = 0

Reset index

# When groupby doesn't work (or when in doubt), reset the index:
# reset_index moves the current index into regular column(s) and installs a
# default integer index; inplace=True modifies df itself and returns None.
df.reset_index(inplace=True)

Aggregate features

# aggregate features (mainly count)
def add_aggregate_features(df, groupby_cols, aggregate_on, rename=None):
    '''
    Compute per-group aggregates and attach them to every row of df.

    groupby_cols is a list of column names to group by
    aggregate_on is a dictionary, each entry is the kv pair
        column_name: aggregation (string, function, or list of those)
    rename is an optional dictionary of old names to new names, applied to
        the aggregate columns before they are merged in

    Each aggregate column is named "<column_name>_<aggregation>", e.g.
    {'price': ['mean', 'max']} yields 'price_mean' and 'price_max'.

    Returns a new DataFrame (df itself is not modified) holding the original
    columns plus the aggregate columns. Row order follows df, but the result
    carries a fresh default index because the merge is done on columns.
    '''
    groupby_cols = list(groupby_cols)

    # Wrap every aggregation in a list so that agg() always produces
    # two-level (column, aggregation) labels; a bare string/function would
    # otherwise keep the plain column name and collide with the source column.
    agg_spec = {
        col: list(aggs) if isinstance(aggs, (list, tuple)) else [aggs]
        for col, aggs in aggregate_on.items()
    }

    all_cols = groupby_cols + list(agg_spec.keys())
    group = df[all_cols].groupby(groupby_cols).agg(agg_spec)

    # Flatten the labels before reset_index so the group-by columns keep
    # their original names instead of picking up a trailing underscore.
    group.columns = ['_'.join(str(part) for part in col) for col in group.columns]
    group = group.reset_index()

    if rename:
        # rename returns a new frame; the result must be kept.
        group = group.rename(columns=rename)

    # Left merge keeps every row of df, including rows whose group key is NaN
    # (those get NaN aggregates because groupby drops NaN keys by default).
    return df.merge(group, on=groupby_cols, how='left')

def add_count(df, cols, target):
    '''
    Request a per-group count of `target`, grouped by `cols`.

    cols is a list of column-name strings to group by
    target is the name of the column whose values are counted

    Delegates to add_aggregate_features, asking it to rename the
    "<target>_count" aggregate to "<col1>_<col2>_..._count" (the group-by
    column names joined with underscores). Returns whatever
    add_aggregate_features returns.
    '''
    colname = '_'.join(cols) + '_count'
    return add_aggregate_features(df, cols, {target: 'count'}, {target + "_count": colname})

Another way to add count

Last updated

Was this helpful?