from pandas import *
from numpy import *
df = DataFrame({
'sp' : ['MM1', 'MM1', 'MM1', 'MM2', 'MM2', 'MM2', 'MM4', 'MM4', 'MM4'],
'mt' : ['S1', 'S1', 'S3', 'S3', 'S4', 'S4', 'S2', 'S2', 'S2'],
'val' : ['a', 'n', 'cb', 'mk', 'bg', 'dgb', 'rd', 'cb', 'uyi'],
'count' : [3,2,5,8,10,1,2,2,7]
})
df
df_grouped = df.groupby(['sp', 'mt']).agg({'count':'max'})
df_grouped
df_grouped = df_grouped.reset_index()
df_grouped = df_grouped.rename(columns={'count':'count_max'})
df = merge(df, df_grouped, how='left', on=['sp', 'mt'])
df
df = df[df['count'] == df['count_max']]
df
df_grouped = df.groupby(['sp','mt']).agg({'count':'max'})
df_grouped
df_g2=df_grouped(['mt','count']).agg({'cnt2':'max'})
df_g2
df_grouped.rename(columns={'count':'count_max'})
#A little Excel type stuff for Pandas
#Add a Row num
df['count'] = pandas.Series(range(len(df)))
#Add up a running total
df['running total'] = df['count'].cumsum()
#Difference from a scalar
df['diff'] = 2014 - df['count']
#Moving Average
df['moving average'] = df['running total'] / df['count'].astype('float')
#Perform If type statements (If 'moving average > 3' then 1)
df['new column'] = 0
mask = df['moving average'] >= 3
df.loc[mask, 'new column'] = 1
df