Skip to content Skip to sidebar Skip to footer

Melt And Merge On Substring - Python & Pandas

I have data which has data like id name model_# ms bp1 cd1 sf1 sa1 rq1 bp2 cd2 sf2 sa2 rq2 ... 1 John 23984 1 23 234 124

Solution 1:

Solution

Set the index... Use a clever column groupby... Another clever function to apply...

# Park the identifier columns in the index so that only the measurement
# columns (bp1, cd1, ...) are left as regular columns.
d1 = df.set_index(keys=['id', 'name', 'model_#', 'ms'])

def melt_(df):
    """Melt every column of *df* while keeping its index levels as the index.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose named index levels identify each row.

    Returns
    -------
    pandas.DataFrame
        Long-format frame produced by ``DataFrame.melt`` (its default
        ``variable`` / ``value`` columns), indexed by the same levels as *df*.
    """
    # A plain list keeps melt/set_index independent of the index-names container type.
    id_vars = list(df.index.names)
    return df.reset_index().melt(id_vars=id_vars).set_index(id_vars)


# Group the columns by their non-digit prefix ('bp1', 'bp2' -> 'bp') and melt
# each group on its own. The pattern is a raw string so that '\D' reaches the
# regex engine instead of being treated as a (invalid) string escape.
# NOTE(review): `groupby(..., axis=1)` is deprecated in recent pandas
# releases -- confirm against the installed version.
d2 = d1.groupby(d1.columns.str.extract(r'(\D+)', expand=False), axis=1).apply(melt_)

# Swap the two column levels and join each pair with '_' to get flat labels.
d2.columns = d2.columns.swaplevel(0, 1).map('_'.join)
d2.reset_index()

   id  name  model_#  ms variable_bp  value_bp variable_cd  value_cd variable_rq  value_rq variable_sa  value_sa variable_sf  value_sf
0   1  John    23984   1         bp1        23         cd1       234         rq1       252         sa1        25         sf1       124
1   2  John    23984   2         bp1       234         cd1       234         rq1       262         sa1        62         sf1       242
2   1  John    23984   1         bp2       252         cd2        62         rq2       234         sa2       234         sf2       194
3   2  John    23984   2         bp2       622         cd2       262         rq2       262         sa2        26         sf2       622

Overly Functionalized

# Identifier columns that stay fixed while the measurement columns are melted.
cols = ['id', 'name', 'model_#', 'ms']

# Formats a two-item column label (a, b) as the flat string 'b_a'.
fmt = '{0[1]}_{0[0]}'.format


def e(d, n):
    """Pair each new column name in *n* with the dtype of the matching column of *d*."""
    return dict(zip(n, d.dtypes))


def i(d, n):
    """Rebuild *d* with column labels *n*, casting each column back to its dtype in *d*."""
    return pd.DataFrame(d.values, d.index, n).astype(e(d, n))


def h(d):
    """Flatten the column labels of *d* with ``fmt`` and move the index into columns."""
    return i(d, d.columns.map(fmt)).reset_index()


def m(d):
    """Melt *d* on the identifier columns ``cols`` and keep them as the index."""
    return d.reset_index().melt(cols).set_index(cols)

# Keep the identifier columns in the index; only measurement columns remain.
d1 = df.set_index(cols)
# Non-digit prefix of every column label ('bp1' -> 'bp'). Raw string so that
# '\D' is passed to the regex engine rather than parsed as a string escape.
g = d1.columns.str.extract(r'(\D+)', expand=False)
# Melt each prefix group with `m`, then flatten the resulting labels with `h`.
# NOTE(review): `groupby(..., axis=1)` is deprecated in recent pandas
# releases -- confirm against the installed version.
d1.groupby(g, axis=1).apply(m).pipe(h)

   id  name  model_#  ms variable_bp  value_bp variable_cd  value_cd variable_rq  value_rq variable_sa  value_sa variable_sf  value_sf
0   1  John    23984   1         bp1        23         cd1       234         rq1       252         sa1        25         sf1       124
1   2  John    23984   2         bp1       234         cd1       234         rq1       262         sa1        62         sf1       242
2   1  John    23984   1         bp2       252         cd2        62         rq2       234         sa2       234         sf2       194
3   2  John    23984   2         bp2       622         cd2       262         rq2       262         sa2        26         sf2       622

Old Answer

This is far from pretty and I'm not even sure this is what you want.

# Keep the identifier columns in the index; only measurement columns remain.
d1 = df.set_index(['id', 'name', 'model_#', 'ms'])

# Split every column label such as 'bp1' into its non-digit prefix and its
# digit suffix; the suffix level is named 'variable'. The pattern is a raw
# string so '\D' / '\d' are regex classes, not (invalid) string escapes.
cidx = pd.MultiIndex.from_tuples(
    d1.columns.to_series().str.extract(r'(\D+)(\d+)', expand=False).values.tolist(),
    names=[None, 'variable']
)

d1.columns = cidx

# Move the suffix level from the columns into the row index.
d2 = d1.sort_index(axis=1).stack()

# Rebuild the original label (prefix + suffix) for every cell of d2.
# The broadcast is done on NumPy object arrays because pandas >= 2.0 no
# longer supports multi-dimensional indexing such as `index[:, None]`.
variables = pd.DataFrame(
    (
        d2.columns.to_numpy(dtype=object)[None, :]
        + d2.index.get_level_values('variable').to_numpy(dtype=object)[:, None]
    ).tolist(),
    d2.index, d2.columns
)

# Put labels and values side by side under the keys 'variable' / 'value',
# drop the suffix level from the index, and order the columns by prefix.
d3 = pd.concat(
    [variables, d2], axis=1, keys=['variable', 'value']
).reset_index('variable', drop=True).sort_index(axis=1, level=1, sort_remaining=False)

# Flatten the two column levels into labels joined with '_'.
d3.columns = d3.columns.map('_'.join)

d3.reset_index()

   id  name  model_#  ms variable_bp  value_bp variable_cd  value_cd variable_rq  value_rq variable_sa  value_sa variable_sf  value_sf
0   1  John    23984   1         bp1        23         cd1       234         rq1       252         sa1        25         sf1       124
1   1  John    23984   1         bp2       252         cd2        62         rq2       234         sa2       234         sf2       194
2   2  John    23984   2         bp1       234         cd1       234         rq1       262         sa1        62         sf1       242
3   2  John    23984   2         bp2       622         cd2       262         rq2       262         sa2        26         sf2       622

Post a Comment for "Melt And Merge On Substring - Python & Pandas"