Commit 6235bcc95712f661684228f27e3270220716ac40

Authored by Jean-Michel Garant
1 parent e7acfec9
Exists in stable-0.3 and in 1 other branch: master

merge: fall back to simple implementation

Showing 1 changed file with 7 additions and 17 deletions   Show diff stats
merge.py
... ... @@ -135,30 +135,20 @@ def merge_g4rna(df, window=60, step=10,
135 135 overlap = window-step
136 136 pd.set_option('display.max_colwidth', -1)
137 137 if 'sequence' in df.columns:
138   - for ite in [0,1,2,3,4]:
139   - print -(overlap+step*ite)
  138 + for ite in range(0,len(df)):
  139 + print any(df.sequence.str[:(overlap+step*ite)].eq(
  140 + df.sequence.str[step:].shift(1)))
  141 + if any(df.sequence.str[:(overlap+step*ite)].eq(
  142 + df.sequence.str[step:].shift(1))) is False:
  143 + break
140 144 df.loc[
141 145 df.sequence.str[:(overlap+step*ite)].eq(
142 146 df.sequence.str[step:].shift(1))
143 147 , 'sequence'] = df.sequence.str[:step].shift(1) + \
144 148 df.sequence.str[:]
145   -
146   -# df.sequence.str[-(overlap+step*ite):].eq(
147   -# df.sequence.str[:(overlap+step*ite)].shift(-1))
148   -# &
149   -# df.sequence.str.len().shift(1) <= window)
150   -# , 'sequence'] = df.sequence.str[:] + \
151   -# df.sequence.str[(overlap+step*ite):].shift(-1)
152   - # df.loc[
153   - # df.sequence.str[-overlap:].eq(
154   - # df.sequence.str[:overlap].shift(-6))
155   -# , 'sequence'] = df.sequence.str[:df.sequence.str.len().shift(-6).fillna(60)]
156   - # , 'sequence'] = df.sequence.str.len().shift(-6).fillna(0)
157   - print df
158   - #print pd.DataFrame(df.sequence,df.sequence.str.len())
159 149 if 'description' in df.columns:
160 150 df_grouped = df.groupby(
161   - [df.description,df.sequence.str[-overlap:]],
  151 + [df.description,df.sequence.str[:overlap]],
162 152 sort=False,
163 153 as_index=False)
164 154 print "******",{k:agg_fct[k] for k in df.columns.drop(['description'])}
... ...