Commit 6f09f14158503aabeb4fc9d99c1f2a86e4195a6c

Authored by Jean-Michel Garant
2 parents 3495fbe1 35d02bd6

Merge branch 'master' of gitlabscottgroup.med.usherbrooke.ca:J-Michel/g4rna_screener

Showing 4 changed files with 16 additions and 12 deletions   Show diff stats
MANUAL.md
... ... @@ -89,7 +89,7 @@ gene,description,chromosome,strand,...
89 89 >
90 90 > + source: _Source of the sequence_
91 91 >
92   -> + genome_build: _Genome build_
  92 +> + genome_assembly: _Genome assembly_
93 93 >
94 94 > + start: _Start position on genomic positive strand_
95 95 >
... ...
g4base.py
... ... @@ -89,7 +89,7 @@ def gen_G4RNA_df(
89 89 [content["full_name"],content["HGNC_id"]] = list(
90 90 regex.search(
91 91 '(?<full_name>.*) \[Source:HGNC Symbol;Acc:HGNC:\
92   -(?<HGNC_ id>\d+)\]',content["gene_description"]).group('full_name','HGNC_id'))
  92 +(?<HGNC_id>\d+)\]',content["gene_description"]).group('full_name','HGNC_id'))
93 93 except:
94 94 if 'stable_id' in infos.keys() and infos.get('stable_id'):
95 95 if infos.get('stable_id')[3] == 'T':
... ...
screen.py
... ... @@ -21,7 +21,7 @@ def apply_network(ann,
21 21 if "all" in columns:
22 22 columns = ['gene_symbol','mrnaAcc','protAcc','gene_stable_id',
23 23 'transcript_stable_id','full_name','HGNC_id','identifier',
24   - 'source','genome_build','chromosome','start','end','strand',
  24 + 'source','genome_assembly','chromosome','start','end','strand',
25 25 'length','sequence','cGcC','G4H','G4NN']
26 26 else:
27 27 columns = regex.split(",", columns.strip("[]"))
... ... @@ -30,10 +30,10 @@ def apply_network(ann,
30 30 if essential not in columns:
31 31 columns.append(essential)
32 32 columns_to_drop.append(essential)
33   - if type(fasta) == 'str' and fasta[0] == '>':
  33 + if fasta[0] == '>':
34 34 RNome_df = gen_G4RNA_df(fasta_str_fetcher(fasta, verbose=verbose),
35 35 columns, 1, int(wdw_len), int(wdw_step), verbose=verbose)
36   - else:
  36 + elif fasta[-3:] == '.fa' or fasta[-4:] in ['.fas', '.txt']:
37 37 RNome_df = gen_G4RNA_df(fasta_fetcher(fasta, 0, 0, verbose=verbose),
38 38 columns, 1, int(wdw_len), int(wdw_step), verbose=verbose)
39 39 if 'G4NN' in columns:
... ... @@ -57,9 +57,7 @@ def apply_network(ann,
57 57 'chromosome'].iloc[0]].end.max()))
58 58 sys.stdout.write('track type=bedGraph name=%s visibility=full \
59 59 color=200,100,0\n'%RNome_df.drop(columns_to_drop, axis=1).columns[-1])
60   - return RNome_df.drop(columns_to_drop, axis=1).to_csv(
61   - path_or_buf=sys.stdout, sep='\t',
62   - index=(bedgraph==None), header=(bedgraph==None))
  60 + return RNome_df.drop(columns_to_drop, axis=1)
63 61  
64 62 def screen_usage(error_message=False):
65 63 """
... ... @@ -90,7 +88,7 @@ provide columns"
90 88 print " HGNC_id \t\tHGNC numeric ID"
91 89 print " identifier \t\tIdentifier"
92 90 print " source \t\tSource of the data"
93   - print " genome_build\t\tGenome build version"
  91 + print " genome_assembly\tGenome build version"
94 92 print " chromosome \t\tChromosome"
95 93 print " start \t\tStart position"
96 94 print " end \t\tEnd position"
... ... @@ -178,7 +176,13 @@ chrome,start,end,[SCORE]\n\
178 176 option_dict.get("-w") or option_dict.get("--window"),
179 177 option_dict.get("-s") or option_dict.get("--step"),
180 178 option_dict.get("-b") or option_dict.get("--bedgraph"),
181   - verbose=option_dict.get("-v") or option_dict.get("--verbose"))
  179 + verbose=option_dict.get("-v") or option_dict.get("--verbose")
  180 + ).to_csv(
  181 + path_or_buf=sys.stdout, sep='\t',
  182 + index=(option_dict.get("-b")==None or
  183 + option_dict.get("--bedgraph")==None),
  184 + header=(option_dict.get("-b")==None or
  185 + option_dict.get("--bedgraph")==None))
182 186 except:
183 187 if "-e" in option_dict.keys() or "--error" in option_dict.keys():
184 188 raise
... ...
utils.py
... ... @@ -135,7 +135,7 @@ def format_description(fas_description, verbose=None):
135 135 '''
136 136 infos = {}
137 137 try:
138   - infos = regex.match("(?<description>(?<genome_build>\D\D\d+)?\
  138 + infos = regex.match("(?<description>(?<genome_assembly>\D\D\d+)?\
139 139 (?:_(?<source>[^_]*))?_?(?:(?P<mrnaAcc>[N|X][M|R]_\d+)|(?P<protAcc>[N|X]P_\d+))\
140 140 (?: range=(?<range>(?<chromosome>chr.*):(?<start>(\d*))-(?<end>(\d*))))?\
141 141 (?: 5'pad=(?<pad5>\d*))?(?: 3'pad=(?<pad3>\d*))?(?: strand=(?<strand>.))?\
... ... @@ -146,7 +146,7 @@ def format_description(fas_description, verbose=None):
146 146 try:
147 147 infos = regex.match("(?<description>(?:(?<identifier>[^ ]*) )?\
148 148 (?<stable_id>ENS[T|G]\d*)(?:\.\d)?(?: (?<info>[^ ]*))?(?: chromosome:\
149   -(?<genome_build>[^:]*):(?<range>(?<chromosome>[^:]*):(?<start>\d*):\
  149 +(?<genome_assembly>[^:]*):(?<range>(?<chromosome>[^:]*):(?<start>\d*):\
150 150 (?<end>\d*)):(?<strand>.)(?:.*))?(?:\|(?<exon_start>\d*)\|(?<exon_end>\d*)\
151 151 )?(?:.*))",
152 152 fas_description).groupdict()
... ...