Compare View

switch
from
...
to
 
Commits (2)
LICENSE
... ... @@ -620,19 +620,9 @@ copy of the Program in return for a fee.
620 620  
621 621 END OF TERMS AND CONDITIONS
622 622  
623   - How to Apply These Terms to Your New Programs
624   -
625   - If you develop a new program, and you want it to be of the greatest
626   -possible use to the public, the best way to achieve this is to make it
627   -free software which everyone can redistribute and change under these terms.
628   -
629   - To do so, attach the following notices to the program. It is safest
630   -to attach them to the start of each source file to most effectively
631   -state the exclusion of warranty; and each file should have at least
632   -the "copyright" line and a pointer to where the full notice is found.
633   -
634 623 Identification of potential RNA G-quadruplexes by G4RNA screener.
635   - Copyright (C) 2017 Jean-Michel Garant
  624 + doi: 10.1093/bioinformatics/btx498.
  625 + Copyright (C) 2018 Jean-Michel Garant
636 626  
637 627 This program is free software: you can redistribute it and/or modify
638 628 it under the terms of the GNU General Public License as published by
... ... @@ -647,24 +637,7 @@ the "copyright" line and a pointer to where the full notice is found.
647 637 You should have received a copy of the GNU General Public License
648 638 along with this program. If not, see <http://www.gnu.org/licenses/>.
649 639  
650   -Also add information on how to contact you by electronic and paper mail.
651   -
652   - If the program does terminal interaction, make it output a short
653   -notice like this when it starts in an interactive mode:
654   -
655   - G4RNA screener Copyright (C) 2017 Jean-Michel Garant
  640 + G4RNA screener Copyright (C) 2018 Jean-Michel Garant
656 641 This program comes with ABSOLUTELY NO WARRANTY. This is free
657 642 software, and you are welcome to redistribute it under certain
658   - conditions.
659   -
660   - You should also get your employer (if you work as a programmer) or school,
661   -if any, to sign a "copyright disclaimer" for the program, if necessary.
662   -For more information on this, and how to apply and follow the GNU GPL, see
663   -<http://www.gnu.org/licenses/>.
664   -
665   - The GNU General Public License does not permit incorporating your program
666   -into proprietary programs. If your program is a subroutine library, you
667   -may consider it more useful to permit linking proprietary applications with
668   -the library. If this is what you want to do, use the GNU Lesser General
669   -Public License instead of this License. But first, please read
670   -<http://www.gnu.org/philosophy/why-not-lgpl.html>.
  643 + conditions <http://www.gnu.org/licenses/>.
... ...
MANUAL.md
1 1 <Use a Markdown document viewer to display this file as an HTML file in your>
2   -<internet browser:>
3   -<Markdown Viewer 1.12 (Firefox extension) *recommended>
4   -<Markdown Reader 1.0.12 (Chrome extension)>
  2 +<internet browser>
5 3  
6 4 **G4RNA SCREENER MANUAL**
7 5 =========================
... ...
README.md
... ... @@ -19,7 +19,9 @@ gitlab username: _J-Michel_
19 19  
20 20 gitlab url: *gitlabscottgroup.med.usherbrooke.ca/J-Michel/g4rna_screener*
21 21  
22   -Version: G4RNA screener 0.2
  22 +Version: G4RNA screener 0.3
  23 +
  24 +**Please consider cloning/downloading a stable branch**
23 25  
24 26  
25 27 ## **DEPENDENCIES**
... ...
g4base.py
1 1 #!/usr/bin/env python
2 2  
  3 +# Identification of potential RNA G-quadruplexes by G4RNA screener.
  4 +# Copyright (C) 2018 Jean-Michel Garant
  5 +#
  6 +# This program is free software: you can redistribute it and/or modify
  7 +# it under the terms of the GNU General Public License as published by
  8 +# the Free Software Foundation, either version 3 of the License, or
  9 +# (at your option) any later version.
  10 +#
  11 +# This program is distributed in the hope that it will be useful,
  12 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 +# GNU General Public License for more details.
  15 +#
  16 +# You should have received a copy of the GNU General Public License
  17 +# along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 +
3 19 from utils import *
4 20 from pybrain.datasets import ClassificationDataSet
5 21  
... ...
screen.py
1 1 #!/usr/bin/env python
2 2  
  3 +# Identification of potential RNA G-quadruplexes by G4RNA screener.
  4 +# Copyright (C) 2018 Jean-Michel Garant
  5 +#
  6 +# This program is free software: you can redistribute it and/or modify
  7 +# it under the terms of the GNU General Public License as published by
  8 +# the Free Software Foundation, either version 3 of the License, or
  9 +# (at your option) any later version.
  10 +#
  11 +# This program is distributed in the hope that it will be useful,
  12 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 +# GNU General Public License for more details.
  15 +#
  16 +# You should have received a copy of the GNU General Public License
  17 +# along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 +
3 19 from g4base import *
4 20 import os
5 21  
... ... @@ -38,23 +54,18 @@ def apply_network(ann,
38 54 or str(fasta)[-4:] in ['.fas', '.txt']\
39 55 or str(fasta)[-6:] == '.fasta'\
40 56 or fasta == "/dev/stdin":
41   - ### without try/except since it catches most errors
42 57 RNome_df = gen_G4RNA_df(fasta_fetcher(fasta, 0, 0, verbose=verbose),
43 58 columns, 1, int(wdw_len), int(wdw_step), verbose=verbose)
44   -# try:
45   -# RNome_df = gen_G4RNA_df(fasta_fetcher(fasta, 0, 0, verbose=verbose),
46   -# columns, 1, int(wdw_len), int(wdw_step), verbose=verbose)
47   -# except:
48   -# screen_usage(52, 'fasta format not respected')
49 59 else:
50 60 screen_usage(52, 'fasta input not specified or not supported')
51 61 if 'G4NN' in columns:
52 62 network_file = open(ann,'r')
53 63 ann = pickle.load(network_file)
54 64 network_file.close()
55   - RNome_trans_df = kmer_transfo(RNome_df, 3, 'length', 'sequence', 'g4',
56   - int(wdw_len), jellyfish=False, overlapped=True,
57   - verbose=verbose)
  65 + RNome_trans_df = trimer_transfo(RNome_df, 'sequence', verbose=verbose)
  66 +# RNome_trans_df = kmer_transfo(RNome_df, 3, 'length', 'sequence', 'g4',
  67 +# int(wdw_len), jellyfish=False, overlapped=True,
  68 +# verbose=verbose)
58 69 RNome_df = submit_seq(ann, RNome_trans_df.drop('G4NN',axis=1),
59 70 [c for c in columns if c != 'G4NN'], "G4NN",
60 71 verbose=verbose)
... ... @@ -138,7 +149,7 @@ def main():
138 149 if arg in ["-?","--help"]:
139 150 screen_usage()
140 151 elif arg in ["-V","--version"]:
141   - print "Version: G4RNA screener 0.2"
  152 + print "Version: G4RNA screener 0.3"
142 153 sys.exit(0)
143 154 elif arg in ["-b","--bedgraph",
144 155 "-v","--verbose",
... ...
utils.py
1 1 #!/usr/bin/env python
2 2  
  3 +# Identification of potential RNA G-quadruplexes by G4RNA screener.
  4 +# Copyright (C) 2018 Jean-Michel Garant
  5 +#
  6 +# This program is free software: you can redistribute it and/or modify
  7 +# it under the terms of the GNU General Public License as published by
  8 +# the Free Software Foundation, either version 3 of the License, or
  9 +# (at your option) any later version.
  10 +#
  11 +# This program is distributed in the hope that it will be useful,
  12 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 +# GNU General Public License for more details.
  15 +#
  16 +# You should have received a copy of the GNU General Public License
  17 +# along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 +
3 19 import time
4 20 import sys
5 21 import regex
... ... @@ -348,3 +364,28 @@ def kmer_transfo(
348 364 df.loc[row,di_ntd] = freq
349 365 verbosify(verbose, "Kmer transformed")
350 366 return df
  367 +
def trimer_transfo(
        df_,
        sequence_column,
        verbose=False):
    """
    Describe each sequence by the proportions of its overlapping trimers.

    A copy of the input dataframe is extended with one column per
    trinucleotide built from the alphabet A, U, C, G (64 columns, named
    'AAA', 'AAU', ... 'GGG'). Each value is the number of overlapping
    occurrences of that trimer in the sequence divided by
    ``len(sequence) - 2``, i.e. the number of trimer positions.
    Sequences are upper-cased and 'T' is read as 'U' for counting only;
    the sequence column itself is left unchanged.

    df_             -- pandas dataframe holding the sequences; it is
                       copied, not modified.
    sequence_column -- name of the column of df_ that contains the
                       sequences as strings.
    verbose         -- passed to verbosify() together with the message
                       "trimer transformed".

    Return pandas dataframe.

    NOTE(review): there is no guard for sequences shorter than 3 nt; the
    denominator is then zero or negative and the resulting values are not
    meaningful proportions -- confirm callers never produce such windows.
    """
    df = df_.copy()
    nts = ['A', 'U', 'C', 'G']
    # Both series are identical for every trimer, so they are computed once
    # instead of once per trimer (64 times).
    sequences = df[sequence_column].str.upper().str.replace('T', 'U')
    positions = df[sequence_column].str.len() - 2
    for nt1 in nts:
        for nt2 in nts:
            for nt3 in nts:
                trimer = nt1 + nt2 + nt3
                # Only the first nucleotide is consumed; the other two sit
                # in a lookahead so that overlapping trimers are counted.
                pattern = "(?P<" + trimer + ">" + nt1 + "(?=" + nt2 + nt3 + "))"
                df[trimer] = sequences.str.count(pattern) / positions
    verbosify(verbose, "trimer transformed")
    return df
... ...