commit d1a8897a04d290ec1625d733e784b5d3aa54f235
Author: chloe.quignot <chloe.quignot@cluster-i2bc>
Date:   Thu Jun 27 15:17:53 2024 +0200

    rename demo_advanced folder

diff --git a/demo_advanced/codes/Snakefile b/demo_advanced/codes/Snakefile
deleted file mode 100644
index 0465d9d..0000000
--- a/demo_advanced/codes/Snakefile
+++ /dev/null
@@ -1,268 +0,0 @@
-wildcard_constraints:
-    sample="[a-zA-Z0-9_]+",  # matches Unicode word characters
-    colExtra="[0-9]",
-
-
-# Report name:
-report: "report/alignPipeline.rst"
-
-
-onsuccess:
-    print("Workflow finished, no error")
-
-
-onerror:
-    print("An error occurred")
-
-
-rule targets:
-    input:
-        "result/multiqc_report.html",
-        expand("result/fastqc/{sample}_fastqc.zip", sample=config["samples"]),
-        expand("result/bowtie/{sample}.bam.bai", sample=config["samples"]),
-        "result/featureCounts/counts_matrix.txt",
-
-
-rule fastqc:
-    input:
-        lambda wc: config["samples"][wc.sample]
-    output:
-        report(
-            multiext("result/fastqc/{sample}", "_fastqc.zip", "_fastqc.html"),
-            caption="report/fastqc.rst",
-            category="Step 1 fastqc",
-        ),
-    log:
-        stdout="result/logs/{sample}_fastqc.stdout",
-        stderr="result/logs/{sample}_fastqc.stderr",
-    benchmark:
-        "result/benchmark/{sample}_fastqc.tsv"
-    params:
-        outdir="result/fastqc",
-    message:
-        "Executing Fastqc with {threads} threads on the following sample : {wildcards.sample}"
-    threads: 1
-    envmodules:
-        "fastqc/fastqc_v0.11.5",
-    resources:
-        mem="1gb",
-        time_min="00:01:00",
-    shell:
-        """
-          fastqc {input} -outdir {params.outdir} 1> {log.stdout} 2> {log.stderr}
-        """
-
-
-rule bowtieIndex:
-    input:
-        fasta=lookup(dpath="references/fasta", within=config),  # config["references"]["fasta"]
-    output:
-        idxFile=ensure(
-            multiext(
-                config["bowtie"]["idx"],
-                ".1.bt2",
-                ".2.bt2",
-                ".3.bt2",
-                ".4.bt2",
-                ".rev.1.bt2",
-                ".rev.2.bt2",
-            ),
-            non_empty=True,
-        ),
-    log:
-        "result/logs/fastqc/bowtieIndex.txt",
-    benchmark:
-        "result/benchmark/bowtieIndex.tsv"
-    params:
-        idx=config["bowtie"]["idx"],
-    message:
-        "Indexing genome"
-    threads: max(1, config["bowtie"]["idxthreads"])
-    envmodules:
-        "nodes/bowtie2-2.4.2",
-    resources:
-        mem="1gb",
-        time_min="00:05:00",
-    shell:
-        """
-          bowtie2-build --threads {threads} {input.fasta} {params.idx} &> {log}
-        """
-
-
-rule bowtie2:
-    input:
-        fq=lambda wc: config["samples"][wc.sample],
-        idxFile=rules.bowtieIndex.output,
-    output:
-        bam=pipe("result/bowtie/{sample}.sam"),
-    log:
-        bwtout="result/logs/bowtie/{sample}_align.txt",
-        stderr="result/logs/bowtie/{sample}_align.stderr",
-    benchmark:
-        "result/benchmark/{sample}_bowtie2.tsv"
-    params:
-        extra=config["bowtie"]["extra"],
-        idx=config["bowtie"]["idx"],
-        tmpDir=config.get("tempDir", "tmp"),
-    message:
-        "Executing Bowtie2 with {threads} threads on the following sample: {wildcards.sample}"
-    threads: max(1, config["bowtie"]["alignthreads"])
-    envmodules:
-        "nodes/bowtie2-2.4.2",
-    resources:
-        mem="2gb",
-        time_min="00:05:00",
-    shell:
-        """
-          bowtie2 -x {params.idx} -U {input.fq} -p {threads} {params.extra} \
-                  2> {log.bwtout} > {output}
-        """
-
-
-rule samtoolsSort:
-    input:
-        "result/bowtie/{sample}.sam",
-    output:
-        report(
-            protected("result/bowtie/{sample}.bam"),
-            caption="report/bowtie2.rst",
-            category="Step 2 Bowtie2",
-        ),
-    # not a good idea because bam files are heavy
-    log:
-        "result/logs/samtools/{sample}.txt",
-    benchmark:
-        "result/benchmark/{sample}_samtoolsSort.tsv"
-    envmodules:
-        "nodes/samtools-1.11",
-    threads: 1
-    resources:
-        mem="2gb",
-        time_min="00:05:00",
-    params:
-        tmpDir=config.get("tempDir", "tmp"),
-    shell:
-        """
-          samtools sort -O 'bam' -T {params.tmpDir} {input} > {output}
-        """
-
-
-rule samtoolsIndx:
-    input:
-        bam="result/bowtie/{sample}.bam",
-    output:
-        bai="result/bowtie/{sample}.bam.bai",
-    log:
-        "result/logs/bowtie/{sample}_index.txt",
-    threads: 1
-    resources:
-        mem="1gb",
-        time_min="00:05:00",
-    envmodules:
-        "nodes/samtools-1.11",
-    shell:
-        """
-            samtools index -b {input.bam} &> {log}
-        """
-
-
-rule featureCounts:
-    input:
-        bam="result/bowtie/{sample}.bam",
-        annot=config["references"]["annot"],
-    output:
-        report(
-            ensure("result/featureCounts/{sample}_ftc.txt", non_empty=True),
-            caption="report/featureCounts.rst",
-            category="Step 3 featureCounts",
-        ),
-    log:
-        "result/logs/featureCounts/{sample}_ftc.txt",
-    threads: 1
-    resources:
-        mem="1gb",
-        time_min="00:10:00",
-    envmodules:
-        "subread/subread-1.5.2",
-    params:
-        extra=config["featureCounts"]["extra"],
-    shell:
-        """
-           featureCounts {params.extra} -a {input.annot} -o {output} \
-                   {input.bam} &> {log}
-        """
-
-
-rule extractCounts:
-    input:
-        "result/featureCounts/{sample}_ftc.txt",
-    output:
-        temp("result/featureCounts/{sample}_ftc{colExtra}.txt"),
-    log:
-        "result/logs/featureCounts/{sample}_extractCounts{colExtra}.txt",
-    threads: 1
-    resources:
-        mem="1gb",
-        time_min="00:05:00",
-    shell:
-        """
-          cut -f {wildcards.colExtra} {input} | sed 1d > {output} 2> {log}
-        """
-
-
-rule matrixCounts:
-    input:
-        countfile=expand(
-            "result/featureCounts/{sample}_ftc{colExtra}.txt",
-            sample=config["samples"],
-            colExtra="7",
-        ),
-        geneID=expand(
-            "result/featureCounts/{sample}_ftc{colExtra}.txt",
-            sample=config["samples"],
-            colExtra="1",
-        ),
-    output:
-        "result/featureCounts/counts_matrix.txt",
-    log:
-        "result/logs/featureCounts/counts_matrix.txt",
-    threads: 1
-    resources:
-        mem="1gb",
-        time_min="00:05:00",
-    shell:
-        """
-         paste {input.geneID[0]} {input.countfile} > {output} 2> {log}
-       """
-
-
-rule multiqc:
-    input:
-        expand(
-            "result/fastqc/{sample}_fastqc.zip",
-            sample=config["samples"],
-            allow_missing=True,
-        ),
-        expand("result/fastqc/{sample}_fastqc.html", sample=config["samples"]),
-        expand("result/bowtie/{sample}.bam", sample=config["samples"]),
-        expand("result/featureCounts/{sample}_ftc.txt", sample=config["samples"]),
-    output:
-        "result/multiqc_report.html",
-    log:
-        report(
-            "result/logs/multiqc/multiqc.log",
-            caption="report/multiqc.rst",
-            category="Step 4 multiqc",
-        ),
-    threads: 1
-    resources:
-        mem="2gb",
-        time_min="00:30:00",
-    params:
-        dir="result",
-    envmodules:
-        "nodes/multiqc-1.9",
-    shell:
-        """
-            multiqc --outdir {params.dir} {params.dir} &> {log}
-        """
diff --git a/demo_advanced/codes/profile/config.yaml b/demo_advanced/codes/profile/config.yaml
deleted file mode 100644
index 6136211..0000000
--- a/demo_advanced/codes/profile/config.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-restart-times: 3
-latency-wait: 180
-software-deployment-method: env-modules 
-jobs: 16
-keep-going: True
-printshellcmds: True
diff --git a/demo_advanced/codes/report/alignPipeline.rst b/demo_advanced/codes/report/alignPipeline.rst
deleted file mode 100644
index a75c7c2..0000000
--- a/demo_advanced/codes/report/alignPipeline.rst
+++ /dev/null
@@ -1,71 +0,0 @@
-**RNA-seq: Quality control and alignment**
-
-# RNA-seq pipeline
-
-## Step1: Fastqc
-
-Check the sequence quality with Fastqc_ 
-
-.. _Fastqc: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
-
-*Params:* default parameters
-
-*Version:* fastqc/fastqc_v0.11.5
-
-*Cite:* Andrews S. (2010). FastQC: a quality control tool for high throughput sequence data. Available online at: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
-
-
-## Step2: Bowtie2 and Samtools
-
-Using Bowtie2_ for alignment on a reference genome. Samtools_ allowed to filter, index and convert format
-
-.. _Bowtie2: https://bowtie-bio.sourceforge.net/bowtie2/index.shtml
-.. _Samtools: http://www.htslib.org/
-    
-*Params:* {{ snakemake.config["bowtie"]["extra"] }}
-
-*Version:* 
-
-- Bowtie2: nodes/bowtie2-2.4.2
-- Samtools: nodes/samtools-1.11
-
-*Cite:* 
-
-- Bowtie2: Langmead B, Salzberg S. Fast gapped-read alignment with Bowtie 2. Nature Methods. 2012, 9:357-359.
-- Samtools: Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R, and 1000 Genome Project Data Processing Subgroup, The Sequence alignment/map (SAM) format and SAMtools, Bioinformatics (2009) 25(16) 2078-9
-
-
-## Step3: featureCounts
-
-Using FeatureCounts_ to quantify the level of gene expression
-
-.. _FeatureCounts: https://subread.sourceforge.net/featureCounts.html
-
-*Params:* {{ snakemake.config["featureCounts"]["extra"] }}                             
-                                                                                 
-*Version:* subread/subread-1.5.2
-
-*Cite:* Liao Y, Smyth GK and Shi W (2014). featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics, 30(7):923-30. 
-
-
-## Step4: Multiqc
-
-With Multiqc_ combine all results in html (easy-to-use)
-
-.. _Multiqc: https://multiqc.info/
-
-*Params:* default parameters                      
-                                                                                  
-*Version:* nodes/multiqc-1.9
-
-
-*Cite:*  MultiQC: Summarize analysis results for multiple tools and samples in a single report Philip Ewels, Måns Magnusson, Sverker Lundin and Max Käller. Bioinformatics (2016) 
-
-
-# Pipeline 
-
-- Snakemake_ : Mölder, F., Jablonski, K.P., Letcher, B., Hall, M.B., Tomkins-Tinch, C.H., Sochat, V., Forster, J., Lee, S., Twardziok, S.O., Kanitz, A., Wilm, A., Holtgrewe, M., Rahmann, S., Nahnsen, S., Köster, J., 2021. Sustainable data analysis with Snakemake. F1000Res 10, 33.
-- Conda_
-
-.. _Snakemake: https://snakemake.readthedocs.io/en/stable/
-.. _Conda: https://docs.conda.io/en/latest/
diff --git a/demo_advanced/codes/report/bowtie2.rst b/demo_advanced/codes/report/bowtie2.rst
deleted file mode 100644
index 91df891..0000000
--- a/demo_advanced/codes/report/bowtie2.rst
+++ /dev/null
@@ -1 +0,0 @@
-Bowtie2 and samtools
diff --git a/demo_advanced/codes/report/fastqc.rst b/demo_advanced/codes/report/fastqc.rst
deleted file mode 100644
index 25accd6..0000000
--- a/demo_advanced/codes/report/fastqc.rst
+++ /dev/null
@@ -1 +0,0 @@
-Running fastqc with default params
diff --git a/demo_advanced/codes/report/featureCounts.rst b/demo_advanced/codes/report/featureCounts.rst
deleted file mode 100644
index d27475a..0000000
--- a/demo_advanced/codes/report/featureCounts.rst
+++ /dev/null
@@ -1 +0,0 @@
-FeatureCounts from Subread 
diff --git a/demo_advanced/codes/report/multiqc.rst b/demo_advanced/codes/report/multiqc.rst
deleted file mode 100644
index 06ad9d0..0000000
--- a/demo_advanced/codes/report/multiqc.rst
+++ /dev/null
@@ -1 +0,0 @@
-Runiing with default params
diff --git a/demo_advanced/configfile.yaml b/demo_advanced/configfile.yaml
deleted file mode 100644
index b85cf09..0000000
--- a/demo_advanced/configfile.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-samples:
-    SRR3099585_chr18: Data/SRR3099585_chr18.fastq.gz
-    SRR3099586_chr18: Data/SRR3099586_chr18.fastq.gz
-    SRR3099587_chr18: Data/SRR3099587_chr18.fastq.gz
-    SRR3105697_chr18: Data/SRR3105697_chr18.fastq.gz
-    SRR3105698_chr18: Data/SRR3105698_chr18.fastq.gz
-    SRR3105699_chr18: Data/SRR3105699_chr18.fastq.gz
-references:
-    annot: "Data/O.tauri_annotation.gff"
-    fasta: "Data/O.tauri_genome.fna"
-bowtie:
-    idx: "bowtie2index/Otauri"
-    idxthreads: 1
-    extra: ""
-    alignthreads: 8
-featureCounts:
-    extra: '-t "gene" -g "ID" -s "2"'
-tempDir: "/tmp"
diff --git a/demo_advanced/readme_runSnake.txt b/demo_advanced/readme_runSnake.txt
deleted file mode 100644
index cb7a959..0000000
--- a/demo_advanced/readme_runSnake.txt
+++ /dev/null
@@ -1,169 +0,0 @@
-# Snakemake pipeline presentation
-
-## The code
-
-The pipeline's `code` folder contains the following arborescence of files:
-
-```text
-├── Snakefile
-├── profile
-│   └── config.yaml
-└── report
-    ├── alignPipeline.rst
-    ├── bowtie2.rst
-    ├── fastqc.rst
-    ├── featureCounts.rst
-    └── multiqc.rst
-```
-
-The Snakefile is the file that contains all of Snakemake's rules. The configuration file `profile/config.yaml` 
-is a way of specifying options that you would otherwise add to the snakemake command line. The `report` folder
-contains a set of hand-written report templates for each tool or method that was used and that will later
-be incorporated into a final report file generated by Snakemake.
-
-NB: this Snakefile was reformatted using the snakefmt tool (v0.10.0) in order to comply with the commonly 
-accepted (and encouraged) Snakemake formatting standards
-
-## The required data (to download)
-
-This pipeline runs on single-end bulk RNA-seq data and outputs quality reports on the samples provided as well as tables of
-gene expression counts per sample.
-
-Example input files can be found under (this link)[https://doi.org/10.5281/zenodo.8340293] (same as in Exercises 1A, 1B and 1C, see (this page)[https://bioi2.i2bc.paris-saclay.fr/training/snakemake/exercises]):
-
-```text
-Data/
-├── O.tauri_annotation.gff
-├── O.tauri_genome.fna
-├── SRR3099585_chr18.fastq.gz
-├── SRR3099586_chr18.fastq.gz
-├── SRR3099587_chr18.fastq.gz
-├── SRR3105697_chr18.fastq.gz
-├── SRR3105698_chr18.fastq.gz
-└── SRR3105699_chr18.fastq.gz
-```
-
-They contain 6 fastq files (chromosome 18 from SRR309958[5,6,7] and SRR310569[7,8,9]), as well as 
-the genome fasta of their reference species *O. tauri* and its annotation.
-
-# Usage
-
-**Requirements:**
-- tools: snakemake, fastqc, bowtie, featureCounts, multiqc
-- data: fastq files and their corresponding reference genome
-
-**Before you run the pipeline:**
-- edit the line `cd /path/to/the/project/` in the `runRnaseq.sh` file to your actual project folder e.g. `/path/to/snakemake_examples/exercise2_advanced/`
-- edit the configuration file `configfile.yaml` so that the paths to the various samples and reference files are correct (a good habit is to use absolute paths) so that Snakemake can find them
-
-For example, if my working directories look like this:
-
-```text
-/home/john.doe/snakemake_tutorial/Data/
-├── O.tauri_annotation.gff
-├── O.tauri_genome.fna
-├── SRR3099585_chr18.fastq.gz
-├── SRR3099586_chr18.fastq.gz
-├── SRR3099587_chr18.fastq.gz
-├── SRR3105697_chr18.fastq.gz
-├── SRR3105698_chr18.fastq.gz
-└── SRR3105699_chr18.fastq.gz
-```
-and
-```text
-/home/john.doe/snakemake_examples/exercise2_advanced/code/
-├── Snakefile
-├── profile
-│   └── config.yaml
-└── report
-    ├── alignPipeline.rst
-    ├── bowtie2.rst
-    ├── fastqc.rst
-    ├── featureCounts.rst
-    └── multiqc.rst
-```
-
-Then my configuration file should look like this (for example):
-
-```text
-samples:
-    SRR3099585_chr18: /home/john.doe/snakemake_tutorial/Data/SRR3099585_chr18.fastq.gz
-    SRR3099586_chr18: /home/john.doe/snakemake_tutorial/Data/SRR3099586_chr18.fastq.gz
-    SRR3099587_chr18: /home/john.doe/snakemake_tutorial/Data/SRR3099587_chr18.fastq.gz
-    SRR3105697_chr18: /home/john.doe/snakemake_tutorial/Data/SRR3105697_chr18.fastq.gz
-    SRR3105698_chr18: /home/john.doe/snakemake_tutorial/Data/SRR3105698_chr18.fastq.gz
-    SRR3105699_chr18: /home/john.doe/snakemake_tutorial/Data/SRR3105699_chr18.fastq.gz
-references:
-    annot: "/home/john.doe/snakemake_tutorial/Data/O.tauri_annotation.gff"
-    fasta: "/home/john.doe/snakemake_tutorial/Data/O.tauri_genome.fna"
-bowtie:
-    idx: "/home/john.doe/snakemake_examples/exercise2_advanced/result/bowtie2index/Otauri"
-    idxthreads: 1
-    extra: ""
-    alignthreads: 8
-featureCounts:
-    extra: '-t "gene" -g "ID" -s "2"'
-tempDir: "/tmp"
-```
-
-**Running the pipeline:**
-
-Outputs will be generated in the current working directory (i.e. the directory in which you
-are when you run the Snakemake command), within a `result` folder. 
-
-To use this Snakemake pipeline (tested with snakemake version 8.4.6) on the I2BC cluster you can use the little bash script provided:
-
-```bash
-qsub -V -l walltime=4:00:00 -l select=1:ncpus=1:mem=100mb runRnaseq.sh
-```
-
-If you have a look at the runRnaseq.sh script, you will see that we run snakemake twice: the first time to run the pipeline and 
-generate the output files; the second time to create the final report using the `--report myFinalReport.html` option.
-
-
-# Output
-
-```text
-result/
-├── benchmark
-├── bowtie
-├── fastqc
-├── featureCounts
-├── logs
-├── multiqc_data
-└── multiqc_report.html
-```
-
-
-# Les fonctions utilisées
-
-## Fonctions générales
-
-- envmodules : pour spécifier les environnements utilisés pour chaque règles (https://snakemake.readthedocs.io/en/stable/snakefiles/deployment.html#using-environment-modules)
-- wildcard_constraints : permet une verification de la valeur des variables (https://snakemake.readthedocs.io/en/stable/tutorial/additional_features.html#constraining-wildcards)
-
-## rule fastqc
-
-- Utilisation d'une fonction au lieu d'un nom de fichier pour l'input (https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#input-functions)
-- Utilisation de la fonction report pour créer un rapport après exécution du script (https://snakemake.readthedocs.io/en/stable/snakefiles/reporting.html#reports)
-- multiext : variant de la fonction expand (https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#the-multiext-function)
-
-## rule bowtieIndex
-
-- lookup : https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#the-lookup-function
-- ensure : vérification si le fichier est vide (https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#ensuring-output-file-properties-like-non-emptyness-or-checksum-compliance)
-- multiext : variant de la fonction expand (https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#the-multiext-function)
-
-## rule bowtie2 et samtoolsSort
-
-- Utilisation d'une fonction au lieu d'un nom de fichier pour l'input (https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#input-functions)
-- pipe : permet de remplacer l'utilisation de pipe et de scinder les différents éléments de la ligne de commande en plusieurs règles (https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#piped-output)
-- config.get : permet de mettre une valeur par défaut si elle manque dans le fichier de configuration (NB: c'est une fonction de Python qui marche ici car config est un dictionnaire)
-
-## rule extractCounts
-
-- temp : suppression des fichiers une fois qu'ils ne sont plus utils (https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#protected-and-temporary-files)
-
-## rule matrixCounts
-
-- On peut utiliser autant de wildcards que l'on veut. Dans cette règle 2 wildcards sont utilisées (sample et colExtra).
diff --git a/demo_advanced/runRnaseq.sh b/demo_advanced/runRnaseq.sh
deleted file mode 100644
index 0b9b631..0000000
--- a/demo_advanced/runRnaseq.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-module load snakemake/snakemake-8.4.6
-
-cd /path/to/the/project/
-
-# create directory for the pbs logs
-mkdir pbs
-
-snakemake --executor cluster-generic --cluster-generic-submit-cmd "qsub -V -l walltime={resources.time_min} -l select=1:ncpus={threads}:mem={resources.mem} -e pbs/{name}_$PBS_JOBID.err -o pbs/{name}_$PBS_JOBID.out" --snakefile codes/Snakefile --profile codes/profile --configfile configfile.yaml &> smkpipeline.txt
-
-snakemake --executor cluster-generic --cluster-generic-submit-cmd "qsub -V -l walltime={resources.time_min} -l select=1:ncpus={threads}:mem={resources.mem}" --snakefile codes/Snakefile --profile codes/profile --configfile configfile.yaml --report smkRnaseq_report.html
diff --git a/exercise2_advanced/codes/Snakefile b/exercise2_advanced/codes/Snakefile
new file mode 100644
index 0000000..0465d9d
--- /dev/null
+++ b/exercise2_advanced/codes/Snakefile
@@ -0,0 +1,268 @@
+wildcard_constraints:
+    sample="[a-zA-Z0-9_]+",  # matches Unicode word characters
+    colExtra="[0-9]",
+
+
+# Report name:
+report: "report/alignPipeline.rst"
+
+
+onsuccess:
+    print("Workflow finished, no error")
+
+
+onerror:
+    print("An error occurred")
+
+
+rule targets:
+    input:
+        "result/multiqc_report.html",
+        expand("result/fastqc/{sample}_fastqc.zip", sample=config["samples"]),
+        expand("result/bowtie/{sample}.bam.bai", sample=config["samples"]),
+        "result/featureCounts/counts_matrix.txt",
+
+
+rule fastqc:
+    input:
+        lambda wc: config["samples"][wc.sample]
+    output:
+        report(
+            multiext("result/fastqc/{sample}", "_fastqc.zip", "_fastqc.html"),
+            caption="report/fastqc.rst",
+            category="Step 1 fastqc",
+        ),
+    log:
+        stdout="result/logs/{sample}_fastqc.stdout",
+        stderr="result/logs/{sample}_fastqc.stderr",
+    benchmark:
+        "result/benchmark/{sample}_fastqc.tsv"
+    params:
+        outdir="result/fastqc",
+    message:
+        "Executing Fastqc with {threads} threads on the following sample : {wildcards.sample}"
+    threads: 1
+    envmodules:
+        "fastqc/fastqc_v0.11.5",
+    resources:
+        mem="1gb",
+        time_min="00:01:00",
+    shell:
+        """
+          fastqc {input} -outdir {params.outdir} 1> {log.stdout} 2> {log.stderr}
+        """
+
+
+rule bowtieIndex:
+    input:
+        fasta=lookup(dpath="references/fasta", within=config),  # config["references"]["fasta"]
+    output:
+        idxFile=ensure(
+            multiext(
+                config["bowtie"]["idx"],
+                ".1.bt2",
+                ".2.bt2",
+                ".3.bt2",
+                ".4.bt2",
+                ".rev.1.bt2",
+                ".rev.2.bt2",
+            ),
+            non_empty=True,
+        ),
+    log:
+        "result/logs/fastqc/bowtieIndex.txt",
+    benchmark:
+        "result/benchmark/bowtieIndex.tsv"
+    params:
+        idx=config["bowtie"]["idx"],
+    message:
+        "Indexing genome"
+    threads: max(1, config["bowtie"]["idxthreads"])
+    envmodules:
+        "nodes/bowtie2-2.4.2",
+    resources:
+        mem="1gb",
+        time_min="00:05:00",
+    shell:
+        """
+          bowtie2-build --threads {threads} {input.fasta} {params.idx} &> {log}
+        """
+
+
+rule bowtie2:
+    input:
+        fq=lambda wc: config["samples"][wc.sample],
+        idxFile=rules.bowtieIndex.output,
+    output:
+        bam=pipe("result/bowtie/{sample}.sam"),
+    log:
+        bwtout="result/logs/bowtie/{sample}_align.txt",
+        stderr="result/logs/bowtie/{sample}_align.stderr",
+    benchmark:
+        "result/benchmark/{sample}_bowtie2.tsv"
+    params:
+        extra=config["bowtie"]["extra"],
+        idx=config["bowtie"]["idx"],
+        tmpDir=config.get("tempDir", "tmp"),
+    message:
+        "Executing Bowtie2 with {threads} threads on the following sample: {wildcards.sample}"
+    threads: max(1, config["bowtie"]["alignthreads"])
+    envmodules:
+        "nodes/bowtie2-2.4.2",
+    resources:
+        mem="2gb",
+        time_min="00:05:00",
+    shell:
+        """
+          bowtie2 -x {params.idx} -U {input.fq} -p {threads} {params.extra} \
+                  2> {log.bwtout} > {output}
+        """
+
+
+rule samtoolsSort:
+    input:
+        "result/bowtie/{sample}.sam",
+    output:
+        report(
+            protected("result/bowtie/{sample}.bam"),
+            caption="report/bowtie2.rst",
+            category="Step 2 Bowtie2",
+        ),
+    # not a good idea because bam files are heavy
+    log:
+        "result/logs/samtools/{sample}.txt",
+    benchmark:
+        "result/benchmark/{sample}_samtoolsSort.tsv"
+    envmodules:
+        "nodes/samtools-1.11",
+    threads: 1
+    resources:
+        mem="2gb",
+        time_min="00:05:00",
+    params:
+        tmpDir=config.get("tempDir", "tmp"),
+    shell:
+        """
+          samtools sort -O 'bam' -T {params.tmpDir} {input} > {output}
+        """
+
+
+rule samtoolsIndx:
+    input:
+        bam="result/bowtie/{sample}.bam",
+    output:
+        bai="result/bowtie/{sample}.bam.bai",
+    log:
+        "result/logs/bowtie/{sample}_index.txt",
+    threads: 1
+    resources:
+        mem="1gb",
+        time_min="00:05:00",
+    envmodules:
+        "nodes/samtools-1.11",
+    shell:
+        """
+            samtools index -b {input.bam} &> {log}
+        """
+
+
+rule featureCounts:
+    input:
+        bam="result/bowtie/{sample}.bam",
+        annot=config["references"]["annot"],
+    output:
+        report(
+            ensure("result/featureCounts/{sample}_ftc.txt", non_empty=True),
+            caption="report/featureCounts.rst",
+            category="Step 3 featureCounts",
+        ),
+    log:
+        "result/logs/featureCounts/{sample}_ftc.txt",
+    threads: 1
+    resources:
+        mem="1gb",
+        time_min="00:10:00",
+    envmodules:
+        "subread/subread-1.5.2",
+    params:
+        extra=config["featureCounts"]["extra"],
+    shell:
+        """
+           featureCounts {params.extra} -a {input.annot} -o {output} \
+                   {input.bam} &> {log}
+        """
+
+
+rule extractCounts:
+    input:
+        "result/featureCounts/{sample}_ftc.txt",
+    output:
+        temp("result/featureCounts/{sample}_ftc{colExtra}.txt"),
+    log:
+        "result/logs/featureCounts/{sample}_extractCounts{colExtra}.txt",
+    threads: 1
+    resources:
+        mem="1gb",
+        time_min="00:05:00",
+    shell:
+        """
+          cut -f {wildcards.colExtra} {input} | sed 1d > {output} 2> {log}
+        """
+
+
+rule matrixCounts:
+    input:
+        countfile=expand(
+            "result/featureCounts/{sample}_ftc{colExtra}.txt",
+            sample=config["samples"],
+            colExtra="7",
+        ),
+        geneID=expand(
+            "result/featureCounts/{sample}_ftc{colExtra}.txt",
+            sample=config["samples"],
+            colExtra="1",
+        ),
+    output:
+        "result/featureCounts/counts_matrix.txt",
+    log:
+        "result/logs/featureCounts/counts_matrix.txt",
+    threads: 1
+    resources:
+        mem="1gb",
+        time_min="00:05:00",
+    shell:
+        """
+         paste {input.geneID[0]} {input.countfile} > {output} 2> {log}
+       """
+
+
+rule multiqc:
+    input:
+        expand(
+            "result/fastqc/{sample}_fastqc.zip",
+            sample=config["samples"],
+            allow_missing=True,
+        ),
+        expand("result/fastqc/{sample}_fastqc.html", sample=config["samples"]),
+        expand("result/bowtie/{sample}.bam", sample=config["samples"]),
+        expand("result/featureCounts/{sample}_ftc.txt", sample=config["samples"]),
+    output:
+        "result/multiqc_report.html",
+    log:
+        report(
+            "result/logs/multiqc/multiqc.log",
+            caption="report/multiqc.rst",
+            category="Step 4 multiqc",
+        ),
+    threads: 1
+    resources:
+        mem="2gb",
+        time_min="00:30:00",
+    params:
+        dir="result",
+    envmodules:
+        "nodes/multiqc-1.9",
+    shell:
+        """
+            multiqc --outdir {params.dir} {params.dir} &> {log}
+        """
diff --git a/exercise2_advanced/codes/profile/config.yaml b/exercise2_advanced/codes/profile/config.yaml
new file mode 100644
index 0000000..6136211
--- /dev/null
+++ b/exercise2_advanced/codes/profile/config.yaml
@@ -0,0 +1,6 @@
+restart-times: 3
+latency-wait: 180
+software-deployment-method: env-modules 
+jobs: 16
+keep-going: True
+printshellcmds: True
diff --git a/exercise2_advanced/codes/report/alignPipeline.rst b/exercise2_advanced/codes/report/alignPipeline.rst
new file mode 100644
index 0000000..a75c7c2
--- /dev/null
+++ b/exercise2_advanced/codes/report/alignPipeline.rst
@@ -0,0 +1,71 @@
+**RNA-seq: Quality control and alignment**
+
+# RNA-seq pipeline
+
+## Step1: Fastqc
+
+Check the sequence quality with Fastqc_ 
+
+.. _Fastqc: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
+
+*Params:* default parameters
+
+*Version:* fastqc/fastqc_v0.11.5
+
+*Cite:* Andrews S. (2010). FastQC: a quality control tool for high throughput sequence data. Available online at: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
+
+
+## Step2: Bowtie2 and Samtools
+
+Using Bowtie2_ for alignment on a reference genome. Samtools_ allowed to filter, index and convert format
+
+.. _Bowtie2: https://bowtie-bio.sourceforge.net/bowtie2/index.shtml
+.. _Samtools: http://www.htslib.org/
+    
+*Params:* {{ snakemake.config["bowtie"]["extra"] }}
+
+*Version:* 
+
+- Bowtie2: nodes/bowtie2-2.4.2
+- Samtools: nodes/samtools-1.11
+
+*Cite:* 
+
+- Bowtie2: Langmead B, Salzberg S. Fast gapped-read alignment with Bowtie 2. Nature Methods. 2012, 9:357-359.
+- Samtools: Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R, and 1000 Genome Project Data Processing Subgroup, The Sequence alignment/map (SAM) format and SAMtools, Bioinformatics (2009) 25(16) 2078-9
+
+
+## Step3: featureCounts
+
+Using FeatureCounts_ to quantify the level of gene expression
+
+.. _FeatureCounts: https://subread.sourceforge.net/featureCounts.html
+
+*Params:* {{ snakemake.config["featureCounts"]["extra"] }}                             
+                                                                                 
+*Version:* subread/subread-1.5.2
+
+*Cite:* Liao Y, Smyth GK and Shi W (2014). featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics, 30(7):923-30. 
+
+
+## Step4: Multiqc
+
+With Multiqc_ combine all results in html (easy-to-use)
+
+.. _Multiqc: https://multiqc.info/
+
+*Params:* default parameters                      
+                                                                                  
+*Version:* nodes/multiqc-1.9
+
+
+*Cite:*  MultiQC: Summarize analysis results for multiple tools and samples in a single report Philip Ewels, Måns Magnusson, Sverker Lundin and Max Käller. Bioinformatics (2016) 
+
+
+# Pipeline 
+
+- Snakemake_ : Mölder, F., Jablonski, K.P., Letcher, B., Hall, M.B., Tomkins-Tinch, C.H., Sochat, V., Forster, J., Lee, S., Twardziok, S.O., Kanitz, A., Wilm, A., Holtgrewe, M., Rahmann, S., Nahnsen, S., Köster, J., 2021. Sustainable data analysis with Snakemake. F1000Res 10, 33.
+- Conda_
+
+.. _Snakemake: https://snakemake.readthedocs.io/en/stable/
+.. _Conda: https://docs.conda.io/en/latest/
diff --git a/exercise2_advanced/codes/report/bowtie2.rst b/exercise2_advanced/codes/report/bowtie2.rst
new file mode 100644
index 0000000..91df891
--- /dev/null
+++ b/exercise2_advanced/codes/report/bowtie2.rst
@@ -0,0 +1 @@
+Bowtie2 and samtools
diff --git a/exercise2_advanced/codes/report/fastqc.rst b/exercise2_advanced/codes/report/fastqc.rst
new file mode 100644
index 0000000..25accd6
--- /dev/null
+++ b/exercise2_advanced/codes/report/fastqc.rst
@@ -0,0 +1 @@
+Running fastqc with default params
diff --git a/exercise2_advanced/codes/report/featureCounts.rst b/exercise2_advanced/codes/report/featureCounts.rst
new file mode 100644
index 0000000..d27475a
--- /dev/null
+++ b/exercise2_advanced/codes/report/featureCounts.rst
@@ -0,0 +1 @@
+FeatureCounts from Subread 
diff --git a/exercise2_advanced/codes/report/multiqc.rst b/exercise2_advanced/codes/report/multiqc.rst
new file mode 100644
index 0000000..06ad9d0
--- /dev/null
+++ b/exercise2_advanced/codes/report/multiqc.rst
@@ -0,0 +1 @@
+Runiing with default params
diff --git a/exercise2_advanced/configfile.yaml b/exercise2_advanced/configfile.yaml
new file mode 100644
index 0000000..b85cf09
--- /dev/null
+++ b/exercise2_advanced/configfile.yaml
@@ -0,0 +1,18 @@
+samples:
+    SRR3099585_chr18: Data/SRR3099585_chr18.fastq.gz
+    SRR3099586_chr18: Data/SRR3099586_chr18.fastq.gz
+    SRR3099587_chr18: Data/SRR3099587_chr18.fastq.gz
+    SRR3105697_chr18: Data/SRR3105697_chr18.fastq.gz
+    SRR3105698_chr18: Data/SRR3105698_chr18.fastq.gz
+    SRR3105699_chr18: Data/SRR3105699_chr18.fastq.gz
+references:
+    annot: "Data/O.tauri_annotation.gff"
+    fasta: "Data/O.tauri_genome.fna"
+bowtie:
+    idx: "bowtie2index/Otauri"
+    idxthreads: 1
+    extra: ""
+    alignthreads: 8
+featureCounts:
+    extra: '-t "gene" -g "ID" -s "2"'
+tempDir: "/tmp"
diff --git a/exercise2_advanced/readme_runSnake.txt b/exercise2_advanced/readme_runSnake.txt
new file mode 100644
index 0000000..cb7a959
--- /dev/null
+++ b/exercise2_advanced/readme_runSnake.txt
@@ -0,0 +1,169 @@
+# Snakemake pipeline presentation
+
+## The code
+
+The pipeline's `code` folder contains the following arborescence of files:
+
+```text
+├── Snakefile
+├── profile
+│   └── config.yaml
+└── report
+    ├── alignPipeline.rst
+    ├── bowtie2.rst
+    ├── fastqc.rst
+    ├── featureCounts.rst
+    └── multiqc.rst
+```
+
+The Snakefile is the file that contains all of Snakemake's rules. The configuration file `profile/config.yaml` 
+is a way of specifying options that you would otherwise add to the snakemake command line. The `report` folder
+contains a set of hand-written report templates for each tool or method that was used and that will later
+be incorporated into a final report file generated by Snakemake.
+
+NB: this Snakefile was reformatted using the snakefmt tool (v0.10.0) in order to comply with the commonly 
+accepted (and encouraged) Snakemake formatting standards
+
+## The required data (to download)
+
+This pipeline runs on single-end bulk RNA-seq data and outputs quality reports on the samples provided as well as tables of
+gene expression counts per sample.
+
+Example input files can be found under (this link)[https://doi.org/10.5281/zenodo.8340293] (same as in Exercises 1A, 1B and 1C, see (this page)[https://bioi2.i2bc.paris-saclay.fr/training/snakemake/exercises]):
+
+```text
+Data/
+├── O.tauri_annotation.gff
+├── O.tauri_genome.fna
+├── SRR3099585_chr18.fastq.gz
+├── SRR3099586_chr18.fastq.gz
+├── SRR3099587_chr18.fastq.gz
+├── SRR3105697_chr18.fastq.gz
+├── SRR3105698_chr18.fastq.gz
+└── SRR3105699_chr18.fastq.gz
+```
+
+They contain 6 fastq files (chromosome 18 from SRR309958[5,6,7] and SRR310569[7,8,9]), as well as 
+the genome fasta of their reference species *O. tauri* and its annotation.
+
+# Usage
+
+**Requirements:**
+- tools: snakemake, fastqc, bowtie, featureCounts, multiqc
+- data: fastq files and their corresponding reference genome
+
+**Before you run the pipeline:**
+- edit the line `cd /path/to/the/project/` in the `runRnaseq.sh` file to your actual project folder e.g. `/path/to/snakemake_examples/exercise2_advanced/`
+- edit the configuration file `configfile.yaml` so that the paths to the various samples and reference files are correct (a good habit is to use absolute paths) so that Snakemake can find them
+
+For example, if my working directories look like this:
+
+```text
+/home/john.doe/snakemake_tutorial/Data/
+├── O.tauri_annotation.gff
+├── O.tauri_genome.fna
+├── SRR3099585_chr18.fastq.gz
+├── SRR3099586_chr18.fastq.gz
+├── SRR3099587_chr18.fastq.gz
+├── SRR3105697_chr18.fastq.gz
+├── SRR3105698_chr18.fastq.gz
+└── SRR3105699_chr18.fastq.gz
+```
+and
+```text
+/home/john.doe/snakemake_examples/exercise2_advanced/code/
+├── Snakefile
+├── profile
+│   └── config.yaml
+└── report
+    ├── alignPipeline.rst
+    ├── bowtie2.rst
+    ├── fastqc.rst
+    ├── featureCounts.rst
+    └── multiqc.rst
+```
+
+Then my configuration file should look like this (for example):
+
+```text
+samples:
+    SRR3099585_chr18: /home/john.doe/snakemake_tutorial/Data/SRR3099585_chr18.fastq.gz
+    SRR3099586_chr18: /home/john.doe/snakemake_tutorial/Data/SRR3099586_chr18.fastq.gz
+    SRR3099587_chr18: /home/john.doe/snakemake_tutorial/Data/SRR3099587_chr18.fastq.gz
+    SRR3105697_chr18: /home/john.doe/snakemake_tutorial/Data/SRR3105697_chr18.fastq.gz
+    SRR3105698_chr18: /home/john.doe/snakemake_tutorial/Data/SRR3105698_chr18.fastq.gz
+    SRR3105699_chr18: /home/john.doe/snakemake_tutorial/Data/SRR3105699_chr18.fastq.gz
+references:
+    annot: "/home/john.doe/snakemake_tutorial/Data/O.tauri_annotation.gff"
+    fasta: "/home/john.doe/snakemake_tutorial/Data/O.tauri_genome.fna"
+bowtie:
+    idx: "/home/john.doe/snakemake_examples/exercise2_advanced/result/bowtie2index/Otauri"
+    idxthreads: 1
+    extra: ""
+    alignthreads: 8
+featureCounts:
+    extra: '-t "gene" -g "ID" -s "2"'
+tempDir: "/tmp"
+```
+
+**Running the pipeline:**
+
+Outputs will be generated in the current working directory (i.e. the directory in which you
+are when you run the Snakemake command), within a `result` folder. 
+
+To use this Snakemake pipeline (tested with snakemake version 8.4.6) on the I2BC cluster you can use the little bash script provided:
+
+```bash
+qsub -V -l walltime=4:00:00 -l select=1:ncpus=1:mem=100mb runRnaseq.sh
+```
+
+If you have a look at the runRnaseq.sh script, you will see that we run snakemake twice: the first time to run the pipeline and 
+generate the output files; the second time to create the final report using the `--report myFinalReport.html` option.
+
+
+# Output
+
+```text
+result/
+├── benchmark
+├── bowtie
+├── fastqc
+├── featureCounts
+├── logs
+├── multiqc_data
+└── multiqc_report.html
+```
+
+
+# Les fonctions utilisées
+
+## Fonctions générales
+
+- envmodules : pour spécifier les environnements utilisés pour chaque règles (https://snakemake.readthedocs.io/en/stable/snakefiles/deployment.html#using-environment-modules)
+- wildcard_constraints : permet une verification de la valeur des variables (https://snakemake.readthedocs.io/en/stable/tutorial/additional_features.html#constraining-wildcards)
+
+## rule fastqc
+
+- Utilisation d'une fonction au lieu d'un nom de fichier pour l'input (https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#input-functions)
+- Utilisation de la fonction report pour créer un rapport après exécution du script (https://snakemake.readthedocs.io/en/stable/snakefiles/reporting.html#reports)
+- multiext : variant de la fonction expand (https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#the-multiext-function)
+
+## rule bowtieIndex
+
+- lookup : https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#the-lookup-function
+- ensure : vérification si le fichier est vide (https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#ensuring-output-file-properties-like-non-emptyness-or-checksum-compliance)
+- multiext : variant de la fonction expand (https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#the-multiext-function)
+
+## rule bowtie2 et samtoolsSort
+
+- Utilisation d'une fonction au lieu d'un nom de fichier pour l'input (https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#input-functions)
+- pipe : permet de remplacer l'utilisation de pipe et de scinder les différents éléments de la ligne de commande en plusieurs règles (https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#piped-output)
+- config.get : permet de mettre une valeur par défaut si elle manque dans le fichier de configuration (NB: c'est une fonction de Python qui marche ici car config est un dictionnaire)
+
+## rule extractCounts
+
+- temp : suppression des fichiers une fois qu'ils ne sont plus utils (https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#protected-and-temporary-files)
+
+## rule matrixCounts
+
+- On peut utiliser autant de wildcards que l'on veut. Dans cette règle 2 wildcards sont utilisées (sample et colExtra).
diff --git a/exercise2_advanced/runRnaseq.sh b/exercise2_advanced/runRnaseq.sh
new file mode 100644
index 0000000..0b9b631
--- /dev/null
+++ b/exercise2_advanced/runRnaseq.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+module load snakemake/snakemake-8.4.6
+
+cd /path/to/the/project/
+
+# create directory for the pbs logs
+mkdir pbs
+
+snakemake --executor cluster-generic --cluster-generic-submit-cmd "qsub -V -l walltime={resources.time_min} -l select=1:ncpus={threads}:mem={resources.mem} -e pbs/{name}_$PBS_JOBID.err -o pbs/{name}_$PBS_JOBID.out" --snakefile codes/Snakefile --profile codes/profile --configfile configfile.yaml &> smkpipeline.txt
+
+snakemake --executor cluster-generic --cluster-generic-submit-cmd "qsub -V -l walltime={resources.time_min} -l select=1:ncpus={threads}:mem={resources.mem}" --snakefile codes/Snakefile --profile codes/profile --configfile configfile.yaml --report smkRnaseq_report.html