commit 66c1b9caa07d80cda3d214351e20c3d2f1fb6664
Author: Chloé QUIGNOT <chloe.quignot@i2bc.paris-saclay.fr>
Date:   Tue Jun 18 15:43:17 2024 +0200

    add exercise 0 improved after applying what was learnt in exercise 1A

diff --git a/exercise0_improved_after_1A/Snakefile b/exercise0_improved_after_1A/Snakefile
new file mode 100644
index 0000000..b7321ba
--- /dev/null
+++ b/exercise0_improved_after_1A/Snakefile
@@ -0,0 +1,53 @@
+samples = ["P01325", "P01308"]  # UniProt ids of the sequences to fetch
+
+# Update 1: the first rule is a target rule that requests the
+#           final output and the intermediate outputs of the pipeline
+# Update 2: the per-sample targets are built with the expand function
+#           from the Python list of UniProt ids called "samples"
+rule targets:
+    input:
+        fastas=expand("fasta/{sample}.fasta", sample=samples),
+        merged="fusionFasta/allSequences.fasta",
+        aligned="mafft/mafft_res.fasta",
+
+
+# Update 3: we generalised the loadData rule using a wildcard
+#           called "sample", read in the shell directive through
+#           {wildcards.sample}; wget writes straight to {output}
+#           so the downloaded file name always matches the rule
+rule loadData:
+    output:
+        "fasta/{sample}.fasta",
+    shell:
+        """
+            wget --output-document {output} \
+               https://www.uniprot.org/uniprot/{wildcards.sample}.fasta
+        """
+
+
+# Update 4: we generalised the fusionFasta rule using the
+#           expand function, similar to the target rule
+rule fusionFasta:
+    input:
+        fastas=expand("fasta/{sample}.fasta", sample=samples),
+    output:
+        merged="fusionFasta/allSequences.fasta",
+    shell:
+        """
+            cat {input} > {output}
+        """
+
+
+# Update 5: the mafft rule is executed although it is declared
+#           at the end of the file, because the target rule
+#           lists its output among the files that we expect
+rule mafft:
+    input:
+        merged="fusionFasta/allSequences.fasta",
+    output:
+        aligned="mafft/mafft_res.fasta",
+    shell:
+        """
+            mafft {input} > {output}
+        """
+
diff --git a/exercise0_improved_after_1A/readme_runSnake.txt b/exercise0_improved_after_1A/readme_runSnake.txt
new file mode 100644
index 0000000..f2a84f3
--- /dev/null
+++ b/exercise0_improved_after_1A/readme_runSnake.txt
@@ -0,0 +1,8 @@
+Pour faire fonctionner le pipeline, il faut se connecter sur un nœud du cluster puis :
+
+- charger les environnements snakemake et mafft :
+module load snakemake/snakemake-8.4.6
+module load nodes/mafft-7.475
+
+- se placer dans ce dossier puis exécuter le pipeline :
+snakemake --cores 1