commit dc329d66d7c21c35ece7c6cb564de7386a206c1f
Author: Chloé QUIGNOT <chloe.quignot@i2bc.paris-saclay.fr>
Date:   Tue Jun 18 15:58:46 2024 +0200

    add exercise 0 improved after applying what was learnt in exercise 1B

diff --git a/exercise0_improved_after_1B/Snakefile b/exercise0_improved_after_1B/Snakefile
new file mode 100644
index 0000000..8e270b9
--- /dev/null
+++ b/exercise0_improved_after_1B/Snakefile
@@ -0,0 +1,65 @@
+# Update 1: integrate Python skills within the Snakefile using
+#           the yaml module to read a samples.yaml file to import
+#           your Uniprot ids as a list
+#           NB: it's always a good idea to separate generalised 
+#           code and project-specific variable values
+import yaml
+
+with open('samples.yaml', 'r') as file:  # project-specific values live outside the workflow code
+    content = yaml.safe_load(file)
+samples = content['samples']  # list of Uniprot ids, consumed by the expand() calls below
+
+# Default target (first rule): requests every per-sample FASTA, the merged FASTA and the mafft result.
+rule targets:
+    input:
+        expand("fasta/{sample}.fasta", sample=samples),
+        "fusionFasta/allSequences.fasta",
+        "mafft/mafft_res.fasta",
+
+# Download one Uniprot FASTA per {sample} id with wget (wget's log -> log.stderr, stdout -> log.stdout).
+# Update 2: add params directive to specify the output directory
+# Update 3: add log directives to loadData, fusionFasta and mafft
+#           rules to specify in which files standard output and
+#           standard error are saved & integrate the necessary
+#           wildcards within the shell directives
+rule loadData:
+    output:
+        "fasta/{sample}.fasta",
+    params:
+        dirFasta = "fasta",
+    log:
+        stdout = "logs/{sample}_wget.stdout",
+        stderr = "logs/{sample}_wget.stderr",
+    shell:
+        """
+            wget --output-file {log.stderr} \
+               --directory-prefix {params.dirFasta} \
+               https://www.uniprot.org/uniprot/{wildcards.sample}.fasta > {log.stdout}
+        """
+
+# Concatenate all per-sample FASTA files into one multi-sequence FASTA; cat's stderr goes to the log.
+rule fusionFasta:
+    input:
+        expand("fasta/{sample}.fasta", sample=samples),
+    output:
+        "fusionFasta/allSequences.fasta",
+    log:
+        "logs/fusionData.stderr",
+    shell:
+        """
+            cat {input} > {output} 2> {log}
+        """
+
+# Run mafft on the merged FASTA: stdout is written to the result file, stderr is kept in the log file.
+rule mafft:
+    input:
+        "fusionFasta/allSequences.fasta",
+    output:
+        "mafft/mafft_res.fasta",
+    log:
+        "logs/whichMafft.txt",
+    shell:
+        """
+            mafft {input} > {output} 2> {log}
+        """
+
diff --git a/exercise0_improved_after_1B/readme_runSnake.txt b/exercise0_improved_after_1B/readme_runSnake.txt
new file mode 100644
index 0000000..f2a84f3
--- /dev/null
+++ b/exercise0_improved_after_1B/readme_runSnake.txt
@@ -0,0 +1,8 @@
+Pour faire fonctionner le pipeline il faut se connecter sur un noeud du cluster puis:
+
+- charger les modules snakemake et mafft:
+module load snakemake/snakemake-8.4.6
+module load nodes/mafft-7.475
+
+- exécuter le programme: se placer dans ce dossier, puis lancer:
+snakemake --cores 1 
diff --git a/exercise0_improved_after_1B/samples.yaml b/exercise0_improved_after_1B/samples.yaml
new file mode 100644
index 0000000..536674d
--- /dev/null
+++ b/exercise0_improved_after_1B/samples.yaml
@@ -0,0 +1 @@
+samples: ["P01325", "P01308"]
\ No newline at end of file