writer now correctly writes metadata

jdoughertyii · Jan 20, 2012 · 65e2b66 · 65e2b66
1 parent f9150ed
commit 65e2b66
Show file tree

Hide file tree

Showing 2 changed files with 34 additions and 7 deletions.
diff --git a/README.rst b/README.rst
@@ -39,7 +39,7 @@ plus three more attributes to handle genotype information:
 
     * ``Record.FORMAT``
     * ``Record.samples``
-    * ``Record.genotypes``
+    * ``Record.genotype``
 
 ``samples`` and ``genotype``, not being the titles of any columns, are left lowercase.  The format
 of the fixed fields is from the spec.  Comma-separated lists in the VCF are
@@ -60,16 +60,16 @@ a ``True`` value. Integers and floats are handled exactly as you'd expect::
 ``record.FORMAT`` will be a string specifying the format of the genotype
 fields.  In case the FORMAT column does not exist, ``record.FORMAT`` is
 ``None``.  Finally, ``record.samples`` is a list of dictionaries containing the
-parsed sample column and ``record.genotypes`` is a dictionary of sample names
-to genotype data::
+parsed sample column and ``record.genotype`` is a method that, called with a
+sample name, returns that sample's genotype data::
 
     >>> record = vcf_reader.next()
     >>> for sample in record.samples:
     ...     print sample['GT']
     0|0
     0|1
     0/0
-    >>> print record.genotypes['NA00001']['GT']
+    >>> print record.genotype('NA00001')['GT']
     0|0
 
 Metadata regarding the VCF file itself can be investigated through the
@@ -93,7 +93,7 @@ For example::
     'Ancestral Allele'
 
 Random access is supported for files with tabix indexes.  Simply call fetch for the
-region you are interested in:
+region you are interested in::
 
     >>> vcf_reader = vcf.Reader(filename='test/tb.vcf.gz')
     >>> for record in vcf_reader.fetch('20', 1110696-1, 1230237):

diff --git a/vcf.py b/vcf.py
@@ -470,8 +470,35 @@ class Writer(object):
     def __init__(self, stream, template):
         self.writer = csv.writer(stream, delimiter="\t")
         self.template = template
-        for line in template._header_lines:
-            stream.write(line)
+
+        for line in template.metadata.items():
+            stream.write('##%s=%s\n' % line)
+        for line in template.infos.values():
+            stream.write('##INFO=<ID=%s,Number=%s,Type=%s,Description="%s">\n' % line)
+        for line in template.formats.values():
+            stream.write('##FORMAT=<ID=%s,Number=%s,Type=%s,Description="%s">\n' % line)
+
+        for line in template.filters.values():
+            stream.write('##FILTER=<ID=%s,Description="%s">\n' % line)
+
+        self.info_pattern = re.compile(r'''\#\#INFO=<
+            ID=(?P<id>[^,]+),
+            Number=(?P<number>-?\d+|\.|[AG]),
+            Type=(?P<type>Integer|Float|Flag|Character|String),
+            Description="(?P<desc>[^"]*)"
+            >''', re.VERBOSE)
+        self.filter_pattern = re.compile(r'''\#\#FILTER=<
+            ID=(?P<id>[^,]+),
+            Description="(?P<desc>[^"]*)"
+            >''', re.VERBOSE)
+        self.format_pattern = re.compile(r'''\#\#FORMAT=<
+            ID=(?P<id>.+),
+            Number=(?P<number>-?\d+|\.|[AG]),
+            Type=(?P<type>.+),
+            Description="(?P<desc>.*)"
+            >''', re.VERBOSE)
+        self.meta_pattern = re.compile(r'''##(?P<key>.+)=(?P<val>.+)''')
+
         self.write_header()
 
     def write_header(self):