From c602ddef45e5f4734b38ccaf36ad766ac2978273 Mon Sep 17 00:00:00 2001
From: Ryan Dale <dalerr@niddk.nih.gov>
Date: Mon, 30 Apr 2018 09:52:22 -0400
Subject: Support newer bedtools version

---
--- python-gffutils.orig/gffutils/iterators.py
+++ python-gffutils/gffutils/iterators.py
@@ -115,6 +115,7 @@
     Subclass for iterating over features provided as a filename
     """
     def open_function(self, data):
+        data = os.path.expanduser(data)
         if data.endswith('.gz'):
             import gzip
             return gzip.open(data)
--- python-gffutils.orig/gffutils/test/test.py
+++ python-gffutils/gffutils/test/test.py
@@ -880,7 +880,7 @@
     assert len(filelist) == 1, filelist
     assert filelist[0].endswith('.gffutils')
 
-    #...and another one for gff. This time, make sure the suffix 
+    #...and another one for gff. This time, make sure the suffix
     db = gffutils.create_db(
         gffutils.example_filename('FBgn0031208.gff'), ':memory:', _keep_tempfiles=True)
     filelist = os.listdir(tempdir)
@@ -1006,7 +1006,7 @@
 
 
 def test_deprecation_handler():
-    return 
+    return
 
     # TODO: when infer_gene_extent actually gets deprecated, test here.
     assert_raises(ValueError, gffutils.create_db,
@@ -1141,7 +1141,7 @@
     assert f.attributes['null'][0] == '\x00'
     assert f.attributes['comma'][0] == ','
 
-    # Commas indicate 
+    # Commas indicate
     assert f.attributes['Parent'] == ['A,', 'B%', 'C']
     assert str(f) == s
 
@@ -1174,6 +1174,18 @@
     assert db['e']['Note'] == [',']
     assert db['f']['Note'] == [',']
 
+
+def test_issue_105():
+    fn = gffutils.example_filename('FBgn0031208.gtf')
+    newfn = os.path.join(os.path.expanduser('~'), '.gffutils.test')
+    with open(fn) as fin, open(newfn, 'w') as fout:  # close both handles
+        fout.write(fin.read())
+    try:
+        for i in gffutils.iterators.DataIterator(newfn):
+            pass
+    finally:
+        os.unlink(newfn)  # remove the scratch file even if iteration fails
+
 if __name__ == "__main__":
     # this test case fails
     #test_attributes_modify()
--- python-gffutils.orig/gffutils/pybedtools_integration.py
+++ python-gffutils/gffutils/pybedtools_integration.py
@@ -2,11 +2,13 @@
 Module for integration with pybedtools
 """
 
+import os
 import pybedtools
 from pybedtools import featurefuncs
 from gffutils import helpers
 import six
 
+
 def to_bedtool(iterator):
     """
     Convert any iterator into a pybedtools.BedTool object.
@@ -22,7 +24,7 @@
 
 
 def tsses(db, merge_overlapping=False, attrs=None, attrs_sep=":",
-          merge_kwargs=dict(o='distinct', s=True, c=4), as_bed6=False):
+          merge_kwargs=None, as_bed6=False, bedtools_227_or_later=True):
     """
     Create 1-bp transcription start sites for all transcripts in the database
     and return as a sorted pybedtools.BedTool object pointing to a temporary
@@ -74,13 +76,21 @@
         attributes is supplied, e.g. ["gene_id", "transcript_id"], then these
         will be joined by `attr_join_sep` and then placed in the name field.
 
-
     attrs_sep: str
         If `as_bed6=True` or `merge_overlapping=True`, then use this character
         to separate attributes in the name field of the output BED. If also
         using `merge_overlapping=True`, you'll probably want this to be
         different than `merge_sep` in order to parse things out later.
 
+    bedtools_227_or_later : bool
+        In version 2.27, BEDTools changed the output for merge. By default,
+        this function expects BEDTools version 2.27 or later, but set this to
+        False to assume the older behavior.
+
+        For testing purposes, the environment variable
+        GFFUTILS_USES_BEDTOOLS_227_OR_LATER, if set, must be "true" or "false"
+        and overrides this argument; other values raise a ValueError.
+
     Examples
     --------
 
@@ -146,7 +156,22 @@
 
 
     """
-    _merge_kwargs = dict(o='distinct', s=True, c=4)
+    _override = os.environ.get('GFFUTILS_USES_BEDTOOLS_227_OR_LATER', None)
+    if _override is not None:
+        if _override == 'true':
+            bedtools_227_or_later = True
+        elif _override == 'false':
+            bedtools_227_or_later = False
+        else:
+            raise ValueError(
+                "Unknown value for GFFUTILS_USES_BEDTOOLS_227_OR_LATER "
+                "environment variable: {0}".format(_override))
+
+    if bedtools_227_or_later:
+        _merge_kwargs = dict(o='distinct', s=True, c='4,5,6')
+    else:
+        _merge_kwargs = dict(o='distinct', s=True, c='4')
+
     if merge_kwargs is not None:
         _merge_kwargs.update(merge_kwargs)
 
@@ -195,18 +220,18 @@
         x = x.each(to_bed).saveas()
 
     if merge_overlapping:
-
-        def fix_merge(f):
-            f = featurefuncs.extend_fields(f, 6)
-            return pybedtools.Interval(
-                f.chrom,
-                f.start,
-                f.stop,
-                f[4],
-                '.',
-                f[3])
-
-        x = x.merge(**_merge_kwargs).each(fix_merge).saveas()
-
+        if bedtools_227_or_later:
+            x = x.merge(**_merge_kwargs)
+        else:
+            def fix_merge(f):
+                f = featurefuncs.extend_fields(f, 6)
+                return pybedtools.Interval(
+                    f.chrom,
+                    f.start,
+                    f.stop,
+                    f[4],
+                    '.',
+                    f[3])
+            x = x.merge(**_merge_kwargs).saveas().each(fix_merge).saveas()
 
     return x
