diff --git a/sbol_utilities/excel_to_sbol.py b/sbol_utilities/excel_to_sbol.py
index b57a60ab..eec555d9 100644
--- a/sbol_utilities/excel_to_sbol.py
+++ b/sbol_utilities/excel_to_sbol.py
@@ -6,8 +6,8 @@
 
 import sbol3
 import openpyxl
-import tyto
-from .helper_functions import toplevel_named, strip_sbol2_version, type_to_standard_extension, is_plasmid
+from .helper_functions import toplevel_named, strip_sbol2_version, type_to_standard_extension, is_plasmid, \
+    tyto_lookup_with_caching, string_to_display_id, url_to_identity, strip_filetype_suffix
 
 BASIC_PARTS_COLLECTION = 'BasicParts'
 COMPOSITE_PARTS_COLLECTION = 'CompositeParts'
@@ -68,24 +68,6 @@ def expand_configuration(values: dict) -> dict:
     return values_to_use
 
 
-# TODO: remove after resolution of https://github.com/SynBioDex/pySBOL3/issues/191
-def string_to_display_id(name):
-    def sanitize_character(c):
-        replacements = {' ': '_', '-': '_', '.': '_'}
-        c = replacements.get(c, c)  # first, see if there is a wired replacement
-        if c.isalnum() or c == '_':  # keep allowed characters
-            return c
-        else:  # all others are changed into a reduced & compatible form of their unicode name
-            return f'_{unicodedata.name(c).replace(" SIGN","").replace(" ","_")}'
-
-    # make replacements in order to get a compliant displayID
-    display_id = "".join([sanitize_character(c) for c in name.strip()])
-    # prepend underscore if there is an initial digit
-    if display_id[0].isdigit():
-        display_id = "_"+display_id
-    return display_id
-
-
 def read_metadata(wb: openpyxl.Workbook, doc: sbol3.Document, config: dict):
     """
     Extract metadata and build collections
@@ -123,26 +105,13 @@ def read_metadata(wb: openpyxl.Workbook, doc: sbol3.Document, config: dict):
     doc.add(final_products)
 
     # also collect any necessary data tables from extra sheets
-    source_table = {row[config['source_name_col']].value:row[config['source_uri_col']].value
+    source_table = {row[config['source_name_col']].value: row[config['source_uri_col']].value
                     for row in wb[config['sources_sheet']].iter_rows(min_row=config['sources_first_row'])
                     if row[config['source_literal_col']].value}
 
     # return the set of created collections
     return basic_parts, composite_parts, linear_products, final_products, source_table
 
-# TODO: remove kludge after resolution of https://github.com/SynBioDex/tyto/issues/21
-tyto_cache = {}
-def tyto_lookup_with_caching(term: str) -> str:
-    if term not in tyto_cache:
-        try:
-            tyto_cache[term] = tyto.SO.get_uri_by_term(term)
-        except LookupError as e:
-            tyto_cache[term] = e
-    if isinstance(tyto_cache[term], LookupError):
-        raise tyto_cache[term]
-    else:
-        return tyto_cache[term]
-
 
 def row_to_basic_part(doc: sbol3.Document, row, basic_parts: sbol3.Collection, linear_products: sbol3.Collection,
                       final_products: sbol3.Collection, config: dict, source_table: dict):
@@ -161,8 +130,8 @@ def row_to_basic_part(doc: sbol3.Document, row, basic_parts: sbol3.Collection, l
     name = row[config['basic_name_col']].value
     if name is None:
         return  # skip lines without names
-    try:
-        raw_role = row[config['basic_role_col']].value  # look up with tyto; if fail, leave blank or add to description
+    raw_role = row[config['basic_role_col']].value
+    try:  # look up with tyto; if fail, leave blank or add to description
         role = (tyto_lookup_with_caching(raw_role) if raw_role else None)
     except LookupError:
         logging.warning(f'Role "{raw_role}" could not be found in Sequence Ontology')
@@ -181,6 +150,8 @@ def row_to_basic_part(doc: sbol3.Document, row, basic_parts: sbol3.Collection, l
 
     # identity comes from source if set to a literal table, from display_id if not set
     identity = None
+    display_id = None
+    was_derived_from = None
     if source_id and source_prefix:
         source_prefix = source_prefix.strip()
         if source_prefix in source_table:
@@ -188,7 +159,9 @@ def row_to_basic_part(doc: sbol3.Document, row, basic_parts: sbol3.Collection, l
                 display_id = string_to_display_id(source_id.strip())
                 identity = f'{source_table[source_prefix]}/{display_id}'
             else:  # when there is no prefix, use the bare value (in SBOL3 format)
-                identity = strip_sbol2_version(source_id.strip())
+                raw_url = source_id.strip()
+                identity = url_to_identity(strip_filetype_suffix(strip_sbol2_version(raw_url)))
+                was_derived_from = raw_url
         else:
             logging.info(f'Part "{name}" ignoring non-literal source: {source_prefix}')
     elif source_id:
@@ -202,6 +175,8 @@ def row_to_basic_part(doc: sbol3.Document, row, basic_parts: sbol3.Collection, l
     logging.debug(f'Creating basic part "{name}"')
     component = sbol3.Component(identity or display_id, sbol3.SBO_DNA, name=name,
                                 description=f'{design_notes}\n{description}'.strip())
+    if was_derived_from:
+        component.derived_from.append(was_derived_from)
     doc.add(component)
     if role:
         component.roles.append(role)
@@ -284,7 +259,7 @@ def make_constraint(constraint, part_list):
     try:
         restriction = constraint_dict[m.group(2)]
     except KeyError:
-        raise ValueError(f'Do not recognize constraint relation "{restriction}"')
+        raise ValueError(f'Do not recognize constraint relation in "{constraint}"')
     x = int(m.group(1))
     y = int(m.group(3))
     if x is y:
diff --git a/sbol_utilities/helper_functions.py b/sbol_utilities/helper_functions.py
index a75f9ed3..0a2f42d6 100644
--- a/sbol_utilities/helper_functions.py
+++ b/sbol_utilities/helper_functions.py
@@ -1,20 +1,23 @@
-from typing import Iterable, Union
+import unicodedata
+from typing import Iterable, Union, Optional
 
 import sbol3
-import filecmp
-import difflib
+import tyto
 
 #########################
 # Collection of shared helper functions for utilities package
 
-# Flatten list of lists into a single list
-import tyto
-
 
 def flatten(collection: Iterable[list]) -> list:
+    """Flatten list of lists into a single list (only one level of nesting is removed)
+
+    :param collection: iterable of lists to concatenate, in order
+    :return: flattened list
+    """
     return [item for sublist in collection for item in sublist]
 
-def toplevel_named(doc: sbol3.Document, name:str) -> sbol3.Identified:
+
+def toplevel_named(doc: sbol3.Document, name: str) -> Optional[sbol3.Identified]:
     """Find the unique TopLevel document object with the given name (rather than displayID or URI)
 
     :param doc: SBOL document to search
@@ -67,11 +70,81 @@ def strip_sbol2_version(identity: str) -> str:
     last_segment = identity.split('/')[-1]
     try:
         sbol2_version = int(last_segment)  # if last segment is a number...
-        return identity.rsplit('/',1)[0]  # ... then return everything else
+        return identity.rsplit('/', 1)[0]  # ... then return everything else
     except ValueError:  # if last segment was not a number, there is no version to strip
         return identity
 
 
+# TODO: replace with EDAM format entries when SBOL2 and SBOL3 can be differentiated
+GENETIC_DESIGN_FILE_TYPES = {  # type name -> set of file suffixes, or a dict whose values are such sets
+    'FASTA': {'.fasta', '.fa'},
+    'GenBank': {'.genbank', '.gb'},
+    'SBOL2': {'.xml'},
+    'SBOL3': {sbol3.NTRIPLES: {'.nt'},  # nested case: suffix sets keyed by sbol3 format constant
+              sbol3.RDF_XML: {'.rdf'},
+              sbol3.TURTLE: {'.ttl'},
+              sbol3.JSONLD: {'.json', '.jsonld'}
+              }
+}
+
+
+def design_file_type(name: str) -> Optional[str]:
+    """Infer which kind of genetic design file a name refers to, judging only by its suffix
+
+    :param name: file name, optionally including a path
+    :return: matching key of GENETIC_DESIGN_FILE_TYPES, or None if no known suffix matches
+    """
+    for type_name, suffixes in GENETIC_DESIGN_FILE_TYPES.items():
+        if isinstance(suffixes, set):  # plain set of suffixes
+            candidates = tuple(suffixes)
+        else:  # dictionary of suffix sets: pool all of them together
+            candidates = tuple(suffix for group in suffixes.values() for suffix in group)
+        if name.endswith(candidates):
+            return type_name
+    return None
+
+
+def strip_filetype_suffix(identity: str) -> str:
+    """Prettify a URL by stripping a standard genetic design file type suffix (e.g., '.gb') off of it
+
+    :param identity: URL to sanitize
+    :return: URL without its file type suffix; returned unchanged if it has no recognized suffix
+    """
+    extensions = flatten((flatten(v.values()) if isinstance(v, dict) else v) for v in GENETIC_DESIGN_FILE_TYPES.values())
+    for x in extensions:
+        if identity.endswith(x):
+            return identity[:-len(x)]  # slice instead of str.removesuffix, which only exists in Python 3.9+
+    return identity
+
+
+# TODO: remove after resolution of https://github.com/SynBioDex/pySBOL3/issues/191
+def string_to_display_id(name):
+    """Convert a string to a displayId: alphanumerics and underscores only, never starting with a digit
+
+    :param name: string to convert; leading and trailing whitespace is dropped
+    :return: sanitized string (empty if name is empty or only whitespace)
+    """
+    def sanitize_character(c):
+        c = {' ': '_', '-': '_', '.': '_'}.get(c, c)  # first, see if there is a wired replacement
+        if c.isalnum() or c == '_':  # keep allowed characters
+            return c
+        # all others become a reduced form of their unicode name (or hex code point if unnamed, e.g., tab)
+        label = unicodedata.name(c, f'U{ord(c):04X}').replace(' SIGN', '')
+        return '_' + label.replace(' ', '_').replace('-', '_')  # names like LESS-THAN SIGN contain hyphens
+    display_id = ''.join(sanitize_character(c) for c in name.strip())
+    return '_' + display_id if display_id[:1].isdigit() else display_id  # displayId cannot start with a digit
+
+
+def url_to_identity(url: str) -> str:
+    """Make a URL usable as an identity by rewriting the portion after its last "/" as a displayId
+
+    :param url: URL to sanitize; expected to contain at least one "/"
+    :return: the URL up to its last "/", followed by the sanitized final segment
+    """
+    segments = url.rsplit('/', maxsplit=1)
+    return '/'.join([segments[0], string_to_display_id(segments[1])])
+
+
 def is_plasmid(obj: Union[sbol3.Component, sbol3.Feature]) -> bool:
     """Check if an SBOL Component or Feature is a plasmid-like structure, i.e., either circular or having a plasmid role
 
@@ -166,3 +239,17 @@ def replace_feature(component, old, new):
     for ct in component.constraints:
         if ct.subject == old.identity: ct.subject = new.identity
         if ct.object == old.identity: ct.object = new.identity
+
+
+# TODO: remove kludge after resolution of https://github.com/SynBioDex/tyto/issues/21
+tyto_cache = {}  # maps term -> looked-up value on success, or -> the LookupError raised on failure
+def tyto_lookup_with_caching(term: str) -> str:
+    if term not in tyto_cache:  # first request for this term: query tyto and remember the outcome
+        try:
+            tyto_cache[term] = tyto.SO.get_uri_by_term(term)
+        except LookupError as e:
+            tyto_cache[term] = e  # failures are cached too, so a bad term is only looked up once
+    if isinstance(tyto_cache[term], LookupError):
+        raise tyto_cache[term]  # re-raise the cached failure
+    else:
+        return tyto_cache[term]
diff --git a/setup.py b/setup.py
index 1ab171da..96a83ca4 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@
       long_description_content_type='text/markdown',
       url='https://github.com/SynBioDex/SBOL-utilities',
       license='MIT License',
-      version='1.0a8',
+      version='1.0a9',
       # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
       classifiers=[
             # How mature is this project? Common values are
diff --git a/test/test_files/simple_library.nt b/test/test_files/simple_library.nt
index 6954d151..0b9b4952 100644
--- a/test/test_files/simple_library.nt
+++ b/test/test_files/simple_library.nt
@@ -671,6 +671,7 @@
 <https://synbiohub.programmingbiology.org/public/Eco1C1G1T1/LmrA> <http://sbols.org/v3#name> "LmrA" .
 <https://synbiohub.programmingbiology.org/public/Eco1C1G1T1/LmrA> <http://sbols.org/v3#type> <https://identifiers.org/SBO:0000251> .
 <https://synbiohub.programmingbiology.org/public/Eco1C1G1T1/LmrA> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://sbols.org/v3#Component> .
+<https://synbiohub.programmingbiology.org/public/Eco1C1G1T1/LmrA> <http://www.w3.org/ns/prov#wasDerivedFrom> <https://synbiohub.programmingbiology.org/public/Eco1C1G1T1/LmrA/1> .
 <https://www.ncbi.nlm.nih.gov/nuccore/ATP07149_1> <http://sbols.org/v3#description> "Cyan FP; codon optimized using IDT tool <https://www.idtdna.com/CodonOpt>\nmCerulean3" .
 <https://www.ncbi.nlm.nih.gov/nuccore/ATP07149_1> <http://sbols.org/v3#displayId> "ATP07149_1" .
 <https://www.ncbi.nlm.nih.gov/nuccore/ATP07149_1> <http://sbols.org/v3#hasNamespace> <http://sbolstandard.org/testfiles/> .
diff --git a/test/test_files/simple_library.xlsx b/test/test_files/simple_library.xlsx
index 6801f497..28eeb503 100644
Binary files a/test/test_files/simple_library.xlsx and b/test/test_files/simple_library.xlsx differ
diff --git a/test/test_files/two_backbones.nt b/test/test_files/two_backbones.nt
index aca33318..fd4b4e3c 100644
--- a/test/test_files/two_backbones.nt
+++ b/test/test_files/two_backbones.nt
@@ -58,8 +58,8 @@
 <http://sbolstandard.org/testfiles/Backbone_variants/VariableFeature1> <http://sbols.org/v3#cardinality> <http://sbols.org/v3#one> .
 <http://sbolstandard.org/testfiles/Backbone_variants/VariableFeature1> <http://sbols.org/v3#displayId> "VariableFeature1" .
 <http://sbolstandard.org/testfiles/Backbone_variants/VariableFeature1> <http://sbols.org/v3#variable> <http://sbolstandard.org/testfiles/Backbone_variants_template/LocalSubComponent1> .
-<http://sbolstandard.org/testfiles/Backbone_variants/VariableFeature1> <http://sbols.org/v3#variant> <http://sbolstandard.org/testfiles/pOpen_v4> .
 <http://sbolstandard.org/testfiles/Backbone_variants/VariableFeature1> <http://sbols.org/v3#variant> <http://sbolstandard.org/testfiles/pSB1C3> .
+<http://sbolstandard.org/testfiles/Backbone_variants/VariableFeature1> <http://sbols.org/v3#variant> <https://freegenes.github.io/genbank/BBF10K_000589> .
 <http://sbolstandard.org/testfiles/Backbone_variants/VariableFeature1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://sbols.org/v3#VariableFeature> .
 <http://sbolstandard.org/testfiles/Backbone_variants> <http://sbols.org/v3#displayId> "Backbone_variants" .
 <http://sbolstandard.org/testfiles/Backbone_variants> <http://sbols.org/v3#hasNamespace> <http://sbolstandard.org/testfiles/> .
@@ -114,8 +114,8 @@
 <http://sbolstandard.org/testfiles/BasicParts> <http://sbols.org/v3#member> <http://parts.igem.org/J364007> .
 <http://sbolstandard.org/testfiles/BasicParts> <http://sbols.org/v3#member> <http://parts.igem.org/J364009> .
 <http://sbolstandard.org/testfiles/BasicParts> <http://sbols.org/v3#member> <http://parts.igem.org/R0040> .
-<http://sbolstandard.org/testfiles/BasicParts> <http://sbols.org/v3#member> <http://sbolstandard.org/testfiles/pOpen_v4> .
 <http://sbolstandard.org/testfiles/BasicParts> <http://sbols.org/v3#member> <http://sbolstandard.org/testfiles/pSB1C3> .
+<http://sbolstandard.org/testfiles/BasicParts> <http://sbols.org/v3#member> <https://freegenes.github.io/genbank/BBF10K_000589> .
 <http://sbolstandard.org/testfiles/BasicParts> <http://sbols.org/v3#name> "Fluorescence calibration basic parts" .
 <http://sbolstandard.org/testfiles/BasicParts> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://sbols.org/v3#Collection> .
 <http://sbolstandard.org/testfiles/CompositeParts> <http://sbols.org/v3#description> "Designs for round 1 if iGEM 2021 calibration experimentation" .
@@ -182,8 +182,8 @@
 <http://sbolstandard.org/testfiles/Two_by_six/VariableFeature2> <http://sbols.org/v3#cardinality> <http://sbols.org/v3#one> .
 <http://sbolstandard.org/testfiles/Two_by_six/VariableFeature2> <http://sbols.org/v3#displayId> "VariableFeature2" .
 <http://sbolstandard.org/testfiles/Two_by_six/VariableFeature2> <http://sbols.org/v3#variable> <http://sbolstandard.org/testfiles/Two_by_six_template/LocalSubComponent2> .
-<http://sbolstandard.org/testfiles/Two_by_six/VariableFeature2> <http://sbols.org/v3#variant> <http://sbolstandard.org/testfiles/pOpen_v4> .
 <http://sbolstandard.org/testfiles/Two_by_six/VariableFeature2> <http://sbols.org/v3#variant> <http://sbolstandard.org/testfiles/pSB1C3> .
+<http://sbolstandard.org/testfiles/Two_by_six/VariableFeature2> <http://sbols.org/v3#variant> <https://freegenes.github.io/genbank/BBF10K_000589> .
 <http://sbolstandard.org/testfiles/Two_by_six/VariableFeature2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://sbols.org/v3#VariableFeature> .
 <http://sbolstandard.org/testfiles/Two_by_six> <http://sbols.org/v3#displayId> "Two_by_six" .
 <http://sbolstandard.org/testfiles/Two_by_six> <http://sbols.org/v3#hasNamespace> <http://sbolstandard.org/testfiles/> .
@@ -246,14 +246,6 @@
 <http://sbolstandard.org/testfiles/Two_by_six_template> <http://sbols.org/v3#hasNamespace> <http://sbolstandard.org/testfiles/> .
 <http://sbolstandard.org/testfiles/Two_by_six_template> <http://sbols.org/v3#type> <https://identifiers.org/SBO:0000251> .
 <http://sbolstandard.org/testfiles/Two_by_six_template> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://sbols.org/v3#Component> .
-<http://sbolstandard.org/testfiles/pOpen_v4> <http://sbols.org/v3#description> "" .
-<http://sbolstandard.org/testfiles/pOpen_v4> <http://sbols.org/v3#displayId> "pOpen_v4" .
-<http://sbolstandard.org/testfiles/pOpen_v4> <http://sbols.org/v3#hasNamespace> <http://sbolstandard.org/testfiles/> .
-<http://sbolstandard.org/testfiles/pOpen_v4> <http://sbols.org/v3#name> "pOpen_v4" .
-<http://sbolstandard.org/testfiles/pOpen_v4> <http://sbols.org/v3#role> <https://identifiers.org/SO:0000155> .
-<http://sbolstandard.org/testfiles/pOpen_v4> <http://sbols.org/v3#type> <https://identifiers.org/SBO:0000251> .
-<http://sbolstandard.org/testfiles/pOpen_v4> <http://sbols.org/v3#type> <https://identifiers.org/SO:0000988> .
-<http://sbolstandard.org/testfiles/pOpen_v4> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://sbols.org/v3#Component> .
 <http://sbolstandard.org/testfiles/pSB1C3> <http://sbols.org/v3#description> "pMB1 replicon, 100-300 copy" .
 <http://sbolstandard.org/testfiles/pSB1C3> <http://sbols.org/v3#displayId> "pSB1C3" .
 <http://sbolstandard.org/testfiles/pSB1C3> <http://sbols.org/v3#hasNamespace> <http://sbolstandard.org/testfiles/> .
@@ -262,3 +254,12 @@
 <http://sbolstandard.org/testfiles/pSB1C3> <http://sbols.org/v3#type> <https://identifiers.org/SBO:0000251> .
 <http://sbolstandard.org/testfiles/pSB1C3> <http://sbols.org/v3#type> <https://identifiers.org/SO:0000988> .
 <http://sbolstandard.org/testfiles/pSB1C3> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://sbols.org/v3#Component> .
+<https://freegenes.github.io/genbank/BBF10K_000589> <http://sbols.org/v3#description> "" .
+<https://freegenes.github.io/genbank/BBF10K_000589> <http://sbols.org/v3#displayId> "BBF10K_000589" .
+<https://freegenes.github.io/genbank/BBF10K_000589> <http://sbols.org/v3#hasNamespace> <http://sbolstandard.org/testfiles/> .
+<https://freegenes.github.io/genbank/BBF10K_000589> <http://sbols.org/v3#name> "pOpen_v4" .
+<https://freegenes.github.io/genbank/BBF10K_000589> <http://sbols.org/v3#role> <https://identifiers.org/SO:0000155> .
+<https://freegenes.github.io/genbank/BBF10K_000589> <http://sbols.org/v3#type> <https://identifiers.org/SBO:0000251> .
+<https://freegenes.github.io/genbank/BBF10K_000589> <http://sbols.org/v3#type> <https://identifiers.org/SO:0000988> .
+<https://freegenes.github.io/genbank/BBF10K_000589> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://sbols.org/v3#Component> .
+<https://freegenes.github.io/genbank/BBF10K_000589> <http://www.w3.org/ns/prov#wasDerivedFrom> <https://freegenes.github.io/genbank/BBF10K_000589.gb> .
diff --git a/test/test_files/two_backbones.xlsx b/test/test_files/two_backbones.xlsx
index 42fc83a0..1add8b40 100644
Binary files a/test/test_files/two_backbones.xlsx and b/test/test_files/two_backbones.xlsx differ
diff --git a/test/test_helpers.py b/test/test_helpers.py
index 5757d2be..3f71b038 100644
--- a/test/test_helpers.py
+++ b/test/test_helpers.py
@@ -1,3 +1,5 @@
+import difflib
+import filecmp
 import unittest
 
 from sbol_utilities.helper_functions import *
@@ -27,11 +29,24 @@ def test_sequence_validators(self):
         assert unambiguous_protein_sequence('tklqpntvir')
         assert not unambiguous_protein_sequence('tklqxpntvir')
 
-    def test_sbol2_version_stripping(self):
+    def test_url_sanitization(self):
+        # SBOL2 version stripping: a trailing numeric segment is dropped, other URLs are unchanged
         assert strip_sbol2_version('https://synbiohub.programmingbiology.org/public/Eco1C1G1T1/LmrA/1') == \
                'https://synbiohub.programmingbiology.org/public/Eco1C1G1T1/LmrA'
         assert strip_sbol2_version('https://synbiohub.programmingbiology.org/public/Eco1C1G1T1/LmrA') == \
                'https://synbiohub.programmingbiology.org/public/Eco1C1G1T1/LmrA'
 
+        # displayId cleaning:
+        assert string_to_display_id('GB30248.1') == 'GB30248_1'
+        assert url_to_identity('http://foo/bar/baz.qux') == 'http://foo/bar/baz_qux'
+
+        # extension detection and stripping
+        assert design_file_type('something.fasta') == 'FASTA'
+        assert design_file_type('something.xlsx') is None  # identity check is the idiomatic None test
+        assert design_file_type('something.xml') == 'SBOL2'
+        assert design_file_type('something.nt') == 'SBOL3'
+        assert strip_filetype_suffix('http://foo/bar/baz.gb') == 'http://foo/bar/baz'
+        assert strip_filetype_suffix('http://foo/bar/baz.qux') == 'http://foo/bar/baz.qux'
+
 if __name__ == '__main__':
     unittest.main()