You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
File ~\anaconda3\envs\fundamentalchat_classifier2\lib\site-packages\xbrl\transformations_init_.py:229, in dateMonthDayYearEN(arg)
226 def dateMonthDayYearEN(arg: str) -> str:
227 # Mon(th)(D)D(Y)Y(YY) -> YYYY-MM-DD
228 seg = re.split(r'[^\d\w]+', arg) # split at any char that is not a digit nor a word
--> 229 return f"{yearNorm(seg[2])}-{monthNorm[seg[0]]}-{seg[1].zfill(2)}"
IndexError: list index out of range
I "fixed" it by updating the dateMonthDayYearEN function as follows:
def dateMonthDayYearEN(arg: str) -> str:
    """Convert an English month-day-year date string to ``YYYY-MM-DD``.

    The input is split on runs of non-word characters into month, day and
    year segments, e.g. ``"May 13, 2023"``. Input that does not have that
    shape is returned untouched instead of raising ``IndexError``.

    Args:
        arg: Raw text of the fact to normalize.

    Returns:
        The normalized date, or ``arg`` unchanged when it does not split
        into exactly three segments whose last one is a four-digit year.

    Raises:
        Exception: Any error raised while building the normalized date
            (for example a failed ``monthNorm`` lookup) is reported on
            stdout and then re-raised unchanged.
    """
    # Mon(th)(D)D(Y)Y(YY) -> YYYY-MM-DD
    try:
        seg = re.split(r'[^\d\w]+', arg)  # split at any char that is not a digit nor a word
        # Only normalize when there are exactly three segments and the year
        # segment has four digits.
        # NOTE(review): the format comment above allows shorter years, but
        # this guard passes them through unchanged -- confirm that is intended.
        if len(seg) != 3 or not (len(seg[2]) == 4 and seg[2].isdigit()):
            return arg
        return f"{yearNorm(seg[2])}-{monthNorm[seg[0]]}-{seg[1].zfill(2)}"
    except Exception:
        print(f"Error processing date string: {arg}")
        # Bare ``raise`` re-raises the active exception with its original
        # traceback. The duplicate ``return`` that used to follow this
        # handler was unreachable and has been removed.
        raise
The text was updated successfully, but these errors were encountered:
The issue is that the content of the fact, "May", does not conform to the format "date-month-day-year-en". This format expects something like "May, 13 2023". Because the value is in the wrong format (and is missing data), the parser cannot parse this filing.
I was getting the following error when parsing "https://www.sec.gov/Archives/edgar/data/40704/000119312523177500/0001193125-23-177500-index.htm"
File ~\anaconda3\envs\fundamentalchat_classifier2\lib\site-packages\xbrl\instance.py:749, in XbrlParser.parse_instance(self, uri, instance_url, encoding)
745 if uri.split('.')[-1] == 'xml' or uri.split('.')[-1] == 'xbrl':
746 return parse_xbrl_url(uri, self.cache) if uri.startswith('http')
747 else parse_xbrl(uri, self.cache, instance_url)
748 return parse_ixbrl_url(uri, self.cache) if uri.startswith('http')
--> 749 else parse_ixbrl(uri, self.cache, instance_url, encoding)
File ~\anaconda3\envs\fundamentalchat_classifier2\lib\site-packages\xbrl\instance.py:511, in parse_ixbrl(instance_path, cache, instance_url, encoding, schema_root)
509 facts.append(NumericFact(concept, context, fact_value, unit, decimals, xml_id))
510 elif fact_elem.tag == '{' + ns_map['ix'] + '}nonNumeric':
--> 511 fact_value: str = _extract_non_numeric_value(fact_elem)
512 facts.append(TextFact(concept, context, str(fact_value), xml_id))
513 #print(f"Added TextFact with value: {fact_value} and concept: {concept.name}")
514 #print(f"Total facts in XbrlInstance: {len(facts)}")
515 #for fact in facts:
516 #if fact.concept.name == "DocumentType":
517 #print(f"Found DocumentType fact with value: {fact.value}")
File ~\anaconda3\envs\fundamentalchat_classifier2\lib\site-packages\xbrl\instance.py:543, in _extract_non_numeric_value(fact_elem)
541 registryNS: str = fact_elem.attrib['ns_map'][registryPrefix]
542 try:
--> 543 fact_value = normalize(registryNS, formatCode, fact_value)
544 #print(f"Normalized fact_value: {fact_value}")
545 except TransformationNotImplemented:
File ~\anaconda3\envs\fundamentalchat_classifier2\lib\site-packages\xbrl\transformations_init_.py:589, in normalize(namespace, formatCode, value)
587 return ixt2formatCode
588 elif namespace == 'http://www.xbrl.org/inlineXBRL/transformation/2015-02-26':
--> 589 return ixt3formatCode
590 elif namespace == 'http://www.xbrl.org/inlineXBRL/transformation/2020-02-12':
591 return ixt4formatCode
File ~\anaconda3\envs\fundamentalchat_classifier2\lib\site-packages\xbrl\transformations_init_.py:229, in dateMonthDayYearEN(arg)
226 def dateMonthDayYearEN(arg: str) -> str:
227 # Mon(th)(D)D(Y)Y(YY) -> YYYY-MM-DD
228 seg = re.split(r'[^\d\w]+', arg) # split at any char that is not a digit nor a word
--> 229 return f"{yearNorm(seg[2])}-{monthNorm[seg[0]]}-{seg[1].zfill(2)}"
IndexError: list index out of range
I "fixed" it by updating the dateMonthDayYearEN function as follows:
def dateMonthDayYearEN(arg: str) -> str:
    """Convert an English month-day-year date string to ``YYYY-MM-DD``.

    The input is split on runs of non-word characters into month, day and
    year segments, e.g. ``"May 13, 2023"``. Input that does not have that
    shape is returned untouched instead of raising ``IndexError``.

    Args:
        arg: Raw text of the fact to normalize.

    Returns:
        The normalized date, or ``arg`` unchanged when it does not split
        into exactly three segments whose last one is a four-digit year.

    Raises:
        Exception: Any error raised while building the normalized date
            (for example a failed ``monthNorm`` lookup) is reported on
            stdout and then re-raised unchanged.
    """
    # Mon(th)(D)D(Y)Y(YY) -> YYYY-MM-DD
    try:
        seg = re.split(r'[^\d\w]+', arg)  # split at any char that is not a digit nor a word
        # Only normalize when there are exactly three segments and the year
        # segment has four digits.
        # NOTE(review): the format comment above allows shorter years, but
        # this guard passes them through unchanged -- confirm that is intended.
        if len(seg) != 3 or not (len(seg[2]) == 4 and seg[2].isdigit()):
            return arg
        return f"{yearNorm(seg[2])}-{monthNorm[seg[0]]}-{seg[1].zfill(2)}"
    except Exception:
        print(f"Error processing date string: {arg}")
        # Bare ``raise`` re-raises the active exception with its original
        # traceback. The duplicate ``return`` that used to follow this
        # handler was unreachable and has been removed.
        raise
The text was updated successfully, but these errors were encountered: