Commit

refactor: format code
jhehemann committed Jul 24, 2024
1 parent 7443916 commit f200434
Showing 3 changed files with 2 additions and 31 deletions.
2 changes: 1 addition & 1 deletion packages/jhehemann/customs/research/component.yaml
@@ -7,7 +7,7 @@ license: Apache-2.0
 aea_version: '>=1.0.0, <2.0.0'
 fingerprint:
   __init__.py: bafybeidpcd7b3qvijj5ucmjcxvya4o5hbgetndu3siny7itumlfeekkkam
-  research.py: bafybeieuoe2jet5t6qf7arlcn4e23762f6wxtf4g5b6avregsqcy5eljta
+  research.py: bafybeia443k5mbkjdaib2tgrpbmnw2q3kpwdgvyvadsgiobuwktixqfaja
 fingerprint_ignore_patterns: []
 entry_point: research.py
 callable: run
29 changes: 0 additions & 29 deletions packages/jhehemann/customs/research/research.py
@@ -337,7 +337,6 @@ def __init__(self, url, html=None, title=None, description=None, publication_dat
         self.chunks_final = []
         self.extract_attribute_names = ["title", "description", "publication_date", "publisher"]
 
-
     def get_title(self, soup, scripts):
         try:
             title = soup.title
@@ -354,14 +353,12 @@ def get_title(self, soup, scripts):
         # If no title was found return "n/a".
         return "n/a"
 
-
     def get_description(self, soup, scripts):
         description = soup.find("meta", attrs={"name": "description"}) or soup.find("meta", attrs={"property": "description"})
         if description and description.get("content"):
             return description["content"].strip()
         return "n/a"
 
-
     def get_publisher(self, soup, scripts):
         for script in scripts:
             try:
@@ -382,7 +379,6 @@ def get_publisher(self, soup, scripts):
         else:
             return "n/a"
 
-
     def get_date(self, soup, scripts):
         for script in scripts:
             try:
@@ -405,7 +401,6 @@ def get_date(self, soup, scripts):
                 return format_date(meta_tag["content"])
         return "n/a"
 
-
     def extract_page_attributes(
         self,
     ) -> object:
@@ -429,7 +424,6 @@ def extract_page_attributes(
 
         return self
 
-
     def to_prompt(self):
         """
         Function to convert article attributes into a structured format for LLM prompts.
@@ -443,7 +437,6 @@ def to_prompt(self):
 
         return page_info
 
-
     def _find_publisher(self, data):
         def extract_names(item, key):
             """Helper function to extract names from a field that could be a list or a single object."""
@@ -496,7 +489,6 @@ def trim_json_formatting(output_string):
     # Return the original string if no match is found
     return output_string
 
-
 def trim_chunks_string(
     chunks_string: str,
     enc: tiktoken.Encoding,
@@ -508,14 +500,12 @@ def trim_chunks_string(
     encoding = encoding[:max_tokens]
     return enc.decode(encoding)
 
-
 def find_release_date_in_data(data):
     for name in RELEASE_DATE_NAMES:
         if name in data:
             return data[name]
     return None
 
-
 def format_date(date_string) -> str:
     # Desired format "February 16, 2024, 3:30 PM"
     format_str = "%B %d, %Y"
@@ -534,7 +524,6 @@ def format_date(date_string) -> str:
         # If there's an error during parsing, return the original string
         return date_string
 
-
 def extract_question(text:str) -> str:
     # Look for a quoted question
     match = re.search(r'["“](.*?\?)["”]', text)
@@ -544,7 +533,6 @@ def extract_question(text:str) -> str:
     # Return prompt if ending with a question mark
     return text if text.strip().endswith('?') else ""
 
-
 def parse_date_str(date_str: str) -> datetime:
     # Desired format "February 16, 2024, 3:30 PM"
     datetime_format = "%B %d, %Y"
@@ -559,28 +547,24 @@ def remove_date_from_query(query: str) -> str:
     new_query = re.sub(date_pattern, "", query)
     return new_query
 
-
 def recursive_character_text_splitter(text, max_tokens, overlap):
     if len(text) <= max_tokens:
         return [text]
     else:
         return [text[i:i+max_tokens] for i in range(0, len(text), max_tokens - overlap)]
 
-
 def count_tokens(text: str, model: str) -> int:
     """Count the number of tokens in a text."""
     enc = encoding_for_model(model)
     return len(enc.encode(text))
 
-
 def get_first_dict_from_list(data):
     """Returns the first item if data is a list of dictionaries"""
    if isinstance(data, list) and len(data) > 0 and isinstance(data[0], dict):
         return data[0]
     else:
         return data # or raise an appropriate exception
 
-
 def format_additional_information(web_pages: List[WebPage]) -> str:
     """Format the additional information from the web pages"""
     formatted_information = ""
@@ -590,7 +574,6 @@ def format_additional_information(web_pages: List[WebPage]) -> str:
         formatted_information += f"{web_page.final_output}\n\n"
     return formatted_information
 
-
 def search_google(query: str, api_key: str, engine: str, num: int) -> List[str]:
     """Search Google using a custom search engine."""
     service = build("customsearch", "v1", developerKey=api_key)
@@ -605,7 +588,6 @@ def search_google(query: str, api_key: str, engine: str, num: int) -> List[str]:
     )
     return [result["link"] for result in search.get("items", [])]
 
-
 def process_in_batches(
     web_pages: List[WebPage],
     batch_size: int = 15,
@@ -646,7 +628,6 @@ def process_in_batches(
 
             yield get_futures
 
-
 def embed_batch(client: OpenAI, batch):
     """
     Helper function to process a single batch of texts and return the embeddings.
@@ -663,7 +644,6 @@ def embed_batch(client: OpenAI, batch):
     # Return the embeddings
     return [data.embedding for data in response.data]
 
-
 def sort_text_chunks(
     client: OpenAI, query: str, text_chunks_embedded: List[TextChunk]
 ) -> List[TextChunk]:
@@ -684,7 +664,6 @@ def sort_text_chunks(
 
     return [text_chunks_embedded[i] for i in I[0]]
 
-
 def get_embeddings(client: OpenAI, text_chunks: List[TextChunk], enc: tiktoken.Encoding) -> List[TextChunk]:
     """Get embeddings for the text chunks."""
     # Batch the text chunks that the sum of tokens is less than MAX_EMBEDDING_TOKEN_INPUT
@@ -726,7 +705,6 @@ def get_embeddings(client: OpenAI, text_chunks: List[TextChunk], enc: tiktoken.E
 
     return text_chunks
 
-
 def get_chunks(web_pages: List[WebPage]) -> List[WebPage]:
     """Create chunks from the text of all web pages"""
     text_chunks = []
@@ -737,7 +715,6 @@ def get_chunks(web_pages: List[WebPage]) -> List[WebPage]:
 
     return text_chunks
 
-
 def scrape_web_pages(web_pages: List[WebPage], week_interval, max_num_char: int = 10000) -> List[WebPage]:
     """Scrape text from web pages"""
     filtered_web_pages = []
@@ -778,7 +755,6 @@ def scrape_web_pages(web_pages: List[WebPage], week_interval, max_num_char: int
 
     return filtered_web_pages
 
-
 def extract_html_texts(
     web_pages: List[WebPage],
 ) -> List[WebPage]:
@@ -813,7 +789,6 @@ def extract_html_texts(
 
     return parsed_web_pages
 
-
 def get_urls_from_queries(
     queries: List[str],
     api_key: str,
@@ -847,7 +822,6 @@ def get_urls_from_queries(
 
     return list(results)
 
-
 def fetch_queries(
     input_query: str,
     engine="gpt-3.5-turbo",
@@ -919,7 +893,6 @@ def fetch_queries(
     print("Maximum attempts reached, returning an empty string.")
     return [], counter_callback
 
-
 def summarize_relevant_chunks(
     web_pages: List[WebPage],
     input_query: str,
@@ -981,7 +954,6 @@ def summarize_for_web_page(web_page: WebPage) -> None:
     web_pages = [web_page for web_page in web_pages if "Error" not in web_page.relevant_chunks_summary]
     return web_pages, counter_callback
 
-
 def summarize_over_summarized_chunks(
     web_pages: List[WebPage],
     input_query: str,
@@ -1059,7 +1031,6 @@ def summarize_over_summarized_chunks(
 
     return modified_web_pages, counter_callback
 
-
 def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]:
     """Run the task"""
     with OpenAIClientManager(kwargs["api_keys"]["openai"]):
2 changes: 1 addition & 1 deletion packages/packages.json
@@ -27,7 +27,7 @@
         "custom/napthaai/prediction_request_reasoning_lite/0.1.0": "bafybeigxs5tq4w7ouamwlvv7vjw3z3jeercynsuka4mcpveshopvej4cyu",
         "custom/valory/prediction_langchain/0.1.0": "bafybeihd3hv2zafscrlr25chtqmsh5weiolv2y7tc75urix665jf2a7zdu",
         "custom/victorpolisetty/gemini_request/0.1.0": "bafybeig5x6b5jtanet2q5sk7er7fdzpippbvh4q5p7uxmxpriq66omjnaq",
-        "custom/jhehemann/research/0.1.0": "bafybeibtqesy65bk4gbda6m44ck46rxrk5lrovhqt72sg3lbdjsismpkri",
+        "custom/jhehemann/research/0.1.0": "bafybeifjcadlvh5yqhltry6i5ntimfcnx5k6psscokvyxgpfy6wyvmdfsy",
         "custom/jhehemann/prediction_with_rules_and_report/0.1.0": "bafybeidfmb45ab336fzoq2vbglo6owpncpuiwzfihx3sm4b4a3asuhpwpm",
         "custom/jhehemann/infer_market_rules/0.1.0": "bafybeibkp6ywtfpgdphuscldtshp3y7oorhkfi7fu6goiwjvjymbbohwfy",
         "custom/gnosis/omen_tools/0.1.0": "bafybeibnjcgvy4l2libl34qz3aqfietjrevhxsbtisttuyfnelp3rfjlge",
