Skip to content

Commit

Permalink
Transforming to markdown and processing line breaks
Browse files Browse the repository at this point in the history
  • Loading branch information
namesty committed Dec 22, 2023
1 parent 20c34e4 commit 797a7d2
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
4 changes: 3 additions & 1 deletion evo_researcher/functions/web_scrape.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import os
+ from markdownify import markdownify
import re
import requests
from bs4 import BeautifulSoup
Expand All @@ -21,7 +22,8 @@ def web_scrape(url: str) -> tuple[str, str]:
[x.extract() for x in soup.findAll('head')]

text = soup.get_text()
- text = re.sub('(\n\n)\n*|\n', r'\1', text)
+ text = markdownify(text)
+ text = " ".join([x.strip() for x in text.split("\n")])

return (text, url)
else:
Expand Down
4 changes: 2 additions & 2 deletions tests/test_research.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest
from evo_researcher.functions.grade_info import grade_info
- from evo_researcher.main import research_langchain
+ from evo_researcher.test import research
from evo_researcher.autonolas.research import research as research_autonolas

dataset = [
Expand All @@ -15,7 +15,7 @@

@pytest.mark.parametrize("question", [pytest.param(question, id=question) for question in dataset])
def test_research(question: str):
- evo_research = research_langchain(question)
+ evo_research = research(question)
autonolas_research = research_autonolas(question)

evo_research_score = grade_info(evo_research, question)
Expand Down

0 comments on commit 797a7d2

Please sign in to comment.