Skip to content

Commit

Permalink
Fix issues identified by pre-commit hooks
Browse files: browse the repository at this point in the history
  • Loading branch information
Monstertail committed May 30, 2024
1 parent 4fc6fa2 commit 2ad8190
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 15 deletions.
22 changes: 20 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion research_town/evaluators/output_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def validate_overall_score(cls, v)-> int:
if not (0 <= v <= 100):
raise ValueError("Overall score must be between 0 and 100")
return v

class OutputFormatError(Exception):
def __init__(self, message:str="Output format error")-> None:
self.message = message
Expand Down
7 changes: 3 additions & 4 deletions research_town/evaluators/quality_evaluator.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@

import re
from typing import Any, Dict, List
from typing import Any

from ..utils.decorator import parsing_error_exponential_backoff
from ..utils.eval_prompter import (
idea_quality_eval_prompting,
paper_quality_eval_prompting,
review_quality_eval_prompting
review_quality_eval_prompting,
)

from .output_format import (
IdeaEvalOutput,
OutputFormatError,
PaperEvalOutput,
ReviewEvalOutput
ReviewEvalOutput,
)


Expand Down
6 changes: 3 additions & 3 deletions research_town/utils/eval_prompter.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,11 +204,11 @@ def review_quality_eval_prompting(
review_prompt = """
<Instruction>
Please evaluate the review based on the following dimensions. Finally, give an overall score (0-100) and 10 dimension scores (for each dimension, provide a rating (1-10)) as the evaluation for the review. The output format should follow these rules: Overall Score of a review (0-100), with 10 Dimension Scores: [d1, d2, d3, ..., d10], where di is the score of the i-th dimension. An example of output is: 'Overall Score=92. Dimension Scores=[9,9,9,9,9,9,9,9,9,9]'. <Instruction>
Output format:
Output format:
</Instruction>
<Approach> The details of rating are as follows:
{regulations}
</Approach>
</Approach>
Here is the review to evaluate:
idea: {idea}
research trend: {trend}
Expand Down Expand Up @@ -307,4 +307,4 @@ def review_quality_eval_prompting(
# Merge the per-item results (List[str]) into a single newline-separated str.
combined_result = "\n".join(evaluation_result)

return combined_result
return combined_result
9 changes: 4 additions & 5 deletions tests/test_eval.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from typing import Any
from unittest.mock import MagicMock, patch

import pytest

from research_town.evaluators.quality_evaluator import (
IdeaQualityEvaluator,
PaperQualityEvaluator,
ReviewQualityEvaluator
ReviewQualityEvaluator,
)

idea = "The idea behind Mamba is to improve upon existing foundation models in deep learning, which typically rely on the Transformer architecture and its attention mechanism. While subquadratic-time architectures like linear attention, gated convolution, recurrent models, and structured state space models (SSMs) have been developed to address the inefficiency of Transformers on long sequences, they have not matched the performance of attention-based models in key areas such as language processing. Mamba addresses the shortcomings of these models by enabling content-based reasoning and making several key improvements: Adaptive SSM Parameters: By allowing SSM parameters to be functions of the input, Mamba effectively handles discrete modalities. This enables the model to selectively propagate or forget information along the sequence based on the current token.Parallel Recurrent Algorithm: Despite the changes preventing the use of efficient convolutions, Mamba employs a hardware-aware parallel algorithm in recurrent mode to maintain efficiency.Simplified Architecture: Mamba integrates these selective SSMs into a streamlined neural network architecture that does not rely on attention or MLP blocks."
Expand Down Expand Up @@ -162,7 +161,7 @@ def model_name(request: pytest.FixtureRequest) -> str:
# Note(jinwei): make sure the OpenAI API key is set before running real (non-mocked) tests with "use_mock=False".
@pytest.mark.parametrize("use_mock", [True])
def test_evaluator_eval_idea(use_mock:bool, model_name: str) -> None:


evaluator = IdeaQualityEvaluator(model_name= model_name)
input_dict = {'idea': idea, 'trend': trend,'pk':0}
Expand All @@ -182,8 +181,8 @@ def test_evaluator_eval_idea(use_mock:bool, model_name: str) -> None:
# Note(jinwei): make sure the OpenAI API key is set before running real (non-mocked) tests with "use_mock=False".
@pytest.mark.parametrize("use_mock", [True])
def test_evaluator_eval_paper(use_mock:bool,model_name: str) -> None:


paper = {'title': paper_title, 'abstract':paper_abstract}

input_dict = {'idea': idea, 'paper': paper,'pk':0}
Expand Down

0 comments on commit 2ad8190

Please sign in to comment.