Skip to content

Commit

Permalink
feat(bugfixes): bug fixes for ingestion and summary (#72)
Browse files: browse the repository at this point in the history
* handle exception

* added prompt for summarization

---------

Co-authored-by: Alain Krok <[email protected]>
Co-authored-by: Dinesh Sajwan <[email protected]>
  • Loading branch information
3 people authored Nov 2, 2023
1 parent e2085af commit 338ac81
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,18 @@ def load(self) -> str:
obj = s3.Object(self.bucket, self.key)
encodedpdf = obj.get()['Body'].read()
pdfFile = PdfReader(BytesIO(encodedpdf))
# read pdf
raw_text = []
for page in pdfFile.pages:
raw_text.append(page.extract_text())
return '\n'.join(raw_text)
except s3.meta.client.exceptions.NoSuchBucket as exception:
logger.exception('NoSuchBucket')
return ""
except s3.meta.client.exceptions.NoSuchKey as exception:
logger.exception('NoSuchKey')
return ""
# read pdf
raw_text = []
for page in pdfFile.pages:
raw_text.append(page.extract_text())
return '\n'.join(raw_text)
except Exception as exception:
logger.exception(f"Reason: {exception}")
return ""

Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,17 @@ def load(self) -> str:
obj = s3.Object(self.bucket, self.key)
encodedpdf = obj.get()['Body'].read()
pdfFile = PdfReader(BytesIO(encodedpdf))
# read pdf
raw_text = []
for page in pdfFile.pages:
raw_text.append(page.extract_text())
return '\n'.join(raw_text)
except s3.meta.client.exceptions.NoSuchBucket as exception:
logger.exception('NoSuchBucket')
return ""
except s3.meta.client.exceptions.NoSuchKey as exception:
logger.exception('NoSuchKey')
return ""
# read pdf
raw_text = []
for page in pdfFile.pages:
raw_text.append(page.extract_text())
return '\n'.join(raw_text)
except Exception as exception:
logger.exception(f"Reason: {exception}")
return ""
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from langchain.llms.bedrock import Bedrock
from update_summary_status import updateSummaryJobStatus

from langchain import PromptTemplate
# external files
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
Expand All @@ -38,6 +38,15 @@
chain_type = os.environ["SUMMARY_LLM_CHAIN_TYPE"]

aws_region = boto3.Session().region_name

# Inference parameters for the Anthropic Claude model; passed to the Bedrock
# LLM wrapper through `model_kwargs`.
params = {
    "max_tokens_to_sample": 4000,
    "temperature": 0,
    "top_k": 250,
    "top_p": 1,
    # Must be two real newline characters followed by "Human:". The previous
    # value "\\n\\nHuman:" was a literal backslash + "n" sequence, which never
    # matches the "\n\nHuman:" turn marker used in the prompt template.
    "stop_sequences": ["\n\nHuman:"],
}

bedrock_client = boto3.client(
service_name='bedrock-runtime',
region_name=aws_region,
Expand Down Expand Up @@ -68,6 +77,8 @@ def handler(event, context: LambdaContext)-> dict:
summary_llm = Bedrock(
model_id="anthropic.claude-v2",
client=bedrock_client,
model_kwargs=params,
streaming=False,
)

redis_host = os.environ.get("REDIS_HOST", "N/A")
Expand Down Expand Up @@ -133,10 +144,16 @@ def generate_summary(_summary_llm,chain_type,inputFile)-> str:

logger.info(f" Using chain_type as {chain_type} for the document")
docs = [Document(page_content=inputFile)]
template = """\n\nHuman: Please read the text:\n{text}\n
Summarize the text in 300 words:
\n\nAssistant:"""
prompt = PromptTemplate(template=template, input_variables=["text"])

chain = load_summarize_chain(
_summary_llm,
chain_type=chain_type,
verbose=False
verbose=False,
prompt=prompt
)
return chain.run(docs)

2 changes: 1 addition & 1 deletion src/patterns/gen-ai/aws-qa-appsync-opensearch/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ export class QaAppsyncOpensearch extends Construct {
}));

const enableOperationalMetric = props.enableOperationalMetric || true;
const solution_id = 'QaAppsyncOpensearch_'+id;
const solution_id = 'genai_cdk_'+id;

if (enableOperationalMetric) {
question_answering_function.addEnvironment(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ export class RagAppsyncStepfnOpensearch extends Construct {
}));

const enableOperationalMetric = props.enableOperationalMetric || true;
const solution_id = 'RagAppsyncStepfnOpensearch_'+id;
const solution_id = 'genai_cdk_'+id;

if (enableOperationalMetric) {
embeddings_job_function.addEnvironment(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ export class SummarizationAppsyncStepfn extends Construct {
);

const enableOperationalMetric = props.enableOperationalMetric || true;
const solution_id = 'SummarizationAppsyncStepfn_'+id;
const solution_id = 'genai_cdk_'+id;

if (enableOperationalMetric) {
documentReaderLambda.addEnvironment(
Expand Down

0 comments on commit 338ac81

Please sign in to comment.