diff --git a/lambda/aws-rag-appsync-stepfn-opensearch/s3_file_transformer/src/helpers/s3inmemoryloader.py b/lambda/aws-rag-appsync-stepfn-opensearch/s3_file_transformer/src/helpers/s3inmemoryloader.py index c8f9a615..b4c1a52d 100644 --- a/lambda/aws-rag-appsync-stepfn-opensearch/s3_file_transformer/src/helpers/s3inmemoryloader.py +++ b/lambda/aws-rag-appsync-stepfn-opensearch/s3_file_transformer/src/helpers/s3inmemoryloader.py @@ -47,14 +47,18 @@ def load(self) -> str: obj = s3.Object(self.bucket, self.key) encodedpdf = obj.get()['Body'].read() pdfFile = PdfReader(BytesIO(encodedpdf)) + # read pdf + raw_text = [] + for page in pdfFile.pages: + raw_text.append(page.extract_text()) + return '\n'.join(raw_text) except s3.meta.client.exceptions.NoSuchBucket as exception: logger.exception('NoSuchBucket') return "" except s3.meta.client.exceptions.NoSuchKey as exception: logger.exception('NoSuchKey') return "" - # read pdf - raw_text = [] - for page in pdfFile.pages: - raw_text.append(page.extract_text()) - return '\n'.join(raw_text) + except Exception as exception: + logger.exception(f"Reason: {exception}") + return "" + diff --git a/lambda/aws-summarization-appsync-stepfn/document_reader/s3inmemoryloader.py b/lambda/aws-summarization-appsync-stepfn/document_reader/s3inmemoryloader.py index 7647be1a..3849e885 100644 --- a/lambda/aws-summarization-appsync-stepfn/document_reader/s3inmemoryloader.py +++ b/lambda/aws-summarization-appsync-stepfn/document_reader/s3inmemoryloader.py @@ -48,14 +48,17 @@ def load(self) -> str: obj = s3.Object(self.bucket, self.key) encodedpdf = obj.get()['Body'].read() pdfFile = PdfReader(BytesIO(encodedpdf)) + # read pdf + raw_text = [] + for page in pdfFile.pages: + raw_text.append(page.extract_text()) + return '\n'.join(raw_text) except s3.meta.client.exceptions.NoSuchBucket as exception: logger.exception('NoSuchBucket') return "" except s3.meta.client.exceptions.NoSuchKey as exception: logger.exception('NoSuchKey') return "" - # read pdf - raw_text = [] - for page in pdfFile.pages: - raw_text.append(page.extract_text()) - return '\n'.join(raw_text) + except Exception as exception: + logger.exception(f"Reason: {exception}") + return "" diff --git a/lambda/aws-summarization-appsync-stepfn/summary_generator/lambda.py b/lambda/aws-summarization-appsync-stepfn/summary_generator/lambda.py index 9d141df6..e8dff747 100644 --- a/lambda/aws-summarization-appsync-stepfn/summary_generator/lambda.py +++ b/lambda/aws-summarization-appsync-stepfn/summary_generator/lambda.py @@ -15,7 +15,7 @@ from langchain.llms.bedrock import Bedrock from update_summary_status import updateSummaryJobStatus - +from langchain import PromptTemplate # external files from langchain.docstore.document import Document from langchain.chains.summarize import load_summarize_chain @@ -38,6 +38,15 @@ chain_type = os.environ["SUMMARY_LLM_CHAIN_TYPE"] aws_region = boto3.Session().region_name + +params = { + "max_tokens_to_sample": 4000, + "temperature": 0, + "top_k": 250, + "top_p": 1, + "stop_sequences": ["\\n\\nHuman:"], + } + bedrock_client = boto3.client( service_name='bedrock-runtime', region_name=aws_region, @@ -68,6 +77,8 @@ def handler(event, context: LambdaContext)-> dict: summary_llm = Bedrock( model_id="anthropic.claude-v2", client=bedrock_client, + model_kwargs=params, + streaming=False, ) redis_host = os.environ.get("REDIS_HOST", "N/A") @@ -133,10 +144,16 @@ def generate_summary(_summary_llm,chain_type,inputFile)-> str: logger.info(f" Using chain_type as {chain_type} for the document") docs = [Document(page_content=inputFile)] + template = """\n\nHuman: Please read the text:\n{text}\n + Summarize the text in 300 words: + \n\nAssistant:""" + prompt = PromptTemplate(template=template, input_variables=["text"]) + chain = load_summarize_chain( _summary_llm, chain_type=chain_type, - verbose=False + verbose=False, + prompt=prompt ) return chain.run(docs) diff --git a/src/patterns/gen-ai/aws-qa-appsync-opensearch/index.ts b/src/patterns/gen-ai/aws-qa-appsync-opensearch/index.ts index 6757218f..f3afecb9 100644 --- a/src/patterns/gen-ai/aws-qa-appsync-opensearch/index.ts +++ b/src/patterns/gen-ai/aws-qa-appsync-opensearch/index.ts @@ -386,7 +386,7 @@ export class QaAppsyncOpensearch extends Construct { })); const enableOperationalMetric = props.enableOperationalMetric || true; - const solution_id = 'QaAppsyncOpensearch_'+id; + const solution_id = 'genai_cdk_'+id; if (enableOperationalMetric) { question_answering_function.addEnvironment( diff --git a/src/patterns/gen-ai/aws-rag-appsync-stepfn-opensearch/index.ts b/src/patterns/gen-ai/aws-rag-appsync-stepfn-opensearch/index.ts index 5d221db2..8197cd3b 100644 --- a/src/patterns/gen-ai/aws-rag-appsync-stepfn-opensearch/index.ts +++ b/src/patterns/gen-ai/aws-rag-appsync-stepfn-opensearch/index.ts @@ -546,7 +546,7 @@ export class RagAppsyncStepfnOpensearch extends Construct { })); const enableOperationalMetric = props.enableOperationalMetric || true; - const solution_id = 'RagAppsyncStepfnOpensearch_'+id; + const solution_id = 'genai_cdk_'+id; if (enableOperationalMetric) { embeddings_job_function.addEnvironment( diff --git a/src/patterns/gen-ai/aws-summarization-appsync-stepfn/index.ts b/src/patterns/gen-ai/aws-summarization-appsync-stepfn/index.ts index 9ea769c4..ea290c83 100644 --- a/src/patterns/gen-ai/aws-summarization-appsync-stepfn/index.ts +++ b/src/patterns/gen-ai/aws-summarization-appsync-stepfn/index.ts @@ -485,7 +485,7 @@ export class SummarizationAppsyncStepfn extends Construct { ); const enableOperationalMetric = props.enableOperationalMetric || true; - const solution_id = 'SummarizationAppsyncStepfn_'+id; + const solution_id = 'genai_cdk_'+id; if (enableOperationalMetric) { documentReaderLambda.addEnvironment(