Skip to content

Commit

Permalink
Feat[TE-15569]: Addon to access downloaded PDF files on the cloud and…
Browse files Browse the repository at this point in the history
… extract the content (#40)

* Feat[TE-15569]: Addon to access downloaded PDF files on the cloud and extract the content

* Feat[TE-15569]: As per the review comment moved the window switching code to finally block
  • Loading branch information
SunilGembali authored Feb 5, 2024
1 parent 80ab082 commit 82cb0ba
Show file tree
Hide file tree
Showing 5 changed files with 373 additions and 0 deletions.
106 changes: 106 additions & 0 deletions pdfdataextractor/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Maven build descriptor for the "pdfdataextractor" Testsigma addon.
Produces a single shaded jar (maven-shade-plugin, package phase) plus an
attached sources jar (maven-source-plugin).
-->
<project
xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.testsigma.addons</groupId>
<artifactId>pdfdataextractor</artifactId>
<version>1.0.0</version>
<packaging>jar</packaging>

<!-- Central place for compiler level and dependency/plugin versions. -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Sources are compiled for, and target, Java 11. -->
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<testsigma.sdk.version>1.2.5_cloud</testsigma.sdk.version>
<!-- NOTE(review): "5.8.0-M1" looks like a milestone build rather than a final release; confirm this is intended. -->
<junit.jupiter.version>5.8.0-M1</junit.jupiter.version>
<!-- NOTE(review): this property is not referenced anywhere else in this POM. -->
<testsigma.addon.maven.plugin>1.0.0</testsigma.addon.maven.plugin>
<maven.source.plugin.version>3.2.1</maven.source.plugin.version>
<lombok.version>1.18.20</lombok.version>

</properties>

<dependencies>
<!-- Testsigma SDK: supplies WebAction, Result, Logger and the @Action/@TestData/@RunTimeData annotations used by the addon classes. -->
<dependency>
<groupId>com.testsigma</groupId>
<artifactId>testsigma-java-sdk</artifactId>
<version>${testsigma.sdk.version}</version>
</dependency>
<!-- Lombok (@Data on the action classes); marked optional so it is not passed on to consumers of this artifact. -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>${lombok.version}</version>
<optional>true</optional>
</dependency>
<!-- JUnit 5 API, available on the test classpath only. -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<version>${junit.jupiter.version}</version>
<scope>test</scope>
</dependency>
<!-- NOTE(review): no <scope> is declared here, so TestNG is a compile-scope dependency; confirm whether test scope was intended. -->
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.14.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.seleniumhq.selenium/selenium-java -->
<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-java</artifactId>
<version>4.14.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/io.appium/java-client -->
<dependency>
<groupId>io.appium</groupId>
<artifactId>java-client</artifactId>
<version>9.0.0</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>2.13.0</version>
</dependency>
<!-- Apache PDFBox: PDDocument and PDFTextStripper are used to read the text of the downloaded PDF. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.30</version>
</dependency>
<!-- Commons Lang: ExceptionUtils is used to log stack traces. -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.14.0</version>
</dependency>
</dependencies>
<build>
<finalName>pdfdataextractor</finalName>
<plugins>
<!-- Runs the "shade" goal during the package phase. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.4</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Runs the source plugin's "jar" goal (execution id "attach-sources") to build a sources jar. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>${maven.source.plugin.version}</version>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package com.testsigma.addons.web;

import com.testsigma.addons.web.util.PdfAndDocUtilities;
import com.testsigma.sdk.ApplicationType;
import com.testsigma.sdk.Result;
import com.testsigma.sdk.WebAction;
import com.testsigma.sdk.annotation.Action;
import com.testsigma.sdk.annotation.RunTimeData;
import com.testsigma.sdk.annotation.TestData;
import lombok.Data;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.openqa.selenium.NoSuchElementException;

import java.io.File;

/**
 * Web action that copies the first entry listed on the browser's downloads page
 * to a local temporary file, extracts its text with PDFBox and stores that text
 * in the run time variable named by the {@code variable-name} test data.
 *
 * <p>The action fails (returns {@link Result#FAILED}) when the file cannot be
 * located or copied, when the PDF is encrypted, or when its content cannot be read.
 */
@Data
@Action(actionText = "PDF: Extract content from latest file in the downloads and store it in runtime-variable variable-name",
        description = "Extracts content from latest downloaded file in the downloads and stores that content in a run time variable",
        applicationType = ApplicationType.WEB)
public class ExtractLatestDownloadedPDFFileContent extends WebAction {
    /** Name of the run time variable that receives the extracted text. */
    @TestData(reference = "variable-name", isRuntimeVariable = true)
    private com.testsigma.sdk.TestData runtimeVariable;

    /** Holder through which the extracted text is stored as run time data. */
    @RunTimeData
    private com.testsigma.sdk.RunTimeData runTimeData;

    /**
     * Copies the latest download locally, extracts its text and stores it in the
     * run time variable.
     *
     * @return {@link Result#SUCCESS} when the text was extracted and stored,
     *         {@link Result#FAILED} otherwise (an error message is set)
     */
    @Override
    protected Result execute() throws NoSuchElementException {
        PdfAndDocUtilities pdfAndDocUtilities = new PdfAndDocUtilities(driver, logger);
        File downloadedPdfFile = null;
        try {
            logger.info("Initiated execution");
            downloadedPdfFile = pdfAndDocUtilities.copyFileFromDownloads("pdf", null);
            logger.info("Local path" + downloadedPdfFile.getAbsolutePath());

            String fileContent;
            // try-with-resources: PDDocument holds file handles/buffers until it is closed.
            try (PDDocument document = PDDocument.load(downloadedPdfFile)) {
                if (document.isEncrypted()) {
                    // Report the real reason instead of a generic "unable to read" message.
                    String encryptedMessage = "The file in the downloads is encrypted one, Unable to access it";
                    logger.info(encryptedMessage);
                    setErrorMessage(encryptedMessage);
                    return Result.FAILED;
                }
                fileContent = new PDFTextStripper().getText(document);
            }

            runTimeData.setKey(runtimeVariable.getValue().toString());
            runTimeData.setValue(fileContent);
            logger.info("File content:" + fileContent);
            setSuccessMessage("Successfully extracted the data in the file and stored in run time variable " + runTimeData.getKey());
            return Result.SUCCESS;
        } catch (RuntimeException e) {
            logger.info("Unable to find the latest file in the downloads" + ExceptionUtils.getStackTrace(e));
            setErrorMessage("Unable to find the latest file in the downloads");
            return Result.FAILED;
        } catch (Exception e) {
            logger.info(ExceptionUtils.getStackTrace(e));
            setErrorMessage("Unable to read the content in the given pdf file");
            return Result.FAILED;
        } finally {
            // The local copy is only needed while extracting text; do not leave it behind.
            deleteTempCopy(downloadedPdfFile);
        }
    }

    /** Best-effort removal of the temporary local copy; a failure is only logged. */
    private void deleteTempCopy(File tempCopy) {
        if (tempCopy != null && tempCopy.exists() && !tempCopy.delete()) {
            logger.info("Unable to delete the temporary file " + tempCopy.getAbsolutePath());
        }
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package com.testsigma.addons.web;

import com.testsigma.addons.web.util.PdfAndDocUtilities;
import com.testsigma.sdk.ApplicationType;
import com.testsigma.sdk.Result;
import com.testsigma.sdk.WebAction;
import com.testsigma.sdk.annotation.Action;
import com.testsigma.sdk.annotation.RunTimeData;
import com.testsigma.sdk.annotation.TestData;
import lombok.Data;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.openqa.selenium.NoSuchElementException;

import java.io.File;

/**
 * Web action that looks up a download whose name contains the {@code file-name}
 * test data, copies it to a local temporary file, extracts its text with PDFBox
 * and stores that text in the run time variable named by {@code variable-name}.
 *
 * <p>The action fails (returns {@link Result#FAILED}) when the file cannot be
 * located or copied, when the PDF is encrypted, or when its content cannot be read.
 */
@Data
@Action(actionText = "PDF: Extract content from the file file-name from the downloads and store it in runtime-variable variable-name",
        description = "Extracts content by accessing the given file in the downloads and stores that content in a run time variable",
        applicationType = ApplicationType.WEB)
public class ExtractPDFFileContentBasedOnFileName extends WebAction {
    /** Name (or part of the name) of the downloaded file to read. */
    @TestData(reference = "file-name")
    private com.testsigma.sdk.TestData fileName;

    /** Name of the run time variable that receives the extracted text. */
    @TestData(reference = "variable-name", isRuntimeVariable = true)
    private com.testsigma.sdk.TestData runtimeVariable;

    /** Holder through which the extracted text is stored as run time data. */
    @RunTimeData
    private com.testsigma.sdk.RunTimeData runTimeData;

    /**
     * Copies the requested download locally, extracts its text and stores it in
     * the run time variable.
     *
     * @return {@link Result#SUCCESS} when the text was extracted and stored,
     *         {@link Result#FAILED} otherwise (an error message is set)
     */
    @Override
    protected Result execute() throws NoSuchElementException {
        PdfAndDocUtilities pdfAndDocUtilities = new PdfAndDocUtilities(driver, logger);
        File downloadedPdfFile = null;
        try {
            logger.info("Initiated execution");
            downloadedPdfFile = pdfAndDocUtilities.copyFileFromDownloads("pdf", fileName.getValue().toString());
            logger.info("Local path" + downloadedPdfFile.getAbsolutePath());

            String fileContent;
            // try-with-resources: PDDocument holds file handles/buffers until it is closed.
            try (PDDocument document = PDDocument.load(downloadedPdfFile)) {
                if (document.isEncrypted()) {
                    // Report the real reason instead of a generic "unable to read" message.
                    String encryptedMessage = "The file in the downloads is encrypted one, Unable to access it";
                    logger.info(encryptedMessage);
                    setErrorMessage(encryptedMessage);
                    return Result.FAILED;
                }
                fileContent = new PDFTextStripper().getText(document);
            }

            runTimeData.setKey(runtimeVariable.getValue().toString());
            runTimeData.setValue(fileContent);
            logger.info("File content:" + fileContent);
            setSuccessMessage("Successfully extracted the data in the file and stored in run time variable " + runTimeData.getKey());
            return Result.SUCCESS;
        } catch (RuntimeException e) {
            logger.info("Unable to find the given file in the downloads" + ExceptionUtils.getStackTrace(e));
            // Some runtime exceptions carry no message; never surface a null error text.
            String message = e.getMessage();
            setErrorMessage(message != null ? message : "Unable to find the given file in the downloads");
            return Result.FAILED;
        } catch (Exception e) {
            logger.info(ExceptionUtils.getStackTrace(e));
            setErrorMessage("Unable to read the data in the given file");
            return Result.FAILED;
        } finally {
            // The local copy is only needed while extracting text; do not leave it behind.
            deleteTempCopy(downloadedPdfFile);
        }
    }

    /** Best-effort removal of the temporary local copy; a failure is only logged. */
    private void deleteTempCopy(File tempCopy) {
        if (tempCopy != null && tempCopy.exists() && !tempCopy.delete()) {
            logger.info("Unable to delete the temporary file " + tempCopy.getAbsolutePath());
        }
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
package com.testsigma.addons.web.util;

import com.testsigma.sdk.Logger;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.support.ui.ExpectedCondition;
import org.openqa.selenium.support.ui.WebDriverWait;

import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.Set;

/**
 * Helper that reads Chrome's {@code chrome://downloads/} page through the given
 * driver and copies one of the listed files to a temporary file on the machine
 * running this code.
 *
 * <p>The copy is made by attaching the remote path to a hidden file input and
 * reading it back in the page as a base64 data URL.
 */
public class PdfAndDocUtilities {

    WebDriver driver;
    Logger logger;

    /**
     * @param driver browser session used to inspect the downloads page
     * @param logger logger that receives progress and failure details
     */
    public PdfAndDocUtilities(WebDriver driver, Logger logger) {
        this.driver = driver;
        this.logger = logger;
    }

    /**
     * Opens the downloads page in a new tab, waits (up to 60 seconds) until no
     * download is in progress, and copies the selected file to a local temp file.
     * The helper tab is closed and the original window is re-selected afterwards,
     * whether or not the copy succeeded.
     *
     * @param fileFormat extension (without the dot) used as the temp file suffix
     * @param fileName   text the download's file name must contain, or {@code null}
     *                   to take the first entry of the downloads list
     * @return the local temporary copy of the downloaded file
     * @throws RuntimeException when the tab cannot be opened, the file is not
     *                          listed, or its content cannot be read back
     * @throws Exception        when the temporary file cannot be created or written
     */
    public File copyFileFromDownloads(String fileFormat, String fileName) throws Exception {
        String currentWindowHandle = driver.getWindowHandle();
        String downloadsTabHandle = null;
        try {
            // Identify the new tab as "the handle that did not exist before" rather
            // than relying on the iteration order of the handle set.
            Set<String> handlesBeforeOpen = driver.getWindowHandles();
            ((JavascriptExecutor) driver).executeScript("window.open()");
            List<String> openedHandles = new ArrayList<>(driver.getWindowHandles());
            openedHandles.removeAll(handlesBeforeOpen);
            if (openedHandles.isEmpty()) {
                throw new RuntimeException("Unable to open a new tab for the downloads page");
            }
            downloadsTabHandle = openedHandles.get(0);
            driver.switchTo().window(downloadsTabHandle);

            driver.navigate().to("chrome://downloads/");
            WebDriverWait downloadsWait = new WebDriverWait(driver, Duration.ofSeconds(60));
            downloadsWait.until((ExpectedCondition<Boolean>) webDriver -> isFileDownloaded());

            String remoteFilePath = fileName != null
                    ? getFilePathByFileNameInDownloads(fileName)
                    : getDownloadedFileLocalPath();
            logger.info("Downloaded file path=" + remoteFilePath);
            return createLocalFileFromDownloadsCopy(remoteFilePath, fileFormat);
        } catch (RuntimeException e) {
            // Same message and type as before, but keep the cause for diagnosis.
            throw new RuntimeException(e.getMessage(), e);
        } finally {
            closeTabQuietly(downloadsTabHandle);
            // Always hand control back to the window the caller was using.
            driver.switchTo().window(currentWindowHandle);
        }
    }

    /** Closes the given tab if it was opened; a failure is logged, never thrown. */
    private void closeTabQuietly(String tabHandle) {
        if (tabHandle == null) {
            return;
        }
        try {
            driver.switchTo().window(tabHandle);
            driver.close();
        } catch (RuntimeException e) {
            logger.info("Unable to close the downloads tab: " + e.getMessage());
        }
    }

    /**
     * @return {@code true} when the downloads page lists no item in the
     *         {@code IN_PROGRESS} state
     */
    private boolean isFileDownloaded() {
        if (!driver.getCurrentUrl().startsWith("chrome://downloads")) {
            driver.get("chrome://downloads/");
        }
        JavascriptExecutor js = (JavascriptExecutor) driver;
        Object inProgress = js.executeScript("return document.querySelector('downloads-manager').shadowRoot.querySelector('#downloadsList')" +
                ".items.filter(e => e.state === 'IN_PROGRESS').map(e => e.filePath || e.file_path || e.fileUrl || e.file_url); ");
        return !(inProgress instanceof List) || ((List<?>) inProgress).isEmpty();
    }

    /**
     * @return the file path of the first entry in the downloads list
     * @throws RuntimeException when the list is empty or cannot be read
     */
    private String getDownloadedFileLocalPath() {
        try {
            JavascriptExecutor js = (JavascriptExecutor) driver;
            Object filePath = js.executeScript("return document.querySelector('downloads-manager').shadowRoot.querySelector('#downloadsList').items[0].filePath; ");
            return filePath.toString();
        } catch (Exception e) {
            logger.info("No files in the downloads");
            throw new RuntimeException("No files in the downloads", e);
        }
    }

    /**
     * @param desiredFileName text the download's file name must contain
     * @return the file path of the first matching entry in the downloads list
     * @throws RuntimeException when no entry matches
     */
    private String getFilePathByFileNameInDownloads(String desiredFileName) {
        try {
            // The name is passed as a script argument instead of being concatenated
            // into the source, so quotes or backslashes in it cannot break the script.
            String script = "var desiredName = arguments[0]; " +
                    "return Array.from(document.querySelector('downloads-manager').shadowRoot.querySelector('#downloadsList').items)" +
                    " .find(item => item.fileName && item.fileName.includes(desiredName)).filePath;";
            JavascriptExecutor js = (JavascriptExecutor) driver;
            Object filePath = js.executeScript(script, desiredFileName);
            return filePath.toString();
        } catch (Exception e) {
            logger.info("There is no file with name:" + desiredFileName);
            throw new RuntimeException("There is no file with name:" + desiredFileName, e);
        }
    }

    /**
     * Reads the file at {@code path} through a hidden file input in the page and
     * writes its bytes to a new local temporary file.
     *
     * @param path       path of the file as reported by the downloads page
     * @param fileFormat extension (without the dot) used as the temp file suffix
     * @return the temporary file holding the copied bytes
     * @throws RuntimeException when the page does not return a data URL
     * @throws Exception        when the temporary file cannot be created or written
     */
    private File createLocalFileFromDownloadsCopy(String path, String fileFormat) throws Exception {
        JavascriptExecutor js = (JavascriptExecutor) driver;
        WebElement fileInput = (WebElement) js.executeScript("var input = window.document.createElement('INPUT'); " +
                "input.setAttribute('type', 'file'); " +
                "input.hidden = true; " +
                "input.onchange = function (e) { e.stopPropagation() }; " +
                "return window.document.documentElement.appendChild(input); ");

        fileInput.sendKeys(path);
        long start = System.currentTimeMillis();
        Object result = js.executeAsyncScript("var input = arguments[0], callback = arguments[1]; " +
                        "var reader = new FileReader(); " +
                        "reader.onload = function (ev) { callback(reader.result) }; " +
                        "reader.onerror = function (ex) { callback(ex.message) }; " +
                        "reader.readAsDataURL(input.files[0]); " +
                        "input.remove(); "
                , fileInput);
        logger.info("Time taken: " + (System.currentTimeMillis() - start));

        String dataUrl = result == null ? null : result.toString();
        // In a data URL the payload is everything after the first comma.
        int payloadSeparator = dataUrl == null ? -1 : dataUrl.indexOf(',');
        if (dataUrl == null || !dataUrl.startsWith("data:") || payloadSeparator < 0) {
            throw new RuntimeException("Failed to get file content: " + result);
        }
        byte[] decodedBytes = Base64.getDecoder().decode(dataUrl.substring(payloadSeparator + 1));

        // Strip the directory using either separator style: the path comes from the
        // browser's machine and may not use this JVM's separator.
        String fileName = path.substring(Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\')) + 1);
        logger.info("fileName: " + fileName);
        // File.createTempFile rejects prefixes shorter than three characters.
        String tempPrefix = fileName.length() >= 3 ? fileName : fileName + "___";
        File downloadedFile = File.createTempFile(tempPrefix, "." + fileFormat);
        logger.info("Local path" + downloadedFile.getAbsolutePath());
        Files.write(downloadedFile.toPath(), decodedBytes);
        return downloadedFile;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# API key read under the name "testsigma-sdk.api.key".
# NOTE(review): this value looks like a real credential committed to source control.
# Confirm whether it is live; if so, rotate it and supply it from outside the repository.
testsigma-sdk.api.key=eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJkZWYtZHNzZndlc2xkbGY0aTg4c2pkZmoiLCJ1bmlxdWVJZCI6IjQxIiwiZXh0ZXJuYWxUZW5hbnRJZCI6IjEifQ.TPS963FmN4heI_cCJ6_XY-1Slk1TcuaCvpCW6IbepV5HuAHRLa9Mu8TcFeeNfbXF14A4HeBaIFl_vbMas9zkmw

0 comments on commit 82cb0ba

Please sign in to comment.