In this article, we’ll look at how to convert unsearchable (scanned) PDFs into searchable PDFs in Salesforce. The code is written in Salesforce Apex, and PDF.co is used to perform the conversion.
As input, files stored in Salesforce are used. After the conversion, the output file is stored back in the Salesforce file system.
Following are the step-by-step instructions for the whole process.
Scanned to Searchable PDF – Step-by-Step Guide
Step 1: Create Remote Site Settings
Create two remote site settings in the Salesforce org, as shown below.
The URLs used are as follows:
- https://api.pdf.co
- https://pdf-temp-files.s3.us-west-2.amazonaws.com
Step 2: Create Apex Class in Salesforce
Create an Apex class in Salesforce as shown below and paste the code into it. Once you log in to the Salesforce org, you will see the screen below; click on “Developer Console”.
Create an Apex class. To do this, click on “File”, then “New”, then “Apex Class”.
Next, Write the class name “MakeSearchablePDFWithFileUpload” and click “Ok”. Now copy the full source code there. All source code is given in the later section of this article.
Step 3: Add the Key
In the MakeSearchablePDFWithFileUpload class, add your PDF.co API key by replacing the ‘***********************’ placeholder assigned to API_KEY.
Next, set the destination file name (DestinationFile); the converted PDF will be stored in Salesforce under this name.
Step 4: Upload PDF to Salesforce
We must upload a PDF file to the Salesforce File System. For that, go to App Launcher, Click on Files. After that, you will see the button “Upload File”. From there, You can upload the file.
Step 5: Verify the Code
To verify the code, open the Execute Anonymous window and call the method below, for example: new MakeSearchablePDFWithFileUpload().startProcessing();
Then Click on “Execute”.
Step 6: Search Files from the “App Launcher”
Now, search for “Files” in the “App Launcher” and open it.
You will see your PDF file there.
Source Code
The source code used in this article is listed below.
MakeSearchablePDFWithFileUpload.cls
/**
 * Converts scanned PDFs stored as Salesforce Files (ContentVersion) into
 * searchable PDFs through the PDF.co Web API.
 *
 * Flow implemented by this class:
 *   1. For every configured file title, read the file body from ContentVersion.
 *   2. Ask PDF.co for a presigned upload URL and PUT the file body to it.
 *   3. Start an asynchronous "make searchable" job for the first uploaded file.
 *   4. Poll the job status and, on success, download the result and insert it
 *      as a new ContentVersion titled DestinationFile.
 *
 * All failures are written to the debug log; no exception is propagated to
 * the caller of startProcessing() or makePdfTextSearchable().
 */
public class MakeSearchablePDFWithFileUpload {

    // PDF.co API key sent in the "x-api-key" header. Replace the placeholder.
    public static String API_KEY = '**************************';

    // Title of the ContentVersion created for the converted PDF ('.pdf' is appended for PathOnClient).
    public static string DestinationFile = 'Searchable PDF(Using File Upload)';

    private static final String API_BASE_URL = 'https://api.pdf.co/v1';
    private static final Integer CALLOUT_TIMEOUT_MS = 60000;

    // Delay between two job-status checks.
    private static final Integer POLL_INTERVAL_MS = 3000;

    // Upper bound on status checks so that polling can never loop forever.
    private static final Integer MAX_STATUS_CHECKS = 20;

    // Titles of the ContentVersion records to upload.
    String[] fileName = new String[] {'sampleScannedPDF'};

    // PDF.co URLs of the files that were uploaded successfully.
    List<String> urlList = new List<String>();

    /**
     * Uploads every configured file to PDF.co and, when at least one upload
     * succeeded, starts the searchable-PDF conversion.
     * Exceptions are caught and logged.
     */
    public void startProcessing() {
        try {
            for (String fname : fileName) {
                // Query into a list: a single-row assignment would throw when the
                // file is missing and abort the remaining files as well.
                List<ContentVersion> versions = [
                    SELECT Title, VersionData
                    FROM ContentVersion
                    WHERE Title = :fname
                    LIMIT 1
                ];
                if (versions.isEmpty()) {
                    System.debug('No ContentVersion found with Title ' + fname);
                    continue;
                }
                Blob SourceFile = versions[0].VersionData;

                // 1. Prepare URL for "Get Presigned URL" API call.
                // The file name must be concatenated (and URL-encoded); SOQL-style
                // ":fname" binding has no effect inside a string literal.
                String url = API_BASE_URL + '/file/upload/get-presigned-url'
                    + '?contenttype=application/octet-stream'
                    + '&name=' + EncodingUtil.urlEncode(fname, 'UTF-8');

                HttpRequest req = newApiRequest('GET', url);
                HttpResponse res = new Http().send(req);

                if (res.getStatusCode() != 200) {
                    logFailure(res);
                    continue;
                }

                System.debug('res ' + res);
                Map<String, Object> deserializedBody =
                    (Map<String, Object>) JSON.deserializeUntyped(res.getBody());
                Boolean isError = Boolean.valueOf(deserializedBody.get('error'));
                if (isError) {
                    System.debug('PDF.co reported an error: ' + res.getBody());
                    continue;
                }

                // URL to use for the file upload.
                String uploadUrl = String.valueOf(deserializedBody.get('presignedUrl'));
                // URL of the uploaded file to use with later API calls.
                String uploadedFileUrl = String.valueOf(deserializedBody.get('url'));
                System.debug('uploadedFileUrl :: ' + uploadedFileUrl);

                // 2. Upload the file to the cloud.
                if (uploadFile(API_KEY, uploadUrl, SourceFile)) {
                    urlList.add(uploadedFileUrl);
                    System.debug('urlList ' + urlList);
                }
            }

            if (urlList.size() > 0) {
                makePdfTextSearchable(urlList);
            }
        } catch (Exception ex) {
            logException(ex);
        }
    }

    /**
     * PUTs the file body to the given (presigned) upload URL.
     *
     * @param API_KEY    value sent in the "x-api-key" header
     * @param url        upload endpoint
     * @param sourceFile file content to upload
     * @return true when the server answered with HTTP 200, false otherwise
     */
    @TestVisible
    public static boolean uploadFile(String API_KEY, String url, Blob sourceFile) {
        HttpRequest req = new HttpRequest();
        // NOTE(review): the API key is also sent to the presigned upload host - confirm this is required.
        req.setHeader('x-api-key', API_KEY);
        req.setHeader('Content-Type', 'application/octet-stream');
        req.setEndpoint(url);
        req.setMethod('PUT');
        req.setTimeout(CALLOUT_TIMEOUT_MS);
        req.setBodyAsBlob(sourceFile);

        HttpResponse res = new Http().send(req);
        if (res.getStatusCode() == 200) {
            System.debug(res.getBody());
            return true;
        }
        logFailure(res);
        return false;
    }

    /**
     * Starts an asynchronous "make searchable" job for the FIRST URL in the
     * list, waits for it to finish and stores the result in Salesforce.
     * Exceptions are caught and logged.
     *
     * @param uploadedFileUrl PDF.co URLs of uploaded files; only element 0 is used
     */
    public static void makePdfTextSearchable(List<String> uploadedFileUrl) {
        if (uploadedFileUrl == null || uploadedFileUrl.isEmpty()) {
            System.debug('makePdfTextSearchable called without any uploaded file URL');
            return;
        }
        System.debug('uploadedFileUrlList[0] :: ' + uploadedFileUrl[0]);
        String SourceFileUrl = uploadedFileUrl[0];

        Map<String, Object> parameters = new Map<String, Object>();
        parameters.put('async', 'true');
        parameters.put('password', '');
        parameters.put('name', 'result.pdf');
        parameters.put('pages', '');
        parameters.put('lang', 'eng');
        parameters.put('url', SourceFileUrl);
        String jsonPayload = JSON.serialize(parameters);

        try {
            HttpRequest req = newApiRequest('POST', API_BASE_URL + '/pdf/makesearchable');
            req.setHeader('Content-Type', 'application/json');
            req.setBody(jsonPayload);

            HttpResponse res = new Http().send(req);
            if (res.getStatusCode() != 200) {
                logFailure(res);
                return;
            }

            System.debug('res.getBody()' + res.getBody());
            Map<String, Object> deserializedBody =
                (Map<String, Object>) JSON.deserializeUntyped(res.getBody());

            // Stop early when the response carries an explicit error flag.
            Object errorFlag = deserializedBody.get('error');
            if (errorFlag instanceof Boolean && (Boolean) errorFlag) {
                System.debug('PDF.co reported an error: ' + res.getBody());
                return;
            }

            String urlVal = (String) deserializedBody.get('url');
            String jobId = (String) deserializedBody.get('jobId');
            System.debug('urlVal >>> ' + urlVal);

            pollAndDownload(jobId, urlVal);
        } catch (Exception ex) {
            logException(ex);
        }
    }

    /**
     * Asks PDF.co for the status of a background job.
     *
     * @param jobId id returned when the job was started
     * @return the "status" value of the response, or null when the call failed
     */
    public static String checkJobStatus(String jobId) {
        String statusVal;
        try {
            HttpRequest req = newApiRequest('POST', API_BASE_URL + '/job/check?jobid=' + jobId);
            req.setHeader('Content-Type', 'application/json');

            HttpResponse res = new Http().send(req);
            if (res.getStatusCode() == 200) {
                System.debug('res ' + res.getBody());
                Map<String, Object> deserializedBody =
                    (Map<String, Object>) JSON.deserializeUntyped(res.getBody());
                statusVal = (String) deserializedBody.get('status');
            } else {
                logFailure(res);
            }
        } catch (Exception ex) {
            logException(ex);
        }
        return statusVal;
    }

    /**
     * Downloads the file at extFileUrl and inserts it as a new ContentVersion.
     *
     * @param extFileUrl      URL of the generated PDF
     * @param DestinationFile title of the new file; '.pdf' is appended for PathOnClient
     */
    @TestVisible
    private static void downloadFile(String extFileUrl, String DestinationFile) {
        HttpRequest req = new HttpRequest();
        req.setEndpoint(extFileUrl.replace(' ', '%20'));
        req.setMethod('GET');
        req.setHeader('Content-Type', 'application/pdf');
        req.setCompressed(true);
        req.setTimeout(CALLOUT_TIMEOUT_MS);

        HttpResponse res = new Http().send(req);
        if (res.getStatusCode() != 200) {
            logFailure(res);
            return;
        }

        ContentVersion conVer = new ContentVersion();
        // 'S' marks the document as stored in Salesforce ('E' would be an external file).
        conVer.ContentLocation = 'S';
        // The extension matters: it lets Salesforce preview the file.
        conVer.PathOnClient = DestinationFile + '.pdf';
        // Display name of the file.
        conVer.Title = DestinationFile;
        conVer.VersionData = res.getBodyAsBlob();
        insert conVer;
        System.debug('Success');
    }

    /**
     * Blocks the current transaction for roughly the given time by spinning
     * on the system clock until more than the given number of milliseconds
     * has elapsed.
     *
     * @param milliseconds minimum time to wait
     */
    public static void sleep(integer milliseconds) {
        Long startedAt = System.now().getTime();
        Long elapsed = 0;
        do {
            elapsed = System.now().getTime() - startedAt;
        } while (elapsed <= milliseconds);
    }

    /**
     * Polls the job until it succeeds, fails, or the polling budget is used
     * up; on success the result file is downloaded into Salesforce.
     */
    private static void pollAndDownload(String jobId, String resultUrl) {
        for (Integer attempt = 1; attempt <= MAX_STATUS_CHECKS; attempt++) {
            // Two callouts are still needed: this status check and the download.
            if (Limits.getCallouts() + 2 > Limits.getLimitCallouts()) {
                System.debug('Stopped polling job ' + jobId + ': callout limit nearly reached');
                return;
            }

            String statusVal = checkJobStatus(jobId);
            if (statusVal == 'success') {
                downloadFile(resultUrl, DestinationFile);
                return;
            }
            if (statusVal != 'working') {
                System.debug('Job ' + jobId + ' ended with status ' + statusVal);
                return;
            }

            // The wait in sleep() is a busy loop and is assumed to count against the
            // CPU governor limit; stop before another wait would exceed it.
            if (Limits.getCpuTime() + POLL_INTERVAL_MS >= Limits.getLimitCpuTime()) {
                System.debug('Stopped polling job ' + jobId + ': CPU time limit nearly reached');
                return;
            }
            sleep(POLL_INTERVAL_MS);
        }
        System.debug('Job ' + jobId + ' still working after ' + MAX_STATUS_CHECKS + ' status checks');
    }

    /** Builds a request to the given endpoint with the API key header and the standard timeout. */
    private static HttpRequest newApiRequest(String method, String endpoint) {
        HttpRequest req = new HttpRequest();
        req.setHeader('x-api-key', API_KEY);
        req.setEndpoint(endpoint);
        req.setMethod(method);
        req.setTimeout(CALLOUT_TIMEOUT_MS);
        return req;
    }

    /** Writes the details of a non-200 response to the debug log. */
    private static void logFailure(HttpResponse res) {
        System.debug('Error Response ' + res.getBody());
        System.debug(' Status ' + res.getStatus());
        System.debug(' Status Code' + res.getStatusCode());
        System.debug(' Response String' + res.toString());
    }

    /** Writes message, cause and stack trace of an exception to the debug log. */
    private static void logException(Exception ex) {
        String errorBody = 'Message: ' + ex.getMessage()
            + ' -- Cause: ' + ex.getCause()
            + ' -- Stacktrace: ' + ex.getStackTraceString();
        System.debug(errorBody);
    }
}
MakeSearchablePDFWithFileUploadTest.cls
/**
 * Unit tests for MakeSearchablePDFWithFileUpload.
 * All HTTP traffic is replaced by the HttpCalloutMock implementations
 * declared at the bottom of this class.
 */
@isTest
private class MakeSearchablePDFWithFileUploadTest {

    private static final String SOURCE_TITLE = 'sampleScannedPDF';

    /** Inserts the file that startProcessing() looks up by title. */
    private static void insertSourceDocument() {
        ContentVersion con = new ContentVersion();
        con.Title = SOURCE_TITLE;
        con.VersionData = Blob.valueOf('Test Document');
        con.PathOnClient = SOURCE_TITLE + '.pdf';
        insert con;
    }

    /** Number of ContentVersion records with the given title. */
    private static Integer countByTitle(String title) {
        return [SELECT COUNT() FROM ContentVersion WHERE Title = :title];
    }

    // Full flow with every callout answered by the presigned-URL mock.
    @isTest
    static void testStartProcessing() {
        insertSourceDocument();

        Test.startTest();
        Test.setMock(HttpCalloutMock.class, new MakeSearchablePDFCallOutMock());
        MakeSearchablePDFWithFileUpload searchablePDF = new MakeSearchablePDFWithFileUpload();
        MakeSearchablePDFWithFileUpload.API_KEY = 'testapikey';
        searchablePDF.startProcessing();
        Test.stopTest();

        List<ContentVersion> cv = [SELECT Id, Title FROM ContentVersion WHERE Title = :SOURCE_TITLE];
        System.assertEquals(1, cv.size());
        System.assertEquals(SOURCE_TITLE, cv[0].Title);
    }

    // No mock registered: the callout fails and must be handled inside startProcessing().
    @isTest
    static void testStartProcessingForCatch() {
        insertSourceDocument();

        Test.startTest();
        MakeSearchablePDFWithFileUpload searchablePDF = new MakeSearchablePDFWithFileUpload();
        MakeSearchablePDFWithFileUpload.API_KEY = 'testapikey';
        searchablePDF.startProcessing();
        Test.stopTest();

        List<ContentVersion> cv = [SELECT Id, Title FROM ContentVersion WHERE Title = :SOURCE_TITLE];
        System.assertEquals(1, cv.size());
        System.assertEquals(SOURCE_TITLE, cv[0].Title);
    }

    // Job reports "success": the result must be stored as a new file.
    @isTest
    static void testmakePdfTextSearchableJobSuccess() {
        List<String> urlList = new List<String>();
        urlList.add('test');

        Test.startTest();
        Test.setMock(HttpCalloutMock.class, new MakeSearchablePDFCallOutMockForCheckStatusSuccess());
        MakeSearchablePDFWithFileUpload.makePdfTextSearchable(urlList);
        Test.stopTest();

        System.assertEquals(1, countByTitle(MakeSearchablePDFWithFileUpload.DestinationFile));
    }

    // No mock registered: the failure is handled and no result file is created.
    @isTest
    static void testmakePdfTextSearchableJobSuccessForCatch() {
        List<String> urlList = new List<String>();
        urlList.add('test');

        Test.startTest();
        MakeSearchablePDFWithFileUpload.makePdfTextSearchable(urlList);
        Test.stopTest();

        System.assertEquals(0, countByTitle(MakeSearchablePDFWithFileUpload.DestinationFile));
    }

    // A short wait is enough to cover sleep() without spending seconds of CPU time.
    @isTest
    static void testSleep() {
        Integer waitMs = 50;
        Long startedAt = System.now().getTime();

        Test.startTest();
        MakeSearchablePDFWithFileUpload.sleep(waitMs);
        Test.stopTest();

        System.assert(System.now().getTime() - startedAt >= waitMs);
    }

    /** Answers every request with a "get presigned URL" style success response. */
    public class MakeSearchablePDFCallOutMock implements HttpCalloutMock {
        public HTTPResponse respond(HTTPRequest request) {
            // Create a fake response
            HttpResponse response = new HttpResponse();
            response.setHeader('Content-Type', 'application/json');
            response.setBody('{"presignedUrl":"https://pdf-temp-files.s3-us-west-2.amazonaws.com/0c72bf56341142ba83c8f98b47f14d62/test.pdf?X-Amz-Expires=900&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIZJDPLX6D7EHVCKA/20200302/us-west-2/s3/aws4_request&X-Amz-Date=20200302T143951Z&X-Amz-SignedHeaders=host&X-Amz-Signature=8650913644b6425ba8d52b78634698e5fc8970157d971a96f0279a64f4ba87fc","url": "https://pdf-temp-files.s3.amazonaws.com/a0d52f35504e47148d1771fce875db7b/test.pdf", "pageCount": 1, "error": false, "Status": 200, "name": "test.pdf", "remainingCredits": 99033681, "credits": 35 } ');
            response.setStatusCode(200);
            return response;
        }
    }

    /** Answers every request with a finished-job response ("status": "success"). */
    public class MakeSearchablePDFCallOutMockForCheckStatusSuccess implements HttpCalloutMock {
        public HTTPResponse respond(HTTPRequest request) {
            // Create a fake response
            HttpResponse response = new HttpResponse();
            response.setHeader('Content-Type', 'application/json');
            response.setBody('{ "status": "success", "jobId": "test-job-id", "remainingCredits": 60227,"url": "https://pdf-temp-files.s3.amazonaws.com/a0d52f35504e47148d1771fce875db7b/result.pdf" } ');
            response.setStatusCode(200);
            return response;
        }
    }
}
This source code is also available at this GitHub repository.
Useful Resources
- Sign-Up at PDF.co. Get your API Key by Signing-Up at PDF.co
- PDF.co Documentations
- PDF.co Samples
I hope this article is useful for getting started with PDF.co in Salesforce Apex. Please try this on your own for a better understanding.