In this short article, we’ll walk through source code that extracts invoice information from a PDF using SharePoint and the PDF.co Web API. Let’s get into the source code.
- Extract Invoices – Source Code Snippets
- Source Code at GitHub
- Extract Invoices – Demo Video
- Extract Invoices – Sample Screenshots
Extract Invoices – Source Code Snippets
To get started with invoice extraction, we should analyze the code first. Take a look at the markup for the Web Part’s visual user control.
VisualWebPart1UserControl.ascx
<%@ Assembly Name="$SharePoint.Project.AssemblyFullName$" %>
<%@ Assembly Name="Microsoft.Web.CommandUI, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %>
<%@ Register Tagprefix="SharePoint" Namespace="Microsoft.SharePoint.WebControls" Assembly="Microsoft.SharePoint, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %>
<%@ Register Tagprefix="Utilities" Namespace="Microsoft.SharePoint.Utilities" Assembly="Microsoft.SharePoint, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %>
<%@ Register Tagprefix="asp" Namespace="System.Web.UI" Assembly="System.Web.Extensions, Version=4.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35" %>
<%@ Import Namespace="Microsoft.SharePoint" %>
<%@ Register Tagprefix="WebPartPages" Namespace="Microsoft.SharePoint.WebPartPages" Assembly="Microsoft.SharePoint, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c" %>
<%@ Control Language="C#" AutoEventWireup="true" CodeBehind="VisualWebPart1UserControl.ascx.cs" Inherits="ParseSimpleDocumentWebPart.VisualWebPart1.VisualWebPart1UserControl" %>

<%-- Input: the PDF file to parse and the parser template text. --%>
Choose source file<br />
<asp:FileUpload ID="FileUpload1" runat="server" Width="600px" />
<br />
<br />
Template<br />
<asp:TextBox ID="TemplateTextBox" runat="server" Height="80px" TextMode="MultiLine" Width="600px"></asp:TextBox>
<br />
<br />

<%-- Posts back to StartButton_Click in the code-behind. --%>
<asp:Button ID="StartButton" runat="server" OnClick="StartButton_Click" Text="Convert to CSV" style="width: 610px; padding-left: 0px; margin-left: 0px; padding-right: 0px;"/>
<br />
<br />

<%-- Output: progress/error log and the parser result. --%>
Log<br />
<asp:TextBox ID="LogTextBox" runat="server" Height="80px" TextMode="MultiLine" Width="600px"></asp:TextBox>
<br />
<br />
Result<br />
<asp:TextBox ID="ResultTextBox" runat="server" Height="500px" TextMode="MultiLine" Width="600px"></asp:TextBox>
This is the code-behind for the Web Part user control.
VisualWebPart1UserControl.ascx.cs
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Net;
using System.Threading;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;

namespace ParseSimpleDocumentWebPart.VisualWebPart1
{
    /// <summary>
    /// Code-behind for the Web Part user control. Uploads the selected file to PDF.co,
    /// starts a Document Parser job with the template entered by the user, polls the job
    /// until it finishes, and shows the generated JSON in the result text box.
    /// </summary>
    public partial class VisualWebPart1UserControl : UserControl
    {
        // The authentication key (API Key).
        // Get your own by registering at https://app.pdf.co/documentation/api
        string API_KEY = Utils.API_KEY;

        // PDF document password. Leave empty for unprotected documents.
        const string Password = "";

        // (!) Make asynchronous job
        const bool Async = true;

        protected void Page_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Handles the "start" button click: validates the inputs, uploads the file,
        /// runs the Document Parser job and writes progress to the log text box.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        protected void StartButton_Click(object sender, EventArgs e)
        {
            // Template text. Use Document Parser SDK (https://bytescout.com/products/developer/documentparsersdk/index.html)
            // to create templates.

            ServicePointManager.Expect100Continue = true;
            ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;

            // Both the file and the template are required, so either one missing stops here.
            if (!FileUpload1.HasFile || String.IsNullOrWhiteSpace(TemplateTextBox.Text))
            {
                LogTextBox.Text += "Select file and template \n";
                return;
            }

            // `using` guarantees the client is disposed even if a request throws.
            using (WebClient webClient = new WebClient())
            {
                // Set API Key
                webClient.Headers.Add("x-api-key", API_KEY);

                // 1. RETRIEVE THE PRESIGNED URL TO UPLOAD THE FILE.
                // * If you already have a direct file URL, skip to the step 3.

                // Prepare URL for `Get Presigned URL` API call.
                // Only the file name is escaped, and as a data component, so characters
                // such as '&', '#' or '+' in the name cannot break the query string.
                string query =
                    "https://api.pdf.co/v1/file/upload/get-presigned-url?contenttype=application/octet-stream&name="
                    + Uri.EscapeDataString(FileUpload1.FileName);

                try
                {
                    // Execute request
                    string response = webClient.DownloadString(query);

                    // Parse JSON response
                    JObject json = JObject.Parse(response);

                    if (IsErrorResponse(json))
                    {
                        LogTextBox.Text += Convert.ToString(json["message"]) + " \n";
                    }
                    else
                    {
                        // Get URL to use for the file upload
                        string uploadUrl = json["presignedUrl"].ToString();
                        string uploadedFileUrl = json["url"].ToString();

                        // 2. UPLOAD THE FILE TO CLOUD.
                        webClient.Headers.Add("content-type", "application/octet-stream");
                        webClient.UploadData(uploadUrl, "PUT", FileUpload1.FileBytes);
                        webClient.Headers.Remove("content-type");

                        // 3. PARSE UPLOADED PDF DOCUMENT

                        // URL of `Document Parser` API call
                        string url = "https://api.pdf.co/v1/pdf/documentparser";

                        Dictionary<string, object> requestBody = new Dictionary<string, object>();
                        requestBody.Add("template", TemplateTextBox.Text);
                        requestBody.Add("name", FileUpload1.FileName);
                        requestBody.Add("url", uploadedFileUrl);
                        requestBody.Add("async", Async);

                        // Convert dictionary of params to JSON
                        string jsonPayload = JsonConvert.SerializeObject(requestBody);

                        // Execute request
                        response = webClient.UploadString(url, "POST", jsonPayload);

                        // Parse JSON response
                        json = JObject.Parse(response);

                        if (IsErrorResponse(json))
                        {
                            LogTextBox.Text += Convert.ToString(json["message"]) + " \n";
                        }
                        else
                        {
                            // Asynchronous job ID
                            string jobId = json["jobId"].ToString();
                            // Get URL of generated JSON file
                            string resultFileUrl = json["url"].ToString();

                            // Check the job status in a loop. This blocks the current thread
                            // until the job leaves the "working" state.
                            // If you don't want to pause the main thread you can rework the code
                            // to use a separate thread for the status checking and completion.
                            while (true)
                            {
                                string status = CheckJobStatus(jobId); // Possible statuses: "working", "failed", "aborted", "success".

                                // Display timestamp and status (for demo purposes)
                                LogTextBox.Text += DateTime.Now.ToLongTimeString() + ": " + status + "\n";

                                if (status == "success")
                                {
                                    // Download JSON result
                                    string result = webClient.DownloadString(resultFileUrl);

                                    LogTextBox.Text += "Generated JSON.\n";
                                    ResultTextBox.Text += result;
                                    break;
                                }

                                if (status != "working")
                                {
                                    // Any other status ends the polling.
                                    LogTextBox.Text += status + " \n";
                                    break;
                                }

                                // Pause for a few seconds
                                Thread.Sleep(3000);
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Surface any network/parsing failure in the log box instead of failing the page.
                    LogTextBox.Text += ex.ToString() + " \n";
                }
            }

            LogTextBox.Text += "\n";
            LogTextBox.Text += "Done...\n";
        }

        /// <summary>
        /// Queries the job-check endpoint for the current status of a background job.
        /// </summary>
        /// <param name="jobId">The job ID returned when the job was started.</param>
        /// <returns>The value of the "status" field of the response, or an empty string if it is absent.</returns>
        protected string CheckJobStatus(string jobId)
        {
            using (WebClient webClient = new WebClient())
            {
                // Set API Key
                webClient.Headers.Add("x-api-key", API_KEY);

                // Escape the ID so it is always passed as a single query value.
                string url = "https://api.pdf.co/v1/job/check?jobid=" + Uri.EscapeDataString(jobId ?? String.Empty);

                string response = webClient.DownloadString(url);
                JObject json = JObject.Parse(response);

                return Convert.ToString(json["status"]);
            }
        }

        /// <summary>
        /// Reads the "error" flag of an API response. A response with no flag is treated as an error.
        /// </summary>
        /// <param name="json">The parsed API response.</param>
        /// <returns><c>true</c> if the flag is missing or set; otherwise <c>false</c>.</returns>
        private static bool IsErrorResponse(JObject json)
        {
            JToken error = json["error"];
            return error == null || error.Type == JTokenType.Null || error.ToObject<bool>();
        }
    }
}
Source Code at GitHub
You can explore the full source code for this sample at this GitHub link.
Extract Invoices – Demo Video
Extract Invoices – Sample Screenshots
I hope this code snippet is useful to you. Please try it yourself to learn more. Thank you!
Similar Pages
- How to Convert Invoice to CSV using PHP
- How to Convert Invoice to XLS using Zapier
- Parse Invoices Automatically using Zapier
- Parsing PDF Invoices from Dropbox via PDF.co Document Parser for Make
- How to Extract Text From PDF Invoices and Bulk Save to Spreadsheet
- Convert PDF Invoices to Google Sheets using Zapier
- Convert PDF Invoices to Google Sheets using Integromat
- Convert PDF Invoice to Google Sheet using PDF.co and Google Apps Script
- Extract Text from Scanned PDF in PHP using PDF.co Web API
- Extract Text from Scanned PDF in JavaScript using PDF.co Web API
- How to Extract Text from PDF and Paste in Excel using Python and PDF.co Web API
- How to Read PDF Invoices in Python using PDF.co Web API
- Google Invoice Parser to Read PDF Invoices and Orders with Google Script and PDF.co
- How to Extract Invoice Data from Image in Python using PDF.co Web API
- Parse Invoice and Send Data to Airtable with PDF.co using Zapier
- Parse Invoice and Send Data to Airtable using PDF.co and Make
- Extract Invoice Data from PDF using PDF.co and UiPath
- Parse Invoice using Salesforce Apex using PDF.co
- Extract Invoice Information with SharePoint and PDF.co
- Extract Data from Invoices to Avoid Fraud using PDF.co Document Parser