C# Sample Notebook
Download the C# Sample Notebook
Options
// Base URL of the Data Curation API; endpoint paths are appended to it.
string dcApiUrl = "https://knowledge-enrichment.ai.experience.hyland.com/latest/api/data-curation";

// Processing options that are serialized to JSON and sent with the presign request.
dynamic dcOptions = new {
    normalization = new {
        quotations = true, // Normalize quotation marks
        dashes = true // Normalize dashes
    },
    chunking = true, // Enables chunking of the text
    chunk_size = 1000, // Desired size of each text chunk (may be longer to prevent breaking sentences/paragraphs)
    embedding = false, // Generates text embeddings for the chunks (chunking is required)
    json_schema = false, // "FULL", "MDAST", "PIPELINE", or false
    pii = false // "redaction", "detection", or false
};
Credentials
// OAuth client credentials need to be configured in the admin console.
// https://admin.experience.hyland.com/external-systems/external-applications
// Ensure the external application has the `environment_authorization` scope and is configured for an environment
// that has the `cin-data-curation-api` application (environment has a Data Curation API subscription).
string oauthUrl = "https://auth.iam.experience.hyland.com/idp";
string oauthClientId = "<<REDACTED>>";
string oauthClientSecret = "<<REDACTED>>";
Helper Functions
using System.IO;
using System.Text.Json;
using System.Text.Json.Nodes;
using System.Net.Http;
using System.Net.Http.Headers;
// Single HttpClient instance shared by all request helpers.
HttpClient httpClient = new();

/// <summary>
/// Sends an HTTP request synchronously and returns the response body as a string.
/// </summary>
/// <param name="method">HTTP method to use.</param>
/// <param name="url">Request URL.</param>
/// <param name="headers">Optional request headers; may be null.</param>
/// <param name="content">Optional request body; may be null.</param>
/// <returns>The response body text of a successful (2xx) response.</returns>
/// <exception cref="HttpRequestException">
/// The request could not be sent, or the server answered with a non-success status code.
/// </exception>
string SendHttpRequest(HttpMethod method, string url, Dictionary<string, string> headers = null, HttpContent content = null)
{
    using HttpRequestMessage request = new HttpRequestMessage(method, url);
    if (headers != null)
    {
        foreach (KeyValuePair<string, string> header in headers)
        {
            request.Headers.Add(header.Key, header.Value);
        }
    }
    if (content != null)
    {
        request.Content = content;
    }
    Console.WriteLine($"Sending request to: {request.Method} {request.RequestUri}");
    try
    {
        using HttpResponseMessage response = httpClient.Send(request);
        // GetAwaiter().GetResult() rethrows the original exception; .Result would wrap it
        // in an AggregateException that the catch below would not match.
        string responseBody = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();
        if (!response.IsSuccessStatusCode)
        {
            // Print the server's error payload before failing so the cause is visible.
            Console.WriteLine($"Response body: {responseBody}");
            // Send() does not throw on 4xx/5xx by itself; raise it here so callers
            // never treat an error payload as a successful result.
            response.EnsureSuccessStatusCode();
        }
        return responseBody;
    }
    catch (HttpRequestException e)
    {
        Console.WriteLine($"HTTP Error: {e.StatusCode} - {e.Message}");
        throw;
    }
}
/// <summary>
/// Issues a GET request to <paramref name="url"/>, attaching the given headers when provided,
/// and returns the response body.
/// </summary>
string SendGetRequest(string url, Dictionary<string, string> headers = null)
    => SendHttpRequest(HttpMethod.Get, url, headers);
/// <summary>
/// Issues a POST request to <paramref name="url"/> with the given headers and body content,
/// and returns the response body.
/// </summary>
string SendPostRequest(string url, Dictionary<string, string> headers, HttpContent content)
    => SendHttpRequest(HttpMethod.Post, url, headers, content);
/// <summary>
/// Serializes <paramref name="data"/> to JSON and POSTs it to <paramref name="url"/>
/// as a UTF-8 "application/json" body, returning the response body.
/// </summary>
string SendJsonPostRequest(string url, Dictionary<string, string> headers, object data)
{
    string payload = JsonDumpString(data);
    using (StringContent body = new StringContent(payload, Encoding.UTF8, "application/json"))
    {
        return SendPostRequest(url, headers, body);
    }
}
/// <summary>
/// POSTs <paramref name="data"/> to <paramref name="url"/> as URL-encoded form fields
/// and returns the response body.
/// </summary>
string SendFormPostRequest(string url, Dictionary<string, string> headers, Dictionary<string, string> data)
{
    using (FormUrlEncodedContent body = new FormUrlEncodedContent(data))
    {
        return SendPostRequest(url, headers, body);
    }
}
/// <summary>
/// PUTs raw bytes to <paramref name="url"/> with an "application/octet-stream" content type
/// and returns the response body.
/// </summary>
string SendPutRequest(string url, Dictionary<string, string> headers, byte[] data)
{
    using (ByteArrayContent body = new ByteArrayContent(data))
    {
        body.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream");
        return SendHttpRequest(HttpMethod.Put, url, headers, body);
    }
}
/// <summary>
/// Requests a new access token from the OAuth token endpoint using the client-credentials grant.
/// </summary>
/// <returns>The "access_token" value of the token response, or null when it is absent.</returns>
string RefreshAccessToken()
{
    Console.WriteLine("Getting a new access token");

    // Client id and secret are sent as HTTP Basic credentials.
    string basicCredentials = Base64Encode($"{oauthClientId}:{oauthClientSecret}");
    Dictionary<string, string> requestHeaders = new()
    {
        ["Accept"] = "application/json",
        ["Authorization"] = $"Basic {basicCredentials}",
    };
    Dictionary<string, string> formFields = new()
    {
        ["grant_type"] = "client_credentials",
        ["scope"] = "environment_authorization",
    };

    string responseBody = SendFormPostRequest($"{oauthUrl}/connect/token", requestHeaders, formFields);
    JsonNode parsedResponse = JsonLoadString(responseBody);
    string accessToken = parsedResponse["access_token"]?.ToString();

    // Called for its console output (time until expiry); the boolean result is not used here.
    IsAccessTokenValid(accessToken);
    return accessToken;
}
/// <summary>
/// Extracts the second dot-separated segment of the token, base64-decodes it,
/// and parses the result as JSON.
/// </summary>
JsonNode DecodeAccessToken(string token)
{
    string[] segments = token.Split(".");
    string payloadJson = Base64Decode(segments[1]);
    return JsonLoadString(payloadJson);
}
/// <summary>
/// Checks whether the access token is present and has more than 30 seconds left
/// before the time given by its "exp" claim. Prints the remaining lifetime.
/// </summary>
/// <returns>True when the token is still usable; false when it is empty, expired, or cannot be decoded.</returns>
bool IsAccessTokenValid(string token)
{
    if (string.IsNullOrWhiteSpace(token))
    {
        Console.WriteLine("Access token is empty");
        Console.WriteLine();
        return false;
    }

    try
    {
        JsonNode claims = DecodeAccessToken(token);
        // "exp" is read as seconds since the Unix epoch; a missing claim counts as 0.
        double expSeconds = claims["exp"]?.GetValue<double>() ?? 0;
        DateTime expiresAtUtc = DateTime.UnixEpoch.AddSeconds(expSeconds);
        TimeSpan remaining = expiresAtUtc - DateTime.UtcNow;
        Console.WriteLine($"Access Token Expires In: {remaining}");
        Console.WriteLine();
        // Require a 30 second safety margin.
        return remaining > TimeSpan.FromSeconds(30);
    }
    catch (Exception e)
    {
        Console.WriteLine($"Error validating access token: {e.Message}");
        Console.WriteLine();
        return false;
    }
}
/// <summary>
/// Returns the base64 representation of the UTF-8 bytes of <paramref name="data"/>.
/// </summary>
string Base64Encode(string data)
{
    byte[] utf8Bytes = Encoding.UTF8.GetBytes(data);
    return Convert.ToBase64String(utf8Bytes);
}
/// <summary>
/// Decodes a base64 or base64url encoded string (with or without padding) into UTF-8 text.
/// </summary>
/// <param name="data">Encoded text, for example the payload segment of a JWT.</param>
/// <returns>The decoded bytes interpreted as UTF-8.</returns>
/// <exception cref="FormatException">The input is not valid base64/base64url.</exception>
string Base64Decode(string data)
{
    // Token segments use the URL-safe alphabet ('-' and '_') and usually omit '=' padding,
    // neither of which Convert.FromBase64String accepts, so normalize to standard base64 first.
    string normalized = data.Trim().TrimEnd('=').Replace('-', '+').Replace('_', '/');

    // Pad back to a multiple of four characters. The amount depends on the length:
    // blindly appending "==" is only correct when two characters are missing.
    int remainder = normalized.Length % 4;
    if (remainder != 0)
    {
        normalized = normalized.PadRight(normalized.Length + (4 - remainder), '=');
    }

    return Encoding.UTF8.GetString(Convert.FromBase64String(normalized));
}
/// <summary>
/// Serializes <paramref name="obj"/> to an indented JSON string.
/// </summary>
string JsonDumpString(object obj)
    => JsonSerializer.Serialize(obj, new JsonSerializerOptions { WriteIndented = true });
/// <summary>
/// Parses a JSON string into a <see cref="JsonNode"/> tree.
/// </summary>
JsonNode JsonLoadString(string jsonData)
    => JsonSerializer.Deserialize<JsonNode>(jsonData);
/// <summary>
/// Returns the full paths of the files directly inside <paramref name="directory"/>
/// and displays their file names as a markdown bullet list.
/// </summary>
string[] ListFiles(string directory)
{
    // Resolve relative paths so the displayed location is unambiguous.
    string absolutePath = Path.GetFullPath(directory);
    string[] filePaths = Directory.GetFiles(absolutePath);

    IEnumerable<string> names = filePaths.Select(path => Path.GetFileName(path));
    string bulletList = string.Join("\r\n- ", names);
    string markdown = $"Listing files in path: {absolutePath}\r\n- {bulletList}";
    markdown.DisplayAs("text/markdown");

    return filePaths;
}
List Files to Upload
// Expects there to be an "input" directory with files to process.
// The relative path is resolved to an absolute path inside ListFiles.
string[] inputFileNames = ListFiles("input");
Sample Output
Listing files in path: c:\path\to\files
- test.docx
Get Access Token
using System.Text.Json.Nodes;
// Fetch a token up front and show its decoded payload for inspection.
string accessToken = RefreshAccessToken();
if (accessToken is not null)
{
    string decodedPayload = JsonDumpString(DecodeAccessToken(accessToken));
    Console.WriteLine($"Access Token: {decodedPayload}");
}
Sample Output
Getting a new access token
Sending request to: POST https://auth.iam.experience.hyland.com/idp/connect/token
Access Token Expires In: 00:14:59.7781088
Access Token:
{
"iss": "https://auth.iam.experience.hyland.com/idp",
"nbf": 1755622366,
"iat": 1755622366,
"exp": 1755623266,
"aud": "hxp.authorization",
"scope": [
"environment_authorization"
],
"amr": [
"urn:hyland:params:oauth:grant-type:api-credentials"
],
"client_id": "<<REDACTED>>",
"sub": "<<REDACTED>>",
"auth_time": 1755622366,
"idp": "local",
"hxp_account": "<<REDACTED>>",
"hxp_authorization": {
"account_id": "<<REDACTED>>",
"environment_id": "<<REDACTED>>",
"subscription_id": "<<REDACTED>>",
"appkey": "cin-data-curation-api",
"role": [
"cin-data-curation.user"
],
"permission": [
"cin-data-curation.api.submit"
]
}
}
Get Presign URL
using System.Text.Json.Nodes;
// Renew the token first if it is missing or about to expire.
if (!IsAccessTokenValid(accessToken))
{
    accessToken = RefreshAccessToken();
}

// One presign result: the job id, the upload and download URLs, and the local file it belongs to.
public record PresignResponse(string JobId, string PutUrl, string GetUrl, string FileName);

List<PresignResponse> presignResponses = new();

// Request one presigned job per input file.
foreach (string inputFileName in inputFileNames)
{
    Dictionary<string, string> requestHeaders = new()
    {
        ["Accept"] = "application/json",
        ["Authorization"] = $"Bearer {accessToken}",
    };
    string responseBody = SendJsonPostRequest($"{dcApiUrl}/presign", requestHeaders, dcOptions);
    JsonNode responseJson = JsonLoadString(responseBody);

    Console.WriteLine($"Presign Response for {inputFileName}:");
    Console.WriteLine(responseJson);
    Console.WriteLine();

    string jobId = responseJson["job_id"]?.ToString();
    string putUrl = responseJson["put_url"]?.ToString();
    string getUrl = responseJson["get_url"]?.ToString();

    // Only keep responses that carry everything the later steps need.
    if (jobId is null || putUrl is null || getUrl is null)
    {
        continue;
    }
    presignResponses.Add(new PresignResponse(jobId, putUrl, getUrl, inputFileName));
}
Sample Output
Access Token Expires In: 00:14:57.3636512
Sending request to: POST https://knowledge-enrichment.ai.experience.hyland.com/latest/api/data-curation/presign
Presign Response for c:\path\to\files\test.docx:
{
"job_id": "API_14c155d8-47d0-44a8-b011-4b6a7a847599",
"put_url": "https://data-curation-api-drop-prod.s3.amazonaws.com/API_14c155d8-47d0-44a8-b011-4b6a7a847599?Expires=1755625972",
"get_url": "https://data-curation-api-results-prod.s3.amazonaws.com/API_14c155d8-47d0-44a8-b011-4b6a7a847599?Expires=1755625972"
}
Upload Files
using System.IO;
// Upload each input file to the presigned PUT URL obtained for it.
foreach (PresignResponse upload in presignResponses)
{
    var (jobId, putUrl, _, fileName) = upload;

    Console.WriteLine($"Uploading file: {fileName}");
    byte[] fileBytes = File.ReadAllBytes(fileName);
    // No extra headers are passed for the presigned URL; the response body is not needed.
    SendPutRequest(putUrl, null, fileBytes);
    Console.WriteLine($"Upload succeeded for {jobId}");
    Console.WriteLine();
}
Sample Output
Uploading file: c:\path\to\files\test.docx
Sending request to: PUT https://data-curation-api-drop-prod.s3.amazonaws.com/API_14c155d8-47d0-44a8-b011-4b6a7a847599?Expires=1755625972
Upload succeeded for API_14c155d8-47d0-44a8-b011-4b6a7a847599
Get Status
using System.Text.Json.Nodes;
// Renew the token first if it is missing or about to expire.
if (!IsAccessTokenValid(accessToken))
{
    accessToken = RefreshAccessToken();
}

// Fields read from the status endpoint's response.
public record StatusResponse(string JobId, string Status);

// Query and print the processing status of every submitted job.
foreach (PresignResponse job in presignResponses)
{
    Dictionary<string, string> requestHeaders = new()
    {
        ["Accept"] = "application/json",
        ["Authorization"] = $"Bearer {accessToken}",
    };
    string responseBody = SendGetRequest($"{dcApiUrl}/status/{job.JobId}", requestHeaders);
    JsonNode responseJson = JsonLoadString(responseBody);

    string reportedJobId = responseJson["jobId"]?.ToString();
    string reportedStatus = responseJson["status"]?.ToString();
    StatusResponse statusResponse = new StatusResponse(reportedJobId, reportedStatus);

    Console.WriteLine($"Status for {job.JobId} = {statusResponse.Status}");
    Console.WriteLine();
}
Sample Output
Access Token Expires In: 00:14:48.5219737
Sending request to: GET https://knowledge-enrichment.ai.experience.hyland.com/latest/api/data-curation/status/API_14c155d8-47d0-44a8-b011-4b6a7a847599
Status for API_14c155d8-47d0-44a8-b011-4b6a7a847599 = Done
Download Results
using System.Text.Json.Nodes;
// Parsed result documents, in the order the jobs were downloaded.
List<JsonNode> outputResultsJson = new();

foreach (PresignResponse job in presignResponses)
{
    Console.WriteLine($"Downloading results for: {job.JobId}");
    // The presigned GET URL is requested without additional headers.
    string resultBody = SendGetRequest(job.GetUrl);
    JsonNode resultJson = JsonLoadString(resultBody);

    Console.WriteLine($"Results for {job.JobId}:");
    Console.WriteLine($"{resultJson}");
    Console.WriteLine();

    // A JSON "null" document parses to a null node; skip it.
    if (resultJson is null)
    {
        continue;
    }
    outputResultsJson.Add(resultJson);
}
Sample Output
Downloading results for: API_14c155d8-47d0-44a8-b011-4b6a7a847599
Sending request to: GET https://data-curation-api-results-prod.s3.amazonaws.com/API_14c155d8-47d0-44a8-b011-4b6a7a847599?Expires=1755625972
Results for API_14c155d8-47d0-44a8-b011-4b6a7a847599:
{
"markdown": {
"output": "\u003C!-- LOC: 1, (96,120,720,151) --\u003E\n# Test Document\n\n\u003C!-- LOC: 1, (96,164,720,189) --\u003E\n## Header 2\n\n\u003C!-- LOC: 1, (96,196,720,214) --\u003E\nTest paragraph.\n\n### Header 3\n\n\u003C!-- LOC: 1, (96,254,720,272) --\u003E\nAnother paragraph.\n\n#### Header 4\n\n\u003C!-- LOC: 1, (96,305,720,323) --\u003E\nEven more paragraph.\n\n\u003C!-- LOC: 1, (96,334,720,360) --\u003E\n## Bullets\n\n\u003C!-- LOC: 1, (120,366,720,429) --\u003E\n* Point 1\n* Point 2\n* Point 3\n \u003C!-- LOC: 1, (168,430,720,466) --\u003E\n * Sub-point 3.1\n * Sub-point 3.2\n\n\u003C!-- LOC: 1, (96,478,720,503) --\u003E\n## Table\n\n\u003C!-- LOC: 1, (96,510,719,581) --\u003E\n| \u003C!-- LOC: 1, (103,510,297,528) --\u003E**Name** | \u003C!-- LOC: 1, (310,510,504,528) --\u003E**Value** | \u003C!-- LOC: 1, (518,510,712,528) --\u003E**Notes** |\n| ------------------------------------------ | ------------------------------------------- | ------------------------------------------------- |\n| \u003C!-- LOC: 1, (103,528,297,546) --\u003EText | \u003C!-- LOC: 1, (310,528,504,546) --\u003EABC | \u003C!-- LOC: 1, (518,528,712,546) --\u003EText Value |\n| \u003C!-- LOC: 1, (103,545,297,563) --\u003ENumber | \u003C!-- LOC: 1, (310,545,504,563) --\u003E123 | \u003C!-- LOC: 1, (518,545,712,563) --\u003ENumerical Value |\n| \u003C!-- LOC: 1, (103,563,297,581) --\u003ESymbol | \u003C!-- LOC: 1, (310,563,504,581) --\u003E\uD83E\uDD23\uD83E\uDD23\uD83E\uDD23 | \u003C!-- LOC: 1, (518,563,712,581) --\u003EEmoji symbols |\n\n",
"locations": [
"\u003C!-- LOC: 1, (96,164,720,189) --\u003E",
"\u003C!-- LOC: 1, (96,196,720,214) --\u003E",
"\u003C!-- LOC: 1, (96,254,720,272) --\u003E",
"\u003C!-- LOC: 1, (96,305,720,323) --\u003E",
"\u003C!-- LOC: 1, (120,366,720,429) --\u003E",
"\u003C!-- LOC: 1, (96,510,719,581) --\u003E"
],
"chunks": [
"# Test Document\n\n",
"## Header 2\n\n Test paragraph.",
"### Header 3\n\n Another paragraph.",
"#### Header 4\n\n Even more paragraph. \n\n",
"## Bullets\n\n * Point 1 * Point 2 * Point 3 * Sub-point 3.1 * Sub-point 3.2 \n\n",
"## Table\n\n \n| **Name** | **Value** | **Notes** |\n| ------------------------------------------ | ------------------------------------------- | ------------------------------------------------- |\n| Text | ABC | Text Value |\n| Number | 123 | Numerical Value |\n| Symbol | \uD83E\uDD23\uD83E\uDD23\uD83E\uDD23 | Emoji symbols |"
]
}
}
Parse Results
using System.Text.Json.Nodes;
// One chunk of text with the location entry at the same index and, when present, its embedding vector.
public record ApiResponseChunk(string Text, string Location, double[] Embeddings);
// The markdown part of a result: the full markdown output plus its chunks.
public record ApiResponseMarkdown(string Output, ApiResponseChunk[] Chunks);
// A parsed result: the markdown part and the raw "json" node (null when the result has none).
public record ApiResponse(ApiResponseMarkdown Markdown, JsonNode Json);

List<ApiResponse> outputResults = new();
foreach (JsonNode outputResultJson in outputResultsJson)
{
    // Every property is optional from the parser's point of view, so read them null-safely.
    JsonNode markdownJson = outputResultJson["markdown"];
    JsonArray locationsJson = markdownJson?["locations"]?.AsArray();
    JsonArray chunksJson = markdownJson?["chunks"]?.AsArray();
    JsonArray chunkWithEmbeddingsJson = markdownJson?["chunks_with_embeddings"]?.AsArray();

    // Returns the location at the given index, or null when the locations array is
    // missing or shorter than the chunk list (instead of throwing).
    string LocationAt(int index) =>
        locationsJson != null && index < locationsJson.Count
            ? locationsJson[index]?.ToString()
            : null;

    List<ApiResponseChunk> chunks = new();
    if (chunkWithEmbeddingsJson != null)
    {
        // Prefer chunks that carry embeddings when the result contains them.
        for (int i = 0; i < chunkWithEmbeddingsJson.Count; i++)
        {
            JsonNode chunkJson = chunkWithEmbeddingsJson[i];
            JsonArray embeddingsJson = chunkJson?["embeddings"]?.AsArray();
            double[] embeddings = embeddingsJson?.Select(e => e.GetValue<double>()).ToArray();
            chunks.Add(new ApiResponseChunk(
                Text: chunkJson?["chunk"]?.ToString(),
                Location: LocationAt(i),
                Embeddings: embeddings
            ));
        }
    }
    else if (chunksJson != null)
    {
        for (int i = 0; i < chunksJson.Count; i++)
        {
            chunks.Add(new ApiResponseChunk(
                Text: chunksJson[i]?.ToString(),
                Location: LocationAt(i),
                Embeddings: null
            ));
        }
    }

    ApiResponse apiResponse = new(
        Markdown: new(
            Output: markdownJson?["output"]?.ToString(),
            Chunks: chunks.ToArray()
        ),
        Json: outputResultJson["json"]
    );
    outputResults.Add(apiResponse);
}
display(outputResults);
| index | value | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 |
|
| Markdown |
|
| Output |
| ||||||||||||||||||||||||||||||||||||||||||||||||||
| Chunks |
1 |
|
| Text | ## Header 2 |
| Location | <!-- LOC: 1, (96,196,720,214) --> |
| Embeddings | <null> |
ApiResponseChunk
| Text | ### Header 3 |
| Location | <!-- LOC: 1, (96,254,720,272) --> |
| Embeddings | <null> |
ApiResponseChunk
| Text | #### Header 4 |
| Location | <!-- LOC: 1, (96,305,720,323) --> |
| Embeddings | <null> |
ApiResponseChunk
| Text | ## Bullets |
| Location | <!-- LOC: 1, (120,366,720,429) --> |
| Embeddings | <null> |
ApiResponseChunk
| Text | ## Table |
| Location | <!-- LOC: 1, (96,510,719,581) --> |
| Embeddings | <null> |
<null>
Display Markdown
// Print each source file name followed by its rendered markdown output.
// NOTE(review): pairs outputResults with presignResponses by index - confirm the two lists stay aligned.
for (int index = 0; index < outputResults.Count; index++)
{
    string sourceFile = presignResponses[index].FileName;
    Console.WriteLine(sourceFile);

    string markdownText = outputResults[index].Markdown.Output;
    markdownText.DisplayAs("text/markdown");
}
c:\path\to\files\test.docx
Test Document
Header 2
Test paragraph.
Header 3
Another paragraph.
Header 4
Even more paragraph.
Bullets
- Point 1
- Point 2
- Point 3
- Sub-point 3.1
- Sub-point 3.2
Table
| Name | Value | Notes |
|---|---|---|
| Text | ABC | Text Value |
| Number | 123 | Numerical Value |
| Symbol | 🤣🤣🤣 | Emoji symbols |
Display JSON
// Print each source file name followed by its JSON output, re-serialized with indentation.
for (int i = 0; i < outputResults.Count; i++)
{
    Console.WriteLine(presignResponses[i].FileName);

    // Json is null when the result document has no "json" property;
    // report that instead of failing with a NullReferenceException.
    var json = outputResults[i].Json;
    if (json is null)
    {
        Console.WriteLine("No JSON output in the result (enable the json_schema option to request it).");
        continue;
    }
    display(JsonDumpString(JsonLoadString(json.ToString())));
}