Long sentence recognition
- Print
- PDF
Long sentence recognition
- Print
- PDF
Article summary
Did you find this summary helpful?
Thank you for your feedback
Available in Classic and VPC
This document provides sample code for calling the long sentence recognition API of the CLOVA Speech service.
Java
The following is Java-based sample code for the API. The Maven dependencies required by the sample are listed first, followed by the client class.
<!-- HTTP client used to call the CLOVA Speech endpoints. -->
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.12</version>
</dependency>
<!-- Multipart support (MultipartEntityBuilder). Kept on the same version as httpclient so that
     both artifacts come from the same HttpComponents release. -->
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpmime</artifactId>
    <version>4.5.12</version>
</dependency>
<!-- JSON serialization of the request parameters. -->
<dependency>
    <groupId>com.google.code.gson</groupId>
    <artifactId>gson</artifactId>
    <version>2.8.5</version>
</dependency>
package org.example.clovaspeech.client;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicHeader;
import org.apache.http.util.EntityUtils;
import com.google.gson.Gson;
public class ClovaSpeechClient {
// CLOVA Speech secret key; sent as the value of the X-CLOVASPEECH-API-KEY header.
// Empty placeholder in this sample.
private static final String SECRET = "";
// CLOVA Speech invoke URL; endpoint paths such as "/recognizer/url" are appended to it.
// Empty placeholder in this sample.
private static final String INVOKE_URL = "";
// HTTP client used for every request made through this instance. It is not closed anywhere in this class.
private CloseableHttpClient httpClient = HttpClients.createDefault();
// JSON serializer used to build the request parameters.
private Gson gson = new Gson();
// Headers set on every request: asks for a JSON response and carries the secret key.
private static final Header[] HEADERS = new Header[] {
new BasicHeader("Accept", "application/json"),
new BasicHeader("X-CLOVASPEECH-API-KEY", SECRET),
};
/**
 * One keyword-boosting entry of a recognition request.
 * Serialized by Gson, so the field name {@code words} is the JSON key.
 */
public static class Boosting {
    private String words;

    /**
     * @param words the keywords to store in this entry
     */
    public void setWords(String words) {
        this.words = words;
    }

    /**
     * @return the stored keywords, or {@code null} when none were set
     */
    public String getWords() {
        return this.words;
    }
}
/**
 * Diarization settings of a recognition request.
 * Serialized by Gson, so the field names are the JSON keys. Diarization is disabled by default
 * and both speaker-count bounds are unset ({@code null}) until assigned.
 */
public static class Diarization {
    private Boolean enable = Boolean.FALSE;
    private Integer speakerCountMin;
    private Integer speakerCountMax;

    /** @param enable whether diarization is requested */
    public void setEnable(Boolean enable) {
        this.enable = enable;
    }

    /** @return whether diarization is requested; {@code FALSE} unless changed */
    public Boolean getEnable() {
        return this.enable;
    }

    /** @param speakerCountMin lower bound of the speaker count */
    public void setSpeakerCountMin(Integer speakerCountMin) {
        this.speakerCountMin = speakerCountMin;
    }

    /** @return lower bound of the speaker count, or {@code null} when unset */
    public Integer getSpeakerCountMin() {
        return this.speakerCountMin;
    }

    /** @param speakerCountMax upper bound of the speaker count */
    public void setSpeakerCountMax(Integer speakerCountMax) {
        this.speakerCountMax = speakerCountMax;
    }

    /** @return upper bound of the speaker count, or {@code null} when unset */
    public Integer getSpeakerCountMax() {
        return this.speakerCountMax;
    }
}
/**
 * The {@code sed} settings of a recognition request (presumably sound event detection —
 * confirm against the API reference). Disabled by default.
 */
public static class Sed {
    private Boolean enable = Boolean.FALSE;

    /** @param enable whether the feature is requested */
    public void setEnable(Boolean enable) {
        this.enable = enable;
    }

    /** @return whether the feature is requested; {@code FALSE} unless changed */
    public Boolean getEnable() {
        return this.enable;
    }
}
/**
 * Optional settings of a recognition request.
 * The upload request serializes this object directly with Gson, so the field names are the JSON
 * keys. Accessors below follow the order of the field declarations.
 */
public static class NestRequestEntity {
    // Recognition language; "ko-KR" unless changed.
    private String language = "ko-KR";
    // Optional. How the result is returned: "sync" or "async".
    private String completion = "sync";
    // Optional. Used to receive the analyzed results.
    private String callback;
    // Optional. Any data supplied by the caller.
    private Map<String, Object> userdata;
    private Boolean wordAlignment = Boolean.TRUE;
    private Boolean fullText = Boolean.TRUE;
    // Keyword boosting object array.
    private List<Boosting> boostings;
    // Comma-separated words.
    private String forbiddens;
    private Diarization diarization;
    private Sed sed;

    public String getLanguage() {
        return this.language;
    }

    public void setLanguage(String language) {
        this.language = language;
    }

    public String getCompletion() {
        return this.completion;
    }

    public void setCompletion(String completion) {
        this.completion = completion;
    }

    public String getCallback() {
        return this.callback;
    }

    public void setCallback(String callback) {
        this.callback = callback;
    }

    public Map<String, Object> getUserdata() {
        return this.userdata;
    }

    public void setUserdata(Map<String, Object> userdata) {
        this.userdata = userdata;
    }

    public Boolean getWordAlignment() {
        return this.wordAlignment;
    }

    public void setWordAlignment(Boolean wordAlignment) {
        this.wordAlignment = wordAlignment;
    }

    public Boolean getFullText() {
        return this.fullText;
    }

    public void setFullText(Boolean fullText) {
        this.fullText = fullText;
    }

    public List<Boosting> getBoostings() {
        return this.boostings;
    }

    public void setBoostings(List<Boosting> boostings) {
        this.boostings = boostings;
    }

    public String getForbiddens() {
        return this.forbiddens;
    }

    public void setForbiddens(String forbiddens) {
        this.forbiddens = forbiddens;
    }

    public Diarization getDiarization() {
        return this.diarization;
    }

    public void setDiarization(Diarization diarization) {
        this.diarization = diarization;
    }

    public Sed getSed() {
        return this.sed;
    }

    public void setSed(Sed sed) {
        this.sed = sed;
    }
}
/**
 * Requests speech recognition for a media file located at an external URL.
 * Posts a JSON body to {@code INVOKE_URL + "/recognizer/url"}.
 *
 * @param url required, the media URL
 * @param nestRequestEntity optional settings; when {@code null}, a default
 *        {@link NestRequestEntity} is used
 * @return the response body as a string
 * @throws RuntimeException when the request fails (see {@code execute})
 */
public String url(String url, NestRequestEntity nestRequestEntity) {
    // The entity is documented as optional, so fall back to its defaults instead of failing on null.
    NestRequestEntity options = nestRequestEntity != null ? nestRequestEntity : new NestRequestEntity();

    HttpPost httpPost = new HttpPost(INVOKE_URL + "/recognizer/url");
    httpPost.setHeaders(HEADERS);

    Map<String, Object> body = new HashMap<>();
    body.put("url", url);
    body.put("language", options.getLanguage());
    body.put("completion", options.getCompletion());
    body.put("callback", options.getCallback());
    // Fixed: this key previously carried the callback value instead of the user data.
    body.put("userdata", options.getUserdata());
    body.put("wordAlignment", options.getWordAlignment());
    body.put("fullText", options.getFullText());
    body.put("forbiddens", options.getForbiddens());
    body.put("boostings", options.getBoostings());
    body.put("diarization", options.getDiarization());
    body.put("sed", options.getSed());

    HttpEntity httpEntity = new StringEntity(gson.toJson(body), ContentType.APPLICATION_JSON);
    httpPost.setEntity(httpEntity);
    return execute(httpPost);
}
/**
 * Requests speech recognition for a media file stored in Object Storage on NAVER Cloud Platform.
 * Posts a JSON body to {@code INVOKE_URL + "/recognizer/object-storage"}.
 *
 * @param dataKey required, the Object Storage key
 * @param nestRequestEntity optional settings; when {@code null}, a default
 *        {@link NestRequestEntity} is used
 * @return the response body as a string
 * @throws RuntimeException when the request fails (see {@code execute})
 */
public String objectStorage(String dataKey, NestRequestEntity nestRequestEntity) {
    // The entity is documented as optional, so fall back to its defaults instead of failing on null.
    NestRequestEntity options = nestRequestEntity != null ? nestRequestEntity : new NestRequestEntity();

    HttpPost httpPost = new HttpPost(INVOKE_URL + "/recognizer/object-storage");
    httpPost.setHeaders(HEADERS);

    Map<String, Object> body = new HashMap<>();
    body.put("dataKey", dataKey);
    body.put("language", options.getLanguage());
    body.put("completion", options.getCompletion());
    body.put("callback", options.getCallback());
    // Fixed: this key previously carried the callback value instead of the user data.
    body.put("userdata", options.getUserdata());
    body.put("wordAlignment", options.getWordAlignment());
    body.put("fullText", options.getFullText());
    body.put("forbiddens", options.getForbiddens());
    body.put("boostings", options.getBoostings());
    body.put("diarization", options.getDiarization());
    body.put("sed", options.getSed());

    StringEntity httpEntity = new StringEntity(gson.toJson(body), ContentType.APPLICATION_JSON);
    httpPost.setEntity(httpEntity);
    return execute(httpPost);
}
/**
 * Requests speech recognition by uploading a local media file.
 * Posts a multipart body to {@code INVOKE_URL + "/recognizer/upload"} with two parts:
 * {@code params} (the settings as JSON) and {@code media} (the file content).
 *
 * @param file required, the local media file
 * @param nestRequestEntity optional settings; when {@code null}, a default
 *        {@link NestRequestEntity} is used
 * @return the response body as a string
 * @throws RuntimeException when the request fails (see {@code execute})
 */
public String upload(File file, NestRequestEntity nestRequestEntity) {
    // The entity is documented as optional; without this, null would be sent as the JSON literal "null".
    NestRequestEntity options = nestRequestEntity != null ? nestRequestEntity : new NestRequestEntity();

    HttpPost httpPost = new HttpPost(INVOKE_URL + "/recognizer/upload");
    httpPost.setHeaders(HEADERS);

    HttpEntity httpEntity = MultipartEntityBuilder.create()
        .addTextBody("params", gson.toJson(options), ContentType.APPLICATION_JSON)
        // The part carries raw file bytes, so it is labelled as a generic binary stream rather than
        // as a nested multipart body (which would also require a boundary parameter).
        .addBinaryBody("media", file, ContentType.APPLICATION_OCTET_STREAM, file.getName())
        .build();
    httpPost.setEntity(httpEntity);
    return execute(httpPost);
}
/**
 * Sends the prepared POST request and returns the response body decoded as UTF-8.
 * The HTTP status code is not inspected. The response is closed before returning.
 *
 * @param httpPost the fully prepared request
 * @return the response body
 * @throws RuntimeException wrapping any exception raised while sending or reading
 */
private String execute(HttpPost httpPost) {
    try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
        return EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
    } catch (Exception cause) {
        throw new RuntimeException(cause);
    }
}
/**
 * Sample entry point: uploads a local file with the default settings and prints the response body.
 */
public static void main(String[] args) {
    final NestRequestEntity requestEntity = new NestRequestEntity();
    final ClovaSpeechClient client = new ClovaSpeechClient();
    final File media = new File("/data/sample.mp4");

    // Alternative requests; swap one in for the upload call below:
    //   client.url("file URL", requestEntity)
    //   client.objectStorage("Object Storage key", requestEntity)
    final String responseBody = client.upload(media, requestEntity);
    System.out.println(responseBody);
}
}
Python
The following is Python-based sample code for the API.
import requests
import json
class ClovaSpeechClient:
    """Minimal client for the CLOVA Speech long sentence recognition endpoints.

    Every ``req_*`` method sends one POST request with ``requests`` and returns the
    resulting response object unchanged; the HTTP status is not checked here.
    """

    # CLOVA Speech invoke URL (issued when registering the app)
    invoke_url = ''
    # CLOVA Speech secret key (issued when registering the app)
    secret = ''

    def _build_request_body(self, completion, callback, userdata, forbiddens, boostings,
                            wordAlignment, fullText, diarization, sed, language, **source):
        """Return the request parameters shared by all endpoints.

        ``source`` holds the endpoint-specific key (``url`` or ``dataKey``), which is placed
        first so the key order matches what each endpoint sent before.
        """
        body = dict(source)
        body.update({
            'language': language,
            'completion': completion,
            'callback': callback,
            'userdata': userdata,
            'wordAlignment': wordAlignment,
            'fullText': fullText,
            'forbiddens': forbiddens,
            'boostings': boostings,
            'diarization': diarization,
            'sed': sed,
        })
        return body

    def _json_headers(self):
        """Return the headers used by the two JSON-body endpoints."""
        return {
            'Accept': 'application/json;UTF-8',
            'Content-Type': 'application/json;UTF-8',
            'X-CLOVASPEECH-API-KEY': self.secret
        }

    def req_url(self, url, completion, callback=None, userdata=None, forbiddens=None, boostings=None, wordAlignment=True, fullText=True, diarization=None, sed=None, language='ko-KR'):
        """Request recognition of a media file located at an external URL.

        :param url: media URL.
        :param completion: how the result is returned, e.g. ``'sync'``.
        :param language: recognition language; defaults to ``'ko-KR'``.
        :return: the ``requests`` response of the POST to ``/recognizer/url``.
        """
        request_body = self._build_request_body(
            completion, callback, userdata, forbiddens, boostings,
            wordAlignment, fullText, diarization, sed, language, url=url)
        return requests.post(headers=self._json_headers(),
                             url=self.invoke_url + '/recognizer/url',
                             data=json.dumps(request_body).encode('UTF-8'))

    def req_object_storage(self, data_key, completion, callback=None, userdata=None, forbiddens=None, boostings=None,
                           wordAlignment=True, fullText=True, diarization=None, sed=None, language='ko-KR'):
        """Request recognition of a media file identified by an Object Storage key.

        :param data_key: Object Storage key, sent as ``dataKey``.
        :param completion: how the result is returned, e.g. ``'sync'``.
        :param language: recognition language; defaults to ``'ko-KR'``.
        :return: the ``requests`` response of the POST to ``/recognizer/object-storage``.
        """
        request_body = self._build_request_body(
            completion, callback, userdata, forbiddens, boostings,
            wordAlignment, fullText, diarization, sed, language, dataKey=data_key)
        return requests.post(headers=self._json_headers(),
                             url=self.invoke_url + '/recognizer/object-storage',
                             data=json.dumps(request_body).encode('UTF-8'))

    def req_upload(self, file, completion, callback=None, userdata=None, forbiddens=None, boostings=None,
                   wordAlignment=True, fullText=True, diarization=None, sed=None, language='ko-KR'):
        """Request recognition by uploading a local media file as multipart form data.

        :param file: path of the local media file.
        :param completion: how the result is returned, e.g. ``'sync'``.
        :param language: recognition language; defaults to ``'ko-KR'``.
        :return: the ``requests`` response of the POST to ``/recognizer/upload``.
        """
        request_body = self._build_request_body(
            completion, callback, userdata, forbiddens, boostings,
            wordAlignment, fullText, diarization, sed, language)
        headers = {
            'Accept': 'application/json;UTF-8',
            'X-CLOVASPEECH-API-KEY': self.secret
        }
        # Unlike the JSON endpoints, the parameters keep non-ASCII characters unescaped here.
        params = json.dumps(request_body, ensure_ascii=False).encode('UTF-8')
        print(params)
        # The context manager closes the media file once the request has been sent;
        # previously the handle was left open.
        with open(file, 'rb') as media:
            files = {
                'media': media,
                'params': (None, params, 'application/json')
            }
            response = requests.post(headers=headers, url=self.invoke_url + '/recognizer/upload', files=files)
        return response
if __name__ == '__main__':
    # Sample run: upload a local file synchronously and print the raw response text.
    client = ClovaSpeechClient()
    # Alternative requests; swap one in for the upload call below:
    # response = client.req_url(url='http://example.com/media.mp3', completion='sync')
    # response = client.req_object_storage(data_key='data/media.mp3', completion='sync')
    response = client.req_upload(file='/data/media.mp3', completion='sync')
    print(response.text)
PHP
The following is PHP-based sample code for the API.
<?php
$secret = '';
$invoke_url = '';
/**
 * Requests recognition of a media file located at an external URL.
 *
 * Only $url and $completion are required. The other settings default to null, so they
 * may be omitted by the caller.
 *
 * @param string $url        Media URL.
 * @param string $completion How the result is returned, e.g. 'sync'.
 * @param mixed  $callback, $userdata, $forbiddens, $boostings,
 *               $wordAlignment, $fullText, $diarization, $sed  Optional settings, sent as given.
 * @return mixed Whatever execute() returns for the POST to /recognizer/url.
 */
function req_url($url, $completion, $callback = null, $userdata = null, $forbiddens = null, $boostings = null,
                 $wordAlignment = null, $fullText = null, $diarization = null, $sed = null)
{
    $object = (object)[
        'language' => 'ko-KR',
        'completion' => $completion,
        'callback' => $callback,
        'url' => $url,
        'userdata' => $userdata,
        'forbiddens' => $forbiddens,
        'boostings' => $boostings,
        'wordAlignment' => $wordAlignment,
        'fullText' => $fullText,
        'diarization' => $diarization,
        'sed' => $sed,
    ];
    return execute('/recognizer/url', json_encode($object), array('Content-Type: application/json'));
}
/**
 * Requests recognition of a media file identified by an Object Storage key.
 *
 * Only $dataKey and $completion are required. The other settings default to null, so they
 * may be omitted by the caller.
 *
 * @param string $dataKey    Object Storage key.
 * @param string $completion How the result is returned, e.g. 'sync'.
 * @param mixed  $callback, $userdata, $forbiddens, $boostings,
 *               $wordAlignment, $fullText, $diarization, $sed  Optional settings, sent as given.
 * @return mixed Whatever execute() returns for the POST to /recognizer/object-storage.
 */
function req_object_storage($dataKey, $completion, $callback = null, $userdata = null, $forbiddens = null,
                            $boostings = null, $wordAlignment = null, $fullText = null, $diarization = null,
                            $sed = null)
{
    $object = (object)[
        'language' => 'ko-KR',
        'completion' => $completion,
        'callback' => $callback,
        'dataKey' => $dataKey,
        'userdata' => $userdata,
        'forbiddens' => $forbiddens,
        'boostings' => $boostings,
        'wordAlignment' => $wordAlignment,
        'fullText' => $fullText,
        'diarization' => $diarization,
        'sed' => $sed,
    ];
    return execute('/recognizer/object-storage', json_encode($object), array('Content-Type: application/json'));
}
/**
 * Requests recognition by uploading a local media file as multipart form data.
 *
 * Only $filePath and $completion are required. The other settings default to null, so they
 * may be omitted by the caller.
 *
 * @param string $filePath   Path of the local media file.
 * @param string $completion How the result is returned, e.g. 'sync'.
 * @param mixed  $callback, $userdata, $forbiddens, $boostings,
 *               $wordAlignment, $fullText, $diarization, $sed  Optional settings, sent as given.
 * @return mixed Whatever execute() returns for the POST to /recognizer/upload.
 */
function req_upload($filePath, $completion, $callback = null, $userdata = null, $forbiddens = null,
                    $boostings = null, $wordAlignment = null, $fullText = null, $diarization = null,
                    $sed = null)
{
    $object = (object)[
        'language' => 'ko-KR',
        'completion' => $completion,
        'callback' => $callback,
        'userdata' => $userdata,
        'forbiddens' => $forbiddens,
        'boostings' => $boostings,
        'wordAlignment' => $wordAlignment,
        'fullText' => $fullText,
        'diarization' => $diarization,
        'sed' => $sed,
    ];
    // Passing an array as the POST fields makes cURL send multipart/form-data:
    // the file goes in "media" and the JSON settings in "params".
    $fields = array(
        'media' => new CURLFile($filePath),
        'params' => json_encode($object),
    );
    return execute('/recognizer/upload', $fields, null);
}
/**
 * Sends a POST request to $GLOBALS['invoke_url'] . $uri and returns the response body.
 *
 * @param string       $uri           Endpoint path, e.g. '/recognizer/upload'.
 * @param string|array $postFields    Request body: a JSON string, or an array for multipart form data.
 * @param array|null   $customHeaders Extra header lines added after the API key header.
 * @return string|false The response body. On a cURL failure the error message is echoed and
 *                      returned instead.
 */
function execute($uri, $postFields, $customHeaders = null)
{
    try {
        $ch = curl_init($GLOBALS['invoke_url'] . $uri);
        if ($ch === false) {
            // Without a handle none of the calls below can work; report it like any other cURL error.
            $err = 'Failed to initialize cURL session';
            echo 'cURL Error #:' . $err;
            return $err;
        }
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // Keep certificate verification on: the request carries the secret API key, so the
        // server identity must be checked.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST');
        curl_setopt($ch, CURLOPT_POSTFIELDS, $postFields);
        // Debug aid: verbose output includes the request headers (and therefore the API key).
        // Turn it off outside of local testing.
        curl_setopt($ch, CURLOPT_VERBOSE, true);
        // Seconds.
        curl_setopt($ch, CURLOPT_TIMEOUT, 600);
        $headers = array();
        $headers[] = 'X-CLOVASPEECH-API-KEY: ' . $GLOBALS['secret'];
        if (!is_null($customHeaders)) {
            $headers = array_merge($headers, $customHeaders);
        }
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        $response = curl_exec($ch);
        $err = curl_error($ch);
        curl_close($ch);
        if ($err) {
            echo 'cURL Error #:' . $err;
            return $err;
        }
        return $response;
    } catch (Exception $E) {
        // Double quotes so that "\n" is a real line break; Exception has no lastResponse
        // property, so the message is returned instead.
        echo 'Response: ' . $E . "\n";
        return $E->getMessage();
    }
}
// Each helper takes ten arguments: the media source, the completion mode and eight optional
// settings (callback, userdata, forbiddens, boostings, wordAlignment, fullText, diarization, sed).
// The calls previously passed only nine.
//$response = req_url('https://example.com/sample.mp4', 'sync', null, null, null, null, null, null, null, null);
//$response = req_object_storage('data/sample.mp4', 'sync', null, null, null, null, null, null, null, null);
$response = req_upload('/data/sample.mp4', 'sync', null, null, null, null, null, null, null, null);
echo $response;
?>
C#
The following is C#-based sample code for the API. It covers only the request that uploads a local file.
using System;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Text.RegularExpressions;
using System.Threading.Channels;
using System.Threading.Tasks;
namespace HttpClientStatus
{
    /// <summary>
    /// Recognition settings serialized to JSON and sent in the "params" form field.
    /// </summary>
    public class ClovaSpeechRequest
    {
        // The property names are lower-case on purpose: System.Text.Json writes them verbatim
        // as the JSON keys.
        public string language { get; set; }
        public string completion { get; set; }
        // Other parameters are omitted here. For the list of available parameters, see "Request recognition with Object Storage file URL (https://api.ncloud-docs.com/release-20241121/docs/ai-application-service-clovaspeech-longsentence/objectstorageurl)," "Request recognition with external URL (https://api.ncloud-docs.com/release-20241121/docs/ai-application-service-clovaspeech-longsentence/externalurl)," and "Request recognition after uploading local file (https://api.ncloud-docs.com/release-20241121/docs/ai-application-service-clovaspeech-longsentence/local)."
    }

    public class Program
    {
        // Secret key and invoke URL issued for the app; empty placeholders in this sample.
        private static readonly string secretKey = "";
        private static readonly string invokeUrl = "";

        // One client shared by all requests instead of a new HttpClient (and its connections) per call.
        private static readonly HttpClient httpClient = new HttpClient();

        /// <summary>
        /// Uploads a local media file for recognition and returns the response body.
        /// </summary>
        /// <param name="clovaSpeechRequest">Recognition settings, sent as JSON in the "params" part.</param>
        /// <param name="path">Path of the local media file, sent in the "media" part.</param>
        /// <returns>The response body as a string; the HTTP status code is not checked.</returns>
        public static async Task<string> Upload(ClovaSpeechRequest clovaSpeechRequest, string path)
        {
            using (var request = new HttpRequestMessage(HttpMethod.Post, invokeUrl + "/recognizer/upload"))
            using (var multiForm = new MultipartFormDataContent())
            {
                // The API key is a request header, so it is set on the request rather than on the content.
                request.Headers.Add("X-CLOVASPEECH-API-KEY", secretKey);

                // Labelled as JSON, matching the Java and Python samples for the same endpoint.
                string parameters = JsonSerializer.Serialize(clovaSpeechRequest);
                multiForm.Add(new StringContent(parameters, Encoding.UTF8, "application/json"), "params");

                // The stream is owned by the StreamContent; disposing multiForm disposes both,
                // so the file handle is released when this method completes.
                string fileName = Path.GetFileName(path);
                multiForm.Add(new StreamContent(File.OpenRead(path)), "media", fileName);
                Console.WriteLine(fileName);

                request.Content = multiForm;
                using (var message = await httpClient.SendAsync(request))
                {
                    return await message.Content.ReadAsStringAsync();
                }
            }
        }

        /// <summary>
        /// Sample entry point: uploads one file synchronously and prints the response body.
        /// </summary>
        static async Task Main(string[] args)
        {
            var clovaSpeechRequest = new ClovaSpeechRequest
            {
                language = "ko-KR",
                completion = "sync"
            };
            // Verbatim string: each backslash is literal, so a single one separates the path segments.
            var result = await Upload(clovaSpeechRequest, @"D:\media\video\sample.mp3");
            Console.WriteLine(result);
        }
    }
}
Was this article helpful?