長文認識
- 印刷する
- PDF
長文認識
- 印刷する
- PDF
記事の要約
この要約は役に立ちましたか?
ご意見ありがとうございます
Classic/VPC環境で利用できます。
CLOVA Speechサービスの長文認識のユースケースを紹介します。
Java
Javaベースの APIのサンプルコードは次の通りです。
<!-- Apache HttpClient: HTTP transport used by the Java sample. -->
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.12</version>
</dependency>
<!-- Multipart support (MultipartEntityBuilder); kept on the same release as httpclient. -->
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpmime</artifactId>
    <version>4.5.12</version>
</dependency>
<!-- Gson: JSON serialization of the request parameters. -->
<dependency>
    <groupId>com.google.code.gson</groupId>
    <artifactId>gson</artifactId>
    <version>2.8.5</version>
</dependency>
package org.example.clovaspeech.client;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicHeader;
import org.apache.http.util.EntityUtils;
import com.google.gson.Gson;
/**
 * Sample client for the CLOVA Speech long-form recognition endpoints.
 *
 * <p>Three request styles are offered: recognition from an external URL ({@link #url}),
 * from an Object Storage key ({@link #objectStorage}) and from an uploaded local file
 * ({@link #upload}). Every method returns the raw response body as a string.
 */
public class ClovaSpeechClient {

    // Clova Speech secret key
    private static final String SECRET = "";
    // Clova Speech invoke URL
    private static final String INVOKE_URL = "";

    // Headers attached to every request.
    private static final Header[] HEADERS = new Header[] {
        new BasicHeader("Accept", "application/json"),
        new BasicHeader("X-CLOVASPEECH-API-KEY", SECRET),
    };

    private final CloseableHttpClient httpClient = HttpClients.createDefault();
    private final Gson gson = new Gson();

    /** Keyword boosting entry. */
    public static class Boosting {
        private String words;

        public String getWords() {
            return words;
        }

        public void setWords(String words) {
            this.words = words;
        }
    }

    /** Diarization options; disabled by default. */
    public static class Diarization {
        private Boolean enable = Boolean.FALSE;
        private Integer speakerCountMin;
        private Integer speakerCountMax;

        public Boolean getEnable() {
            return enable;
        }

        public void setEnable(Boolean enable) {
            this.enable = enable;
        }

        public Integer getSpeakerCountMin() {
            return speakerCountMin;
        }

        public void setSpeakerCountMin(Integer speakerCountMin) {
            this.speakerCountMin = speakerCountMin;
        }

        public Integer getSpeakerCountMax() {
            return speakerCountMax;
        }

        public void setSpeakerCountMax(Integer speakerCountMax) {
            this.speakerCountMax = speakerCountMax;
        }
    }

    /** "sed" option of the request; disabled by default. */
    public static class Sed {
        private Boolean enable = Boolean.FALSE;

        public Boolean getEnable() {
            return enable;
        }

        public void setEnable(Boolean enable) {
            this.enable = enable;
        }
    }

    /** Optional recognition parameters shared by all three request styles. */
    public static class NestRequestEntity {
        private String language = "ko-KR";
        // optional, how the result is returned: sync/async
        private String completion = "sync";
        // optional, used to receive the analyzed results
        private String callback;
        // optional, any data
        private Map<String, Object> userdata;
        private Boolean wordAlignment = Boolean.TRUE;
        private Boolean fullText = Boolean.TRUE;
        // boosting object array
        private List<Boosting> boostings;
        // comma separated words
        private String forbiddens;
        private Diarization diarization;
        private Sed sed;

        public Sed getSed() {
            return sed;
        }

        public void setSed(Sed sed) {
            this.sed = sed;
        }

        public String getLanguage() {
            return language;
        }

        public void setLanguage(String language) {
            this.language = language;
        }

        public String getCompletion() {
            return completion;
        }

        public void setCompletion(String completion) {
            this.completion = completion;
        }

        public String getCallback() {
            return callback;
        }

        public void setCallback(String callback) {
            this.callback = callback;
        }

        public Boolean getWordAlignment() {
            return wordAlignment;
        }

        public void setWordAlignment(Boolean wordAlignment) {
            this.wordAlignment = wordAlignment;
        }

        public Boolean getFullText() {
            return fullText;
        }

        public void setFullText(Boolean fullText) {
            this.fullText = fullText;
        }

        public Map<String, Object> getUserdata() {
            return userdata;
        }

        public void setUserdata(Map<String, Object> userdata) {
            this.userdata = userdata;
        }

        public String getForbiddens() {
            return forbiddens;
        }

        public void setForbiddens(String forbiddens) {
            this.forbiddens = forbiddens;
        }

        public List<Boosting> getBoostings() {
            return boostings;
        }

        public void setBoostings(List<Boosting> boostings) {
            this.boostings = boostings;
        }

        public Diarization getDiarization() {
            return diarization;
        }

        public void setDiarization(Diarization diarization) {
            this.diarization = diarization;
        }
    }

    /**
     * Recognizes media referenced by an external URL.
     *
     * @param url required, the media URL
     * @param nestRequestEntity optional; when {@code null} the default options are used
     * @return the response body as a string
     * @throws RuntimeException if the request fails
     */
    public String url(String url, NestRequestEntity nestRequestEntity) {
        return postJson("/recognizer/url", buildBody("url", url, nestRequestEntity));
    }

    /**
     * Recognizes media stored in Object Storage.
     *
     * @param dataKey required, the Object Storage key
     * @param nestRequestEntity optional; when {@code null} the default options are used
     * @return the response body as a string
     * @throws RuntimeException if the request fails
     */
    public String objectStorage(String dataKey, NestRequestEntity nestRequestEntity) {
        return postJson("/recognizer/object-storage", buildBody("dataKey", dataKey, nestRequestEntity));
    }

    /**
     * Recognizes media by uploading a local file as a multipart request.
     *
     * @param file required, the media file
     * @param nestRequestEntity optional; when {@code null} the default options are used
     * @return the response body as a string
     * @throws RuntimeException if the request fails
     */
    public String upload(File file, NestRequestEntity nestRequestEntity) {
        NestRequestEntity options = orDefaults(nestRequestEntity);
        HttpPost httpPost = new HttpPost(INVOKE_URL + "/recognizer/upload");
        httpPost.setHeaders(HEADERS);
        HttpEntity httpEntity = MultipartEntityBuilder.create()
            .addTextBody("params", gson.toJson(options), ContentType.APPLICATION_JSON)
            .addBinaryBody("media", file, ContentType.MULTIPART_FORM_DATA, file.getName())
            .build();
        httpPost.setEntity(httpEntity);
        return execute(httpPost);
    }

    /** Returns the given options, or a default-initialized instance when none were supplied. */
    private static NestRequestEntity orDefaults(NestRequestEntity nestRequestEntity) {
        return nestRequestEntity != null ? nestRequestEntity : new NestRequestEntity();
    }

    /**
     * Builds the JSON body shared by the URL and Object Storage requests.
     *
     * @param sourceKey name of the field identifying the media ("url" or "dataKey")
     * @param sourceValue value of that field
     * @param nestRequestEntity optional recognition parameters, may be {@code null}
     */
    private static Map<String, Object> buildBody(
            String sourceKey, String sourceValue, NestRequestEntity nestRequestEntity) {
        NestRequestEntity options = orDefaults(nestRequestEntity);
        Map<String, Object> body = new HashMap<>();
        body.put(sourceKey, sourceValue);
        body.put("language", options.getLanguage());
        body.put("completion", options.getCompletion());
        body.put("callback", options.getCallback());
        // Must carry the user data map; the callback URL already has its own field above.
        body.put("userdata", options.getUserdata());
        body.put("wordAlignment", options.getWordAlignment());
        body.put("fullText", options.getFullText());
        body.put("forbiddens", options.getForbiddens());
        body.put("boostings", options.getBoostings());
        body.put("diarization", options.getDiarization());
        body.put("sed", options.getSed());
        return body;
    }

    /** Serializes {@code body} to JSON and posts it to {@code INVOKE_URL + path}. */
    private String postJson(String path, Map<String, Object> body) {
        HttpPost httpPost = new HttpPost(INVOKE_URL + path);
        httpPost.setHeaders(HEADERS);
        httpPost.setEntity(new StringEntity(gson.toJson(body), ContentType.APPLICATION_JSON));
        return execute(httpPost);
    }

    /**
     * Executes the request and reads the response entity as UTF-8 text.
     * Any failure is rethrown as a {@link RuntimeException} with the original cause attached.
     */
    private String execute(HttpPost httpPost) {
        try (final CloseableHttpResponse httpResponse = httpClient.execute(httpPost)) {
            final HttpEntity entity = httpResponse.getEntity();
            return EntityUtils.toString(entity, StandardCharsets.UTF_8);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /** Sample entry point: uploads a local file with the default options and prints the response. */
    public static void main(String[] args) {
        final ClovaSpeechClient clovaSpeechClient = new ClovaSpeechClient();
        NestRequestEntity requestEntity = new NestRequestEntity();
        final String result =
            clovaSpeechClient.upload(new File("/data/sample.mp4"), requestEntity);
        //final String result = clovaSpeechClient.url("file URL", requestEntity);
        //final String result = clovaSpeechClient.objectStorage("Object Storage key", requestEntity);
        System.out.println(result);
    }
}
Python
Pythonベースの APIのサンプルコードは次の通りです。
import requests
import json
class ClovaSpeechClient:
    """Sample client for the CLOVA Speech long-form recognition endpoints.

    Each ``req_*`` method posts one request and returns the ``requests``
    response object unchanged.
    """

    # Clova Speech invoke URL (issued when the app is registered)
    invoke_url = ''
    # Clova Speech secret key (issued when the app is registered)
    secret = ''

    def req_url(self, url, completion, callback=None, userdata=None, forbiddens=None, boostings=None, wordAlignment=True, fullText=True, diarization=None, sed=None):
        """Request recognition of the media at an external ``url``.

        The remaining arguments are copied as-is into the JSON request body.
        """
        request_body = {
            'url': url,
            'language': 'ko-KR',
            'completion': completion,
            'callback': callback,
            'userdata': userdata,
            'wordAlignment': wordAlignment,
            'fullText': fullText,
            'forbiddens': forbiddens,
            'boostings': boostings,
            'diarization': diarization,
            'sed': sed,
        }
        headers = {
            'Accept': 'application/json;UTF-8',
            'Content-Type': 'application/json;UTF-8',
            'X-CLOVASPEECH-API-KEY': self.secret
        }
        return requests.post(headers=headers,
                             url=self.invoke_url + '/recognizer/url',
                             data=json.dumps(request_body).encode('UTF-8'))

    def req_object_storage(self, data_key, completion, callback=None, userdata=None, forbiddens=None, boostings=None,
                           wordAlignment=True, fullText=True, diarization=None, sed=None):
        """Request recognition of the Object Storage item identified by ``data_key``.

        The remaining arguments are copied as-is into the JSON request body.
        """
        request_body = {
            'dataKey': data_key,
            'language': 'ko-KR',
            'completion': completion,
            'callback': callback,
            'userdata': userdata,
            'wordAlignment': wordAlignment,
            'fullText': fullText,
            'forbiddens': forbiddens,
            'boostings': boostings,
            'diarization': diarization,
            'sed': sed,
        }
        headers = {
            'Accept': 'application/json;UTF-8',
            'Content-Type': 'application/json;UTF-8',
            'X-CLOVASPEECH-API-KEY': self.secret
        }
        return requests.post(headers=headers,
                             url=self.invoke_url + '/recognizer/object-storage',
                             data=json.dumps(request_body).encode('UTF-8'))

    def req_upload(self, file, completion, callback=None, userdata=None, forbiddens=None, boostings=None,
                   wordAlignment=True, fullText=True, diarization=None, sed=None):
        """Upload the local media file at path ``file`` and request recognition.

        The options are sent as the ``params`` part and the file content as the
        ``media`` part of a multipart request. The file is closed before returning.
        """
        request_body = {
            'language': 'ko-KR',
            'completion': completion,
            'callback': callback,
            'userdata': userdata,
            'wordAlignment': wordAlignment,
            'fullText': fullText,
            'forbiddens': forbiddens,
            'boostings': boostings,
            'diarization': diarization,
            'sed': sed,
        }
        headers = {
            'Accept': 'application/json;UTF-8',
            'X-CLOVASPEECH-API-KEY': self.secret
        }
        params_json = json.dumps(request_body, ensure_ascii=False).encode('UTF-8')
        print(params_json)
        # Keep the handle inside a context manager so it is released even when the post fails.
        with open(file, 'rb') as media:
            files = {
                'media': media,
                'params': (None, params_json, 'application/json')
            }
            response = requests.post(headers=headers, url=self.invoke_url + '/recognizer/upload', files=files)
        return response
if __name__ == '__main__':
    # Alternative request styles (swap in for the upload call below):
    # res = ClovaSpeechClient().req_url(url='http://example.com/media.mp3', completion='sync')
    # res = ClovaSpeechClient().req_object_storage(data_key='data/media.mp3', completion='sync')
    speech_client = ClovaSpeechClient()
    upload_response = speech_client.req_upload(file='/data/media.mp3', completion='sync')
    # Show the raw response body returned by the service.
    print(upload_response.text)
PHP
PHPベースの長文認識 APIのサンプルコードは次の通りです。
<?php
// CLOVA Speech secret key; execute() sends it in the X-CLOVASPEECH-API-KEY header.
$secret = '';
// CLOVA Speech invoke URL; execute() prepends it to each endpoint path.
$invoke_url = '';
/**
 * Requests recognition of the media at an external URL.
 *
 * Only $url and $completion are required; every other option defaults to null
 * and is placed in the JSON body as given.
 *
 * @param string $url           media URL
 * @param string $completion    how the result is returned ('sync' or 'async')
 * @param mixed  $callback      optional callback setting
 * @param mixed  $userdata      optional user data
 * @param mixed  $forbiddens    optional forbidden words
 * @param mixed  $boostings     optional keyword boosting entries
 * @param mixed  $wordAlignment optional word alignment flag
 * @param mixed  $fullText      optional full text flag
 * @param mixed  $diarization   optional diarization options
 * @param mixed  $sed           optional sed options
 * @return mixed whatever execute() returns (response body or cURL error text)
 */
function req_url($url, $completion, $callback = null, $userdata = null, $forbiddens = null, $boostings = null,
                 $wordAlignment = null, $fullText = null, $diarization = null, $sed = null)
{
    $object = (object)[
        'language' => 'ko-KR',
        'completion' => $completion,
        'callback' => $callback,
        'url' => $url,
        'userdata' => $userdata,
        'forbiddens' => $forbiddens,
        'boostings' => $boostings,
        'wordAlignment' => $wordAlignment,
        'fullText' => $fullText,
        'diarization' => $diarization,
        'sed' => $sed,
    ];
    return execute('/recognizer/url', json_encode($object), array('Content-Type: application/json'));
}
/**
 * Requests recognition of a media file stored in Object Storage.
 *
 * Only $dataKey and $completion are required; every other option defaults to null
 * and is placed in the JSON body as given.
 *
 * @param string $dataKey       Object Storage key of the media file
 * @param string $completion    how the result is returned ('sync' or 'async')
 * @param mixed  $callback      optional callback setting
 * @param mixed  $userdata      optional user data
 * @param mixed  $forbiddens    optional forbidden words
 * @param mixed  $boostings     optional keyword boosting entries
 * @param mixed  $wordAlignment optional word alignment flag
 * @param mixed  $fullText      optional full text flag
 * @param mixed  $diarization   optional diarization options
 * @param mixed  $sed           optional sed options
 * @return mixed whatever execute() returns (response body or cURL error text)
 */
function req_object_storage($dataKey, $completion, $callback = null, $userdata = null, $forbiddens = null,
                            $boostings = null, $wordAlignment = null, $fullText = null, $diarization = null,
                            $sed = null)
{
    $object = (object)[
        'language' => 'ko-KR',
        'completion' => $completion,
        'callback' => $callback,
        'dataKey' => $dataKey,
        'userdata' => $userdata,
        'forbiddens' => $forbiddens,
        'boostings' => $boostings,
        'wordAlignment' => $wordAlignment,
        'fullText' => $fullText,
        'diarization' => $diarization,
        'sed' => $sed,
    ];
    return execute('/recognizer/object-storage', json_encode($object), array('Content-Type: application/json'));
}
/**
 * Uploads a local media file and requests recognition.
 *
 * The options are JSON-encoded into the 'params' field and the file is attached
 * as the 'media' field of a multipart form. Only $filePath and $completion are
 * required; every other option defaults to null.
 *
 * @param string $filePath      path of the local media file
 * @param string $completion    how the result is returned ('sync' or 'async')
 * @param mixed  $callback      optional callback setting
 * @param mixed  $userdata      optional user data
 * @param mixed  $forbiddens    optional forbidden words
 * @param mixed  $boostings     optional keyword boosting entries
 * @param mixed  $wordAlignment optional word alignment flag
 * @param mixed  $fullText      optional full text flag
 * @param mixed  $diarization   optional diarization options
 * @param mixed  $sed           optional sed options
 * @return mixed whatever execute() returns (response body or cURL error text)
 */
function req_upload($filePath, $completion, $callback = null, $userdata = null, $forbiddens = null,
                    $boostings = null, $wordAlignment = null, $fullText = null, $diarization = null,
                    $sed = null)
{
    $object = (object)[
        'language' => 'ko-KR',
        'completion' => $completion,
        'callback' => $callback,
        'userdata' => $userdata,
        'forbiddens' => $forbiddens,
        'boostings' => $boostings,
        'wordAlignment' => $wordAlignment,
        'fullText' => $fullText,
        'diarization' => $diarization,
        'sed' => $sed,
    ];
    // Passing an array as the post fields makes cURL send multipart/form-data.
    $fields = array(
        'media' => new CURLFile($filePath),
        'params' => json_encode($object),
    );
    return execute('/recognizer/upload', $fields, null);
}
/**
 * Sends a POST request to the invoke URL plus $uri and returns the response body.
 *
 * @param string       $uri           endpoint path appended to $GLOBALS['invoke_url']
 * @param string|array $postFields    request body (JSON string, or field array for multipart)
 * @param array|null   $customHeaders extra header lines, or null for none
 * @return mixed the response body, or the error text when the transfer failed
 */
function execute($uri, $postFields, $customHeaders)
{
    try {
        $ch = curl_init($GLOBALS['invoke_url'] . $uri);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // Keep certificate verification on: the request carries the secret API key.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST');
        curl_setopt($ch, CURLOPT_POSTFIELDS, $postFields);
        // NOTE(review): verbose output may include the request headers (and thus the key) - confirm before production use.
        curl_setopt($ch, CURLOPT_VERBOSE, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 600);
        $headers = array();
        $headers[] = 'X-CLOVASPEECH-API-KEY: ' . $GLOBALS['secret'];
        if (!is_null($customHeaders)) {
            $headers = array_merge($headers, $customHeaders);
        }
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        $response = curl_exec($ch);
        $err = curl_error($ch);
        curl_close($ch);
        if ($err) {
            echo 'cURL Error #:' . $err;
            return $err;
        }
        return $response;
    } catch (Exception $E) {
        // Double quotes so that \n is emitted as a real newline.
        echo 'Response: ' . $E . "\n";
        // Exception has no lastResponse property; report the message instead.
        return $E->getMessage();
    }
}
// Argument order: source, completion, callback, userdata, forbiddens, boostings,
// wordAlignment, fullText, diarization, sed (ten arguments in total).
//$response = req_url('https://example.com/sample.mp4', 'sync', null, null, null, null, null, null, null, null);
//$response = req_object_storage('data/sample.mp4', 'sync', null, null, null, null, null, null, null, null);
$response = req_upload('/data/sample.mp4', 'sync', null, null, null, null, null, null, null, null);
echo $response;
?>
C#
C# ベースの APIのサンプルコードは次の通りです。
using System;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Text.RegularExpressions;
using System.Threading.Channels;
using System.Threading.Tasks;
namespace HttpClientStatus
{
    /// <summary>
    /// Recognition options serialized to JSON and sent as the "params" form field.
    /// </summary>
    public class ClovaSpeechRequest
    {
        public string language { get; set; }
        public string completion { get; set; }
        // Other parameters are not shown here. For the list of available parameters, see
        // "Request recognition with an Object Storage file URL" (https://api.ncloud-docs.com/release-20241017/docs/ai-application-service-clovaspeech-longsentence/objectstorageurl),
        // "Request recognition with an external URL" (https://api.ncloud-docs.com/release-20241017/docs/ai-application-service-clovaspeech-longsentence/externalurl) and
        // "Request recognition by uploading a local file" (https://api.ncloud-docs.com/release-20241017/docs/ai-application-service-clovaspeech-longsentence/local).
    }

    public class Program
    {
        private static readonly string secretKey = "";
        private static readonly string invokeUrl = "";

        /// <summary>
        /// Uploads the file at <paramref name="path"/> together with the serialized
        /// request options and returns the response body.
        /// </summary>
        /// <param name="clovaSpeechRequest">Options serialized into the "params" field.</param>
        /// <param name="path">Path of the local media file sent as the "media" field.</param>
        /// <returns>The response body as a string.</returns>
        public static async Task<string> Upload(ClovaSpeechRequest clovaSpeechRequest, string path)
        {
            // Dispose the client, the multipart content and the file stream deterministically,
            // including when the request throws.
            using (var client = new HttpClient())
            using (var multiForm = new MultipartFormDataContent())
            using (FileStream fs = File.OpenRead(path))
            {
                multiForm.Headers.Add("X-CLOVASPEECH-API-KEY", secretKey);
                multiForm.Add(new StringContent(JsonSerializer.Serialize(clovaSpeechRequest)), "params");
                Console.WriteLine(Path.GetFileName(path));
                multiForm.Add(new StreamContent(fs), "media", Path.GetFileName(path));
                using (var message = await client.PostAsync(invokeUrl + "/recognizer/upload", multiForm))
                {
                    return await message.Content.ReadAsStringAsync();
                }
            }
        }

        /// <summary>
        /// Sample entry point: uploads one file with language "ko-KR" and completion "sync"
        /// and prints the response.
        /// </summary>
        static async Task Main(string[] args)
        {
            var clovaSpeechRequest = new ClovaSpeechRequest
            {
                language = "ko-KR",
                completion = "sync"
            };
            // Verbatim string: backslashes are literal, so each separator is written once.
            var result = await Upload(clovaSpeechRequest, @"D:\media\video\sample.mp3");
            Console.WriteLine(result);
        }
    }
}
この記事は役に立ちましたか?