長文認識
    • PDF

    長文認識

    • PDF

    記事の要約

    Classic/VPC環境で利用できます。

    CLOVA Speechサービスの長文認識のユースケースを紹介します。

    Java

    Javaベースの APIのサンプルコードは次の通りです。

    <!-- Apache HttpClient: HTTP transport used by the Java sample below -->
    <dependency>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpclient</artifactId>
        <version>4.5.12</version>
    </dependency>
    <!-- Multipart support (MultipartEntityBuilder); kept on the same release as httpclient -->
    <dependency>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpmime</artifactId>
        <version>4.5.12</version>
    </dependency>
    <!-- Gson: JSON serialization of the request parameters -->
    <dependency>
        <groupId>com.google.code.gson</groupId>
        <artifactId>gson</artifactId>
        <version>2.8.5</version>
    </dependency>
    
    package org.example.clovaspeech.client;
    
    import java.io.File;
    import java.nio.charset.StandardCharsets;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    
    import org.apache.http.Header;
    import org.apache.http.HttpEntity;
    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpPost;
    import org.apache.http.entity.ContentType;
    import org.apache.http.entity.StringEntity;
    import org.apache.http.entity.mime.MultipartEntityBuilder;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClients;
    import org.apache.http.message.BasicHeader;
    import org.apache.http.util.EntityUtils;
    
    import com.google.gson.Gson;
    
    /**
     * Sample client for the CLOVA Speech long-form recognition API.
     *
     * <p>Media can be submitted in three ways: by external URL ({@code /recognizer/url}),
     * by Object Storage key ({@code /recognizer/object-storage}), or by uploading a local
     * file ({@code /recognizer/upload}). Every request carries the
     * {@code X-CLOVASPEECH-API-KEY} header built from {@code SECRET}.
     */
    public class ClovaSpeechClient {

        // Clova Speech secret key
        private static final String SECRET = "";
        // Clova Speech invoke URL
        private static final String INVOKE_URL = "";

        private final CloseableHttpClient httpClient = HttpClients.createDefault();
        private final Gson gson = new Gson();

        // Headers attached to every request.
        private static final Header[] HEADERS = new Header[] {
            new BasicHeader("Accept", "application/json"),
            new BasicHeader("X-CLOVASPEECH-API-KEY", SECRET),
        };

        /** One keyword-boosting entry; serialized as an element of the {@code boostings} array. */
        public static class Boosting {
            private String words;

            public String getWords() {
                return words;
            }

            public void setWords(String words) {
                this.words = words;
            }
        }

        /** Diarization options; disabled by default, speaker-count bounds are optional. */
        public static class Diarization {
            private Boolean enable = Boolean.FALSE;
            private Integer speakerCountMin;
            private Integer speakerCountMax;

            public Boolean getEnable() {
                return enable;
            }

            public void setEnable(Boolean enable) {
                this.enable = enable;
            }

            public Integer getSpeakerCountMin() {
                return speakerCountMin;
            }

            public void setSpeakerCountMin(Integer speakerCountMin) {
                this.speakerCountMin = speakerCountMin;
            }

            public Integer getSpeakerCountMax() {
                return speakerCountMax;
            }

            public void setSpeakerCountMax(Integer speakerCountMax) {
                this.speakerCountMax = speakerCountMax;
            }
        }

        /**
         * Options sent under the {@code sed} key; disabled by default.
         * NOTE(review): presumably "sound event detection" - confirm against the API reference.
         */
        public static class Sed {
            private Boolean enable = Boolean.FALSE;

            public Boolean getEnable() {
                return enable;
            }

            public void setEnable(Boolean enable) {
                this.enable = enable;
            }
        }

        /** Optional recognition parameters shared by all three request types. */
        public static class NestRequestEntity {
            private String language = "ko-KR";
            // optional: how the result is returned, "sync" or "async"
            private String completion = "sync";
            // optional: callback used to receive the analyzed results
            private String callback;
            // optional: arbitrary user data
            private Map<String, Object> userdata;
            private Boolean wordAlignment = Boolean.TRUE;
            private Boolean fullText = Boolean.TRUE;
            // array of keyword-boosting objects
            private List<Boosting> boostings;
            // comma-separated words
            private String forbiddens;
            private Diarization diarization;
            private Sed sed;

            public Sed getSed() {
                return sed;
            }

            public void setSed(Sed sed) {
                this.sed = sed;
            }

            public String getLanguage() {
                return language;
            }

            public void setLanguage(String language) {
                this.language = language;
            }

            public String getCompletion() {
                return completion;
            }

            public void setCompletion(String completion) {
                this.completion = completion;
            }

            public String getCallback() {
                return callback;
            }

            public void setCallback(String callback) {
                this.callback = callback;
            }

            public Boolean getWordAlignment() {
                return wordAlignment;
            }

            public void setWordAlignment(Boolean wordAlignment) {
                this.wordAlignment = wordAlignment;
            }

            public Boolean getFullText() {
                return fullText;
            }

            public void setFullText(Boolean fullText) {
                this.fullText = fullText;
            }

            public Map<String, Object> getUserdata() {
                return userdata;
            }

            public void setUserdata(Map<String, Object> userdata) {
                this.userdata = userdata;
            }

            public String getForbiddens() {
                return forbiddens;
            }

            public void setForbiddens(String forbiddens) {
                this.forbiddens = forbiddens;
            }

            public List<Boosting> getBoostings() {
                return boostings;
            }

            public void setBoostings(List<Boosting> boostings) {
                this.boostings = boostings;
            }

            public Diarization getDiarization() {
                return diarization;
            }

            public void setDiarization(Diarization diarization) {
                this.diarization = diarization;
            }
        }

        /**
         * Requests recognition of media located at an external URL.
         *
         * @param url required, the media URL
         * @param nestRequestEntity optional recognition parameters; when {@code null},
         *     a default {@link NestRequestEntity} is used
         * @return the response body as a string
         */
        public String url(String url, NestRequestEntity nestRequestEntity) {
            return postJson("/recognizer/url", buildBody("url", url, nestRequestEntity));
        }

        /**
         * Requests recognition of media stored in Object Storage.
         *
         * @param dataKey required, the Object Storage key
         * @param nestRequestEntity optional recognition parameters; when {@code null},
         *     a default {@link NestRequestEntity} is used
         * @return the response body as a string
         */
        public String objectStorage(String dataKey, NestRequestEntity nestRequestEntity) {
            return postJson("/recognizer/object-storage", buildBody("dataKey", dataKey, nestRequestEntity));
        }

        /**
         * Uploads a local file and requests recognition of it.
         *
         * @param file required, the media file
         * @param nestRequestEntity optional recognition parameters; when {@code null},
         *     a default {@link NestRequestEntity} is used
         * @return the response body as a string
         */
        public String upload(File file, NestRequestEntity nestRequestEntity) {
            HttpPost httpPost = new HttpPost(INVOKE_URL + "/recognizer/upload");
            httpPost.setHeaders(HEADERS);
            // The parameters travel as a JSON text part next to the binary media part.
            HttpEntity httpEntity = MultipartEntityBuilder.create()
                .addTextBody("params", gson.toJson(orDefault(nestRequestEntity)), ContentType.APPLICATION_JSON)
                .addBinaryBody("media", file, ContentType.MULTIPART_FORM_DATA, file.getName())
                .build();
            httpPost.setEntity(httpEntity);
            return execute(httpPost);
        }

        /** Returns the given parameters, or a default instance when the caller passed {@code null}. */
        private static NestRequestEntity orDefault(NestRequestEntity nestRequestEntity) {
            return nestRequestEntity != null ? nestRequestEntity : new NestRequestEntity();
        }

        /**
         * Builds the JSON body shared by the URL and Object Storage requests.
         *
         * @param sourceKey name of the field identifying the media ("url" or "dataKey")
         * @param sourceValue value of that field
         * @param nestRequestEntity optional recognition parameters, may be {@code null}
         */
        private static Map<String, Object> buildBody(String sourceKey, String sourceValue,
                NestRequestEntity nestRequestEntity) {
            NestRequestEntity params = orDefault(nestRequestEntity);
            Map<String, Object> body = new HashMap<>();
            body.put(sourceKey, sourceValue);
            body.put("language", params.getLanguage());
            body.put("completion", params.getCompletion());
            body.put("callback", params.getCallback());
            // "userdata" must carry the user data, not the callback.
            body.put("userdata", params.getUserdata());
            body.put("wordAlignment", params.getWordAlignment());
            body.put("fullText", params.getFullText());
            body.put("forbiddens", params.getForbiddens());
            body.put("boostings", params.getBoostings());
            body.put("diarization", params.getDiarization());
            body.put("sed", params.getSed());
            return body;
        }

        /** Posts {@code body} as JSON to {@code INVOKE_URL + path} and returns the response body. */
        private String postJson(String path, Map<String, Object> body) {
            HttpPost httpPost = new HttpPost(INVOKE_URL + path);
            httpPost.setHeaders(HEADERS);
            httpPost.setEntity(new StringEntity(gson.toJson(body), ContentType.APPLICATION_JSON));
            return execute(httpPost);
        }

        /**
         * Executes the request and returns the response body decoded as UTF-8.
         *
         * @throws RuntimeException wrapping any exception raised while sending the request
         *     or reading the response
         */
        private String execute(HttpPost httpPost) {
            try (final CloseableHttpResponse httpResponse = httpClient.execute(httpPost)) {
                final HttpEntity entity = httpResponse.getEntity();
                return EntityUtils.toString(entity, StandardCharsets.UTF_8);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

        /** Demo entry point: uploads a local file with default parameters and prints the response. */
        public static void main(String[] args) {
            final ClovaSpeechClient clovaSpeechClient = new ClovaSpeechClient();
            NestRequestEntity requestEntity = new NestRequestEntity();
            final String result =
                clovaSpeechClient.upload(new File("/data/sample.mp4"), requestEntity);
            //final String result = clovaSpeechClient.url("file URL", requestEntity);
            //final String result = clovaSpeechClient.objectStorage("Object Storage key", requestEntity);
            System.out.println(result);
        }
    }
    

    Python

    Pythonベースの APIのサンプルコードは次の通りです。

    import requests
    import json
    
    
    class ClovaSpeechClient:
        """Sample client for the CLOVA Speech long-form recognition API.

        Each ``req_*`` method sends one POST request below ``invoke_url`` and
        returns the response object produced by ``requests.post``.
        """

        # Clova Speech invoke URL (issued when the app is registered)
        invoke_url = ''
        # Clova Speech secret key (issued when the app is registered)
        secret = ''

        @staticmethod
        def _build_params(language, completion, callback, userdata, forbiddens, boostings,
                          wordAlignment, fullText, diarization, sed):
            """Return the recognition parameters shared by every request type."""
            return {
                'language': language,
                'completion': completion,
                'callback': callback,
                'userdata': userdata,
                'wordAlignment': wordAlignment,
                'fullText': fullText,
                'forbiddens': forbiddens,
                'boostings': boostings,
                'diarization': diarization,
                'sed': sed,
            }

        def _post_json(self, path, request_body):
            """POST ``request_body`` as UTF-8 encoded JSON to ``invoke_url + path``."""
            headers = {
                'Accept': 'application/json;UTF-8',
                'Content-Type': 'application/json;UTF-8',
                'X-CLOVASPEECH-API-KEY': self.secret
            }
            return requests.post(headers=headers,
                                 url=self.invoke_url + path,
                                 data=json.dumps(request_body).encode('UTF-8'))

        def req_url(self, url, completion, callback=None, userdata=None, forbiddens=None, boostings=None, wordAlignment=True, fullText=True, diarization=None, sed=None, language='ko-KR'):
            """Request recognition of media located at an external URL.

            :param url: media URL, sent as ``url``
            :param completion: sent as ``completion`` (the sample uses ``'sync'``)
            :param language: sent as ``language``; defaults to ``'ko-KR'``
            :return: the response returned by ``requests.post``

            The remaining keyword arguments are forwarded unchanged in the JSON body.
            """
            request_body = {'url': url}
            request_body.update(self._build_params(language, completion, callback, userdata, forbiddens,
                                                   boostings, wordAlignment, fullText, diarization, sed))
            return self._post_json('/recognizer/url', request_body)

        def req_object_storage(self, data_key, completion, callback=None, userdata=None, forbiddens=None, boostings=None,
                               wordAlignment=True, fullText=True, diarization=None, sed=None, language='ko-KR'):
            """Request recognition of media stored in Object Storage.

            :param data_key: Object Storage key, sent as ``dataKey``
            :param completion: sent as ``completion`` (the sample uses ``'sync'``)
            :param language: sent as ``language``; defaults to ``'ko-KR'``
            :return: the response returned by ``requests.post``

            The remaining keyword arguments are forwarded unchanged in the JSON body.
            """
            request_body = {'dataKey': data_key}
            request_body.update(self._build_params(language, completion, callback, userdata, forbiddens,
                                                   boostings, wordAlignment, fullText, diarization, sed))
            return self._post_json('/recognizer/object-storage', request_body)

        def req_upload(self, file, completion, callback=None, userdata=None, forbiddens=None, boostings=None,
                       wordAlignment=True, fullText=True, diarization=None, sed=None, language='ko-KR'):
            """Upload a local file and request recognition of it.

            :param file: path of the media file; opened in binary mode for the upload
            :param completion: sent as ``completion`` (the sample uses ``'sync'``)
            :param language: sent as ``language``; defaults to ``'ko-KR'``
            :return: the response returned by ``requests.post``

            The remaining keyword arguments are forwarded unchanged in the ``params`` part.
            """
            request_body = self._build_params(language, completion, callback, userdata, forbiddens,
                                              boostings, wordAlignment, fullText, diarization, sed)
            # No Content-Type header here: requests generates the multipart one itself.
            headers = {
                'Accept': 'application/json;UTF-8',
                'X-CLOVASPEECH-API-KEY': self.secret
            }
            params_json = json.dumps(request_body, ensure_ascii=False).encode('UTF-8')
            print(params_json)
            # Close the media file once the request has been sent instead of leaking the handle.
            with open(file, 'rb') as media:
                files = {
                    'media': media,
                    'params': (None, params_json, 'application/json')
                }
                response = requests.post(headers=headers, url=self.invoke_url + '/recognizer/upload', files=files)
            return response
    
    if __name__ == '__main__':
        # Demo: upload a local file synchronously and print the raw response text.
        # The two commented-out lines show the URL and Object Storage variants.
        client = ClovaSpeechClient()
        # res = ClovaSpeechClient().req_url(url='http://example.com/media.mp3', completion='sync')
        # res = ClovaSpeechClient().req_object_storage(data_key='data/media.mp3', completion='sync')
        upload_response = client.req_upload(file='/data/media.mp3', completion='sync')
        print(upload_response.text)
    

    PHP

    PHPベースの長文認識 APIのサンプルコードは次の通りです。

    <?php
    
    // Secret key; sent by execute() as the X-CLOVASPEECH-API-KEY request header.
    $secret = '';
    // Invoke URL; execute() prepends it to every request path.
    $invoke_url = '';
    
    /**
     * Requests recognition of media located at an external URL.
     *
     * Only $url and $completion are required; every other argument defaults to null
     * and is forwarded unchanged in the JSON body.
     *
     * @param string $url        media URL, sent as "url"
     * @param string $completion sent as "completion" (the sample uses 'sync')
     * @return mixed whatever execute() returns
     */
    function req_url($url, $completion, $callback = null, $userdata = null, $forbiddens = null, $boostings = null,
                     $wordAlignment = null, $fullText = null, $diarization = null, $sed = null)
    {
        $object = (object)[
            'language' => 'ko-KR',
            'completion' => $completion,
            'callback' => $callback,
            'url' => $url,
            'userdata' => $userdata,
            'forbiddens' => $forbiddens,
            'boostings' => $boostings,
            'wordAlignment' => $wordAlignment,
            'fullText' => $fullText,
            'diarization' => $diarization,
            'sed' => $sed,
        ];
        return execute('/recognizer/url', json_encode($object), array('Content-Type: application/json'));
    }
    
    /**
     * Requests recognition of media stored in Object Storage.
     *
     * Only $dataKey and $completion are required; every other argument defaults to null
     * and is forwarded unchanged in the JSON body.
     *
     * @param string $dataKey    Object Storage key, sent as "dataKey"
     * @param string $completion sent as "completion" (the sample uses 'sync')
     * @return mixed whatever execute() returns
     */
    function req_object_storage($dataKey, $completion, $callback = null, $userdata = null, $forbiddens = null,
                                $boostings = null, $wordAlignment = null, $fullText = null, $diarization = null,
                                $sed = null)
    {
        $object = (object)[
            'language' => 'ko-KR',
            'completion' => $completion,
            'callback' => $callback,
            'dataKey' => $dataKey,
            'userdata' => $userdata,
            'forbiddens' => $forbiddens,
            'boostings' => $boostings,
            'wordAlignment' => $wordAlignment,
            'fullText' => $fullText,
            'diarization' => $diarization,
            'sed' => $sed,
        ];
        return execute('/recognizer/object-storage', json_encode($object), array('Content-Type: application/json'));
    }
    
    /**
     * Uploads a local file and requests recognition of it.
     *
     * Only $filePath and $completion are required; every other argument defaults to null
     * and is forwarded unchanged in the JSON "params" field.
     *
     * @param string $filePath   path of the media file, sent as the "media" part
     * @param string $completion sent as "completion" (the sample uses 'sync')
     * @return mixed whatever execute() returns
     */
    function req_upload($filePath, $completion, $callback = null, $userdata = null, $forbiddens = null,
                        $boostings = null, $wordAlignment = null, $fullText = null, $diarization = null,
                        $sed = null)
    {
        $object = (object)[
            'language' => 'ko-KR',
            'completion' => $completion,
            'callback' => $callback,
            'userdata' => $userdata,
            'forbiddens' => $forbiddens,
            'boostings' => $boostings,
            'wordAlignment' => $wordAlignment,
            'fullText' => $fullText,
            'diarization' => $diarization,
            'sed' => $sed,
        ];
        // Passing an array as the POST fields makes cURL send multipart/form-data.
        $fields = array(
            'media' => new CURLFile($filePath),
            'params' => json_encode($object),
        );
        return execute('/recognizer/upload', $fields, null);
    }
    
    /**
     * Sends a POST request to the invoke URL plus $uri and returns the response body.
     *
     * @param string       $uri           request path appended to $GLOBALS['invoke_url']
     * @param string|array $postFields    JSON string, or field array for a multipart upload
     * @param array|null   $customHeaders extra header lines added after the API key header
     * @return string|bool the response body, or the error message when the transfer fails
     */
    function execute($uri, $postFields, $customHeaders)
    {
        try {
            $ch = curl_init($GLOBALS['invoke_url'] . $uri);
            if ($ch === false) {
                $err = 'failed to initialize cURL';
                echo 'cURL Error #:' . $err;
                return $err;
            }
            curl_setopt($ch, CURLOPT_POST, true);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            // Keep TLS certificate verification on; disabling it allows man-in-the-middle attacks.
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
            curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST');
            curl_setopt($ch, CURLOPT_POSTFIELDS, $postFields);
            curl_setopt($ch, CURLOPT_VERBOSE, true);
            curl_setopt($ch, CURLOPT_TIMEOUT, 600);
            $headers = array();
            $headers[] = 'X-CLOVASPEECH-API-KEY: ' . $GLOBALS['secret'];
            if (!is_null($customHeaders)) {
                $headers = array_merge($headers, $customHeaders);
            }
            curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
            $response = curl_exec($ch);
            $err = curl_error($ch);
            curl_close($ch);
            if ($err) {
                echo 'cURL Error #:' . $err;
                return $err;
            }
            return $response;
        } catch (Exception $E) {
            // Double quotes so that \n is an actual newline.
            echo 'Response: ' . $E . "\n";
            // Exception has no lastResponse property; return the message like the cURL error path.
            return $E->getMessage();
        }
    }
    
    // Demo: upload a local file synchronously and print the response.
    // Each call passes the source, the completion mode and eight null option arguments,
    // in the order callback, userdata, forbiddens, boostings, wordAlignment, fullText,
    // diarization, sed.
    //$response = req_url('https://example.com/sample.mp4', 'sync', null, null, null, null, null, null, null, null);
    //$response = req_object_storage('data/sample.mp4', 'sync', null, null, null, null, null, null, null, null);
    $response = req_upload('/data/sample.mp4', 'sync', null, null, null, null, null, null, null, null);
    echo $response;
    ?>
    

    C#

    C# ベースの APIのサンプルコードは次の通りです。

    using System;
    using System.Globalization;
    using System.Net.Http;
    using System.Net.Http.Headers;
    using System.Text.RegularExpressions;
    using System.Threading.Channels;
    using System.Threading.Tasks;
    using System.Text.Json;
    using System.Text.Json.Serialization;
    using System.Text;
    using System.Diagnostics;
    
    namespace HttpClientStatus
    {
        // Request parameters serialized to JSON and sent as the "params" form field.
        // The lowercase property names become the JSON keys as written.
        public class ClovaSpeechRequest
        {
            public string language { get; set; }
            public string completion { get; set; }
    
    		// Only language and completion are declared here; other parameters are omitted.
    		// For the list of available parameters, see:
    		//   Request recognition by Object Storage file URL: https://api.ncloud-docs.com/release-20241017/docs/ai-application-service-clovaspeech-longsentence/objectstorageurl
    		//   Request recognition by external URL: https://api.ncloud-docs.com/release-20241017/docs/ai-application-service-clovaspeech-longsentence/externalurl
    		//   Request recognition by uploading a local file: https://api.ncloud-docs.com/release-20241017/docs/ai-application-service-clovaspeech-longsentence/local
        }
        // Sample program: uploads a local media file and prints the response body.
        public class Program
        {
            private static readonly string secretKey = "";
            private static readonly string invokeUrl = "";

            /// <summary>
            /// Uploads the file at <paramref name="path"/> together with the JSON-serialized
            /// request parameters and returns the response body as a string.
            /// </summary>
            /// <param name="clovaSpeechRequest">Parameters serialized into the "params" part.</param>
            /// <param name="path">Path of the media file sent as the "media" part.</param>
            public static async Task<string> Upload(ClovaSpeechRequest clovaSpeechRequest, string path)
            {
                // File and Path are fully qualified because System.IO is not among this file's usings.
                // The using statements release the client, the content and the file handle
                // even when the request throws.
                using (var client = new HttpClient())
                using (var multiForm = new MultipartFormDataContent())
                using (var fs = System.IO.File.OpenRead(path))
                {
                    multiForm.Headers.Add("X-CLOVASPEECH-API-KEY", secretKey);
                    multiForm.Add(new StringContent(JsonSerializer.Serialize(clovaSpeechRequest)), "params");
                    string fileName = System.IO.Path.GetFileName(path);
                    Console.WriteLine(fileName);
                    multiForm.Add(new StreamContent(fs), "media", fileName);
                    using (var message = await client.PostAsync(invokeUrl + "/recognizer/upload", multiForm))
                    {
                        return await message.Content.ReadAsStringAsync();
                    }
                }
            }

            static async Task Main(string[] args)
            {
                var clovaSpeechRequest = new ClovaSpeechRequest
                {
                    language = "ko-KR",
                    completion = "sync"
                };

                // Verbatim string: backslashes are literal, so a single separator is enough.
                var result = await Upload(clovaSpeechRequest, @"D:\media\video\sample.mp3");
                Console.WriteLine(result);
            }
        }
    }
    

    この記事は役に立ちましたか?

    Changing your password will log you out immediately. Use the new password to log back in.
    First name must have at least 2 characters. Numbers and special characters are not allowed.
    Last name must have at least 1 character. Numbers and special characters are not allowed.
    Enter a valid email
    Enter a valid password
    Your profile has been successfully updated.