stt (Speech-To-Text)
    • PDF

    stt (Speech-To-Text)

    • PDF

    Article Summary

    Overview

    CLOVA Speech Recognition (CSR) API is an HTTP based REST API that gets audio input in the specified language and returns the result of the speech recognition as text.
    The supported input speech data formats are MP3, AAC, AC3, OGG, FLAC, and WAV.

    Request

    Method | Request URI
    POST | https://naveropenapi.apigw.ntruss.com/recog/v1/stt

    Request Parameters

    Parameter | Type | Description | Required
    lang | string | Language to use for speech recognition:
    - Kor: Korean
    - Jpn: Japanese
    - Chn: Chinese
    - Eng: English
    Required

    Request Header

    Header | Description
    X-NCP-APIGW-API-KEY-ID | Client ID issued when registering an app
    X-NCP-APIGW-API-KEY-ID:{Client ID}
    X-NCP-APIGW-API-KEY | Client Secret issued when registering an app
    X-NCP-APIGW-API-KEY:{Client Secret}
    Content-Type | Set this to application/octet-stream.
    Content-Type: application/octet-stream

    Request Body

    Field | Required | Type | Limitations | Description
    image | Y | mp3, aac, ac3, ogg, flac, wav | Binary sound data (up to 60 sec.) | Speech file

    Response

    Response Body

    Field | Data type | Description
    text | string | Text for the speech data

    Examples

    Request Example

    [HTTP Request URL]
    https://naveropenapi.apigw.ntruss.com/recog/v1/stt?lang=Kor
    
    [HTTP Request Body]
    --- binary sound data ---
    

    Response Example

    {
      "text": "Hello"
    }
    

    API examples

    This section provides code examples of using the CSR API in each programming language (Java, PHP, Node.js, Python, and C#).

    import java.io.*;
    import java.net.HttpURLConnection;
    import java.net.URL;
    
    /**
     * Sample client for the CLOVA Speech Recognition (CSR) API.
     *
     * <p>Uploads a local speech file as the raw body of a POST request and prints
     * the body of the HTTP response to standard output.
     */
    public class Main {

        /**
         * Sends the speech file to the CSR endpoint and prints the response body.
         * Any exception raised along the way is printed rather than propagated.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            String clientId = "YOUR_CLIENT_ID";             // Application Client ID
            String clientSecret = "YOUR_CLIENT_SECRET";     // Application Client Secret

            HttpURLConnection conn = null;
            try {
                String imgFile = "Speech file path";
                File voiceFile = new File(imgFile);

                String language = "Kor";        // Language code (Kor, Jpn, Eng, Chn)
                String apiURL = "https://naveropenapi.apigw.ntruss.com/recog/v1/stt?lang=" + language;
                URL url = new URL(apiURL);

                conn = (HttpURLConnection) url.openConnection();
                conn.setRequestMethod("POST");
                conn.setUseCaches(false);
                conn.setDoOutput(true);
                conn.setDoInput(true);
                conn.setRequestProperty("Content-Type", "application/octet-stream");
                conn.setRequestProperty("X-NCP-APIGW-API-KEY-ID", clientId);
                conn.setRequestProperty("X-NCP-APIGW-API-KEY", clientSecret);

                // Stream the audio file into the request body. The file is opened first so
                // a missing file fails before any connection is made, and both streams are
                // closed even if the copy fails part-way.
                try (InputStream audio = new FileInputStream(voiceFile);
                     OutputStream requestBody = conn.getOutputStream()) {
                    byte[] buffer = new byte[4096];
                    int bytesRead;
                    while ((bytesRead = audio.read(buffer)) != -1) {
                        requestBody.write(buffer, 0, bytesRead);
                    }
                    requestBody.flush();
                }

                int responseCode = conn.getResponseCode();
                InputStream responseStream;
                if (responseCode == 200) { // Normal
                    responseStream = conn.getInputStream();
                } else { // Error occurred.
                    System.out.println("error!!!!!!! responseCode= " + responseCode);
                    // On HTTP error statuses getInputStream() throws, so the error body
                    // has to be read from the error stream instead.
                    responseStream = conn.getErrorStream();
                }

                // getErrorStream() returns null when the server sent no body.
                if (responseStream != null) {
                    StringBuilder response = new StringBuilder();
                    // Decode explicitly as UTF-8 instead of relying on the platform default.
                    try (BufferedReader br = new BufferedReader(new InputStreamReader(
                            responseStream, java.nio.charset.StandardCharsets.UTF_8))) {
                        String inputLine;
                        while ((inputLine = br.readLine()) != null) {
                            response.append(inputLine);
                        }
                    }
                    System.out.println(response.toString());
                } else {
                    System.out.println("error !!!");
                }
            } catch (Exception e) {
                System.out.println(e);
            } finally {
                if (conn != null) {
                    conn.disconnect();
                }
            }
        }
    }
    
    <?php

    // Sample client for the CSR API: posts a local speech file as the raw
    // request body and echoes the response body (or the cURL error).

    $file_path = "Speech file path";

    $lang = "Kor";  // Language code (Kor, Jpn, Eng, Chn)
    $client_id = "YOUR_CLIENT_ID";
    $client_secret = "YOUR_CLIENT_SECRET";

    // Read the audio up front and stop if it cannot be read; otherwise an
    // unreadable file would be sent as an empty request body.
    $audio = file_get_contents($file_path);
    if ($audio === false) {
      echo "Failed to read speech file: " . $file_path;
      exit(1);
    }

    $curl = curl_init();

    curl_setopt_array($curl, array(
      CURLOPT_URL => "https://naveropenapi.apigw.ntruss.com/recog/v1/stt?lang=".$lang,
      CURLOPT_RETURNTRANSFER => 1,
      CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
      CURLOPT_POST => 1,
      CURLOPT_POSTFIELDS => $audio,
      CURLOPT_HTTPHEADER => array(
        "Content-Type: application/octet-stream",
        "X-NCP-APIGW-API-KEY-ID: ".$client_id,
        "X-NCP-APIGW-API-KEY: ".$client_secret
      ),
    ));

    $response = curl_exec($curl);
    // Capture the error text before the handle is closed.
    $err = curl_error($curl);

    curl_close($curl);

    if ($err) {
      echo "cURL Error #:" . $err;
    } else {
      echo $response;
    }

    ?>
    
    const fs = require('fs');
    const request = require('request');

    const clientId = 'YOUR_CLIENT_ID';
    const clientSecret = 'YOUR_CLIENT_SECRET';

    /**
     * Posts the speech file at `filePath` to the CSR endpoint and logs the
     * HTTP status code and response body, or the error if the request failed.
     *
     * @param {string} language Language code (Kor, Jpn, Eng, Chn)
     * @param {string} filePath Path of the speech file to upload
     */
    function stt(language, filePath) {
        const endpoint = `https://naveropenapi.apigw.ntruss.com/recog/v1/stt?lang=${language}`;
        const headers = {
            'Content-Type': 'application/octet-stream',
            'X-NCP-APIGW-API-KEY-ID': clientId,
            'X-NCP-APIGW-API-KEY': clientSecret
        };
        const options = {
            url: endpoint,
            method: 'POST',
            headers,
            // The file is streamed as the request body.
            body: fs.createReadStream(filePath)
        };

        request(options, (err, response, body) => {
            if (!err) {
                console.log(response.statusCode);
                console.log(body);
            } else {
                console.log(err);
            }
        });
    }

    stt('Kor', 'Speech file path (ex: ./test.wav)');
    
    import sys
    import requests

    # Sample client for the CSR API: posts a local speech file as the raw
    # request body and prints the response text.
    client_id = "YOUR_CLIENT_ID"
    client_secret = "YOUR_CLIENT_SECRET"
    lang = "Kor" # Language code (Kor, Jpn, Eng, Chn)
    url = "https://naveropenapi.apigw.ntruss.com/recog/v1/stt?lang=" + lang
    headers = {
        "X-NCP-APIGW-API-KEY-ID": client_id,
        "X-NCP-APIGW-API-KEY": client_secret,
        "Content-Type": "application/octet-stream"
    }
    # Open the speech file in a context manager so the handle is closed as
    # soon as the upload finishes, even if the request raises.
    with open('Speech file path', 'rb') as data:
        response = requests.post(url, data=data, headers=headers)
    rescode = response.status_code
    if rescode == 200:
        print(response.text)
    else:
        print("Error : " + response.text)
    
    using System;
    using System.Net;
    using System.Text;
    using System.IO;
    using System.Collections.Generic;
    using System.Collections.Specialized;
    
    namespace NaverAPI_Guide
    {
        /// <summary>
        /// Sample client for the CSR API: posts a local speech file as the raw
        /// request body and writes the response body to the console.
        /// </summary>
        class APIExamSTT
        {
            /// <summary>
            /// Uploads the speech file and prints the response body. When the
            /// server answers with an error status, the error body is printed
            /// instead of letting the WebException escape.
            /// </summary>
            /// <param name="args">Unused.</param>
            static void Main(string[] args)
            {
                string FilePath = "YOUR_FILE_NAME";
                // A single Stream.Read call is not guaranteed to fill the buffer;
                // File.ReadAllBytes reads the whole file and closes it.
                byte[] fileData = File.ReadAllBytes(FilePath);

                string lang = "Kor";        // Language code (Kor, Jpn, Eng, Chn)
                string url = $"https://naveropenapi.apigw.ntruss.com/recog/v1/stt?lang={lang}";
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.Headers.Add("X-NCP-APIGW-API-KEY-ID", "YOUR_CLIENT_ID");
                request.Headers.Add("X-NCP-APIGW-API-KEY", "YOUR_CLIENT_SECRET");
                request.Method = "POST";
                request.ContentType = "application/octet-stream";
                request.ContentLength = fileData.Length;
                using (Stream requestStream = request.GetRequestStream())
                {
                    requestStream.Write(fileData, 0, fileData.Length);
                }

                try
                {
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    {
                        Console.WriteLine(ReadBody(response));
                    }
                }
                catch (WebException e)
                {
                    // GetResponse throws on error statuses; the error body is
                    // only reachable through the exception's Response.
                    if (e.Response == null)
                    {
                        throw;
                    }
                    using (WebResponse errorResponse = e.Response)
                    {
                        HttpWebResponse http = errorResponse as HttpWebResponse;
                        if (http != null)
                        {
                            Console.WriteLine("Error : " + (int)http.StatusCode);
                        }
                        Console.WriteLine(ReadBody(errorResponse));
                    }
                }
            }

            // Reads the whole response body as UTF-8 text and disposes the stream and reader.
            private static string ReadBody(WebResponse response)
            {
                using (Stream stream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
                {
                    return reader.ReadToEnd();
                }
            }
        }
    }
    

    Error Codes

    HTTP status code | Error code | Error message | Description
    413 | STT000 | Request Entity Too Large | Speech data volume exceeded (up to 3 MB).
    413 | STT001 | Exceed Sound Data length | Speech data length exceeded (60 sec.).
    400 | STT002 | Invalid Content Type | The content-type is not application/octet-stream.
    400 | STT003 | Empty Sound Data | No speech data entered.
    400 | STT004 | Empty Language | No language parameter entered.
    400 | STT005 | Invalid Language | Invalid language specified.
    500 | STT006 | Failed to pre-processing | Error occurred while pre-processing speech recognition. Check if speech data is a valid WAV, MP3, or FLAC file.
    400 | STT007 | Too Short Sound Data | The voice data length is too short (400 ms or less).
    500 | STT998 | Failed to STT | Error occurred during speech recognition. Contact us and we will take action as soon as possible.
    500 | STT999 | Internal Server Error | Unknown error occurred. Contact us and we will take action as soon as possible.

    Was this article helpful?

    What's Next
    Changing your password will log you out immediately. Use the new password to log back in.
    First name must have at least 2 characters. Numbers and special characters are not allowed.
    Last name must have at least 1 character. Numbers and special characters are not allowed.
    Enter a valid email
    Enter a valid password
    Your profile has been successfully updated.