stt (Speech-To-Text)
- Print
- PDF
stt (Speech-To-Text)
- Print
- PDF
Article Summary
Share feedback
Thanks for sharing your feedback!
Overview
CLOVA Speech Recognition (CSR) API is an HTTP based REST API that gets audio input in the specified language and returns the result of the speech recognition as text.
The supported input speech data formats are MP3, AAC, AC3, OGG, FLAC, and WAV.
Request
Method | Request URI |
---|---|
POST | https://naveropenapi.apigw.ntruss.com/recog/v1/stt |
Request Parameters
Parameter | Type | Description | Required |
---|---|---|---|
lang | string | Language to use for speech recognition - Kor: Korean - Jpn: Japanese - Chn: Chinese - Eng: English | Required |
Request Header
Header | Description |
---|---|
X-NCP-APIGW-API-KEY-ID | Client ID issued when registering an app. Format: `X-NCP-APIGW-API-KEY-ID: {Client ID}` |
X-NCP-APIGW-API-KEY | Client Secret issued when registering an app. Format: `X-NCP-APIGW-API-KEY: {Client Secret}` |
Content-Type | Set this to application/octet-stream. Format: `Content-Type: application/octet-stream` |
Request Body
Field | Required | Type | Limitations | Description |
---|---|---|---|---|
image | Y | mp3, aac, ac3, ogg, flac, wav | Binary sound data (up to 60 sec.) | Speech file |
Response
Response Body
Field | Data type | Description |
---|---|---|
text | string | Text for the speech data |
Examples
Request Example
[HTTP Request URL]
https://naveropenapi.apigw.ntruss.com/recog/v1/stt?lang=Kor
[HTTP Request Body]
--- binary sound data ---
Response Example
{
"text": "Hello"
}
API examples
This section provides code examples of using the CSR API in each supported programming language (Java, PHP, Node.js, Python, and C#).
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
public class Main {
    /**
     * Sample client for the CSR speech-to-text endpoint.
     *
     * Streams the audio file as the raw body of a POST request
     * (Content-Type: application/octet-stream) and prints the response body.
     * For a non-200 status the status code is printed first, followed by the
     * error body returned by the server.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String clientId = "YOUR_CLIENT_ID"; // Application Client ID
        String clientSecret = "YOUR_CLIENT_SECRET"; // Application Client Secret
        try {
            String imgFile = "Speech file path";
            File voiceFile = new File(imgFile);
            String language = "Kor"; // Language code (Kor, Jpn, Eng, Chn)
            String apiURL = "https://naveropenapi.apigw.ntruss.com/recog/v1/stt?lang=" + language;
            URL url = new URL(apiURL);
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setUseCaches(false);
            conn.setDoOutput(true);
            conn.setDoInput(true);
            conn.setRequestProperty("Content-Type", "application/octet-stream");
            conn.setRequestProperty("X-NCP-APIGW-API-KEY-ID", clientId);
            conn.setRequestProperty("X-NCP-APIGW-API-KEY", clientSecret);

            // try-with-resources closes both streams even when the copy fails.
            // The file is opened first so a missing file is reported before
            // the request body stream is requested from the connection.
            try (FileInputStream inputStream = new FileInputStream(voiceFile);
                 OutputStream outputStream = conn.getOutputStream()) {
                byte[] buffer = new byte[4096];
                int bytesRead;
                while ((bytesRead = inputStream.read(buffer)) != -1) {
                    outputStream.write(buffer, 0, bytesRead);
                }
                outputStream.flush();
            }

            int responseCode = conn.getResponseCode();
            InputStream bodyStream;
            if (responseCode == 200) { // Normal
                bodyStream = conn.getInputStream();
            } else { // Error occurred.
                System.out.println("error!!!!!!! responseCode= " + responseCode);
                // getInputStream() throws for error statuses; the error body
                // is only available through getErrorStream(), which may be null.
                bodyStream = conn.getErrorStream();
            }

            if (bodyStream != null) {
                // Decode explicitly as UTF-8 instead of the platform default
                // charset so non-ASCII recognition results print correctly.
                try (BufferedReader br = new BufferedReader(new InputStreamReader(bodyStream, "UTF-8"))) {
                    StringBuilder response = new StringBuilder();
                    String inputLine;
                    while ((inputLine = br.readLine()) != null) {
                        response.append(inputLine);
                    }
                    System.out.println(response.toString());
                }
            } else {
                System.out.println("error !!!");
            }
        } catch (Exception e) {
            System.out.println(e);
        }
    }
}
<?php
// Sample client for the CSR speech-to-text endpoint: posts the audio file as
// the raw request body and echoes the response (or the cURL error message).
$file_path = "Speech file path";
$lang = "Kor"; // Language code (Kor, Jpn, Eng, Chn)
$client_id = "YOUR_CLIENT_KEY";
$client_secret = "YOUR_CLIENT_SECRET";

// Read the audio before touching the network. file_get_contents() returns
// false on failure; posting that value would send an empty body and the
// problem would only show up as a remote "Empty Sound Data" error.
$audio = file_get_contents($file_path);
if ($audio === false) {
    echo "Failed to read speech file: " . $file_path;
    exit(1);
}

$curl = curl_init();
curl_setopt_array($curl, array(
    CURLOPT_URL => "https://naveropenapi.apigw.ntruss.com/recog/v1/stt?lang=".$lang,
    CURLOPT_RETURNTRANSFER => 1, // return the response body from curl_exec() instead of printing it
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_POST => 1,
    CURLOPT_POSTFIELDS => $audio,
    CURLOPT_HTTPHEADER => array(
        "Content-Type: application/octet-stream",
        "X-NCP-APIGW-API-KEY-ID: ".$client_id,
        "X-NCP-APIGW-API-KEY: ".$client_secret
    ),
));
$response = curl_exec($curl);
$err = curl_error($curl);
curl_close($curl);
if ($err) {
    // Transport-level failure (DNS, TLS, timeout, ...).
    echo "cURL Error #:" . $err;
} else {
    echo $response;
}
?>
const fs = require('fs');
const request = require('request');
// Credentials issued when registering the app; sent as the
// X-NCP-APIGW-API-KEY-ID and X-NCP-APIGW-API-KEY request headers.
const clientId = 'YOUR_CLIENT_ID';
const clientSecret = 'YOUR_CLIENT_SECRET';
// language => Language code (Kor, Jpn, Eng, Chn)
/**
 * Posts an audio file to the CSR speech-to-text endpoint and logs the HTTP
 * status code and the response body.
 *
 * @param {string} language Language code (Kor, Jpn, Eng, Chn).
 * @param {string} filePath Path of the speech file to upload.
 */
function stt(language, filePath) {
  // Encode the query value so an unexpected character cannot corrupt the URL.
  const url = `https://naveropenapi.apigw.ntruss.com/recog/v1/stt?lang=${encodeURIComponent(language)}`;
  const audio = fs.createReadStream(filePath);
  const requestConfig = {
    url: url,
    method: 'POST',
    headers: {
      'Content-Type': 'application/octet-stream',
      'X-NCP-APIGW-API-KEY-ID': clientId,
      'X-NCP-APIGW-API-KEY': clientSecret
    },
    body: audio
  };
  const pending = request(requestConfig, (err, response, body) => {
    if (err) {
      console.log(err);
      return;
    }
    console.log(response.statusCode);
    console.log(body);
  });
  // A read failure (for example a missing file) is emitted on the stream, not
  // passed to the request callback. Without a listener Node treats it as an
  // unhandled 'error' event and terminates the process, so log it and abort
  // the request that would otherwise wait for a body that never arrives.
  audio.on('error', (err) => {
    console.log(err);
    pending.abort();
  });
}
// Example invocation: first argument is the language code, second is the path of the speech file to upload.
stt('Kor', 'Speech file path (ex: ./test.wav)');
import sys
import requests
# Sample client for the CSR speech-to-text endpoint: posts the audio file as
# the raw request body and prints the response text.
client_id = "YOUR_CLIENT_ID"
client_secret = "YOUR_CLIENT_SECRET"
lang = "Kor"  # Language code (Kor, Jpn, Eng, Chn)
url = "https://naveropenapi.apigw.ntruss.com/recog/v1/stt?lang=" + lang
headers = {
    "X-NCP-APIGW-API-KEY-ID": client_id,
    "X-NCP-APIGW-API-KEY": client_secret,
    "Content-Type": "application/octet-stream"
}
# Open the audio in a context manager so the file handle is closed once the
# upload finishes, including when the request raises.
with open('Speech file path', 'rb') as data:
    response = requests.post(url, data=data, headers=headers)
rescode = response.status_code
if rescode == 200:
    print(response.text)
else:
    print("Error : " + response.text)
using System;
using System.Net;
using System.Text;
using System.IO;
using System.Collections.Generic;
using System.Collections.Specialized;
namespace NaverAPI_Guide
{
    /// <summary>
    /// Console sample that posts an audio file to the CSR speech-to-text
    /// endpoint and prints the body of the response.
    /// </summary>
    class APIExamSTT
    {
        /// <summary>
        /// Reads the speech file, sends it as the raw body of a POST request
        /// (Content-Type: application/octet-stream) and writes the response
        /// body to the console. When the server answers with an error status,
        /// the error body is written instead of letting the exception escape.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            string FilePath = "YOUR_FILE_NAME";
            // File.ReadAllBytes reads the complete file and closes it; a single
            // Stream.Read call is allowed to return fewer bytes than requested.
            byte[] fileData = File.ReadAllBytes(FilePath);

            string lang = "Kor"; // Language code (Kor, Jpn, Eng, Chn)
            string url = $"https://naveropenapi.apigw.ntruss.com/recog/v1/stt?lang={lang}";
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Headers.Add("X-NCP-APIGW-API-KEY-ID", "YOUR_CLIENT_ID");
            request.Headers.Add("X-NCP-APIGW-API-KEY", "YOUR_CLIENT_SECRET");
            request.Method = "POST";
            request.ContentType = "application/octet-stream";
            request.ContentLength = fileData.Length;
            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(fileData, 0, fileData.Length);
            }

            try
            {
                using (WebResponse response = request.GetResponse())
                {
                    Console.WriteLine(ReadBody(response));
                }
            }
            catch (WebException ex) when (ex.Response != null)
            {
                // GetResponse throws for error status codes; the response
                // attached to the exception carries the error body.
                using (WebResponse errorResponse = ex.Response)
                {
                    Console.WriteLine("Error : " + ReadBody(errorResponse));
                }
            }
        }

        /// <summary>
        /// Reads the whole body of <paramref name="response"/> as UTF-8 text,
        /// disposing the stream and reader afterwards.
        /// </summary>
        /// <param name="response">Response whose body is read.</param>
        /// <returns>The response body as a string.</returns>
        private static string ReadBody(WebResponse response)
        {
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }
    }
}
Error Codes
HTTP status code | Error code | Error message | Description |
---|---|---|---|
413 | STT000 | Request Entity Too Large | Speech data volume exceeded (up to 3 MB). |
413 | STT001 | Exceed Sound Data length | Speech data length exceeded (60 sec.). |
400 | STT002 | Invalid Content Type | The content-type is not application/octet-stream. |
400 | STT003 | Empty Sound Data | No speech data entered. |
400 | STT004 | Empty Language | No language parameter entered. |
400 | STT005 | Invalid Language | Invalid language specified. |
500 | STT006 | Failed to pre-processing | Error occurred while pre-processing speech recognition. Check if speech data is a valid WAV, MP3, or FLAC file. |
400 | STT007 | Too Short Sound Data | The voice data length is too short. (400ms or less) |
500 | STT998 | Failed to STT | Error occurred during speech recognition. Contact us and we will take action as soon as possible. |
500 | STT999 | Internal Server Error | Unknown error occurred. Contact us and we will take action as soon as possible. |
Was this article helpful?