Live streaming recognition

Prev Next

Available in Classic and VPC

This document introduces live streaming examples from the CLOVA Speech service.

Java

The following is Java sample code for the API.

  • Project Structure

    ├───pom.xml
    │
    └───src
        └───main
            ├───java
            │   └───com
            │       └───example
            │           └───grpc
            │                   GRpcClient.java
            │
            └───proto
                    nest.proto
    
  • pom.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <!--
      Maven build for the CLOVA Speech gRPC sample client.
      It pulls in the gRPC runtime libraries and generates Java sources from the
      .proto file with the protobuf-maven-plugin during the build.
    -->
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
    
        <groupId>com.example</groupId>
        <artifactId>clova-speech-grpc</artifactId>
        <version>1.0-SNAPSHOT</version>
        <!-- Shared version numbers and compiler settings referenced further down. -->
        <properties>
            <java.version>1.8</java.version>
            <maven.compiler.source>${java.version}</maven.compiler.source>
            <maven.compiler.target>${java.version}</maven.compiler.target>
            <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
            <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
            <!-- NOTE(review): netty.version is declared but not referenced anywhere in this POM. -->
            <netty.version>4.1.52.Final</netty.version>
            <!-- grpc.version is used for every io.grpc dependency and for the protoc gRPC plugin. -->
            <grpc.version>1.35.0</grpc.version>
            <protoc.version>3.14.0</protoc.version>
        </properties>
    
        <dependencies>
            <!--
              NOTE(review): both grpc-netty and grpc-netty-shaded are declared. The sample
              client imports io.grpc.netty.NettyChannelBuilder, which presumably comes from
              grpc-netty; confirm whether grpc-netty-shaded is needed.
            -->
            <dependency>
                <groupId>io.grpc</groupId>
                <artifactId>grpc-netty</artifactId>
                <version>${grpc.version}</version>
            </dependency>
            <dependency>
                <groupId>io.grpc</groupId>
                <artifactId>grpc-netty-shaded</artifactId>
                <version>${grpc.version}</version>
            </dependency>
            <dependency>
                <groupId>io.grpc</groupId>
                <artifactId>grpc-protobuf</artifactId>
                <version>${grpc.version}</version>
            </dependency>
            <dependency>
                <groupId>io.grpc</groupId>
                <artifactId>grpc-stub</artifactId>
                <version>${grpc.version}</version>
            </dependency>
            <!-- Optional dependency; the sample client shown with this POM does not use Lombok. -->
            <dependency>
                <groupId>org.projectlombok</groupId>
                <artifactId>lombok</artifactId>
                <optional>true</optional>
                <version>1.18.12</version>
            </dependency>
        </dependencies>
    
        <build>
            <extensions>
                <!-- Presumably supplies the ${os.detected.classifier} property used by the protobuf plugin below. -->
                <extension>
                    <groupId>kr.motd.maven</groupId>
                    <artifactId>os-maven-plugin</artifactId>
                    <version>1.6.1</version>
                </extension>
            </extensions>
            <plugins>
                <!-- Compiles main and test sources, showing deprecation warnings. -->
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.1</version>
                    <executions>
                        <execution>
                            <id>compile</id>
                            <phase>compile</phase>
                            <goals>
                                <goal>compile</goal>
                            </goals>
                        </execution>
                        <execution>
                            <id>testCompile</id>
                            <phase>test-compile</phase>
                            <goals>
                                <goal>testCompile</goal>
                            </goals>
                        </execution>
                    </executions>
                    <configuration>
                        <showDeprecation>true</showDeprecation>
                        <encoding>${project.build.sourceEncoding}</encoding>
                    </configuration>
                </plugin>
                <!--
                  Generates Java sources from the .proto file: the "compile" goal runs protoc,
                  and "compile-custom" runs the grpc-java protoc plugin configured below.
                  Both executables are selected per platform via ${os.detected.classifier}.
                -->
                <plugin>
                    <groupId>org.xolstice.maven.plugins</groupId>
                    <artifactId>protobuf-maven-plugin</artifactId>
                    <version>0.6.1</version>
                    <configuration>
                        <protocArtifact>
                            com.google.protobuf:protoc:${protoc.version}:exe:${os.detected.classifier}
                        </protocArtifact>
                        <pluginId>grpc-java</pluginId>
                        <pluginArtifact>
                            io.grpc:protoc-gen-grpc-java:${grpc.version}:exe:${os.detected.classifier}
                        </pluginArtifact>
                    </configuration>
                    <executions>
                        <execution>
                            <goals>
                                <goal>compile</goal>
                                <goal>compile-custom</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
        </build>
    </project>
    
  • Java

    package com.example.grpc;
    
    import java.io.FileInputStream;
    import java.util.concurrent.CountDownLatch;
    
    import com.google.protobuf.ByteString;
    import com.nbp.cdncp.nest.grpc.proto.v1.NestConfig;
    import com.nbp.cdncp.nest.grpc.proto.v1.NestData;
    import com.nbp.cdncp.nest.grpc.proto.v1.NestRequest;
    import com.nbp.cdncp.nest.grpc.proto.v1.NestResponse;
    import com.nbp.cdncp.nest.grpc.proto.v1.NestServiceGrpc;
    import com.nbp.cdncp.nest.grpc.proto.v1.RequestType;
    import io.grpc.ManagedChannel;
    import io.grpc.Metadata;
    import io.grpc.StatusRuntimeException;
    import io.grpc.netty.NettyChannelBuilder;
    import io.grpc.stub.MetadataUtils;
    import io.grpc.stub.StreamObserver;
    
    public class GRpcClient {
        public static void main(String[] args) throws Exception {
    
            CountDownLatch latch = new CountDownLatch(1);
            ManagedChannel channel = NettyChannelBuilder
                .forTarget("clovaspeech-gw.ncloud.com:50051")
                .useTransportSecurity()
                .build();
            NestServiceGrpc.NestServiceStub client = NestServiceGrpc.newStub(channel);
            Metadata metadata = new Metadata();
            metadata.put(Metadata.Key.of("Authorization", Metadata.ASCII_STRING_MARSHALLER),
                "Bearer ${secretKey}");
            client = MetadataUtils.attachHeaders(client, metadata);
    
            StreamObserver<NestResponse> responseObserver = new StreamObserver<NestResponse>() {
                @Override
                public void onNext(NestResponse response) {
                    System.out.println("Received response: " + response.getContents());
                }
    
                @Override
                public void onError(Throwable t) {
                    if(t instanceof StatusRuntimeException) {
                        StatusRuntimeException error = (StatusRuntimeException)t;
                        System.out.println(error.getStatus().getDescription());
                    }
                    latch.countDown();
                }
    
                @Override
                public void onCompleted() {
                    System.out.println("completed");
                    latch.countDown();
                }
            };
    
            StreamObserver<NestRequest> requestObserver = client.recognize(responseObserver);
    
            requestObserver.onNext(NestRequest.newBuilder()
                .setType(RequestType.CONFIG)
                .setConfig(NestConfig.newBuilder()
                    .setConfig("{\"transcription\":{\"language\":\"ko\"}}")
                    .build())
                .build());
    
            java.io.File file = new java.io.File("~/media/42s.wav");
            byte[] buffer = new byte[32000];
            int bytesRead;
            FileInputStream inputStream = new FileInputStream(file);
            while ((bytesRead = inputStream.read(buffer)) != -1) {
                requestObserver.onNext(NestRequest.newBuilder()
                    .setType(RequestType.DATA)
                    .setData(NestData.newBuilder()
                        .setChunk(ByteString.copyFrom(buffer, 0, bytesRead))
                        .setExtraContents("{ \"seqId\": 0, \"epFlag\": false}")
                        .build())
                    .build());
            }
            requestObserver.onCompleted();
            latch.await();
            channel.shutdown();
        }
    }
    

Python

The following is Python sample code for the API.

import grpc
import json

import nest_pb2
import nest_pb2_grpc

# Path to the audio file to be recognized.
# Expected format: PCM (headerless raw wave) at 16 kHz, 1 channel, 16 bits per sample.
AUDIO_PATH = "path/to/audio/file"
# Placeholder for the secret key; it is sent as the Bearer token in the
# "authorization" metadata of the recognize call.
CLIENT_SECRET = "Long sentence recognition secretKey"

def generate_requests(audio_path):
    """Yield the request stream for a single recognition call.

    The first item is a CONFIG request carrying the JSON transcription
    settings. Every following item is a DATA request holding up to 32,000
    bytes read from ``audio_path`` together with its JSON extra contents.
    """
    # Configuration message: must be produced before any audio data.
    settings = json.dumps({"transcription": {"language": "ko"}})
    yield nest_pb2.NestRequest(
        type=nest_pb2.RequestType.CONFIG,
        config=nest_pb2.NestConfig(config=settings),
    )

    with open(audio_path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read, i.e. end of file.
        for piece in iter(lambda: stream.read(32000), b""):
            extra = json.dumps({"seqId": 0, "epFlag": False})
            yield nest_pb2.NestRequest(
                type=nest_pb2.RequestType.DATA,
                data=nest_pb2.NestData(chunk=piece, extra_contents=extra),
            )

def main():
    """Stream AUDIO_PATH to the CLOVA Speech server and print each response.

    gRPC errors raised while starting or consuming the call are caught and
    their details printed. The channel is closed on every exit path.
    """
    # Using the channel as a context manager closes it even if creating the
    # stub or starting the call fails.
    with grpc.secure_channel(
        "clovaspeech-gw.ncloud.com:50051",
        grpc.ssl_channel_credentials(),
    ) as channel:
        stub = nest_pb2_grpc.NestServiceStub(channel)  # Stub for NestService
        # The secret key is sent as a Bearer token in the call metadata.
        metadata = (("authorization", f"Bearer {CLIENT_SECRET}"),)

        try:
            # Start the streaming call with the generated request sequence.
            responses = stub.recognize(generate_requests(AUDIO_PATH), metadata=metadata)
            # Responses arrive as the server produces them.
            for response in responses:
                print("Received response: " + response.contents)
        except grpc.RpcError as e:
            # Handle gRPC errors
            print(f"Error: {e.details()}")

# Run the sample only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()