Extract Video Frames as Images with OpenGL and SurfaceTexture on Android
This guide explains how to convert video stream data into individual bitmap images on Android by using SurfaceTexture to receive decoded frames, OpenGL to render them as external textures, and glReadPixels to capture the RGBA data for saving as PNG files.
The previous article covered video decoding and playback using TextureView without handling the raw video stream; this tutorial shows how to preprocess each frame using OpenGL and SurfaceTexture.
SurfaceTexture converts video stream data into an external GL texture, while OpenGL renders that texture.
To extract frames, decode the video into a SurfaceTexture, let it become a GL texture, then read the rendered RGBA data with glReadPixels and store it in a Bitmap via copyPixelsFromBuffer.
Create Rendering Environment
Because the final texture must be drawn with OpenGL, first initialize an off-screen EGL/OpenGL rendering environment.
[Figure: OpenGL rendering flow prior to drawing.]
The initialization code lives in eglSetup() of CodecOutputSurface, included in the full source at the end of this article.
Create External Texture
Instantiate a SurfaceTexture as the decoder output.
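A SurfaceTexture wraps a GL texture id, and that id must name a texture created for the external-OES target (the article's STextureRender does this internally; its source is omitted below). A minimal sketch of that step, with common filter and wrap parameters assumed rather than taken from the article:
// Generate a texture name and configure it as a GL_TEXTURE_EXTERNAL_OES texture.
int[] textures = new int[1];
GLES20.glGenTextures(1, textures, 0);
int textureId = textures[0];
GLES20.glBindTexture(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, textureId);
// Typical filtering/wrapping defaults; not necessarily the article's exact choices.
GLES20.glTexParameterf(GLES11Ext.GL_TEXTURE_EXTERNAL_OES,
        GLES20.GL_TEXTURE_MIN_FILTER, GLES20.GL_NEAREST);
GLES20.glTexParameterf(GLES11Ext.GL_TEXTURE_EXTERNAL_OES,
        GLES20.GL_TEXTURE_MAG_FILTER, GLES20.GL_LINEAR);
GLES20.glTexParameteri(GLES11Ext.GL_TEXTURE_EXTERNAL_OES,
        GLES20.GL_TEXTURE_WRAP_S, GLES20.GL_CLAMP_TO_EDGE);
GLES20.glTexParameteri(GLES11Ext.GL_TEXTURE_EXTERNAL_OES,
        GLES20.GL_TEXTURE_WRAP_T, GLES20.GL_CLAMP_TO_EDGE);
With textureId in hand, create the SurfaceTexture: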
mSurfaceTexture = new SurfaceTexture(textureId);
Set the frame-available listener:
mSurfaceTexture.setOnFrameAvailableListener(this);
Feed decoded data to the SurfaceTexture:
boolean doRender = (info.size != 0);
decoder.releaseOutputBuffer(decoderStatus, doRender);
if (doRender) {
if (mStopped.get()) {
outputDone = true;
} else {
// wait for SurfaceTexture update
outputSurface.awaitNewImage();
// draw texture
outputSurface.drawImage(true);
}
}
Perform Rendering
When the decoder outputs to the SurfaceTexture, a listener notifies that new content is available; the following method draws the external texture onto the current EGL surface.
/**
* Draws the external texture in HJSurfaceTexture onto the current EGL surface.
*/
public void drawFrame(android.graphics.SurfaceTexture st, boolean invert) {
checkGlError("onDrawFrame start");
st.getTransformMatrix(mSTMatrix);
if (invert) {
mSTMatrix[5] = -mSTMatrix[5];
mSTMatrix[13] = 1.0f - mSTMatrix[13];
}
GLES20.glClearColor(0.0f, 1.0f, 0.0f, 1.0f);
GLES20.glClear(GLES20.GL_COLOR_BUFFER_BIT);
GLES20.glUseProgram(mProgram);
GLES20.glActiveTexture(GLES20.GL_TEXTURE0);
GLES20.glBindTexture(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, mTextureID);
mTriangleVertices.position(TRIANGLE_VERTICES_DATA_POS_OFFSET);
GLES20.glVertexAttribPointer(maPositionHandle, 3, GLES20.GL_FLOAT, false,
TRIANGLE_VERTICES_DATA_STRIDE_BYTES, mTriangleVertices);
GLES20.glEnableVertexAttribArray(maPositionHandle);
mTriangleVertices.position(TRIANGLE_VERTICES_DATA_UV_OFFSET);
GLES20.glVertexAttribPointer(maTextureHandle, 2, GLES20.GL_FLOAT, false,
TRIANGLE_VERTICES_DATA_STRIDE_BYTES, mTriangleVertices);
GLES20.glEnableVertexAttribArray(maTextureHandle);
Matrix.setIdentityM(mMVPMatrix, 0);
GLES20.glUniformMatrix4fv(muMVPMatrixHandle, 1, false, mMVPMatrix, 0);
GLES20.glUniformMatrix4fv(muSTMatrixHandle, 1, false, mSTMatrix, 0);
GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4);
GLES20.glBindTexture(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, 0);
}
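The mProgram, attribute/uniform handles, and mTriangleVertices fields above belong to STextureRender, whose source is omitted at the end of this article. For reference, shaders and a vertex layout consistent with the handles used here would look roughly like the following sketch; the exact strings are assumptions, not the article's verbatim code:
// Vertex shader: applies the MVP matrix and the SurfaceTexture transform matrix.
private static final String VERTEX_SHADER =
        "uniform mat4 uMVPMatrix;\n" +
        "uniform mat4 uSTMatrix;\n" +
        "attribute vec4 aPosition;\n" +
        "attribute vec4 aTextureCoord;\n" +
        "varying vec2 vTextureCoord;\n" +
        "void main() {\n" +
        "    gl_Position = uMVPMatrix * aPosition;\n" +
        "    vTextureCoord = (uSTMatrix * aTextureCoord).xy;\n" +
        "}\n";
// Fragment shader: external textures require the GL_OES_EGL_image_external
// extension and the samplerExternalOES sampler type.
private static final String FRAGMENT_SHADER =
        "#extension GL_OES_EGL_image_external : require\n" +
        "precision mediump float;\n" +
        "varying vec2 vTextureCoord;\n" +
        "uniform samplerExternalOES sTexture;\n" +
        "void main() {\n" +
        "    gl_FragColor = texture2D(sTexture, vTextureCoord);\n" +
        "}\n";
// A full-screen quad interleaved as X, Y, Z, U, V (stride = 5 floats = 20 bytes),
// consistent with TRIANGLE_VERTICES_DATA_POS_OFFSET = 0 and _UV_OFFSET = 3.
private static final float[] TRIANGLE_VERTICES_DATA = {
        -1.0f, -1.0f, 0f, 0f, 0f,
         1.0f, -1.0f, 0f, 1f, 0f,
        -1.0f,  1.0f, 0f, 0f, 1f,
         1.0f,  1.0f, 0f, 1f, 1f,
};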
Read Rendered Data as Image
1. Use glReadPixels to copy the current frame into a ByteBuffer.
2. Convert the buffer to a Bitmap with copyPixelsFromBuffer and save it as a PNG.
// Read the rendered RGBA pixels from the current EGL surface into a direct ByteBuffer.
mPixelBuf.rewind();
GLES20.glReadPixels(0, 0, mWidth, mHeight, GLES20.GL_RGBA, GLES20.GL_UNSIGNED_BYTE,
mPixelBuf);
BufferedOutputStream bos = null;
try {
bos = new BufferedOutputStream(new FileOutputStream(filename));
Bitmap bmp = Bitmap.createBitmap(mWidth, mHeight, Bitmap.Config.ARGB_8888);
mPixelBuf.rewind();
bmp.copyPixelsFromBuffer(mPixelBuf);
bmp.compress(Bitmap.CompressFormat.PNG, 90, bos); // the quality argument is ignored for lossless PNG
bmp.recycle();
} finally {
if (bos != null) bos.close();
}
The resulting bitmap is the extracted video frame. Because glReadPixels returns rows bottom-up, drawImage(true) renders the frame vertically inverted; the two flips cancel and the saved image comes out upright.
Note
This method suits real-time, high-throughput frame extraction. For occasional thumbnail generation, MediaMetadataRetriever.getFrameAtTime is simpler but much slower (≈300-500 ms per frame); see the sketch below.
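For comparison, a minimal MediaMetadataRetriever sketch; the input path and timestamp here are placeholders:
MediaMetadataRetriever retriever = new MediaMetadataRetriever();
try {
    retriever.setDataSource("/sdcard/input.mp4");  // hypothetical path
    // Time is in microseconds; OPTION_CLOSEST_SYNC snaps to the nearest sync frame,
    // which is faster than OPTION_CLOSEST but not frame-accurate.
    Bitmap thumbnail = retriever.getFrameAtTime(1_000_000,
            MediaMetadataRetriever.OPTION_CLOSEST_SYNC);
} finally {
    retriever.release();
}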
Complete Demo
The following demo extracts frames synchronously (the previous article used asynchronous decoding).
import android.graphics.Bitmap;
import android.media.MediaCodec;
import android.media.MediaExtractor;
import android.media.MediaFormat;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;
// AppEnv and BitmapUtils are project-local helper classes.
/**
* Created by yuxueting on 2018-1-5.
*/
public class MediaBitmapFrameExtractor {
private static final String TAG = MediaBitmapFrameExtractor.class.getSimpleName();
private static final boolean DEBUG = AppEnv.DEBUG;
private static final int TIMEOUT_USEC = 10000; // read timeout (µs)
private static final int MAX_CACHE_FRAME_SIZE = 5; // max cached frames
private LinkedBlockingQueue<Bitmap> mFrames = new LinkedBlockingQueue<>(MAX_CACHE_FRAME_SIZE);
private MediaExtractor mExtractor;
private int mVideoTrackIndex = -1;
private AtomicBoolean mStopped = new AtomicBoolean(true);
private boolean mLoop = true;
private long mAllFrameGenTime;
private int mAllFrameCount;
public MediaBitmapFrameExtractor() {}
/** Start extracting frames */
public void startExtract(String inputVideo) {
startExtract(inputVideo, 0, 0);
}
/** Start extracting frames with optional width/height */
public void startExtract(final String inputVideo, final int width, final int height) {
// Mark the extractor as running before the worker thread starts, so callers
// polling isStopped() immediately after this call see a consistent state.
mStopped.set(false);
new Thread(new Runnable() {
@Override
public void run() {
try {
MediaFormat videoFormat = extractVideoFormat(inputVideo);
if (videoFormat != null) {
String mime = videoFormat.getString(MediaFormat.KEY_MIME);
CodecOutputSurface outputSurface;
if (width == 0 || height == 0) {
int nWidth = videoFormat.containsKey(MediaFormat.KEY_WIDTH) ?
videoFormat.getInteger(MediaFormat.KEY_WIDTH) : 0;
int nHeight = videoFormat.containsKey(MediaFormat.KEY_HEIGHT) ?
videoFormat.getInteger(MediaFormat.KEY_HEIGHT) : 0;
outputSurface = new CodecOutputSurface(nWidth, nHeight);
} else {
outputSurface = new CodecOutputSurface(width, height);
}
MediaCodec decoder = MediaCodec.createDecoderByType(mime);
decoder.configure(videoFormat, outputSurface.getSurface(), null, 0);
decoder.start();
doExtract(decoder, outputSurface);
}
} catch (Exception e) {
e.printStackTrace();
mStopped.set(true);
}
}
}).start();
}
/** Stop extraction */
public void stopExtract() {
mStopped.set(true);
while (true) {
Bitmap bitmap = mFrames.poll();
if (bitmap == null) {
mFrames.clear();
break;
}
BitmapUtils.recycleBitmap(bitmap);
}
}
public boolean isStopped() { return mStopped.get(); }
private void doExtract(MediaCodec decoder, CodecOutputSurface outputSurface) throws IOException {
ByteBuffer[] decoderInputBuffers = decoder.getInputBuffers();
MediaCodec.BufferInfo info = new MediaCodec.BufferInfo();
int inputChunk = 0;
boolean outputDone = false;
boolean inputDone = false;
while (!outputDone) {
if (!inputDone) {
int inputBufIndex = decoder.dequeueInputBuffer(TIMEOUT_USEC);
if (inputBufIndex >= 0) {
ByteBuffer inputBuf = decoderInputBuffers[inputBufIndex];
int chunkSize = mExtractor.readSampleData(inputBuf, 0);
if (chunkSize < 0) {
decoder.queueInputBuffer(inputBufIndex, 0, 0, 0L,
MediaCodec.BUFFER_FLAG_END_OF_STREAM);
inputDone = true;
} else {
long presentationTimeUs = mExtractor.getSampleTime();
decoder.queueInputBuffer(inputBufIndex, 0, chunkSize,
presentationTimeUs, 0);
inputChunk++;
mExtractor.advance();
}
}
}
if (!outputDone) {
int decoderStatus = decoder.dequeueOutputBuffer(info, TIMEOUT_USEC);
if (decoderStatus >= 0) {
boolean needLoop = false;
if ((info.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
if (!mLoop) {
outputDone = true;
} else {
needLoop = true;
}
}
boolean doRender = (info.size != 0);
decoder.releaseOutputBuffer(decoderStatus, doRender);
if (doRender) {
if (mStopped.get()) {
outputDone = true;
} else {
outputSurface.awaitNewImage();
outputSurface.drawImage(true);
try {
mFrames.put(outputSurface.genFrameBitmap());
} catch (InterruptedException e) {
Thread.currentThread().interrupt(); // preserve the interrupt status
}
}
}
if (needLoop && !mStopped.get()) {
mExtractor.seekTo(0, MediaExtractor.SEEK_TO_CLOSEST_SYNC);
inputDone = false;
decoder.flush();
}
}
}
}
}
public Bitmap getFrame() { return mFrames.poll(); }
private MediaFormat extractVideoFormat(String inputFile) {
mExtractor = new MediaExtractor();
try {
mExtractor.setDataSource(inputFile);
mVideoTrackIndex = selectTrack(mExtractor);
if (mVideoTrackIndex < 0) {
throw new RuntimeException("No video track found in " + inputFile);
}
mExtractor.selectTrack(mVideoTrackIndex);
return mExtractor.getTrackFormat(mVideoTrackIndex);
} catch (IOException e) {
e.printStackTrace();
}
return null;
}
private static int selectTrack(MediaExtractor extractor) {
int numTracks = extractor.getTrackCount();
for (int i = 0; i < numTracks; i++) {
MediaFormat format = extractor.getTrackFormat(i);
String mime = format.getString(MediaFormat.KEY_MIME);
if (mime.startsWith("video/")) {
return i;
}
}
return -1;
}
}
The OpenGL and SurfaceTexture environment code is provided in CodecOutputSurface.java (shown below).
import android.graphics.Bitmap;
import android.opengl.EGL14;
import android.opengl.EGLConfig;
import android.opengl.EGLContext;
import android.opengl.EGLDisplay;
import android.opengl.EGLSurface;
import android.opengl.GLES20;
import android.view.Surface;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
// STextureRender and HJSurfaceTexture (a SurfaceTexture subclass) are project-local classes.
public class CodecOutputSurface implements android.graphics.SurfaceTexture.OnFrameAvailableListener {
private static final String TAG = CodecOutputSurface.class.getSimpleName();
private static final boolean DEBUG = AppEnv.DEBUG;
private STextureRender mTextureRender;
private HJSurfaceTexture mSurfaceTexture;
private Surface mSurface;
private EGLDisplay mEGLDisplay = EGL14.EGL_NO_DISPLAY;
private EGLContext mEGLContext = EGL14.EGL_NO_CONTEXT;
private EGLSurface mEGLSurface = EGL14.EGL_NO_SURFACE;
int mWidth;
int mHeight;
private final Object mFrameSyncObject = new Object();
private boolean mFrameAvailable;
private ByteBuffer mPixelBuf;
public CodecOutputSurface(int width, int height) {
if (width <= 0 || height <= 0) {
throw new IllegalArgumentException();
}
mWidth = width;
mHeight = height;
eglSetup();
makeCurrent();
setup();
}
private void setup() {
mTextureRender = new STextureRender();
mTextureRender.surfaceCreated();
mSurfaceTexture = new HJSurfaceTexture(mTextureRender.getTextureId());
mSurfaceTexture.setOnFrameAvailableListener(this);
mSurface = new Surface(mSurfaceTexture);
mPixelBuf = ByteBuffer.allocateDirect(mWidth * mHeight * 4);
mPixelBuf.order(ByteOrder.LITTLE_ENDIAN);
}
private void eglSetup() {
mEGLDisplay = EGL14.eglGetDisplay(EGL14.EGL_DEFAULT_DISPLAY);
if (mEGLDisplay == EGL14.EGL_NO_DISPLAY) {
throw new RuntimeException("unable to get EGL14 display");
}
int[] version = new int[2];
if (!EGL14.eglInitialize(mEGLDisplay, version, 0, version, 1)) {
throw new RuntimeException("unable to initialize EGL14");
}
int[] attribList = {
EGL14.EGL_RED_SIZE, 8,
EGL14.EGL_GREEN_SIZE, 8,
EGL14.EGL_BLUE_SIZE, 8,
EGL14.EGL_ALPHA_SIZE, 8,
EGL14.EGL_RENDERABLE_TYPE, EGL14.EGL_OPENGL_ES2_BIT,
EGL14.EGL_SURFACE_TYPE, EGL14.EGL_PBUFFER_BIT,
EGL14.EGL_NONE
};
EGLConfig[] configs = new EGLConfig[1];
int[] numConfigs = new int[1];
if (!EGL14.eglChooseConfig(mEGLDisplay, attribList, 0, configs, 0, configs.length,
numConfigs, 0)) {
throw new RuntimeException("unable to find RGB888+recordable ES2 EGL config");
}
int[] attrib_list = {EGL14.EGL_CONTEXT_CLIENT_VERSION, 2, EGL14.EGL_NONE};
mEGLContext = EGL14.eglCreateContext(mEGLDisplay, configs[0], EGL14.EGL_NO_CONTEXT,
attrib_list, 0);
if (mEGLContext == null || mEGLContext == EGL14.EGL_NO_CONTEXT) {
throw new RuntimeException("eglCreateContext failed");
}
int[] surfaceAttribs = {EGL14.EGL_WIDTH, mWidth, EGL14.EGL_HEIGHT, mHeight, EGL14.EGL_NONE};
mEGLSurface = EGL14.eglCreatePbufferSurface(mEGLDisplay, configs[0], surfaceAttribs, 0);
if (mEGLSurface == null || mEGLSurface == EGL14.EGL_NO_SURFACE) {
throw new RuntimeException("eglCreatePbufferSurface failed");
}
}
public void release() {
if (mEGLDisplay != EGL14.EGL_NO_DISPLAY) {
EGL14.eglDestroySurface(mEGLDisplay, mEGLSurface);
EGL14.eglDestroyContext(mEGLDisplay, mEGLContext);
EGL14.eglReleaseThread();
EGL14.eglTerminate(mEGLDisplay);
}
mEGLDisplay = EGL14.EGL_NO_DISPLAY;
mEGLContext = EGL14.EGL_NO_CONTEXT;
mEGLSurface = EGL14.EGL_NO_SURFACE;
mSurface.release();
mTextureRender = null;
mSurface = null;
mSurfaceTexture = null;
}
public void makeCurrent() {
if (!EGL14.eglMakeCurrent(mEGLDisplay, mEGLSurface, mEGLSurface, mEGLContext)) {
throw new RuntimeException("eglMakeCurrent failed");
}
}
public Surface getSurface() { return mSurface; }
public void awaitNewImage() {
final int TIMEOUT_MS = 2500;
synchronized (mFrameSyncObject) {
while (!mFrameAvailable) {
try {
mFrameSyncObject.wait(TIMEOUT_MS);
if (!mFrameAvailable) {
throw new RuntimeException("frame wait timed out");
}
} catch (InterruptedException ie) {
throw new RuntimeException(ie);
}
}
mFrameAvailable = false;
}
mTextureRender.checkGlError("before updateTexImage");
mSurfaceTexture.updateTexImage();
}
public void drawImage(boolean invert) {
mTextureRender.drawFrame(mSurfaceTexture, invert);
}
@Override
public void onFrameAvailable(android.graphics.SurfaceTexture st) {
synchronized (mFrameSyncObject) {
if (mFrameAvailable) {
throw new RuntimeException("mFrameAvailable already set, frame could be dropped");
}
mFrameAvailable = true;
mFrameSyncObject.notifyAll();
}
}
public Bitmap genFrameBitmap() throws IOException {
mPixelBuf.rewind();
GLES20.glReadPixels(0, 0, mWidth, mHeight, GLES20.GL_RGBA, GLES20.GL_UNSIGNED_BYTE,
mPixelBuf);
Bitmap bmp = Bitmap.createBitmap(mWidth, mHeight, Bitmap.Config.ARGB_8888);
mPixelBuf.rewind();
bmp.copyPixelsFromBuffer(mPixelBuf);
return bmp;
}
// ... (STextureRender inner class omitted for brevity) ...
}
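Putting the pieces together, a hypothetical caller could start extraction and drain the frame queue from its own thread; the input path and the saveAsPng helper below are placeholders, not part of the article's code:
final MediaBitmapFrameExtractor extractor = new MediaBitmapFrameExtractor();
extractor.startExtract("/sdcard/input.mp4");      // placeholder path
new Thread(new Runnable() {
    @Override
    public void run() {
        int index = 0;
        while (!extractor.isStopped()) {
            Bitmap frame = extractor.getFrame();  // non-blocking poll; null if the queue is empty
            if (frame == null) {
                try { Thread.sleep(10); } catch (InterruptedException e) { return; }
                continue;
            }
            saveAsPng(frame, "/sdcard/frames/" + (index++) + ".png");  // placeholder helper
            frame.recycle();
        }
    }
}).start();
// Call extractor.stopExtract() to finish; it also recycles any bitmaps still queued.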
