refine VA_FOOL

Signed-off-by: Austin Yuan <shengquan.yuan@gmail.com>
diff --git a/va/va.c b/va/va.c
index a9d0fd6..10befe8 100644
--- a/va/va.c
+++ b/va/va.c
@@ -55,6 +55,13 @@
         trace_func(__VA_ARGS__);                \
     }
 
+extern int fool_decode, fool_encode;  /* defined in va_fool.c */
+#define VA_FOOL(fool_func,...)  /* assigns to the caller's local "ret" */ \
+    do {                                       \
+        if (fool_decode || fool_encode)        \
+            ret = fool_func(__VA_ARGS__);      \
+    } while (0)
+
 /*
  * read a config "env" for libva.conf or from environment setting
  * liva.conf has higher priority
@@ -432,8 +439,7 @@
   CHECK_DISPLAY(dpy);
   old_ctx = CTX(dpy);
 
-  if (old_ctx->handle)
-  {
+  if (old_ctx->handle) {
       vaStatus = old_ctx->vtable.vaTerminate(old_ctx);
       dlclose(old_ctx->handle);
       old_ctx->handle = NULL;
@@ -555,11 +561,13 @@
 {
   VADriverContextP ctx;
   VAStatus vaStatus = VA_STATUS_SUCCESS;
+  int ret = 0;
   
   CHECK_DISPLAY(dpy);
   ctx = CTX(dpy);
 
-  va_FoolCreateConfig(dpy, profile, entrypoint, attrib_list, num_attribs, config_id);
+  VA_FOOL(va_FoolCreateConfig, dpy, profile, entrypoint, attrib_list, num_attribs, config_id);
+  
   vaStatus =  ctx->vtable.vaCreateConfig ( ctx, profile, entrypoint, attrib_list, num_attribs, config_id );
 
   VA_TRACE(va_TraceCreateConfig, dpy, profile, entrypoint, attrib_list, num_attribs, config_id);
@@ -606,14 +614,16 @@
 {
   VADriverContextP ctx;
   VAStatus vaStatus;
-
+  int ret = 0;
+  
   CHECK_DISPLAY(dpy);
   ctx = CTX(dpy);
 
   vaStatus = ctx->vtable.vaCreateSurfaces( ctx, width, height, format, num_surfaces, surfaces );
 
   VA_TRACE(va_TraceCreateSurface, dpy, width, height, format, num_surfaces, surfaces);
-  va_FoolCreateSurfaces(dpy, width, height, format, num_surfaces, surfaces);
+
+  VA_FOOL(va_FoolCreateSurfaces, dpy, width, height, format, num_surfaces, surfaces);
   
   return vaStatus;
 }
@@ -682,9 +692,12 @@
   VADriverContextP ctx;
   CHECK_DISPLAY(dpy);
   ctx = CTX(dpy);
-
-  if (va_FoolCreateBuffer(dpy, context, type, size, num_elements, data, buf_id))
+  int ret = 0;
+  
+  VA_FOOL(va_FoolCreateBuffer, dpy, context, type, size, num_elements, data, buf_id);
+  if (ret)
       return VA_STATUS_SUCCESS;
+  
   return ctx->vtable.vaCreateBuffer( ctx, context, type, size, num_elements, data, buf_id);
 }
 
@@ -710,12 +723,15 @@
 {
   VADriverContextP ctx;
   VAStatus va_status;
+  int ret = 0;
   
   CHECK_DISPLAY(dpy);
   ctx = CTX(dpy);
   
-  if (va_FoolMapBuffer(dpy, buf_id, pbuf))
+  VA_FOOL(va_FoolMapBuffer, dpy, buf_id, pbuf);
+  if (ret)
       return VA_STATUS_SUCCESS;
+  
   va_status = ctx->vtable.vaMapBuffer( ctx, buf_id, pbuf );
 
   if (va_status == VA_STATUS_SUCCESS)
@@ -732,9 +748,12 @@
   VADriverContextP ctx;
   CHECK_DISPLAY(dpy);
   ctx = CTX(dpy);
-
-  if (va_FoolUnmapBuffer(dpy, buf_id))
+  int ret = 0;
+  
+  VA_FOOL(va_FoolUnmapBuffer, dpy, buf_id);
+  if (ret)
       return VA_STATUS_SUCCESS;
+  
   return ctx->vtable.vaUnmapBuffer( ctx, buf_id );
 }
 
@@ -773,12 +792,15 @@
 )
 {
   VADriverContextP ctx;
+  int ret = 0;
+  
   CHECK_DISPLAY(dpy);
   ctx = CTX(dpy);
 
   VA_TRACE(va_TraceBeginPicture, dpy, context, render_target);
 
-  if (va_FoolBeginPicture(dpy, context, render_target))
+  VA_FOOL(va_FoolBeginPicture, dpy, context, render_target);
+  if (ret)
       return VA_STATUS_SUCCESS;
 
   return ctx->vtable.vaBeginPicture( ctx, context, render_target );
@@ -792,10 +814,13 @@
 )
 {
   VADriverContextP ctx;
+  int ret = 0;
+  
   CHECK_DISPLAY(dpy);
   ctx = CTX(dpy);
 
-  if (va_FoolRenderPicture(dpy, context, buffers, num_buffers))
+  VA_FOOL(va_FoolRenderPicture, dpy, context, buffers, num_buffers);
+  if (ret)
       return VA_STATUS_SUCCESS;
 
   VA_TRACE(va_TraceRenderPicture, dpy, context, buffers, num_buffers);
@@ -810,12 +835,15 @@
 {
   VAStatus va_status;
   VADriverContextP ctx;
+  int ret = 0;
+  
   CHECK_DISPLAY(dpy);
   ctx = CTX(dpy);
 
-  if (va_FoolEndPicture(dpy, context)) {
-    VA_TRACE(va_TraceEndPicture, dpy, context);
-    return VA_STATUS_SUCCESS;
+  VA_FOOL(va_FoolEndPicture, dpy, context);
+  if (ret) {
+      VA_TRACE(va_TraceEndPicture, dpy, context);
+      return VA_STATUS_SUCCESS;
   }
   
   va_status = ctx->vtable.vaEndPicture( ctx, context );
@@ -832,11 +860,14 @@
 {
   VAStatus va_status;
   VADriverContextP ctx;
+  int ret = 0;
+  
   CHECK_DISPLAY(dpy);
   ctx = CTX(dpy);
 
-  if (va_FoolSyncSurface( dpy, render_target))
-    return VA_STATUS_SUCCESS;
+  VA_FOOL(va_FoolSyncSurface, dpy, render_target);
+  if (ret)
+      return VA_STATUS_SUCCESS;
   
   va_status = ctx->vtable.vaSyncSurface( ctx, render_target );
   VA_TRACE(va_TraceSyncSurface, dpy, render_target);
@@ -1073,11 +1104,15 @@
 )
 {
   VADriverContextP ctx;
+  int ret = 0;
+  
   CHECK_DISPLAY(dpy);
   ctx = CTX(dpy);
-
-  if (va_FoolQuerySubpictureFormats(dpy, format_list, flags, num_formats))
+  
+  VA_FOOL(va_FoolQuerySubpictureFormats, dpy, format_list, flags, num_formats);
+  if (ret)
       return VA_STATUS_SUCCESS;
+  
   return ctx->vtable.vaQuerySubpictureFormats ( ctx, format_list, flags, num_formats);
 }
 
diff --git a/va/va_fool.c b/va/va_fool.c
index 1edbd6d..8d4ca48 100644
--- a/va/va_fool.c
+++ b/va/va_fool.c
@@ -40,7 +40,6 @@
 #include <unistd.h>
 #include <time.h>
 #include "va_fool_264.h"
-#include "va_getframe.h"
 
 /*
  * Do dummy decode/encode, ignore the input data
@@ -49,10 +48,10 @@
  *
  * LIBVA_FOOL_DECODE:
  * . if set, decode does nothing, but fill in some YUV data
- * LIBVA_FOOL_ENCODE:
- * . if set, encode does nothing, but fill in a hard-coded 720P clip into coded buffer.
+ * LIBVA_FOOL_ENCODE=<clip name>:
+ * . if set, encode does nothing, but fill in the coded buffer from an H264 clip.
  * . VA CONTEXT/CONFIG/SURFACE will call into drivers, but VA Buffer creation is done in here
- * . Bypass all ~SvaBeginPic/vaRenderPic/vaEndPic~T
+ * . Bypass all "vaBeginPic/vaRenderPic/vaEndPic"
  * LIBVA_FOOL_POSTP:
  * . if set, do nothing for vaPutSurface
  */
@@ -61,13 +60,18 @@
 /* global settings */
 
 /* LIBVA_FOOL_DECODE/LIBVA_FOOL_ENCODE/LIBVA_FOOL_POSTP */
-static int fool_decode = 0;
-static int fool_encode = 0;
+int fool_decode = 0;
+int fool_encode = 0;
 int fool_postp  = 0;
-FILE *input_fd;
+
 static char *frame_buf;
 
+int va_FoolGetFrame(FILE *input_fp, char *frame_buf); /* defined in va_getframe.c */
+#define NAL_BUF_SIZE  65536  // maximum NAL unit size
+#define RING_BUF_SIZE  8192  // input ring buffer size, MUST be a power of two!
 #define MAX_FRAME 16
+#define SLICE_NUM 4
+
 #define FOOL_CONTEXT_MAX 4
 /* per context settings */
 static struct _fool_context {
@@ -91,22 +95,23 @@
 
 #define FOOL_DECODE(idx) (fool_decode && (fool_context[idx].fool_entrypoint == VAEntrypointVLD))
 #define FOOL_ENCODE(idx)                                                \
-    (fool_encode                                                            \
-     && (fool_context[idx].fool_entrypoint == VAEntrypointEncSlice)        \
-     && (fool_context[idx].fool_profile >= VAProfileH264Baseline)           \
+    (fool_encode                                                        \
+     && (fool_context[idx].fool_entrypoint == VAEntrypointEncSlice)     \
+     && (fool_context[idx].fool_profile >= VAProfileH264Baseline)       \
      && (fool_context[idx].fool_profile <= VAProfileH264High))
 
 
 
-#define DPY2INDEX(dpy)                                 \
-    int idx;                                           \
-\
-for (idx = 0; idx < FOOL_CONTEXT_MAX; idx++)       \
-if (fool_context[idx].dpy == dpy)              \
-break;                                     \
-\
-if (idx == FOOL_CONTEXT_MAX)                       \
-return 0;  /* let driver go */                 \
+#define DPY2INDEX(dpy)                                  \
+    int idx;                                            \
+                                                        \
+    for (idx = 0; idx < FOOL_CONTEXT_MAX; idx++)        \
+        if (fool_context[idx].dpy == dpy)               \
+            break;                                      \
+                                                        \
+    if (idx == FOOL_CONTEXT_MAX)                        \
+        return 0;  /* let driver go */
+
 
 /* Prototype declarations (functions defined in va.c) */
 
@@ -122,7 +127,7 @@
         VABufferType *type,		/* out */
         unsigned int *size,		/* out */
         unsigned int *num_elements	/* out */
-        );
+);
 
 VAStatus vaLockSurface(VADisplay dpy,
         VASurfaceID surface,
@@ -135,11 +140,11 @@
         unsigned int *chroma_v_offset,
         unsigned int *buffer_name,
         void **buffer 
-        );
+);
 
 VAStatus vaUnlockSurface(VADisplay dpy,
         VASurfaceID surface
-        );
+);
 
 
 void va_FoolInit(VADisplay dpy)
@@ -167,12 +172,13 @@
 
 
     if (va_parseConfig("LIBVA_FOOL_ENCODE", &env_value[0]) == 0) {
-        input_fd = fopen(env_value, "r");
+        fool_context[fool_index].fool_fp_codedclip = fopen(env_value, "rb"); /* coded clip is binary data */
 
-        if (input_fd)
-            fool_context[fool_index].fool_fp_codedclip = input_fd;
-        fool_encode = 1;
-        va_infoMessage("LIBVA_FOOL_ENCODE is on, dummy encode\n");
+        if (fool_context[fool_index].fool_fp_codedclip) {
+            fool_encode = 1;
+            va_infoMessage("LIBVA_FOOL_ENCODE is on, dummy encode\n");            
+        } else
+            fool_encode = 0;
     }
 
     if (fool_encode || fool_decode)
@@ -186,10 +192,13 @@
 
     DPY2INDEX(dpy);
 
-    for (i = 0; i < VABufferTypeMax; i++) /* free memory */
+    for (i = 0; i < VABufferTypeMax; i++) {/* free memory */
         if (fool_context[idx].fool_buf[i])
             free(fool_context[idx].fool_buf[i]);
-
+    }
+    if (fool_context[idx].fool_fp_codedclip)
+        fclose(fool_context[idx].fool_fp_codedclip);
+            
     memset(&fool_context[idx], sizeof(struct _fool_context), 0);
     return 0;
 }
@@ -208,7 +217,7 @@
         VAConfigAttrib *attrib_list,
         int num_attribs,
         VAConfigID *config_id /* out */
-        )
+)
 {
     DPY2INDEX(dpy);
 
@@ -225,7 +234,7 @@
         unsigned char *V_start, int V_pitch,
         int UV_interleave, int box_width, int row_shift,
         int field
-        )
+)
 {
     int row;
 
@@ -292,7 +301,7 @@
         int format,
         int num_surfaces,
         VASurfaceID *surfaces	/* out */
-        )
+)
 {
     int i;
     unsigned int fourcc; /* following are output argument */
@@ -363,7 +372,7 @@
         unsigned int num_elements,	/* in */
         void *data,			/* in */
         VABufferID *buf_id		/* out */
-        )
+)
 {
     DPY2INDEX(dpy);
 
@@ -400,7 +409,7 @@
         VADisplay dpy,
         VABufferID buf_id,	/* in */
         void **pbuf 	/* out */
-        )
+)
 {
     VABufferType type;
     unsigned int size;
@@ -410,7 +419,7 @@
     if (FOOL_ENCODE(idx) || FOOL_DECODE(idx)) { /* fool buffer creation */
         unsigned int buf_idx = buf_id & 0xff;
 
-        /*Image buffer?*/
+        /* Image buffer? */
         vaBufferInfo(dpy, fool_context[idx].context, buf_id, &type, &size, &num_elements);
         if (type == VAImageBufferType  && FOOL_ENCODE(idx))
             return 0;
@@ -432,7 +441,7 @@
 #endif
             frame_buf = malloc(MAX_FRAME*SLICE_NUM*NAL_BUF_SIZE*sizeof(char));
             memset(frame_buf,0,SLICE_NUM*NAL_BUF_SIZE);
-            va_FoolGetFrame(frame_buf);
+            va_FoolGetFrame(fool_context[idx].fool_fp_codedclip, frame_buf);
             *pbuf=frame_buf;
         }
         return 1; /* don't call into driver */
@@ -446,12 +455,11 @@
         VADisplay dpy,
         VAContextID context,
         VASurfaceID render_target
-        )
+)
 {
     DPY2INDEX(dpy);
 
-    if (FOOL_ENCODE(idx) || FOOL_DECODE(idx))
-    {
+    if (FOOL_ENCODE(idx) || FOOL_DECODE(idx)) {
         if (fool_context[idx].context == 0)
             fool_context[idx].context = context;
         return 1; /* don't call into driver level */
@@ -465,7 +473,7 @@
         VAContextID context,
         VABufferID *buffers,
         int num_buffers
-        )
+)
 {
     DPY2INDEX(dpy);
 
@@ -479,7 +487,7 @@
 int va_FoolEndPicture(
         VADisplay dpy,
         VAContextID context
-        )
+)
 {
     DPY2INDEX(dpy);
 
@@ -505,23 +513,23 @@
 
 int va_FoolSyncSurface(
         VADisplay dpy, 
-        VASurfaceID render_target)
+        VASurfaceID render_target
+)
 {
     DPY2INDEX(dpy);
+
     /*Fill in black and white squares. */
     if (FOOL_DECODE(idx) || FOOL_DECODE(idx))
-    {
         return 1;
-    }
 
     return 0;
 
 }
 
-VAStatus va_FoolUnmapBuffer (
+VAStatus va_FoolUnmapBuffer(
         VADisplay dpy,
         VABufferID buf_id	/* in */
-        )
+)
 {
     DPY2INDEX(dpy);
 
@@ -532,12 +540,12 @@
     return 0;
 }
 
-VAStatus va_FoolQuerySubpictureFormats (
+VAStatus va_FoolQuerySubpictureFormats(
         VADisplay dpy,
         VAImageFormat *format_list,
         unsigned int *flags,
         unsigned int *num_formats
-        )
+)
 {
     DPY2INDEX(dpy);
 
diff --git a/va/va_getframe.c b/va/va_getframe.c
index e3a0661..0b2a8d9 100644
--- a/va/va_getframe.c
+++ b/va/va_getframe.c
@@ -1,6 +1,30 @@
+/* The code refers to
+ * http://keyj.s2000.at/files/projects/h264-src.tar.gz
+ */
 #include <string.h>
 #include <stdio.h>
-#include "va_getframe.h"
+
+#define SLICE_NUM 4 
+#define NAL_BUF_SIZE  65536  // maximum NAL unit size
+#define RING_BUF_SIZE  8192  // input ring buffer size, MUST be a power of two!
+
+typedef struct _nal_unit {
+  int NumBytesInNALunit;
+  int forbidden_zero_bit;
+  int nal_ref_idc;
+  int nal_unit_type;
+  unsigned char *last_rbsp_byte;
+} nal_unit;
+ typedef struct _slice_header {
+  int first_mb_in_slice;
+} slice_header;
+ 
+static int get_next_nal_unit(FILE *input_fp, nal_unit *nalu); 
+static int get_unsigned_exp_golomb();
+static void decode_slice_header(slice_header *sh);
+static void input_read(FILE *input_fp, unsigned char *dest, int size);
+static int input_get_bits(int bit_count);
+int va_FoolGetFrame(FILE *input_fp, char *frame_buf); 
 
 static unsigned char nal_buf[NAL_BUF_SIZE];
 static unsigned char ring_buf[RING_BUF_SIZE];
@@ -8,7 +32,6 @@
 static int ring_pos = 0;
 static int nal_pos;
 static int nal_bit;
-extern FILE *input_fd;
 static int frame_no = 0, cur_frame_no = 0;
 
 #define SLICE_NUM 4 
@@ -18,13 +41,13 @@
 #define gnn_advance() do { \
 	ring_pos = (ring_pos+1)&RING_MOD; \
 	--input_remain; \
-	if(ring_pos==0) input_read(&ring_buf[HALF_RING],HALF_RING); \
-	if(ring_pos==HALF_RING) input_read(&ring_buf[0],HALF_RING); \
+	if (ring_pos==0) input_read(input_fp, &ring_buf[HALF_RING],HALF_RING); \
+	if (ring_pos==HALF_RING) input_read(input_fp, &ring_buf[0],HALF_RING); \
 } while(0)
 
 #define gnn_add_segment(end) do { \
 	int size = end-segment_start; \
-	if(size>0) { \
+	if (size>0) { \
 		memcpy(&nal_buf[nalu_size],&ring_buf[segment_start],size); \
 		nalu_size += size; \
 	} \
@@ -33,146 +56,151 @@
 
 static int input_get_bits(int bit_count) 
 {
-	int res = 0;
-	register unsigned int x = 
-		(nal_buf[nal_pos]<<24)|
-		(nal_buf[nal_pos+1]<<16)|
-		(nal_buf[nal_pos+2]<<8)|
-		nal_buf[nal_pos+3];
-	res = (x>>(32-bit_count-nal_bit))&((1<<bit_count)-1);
-	nal_bit += bit_count;
-	nal_pos += nal_bit>>3;
-	nal_bit &= 7;
-	return res;
+    int res = 0;
+    register unsigned int x = 
+        (nal_buf[nal_pos]<<24)|
+        (nal_buf[nal_pos+1]<<16)|
+        (nal_buf[nal_pos+2]<<8)|
+        nal_buf[nal_pos+3];
+
+    res = (x>>(32-bit_count-nal_bit))&((1<<bit_count)-1);
+    nal_bit += bit_count;
+    nal_pos += nal_bit>>3;
+    nal_bit &= 7;
+
+    return res;
 }
 
-int input_get_one_bit() 
+static int input_get_one_bit() 
 {
-	int res = (nal_buf[nal_pos]>>(7-nal_bit))&1;
-	if(++nal_bit>7) 
-	{
-		++nal_pos;
-		nal_bit = 0;
-	}
-	return res;
+    int res = (nal_buf[nal_pos]>>(7-nal_bit))&1;
+
+    if (++nal_bit>7) {
+        ++nal_pos;
+        nal_bit = 0;
+    }
+    return res;
 }
 
-int get_unsigned_exp_golomb() 
+static int get_unsigned_exp_golomb() 
 {
-	int exp;
-	for(exp = 0; !input_get_one_bit(); ++exp);
-	if(exp) return (1<<exp)-1+input_get_bits(exp);
-	else return 0;
+    int exp;
+
+    for(exp = 0; !input_get_one_bit(); ++exp);
+    
+    if (exp)
+        return (1<<exp) - 1 + input_get_bits(exp);
+    else
+        return 0;
 }
 
-void decode_slice_header(slice_header *sh ) 
+static void decode_slice_header(slice_header *sh ) 
 {
-	memset((void*)sh,0,sizeof(slice_header));
-	sh->first_mb_in_slice = get_unsigned_exp_golomb(); 
+    memset((void*)sh,0,sizeof(slice_header));
+    sh->first_mb_in_slice = get_unsigned_exp_golomb(); 
 }
 
-static void input_read(unsigned char *dest, int size) 
+static void input_read(FILE *input_fp, unsigned char *dest, int size) 
 {
-	int count = fread(dest,1,size,input_fd);
-	input_remain += count;
+    int count = fread(dest, 1, size, input_fp);
+
+    input_remain += count;
 }
 
-static int get_next_nal_unit(nal_unit *nalu) 
+static int get_next_nal_unit(FILE *input_fp, nal_unit *nalu)
 {
     int i,segment_start;
     int nalu_size = 0;
     int NumBytesInRbsp = 0;
 
-	// search for the next NALU start
-	// here is the sync that the start of the NALU is 0x00000001
-	for(;;) 
-	{
-		if(input_remain<= 4) 
-		{
-            //clip restart
-			memset(ring_buf,0,sizeof(char)*RING_BUF_SIZE);
-			memset(nal_buf,0,sizeof(char)*NAL_BUF_SIZE);
+    /* Search for the start of the next NALU.
+     * A NALU begins with the sync code 0x00000001.
+     */
+    for (;;) {
+        if (input_remain<= 4) {
+            /* clip restart */
+            memset(ring_buf,0,sizeof(char)*RING_BUF_SIZE);
+            memset(nal_buf,0,sizeof(char)*NAL_BUF_SIZE);
 
-			fseek(input_fd,0,SEEK_SET);
-			input_remain = 0;
-			input_read(ring_buf,RING_BUF_SIZE);
-			ring_pos = 0;
-			return 1;
-		}
-		if((!ring_buf[ring_pos]) &&
-				(!ring_buf[(ring_pos+1)&RING_MOD]) &&
-				(!ring_buf[(ring_pos+2)&RING_MOD]) &&
-				( ring_buf[(ring_pos+3)&RING_MOD]==1))
-			break;
-		gnn_advance();
-	}
-	for(i=0;i<4;++i) gnn_advance();
+            fseek(input_fp,0,SEEK_SET);
+            input_remain = 0;
+            input_read(input_fp, ring_buf, RING_BUF_SIZE);
+            ring_pos = 0;
+            return 1;
+        }
+        if ((!ring_buf[ring_pos]) &&
+           (!ring_buf[(ring_pos+1)&RING_MOD]) &&
+           (!ring_buf[(ring_pos+2)&RING_MOD]) &&
+           ( ring_buf[(ring_pos+3)&RING_MOD]==1))
+            break;
+        gnn_advance();
+    }
+    
+    for(i=0;i<4;++i)
+        gnn_advance();
 
-	// add bytes to the NALU until the end is found
-	segment_start = ring_pos;
-	while(input_remain) 
-	{
-		if((!ring_buf[ring_pos]) &&
-				(!ring_buf[(ring_pos+1)&RING_MOD]) &&
-				(!ring_buf[(ring_pos+2)&RING_MOD]))
-			break;
-		ring_pos = (ring_pos+1)&RING_MOD;
-		--input_remain;
-		if(ring_pos==0) 
-		{
-			gnn_add_segment(RING_BUF_SIZE);
-			input_read(&ring_buf[HALF_RING],HALF_RING);
-		}
-		if(ring_pos==HALF_RING)
-		{
-			gnn_add_segment(HALF_RING);
-			input_read(&ring_buf[0],HALF_RING);
-		}
-	}
+    /* add bytes to the NALU until the end is found */
+    segment_start = ring_pos;
+    while (input_remain) {
+        if ((!ring_buf[ring_pos]) &&
+           (!ring_buf[(ring_pos+1)&RING_MOD]) &&
+           (!ring_buf[(ring_pos+2)&RING_MOD]))
+            break;
+        ring_pos = (ring_pos+1)&RING_MOD;
+        --input_remain;
+        
+        if (ring_pos==0) {
+            gnn_add_segment(RING_BUF_SIZE);
+            input_read(input_fp, &ring_buf[HALF_RING],HALF_RING);
+        }
 
-	gnn_add_segment(ring_pos);
-	if(!nalu_size) 
-		fclose(input_fd);
+        if (ring_pos==HALF_RING) {
+            gnn_add_segment(HALF_RING);
+            input_read(input_fp, &ring_buf[0], HALF_RING);
+        }
+    }
 
-	// read the NAL unit
-	nal_pos = 0; nal_bit = 0;
-	nalu->forbidden_zero_bit = input_get_bits(1);
-	nalu->nal_ref_idc = input_get_bits(2);
-	nalu->nal_unit_type = input_get_bits(5);
-	nalu->last_rbsp_byte = &nal_buf[nalu_size-1];
-	nalu->NumBytesInNALunit = nalu_size; 
-	return 1;
+    gnn_add_segment(ring_pos);
+
+    /* read the NAL unit */
+    nal_pos = 0; nal_bit = 0;
+    nalu->forbidden_zero_bit = input_get_bits(1);
+    nalu->nal_ref_idc = input_get_bits(2);
+    nalu->nal_unit_type = input_get_bits(5);
+    nalu->last_rbsp_byte = &nal_buf[nalu_size-1];
+    nalu->NumBytesInNALunit = nalu_size; 
+
+    return 1;
 }
 
-int va_FoolGetFrame(char *frame_buf) 
+int va_FoolGetFrame(FILE *input_fp, char *frame_buf) 
 {
-	int i = 0, frame_pos = 0;
-	static slice_header sh; 
-	static nal_unit nalu;
+    int i = 0, frame_pos = 0;
+    static slice_header sh; 
+    static nal_unit nalu;
 
-    //save the current frame number
-	cur_frame_no = frame_no;
-    //read the clip , here is the first frame, let the clip go on frame by frame 
-	if(!frame_no)
-		input_read(ring_buf,RING_BUF_SIZE);
+    /* save the current frame number */
+    cur_frame_no = frame_no;
+    
+    /* While no frame has been read yet (frame_no == 0), prime the ring
+     * buffer from the clip; later calls continue frame by frame.
+     */
+    if (!frame_no)
+        input_read(input_fp, ring_buf,RING_BUF_SIZE);
 
-	while(get_next_nal_unit(&nalu))
-	{
-		if(nalu.nal_unit_type == 1 || nalu.nal_unit_type == 5) 
-		{
-			decode_slice_header(&sh);
-			if(0 == sh.first_mb_in_slice)
-			{
-				++frame_no;
-				frame_pos = 0;
-			}
-			if(frame_no>(cur_frame_no+1))
-				break;
-			memcpy(frame_buf+frame_pos, nal_buf+1, sizeof(char)*(nalu.NumBytesInNALunit-1));
-			frame_pos += nalu.NumBytesInNALunit;
-		}
-	}
-    //close the env_value clip ;if some other area use this clip need to open it again
-	fclose(input_fd);
-	return 1; 
+    while (get_next_nal_unit(input_fp, &nalu)) {
+        if (nalu.nal_unit_type == 1 || nalu.nal_unit_type == 5) {
+            decode_slice_header(&sh);
+            if (0 == sh.first_mb_in_slice) {
+                ++frame_no;
+                frame_pos = 0;
+            }
+            if (frame_no > (cur_frame_no+1))
+                break;
+            memcpy(frame_buf+frame_pos, nal_buf+1, sizeof(char)*(nalu.NumBytesInNALunit-1));
+            frame_pos += nalu.NumBytesInNALunit;
+        }
+    }
+    
+    return 1; 
 }
diff --git a/va/va_getframe.h b/va/va_getframe.h
index 399c8b0..707f8c0 100644
--- a/va/va_getframe.h
+++ b/va/va_getframe.h
@@ -1,25 +1,4 @@
 #ifndef __NAL_H__
 #define __NAL_H__
 
-#define SLICE_NUM 4 
-#define NAL_BUF_SIZE  65536  // maximum NAL unit size
-#define RING_BUF_SIZE  8192  // input ring buffer size, MUST be a power of two!
-
-typedef struct _nal_unit {
-  int NumBytesInNALunit;
-  int forbidden_zero_bit;
-  int nal_ref_idc;
-  int nal_unit_type;
-  unsigned char *last_rbsp_byte;
-} nal_unit;
- typedef struct _slice_header {
-  int first_mb_in_slice;
-} slice_header;
- 
-static int get_next_nal_unit(nal_unit *nalu); 
-static int get_unsigned_exp_golomb();
-static void decode_slice_header(slice_header *sh);
-static void input_read(unsigned char *dest, int size);
-static int input_get_bits(int bit_count);
-int va_FoolGetFrame(char *frame_buf); 
 #endif /*__NAL_H__*/