linux-solo compiles as far as the gamma driver, which seems to be broken
independently of these changes.
diff --git a/src/mesa/drivers/dri/gamma/gamma_context.c b/src/mesa/drivers/dri/gamma/gamma_context.c
index f7db2ad..b27851b 100644
--- a/src/mesa/drivers/dri/gamma/gamma_context.c
+++ b/src/mesa/drivers/dri/gamma/gamma_context.c
@@ -48,9 +48,9 @@
 #include "gamma_vb.h"
 #include "gamma_tris.h"
 
-extern const struct gl_pipeline_stage _gamma_render_stage;
+extern const struct tnl_pipeline_stage _gamma_render_stage;
 
-static const struct gl_pipeline_stage *gamma_pipeline[] = {
+static const struct tnl_pipeline_stage *gamma_pipeline[] = {
    &_tnl_vertex_transform_stage,
    &_tnl_normal_transform_stage,
    &_tnl_lighting_stage,
diff --git a/src/mesa/drivers/dri/gamma/gamma_render.c b/src/mesa/drivers/dri/gamma/gamma_render.c
index b8bf617..d05db93 100644
--- a/src/mesa/drivers/dri/gamma/gamma_render.c
+++ b/src/mesa/drivers/dri/gamma/gamma_render.c
@@ -178,7 +178,7 @@
    tnl->Driver.Render.PrimitiveNotify( ctx, flags & PRIM_MODE_MASK );
    tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
    tnl->Driver.Render.PrimTabVerts[flags&PRIM_MODE_MASK]( ctx, start, count, flags );
-   GAMMA_CONTEXT(ctx)->SetupNewInputs = VERT_BIT_CLIP;
+   GAMMA_CONTEXT(ctx)->SetupNewInputs = VERT_BIT_POS;
 }
 
 static const GLuint hw_prim[GL_POLYGON+1] = {
@@ -236,7 +236,7 @@
 
 
 static GLboolean gamma_run_render( GLcontext *ctx,
-				  struct gl_pipeline_stage *stage )
+				  struct tnl_pipeline_stage *stage )
 {
    gammaContextPtr gmesa = GAMMA_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
@@ -271,9 +271,9 @@
 
 
 static void gamma_check_render( GLcontext *ctx,
-				 struct gl_pipeline_stage *stage )
+				 struct tnl_pipeline_stage *stage )
 {
-   GLuint inputs = VERT_BIT_CLIP | VERT_BIT_COLOR0;
+   GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
 
    if (ctx->RenderMode == GL_RENDER) {
       if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
@@ -293,13 +293,13 @@
 }
 
 
-static void dtr( struct gl_pipeline_stage *stage )
+static void dtr( struct tnl_pipeline_stage *stage )
 {
    (void)stage;
 }
 
 
-const struct gl_pipeline_stage _gamma_render_stage =
+const struct tnl_pipeline_stage _gamma_render_stage =
 {
    "gamma render",
    (_DD_NEW_SEPARATE_SPECULAR |
diff --git a/src/mesa/drivers/dri/gamma/gamma_vb.c b/src/mesa/drivers/dri/gamma/gamma_vb.c
index 2e2f9ed..f4ed08b 100644
--- a/src/mesa/drivers/dri/gamma/gamma_vb.c
+++ b/src/mesa/drivers/dri/gamma/gamma_vb.c
@@ -278,7 +278,7 @@
    if (!newinputs)
       return;
 
-   if (newinputs & VERT_BIT_CLIP) {
+   if (newinputs & VERT_BIT_POS) {
       setup_tab[gmesa->SetupIndex].emit( ctx, start, count, v, stride );
    } else {
       GLuint ind = 0;
diff --git a/src/mesa/drivers/dri/i810/i810context.c b/src/mesa/drivers/dri/i810/i810context.c
index da1764c..d678aab 100644
--- a/src/mesa/drivers/dri/i810/i810context.c
+++ b/src/mesa/drivers/dri/i810/i810context.c
@@ -107,9 +107,9 @@
    NULL
 };
 
-extern const struct gl_pipeline_stage _i810_render_stage;
+extern const struct tnl_pipeline_stage _i810_render_stage;
 
-static const struct gl_pipeline_stage *i810_pipeline[] = {
+static const struct tnl_pipeline_stage *i810_pipeline[] = {
    &_tnl_vertex_transform_stage,
    &_tnl_normal_transform_stage,
    &_tnl_lighting_stage,
diff --git a/src/mesa/drivers/dri/i810/i810render.c b/src/mesa/drivers/dri/i810/i810render.c
index 8d388d8..d1c1adc 100644
--- a/src/mesa/drivers/dri/i810/i810render.c
+++ b/src/mesa/drivers/dri/i810/i810render.c
@@ -105,7 +105,7 @@
    tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
    tnl->Driver.Render.PrimTabVerts[flags&PRIM_MODE_MASK]( ctx, start, 
 							  count, flags );
-   I810_CONTEXT(ctx)->SetupNewInputs = VERT_BIT_CLIP;
+   I810_CONTEXT(ctx)->SetupNewInputs = VERT_BIT_POS;
 }
 
 
@@ -137,12 +137,12 @@
 
 
 static GLboolean i810_run_render( GLcontext *ctx,
-				  struct gl_pipeline_stage *stage )
+				  struct tnl_pipeline_stage *stage )
 {
    i810ContextPtr imesa = I810_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
-   GLuint i, length, flags = 0;
+   GLuint i;
 
    /* Don't handle clipping or indexed vertices.
     */
@@ -150,17 +150,21 @@
       return GL_TRUE;
    }
 
-   imesa->SetupNewInputs = VERT_BIT_CLIP;
+   imesa->SetupNewInputs = VERT_BIT_POS;
 
    tnl->Driver.Render.Start( ctx );
 
-   for (i = VB->FirstPrimitive ; !(flags & PRIM_LAST) ; i += length)
+   for (i = 0 ; i < VB->PrimitiveCount ; i++)
    {
-      flags = VB->Primitive[i];
-      length= VB->PrimitiveLength[i];
-      if (length)
-	 i810_render_tab_verts[flags & PRIM_MODE_MASK]( ctx, i, i + length,
-							flags );
+      GLuint prim = VB->Primitive[i].mode;
+      GLuint start = VB->Primitive[i].start;
+      GLuint length = VB->Primitive[i].count;
+
+      if (!length)
+	 continue;
+
+      i810_render_tab_verts[prim & PRIM_MODE_MASK]( ctx, start, start + length,
+						    prim );
    }
 
    tnl->Driver.Render.Finish( ctx );
@@ -169,9 +173,9 @@
 }
 
 
-static void i810_check_render( GLcontext *ctx, struct gl_pipeline_stage *stage )
+static void i810_check_render( GLcontext *ctx, struct tnl_pipeline_stage *stage )
 {
-   GLuint inputs = VERT_BIT_CLIP | VERT_BIT_COLOR0;
+   GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
 
    if (ctx->RenderMode == GL_RENDER) {
       if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
@@ -191,13 +195,13 @@
 }
 
 
-static void dtr( struct gl_pipeline_stage *stage )
+static void dtr( struct tnl_pipeline_stage *stage )
 {
    (void)stage;
 }
 
 
-const struct gl_pipeline_stage _i810_render_stage =
+const struct tnl_pipeline_stage _i810_render_stage =
 {
    "i810 render",
    (_DD_NEW_SEPARATE_SPECULAR |
diff --git a/src/mesa/drivers/dri/i810/i810screen.c b/src/mesa/drivers/dri/i810/i810screen.c
index 9bc784e..9e457f4 100644
--- a/src/mesa/drivers/dri/i810/i810screen.c
+++ b/src/mesa/drivers/dri/i810/i810screen.c
@@ -68,10 +68,10 @@
 {
    drmBufMapPtr retval;
 
-   retval = (drmBufMapPtr)ALIGN_MALLOC(sizeof(drmBufMap));
+   retval = (drmBufMapPtr)MALLOC(sizeof(drmBufMap));
    if(retval == NULL) return NULL;
    memset(retval, 0, sizeof(drmBufMap));
-   retval->list = (drmBufPtr)ALIGN_MALLOC(sizeof(drmBuf) * I810_DMA_BUF_NR);
+   retval->list = (drmBufPtr)MALLOC(sizeof(drmBuf) * I810_DMA_BUF_NR);
    if(retval->list == NULL) {
       Xfree(retval);
       return NULL;
diff --git a/src/mesa/drivers/dri/i810/i810vb.c b/src/mesa/drivers/dri/i810/i810vb.c
index ada7f6d..58979ce 100644
--- a/src/mesa/drivers/dri/i810/i810vb.c
+++ b/src/mesa/drivers/dri/i810/i810vb.c
@@ -383,7 +383,7 @@
    if (!newinputs)
       return;
 
-   if (newinputs & VERT_BIT_CLIP) {
+   if (newinputs & VERT_BIT_POS) {
       setup_tab[imesa->SetupIndex].emit( ctx, start, count, v, stride );
    } else {
       GLuint ind = 0;
diff --git a/src/mesa/drivers/dri/i830/i830_context.c b/src/mesa/drivers/dri/i830/i830_context.c
index 254fa28..36abd2f 100644
--- a/src/mesa/drivers/dri/i830/i830_context.c
+++ b/src/mesa/drivers/dri/i830/i830_context.c
@@ -167,9 +167,9 @@
 };
 
 
-extern const struct gl_pipeline_stage _i830_render_stage;
+extern const struct tnl_pipeline_stage _i830_render_stage;
 
-static const struct gl_pipeline_stage *i830_pipeline[] = {
+static const struct tnl_pipeline_stage *i830_pipeline[] = {
    &_tnl_vertex_transform_stage,
    &_tnl_normal_transform_stage,
    &_tnl_lighting_stage,
diff --git a/src/mesa/drivers/dri/i830/i830_render.c b/src/mesa/drivers/dri/i830/i830_render.c
index 5df05df..f66acd3 100644
--- a/src/mesa/drivers/dri/i830/i830_render.c
+++ b/src/mesa/drivers/dri/i830/i830_render.c
@@ -122,7 +122,7 @@
    tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
    tnl->Driver.Render.PrimTabVerts[flags&PRIM_MODE_MASK]( ctx, start, 
 							  count, flags );
-   I830_CONTEXT(ctx)->SetupNewInputs = VERT_BIT_CLIP;
+   I830_CONTEXT(ctx)->SetupNewInputs = VERT_BIT_POS;
 }
 
 
@@ -159,24 +159,27 @@
    int nr_rprims = 0;
    int nr_rverts = 0;
    int rprim = 0;
-   int i = 0, length, flags = 0;
+   int i;
 
    
-   for (i = VB->FirstPrimitive ; !(flags & PRIM_LAST) ; i += length) {
-      flags = VB->Primitive[i];
-      length = VB->PrimitiveLength[i];
+   for (i = 0 ; i < VB->PrimitiveCount ; i++)
+   {
+      GLuint prim = VB->Primitive[i].mode;
+      GLuint start = VB->Primitive[i].start;
+      GLuint length = VB->Primitive[i].count;
+
       if (!length)
 	 continue;
 
-      if (!hw_prim[flags & PRIM_MODE_MASK])
+      if (!hw_prim[prim & PRIM_MODE_MASK])
 	 return GL_FALSE;
 
       nr_prims++;
-      nr_rverts += length * scale_prim[flags & PRIM_MODE_MASK];
+      nr_rverts += length * scale_prim[prim & PRIM_MODE_MASK];
 
-      if (reduced_prim[flags&PRIM_MODE_MASK] != rprim) {
+      if (reduced_prim[prim&PRIM_MODE_MASK] != rprim) {
 	 nr_rprims++;
-	 rprim = reduced_prim[flags&PRIM_MODE_MASK];
+	 rprim = reduced_prim[prim&PRIM_MODE_MASK];
       }
    }
 
@@ -192,7 +195,7 @@
 
 
 static GLboolean i830_run_render( GLcontext *ctx, 
-				 struct gl_pipeline_stage *stage )
+				 struct tnl_pipeline_stage *stage )
 {
    i830ContextPtr imesa = I830_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
@@ -205,16 +208,21 @@
       return GL_TRUE;
    }
 
-   imesa->SetupNewInputs = VERT_BIT_CLIP;
+   imesa->SetupNewInputs = VERT_BIT_POS;
 
    tnl->Driver.Render.Start( ctx );
    
-   for (i = VB->FirstPrimitive ; !(flags & PRIM_LAST) ; i += length) {
-      flags = VB->Primitive[i];
-      length= VB->PrimitiveLength[i];
-      if (length)
-	 i830_render_tab_verts[flags & PRIM_MODE_MASK]( ctx, i, i + length,
-						        flags );
+   for (i = 0 ; i < VB->PrimitiveCount ; i++)
+   {
+      GLuint prim = VB->Primitive[i].mode;
+      GLuint start = VB->Primitive[i].start;
+      GLuint length = VB->Primitive[i].count;
+
+      if (!length)
+	 continue;
+
+      i830_render_tab_verts[prim & PRIM_MODE_MASK]( ctx, start, start + length,
+						    prim );
    }
       
    tnl->Driver.Render.Finish( ctx );
@@ -224,9 +232,9 @@
 
 
 static void i830_check_render( GLcontext *ctx, 
-			       struct gl_pipeline_stage *stage )
+			       struct tnl_pipeline_stage *stage )
 {
-   GLuint inputs = VERT_BIT_CLIP | VERT_BIT_COLOR0;
+   GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
    if (ctx->RenderMode == GL_RENDER) {
       if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
 	 inputs |= VERT_BIT_COLOR1;
@@ -244,13 +252,13 @@
    stage->inputs = inputs;
 }
 
-static void dtr( struct gl_pipeline_stage *stage )
+static void dtr( struct tnl_pipeline_stage *stage )
 {
    (void)stage;
 }
 
 
-const struct gl_pipeline_stage _i830_render_stage =
+const struct tnl_pipeline_stage _i830_render_stage =
 {
    "i830 render",
    (_DD_NEW_SEPARATE_SPECULAR |
diff --git a/src/mesa/drivers/dri/i830/i830_screen.c b/src/mesa/drivers/dri/i830/i830_screen.c
index 58358eb..cd3c74c 100644
--- a/src/mesa/drivers/dri/i830/i830_screen.c
+++ b/src/mesa/drivers/dri/i830/i830_screen.c
@@ -57,7 +57,7 @@
    drmBufPtr buf;
    int i;
 
-   buffer = ALIGN_MALLOC(I830_DMA_BUF_SZ);
+   buffer = MALLOC(I830_DMA_BUF_SZ);
    if(buffer == NULL) return -1;
    for(i = 0; i < I830_DMA_BUF_NR; i++) {
       buf = &(buffers->list[i]);
@@ -71,10 +71,10 @@
 {
    drmBufMapPtr retval;
 
-   retval = (drmBufMapPtr)ALIGN_MALLOC(sizeof(drmBufMap));
+   retval = (drmBufMapPtr)MALLOC(sizeof(drmBufMap));
    if(retval == NULL) return NULL;
    memset(retval, 0, sizeof(drmBufMap));
-   retval->list = (drmBufPtr)ALIGN_MALLOC(sizeof(drmBuf) * I830_DMA_BUF_NR);
+   retval->list = (drmBufPtr)MALLOC(sizeof(drmBuf) * I830_DMA_BUF_NR);
    if(retval->list == NULL) {
       Xfree(retval);
       return NULL;
diff --git a/src/mesa/drivers/dri/i830/i830_vb.c b/src/mesa/drivers/dri/i830/i830_vb.c
index a7ac054..c9c5408 100644
--- a/src/mesa/drivers/dri/i830/i830_vb.c
+++ b/src/mesa/drivers/dri/i830/i830_vb.c
@@ -445,7 +445,7 @@
    if (!newinputs)
       return;
 
-   if (newinputs & VERT_BIT_CLIP) {
+   if (newinputs & VERT_BIT_POS) {
       setup_tab[imesa->SetupIndex].emit( ctx, start, count, v, stride );
    } else {
       GLuint ind = 0;
diff --git a/src/mesa/drivers/dri/mga/mga_xmesa.c b/src/mesa/drivers/dri/mga/mga_xmesa.c
index 16dc349..49e8cbe 100644
--- a/src/mesa/drivers/dri/mga/mga_xmesa.c
+++ b/src/mesa/drivers/dri/mga/mga_xmesa.c
@@ -239,9 +239,9 @@
 }
 
 
-extern const struct gl_pipeline_stage _mga_render_stage;
+extern const struct tnl_pipeline_stage _mga_render_stage;
 
-static const struct gl_pipeline_stage *mga_pipeline[] = {
+static const struct tnl_pipeline_stage *mga_pipeline[] = {
    &_tnl_vertex_transform_stage, 
    &_tnl_normal_transform_stage, 
    &_tnl_lighting_stage,	
@@ -647,7 +647,7 @@
 
    if (*(dPriv->pStamp) != mmesa->lastStamp) {
       mmesa->lastStamp = *(dPriv->pStamp);
-      mmesa->SetupNewInputs |= VERT_BIT_CLIP;
+      mmesa->SetupNewInputs |= VERT_BIT_POS;
       mmesa->dirty_cliprects = (MGA_FRONT|MGA_BACK);
       mgaUpdateRects( mmesa, (MGA_FRONT|MGA_BACK) );
    }
diff --git a/src/mesa/drivers/dri/mga/mgarender.c b/src/mesa/drivers/dri/mga/mgarender.c
index 516a990..5fb7af9 100644
--- a/src/mesa/drivers/dri/mga/mgarender.c
+++ b/src/mesa/drivers/dri/mga/mgarender.c
@@ -101,7 +101,7 @@
    tnl->Driver.Render.PrimitiveNotify( ctx, flags & PRIM_MODE_MASK );
    tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
    tnl->Driver.Render.PrimTabVerts[flags&PRIM_MODE_MASK]( ctx, start, count, flags );
-   MGA_CONTEXT(ctx)->SetupNewInputs |= VERT_BIT_CLIP;
+   MGA_CONTEXT(ctx)->SetupNewInputs |= VERT_BIT_POS;
 }
 
 #define LOCAL_VARS mgaContextPtr mmesa = MGA_CONTEXT(ctx) 
@@ -131,12 +131,12 @@
 
 
 static GLboolean mga_run_render( GLcontext *ctx,
-				  struct gl_pipeline_stage *stage )
+				  struct tnl_pipeline_stage *stage )
 {
    mgaContextPtr mmesa = MGA_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb; 
-   GLuint i, length, flags = 0;
+   GLuint i;
 
    /* Don't handle clipping or indexed vertices or vertex manipulations.
     */
@@ -147,13 +147,17 @@
    tnl->Driver.Render.Start( ctx );
    mmesa->SetupNewInputs = ~0;      
 
-   for (i = VB->FirstPrimitive ; !(flags & PRIM_LAST) ; i += length)
+   for (i = 0 ; i < VB->PrimitiveCount ; i++)
    {
-      flags = VB->Primitive[i];
-      length= VB->PrimitiveLength[i];	
-      if (length)
-	 mga_render_tab_verts[flags & PRIM_MODE_MASK]( ctx, i, i + length,
-						       flags );
+      GLuint prim = VB->Primitive[i].mode;
+      GLuint start = VB->Primitive[i].start;
+      GLuint length = VB->Primitive[i].count;
+
+      if (!length)
+	 continue;
+
+      mga_render_tab_verts[prim & PRIM_MODE_MASK]( ctx, start, start + length, 
+						   prim);
    } 
 
    tnl->Driver.Render.Finish( ctx );
@@ -162,9 +166,9 @@
 }
 
 
-static void mga_check_render( GLcontext *ctx, struct gl_pipeline_stage *stage )
+static void mga_check_render( GLcontext *ctx, struct tnl_pipeline_stage *stage )
 {
-   GLuint inputs = VERT_BIT_CLIP | VERT_BIT_COLOR0;
+   GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
 
    if (ctx->RenderMode == GL_RENDER) {
       if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) 
@@ -184,13 +188,13 @@
 }
 
 
-static void dtr( struct gl_pipeline_stage *stage )
+static void dtr( struct tnl_pipeline_stage *stage )
 {
    (void)stage;
 }
 
 
-const struct gl_pipeline_stage _mga_render_stage = 
+const struct tnl_pipeline_stage _mga_render_stage = 
 { 
    "mga render",
    (_DD_NEW_SEPARATE_SPECULAR |
diff --git a/src/mesa/drivers/dri/mga/mgavb.c b/src/mesa/drivers/dri/mga/mgavb.c
index 3490686..29b6d05 100644
--- a/src/mesa/drivers/dri/mga/mgavb.c
+++ b/src/mesa/drivers/dri/mga/mgavb.c
@@ -359,7 +359,7 @@
    if (!newinputs)
       return;
 
-   if (newinputs & VERT_BIT_CLIP) {
+   if (newinputs & VERT_BIT_POS) {
       setup_tab[mmesa->SetupIndex].emit( ctx, start, count, v, stride );   
    } else {
       GLuint ind = 0;
diff --git a/src/mesa/drivers/dri/r128/r128_vb.c b/src/mesa/drivers/dri/r128/r128_vb.c
index 7738901..48cdcd5 100644
--- a/src/mesa/drivers/dri/r128/r128_vb.c
+++ b/src/mesa/drivers/dri/r128/r128_vb.c
@@ -379,7 +379,7 @@
    if (!newinputs)
       return;
 
-   if (newinputs & VERT_BIT_CLIP) {
+   if (newinputs & VERT_BIT_POS) {
       setup_tab[rmesa->SetupIndex].emit( ctx, start, count, v, stride );
    } else {
       GLuint ind = 0;
diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c
index 48b438c..5a6586b 100644
--- a/src/mesa/drivers/dri/r200/r200_swtcl.c
+++ b/src/mesa/drivers/dri/r200/r200_swtcl.c
@@ -673,8 +673,7 @@
 		 _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK), 
 		 start, start+length);
 
-      if (length)
-	 tab[prim & PRIM_MODE_MASK]( ctx, start, start + length, flags );
+      tab[prim & PRIM_MODE_MASK]( ctx, start, start + length, flags );
    }
 
    tnl->Driver.Render.Finish( ctx );
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
index 835cecb..7ade6c3 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -147,11 +147,11 @@
     NULL
 };
 
-extern const struct gl_pipeline_stage _radeon_texrect_stage;
-extern const struct gl_pipeline_stage _radeon_render_stage;
-extern const struct gl_pipeline_stage _radeon_tcl_stage;
+extern const struct tnl_pipeline_stage _radeon_texrect_stage;
+extern const struct tnl_pipeline_stage _radeon_render_stage;
+extern const struct tnl_pipeline_stage _radeon_tcl_stage;
 
-static const struct gl_pipeline_stage *radeon_pipeline[] = {
+static const struct tnl_pipeline_stage *radeon_pipeline[] = {
 
    /* Try and go straight to t&l
     */
@@ -392,6 +392,9 @@
    radeonInitState( rmesa );
    radeonInitSwtcl( ctx );
 
+   _mesa_vector4f_alloc( &rmesa->tcl.ObjClean, 0, 
+			 ctx->Const.MaxArrayLockSize, 32 );
+
    rmesa->iw.irq_seq = -1;
    rmesa->irqsEmitted = 0;
    rmesa->do_irqs = (rmesa->radeonScreen->irq && !getenv("RADEON_NO_IRQS"));
@@ -486,6 +489,8 @@
       rmesa->glCtx->DriverCtx = NULL;
       _mesa_destroy_context( rmesa->glCtx );
 
+      _mesa_vector4f_free( &rmesa->tcl.ObjClean );
+
       if (rmesa->state.scissor.pClipRects) {
 	 FREE(rmesa->state.scissor.pClipRects);
 	 rmesa->state.scissor.pClipRects = 0;
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h
index 5f1f965..113d1de 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_context.h
@@ -57,6 +57,8 @@
 #include "radeon_screen.h"
 #include "mm.h"
 
+#include "math/m_vector.h"
+
 /* Flags for software fallback cases */
 /* See correponding strings in radeon_swtcl.c */
 #define RADEON_FALLBACK_TEXTURE		0x0001
@@ -503,6 +505,11 @@
    GLint last_offset;
    GLuint hw_primitive;
 
+   /* Temporary for cases where incoming vertex data is incompatible
+    * with maos code.
+    */
+   GLvector4f ObjClean;
+
    struct radeon_dma_region *aos_components[8];
    GLuint nr_aos_components;
 
@@ -698,12 +705,6 @@
    GLuint Fallback;
    GLuint NewGLState;
 
-   
-   /* Temporaries for translating away float colors:
-    */
-   struct gl_client_array UbyteColor;
-   struct gl_client_array UbyteSecondaryColor;
-
    /* Vertex buffers
     */
    struct radeon_ioctl ioctl;
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h b/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h
index b379bad..c16234a 100644
--- a/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h
+++ b/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h
@@ -39,13 +39,14 @@
 		       void *dest )
 {
    LOCALVARS
-   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+      struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
    GLuint (*tc0)[4], (*tc1)[4], (*tc2)[4];
+   GLfloat (*col)[4], (*spec)[4];
    GLfloat (*fog)[4];
    GLuint (*norm)[4];
-   GLubyte (*col)[4], (*spec)[4];
    GLuint tc0_stride, tc1_stride, col_stride, spec_stride, fog_stride;
    GLuint tc2_stride, norm_stride;
+   GLuint fill_tex = 0;
    GLuint (*coord)[4];
    GLuint coord_stride; /* object coordinates */
    GLubyte dummy[4];
@@ -56,36 +57,20 @@
    if (RADEON_DEBUG & DEBUG_VERTS)
       fprintf(stderr, "%s\n", __FUNCTION__); 
 
-   /* The vertex code expects Obj to be clean to element 3.  To fix
-    * this, add more vertex code (for obj-2, obj-3) or preferably move
-    * to maos.  
-    */
-   if (VB->ObjPtr->size < 3) {
-      if (VB->ObjPtr->flags & VEC_NOT_WRITEABLE) {
-	 VB->import_data( ctx, VERT_BIT_POS, VEC_NOT_WRITEABLE );
-      }
-      _mesa_vector4f_clean_elem( VB->ObjPtr, VB->Count, 2 );
-   }
-
-   if (DO_W && VB->ObjPtr->size < 4) {
-      if (VB->ObjPtr->flags & VEC_NOT_WRITEABLE) {
-	 VB->import_data( ctx, VERT_BIT_POS, VEC_NOT_WRITEABLE );
-      }
-      _mesa_vector4f_clean_elem( VB->ObjPtr, VB->Count, 3 );
-   }
-
    coord = (GLuint (*)[4])VB->ObjPtr->data;
    coord_stride = VB->ObjPtr->stride;
 
    if (DO_TEX2) {
-      const GLuint t2 = GET_TEXSOURCE(2);
-      tc2 = (GLuint (*)[4])VB->TexCoordPtr[t2]->data;
-      tc2_stride = VB->TexCoordPtr[t2]->stride;
-      if (DO_PTEX && VB->TexCoordPtr[t2]->size < 4) {
-	 if (VB->TexCoordPtr[t2]->flags & VEC_NOT_WRITEABLE) {
-	    VB->import_data( ctx, VERT_BIT_TEX2, VEC_NOT_WRITEABLE );
+      if (VB->TexCoordPtr[2]) {
+	 const GLuint t2 = GET_TEXSOURCE(2);
+	 tc2 = (GLuint (*)[4])VB->TexCoordPtr[t2]->data;
+	 tc2_stride = VB->TexCoordPtr[t2]->stride;
+	 if (DO_PTEX && VB->TexCoordPtr[t2]->size < 4) {
+	    fill_tex |= (1<<2);
 	 }
-	 _mesa_vector4f_clean_elem( VB->TexCoordPtr[t2], VB->Count, 3 );
+      } else {
+	 tc2 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX2];
+	 tc2_stride = 0;
       }
    }
 
@@ -95,13 +80,10 @@
 	 tc1 = (GLuint (*)[4])VB->TexCoordPtr[t1]->data;
 	 tc1_stride = VB->TexCoordPtr[t1]->stride;
 	 if (DO_PTEX && VB->TexCoordPtr[t1]->size < 4) {
-	    if (VB->TexCoordPtr[t1]->flags & VEC_NOT_WRITEABLE) {
-	       VB->import_data( ctx, VERT_BIT_TEX1, VEC_NOT_WRITEABLE );
-	    }
-	    _mesa_vector4f_clean_elem( VB->TexCoordPtr[t1], VB->Count, 3 );
+	    fill_tex |= (1<<1);
 	 }
       } else {
-	 tc1 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX1]; /* could be anything, really */
+	 tc1 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX1];
 	 tc1_stride = 0;
       }
    }
@@ -112,13 +94,10 @@
 	 tc0_stride = VB->TexCoordPtr[t0]->stride;
 	 tc0 = (GLuint (*)[4])VB->TexCoordPtr[t0]->data;
 	 if (DO_PTEX && VB->TexCoordPtr[t0]->size < 4) {
-	    if (VB->TexCoordPtr[t0]->flags & VEC_NOT_WRITEABLE) {
-	       VB->import_data( ctx, VERT_BIT_TEX0, VEC_NOT_WRITEABLE );
-	    }
-	    _mesa_vector4f_clean_elem( VB->TexCoordPtr[t0], VB->Count, 3 );
+	    fill_tex |= (1<<0);
 	 }
       } else {
-	 tc0 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX0]; /* could be anything, really */
+	 tc0 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX0];
 	 tc0_stride = 0;
       }
 	 
@@ -136,28 +115,20 @@
 
    if (DO_RGBA) {
       if (VB->ColorPtr[0]) {
-	 /* This is incorrect when colormaterial is enabled:
-	  */
-	 if (VB->ColorPtr[0]->Type != GL_UNSIGNED_BYTE) {
-	    if (0) fprintf(stderr, "IMPORTING FLOAT COLORS\n");
-	    IMPORT_FLOAT_COLORS( ctx );
-	 }
-	 col = (GLubyte (*)[4])VB->ColorPtr[0]->Ptr;
-	 col_stride = VB->ColorPtr[0]->StrideB;
+	 col = VB->ColorPtr[0]->data;
+	 col_stride = VB->ColorPtr[0]->stride;
       } else {
-	 col = &dummy; /* any old memory is fine */
+	 col = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
 	 col_stride = 0;
       }
    }
 
    if (DO_SPEC) {
       if (VB->SecondaryColorPtr[0]) {
-	 if (VB->SecondaryColorPtr[0]->Type != GL_UNSIGNED_BYTE)
-	    IMPORT_FLOAT_SPEC_COLORS( ctx );
-	 spec = (GLubyte (*)[4])VB->SecondaryColorPtr[0]->Ptr;
-	 spec_stride = VB->SecondaryColorPtr[0]->StrideB;
+	 spec = VB->SecondaryColorPtr[0]->data;
+	 spec_stride = VB->SecondaryColorPtr[0]->stride;
       } else {
-	 spec = &dummy;
+	 spec = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR1];
 	 spec_stride = 0;
       }
    }
@@ -173,33 +144,33 @@
    }
    
    
-   if (VB->importable_data) {
-      if (start) {
-	 coord =  (GLuint (*)[4])((GLubyte *)coord + start * coord_stride);
-	 if (DO_TEX0)
-	    tc0 =  (GLuint (*)[4])((GLubyte *)tc0 + start * tc0_stride);
-	 if (DO_TEX1) 
-	    tc1 =  (GLuint (*)[4])((GLubyte *)tc1 + start * tc1_stride);
-	 if (DO_TEX2) 
-	    tc2 =  (GLuint (*)[4])((GLubyte *)tc2 + start * tc2_stride);
-	 if (DO_NORM) 
-	    norm =  (GLuint (*)[4])((GLubyte *)norm + start * norm_stride);
-	 if (DO_RGBA) 
-	    STRIDE_4UB(col, start * col_stride);
-	 if (DO_SPEC)
-	    STRIDE_4UB(spec, start * spec_stride);
-	 if (DO_FOG)
-	    fog =  (GLfloat (*)[4])((GLubyte *)fog + start * fog_stride);
-      }
+   if (start) {
+      coord =  (GLuint (*)[4])((GLubyte *)coord + start * coord_stride);
+      if (DO_TEX0)
+	 tc0 =  (GLuint (*)[4])((GLubyte *)tc0 + start * tc0_stride);
+      if (DO_TEX1) 
+	 tc1 =  (GLuint (*)[4])((GLubyte *)tc1 + start * tc1_stride);
+      if (DO_TEX2) 
+	 tc2 =  (GLuint (*)[4])((GLubyte *)tc2 + start * tc2_stride);
+      if (DO_NORM) 
+	 norm =  (GLuint (*)[4])((GLubyte *)norm + start * norm_stride);
+      if (DO_RGBA) 
+	 STRIDE_4F(col, start * col_stride);
+      if (DO_SPEC)
+	 STRIDE_4F(spec, start * spec_stride);
+      if (DO_FOG)
+	 STRIDE_4F(fog, start * fog_stride);
+   }
 
+
+   {
       for (i=start; i < end; i++) {
+	 
 	 v[0].ui = coord[0][0];
 	 v[1].ui = coord[0][1];
 	 v[2].ui = coord[0][2];
-	 if (TCL_DEBUG) fprintf(stderr, "%d: %.2f %.2f %.2f ", i, v[0].f, v[1].f, v[2].f);
 	 if (DO_W) {
 	    v[3].ui = coord[0][3];
-	    if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[3].f);
 	    v += 4;
 	 } 
 	 else
@@ -210,26 +181,27 @@
 	    v[0].ui = norm[0][0];
 	    v[1].ui = norm[0][1];
 	    v[2].ui = norm[0][2];
-	    if (TCL_DEBUG) fprintf(stderr, "norm: %.2f %.2f %.2f ", v[0].f, v[1].f, v[2].f);
 	    v += 3;
 	    norm =  (GLuint (*)[4])((GLubyte *)norm +  norm_stride);
 	 }
 	 if (DO_RGBA) {
-	    v[0].ui = LE32_TO_CPU(*(GLuint *)&col[0]);
-	    STRIDE_4UB(col, col_stride);
-	    if (TCL_DEBUG) fprintf(stderr, "%x ", v[0].ui);
+	    UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.red, col[0][0]);
+	    UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.green, col[0][1]);
+	    UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.blue, col[0][2]);
+	    UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.alpha, col[0][3]);
+	    STRIDE_4F(col, col_stride);
 	    v++;
 	 }
 	 if (DO_SPEC || DO_FOG) {
 	    if (DO_SPEC) {
-	       v[0].specular.red   = spec[0][0];
-	       v[0].specular.green = spec[0][1];
-	       v[0].specular.blue  = spec[0][2];
-	       STRIDE_4UB(spec, spec_stride);
+	       UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.red, spec[0][0]);
+	       UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.green, spec[0][1]);
+	       UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.blue, spec[0][2]);
+	       STRIDE_4F(spec, spec_stride);
 	    }
 	    if (DO_FOG) {
-	       v[0].specular.alpha = fog[0][0] * 255.0;
-               fog = (GLfloat (*)[4])((GLubyte *)fog + fog_stride);
+	       UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.alpha, fog[0][0]);
+	       fog = (GLfloat (*)[4])((GLubyte *)fog + fog_stride);
 	    }
 	    if (TCL_DEBUG) fprintf(stderr, "%x ", v[0].ui);
 	    v++;
@@ -239,7 +211,10 @@
 	    v[1].ui = tc0[0][1];
 	    if (TCL_DEBUG) fprintf(stderr, "t0: %.2f %.2f ", v[0].f, v[1].f);
 	    if (DO_PTEX) {
-	       v[2].ui = tc0[0][3];
+	       if (fill_tex & (1<<0))
+		  v[2].f = 1.0;
+	       else
+		  v[2].ui = tc0[0][3];
 	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
 	       v += 3;
 	    } 
@@ -252,7 +227,10 @@
 	    v[1].ui = tc1[0][1];
 	    if (TCL_DEBUG) fprintf(stderr, "t1: %.2f %.2f ", v[0].f, v[1].f);
 	    if (DO_PTEX) {
-	       v[2].ui = tc1[0][3];
+	       if (fill_tex & (1<<1))
+		  v[2].f = 1.0;
+	       else
+		  v[2].ui = tc1[0][3];
 	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
 	       v += 3;
 	    } 
@@ -264,7 +242,10 @@
 	    v[0].ui = tc2[0][0];
 	    v[1].ui = tc2[0][1];
 	    if (DO_PTEX) {
-	       v[2].ui = tc2[0][3];
+	       if (fill_tex & (1<<2))
+		  v[2].f = 1.0;
+	       else
+		  v[2].ui = tc2[0][3];
 	       v += 3;
 	    } 
 	    else
@@ -273,71 +254,6 @@
 	 } 
 	 if (TCL_DEBUG) fprintf(stderr, "\n");
       }
-   } else {
-      for (i=start; i < end; i++) {
-	 v[0].ui = coord[i][0];
-	 v[1].ui = coord[i][1];
-	 v[2].ui = coord[i][2];
-	 if (DO_W) {
-	    v[3].ui = coord[i][3];
-	    v += 4;
-	 } 
-	 else
-	    v += 3;
-
-	 if (DO_NORM) {
-	    v[0].ui = norm[i][0];
-	    v[1].ui = norm[i][1];
-	    v[2].ui = norm[i][2];
-	    v += 3;
-	 }
-	 if (DO_RGBA) {
-	    v[0].ui = LE32_TO_CPU(*(GLuint *)&col[i]);
-	    v++;
-	 }
-	 if (DO_SPEC || DO_FOG) {
-	    if (DO_SPEC) {
-	       v[0].specular.red   = spec[i][0];
-	       v[0].specular.green = spec[i][1];
-	       v[0].specular.blue  = spec[i][2];
-	    }
-	    if (DO_FOG) {
-               GLfloat *f = (GLfloat *) ((GLubyte *)fog + fog_stride);
-               v[0].specular.alpha = *f * 255.0;
-	    }
-	    v++;
-	 }
-	 if (DO_TEX0) {
-	    v[0].ui = tc0[i][0];
-	    v[1].ui = tc0[i][1];
-	    if (DO_PTEX) {
-	       v[2].ui = tc0[i][3];
-	       v += 3;
-	    } 
-	    else
-	       v += 2;
-	 }
-	 if (DO_TEX1) {
-	    v[0].ui = tc1[i][0];
-	    v[1].ui = tc1[i][1];
-	    if (DO_PTEX) {
-	       v[2].ui = tc1[i][3];
-	       v += 3;
-	    } 
-	    else
-	       v += 2;
-	 } 
-	 if (DO_TEX2) {
-	    v[0].ui = tc2[i][0];
-	    v[1].ui = tc2[i][1];
-	    if (DO_PTEX) {
-	       v[2].ui = tc2[i][3];
-	       v += 3;
-	    } 
-	    else
-	       v += 2;
-	 } 
-      }
    }
 }
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
index 39b1f57..09e8a35 100644
--- a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
+++ b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
@@ -40,7 +40,6 @@
 #include "array_cache/acache.h"
 #include "tnl/tnl.h"
 #include "tnl/t_pipeline.h"
-#include "tnl/t_imm_debug.h"
 
 #include "radeon_context.h"
 #include "radeon_state.h"
@@ -53,7 +52,7 @@
 
 #define RADEON_TCL_MAX_SETUP 13
 
-union emit_union { float f; GLuint ui; radeon_color_t specular; };
+union emit_union { float f; GLuint ui; radeon_color_t rgba; };
 
 static struct {
    void   (*emit)( GLcontext *, GLuint, GLuint, void * );
@@ -308,6 +307,41 @@
 			      setup_tab[i].vertex_size * 4, 
 			      4);
 
+   /* The vertex code expects Obj to be clean to element 3.  To fix
+    * this, add more vertex code (for obj-2, obj-3) or preferably move
+    * to maos.  
+    */
+   if (VB->ObjPtr->size < 3 || 
+       (VB->ObjPtr->size == 3 && 
+	(setup_tab[i].vertex_format & RADEON_CP_VC_FRMT_W0))) {
+
+      _math_trans_4f( rmesa->tcl.ObjClean.data,
+		      VB->ObjPtr->data,
+		      VB->ObjPtr->stride,
+		      GL_FLOAT,
+		      VB->ObjPtr->size,
+		      0,
+		      VB->Count );
+
+      switch (VB->ObjPtr->size) {
+      case 1:
+	    _mesa_vector4f_clean_elem(&rmesa->tcl.ObjClean, VB->Count, 1);
+      case 2:
+	    _mesa_vector4f_clean_elem(&rmesa->tcl.ObjClean, VB->Count, 2);
+      case 3:
+	 if (setup_tab[i].vertex_format & RADEON_CP_VC_FRMT_W0) {
+	    _mesa_vector4f_clean_elem(&rmesa->tcl.ObjClean, VB->Count, 3);
+	 }
+      case 4:
+      default:
+	 break;
+      }
+
+      VB->ObjPtr = &rmesa->tcl.ObjClean;
+   }
+
+
+
    setup_tab[i].emit( ctx, 0, VB->Count, 
 		      rmesa->tcl.indexed_verts.address + 
 		      rmesa->tcl.indexed_verts.start );
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
index 7b1bbe7..c15f341 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@@ -2115,10 +2115,30 @@
    radeonVtxfmtInvalidate( ctx );
 }
 
+
+/* A hack.  Need a faster way to find this out.  Returns GL_TRUE if any
+ * vb.AttribPtr[] entry in the scanned range is set with a non-zero stride.  */
+static GLboolean check_material( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLint i;
+
+   for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT; 
+	i < _TNL_ATTRIB_MAT_BACK_INDEXES; /* NOTE(review): bound is exclusive, BACK_INDEXES itself is not checked -- confirm intended */
+	i++)
+      if (tnl->vb.AttribPtr[i] &&
+	  tnl->vb.AttribPtr[i]->stride)
+	 return GL_TRUE;
+
+   return GL_FALSE;
+}
+      
+
 static void radeonWrapRunPipeline( GLcontext *ctx )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLboolean has_material;
 
    if (0)
       fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
@@ -2128,7 +2148,9 @@
    if (rmesa->NewGLState)
       radeonValidateState( ctx );
 
-   if (tnl->vb.Material) {
+   has_material = (ctx->Light.Enabled && check_material( ctx ));
+
+   if (has_material) {
       TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_MATERIAL, GL_TRUE );
    }
 
@@ -2136,7 +2158,7 @@
     */ 
    _tnl_run_pipeline( ctx );
 
-   if (tnl->vb.Material) {
+   if (has_material) {
       TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_MATERIAL, GL_FALSE );
       radeonUpdateMaterial( ctx ); /* not needed any more? */
    }
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
index 926b152..48e57c8 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -44,8 +44,8 @@
 #include "math/m_translate.h"
 #include "tnl/tnl.h"
 #include "tnl/t_context.h"
-#include "tnl/t_imm_exec.h"
 #include "tnl/t_pipeline.h"
+#include "tnl/t_vtx_api.h"	/* for _tnl_FlushVertices */
 
 #include "radeon_context.h"
 #include "radeon_ioctl.h"
@@ -567,7 +567,7 @@
    tnl->Driver.Render.PrimitiveNotify( ctx, flags & PRIM_MODE_MASK );
    tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
    tnl->Driver.Render.PrimTabVerts[flags&PRIM_MODE_MASK]( ctx, start, count, flags );
-   RADEON_CONTEXT(ctx)->swtcl.SetupNewInputs = VERT_BIT_CLIP;
+   RADEON_CONTEXT(ctx)->swtcl.SetupNewInputs = VERT_BIT_POS;
 }
 
 static void ELT_FALLBACK( GLcontext *ctx,
@@ -579,7 +579,7 @@
    tnl->Driver.Render.PrimitiveNotify( ctx, flags & PRIM_MODE_MASK );
    tnl->Driver.Render.BuildVertices( ctx, start, count, ~0 );
    tnl->Driver.Render.PrimTabElts[flags&PRIM_MODE_MASK]( ctx, start, count, flags );
-   RADEON_CONTEXT(ctx)->swtcl.SetupNewInputs = VERT_BIT_CLIP;
+   RADEON_CONTEXT(ctx)->swtcl.SetupNewInputs = VERT_BIT_POS;
 }
 
 
@@ -667,7 +667,7 @@
 
 
 static GLboolean radeon_run_render( GLcontext *ctx,
-				    struct gl_pipeline_stage *stage )
+				    struct tnl_pipeline_stage *stage )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
@@ -701,18 +701,22 @@
 	    return GL_TRUE;	/* too many vertices */
    }
 
-   for (i = 0 ; !(flags & PRIM_LAST) ; i += length)
+   for (i = 0 ; i < VB->PrimitiveCount ; i++)
    {
-      flags = VB->Primitive[i];
-      length = VB->PrimitiveLength[i];
+      GLuint prim = VB->Primitive[i].mode;	/* full mode word; masked below to pick the render function */
+      GLuint start = VB->Primitive[i].start;
+      GLuint length = VB->Primitive[i].count;
+
+      if (!length)
+	 continue;
 
       if (RADEON_DEBUG & DEBUG_PRIMS)
 	 fprintf(stderr, "radeon_render.c: prim %s %d..%d\n", 
-		 _mesa_lookup_enum_by_nr(flags & PRIM_MODE_MASK), 
-		 i, i+length);
+		 _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK), 
+		 start, start+length);
 
       if (length)
-	 tab[flags & PRIM_MODE_MASK]( ctx, i, i + length, flags );
+	 tab[prim & PRIM_MODE_MASK]( ctx, start, start + length, prim );
    }
 
    tnl->Driver.Render.Finish( ctx );
@@ -723,9 +727,9 @@
 
 
 static void radeon_check_render( GLcontext *ctx,
-				 struct gl_pipeline_stage *stage )
+				 struct tnl_pipeline_stage *stage )
 {
-   GLuint inputs = VERT_BIT_POS | VERT_BIT_CLIP | VERT_BIT_COLOR0;
+   GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
 
    if (ctx->RenderMode == GL_RENDER) {
       if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
@@ -745,13 +749,13 @@
 }
 
 
-static void dtr( struct gl_pipeline_stage *stage )
+static void dtr( struct tnl_pipeline_stage *stage )
 {
    (void)stage;
 }
 
 
-const struct gl_pipeline_stage _radeon_render_stage =
+const struct tnl_pipeline_stage _radeon_render_stage =
 {
    "radeon render",
    (_DD_NEW_SEPARATE_SPECULAR |
@@ -784,7 +788,7 @@
 
 
 static GLboolean run_texrect_stage( GLcontext *ctx,
-				    struct gl_pipeline_stage *stage )
+				    struct tnl_pipeline_stage *stage )
 {
    struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage);
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
@@ -826,7 +830,7 @@
 /* Called the first time stage->run() is invoked.
  */
 static GLboolean alloc_texrect_data( GLcontext *ctx,
-				     struct gl_pipeline_stage *stage )
+				     struct tnl_pipeline_stage *stage )
 {
    struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
    struct texrect_stage_data *store;
@@ -848,7 +852,7 @@
 
 
 static void check_texrect( GLcontext *ctx,
-			   struct gl_pipeline_stage *stage )
+			   struct tnl_pipeline_stage *stage )
 {
    GLuint flags = 0;
 
@@ -864,7 +868,7 @@
 }
 
 
-static void free_texrect_data( struct gl_pipeline_stage *stage )
+static void free_texrect_data( struct tnl_pipeline_stage *stage )
 {
    struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage);
    GLuint i;
@@ -879,7 +883,7 @@
 }
 
 
-const struct gl_pipeline_stage _radeon_texrect_stage =
+const struct tnl_pipeline_stage _radeon_texrect_stage =
 {
    "radeon texrect stage",			/* name */
    _NEW_TEXTURE,	/* check_state */
@@ -1271,7 +1275,7 @@
 
 void radeonFlushVertices( GLcontext *ctx, GLuint flags )
 {
-   _tnl_flush_vertices( ctx, flags );
+   _tnl_FlushVertices( ctx, flags );
 
    if (flags & FLUSH_STORED_VERTICES)
       RADEON_NEWPRIM( RADEON_CONTEXT( ctx ) );
@@ -1320,13 +1324,4 @@
       rmesa->swtcl.verts = 0;
    }
 
-   if (rmesa->UbyteSecondaryColor.Ptr) {
-      ALIGN_FREE(rmesa->UbyteSecondaryColor.Ptr);
-      rmesa->UbyteSecondaryColor.Ptr = 0;
-   }
-
-   if (rmesa->UbyteColor.Ptr) {
-      ALIGN_FREE(rmesa->UbyteColor.Ptr);
-      rmesa->UbyteColor.Ptr = 0;
-   }
 }
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
index 651194a..ffa09d1 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
@@ -277,7 +277,7 @@
 /* TCL render.
  */
 static GLboolean radeon_run_tcl_render( GLcontext *ctx,
-					struct gl_pipeline_stage *stage )
+					struct tnl_pipeline_stage *stage )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
@@ -297,24 +297,19 @@
 
    rmesa->tcl.Elts = VB->Elts;
 
-   for (i = VB->FirstPrimitive ; !(flags & PRIM_LAST) ; i += length)
+   for (i = 0 ; i < VB->PrimitiveCount ; i++)
    {
-      flags = VB->Primitive[i];
-      length = VB->PrimitiveLength[i];
-
-      if (RADEON_DEBUG & DEBUG_PRIMS)
-	 fprintf(stderr, "%s: prim %s %d..%d\n", 
-		 __FUNCTION__,
-		 _mesa_lookup_enum_by_nr(flags & PRIM_MODE_MASK), 
-		 i, i+length);
+      GLuint prim = VB->Primitive[i].mode;
+      GLuint start = VB->Primitive[i].start;
+      GLuint length = VB->Primitive[i].count;
 
       if (!length)
 	 continue;
 
       if (rmesa->tcl.Elts)
-	 radeonEmitEltPrimitive( ctx, i, i+length, flags );
+	 radeonEmitEltPrimitive( ctx, start, start+length, prim );
       else
-	 radeonEmitPrimitive( ctx, i, i+length, flags );
+	 radeonEmitPrimitive( ctx, start, start+length, prim );
    }
 
    return GL_FALSE;		/* finished the pipe */
@@ -323,7 +318,7 @@
 
 
 static void radeon_check_tcl_render( GLcontext *ctx,
-				     struct gl_pipeline_stage *stage )
+				     struct tnl_pipeline_stage *stage )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    GLuint inputs = VERT_BIT_POS;
@@ -374,13 +369,13 @@
 }
 
 static void radeon_init_tcl_render( GLcontext *ctx,
-				    struct gl_pipeline_stage *stage )
+				    struct tnl_pipeline_stage *stage )
 {
    stage->check = radeon_check_tcl_render;
    stage->check( ctx, stage );
 }
 
-static void dtr( struct gl_pipeline_stage *stage )
+static void dtr( struct tnl_pipeline_stage *stage )
 {
    (void)stage;
 }
@@ -388,7 +383,7 @@
 
 /* Initial state for tcl stage.  
  */
-const struct gl_pipeline_stage _radeon_tcl_stage =
+const struct tnl_pipeline_stage _radeon_tcl_stage =
 {
    "radeon render",
    (_DD_NEW_SEPARATE_SPECULAR |
diff --git a/src/mesa/drivers/dri/radeon/radeon_vtxfmt.c b/src/mesa/drivers/dri/radeon/radeon_vtxfmt.c
index b613e9e..e8c4e4e 100644
--- a/src/mesa/drivers/dri/radeon/radeon_vtxfmt.c
+++ b/src/mesa/drivers/dri/radeon/radeon_vtxfmt.c
@@ -973,8 +973,8 @@
    vfmt->FogCoordfEXT = _mesa_noop_FogCoordfEXT;
    vfmt->EdgeFlag = _mesa_noop_EdgeFlag;
    vfmt->EdgeFlagv = _mesa_noop_EdgeFlagv;
-   vfmt->Indexi = _mesa_noop_Indexi;
-   vfmt->Indexiv = _mesa_noop_Indexiv;
+   vfmt->Indexf = _mesa_noop_Indexf;
+   vfmt->Indexfv = _mesa_noop_Indexfv;
 
 
    /* Active but unsupported -- fallback if we receive these:
diff --git a/src/mesa/drivers/dri/radeon/radeon_vtxfmt_c.c b/src/mesa/drivers/dri/radeon/radeon_vtxfmt_c.c
index 188e34a..0d5c2bc 100644
--- a/src/mesa/drivers/dri/radeon/radeon_vtxfmt_c.c
+++ b/src/mesa/drivers/dri/radeon/radeon_vtxfmt_c.c
@@ -831,16 +831,10 @@
 {
    vfmt->Color3f = choose_Color3f;
    vfmt->Color3fv = choose_Color3fv;
-   vfmt->Color3ub = choose_Color3ub;
-   vfmt->Color3ubv = choose_Color3ubv;
    vfmt->Color4f = choose_Color4f;
    vfmt->Color4fv = choose_Color4fv;
-   vfmt->Color4ub = choose_Color4ub;
-   vfmt->Color4ubv = choose_Color4ubv;
    vfmt->SecondaryColor3fEXT = choose_SecondaryColor3fEXT;
    vfmt->SecondaryColor3fvEXT = choose_SecondaryColor3fvEXT;
-   vfmt->SecondaryColor3ubEXT = choose_SecondaryColor3ubEXT;
-   vfmt->SecondaryColor3ubvEXT = choose_SecondaryColor3ubvEXT;
    vfmt->MultiTexCoord1fARB = choose_MultiTexCoord1fARB;
    vfmt->MultiTexCoord1fvARB = choose_MultiTexCoord1fvARB;
    vfmt->MultiTexCoord2fARB = choose_MultiTexCoord2fARB;
@@ -855,6 +849,15 @@
    vfmt->Vertex2fv = choose_Vertex2fv;
    vfmt->Vertex3f = choose_Vertex3f;
    vfmt->Vertex3fv = choose_Vertex3fv;
+
+#if 0
+   vfmt->Color3ub = choose_Color3ub;
+   vfmt->Color3ubv = choose_Color3ubv;
+   vfmt->Color4ub = choose_Color4ub;
+   vfmt->Color4ubv = choose_Color4ubv;
+   vfmt->SecondaryColor3ubEXT = choose_SecondaryColor3ubEXT;
+   vfmt->SecondaryColor3ubvEXT = choose_SecondaryColor3ubvEXT;
+#endif
 }
 
 
diff --git a/src/mesa/drivers/dri/sis/sis_vb.c b/src/mesa/drivers/dri/sis/sis_vb.c
index 6d56451..5c17bd3 100644
--- a/src/mesa/drivers/dri/sis/sis_vb.c
+++ b/src/mesa/drivers/dri/sis/sis_vb.c
@@ -361,7 +361,7 @@
    if (!newinputs)
       return;
 
-   if (newinputs & VERT_BIT_CLIP) {
+   if (newinputs & VERT_BIT_POS) {
       setup_tab[smesa->SetupIndex].emit( ctx, start, count, v, stride );
    } else {
       GLuint ind = 0;
diff --git a/src/mesa/drivers/glide/fxdd.c b/src/mesa/drivers/glide/fxdd.c
index 09844a6..386fa5b 100644
--- a/src/mesa/drivers/glide/fxdd.c
+++ b/src/mesa/drivers/glide/fxdd.c
@@ -2,7 +2,7 @@
  * fxDDReadPixels888 does not convert 8A8R8G8B into 5R5G5B
  */
 
-/* $Id: fxdd.c,v 1.100 2003/10/02 17:36:44 brianp Exp $ */
+/* $Id: fxdd.c,v 1.100.2.1 2003/11/21 13:40:21 keithw Exp $ */
 
 /*
  * Mesa 3-D graphics library
@@ -960,7 +960,7 @@
  }
 }
 
-static const struct gl_pipeline_stage *fx_pipeline[] = {
+static const struct tnl_pipeline_stage *fx_pipeline[] = {
    &_tnl_vertex_transform_stage,	/* TODO: Add the fastpath here */
    &_tnl_normal_transform_stage,
    &_tnl_lighting_stage,
diff --git a/src/mesa/drivers/glide/fxvb.c b/src/mesa/drivers/glide/fxvb.c
index da9108b..2923530 100644
--- a/src/mesa/drivers/glide/fxvb.c
+++ b/src/mesa/drivers/glide/fxvb.c
@@ -1,4 +1,4 @@
-/* $Id: fxvb.c,v 1.19 2003/10/02 17:36:45 brianp Exp $ */
+/* $Id: fxvb.c,v 1.19.2.1 2003/11/21 13:40:21 keithw Exp $ */
 
 /*
  * Mesa 3-D graphics library
@@ -301,7 +301,7 @@
    if (!newinputs)
       return;
 
-   if (newinputs & VERT_BIT_CLIP) {
+   if (newinputs & VERT_BIT_POS) {
       setup_tab[fxMesa->SetupIndex].emit( ctx, start, count, v );   
    } else {
       GLuint ind = 0;
diff --git a/src/mesa/main/api_noop.c b/src/mesa/main/api_noop.c
index 74af0fc..f622873 100644
--- a/src/mesa/main/api_noop.c
+++ b/src/mesa/main/api_noop.c
@@ -789,6 +789,7 @@
 }
 
 
+
 /* Build a vertexformat full of things to use outside begin/end pairs.
  * 
  * TODO -- build a whole dispatch table for this purpose, and likewise
diff --git a/src/mesa/main/macros.h b/src/mesa/main/macros.h
index fcb63f5..e0ede08 100644
--- a/src/mesa/main/macros.h
+++ b/src/mesa/main/macros.h
@@ -109,6 +109,8 @@
 #define STRIDE_UI(p, i)  (p = (GLuint *)((GLubyte *)p + i))
 /** Stepping a GLubyte[4] pointer by a byte stride */
 #define STRIDE_4UB(p, i)  (p = (GLubyte (*)[4])((GLubyte *)p + i))
+/** Stepping a GLfloat[4] pointer by a byte stride */
+#define STRIDE_4F(p, i)  (p = (GLfloat (*)[4])((GLubyte *)p + i))
 /** Stepping a GLchan[4] pointer by a byte stride */
 #define STRIDE_4CHAN(p, i)  (p = (GLchan (*)[4])((GLubyte *)p + i))
 /** Stepping a GLchan pointer by a byte stride */
diff --git a/src/mesa/tnl/t_vb_light.c b/src/mesa/tnl/t_vb_light.c
index 88b62cf..d787267 100644
--- a/src/mesa/tnl/t_vb_light.c
+++ b/src/mesa/tnl/t_vb_light.c
@@ -97,7 +97,7 @@
    store->mat_count = 0;
    store->mat_bitmask = 0;
 
-   /* If ColorMaterial enabled, set overwrite effected AttrPtr's with
+   /* If ColorMaterial enabled, overwrite affected AttrPtr's with
     * the color pointer.  This could be done earlier.
     */
    if (ctx->Light.ColorMaterialEnabled) {
@@ -166,7 +166,8 @@
 }
 
 
-static GLboolean run_lighting( GLcontext *ctx, struct tnl_pipeline_stage *stage )
+static GLboolean run_lighting( GLcontext *ctx, 
+			       struct tnl_pipeline_stage *stage )
 {
    struct light_stage_data *store = LIGHT_STAGE_DATA(stage);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
@@ -286,6 +287,11 @@
    _mesa_vector4f_alloc( &store->LitIndex[0], 0, size, 32 );
    _mesa_vector4f_alloc( &store->LitIndex[1], 0, size, 32 );
 
+   store->LitIndex[0].size = 1;
+   store->LitIndex[0].stride = sizeof(GLfloat);
+   store->LitIndex[1].size = 1;
+   store->LitIndex[1].stride = sizeof(GLfloat);
+
    /* Now validate the stage derived data...
     */
    stage->run = run_validate_lighting;
diff --git a/src/mesa/tnl/t_vb_render.c b/src/mesa/tnl/t_vb_render.c
index cdec73b..ec0d11b 100644
--- a/src/mesa/tnl/t_vb_render.c
+++ b/src/mesa/tnl/t_vb_render.c
@@ -306,7 +306,7 @@
    {
       GLint i;
 
-      for (i = 0 ; i < VB->PrimitiveCount ; i ++)
+      for (i = 0 ; i < VB->PrimitiveCount ; i++)
       {
 	 GLuint prim = VB->Primitive[i].mode;
 	 GLuint start = VB->Primitive[i].start;
diff --git a/src/mesa/tnl/t_vtx_exec.c b/src/mesa/tnl/t_vtx_exec.c
index d9f2f66..4c59e5d 100644
--- a/src/mesa/tnl/t_vtx_exec.c
+++ b/src/mesa/tnl/t_vtx_exec.c
@@ -34,6 +34,33 @@
 #include "t_vtx_api.h"
 #include "t_pipeline.h"
 
+/* Debug helper: print a summary line for tnl->vtx (vertices emitted so far,
+ * prim_count, vertex_size), then one line per entry of tnl->vtx.prim[].
+ */
+static void _tnl_print_vtx( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint count = tnl->vtx.initial_counter - tnl->vtx.counter;
+   GLuint i;
+
+   _mesa_debug(0, "%s: %u vertices %d primitives, %d vertsize\n",
+	       __FUNCTION__,
+               count,
+	       tnl->vtx.prim_count,
+	       tnl->vtx.vertex_size);
+
+   for (i = 0 ; i < tnl->vtx.prim_count ; i++) {
+      struct tnl_prim *prim = &tnl->vtx.prim[i];
+      _mesa_debug(0, "   prim %u: %s %d..%d %s %s\n",
+		  i, 
+		  _mesa_lookup_enum_by_nr(prim->mode & PRIM_MODE_MASK),
+		  prim->start, 
+		  prim->start + prim->count,
+		  (prim->mode & PRIM_BEGIN) ? "BEGIN" : "(wrap)",
+		  (prim->mode & PRIM_END) ? "END" : "(wrap)");
+   }
+}
+
 GLboolean *_tnl_translate_edgeflag( GLcontext *ctx, const GLfloat *data,
 				    GLuint count, GLuint stride )
 {
@@ -241,6 +268,9 @@
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx);
 
+   if (0)
+      _tnl_print_vtx( ctx );
+
    if (tnl->vtx.prim_count && 
        tnl->vtx.counter != tnl->vtx.initial_counter) {
 
diff --git a/src/mesa/tnl_dd/t_dd_vbtmp.h b/src/mesa/tnl_dd/t_dd_vbtmp.h
index 230edf0..14b4a28 100644
--- a/src/mesa/tnl_dd/t_dd_vbtmp.h
+++ b/src/mesa/tnl_dd/t_dd_vbtmp.h
@@ -248,9 +248,9 @@
 	 STRIDE_4F(col, col_stride);
       }
       if (DO_SPEC) {
-	 UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.red, col[0][0]);
-	 UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.green, col[0][1]);
-	 UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.blue, col[0][2]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.red, spec[0][0]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.green, spec[0][1]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.blue, spec[0][2]);
 	 STRIDE_4F(spec, spec_stride);
       }
       if (DO_FOG) {
diff --git a/src/mesa/x86/gen_matypes.c b/src/mesa/x86/gen_matypes.c
index b4e7e6b..e57c732 100644
--- a/src/mesa/x86/gen_matypes.c
+++ b/src/mesa/x86/gen_matypes.c
@@ -178,7 +178,6 @@
    DEFINE( "VERT_BIT_END_VB        ", VERT_BIT_END_VB );
    DEFINE( "VERT_BIT_POINT_SIZE    ", VERT_BIT_POINT_SIZE );
    DEFINE( "VERT_BIT_EYE           ", VERT_BIT_EYE );
-   DEFINE( "VERT_BIT_CLIP          ", VERT_BIT_CLIP );
    printf( "\n" );
    DEFINE( "VERT_BIT_OBJ_23        ", VERT_BIT_OBJ_3 );
    DEFINE( "VERT_BIT_OBJ_234       ", VERT_BIT_OBJ_4 );