// Copyright ©2016 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !noasm,!appengine
| |
| #include "textflag.h" |
| |
// func L1Norm(x []float64) float64
//
// L1Norm returns the sum of the absolute values of the elements of x.
// An empty slice yields 0.
//
// Arguments (FP-relative, as referenced below):
//   x_base+0(FP)   address of the first element of x
//   x_len+8(FP)    number of elements in x
//   sum+24(FP)     float64 result slot
//
// Register roles:
//   SI      base address of x
//   AX      element index i
//   CX      loop counter (8-element blocks first, then tail elements)
//   BX      len(x) % 8, the elements left for the scalar tail
//   X0-X3   partial sums, two float64 lanes each; X0 lane 0 holds the total
//   X4-X7   |x[i]| values about to be accumulated
//   X8      0x7FFFFFFFFFFFFFFF in both lanes (clears the float64 sign bit)
//
// |x[i]| is formed by masking off the sign bit instead of evaluating
// max(p+x, p-x). With an infinite partial sum p and x = +Inf, p-x is
// Inf-Inf = NaN, and MAXPD/MAXSD return their source operand whenever either
// operand is NaN, so the max form produces NaN where +Inf is the answer.
TEXT ·L1Norm(SB), NOSPLIT, $0
	MOVQ    x_base+0(FP), SI // SI = &x[0]
	MOVQ    x_len+8(FP), CX  // CX = len(x)
	XORQ    AX, AX           // i = 0
	PXOR    X0, X0           // partial sums = 0 (X0 is also the empty-slice result)
	PXOR    X1, X1
	PXOR    X2, X2
	PXOR    X3, X3
	PCMPEQL X8, X8           // X8 = all ones
	PSRLQ   $1, X8           // X8 = 0x7FFFFFFFFFFFFFFF per lane: every bit but the sign
	TESTQ   CX, CX           // if len(x) == 0 { return 0 }
	JZ      absum_end
	MOVQ    CX, BX
	ANDQ    $7, BX           // BX = len(x) % 8
	SHRQ    $3, CX           // CX = len(x) / 8; SHRQ sets ZF when the quotient is 0
	JZ      absum_tail_start // fewer than 8 elements: scalar tail only

absum_loop: // do {
	// Accumulate 8 elements per iteration: p_sum_k += |x[i+2k : i+2k+2]|
	MOVUPS (SI)(AX*8), X4   // X4 = x[i:i+2] (unaligned load)
	MOVUPS 16(SI)(AX*8), X5 // X5 = x[i+2:i+4]
	MOVUPS 32(SI)(AX*8), X6 // X6 = x[i+4:i+6]
	MOVUPS 48(SI)(AX*8), X7 // X7 = x[i+6:i+8]
	ANDPD  X8, X4           // clear sign bits: X_k = |X_k|
	ANDPD  X8, X5
	ANDPD  X8, X6
	ANDPD  X8, X7
	ADDPD  X4, X0           // p_sum_k += |X_k|
	ADDPD  X5, X1
	ADDPD  X6, X2
	ADDPD  X7, X3
	ADDQ   $8, AX           // i += 8
	DECQ   CX
	JNZ    absum_loop       // } while --CX != 0

	// Fold the four accumulators: X0 = (p_sum_0 + p_sum_1) + (p_sum_2 + p_sum_3)
	ADDPD X1, X0
	ADDPD X3, X2
	ADDPD X2, X0

	// Horizontal add: X0[0] = X0[1] + X0[0]
	MOVAPS X0, X1       // X1 keeps the original low lane
	SHUFPD $0x3, X0, X0 // lower( X0 ) = upper( X0 )
	ADDSD  X1, X0
	TESTQ  BX, BX
	JZ     absum_end    // if len(x) % 8 == 0 { goto absum_end }

absum_tail_start:
	MOVQ BX, CX // CX = number of tail elements; always > 0 on both paths here

absum_tail: // do {
	MOVSD (SI)(AX*8), X4 // X4 = x[i]
	ANDPD X8, X4         // X4 = |x[i]|
	ADDSD X4, X0         // p_sum_0 += |x[i]|
	INCQ  AX             // i++
	DECQ  CX
	JNZ   absum_tail     // } while --CX != 0

absum_end: // return p_sum_0
	MOVSD X0, sum+24(FP)
	RET