Reputation: 13
I have a JavaScript file that assigns a bucket to a customer based on the algorithm number and test number (function getBuckets). However, I am having trouble porting this function from JavaScript and defining it in standard SQL so that I can use it on specific databases.
Here is the Input Schema from Big Query that I am trying to get a bucket number for:
7354430 AS customerId,
4 AS algorithmIndex,
5947 AS testId
Here is what the output should be after running my sql:
customerId,
bucketNumber
Does anyone know how to bring in the getBuckets function from the javascript into big query standard sql in order to get the bucketNumber?
Javascript is below:
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* SHA-1 implementation in JavaScript (c) Chris Veness 2002-2014 / MIT Licence */
/* */
/* - see http://csrc.nist.gov/groups/ST/toolkit/secure_hashing.html */
/* http://csrc.nist.gov/groups/ST/toolkit/examples.html */
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* jshint node:true *//* global define, escape, unescape */
'use strict';
/**
* SHA-1 hash function reference implementation.
*
* @namespace
*/
var Sha1 = {};
/**
 * Computes the SHA-1 digest of a string.
 *
 * The string is first converted to UTF-8 (one byte per character code) via
 * String.prototype.utf8Encode, then padded and processed in 512-bit blocks.
 * Section markers such as [§5.1.1] are carried over from the original reference comments.
 *
 * @param {string} msg - (Unicode) string to be hashed.
 * @returns {string} Hash of msg as hex character string (five words rendered by Sha1.toHexStr).
 */
Sha1.hash = function(msg) {
    // one additive constant per group of 20 rounds [§4.2.1]
    var roundConstants = [ 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 ];

    // SHA only deals with byte-streams, so work on the UTF-8 form of the text
    var payload = msg.utf8Encode();
    var bitLength = payload.length * 8;

    // PREPROCESSING: the padding begins with a single '1' bit, i.e. the byte 0x80 [§5.1.1]
    var padded = payload + String.fromCharCode(0x80);

    // 32-bit words needed: the message including the marker byte, plus two words for the length
    var wordsNeeded = padded.length / 4 + 2;
    var blockCount = Math.ceil(wordsNeeded / 16);

    // split into blocks of sixteen big-endian 32-bit words [§5.2.1]
    var blocks = [];
    for (var blockIdx = 0; blockIdx < blockCount; blockIdx++) {
        var words = [];
        for (var wordIdx = 0; wordIdx < 16; wordIdx++) {
            var offset = blockIdx * 64 + wordIdx * 4;
            // reading past the end gives NaN, which the bitwise operators treat as 0
            words.push(
                (padded.charCodeAt(offset) << 24) |
                (padded.charCodeAt(offset + 1) << 16) |
                (padded.charCodeAt(offset + 2) << 8) |
                (padded.charCodeAt(offset + 3))
            );
        }
        blocks.push(words);
    }

    // the last two words hold the message length in bits (big-endian) [§5.1.1];
    // bitwise operators truncate to 32 bits, so the high word is derived arithmetically
    var finalBlock = blocks[blockCount - 1];
    finalBlock[14] = Math.floor(bitLength / Math.pow(2, 32));
    finalBlock[15] = bitLength & 0xffffffff;

    // initial hash value [§5.3.1]
    var digest = [ 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0 ];

    // HASH COMPUTATION [§6.1.2]
    var schedule = new Array(80);
    for (var n = 0; n < blockCount; n++) {
        var block = blocks[n];
        var t;

        // 1 - prepare the message schedule
        for (t = 0; t < 16; t++) {
            schedule[t] = block[t];
        }
        for (t = 16; t < 80; t++) {
            schedule[t] = Sha1.ROTL(schedule[t - 3] ^ schedule[t - 8] ^ schedule[t - 14] ^ schedule[t - 16], 1);
        }

        // 2 - start the working variables from the previous hash value
        var a = digest[0];
        var b = digest[1];
        var c = digest[2];
        var d = digest[3];
        var e = digest[4];

        // 3 - eighty rounds; the stage selects both the 'f' function and the constant
        for (t = 0; t < 80; t++) {
            var stage = Math.floor(t / 20);
            var next = (Sha1.ROTL(a, 5) + Sha1.f(stage, b, c, d) + e + roundConstants[stage] + schedule[t]) & 0xffffffff;
            e = d;
            d = c;
            c = Sha1.ROTL(b, 30);
            b = a;
            a = next;
        }

        // 4 - fold the working variables back in (addition modulo 2^32)
        digest[0] = (digest[0] + a) & 0xffffffff;
        digest[1] = (digest[1] + b) & 0xffffffff;
        digest[2] = (digest[2] + c) & 0xffffffff;
        digest[3] = (digest[3] + d) & 0xffffffff;
        digest[4] = (digest[4] + e) & 0xffffffff;
    }

    var hex = '';
    for (var k = 0; k < digest.length; k++) {
        hex += Sha1.toHexStr(digest[k]);
    }
    return hex;
};
/**
 * Round function 'f' [§4.1.1]: picks the bitwise mixing function for a stage.
 *
 * @private
 * @param {number} s - Stage 0..3; any other value (including non-numbers) yields undefined.
 * @param {number} x
 * @param {number} y
 * @param {number} z
 * @returns {number|undefined} Signed 32-bit result of the selected function.
 */
Sha1.f = function(s, x, y, z) {
    if (s === 0) {
        return (x & y) ^ (~x & z); // Ch()
    }
    if (s === 1 || s === 3) {
        return x ^ y ^ z; // Parity()
    }
    if (s === 2) {
        return (x & y) ^ (x & z) ^ (y & z); // Maj()
    }
    return undefined;
};
/**
 * Circular left shift of a 32-bit value [§3.2.5].
 *
 * @private
 * @param {number} x - Value to rotate (treated as 32 bits by the shift operators).
 * @param {number} n - Number of bit positions to rotate by.
 * @returns {number} Rotated value as a signed 32-bit integer.
 */
Sha1.ROTL = function(x, n) {
    var wrappedBits = x >>> (32 - n); // bits pushed off the top re-enter at the bottom
    var shiftedBits = x << n;
    return shiftedBits | wrappedBits;
};
/**
 * Renders a 32-bit word as eight hexadecimal digits, most significant first.
 *
 * @private
 * @param {number} n - Word to render; negative values are read as unsigned 32-bit.
 * @returns {string} Eight hex digits.
 */
Sha1.toHexStr = function(n) {
    // Converting the whole word with toString(16) is implementation-dependent (IE returns
    // signed numbers for full words), so each 4-bit group is converted on its own.
    var digits = [];
    for (var shift = 28; shift >= 0; shift -= 4) {
        var nibble = (n >>> shift) & 0xf;
        digits.push(nibble.toString(16));
    }
    return digits.join('');
};
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/**
 * Adds String.prototype.utf8Encode unless something already defined it.
 * The method returns a string in which every character code is one byte of the UTF-8
 * encoding of the receiver.
 * - monsur.hossa.in/2012/07/20/utf-8-in-javascript.html
 */
if (typeof String.prototype.utf8Encode === 'undefined') {
    String.prototype.utf8Encode = function() {
        // percent-encode as UTF-8, then turn each %XX escape back into a single character
        var percentEncoded = encodeURIComponent(this);
        return unescape(percentEncoded);
    };
}
/**
 * Adds String.prototype.utf8Decode unless something already defined it.
 * The method reads the receiver's character codes as UTF-8 bytes and returns the decoded
 * multi-byte string; when decoding fails the receiver is returned unchanged.
 */
if (typeof String.prototype.utf8Decode === 'undefined') {
    String.prototype.utf8Decode = function() {
        var decoded = this; // invalid UTF-8? return as-is
        try {
            decoded = decodeURIComponent(escape(this));
        } catch (e) {
            // deliberate best effort: keep the original text
        }
        return decoded;
    };
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
// Publish the Sha1 namespace through whichever module system is present; when neither
// `module` nor an AMD `define` exists, both checks fall through and nothing happens.
if (typeof module !== 'undefined' && module.exports) {
    module.exports = Sha1; // CommonJs export
}
if (typeof define === 'function' && define.amd) {
    define([], function() {
        return Sha1;
    }); // AMD
}
/**
 * Deterministically assigns a customer to a test bucket.
 *
 * customerId and testNumber are combined with `+` into a seed (numbers are added, strings are
 * concatenated), the seed is hashed with Sha1.hash, and the last four hex digits of the digest
 * are divided by 65535 to give a fraction c. c is compared, in order, against the cumulative
 * cut-offs of the traffic split selected by algorithmIndex; the first cut-off above c decides
 * the bucket, and bucket 0 is the fallback.
 *
 * @param {number|string} customerId - Customer identifier.
 * @param {number|string} algorithmIndex - 1..27 select a fixed split; 28 and 29 select a
 *     descriptor-driven split.
 * @param {number|string} testNumber - Test identifier.
 * @returns {number|string} The bucket number; -1 when any argument is null or undefined;
 *     '' (empty string, kept for backward compatibility) when algorithmIndex is not a known split.
 */
Sha1.getDeterministicBuckets = function(customerId,algorithmIndex,testNumber) {
    /* (on 6/3/13) Bad inputs (and/or logged out users) yield a -1. */
    // Returning here keeps the -1 from being overwritten: `null < 1` is true in JavaScript, so a
    // range check on a null algorithmIndex used to turn the result into 0.
    if (customerId == null || algorithmIndex == null || testNumber == null) {
        return -1;
    }

    // Cumulative cut-offs per algorithm. Unless listed in ascendingLabels, the first cut-off maps
    // to the highest bucket number and the last cut-off to bucket 1. Trailing comments give the
    // intended traffic split.
    var fixedSplits = {
        1: [0.25, 0.50], /* 25/25/50 */
        2: [0.01], /* 1/99 */
        3: [0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90], /* 10/10/10/... */
        4: [0.25, 0.50, 0.75], /* 25/25/25/25 */
        5: [0.50], /* 50/50 */
        6: [0.10], /* 10/90 */
        7: [0.10, 0.20, 0.30, 0.40, 0.50], /* 10/10/10/10/10/50 */
        8: [0.20, 0.40, 0.60, 0.80], /* 20/20/20/20/20 */
        9: [0.02, 0.04], /* 96/2/2 */
        10: [0.20], /* 80/20 */
        11: [0.125, 0.250, 0.375, 0.500, 0.625, 0.750, 0.875], /* 12.5/12.5/12.5/etc */
        12: [0.20, 0.40, 0.50], /* 50/10/20/20 */
        13: [], /* 100 */
        14: [0.02, 0.04, 0.06, 0.08, 0.10, 0.12, 0.14, 0.16, 0.18, 0.20], /* 80/2/2/2/2/2/2/2/2/2/2 */
        15: [0.03, 0.06, 0.09, 0.12, 0.15, 0.18, 0.21, 0.24, 0.27, 0.30, 0.33, 0.36, 0.39, 0.42,
             0.45, 0.48, 0.51, 0.54, 0.57, 0.60, 0.63, 0.66, 0.69, 0.72, 0.75, 0.78, 0.89], /* 11/11/3/3/3/3/.... */
        16: [0.08, 0.31, 0.54, 0.77], /* 23/23/23/23/8 */
        17: [0.005, 0.010, 0.015, 0.020, 0.025, 0.030], /* 97/0.5/0.5/0.5/0.5/0.5/0.5 */
        18: [0.10, 0.20], /* 80/10/10 */
        19: [0.10, 0.20, 0.30], /* 70/10/10/10 */
        20: [0.05, 0.10], /* 90/5/5 */
        21: [0.05, 0.10, 0.15, 0.20], /* 80/5/5/5/5 */
        22: [0.10, 0.55], /* 45/45/10 */
        23: [0.0588, 0.1176, 0.1764, 0.2352, 0.2940, 0.3528, 0.4116, 0.4704, 0.5292, 0.5880,
             0.6468, 0.7056, 0.7644, 0.8232, 0.8820, 0.9401], /* 5.88 x 17 */
        24: [0.025], /* 97.5/2.5 */
        25: [0.075], /* 92.5/7.5 */
        26: [0.025, 0.050, 0.075, 0.100, 0.125, 0.150, 0.175, 0.200, 0.225, 0.250, 0.275, 0.300,
             0.325, 0.350, 0.375, 0.400, 0.425, 0.450, 0.475, 0.500], /* 50/2.5 x 20 */
        27: [0.333, 0.666] /* 33.3/33.3/33.3 */
    };
    /* not sure why 26 was setup with results asc vs desc; results were validated and match webstore */
    var ascendingLabels = { 26: true };

    // Descriptor-driven splits: `d` lists percentage shares separated by '/'. The first share
    // belongs to bucket 0 and is never compared; it is simply what remains after the others.
    var testDescriptors = {
        28:{s:100, d:'1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1'},
        29:{s:21, d:'40/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3'}
    };

    // Own-property test: a bare `in` would also match inherited keys such as 'toString'.
    var hasOwn = function(table, key) {
        return Object.prototype.hasOwnProperty.call(table, key);
    };

    var seed = (customerId+testNumber).toString();
    var sha1 = Sha1.hash(seed);
    var end = sha1.slice(36); // last four hex digits of the 40-digit digest
    var c = parseInt(end, 16)/65535.0;

    // Number() mirrors the loose `==` comparison against numeric literals used previously,
    // so an algorithmIndex of '4' still selects split 4.
    var fixedKey = Number(algorithmIndex);
    if (hasOwn(fixedSplits, fixedKey)) {
        var cutoffs = fixedSplits[fixedKey];
        for (var i = 0; i < cutoffs.length; i++) {
            if (c < cutoffs[i]) {
                return hasOwn(ascendingLabels, fixedKey) ? i + 1 : cutoffs.length - i;
            }
        }
        return 0;
    }

    if (hasOwn(testDescriptors, algorithmIndex)) {
        var distributions = testDescriptors[algorithmIndex].d.split('/');
        var threshold = 0.0;
        for (var j = 1; j < distributions.length; j++) {
            threshold += parseFloat(distributions[j])/100.0;
            if (c < threshold) {
                return j;
            }
        }
        return 0;
    }

    // NOTE(review): unknown algorithms have always produced an empty string rather than a
    // number; kept as-is because callers may depend on it.
    return '';
};
/**
 * Row-level UDF entry point: computes the bucket for one input row and emits a single
 * output record carrying the bucket number together with the customer id.
 *
 * @param {Object} row - Input row; inCustomerId, inAlgorithmIndex and inTestNumber are read.
 * @param {function(Object)} emit - Callback that receives the output record.
 */
function getBuckets(row, emit) {
    var bucketNumber = Sha1.getDeterministicBuckets(
        row.inCustomerId,
        row.inAlgorithmIndex,
        row.inTestNumber
    );
    var record = {
        bucketNumber: bucketNumber,
        CustomerId: row.inCustomerId
    };
    emit(record);
}
// Register getBuckets with BigQuery under the SQL-visible name 'getBuckets'.
bigquery.defineFunction(
    // Name of the function exported to SQL
    'getBuckets',
    // Names of input columns
    ['inCustomerId', 'inAlgorithmIndex', 'inTestNumber'],
    // Output schema
    [
        {'name': 'bucketNumber', 'type': 'integer'},
        {'name': 'CustomerId', 'type': 'integer'}
    ],
    // Reference to JavaScript UDF
    getBuckets
);
Upvotes: 1
Views: 726
Reputation: 59335
I fixed it for #standardSQL.
First, add these 3 lines at the beginning, to define a JS UDF:
CREATE TEMP FUNCTION getDeterministicBuckets(customerId INT64, algorithmIndex INT64, testId INT64)
RETURNS STRUCT<bucketNumber INT64, CustomerId INT64>
LANGUAGE js AS """
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* SHA-1 implementation in JavaScript (c) Chris Veness 2002-2014 / MIT Licence */
/* */
/* - see http://csrc.nist.gov/groups/ST/toolkit/secure_hashing.html */
/* http://csrc.nist.gov/groups/ST/toolkit/examples.html */
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
At the end, instead of a function signature that calls emit(), use a plain return, like in:
return {bucketNumber: Sha1.getDeterministicBuckets(customerId, algorithmIndex, testId) , CustomerId: customerId};
""";
That will allow you to call the function like this:
SELECT getDeterministicBuckets(7354430,4,5947) x
Complete working code:
CREATE TEMP FUNCTION getDeterministicBuckets(customerId INT64, algorithmIndex INT64, testId INT64)
RETURNS STRUCT<bucketNumber INT64, CustomerId INT64>
LANGUAGE js AS """
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* SHA-1 implementation in JavaScript (c) Chris Veness 2002-2014 / MIT Licence */
/* */
/* - see http://csrc.nist.gov/groups/ST/toolkit/secure_hashing.html */
/* http://csrc.nist.gov/groups/ST/toolkit/examples.html */
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* jshint node:true *//* global define, escape, unescape */
'use strict';
/**
* SHA-1 hash function reference implementation.
*
* @namespace
*/
var Sha1 = {};
/**
 * Computes the SHA-1 digest of a string.
 *
 * The string is first converted to UTF-8 (one byte per character code) via
 * String.prototype.utf8Encode, then padded and processed in 512-bit blocks.
 * Section markers such as [§5.1.1] are carried over from the original reference comments.
 *
 * @param {string} msg - (Unicode) string to be hashed.
 * @returns {string} Hash of msg as hex character string (five words rendered by Sha1.toHexStr).
 */
Sha1.hash = function(msg) {
    // one additive constant per group of 20 rounds [§4.2.1]
    var roundConstants = [ 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 ];

    // SHA only deals with byte-streams, so work on the UTF-8 form of the text
    var payload = msg.utf8Encode();
    var bitLength = payload.length * 8;

    // PREPROCESSING: the padding begins with a single '1' bit, i.e. the byte 0x80 [§5.1.1]
    var padded = payload + String.fromCharCode(0x80);

    // 32-bit words needed: the message including the marker byte, plus two words for the length
    var wordsNeeded = padded.length / 4 + 2;
    var blockCount = Math.ceil(wordsNeeded / 16);

    // split into blocks of sixteen big-endian 32-bit words [§5.2.1]
    var blocks = [];
    for (var blockIdx = 0; blockIdx < blockCount; blockIdx++) {
        var words = [];
        for (var wordIdx = 0; wordIdx < 16; wordIdx++) {
            var offset = blockIdx * 64 + wordIdx * 4;
            // reading past the end gives NaN, which the bitwise operators treat as 0
            words.push(
                (padded.charCodeAt(offset) << 24) |
                (padded.charCodeAt(offset + 1) << 16) |
                (padded.charCodeAt(offset + 2) << 8) |
                (padded.charCodeAt(offset + 3))
            );
        }
        blocks.push(words);
    }

    // the last two words hold the message length in bits (big-endian) [§5.1.1];
    // bitwise operators truncate to 32 bits, so the high word is derived arithmetically
    var finalBlock = blocks[blockCount - 1];
    finalBlock[14] = Math.floor(bitLength / Math.pow(2, 32));
    finalBlock[15] = bitLength & 0xffffffff;

    // initial hash value [§5.3.1]
    var digest = [ 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0 ];

    // HASH COMPUTATION [§6.1.2]
    var schedule = new Array(80);
    for (var n = 0; n < blockCount; n++) {
        var block = blocks[n];
        var t;

        // 1 - prepare the message schedule
        for (t = 0; t < 16; t++) {
            schedule[t] = block[t];
        }
        for (t = 16; t < 80; t++) {
            schedule[t] = Sha1.ROTL(schedule[t - 3] ^ schedule[t - 8] ^ schedule[t - 14] ^ schedule[t - 16], 1);
        }

        // 2 - start the working variables from the previous hash value
        var a = digest[0];
        var b = digest[1];
        var c = digest[2];
        var d = digest[3];
        var e = digest[4];

        // 3 - eighty rounds; the stage selects both the 'f' function and the constant
        for (t = 0; t < 80; t++) {
            var stage = Math.floor(t / 20);
            var next = (Sha1.ROTL(a, 5) + Sha1.f(stage, b, c, d) + e + roundConstants[stage] + schedule[t]) & 0xffffffff;
            e = d;
            d = c;
            c = Sha1.ROTL(b, 30);
            b = a;
            a = next;
        }

        // 4 - fold the working variables back in (addition modulo 2^32)
        digest[0] = (digest[0] + a) & 0xffffffff;
        digest[1] = (digest[1] + b) & 0xffffffff;
        digest[2] = (digest[2] + c) & 0xffffffff;
        digest[3] = (digest[3] + d) & 0xffffffff;
        digest[4] = (digest[4] + e) & 0xffffffff;
    }

    var hex = '';
    for (var k = 0; k < digest.length; k++) {
        hex += Sha1.toHexStr(digest[k]);
    }
    return hex;
};
/**
 * Round function 'f' [§4.1.1]: picks the bitwise mixing function for a stage.
 *
 * @private
 * @param {number} s - Stage 0..3; any other value (including non-numbers) yields undefined.
 * @param {number} x
 * @param {number} y
 * @param {number} z
 * @returns {number|undefined} Signed 32-bit result of the selected function.
 */
Sha1.f = function(s, x, y, z) {
    if (s === 0) {
        return (x & y) ^ (~x & z); // Ch()
    }
    if (s === 1 || s === 3) {
        return x ^ y ^ z; // Parity()
    }
    if (s === 2) {
        return (x & y) ^ (x & z) ^ (y & z); // Maj()
    }
    return undefined;
};
/**
 * Circular left shift of a 32-bit value [§3.2.5].
 *
 * @private
 * @param {number} x - Value to rotate (treated as 32 bits by the shift operators).
 * @param {number} n - Number of bit positions to rotate by.
 * @returns {number} Rotated value as a signed 32-bit integer.
 */
Sha1.ROTL = function(x, n) {
    var wrappedBits = x >>> (32 - n); // bits pushed off the top re-enter at the bottom
    var shiftedBits = x << n;
    return shiftedBits | wrappedBits;
};
/**
 * Renders a 32-bit word as eight hexadecimal digits, most significant first.
 *
 * @private
 * @param {number} n - Word to render; negative values are read as unsigned 32-bit.
 * @returns {string} Eight hex digits.
 */
Sha1.toHexStr = function(n) {
    // Converting the whole word with toString(16) is implementation-dependent (IE returns
    // signed numbers for full words), so each 4-bit group is converted on its own.
    var digits = [];
    for (var shift = 28; shift >= 0; shift -= 4) {
        var nibble = (n >>> shift) & 0xf;
        digits.push(nibble.toString(16));
    }
    return digits.join('');
};
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/**
 * Adds String.prototype.utf8Encode unless something already defined it.
 * The method returns a string in which every character code is one byte of the UTF-8
 * encoding of the receiver.
 * - monsur.hossa.in/2012/07/20/utf-8-in-javascript.html
 */
if (typeof String.prototype.utf8Encode === 'undefined') {
    String.prototype.utf8Encode = function() {
        // percent-encode as UTF-8, then turn each %XX escape back into a single character
        var percentEncoded = encodeURIComponent(this);
        return unescape(percentEncoded);
    };
}
/**
 * Adds String.prototype.utf8Decode unless something already defined it.
 * The method reads the receiver's character codes as UTF-8 bytes and returns the decoded
 * multi-byte string; when decoding fails the receiver is returned unchanged.
 */
if (typeof String.prototype.utf8Decode === 'undefined') {
    String.prototype.utf8Decode = function() {
        var decoded = this; // invalid UTF-8? return as-is
        try {
            decoded = decodeURIComponent(escape(this));
        } catch (e) {
            // deliberate best effort: keep the original text
        }
        return decoded;
    };
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
// Publish the Sha1 namespace through whichever module system is present; when neither
// `module` nor an AMD `define` exists, both checks fall through and nothing happens.
if (typeof module !== 'undefined' && module.exports) {
    module.exports = Sha1; // CommonJs export
}
if (typeof define === 'function' && define.amd) {
    define([], function() {
        return Sha1;
    }); // AMD
}
/**
 * Deterministically assigns a customer to a test bucket.
 *
 * customerId and testNumber are combined with `+` into a seed (numbers are added, strings are
 * concatenated), the seed is hashed with Sha1.hash, and the last four hex digits of the digest
 * are divided by 65535 to give a fraction c. c is compared, in order, against the cumulative
 * cut-offs of the traffic split selected by algorithmIndex; the first cut-off above c decides
 * the bucket, and bucket 0 is the fallback.
 *
 * @param {number|string} customerId - Customer identifier.
 * @param {number|string} algorithmIndex - 1..27 select a fixed split; 28 and 29 select a
 *     descriptor-driven split.
 * @param {number|string} testNumber - Test identifier.
 * @returns {number|string} The bucket number; -1 when any argument is null or undefined;
 *     '' (empty string, kept for backward compatibility) when algorithmIndex is not a known split.
 */
Sha1.getDeterministicBuckets = function(customerId,algorithmIndex,testNumber) {
    /* (on 6/3/13) Bad inputs (and/or logged out users) yield a -1. */
    // Returning here keeps the -1 from being overwritten: `null < 1` is true in JavaScript, so a
    // range check on a null algorithmIndex used to turn the result into 0.
    if (customerId == null || algorithmIndex == null || testNumber == null) {
        return -1;
    }

    // Cumulative cut-offs per algorithm. Unless listed in ascendingLabels, the first cut-off maps
    // to the highest bucket number and the last cut-off to bucket 1. Trailing comments give the
    // intended traffic split.
    var fixedSplits = {
        1: [0.25, 0.50], /* 25/25/50 */
        2: [0.01], /* 1/99 */
        3: [0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90], /* 10/10/10/... */
        4: [0.25, 0.50, 0.75], /* 25/25/25/25 */
        5: [0.50], /* 50/50 */
        6: [0.10], /* 10/90 */
        7: [0.10, 0.20, 0.30, 0.40, 0.50], /* 10/10/10/10/10/50 */
        8: [0.20, 0.40, 0.60, 0.80], /* 20/20/20/20/20 */
        9: [0.02, 0.04], /* 96/2/2 */
        10: [0.20], /* 80/20 */
        11: [0.125, 0.250, 0.375, 0.500, 0.625, 0.750, 0.875], /* 12.5/12.5/12.5/etc */
        12: [0.20, 0.40, 0.50], /* 50/10/20/20 */
        13: [], /* 100 */
        14: [0.02, 0.04, 0.06, 0.08, 0.10, 0.12, 0.14, 0.16, 0.18, 0.20], /* 80/2/2/2/2/2/2/2/2/2/2 */
        15: [0.03, 0.06, 0.09, 0.12, 0.15, 0.18, 0.21, 0.24, 0.27, 0.30, 0.33, 0.36, 0.39, 0.42,
             0.45, 0.48, 0.51, 0.54, 0.57, 0.60, 0.63, 0.66, 0.69, 0.72, 0.75, 0.78, 0.89], /* 11/11/3/3/3/3/.... */
        16: [0.08, 0.31, 0.54, 0.77], /* 23/23/23/23/8 */
        17: [0.005, 0.010, 0.015, 0.020, 0.025, 0.030], /* 97/0.5/0.5/0.5/0.5/0.5/0.5 */
        18: [0.10, 0.20], /* 80/10/10 */
        19: [0.10, 0.20, 0.30], /* 70/10/10/10 */
        20: [0.05, 0.10], /* 90/5/5 */
        21: [0.05, 0.10, 0.15, 0.20], /* 80/5/5/5/5 */
        22: [0.10, 0.55], /* 45/45/10 */
        23: [0.0588, 0.1176, 0.1764, 0.2352, 0.2940, 0.3528, 0.4116, 0.4704, 0.5292, 0.5880,
             0.6468, 0.7056, 0.7644, 0.8232, 0.8820, 0.9401], /* 5.88 x 17 */
        24: [0.025], /* 97.5/2.5 */
        25: [0.075], /* 92.5/7.5 */
        26: [0.025, 0.050, 0.075, 0.100, 0.125, 0.150, 0.175, 0.200, 0.225, 0.250, 0.275, 0.300,
             0.325, 0.350, 0.375, 0.400, 0.425, 0.450, 0.475, 0.500], /* 50/2.5 x 20 */
        27: [0.333, 0.666] /* 33.3/33.3/33.3 */
    };
    /* not sure why 26 was setup with results asc vs desc; results were validated and match webstore */
    var ascendingLabels = { 26: true };

    // Descriptor-driven splits: `d` lists percentage shares separated by '/'. The first share
    // belongs to bucket 0 and is never compared; it is simply what remains after the others.
    var testDescriptors = {
        28:{s:100, d:'1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1'},
        29:{s:21, d:'40/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3'}
    };

    // Own-property test: a bare `in` would also match inherited keys such as 'toString'.
    var hasOwn = function(table, key) {
        return Object.prototype.hasOwnProperty.call(table, key);
    };

    var seed = (customerId+testNumber).toString();
    var sha1 = Sha1.hash(seed);
    var end = sha1.slice(36); // last four hex digits of the 40-digit digest
    var c = parseInt(end, 16)/65535.0;

    // Number() mirrors the loose `==` comparison against numeric literals used previously,
    // so an algorithmIndex of '4' still selects split 4.
    var fixedKey = Number(algorithmIndex);
    if (hasOwn(fixedSplits, fixedKey)) {
        var cutoffs = fixedSplits[fixedKey];
        for (var i = 0; i < cutoffs.length; i++) {
            if (c < cutoffs[i]) {
                return hasOwn(ascendingLabels, fixedKey) ? i + 1 : cutoffs.length - i;
            }
        }
        return 0;
    }

    if (hasOwn(testDescriptors, algorithmIndex)) {
        var distributions = testDescriptors[algorithmIndex].d.split('/');
        var threshold = 0.0;
        for (var j = 1; j < distributions.length; j++) {
            threshold += parseFloat(distributions[j])/100.0;
            if (c < threshold) {
                return j;
            }
        }
        return 0;
    }

    // NOTE(review): unknown algorithms have always produced an empty string rather than a
    // number; kept as-is because callers may depend on it.
    return '';
};
return {bucketNumber: Sha1.getDeterministicBuckets(customerId, algorithmIndex, testId) , CustomerId: customerId};
""";
SELECT getDeterministicBuckets(7354430,4,5947) x
Upvotes: 1
Reputation: 173190
It looks like your JavaScript is mostly a SHA-1 implementation.
So, how about simply using BigQuery's built-in SHA1 function instead?
Upvotes: 0
Reputation: 5518
If your question is how to call your JS function in the example, you can follow https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions#including-javascript-libraries to:
CREATE OR REPLACE FUNCTION yourDataset.getBuckets(customerId STRING, algorithmIndex FLOAT64, testId STRING)
RETURNS STRING
LANGUAGE js
OPTIONS (
library=["gs://my-bucket/path/to/lib1.js", "gs://my-bucket/path/to/lib2.js"]
)
AS """
return yourJS.getBuckets(...);
"""
Upvotes: 0