studio portable manager sqlite function standard-deviation

portable - Desviación estándar para SQLite



sqlite studio (8)

Busqué en los documentos de SQLite y no pude encontrar nada, pero también busqué en Google y aparecieron algunos resultados.

¿SQLite tiene alguna función integrada de Desviación Estándar?


Agregué un poco de detección de errores a las funciones de Python:

import math


class StdevFunc:
    """Sample standard deviation, for use as an aggregate function in SQLite.

    Register it with ``connection.create_aggregate(name, 1, StdevFunc)``.
    The running mean and the running sum of squared deviations are updated
    one value at a time (Welford's method), so no values are stored.
    """

    def __init__(self):
        self.M = 0.0  # running mean of the accepted values
        self.S = 0.0  # running sum of squared deviations from the mean
        self.k = 0    # number of accepted values

    def step(self, value):
        """Fold one value into the running statistics.

        Text is converted to float, like the rest of SQLite does. Values
        that cannot be converted (including NULL/None) are skipped.
        """
        try:
            val = float(value)
        except (TypeError, ValueError):
            # None raises TypeError, non-numeric text raises ValueError.
            return
        previous_mean = self.M
        self.k += 1
        self.M += (val - previous_mean) / self.k
        self.S += (val - previous_mean) * (val - self.M)

    def finalize(self):
        """Return the sample standard deviation, or None for fewer than 2 values."""
        if self.k <= 1:
            # Avoid division by zero: k - 1 would be 0 (or negative).
            return None
        return math.sqrt(self.S / (self.k - 1))


Implementé el método de Welford (igual que extension-functions.c ) como un UDF de SQLite:

// Registers a one-argument "stdev" aggregate on the connection in $db.
// The step callback keeps a running count (k), mean (m) and sum of squared
// deviations (s); the fini callback returns sqrt(s / k), i.e. it divides by
// the number of values, not by k - 1.
$db->sqliteCreateAggregate(
    'stdev',
    // step callback
    function (&$context, $row, $data) {
        if (isset($context) !== true) { // $context is null at first
            $context = array(
                'k' => 0,
                'm' => 0,
                's' => 0,
            );
        }

        if (isset($data) === true) { // the standard is non-NULL values only
            // Explicit temporaries so the result does not depend on the
            // order in which the operands of one expression are evaluated.
            $delta = $data - $context['m'];
            $context['k']++;
            $context['m'] += $delta / $context['k'];
            $context['s'] += $delta * ($data - $context['m']);
        }

        return $context;
    },
    // fini callback
    function (&$context, $row) {
        // $context is still null when the step callback never ran, so check
        // that the counter exists before reading it.
        if (isset($context['k']) === true && $context['k'] > 0) {
            return sqrt($context['s'] / $context['k']);
        }

        return null; // return NULL if no non-NULL values exist
    },
    1
);

Eso está en PHP ( $db es el objeto PDO), pero debería ser trivial portarlo a otro lenguaje.

SQLite es tan genial <3



No; investigué este mismo problema y terminé haciendo los cálculos en mi aplicación (PHP).


Puedes calcular la varianza en SQL:

-- Variance of t.row computed in plain SQL: the average of the squared
-- differences between each value and the overall average.
create table t (row int);
insert into t values (1),(2),(3);

SELECT AVG((t.row - sub.a) * (t.row - sub.a)) as var
from t,
     (SELECT AVG(row) AS a FROM t) AS sub;
-- Result: 0.666666666666667

Sin embargo, todavía tiene que calcular la raíz cuadrada para obtener la desviación estándar.


Todavía no hay una función stdev incorporada en sqlite. Sin embargo, puede definir (como lo ha hecho Alix) una función agregadora definida por el usuario. Aquí hay un ejemplo completo en Python:

import sqlite3
import math
from contextlib import closing


class StdevFunc:
    """Sample standard deviation as a user-defined SQLite aggregate.

    The running mean (M) and running sum of squared deviations (S) are
    updated one value at a time, so no values are stored.
    """

    def __init__(self):
        self.M = 0.0  # running mean
        self.S = 0.0  # running sum of squared deviations from the mean
        self.k = 1    # one more than the number of values seen so far

    def step(self, value):
        """Fold one value into the running statistics; NULLs are ignored."""
        if value is None:
            return
        tM = self.M
        self.M += (value - tM) / self.k
        self.S += (value - tM) * (value - self.M)
        self.k += 1

    def finalize(self):
        """Return the sample standard deviation, or None for fewer than 2 values."""
        # k is (number of values + 1), so k < 3 means fewer than two values
        # and k - 2 is the (n - 1) divisor of the sample variance.
        if self.k < 3:
            return None
        return math.sqrt(self.S / (self.k - 2))


# `closing` closes the connection on exit; the inner `con` context manager
# only commits or rolls back, it does not close.
with closing(sqlite3.connect(':memory:')) as con, con:
    con.create_aggregate("stdev", 1, StdevFunc)

    cur = con.cursor()

    cur.execute("create table test(i)")
    cur.executemany("insert into test(i) values (?)", [(1,), (2,), (3,), (4,), (5,)])
    cur.execute("insert into test(i) values (null)")
    cur.execute("select avg(i) from test")
    print("avg: %f" % cur.fetchone()[0])
    cur.execute("select stdev(i) from test")
    print("stdev: %f" % cur.fetchone()[0])

Esto imprimirá:

avg: 3.000000 stdev: 1.581139

Compare con MySQL: http://sqlfiddle.com/#!2/ad42f3/3/0


un pequeño truco

-- Sample variance in one pass:
--   (n * sum(x^2) - sum(x)^2) / (n * (n - 1))
-- count(value) skips NULLs, matching sum(); the 1.0 factor forces
-- floating-point division when the column holds integers.
select (count(value) * sum(value * value) - sum(value) * sum(value)) * 1.0
       / (count(value) * (count(value) - 1))
from the_table;

Entonces lo único que queda es calcular la raíz cuadrada (sqrt) fuera de la consulta.


#!/usr/bin/python
# -*- coding: utf-8 -*-
# Values produced by this script can be verified by following the steps
# found at https://support.microsoft.com/en-us/kb/213930, using a
# non memory based database.
import sqlite3
import math
import random
import os
import sys
import traceback


class StdevFunc:
    """Sample standard deviation as a user-defined SQLite aggregate.

    The mean and the sum of squared deviations are updated one value at a
    time, so no values are stored.
    """

    def __init__(self):
        self.M = 0.0  # Mean
        self.V = 0.0  # Running sum of squared deviations (variance numerator)
        self.S = 0.0  # Standard deviation, set by finalize()
        self.k = 1    # One more than the number of accepted values

    def step(self, value):
        """Fold one value into the running statistics.

        NULLs and values that do not support arithmetic (e.g. text) are
        skipped, so one bad row does not abort the whole query.
        """
        if value is None:
            return None
        try:
            delta = value - self.M
        except TypeError:
            return None
        self.M += delta / self.k
        self.V += delta * (value - self.M)
        self.k += 1
        return None

    def finalize(self):
        """Return the sample standard deviation, or None for fewer than 3 values."""
        count = self.k - 1
        if count < 3:
            return None
        # Keep the accumulator untouched so calling finalize() again gives
        # the same answer.
        variance = self.V / (count - 1)
        try:
            # Standard Deviation is the Square Root of Variance
            self.S = math.sqrt(variance)
        except ValueError:
            return None
        return self.S


def _create_tables(cur):
    """Drop and recreate MyData and Bins so each run starts clean."""
    cur.executescript('drop table if exists MyData;')
    cur.execute(
        "create table IF NOT EXISTS MyData("
        "'ID' INTEGER PRIMARY KEY AUTOINCREMENT, 'Val' FLOAT)"
    )
    cur.executescript('drop table if exists Bins;')
    cur.execute(
        "create table IF NOT EXISTS Bins("
        "'ID' INTEGER PRIMARY KEY AUTOINCREMENT, 'Bin' UNSIGNED INTEGER, "
        "'Val' FLOAT, 'Frequency' UNSIGNED BIG INT)"
    )


def _fill_bins(cur, mean, stdev, records, bin_count):
    """Populate Bins with one row per bin edge and return the overflow count.

    Edges start at mean - 3 * stdev and advance one stdev at a time. The
    frequency stored with an edge is the number of values in the interval
    that ends at that edge (left inclusive, right exclusive). The returned
    count covers every record that was not stored with one of the edges.
    """
    edges = [float(mean - 3 * stdev)]
    frequency = [0]
    for b in range(bin_count + 1):
        edges.append(edges[b] + stdev)
        frequency.append(0)

    counted = 0
    for b in range(bin_count + 1):
        # Let the database count the values in [edges[b], edges[b + 1]).
        row = cur.execute(
            "select count(*) as Frequency from MyData "
            "where Val >= ? and Val < ?",
            (edges[b], edges[b + 1]),
        ).fetchone()
        frequency[b + 1] = row['Frequency']
        cur.execute(
            "insert into Bins (Bin, Val, Frequency) values (?, ?, ?)",
            (b, edges[b], frequency[b]),
        )
        counted += frequency[b]

    # With real data we want to know how many data points fall outside
    # the stored bins.
    more = abs(records - counted)
    cur.execute(
        "insert into Bins (Bin, Val, Frequency) values (?, ?, ?)",
        (bin_count + 1, 0.0, more),
    )
    return more


def _print_report(cur, Population, highest, lowest, mean, spread, stdev,
                  records, more, bin_count):
    """Print the aggregate summary and the text histogram stored in Bins."""
    print("================================ The Population ==============================")
    print(" {0} {1} {2} {3} {4} {5}".format(
        "Size".rjust(10, ' '),
        "Max".rjust(10, ' '),
        "Min".rjust(10, ' '),
        "Mean".rjust(10, ' '),
        "Range".rjust(10, ' '),
        "Stdev".rjust(10, ' ')))
    print("Aggregates: {0:10d} {1:10.4f} {2:10.4f} {3:10.4f} {4:10.4f} {5:10.4f}".format(
        Population, highest, lowest, mean, spread, stdev))
    print("================================= The Bell Curve =============================")
    print("{0} {1} {2} {3}".format(
        "Bin".ljust(8, ' '),
        "Ranges".rjust(8, ' '),
        "Frequency".rjust(8, ' '),
        "Histogram".rjust(6, ' ')))
    print("------------------------------------------------------------------------------")

    rows = cur.execute("select * from Bins order by Bin asc").fetchall()
    for row in rows:
        if row['Bin'] == bin_count + 1:
            # The last row is not really a bin, but where the values that
            # did not fit into the stored bins are reported.
            line = "{0:<6} {1:<10} {2:10.0f}".format("More", "", more)
        else:
            # Scale the bar: one '#' per (rounded up) percent of records.
            percent = float(row['Frequency']) / float(records) * 100
            bar = '#' * int(math.ceil(percent))
            line = "{0:<6} {1:10.4f} {2:10.0f} {3}".format(
                row['Bin'] + 1, row['Val'], row['Frequency'], bar)
        print(line)
    print("******************************************************************************")


def Histogram(Population):
    """Generate random data in SQLite and print its bell-curve histogram.

    Inserts ``Population`` values drawn from random.uniform(-1, 1) into an
    in-memory database, computes the aggregates (including the Stdev
    aggregate registered from StdevFunc), bins the values one standard
    deviation apart starting at mean - 3 * stdev, and prints a report.
    Prints a "Data Error" banner instead when the standard deviation is
    unavailable or all values are identical.

    Raises:
        Exception: wrapping the formatted traceback of any failure.
    """
    bin_count = 6
    try:
        # To store the database on disk instead, connect to a file path
        # such as os.path.join(os.getcwd(), "BellCurve.db3").
        con = sqlite3.connect(':memory:')
        try:
            # row_factory allows accessing fields by column name
            con.row_factory = sqlite3.Row
            # Add our non persistent, runtime Standard Deviation function
            con.create_aggregate("Stdev", 1, StdevFunc)
            cur = con.cursor()

            _create_tables(cur)

            # Generate some random data and insert it into the database
            cur.executemany(
                "insert into MyData(Val) values (?)",
                ((random.uniform(-1, 1),) for _ in range(Population)),
            )

            cur.execute(
                "select Avg(Val), Max(Val), Min(Val), Count(Val), Stdev(Val) "
                "from MyData"
            )
            mean, highest, lowest, records, stdev = cur.fetchone()

            if stdev is None:
                # Checked first: with no rows Max/Min are NULL as well.
                print("================================ Data Error ===============================")
                print(" Insufficient Population Size, Or Bad Data.")
                print("*****************************************************************************")
            else:
                spread = float(abs(float(highest) - float(lowest)))
                if spread == 0:
                    print("================================ Data Error ===============================")
                    print(" The entire Population Contains Identical values, Distribution Incalculable.")
                    print("******************************************************************************")
                else:
                    more = _fill_bins(cur, mean, stdev, records, bin_count)
                    _print_report(cur, Population, highest, lowest, mean,
                                  spread, stdev, records, more, bin_count)

            con.commit()
            cur.close()
        finally:
            # Close the connection even when a step above failed.
            con.close()
    except Exception as exc:
        raise Exception(traceback.format_exc()) from exc