import time
import sys
import math
import random
'''
Typically, prefix sum idea/approach gives you some
information about a slice of a list, without checking
repeatedly the items in that particular slice.
One of the simplest examples is querying the sum of values in a slice.
Given a list L, we create an additional prefix sum list,
before we start querying. Example:
L = [ 3, 2, 5, 0, 2, 1, 4]
prefSumL = [ 3, 5,10,10,12,13,17]
which is just a list of sum of all items in L from the beginning,
up to a particular position.
Code is simple:
'''
def prefSum( L ):
    """Return the list of running totals of L.

    Item i of the result equals L[0] + L[1] + ... + L[i].

    Args:
        L: iterable of values that support ``+`` (typically a list of numbers).

    Returns:
        A new list with one running total per item of L;
        an empty list when L is empty.
    """
    prefSumL = []
    for value in L:
        # The first item starts the running total, every later item
        # is added to the total accumulated so far.
        prefSumL.append( prefSumL[-1] + value if prefSumL else value )
    return prefSumL
'''
Now, when we are asked to find the sum of all items in a given slice,
say L[1:5], it is enough to subtract
prefSumL[4] - prefSumL[0], which equals 12 - 3 = 9.
Really the sum of L[1:5] is sum of [2,5,0,2] = 9.
Clearly the approach works for any length of the array
and any length of the queried slice; the amount of work
needed to find the sum of a slice is always constant
- just a subtraction of two values, nothing more.
The principle can be easily modified for other problems:
For example, how many zeroes are there in a particular slice?
Again, create a list containing the number of zeroes
in the original list up to a given position. Example:
L = [1,0,0,0,1,1,0,1,0,0,1,1,0]
prefixZerosCount = [0,1,2,3,3,3,4,4,5,6,6,6,7]
Again, code is simple:
'''
def prefixZerosCount( L ):
    """Return the running count of zeros in L.

    Item i of the result is the number of items equal to 0
    among L[0], L[1], ..., L[i].

    Args:
        L: iterable of values that can be compared with 0.

    Returns:
        A new list with one count per item of L;
        an empty list when L is empty.
    """
    prefCount0L = []
    zerosSoFar = 0
    for value in L:
        # int(True) == 1, so the counter grows exactly when value is a zero.
        zerosSoFar += int(value == 0)
        prefCount0L.append( zerosSoFar )
    return prefCount0L
'''
Now the strategy of answering queries is completely analogous
to the previous case.
Note
The case when the queried range starts at position 0,
needs a separate treatment.
If you do not want to apply a separate treatment in this case,
you may instead consider prepending prefixZerosCount list with
a sentinel - a single 0 - describing the numbers of zeros on a
non-existing imaginary position preceding the first position in the real list.
Then, querying the number of zeros at positions a through b (inclusive),
i.e. in the slice L[a:b+1], would need to calculate
prefixZerosCount[b+1]-prefixZerosCount[a],
always, without distinguishing the extra corner case with a == 0.
'''
# --------------------------------------------------
# Prefix sum idea and application, in more complex situation
# Daily road incident statistics:
# A typical query: What was the total number of
# incidents between date1 and date2?
# Regarding the dates in queries:
# -- they are not predictable and
# -- relatively few possible date pairs will be ever queried
# Keeping a table with precomputed no. of incidents for
# each pair (date1, date2) tends to be wasteful.
# Applying prefix sum approach is an effective method.
# For simplicity, the dates are considered to be just
# integers: 0,1,2,3, ....n-1. The date then also serves
# as an index in the list of road incidents. For example,
# the value at position (index) 27 in the list is the
# number of incidents on day 27.
# --------------------------------------------------
# Below are three support functions which generate example data
# used in comparison experiment further below.
# [1] Utility function used to generate the example data.
# Create a list filled with randomly chosen values.
# The random generator is seeded with a fixed integer (any integer value
# would do), so the same data is produced on every run.
def randomFill( n, maxval, seed=1234321 ):
    """Return a list of n pseudo-random integers from the range [0, maxval].

    The module-level random generator is re-seeded on every call, so the
    same arguments always produce the same list. Re-seeding also affects
    every later call to the random module made by the rest of the program.

    Args:
        n: number of values to generate.
        maxval: largest value that may be generated (inclusive).
        seed: value passed to random.seed(); the default keeps the
            data identical to what the function produced before the
            parameter was introduced.

    Returns:
        A new list of n integers; an empty list when n is 0.
    """
    random.seed(seed)
    return [random.randint(0, maxval) for _ in range(n)]
# [2] Create a list of length n with each data value in range [0, maxVal].
# Each value is interpreted as number of road incidents in a particular day.
def makeStatistics( maxDate, maxVal ):
    """Generate the example day-by-day incident counts.

    Delegates to randomFill: the result has maxDate items (one per day),
    each a random integer in the range [0, maxVal].
    """
    incidentsPerDay = randomFill( maxDate, maxVal )
    return incidentsPerDay
# [3] Create list of queries, each query is nothing more
# than one randomly chosen pair of dates [date1, date2]
# where 0 <= date1 <= date2 <= maxDate.
def makeQueries( nQueries, maxDate ):
    """Generate nQueries random date ranges.

    Each query is a two-item list [date1, date2] with
    0 <= date1 <= date2 <= maxDate. Both dates are drawn with
    random.randint from the current state of the random generator.
    """
    # The tuple is evaluated left to right, so the two dates are drawn
    # in a fixed order; sorted() then returns them as [smaller, larger].
    return [
        sorted( (random.randint(0, maxDate), random.randint(0, maxDate)) )
        for _ in range( nQueries )
    ]
# ------------------------------------------------------------
# Comparison experiment
# ------------------------------------------------------------
# In the experiment we want to calculate repeatedly
# the total number of road incidents between various
# dates. We are given a day-by-day list of number of incidents
# in a long time period. Also, we are given a list of queries,
# each query asks how many incidents happened between two dates.
# Each query is represented by a pair of integers [date1, date2].
# To perform the experiment we create two functions
# processQueriesPlain and processQueriesPrefSum.
# The first function does not use any precomputation
# or prefix sum array.
# The second method depends on prefix sum array.
# Finally, the function compareMethods performs the whole experiment.
def processQueriesPlain( statData, queries ):
    """Answer every query by summing the requested slice directly.

    Args:
        statData: list of daily values, indexed by date.
        queries: sequence of [date1, date2] pairs; both dates are inclusive.

    Returns:
        A list with one sum per query, in the order of the queries.
    """
    # date2 is inclusive, hence the +1 on the slice end.
    return [ sum(statData[q[0]: q[1]+1]) for q in queries ]
# Prefix sum array of intList array
def prefixSum( intList ):
    """Return a copy of intList in which item i holds intList[0] + ... + intList[i].

    The input list itself is left unchanged.
    """
    totals = intList.copy()
    # Push each finished total one position to the right;
    # position 0 is already correct, so the walk ends at the last pair.
    for left in range( len(totals) - 1 ):
        totals[left+1] += totals[left]
    return totals
def processQueriesPrefSum( statData, queries ):
    """Answer every query with two lookups in a prefix sum list.

    The prefix sum list of statData is built once by prefixSum;
    each query [date1, date2] (both dates inclusive) is then answered
    without visiting the individual days.

    Returns:
        A list with one sum per query, in the order of the queries.
    """
    cumulative = prefixSum( statData )
    answers = []
    for q in queries:
        first, last = q[0], q[1]
        # Nothing precedes position 0, so in that case the running total
        # itself is the answer; otherwise remove the part before `first`.
        answers.append(
            cumulative[last] if first == 0
            else cumulative[last] - cumulative[first-1]
        )
    return answers
# ------------------------------------------------------------
# Comparison experiment main function
def compareMethods( dataSize, maxDataVal, nQueries ):
    """Run both query-processing methods on the same data and print their times.

    Args:
        dataSize: number of days in the generated statistics.
        maxDataVal: upper bound (inclusive) of each generated daily value.
        nQueries: number of random [date1, date2] queries to answer.

    The function returns nothing; it prints the data size, the number of
    queries and the elapsed time of each method. For small inputs
    (at most 20 days and 20 queries) it also prints the data, the queries
    and both response lists so they can be compared visually.
    """
    statData = makeStatistics( dataSize, maxDataVal )
    # Dates are list indices, so the largest valid date is dataSize-1.
    queries = makeQueries( nQueries, dataSize-1 )

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump when the system
    # clock is adjusted.
    startPlain = time.perf_counter()
    responses1 = processQueriesPlain( statData, queries )
    startPrefSum = time.perf_counter()
    responses2 = processQueriesPrefSum( statData, queries )
    endPrefSum = time.perf_counter()

    # check small data visually
    if dataSize <= 20 and nQueries <= 20:
        print( statData )
        print( queries )
        print( responses1 )
        print( responses2 )
    print( "Data size ", dataSize )
    print( "No of queries ", nQueries )
    print( "Responses plain time ", startPrefSum-startPlain )
    print( "Responses prefix sum time ", endPrefSum-startPrefSum )
    print( )
# Small demonstration that can be checked by hand:
# print a list, its prefix sum list, and the sum of one range.
aList = [11, 16, 9, 2, 14, 1, 17, 4, 11, 4]
prSum = prefixSum( aList )
print("---------------")
print( aList )
print( prSum )
# Both positions are inclusive; the formula below needs low >= 1.
low, high = 3, 6
print( "sum of elems at positions from", low, "to", high, "is", prSum[high] - prSum[low-1] )
# Note that the size of the data has nearly no influence
# on the query processing time of the prefix sum method.
# On the other hand, the plain (naive) method suffers
# greatly from increased data size.
# Each run is (data size, number of queries); the maximum data value is always 20.
# Processing the data of size 100 000 (the last run)
# by the plain method takes about 1 minute.
for dataSize, nQueries in (
    (10, 6),
    (100, 100000),
    (1000, 100000),
    (10000, 100000),
    (100000, 100000),
):
    compareMethods( dataSize, 20, nQueries )