import random
import time

# Searching for a single value few times is best done by built-in
# Python tools and functions
# --- relation "in"
# --- index() function

# ... it is OK to use library functions and data structures
#  BUT it may be also useful to think of  one's own approach

# Searching REPEATEDLY many times, in the same data structure,
# for various items may result in slow performance of the code.
# The reason is that, for a list, both the "in" operator and the index() method
# rely internally on a very straightforward "brute force" approach:
# Scan the list (array) from its beginning until the desired item is found.

# For REPETITIVE search(es), there exist much more efficient approaches
# which internally create and manage additional data structures.
# These additional data structures support fast search,
# without scanning the original array (list) for a given item.

# The simplest example of an additional data structure
# is an indicator vector (characteristic vector), utilized in
# Repeated Search 3 below.
# A more advanced structure -- a hash table -- is used in the dictionary.
# It allows the search in a dictionary to be extremely fast
# and effective.

# Perform the experiments in the MAIN part below
# to compare the efficiency of various search methods.

# Exercise:
# modify the queryArray functions in such way
#  that each function returns also a list of hits.
#  Each element of the list would be a pair
# (hit value, hit position)

# ----------------------------------------------------------------------
#     U T I L I T I E S
# ----------------------------------------------------------------------

# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .

#  Easy generator of random queries and lists(arrays)

# Python documentation:
#     random.randint(a, b)
#     Return a random integer N such that a <= N <= b.
# Note: 'a <= N <= b' is not exactly a 'range', it includes b.

def randomQueries( Nqueries, loBound, upBound ):
    """Return a list of Nqueries random integers.

    Each integer N is drawn by random.randint, so loBound <= N <= upBound
    (both bounds are included).
    """
    generated = []
    for _ in range( Nqueries ):
        generated.append( random.randint( loBound, upBound ) )
    return generated

def randomArray( arrLength, loBound, upBound ):
    """Return a list of arrLength random integers.

    Values are drawn one by one with random.randint, therefore both
    loBound and upBound may appear and values may repeat.
    """
    draws = ( random.randint( loBound, upBound ) for _ in range( arrLength ) )
    return list( draws )

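# Note:
# A variant
#   return random.sample( range(loBound, upBound+1), arrLength )
# works only when arrLength <= upBound - loBound + 1: random.sample() draws
# without replacement, so it cannot return more items than the range holds
# (and it never produces repeated values).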

# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .

# Searching repeatedly for many values might be improved by additional ideas
# like e.g. indicator (characteristic) or histogram vector.

def indicator( arr ):
    """Build the indicator (characteristic) vector of arr.

    Returns a list `flags` of length max(arr)+1 where flags[v] == 1
    if the value v occurs in arr and flags[v] == 0 otherwise.
    An empty arr yields an empty list.

    arr must hold non-negative integers only.  A negative value raises
    ValueError: used as a list index it would address a position counted
    from the END of the vector and silently mark a wrong value as present.
    """
    if len(arr) == 0:
        # max() of an empty sequence would raise ValueError
        return []

    flags = [0] * (max(arr)+1)
    for val in arr:
        if val < 0:
            raise ValueError(
                "indicator() supports only non-negative integers, got %r" % (val,) )
        flags[val] = 1
    return flags

def histogram( arr ):
    """Build the histogram vector of arr.

    Returns a list `counts` of length max(arr)+1 where counts[v] is the
    number of occurrences of the value v in arr.
    An empty arr yields an empty list.

    arr must hold non-negative integers only.  A negative value raises
    ValueError: used as a list index it would address a position counted
    from the END of the vector and silently increase a wrong counter.
    """
    if len(arr) == 0:
        # max() of an empty sequence would raise ValueError
        return []

    counts = [0] * (max(arr)+1)
    for val in arr:
        if val < 0:
            raise ValueError(
                "histogram() supports only non-negative integers, got %r" % (val,) )
        counts[val] += 1
    return counts

# The major drawback of methods based on characteristic/histogram vector
# is the sensitivity to the range of values in arr.
# If the range is huge, like in e.g. [1, 2, 1000000]
# the characteristic/histogram vector is equally huge, it may even
# overflow the available memory.
# On the other hand, the smaller the range, the more effective these methods are.

# Exercise:
# Modify characteristic/histogram so that they admit also
# negative integers in the given array arr.

# ----------------------------------------------------------------------
#     R E P E A T E D   S E A R C H  0
#     Very elementary DIY version, classical,
#     uses no extra Python support
# ----------------------------------------------------------------------

def queryArray0First( arr, queries ):
    """Count the queries which occur at least once in arr.

    Hand-made linear search: for each query the list is scanned from
    its start and the scan stops at the first match.
    """
    found = 0
    size = len( arr )
    for wanted in queries:
        pos = 0
        # advance until the end of arr or until the first occurrence
        while pos < size and not ( wanted == arr[pos] ):
            pos += 1
        if pos < size:
            # the scan stopped inside arr, i.e. on a match
            found += 1
    return found

def queryArray0All( arr, queries ):
    """Count all occurrences in arr of all queries.

    Hand-made linear search: for each query the whole list is scanned
    and every match is counted (no early stop, unlike queryArray0First).
    """
    total = 0
    for wanted in queries:
        for item in arr:
            if wanted == item:
                total += 1
    return total


# ----------------------------------------------------------------------
#     R E P E A T E D   S E A R C H  1
#     Membership test operation  "in"

# pretty, and possibly deceptive

def queryArray1First( arr, queries ):
    """Count the queries which occur at least once in arr.

    Each membership test is delegated to the 'in' operator.
    """
    return sum( 1 for wanted in queries if wanted in arr )

def queryArray1All( arr, queries ):
    """Intentionally empty placeholder, always returns None.

    The 'in' operator alone only tells whether a query is present;
    it cannot detect all occurrences of a query in an array (list),
    so there is no "All" variant of this method.
    """
    return None

# ----------------------------------------------------------------------
#     R E P E A T E D   S E A R C H  2
#     Detect query location by index() function

# pretty, and also possibly deceptive

def queryArray2First( arr, queries ):
    """Count the queries which occur at least once in arr.

    Presence is detected with arr.index(), which raises ValueError
    when the searched value is missing.
    """
    found = 0
    for wanted in queries:
        try:
            arr.index( wanted )
        except ValueError:
            # not found, go on with the next query
            continue
        found += 1
    return found


def queryArray2All( arr,  queries ):
    """Count all occurrences in arr of all queries.

    For each query, arr.index() is called repeatedly; every call resumes
    the search just behind the previously found position.
    """
    total = 0
    for wanted in queries:
        start = 0   # position where the next search begins
        try:
            while True:
                # the second parameter in .index() defines
                # the start position of the search
                start = arr.index( wanted, start ) + 1
                total += 1
        except ValueError:
            # no further occurrence, continue with the next query
            pass
    return total



# ----------------------------------------------------------------------
#     R E P E A T E D   S E A R C H   3
#     Applying indicator and histogram vectors

def queryArray3First( arr, queries ):
    """Count the queries which occur at least once in arr.

    The indicator vector of arr is precomputed once; afterwards each
    query is answered by a single list access.

    A query outside 0 .. len(vector)-1 cannot be in arr and is skipped.
    The lower bound matters: a negative query used as a list index would
    read the vector from its end and might report a false hit.
    An empty arr gives 0 hits.
    """
    if len(arr) == 0:
        # nothing to search in; also avoids building a vector from no data
        return 0

    # do precomputing:
    indi = indicator( arr )

    hitsN = 0
    for query in queries:
        if 0 <= query < len(indi) and indi[query] == 1:
            hitsN += 1

    return hitsN


def queryArray3All( arr, queries ):
    """Count all occurrences in arr of all queries.

    The histogram vector of arr is precomputed once; afterwards each
    query is answered by a single list access.

    A query outside 0 .. len(vector)-1 cannot be in arr and is skipped.
    The lower bound matters: a negative query used as a list index would
    read the vector from its end and might add a wrong count.
    An empty arr gives 0 hits.
    """
    if len(arr) == 0:
        # nothing to search in; also avoids building a vector from no data
        return 0

    # do precomputing:
    hist = histogram( arr )

    hitsN = 0
    for query in queries:
        if 0 <= query < len(hist):
            hitsN += hist[query]
    return hitsN


# ----------------------------------------------------------------------
#     R E P E A T E D   S E A R C H   4
#     Use dictionary in the role of histogram


def queryArray4First( arr, queries ):
    """Count the queries which occur at least once in arr.

    A dictionary keyed by the values of arr is precomputed once;
    afterwards each query is a single dictionary lookup.
    """
    # do precomputing:
    # (the local is not called 'dict', which would hide the built-in type)
    present = dict.fromkeys( arr, 1 )

    # all queries
    hitsN = 0
    for query in queries:
        if query in present:
            hitsN += 1

    return hitsN


def queryArray4All( arr, queries ):
    """Count all occurrences in arr of all queries.

    A dictionary mapping each value of arr to its number of occurrences
    (a histogram) is precomputed once; afterwards each query is a single
    dictionary lookup.
    """
    # do precomputing:
    # (the local is not called 'dict', which would hide the built-in type)
    counts = {}
    for val in arr:
        counts[val] = counts.get( val, 0 ) + 1

    # all queries
    hitsN = 0
    for query in queries:
        # a value missing in arr contributes 0
        hitsN += counts.get( query, 0 )

    return hitsN


# ----------------------------------------------------------------------
#     M A I N
# ----------------------------------------------------------------------

# -------------------- Experiments: ------------------------------------
# As the length of the array, where the data are stored, grows,
# the efficiency of the "in" operator and the index() method
# deteriorates very quickly.
# Try setting   arrLen = 5, 10, 100, 1000, 10000, ... etc.
# and see the result.

random.seed( 12312322 ) # arbitrary random seed, keeps the runs repeatable

# data range:
loBound = 1
upBound = 2000
# data size
arrLen = 1000
queriesN = 100000

print("Generating...", end = "")
arr = randomArray( arrLen, loBound, upBound )
queries = randomQueries( queriesN, loBound, upBound )
print("Done.")
print( "arrLen =", arrLen, "  queriesN =", queriesN )
print( "data range =", [loBound,upBound] )


# Uncomment the two lines below to trace the methods on a tiny data set:
#arr = [ 2, 10, 9, 10, 10 ]
#queries = [ 8, 9, 10, 11 ]
if len(arr) < 20 : print( arr )
if len(queries) < 20: print( queries )
print()


def _runExperiment( title, searchFirst, searchAll, data, wanted,
                    timesFormat = "%.2f  %.2f\n" ):
    """Run and time one pair of search functions and print the results.

    title       -- headline printed before the measurement
    searchFirst -- function(data, wanted) counting first occurrences
    searchAll   -- function(data, wanted) counting all occurrences
    data        -- the list which is searched
    wanted      -- the list of queries
    timesFormat -- '%'-format with two slots for the two elapsed times
                   (in seconds)
    """
    print( title )
    # perf_counter() is the clock meant for measuring short durations
    t1 = time.perf_counter()
    NhitsF = searchFirst( data, wanted )
    t2 = time.perf_counter()
    NhitsA = searchAll( data, wanted )
    t3 = time.perf_counter()
    print( "Number of hits:  First, All:", NhitsF, NhitsA )
    print( "Execution Times: First, All:", timesFormat % ((t2-t1), (t3-t2)) )


# Change False/True below to switch the particular experiments off/on.

if False:
    _runExperiment( "--- 0 --- Classic DIY",
                    queryArray0First, queryArray0All, arr, queries )

if True:
    # queryArray1All is only a placeholder, hence the XXX mark at its time
    _runExperiment( "--- 1 --- Use 'in' operation ",
                    queryArray1First, queryArray1All, arr, queries,
                    "%.2f  XXX%.2f\n" )

if True:
    _runExperiment( "--- 2 --- Use index() function",
                    queryArray2First, queryArray2All, arr, queries )

if True:
    _runExperiment( "--- 3 --- Use indicator/histogram ",
                    queryArray3First, queryArray3All, arr, queries )

if True:
    _runExperiment( "--- 4 --- Use dictionary",
                    queryArray4First, queryArray4All, arr, queries )







