code
Maximum sum of a contiguous subsequence
Hi-Lo game and 1D battleships game, with variants
1D array problems
2D array problems
Text files
String manipulation
Searching, Sorting
Recursion I
Recursive tree processing
Queue BFS
Using Matplotlib library
Using CSV files
Asymptotic complexity
Graph processing
Text files solved example

code

Maximum sum of a contiguous subsequence

# ( Part of BE5B33PGE course, FEL CTU, 2019 )
#
#    MAXIMUM SUM OF A CONTIGUOUS SUBSEQUENCE
#
# This a notorious problem in programming because
# it is simple and it illustrates in a dramatic way
# the relative efficiency of a cubic, quadratic
# and linear solutions.
#
# The task is easy to formulate:
# Given a sequence of N numbers find a contiguous
# subsequence which sum of elements is maximum possible.
#
# When the sequence contains only non-negative numbers
# the solution to the problem is obviously the whole original sequence.
# When the sequence contains some negative values
# the solution is not obvious and a suitable algorithm
# has to be applied.
 
 
import random
import time
import sys
 
def printf(format, *args):
    sys.stdout.write(format % args)
 
# ----------------------------------------------------------------------------------
# Cubic, quadratic and linear solution of the problem
# Suppose the sequence is stored in a usual list [...]
 
 
# Naive brute force solution
#   Cubic complexity
# Compute the sums of all countiguous subsequences,
# compare them and return the maximum one.
 
def maxSumCubic( aList ):
    ##  start value
    mmax = aList[0]
    ## search
    for i1 in range( 0, len(aList)-1 ):
        for i2 in range( i1, len(aList)):
            iSum = sum( aList[i1:i2] )
            if iSum > mmax:
                mmax = iSum
                besti1, besti2 = i1, i2
    return  besti1, besti2, mmax
 
# Less naive  solution
#   Quadratic complexity
# For each possible start X of a countiguous subsequence
# set sum to 0 and then scan the sequence from X to the right
# updating the sum by each subsequent element.
# Return the maximum sum found.
 
def maxSumQuadratic( aList ):
    ##  start value
    mmax = aList[0]
    ## search
    for i1 in range( 0, len(aList)-1 ):
        iSum = 0
        for i2 in range( i1, len(aList)):
            iSum  += aList[i2]
            if iSum > mmax:
                mmax = iSum
                besti1, besti2 = i1, i2+1
    return  besti1, besti2, mmax
 
 
# Fast (standard)  solution
#   Linear complexity
# Set the sum to 0. Scan the sequence once, update the sum by the current element.
# Whenever the sum becomes negative, reset it to zero.
# Return the maximum sum found.
 
def maxSumLinear( aList ):
    ##  start value
    mmax = aList[0]; besti1 = 0; besti2 = 0
    ## search
    sum = 0; starti = 0
    for i1 in range( 0, len(aList) ):
        sum += aList[i1]
        if sum < 0:
            sum = 0
            starti = i1+1
        elif sum > mmax:
            mmax = sum
            besti1, besti2 = starti, i1+1
    return  besti1, besti2, mmax
 
# ----------------------------------------------------------------------------------
# Manage sum function calls, aggregate more calls into a single method
 
def runMaxSum( speed, aList ):
    t_start = time.time()
    if   speed == 1:
        besti, bestj, mmax = maxSumLinear( aList )
    elif speed == 2:
        besti, bestj, mmax = maxSumQuadratic( aList )
    elif speed == 3:
        besti, bestj, mmax = maxSumCubic( aList )
    t_end = time.time()
    allTime = t_end - t_start
    return besti, bestj, mmax, allTime
 
def runAllSpeedsOnList(  aList ):
    besti, bestj, mmax, runTimeLinear = runMaxSum( 1, aList )
    besti, bestj, mmax, runTimeQuadratic = runMaxSum( 2, aList )
    besti, bestj, mmax, runTimeCubic = runMaxSum( 3, aList )
    print("Sequence length =", len(aList))
    print("Times   linear, quadratic, cubic")
    printf(" %12.2f %6.2f    %6.2f\n", runTimeLinear, runTimeQuadratic, runTimeCubic )
 
def runLinQuadOnList(  aList ):
    besti, bestj, mmax, runTimeLinear = runMaxSum( 1, aList )
    besti, bestj, mmax, runTimeQuadratic = runMaxSum( 2, aList )
    print("Sequence length =", len(aList))
    print("Times   linear, quadratic ")
    printf(" %12.2f %6.2f \n", runTimeLinear, runTimeQuadratic )
 
def runLinearOnList(  aList ):
    besti, bestj, mmax, runTimeLinear = runMaxSum( 1, aList )
    print("Sequence length =", len(aList))
    print("Time   linear ")
    printf(" %12.2f \n", runTimeLinear )
 
 
# ----------------------------------------------------------------------------------
# Crerate example data and run maxSum functions on the data
 
random.seed( 830 )
 
list10 = [random.randint(-9, 9) for k in range(10) ]
list100 = [random.randint(-9, 9) for k in range(100) ]
list200 = [random.randint(-9, 9) for k in range(200) ]
list400 = [random.randint(-9, 9) for k in range(400) ]
list600 = [random.randint(-9, 9) for k in range(600) ]
list1000 = [random.randint(-9, 9) for k in range(1000) ]
list10000 = [random.randint(-9, 9) for k in range(10000) ]
list100000 = [random.randint(-9, 9) for k in range(100000) ]
#list1000000 = [random.randint(-9, 9) for k in range(1000000) ]
#list10000000 = [random.randint(-9, 9) for k in range(10000000) ]
 
runAllSpeedsOnList( list10 )
runAllSpeedsOnList( list100 )
runAllSpeedsOnList( list200 )
runAllSpeedsOnList( list400 )
runAllSpeedsOnList( list600 )
runAllSpeedsOnList( list1000 )
runLinQuadOnList( list10000 )
runLinearOnList( list100000 )
 
 
# ---------------------------------------------------------------------------------- 
# Determine (without running the program!) how long would it take to
# 1. process the list of length  5 000 by the cubic procedure,
# 2. process the list of length 10 000 by the cubic procedure,
# 3. process the list of length 100 000 by the cubic procedure,
# 4. process the list of length 1 000 000 by the cubic procedure,
# 5. process the list of length 100 000 by the quadratic procedure,
# 6. process the list of length 1 000 000 by the quadratic procedure,
#

Hi-Lo game and 1D battleships game, with variants

# ( Part of BE5B33PGE course, FEL CTU, 2019 )
#
#     The Hi-Lo (meaning High-Low) game
#
# The program chooses an integer unknown to the user.
# The user has to determine the number by inputting her guesses
# to each of which the program reacts in three possible ways:
# It informs the user that her guess is either too high or too low
# or, maybe, spot-on. In the latter case, the user "wins" the game.
#
# The task is to determine the number by minimum number of guesses.
#
 
import random
import time
 
# initialize the random number generator by a random value - system time.
random.seed(time.time())
 
def hilo( minval, maxval ):
    print("Guess a hidden number between", minval,"and", maxval)
    guesses = 0
    hidden = random.randint( minval, maxval )
    while True:
        print("Your guess: ", end = "")
        guess = int( input())
        guesses += 1
        if guess < hidden:
            print(" too low! ")
        elif guess > hidden:
            print(" too high! ")
        else:
            print( "Got it in", guesses, "guesses.")
            break
 
 
# An "extended" version of the High-Low game.
# The program chooses TWO numbers which are unknown
# to the user and which have to be determined.
# User repeatedly inputs ONE number as a guess  and the program
# initially reacts in three possible ways: It informs the user that
# her guess is either greater than both unknown number or it is smaller
# than both numbers or it is between the numbers or, maybe, it is spot-on
# one of the unknown numbers. When the user determines one of the numbers
# the program continues as in the simple case until the user
# determines the second number and this "wins" the game.
 
def hilo2( minval, maxval ):
    print("Guess TWO hidden numbers between", minval,"and", maxval)
    print("Input always a SINGLE value as a guess.")
    guesses = 0
    # generate hidden values
    while True:
        hidden1 = random.randint( minval, maxval )
        hidden2 = random.randint( minval, maxval )
        if hidden1 < hidden2: break
    # state of the game
    found1 = False
    found2 = False
    while True:
        print("Your guess: ", end = "")
        guess = int( input())
        guesses += 1
        if not(found1) and not(found2):
            if guess < hidden1:
                print(" too low! ")
            elif guess == hidden1:
                print("Got one value correct! Now for another one:")
                found1 = True
            elif guess < hidden2:
                print(" somewhere in between!")
            elif guess == hidden2:
                print("Got one value correct! Now for another one:")
                found2 = True
            else:
                print(" too high! ")
        elif found1:
            if guess < hidden2:
                print(" too low! ")
            elif guess > hidden2:
                print(" too high! ")
            else:
                print( "Got it all in", guesses, "guesses.")
                break
        elif found2:
            if guess < hidden1:
                print(" too low! ")
            elif guess > hidden1:
                print(" too high! ")
            else:
                print( "Got it all in", guesses, "guesses.")
                break
 
# ----------------------------------------------------------------------------------
# Run the game
 
hilo( 10, 70 )
# hilo2( 10, 70 )
 
 
# ----------------------------------------------------------------------------------
# Modifications of the game
#
# 1. The program registers the sum of distances of the guesses from the
# hidden value.
# For example, if the hidden value is 24 and the user guesses are 17, 26, 24
# the program totals |17-24| + |26-24| + |24-24| = 7+2+0 = 9.
# The program also prints out the sum of the distances after each guess
# but not after the first or the second guess, as it would be too easy to the user
# to calculate the hidden number.
#
#
# 2. The game is played by two players who take turns.
# The player who first determines the hidden number wins.
# The program alternates between prompting one player and the other player
# to input their guesses.
#
#
# 3. After each guess, there is a 50% probability that the program will
# increase or decrease the hidden number by 1 or 2. The player has no information
# whether this had happened and still has to determine correctly the
# (final) value of the hidden number.
# For example, The hidden number is 24.
#   User guess 22, too low
#   User guess 26, too high, 24 changed to 25
#   User guess 24, too low, 25 changed to 24
#   User guess 25, too high, no change
#   User guess 24, hit 24, player "wins".
#
#
# 4.  The program chooses FIVE hidden numbers. After each guess the
# player is informed how many numbers are above her guess and how many
# numbers are bellow her guess. The game stops when the player had
# hid all hidden numbers.
#
 
# ----------------------------------------------------------------------------------
# More suggestions
#
# 5. 1D battleships
# The program chooses FIVE hidden CONSECUTIVE integers (= a battleship made of 5 pieces).
# The player's task is to determine all of them.
# After each guess the program only informs
# the player if she had or had not hit any of the hidden integers, it does
# not inform her where in the relation to the hidden numbers is her guess located.
# The game ends when all hidden integers have been hit at least once.
#
#
# 6. Modify any of the given variants to be played by two players.
#
#
# 7. Moving 1D battleships
# After each hit, the sequence of the hidden integers (a battleship)
# is moved by 1 to the left or to the right unknown to the user. The program
# remembers which parts of the battleship have been hit.
# For example, the battleship is (11,12,13,14,15) and 12 had been hit. Then
# the battleship is moved 1 to the right ad it becomes (12,13,14,15,16) and
# program registers that, in fact, 13 had been hit.
#

1D array problems

# ( Part of BE5B33PGE course, FEL CTU, 2019 )
#  
# -------------------------------------------------------------------- 
#     1D list/array problems
# -------------------------------------------------------------------- 
#
#   Guidelines:
#
#   1.  Use only arrays/lists to solve the following problems.
#   2.  Write each solution as a function and call the function in the main program
#   3.  If the problems at the top of the list are too easy to you 
#       start at the bottom of the list.
#   4.  Some tasks are marked with (**complexity!**) label.
#       Explain in a few sentences, how the size of the input data
#       affects the execution time of the code. 
#       A typical additional question related to the task might be:
#       "Can the code process the data of size 10^3, 10^4, 10^6, 
#       in about one second? Why?"   
 
 
 
'''
[1] (**complexity!**)
Check if in an array the following holds:
a) the number of leading values 0 is equal to the number of
   the trailing values 0 (ex.: 001...100 -- OK, 01...0001 -- bad)
b) each value 1 is surrounded by values 0 (ex ...010... OK, ...11... bad ) 
c) each value 1 is followed by at least two values 0.
 
[2]
Write a method zerosToStart(aa) which will move all zero values in the array aa
to the beginning of the array. The order of the nonzero elements will not be changed. In addition, the
function should not declare any other array and should do the task in linear time.
Ex.:  (3,0,0,2,1,0,7,5)  -->  (0,0,0,3,2,1,7,5)
 
[3] (**complexity!**)
Write a  function H(aa) which will substitute some elements in the array aa by the minimum
value of the array aa. An element, say E, should be substituted 
when its value is bigger than the value of all elements which index is bigger
than the index of E. (In other words, E is bigger than all elements to the right of E.)
The method should do the task in linear time with respect to the length of aa.
Ex.:  (1,2,30,5,4,20,10,5,7) --> (1,2,1,5,4,1,1,5,7)
 
[4] (**complexity!**)
Write a  function  F( aa, bb) which will check if a list aa contains the same values
as a list bb. The lists aa and bb are sorted in increasing order. The task should be done in linear time
with respect to the value max(len(aa), len(bb)). Note that containing same values does not necessarily
means that the lists are identical, see the example:
Ex.: aa = (1,1,1,4,7,7,12), bb = (1,4,4,4,4,7,12,12,12), result = true
 
[5]  (**complexity!**)
Write a method with one integer parameter n which will write out all possible pairs of positive integers.
First element of the pair must be odd, the second one must be even. All elements must be smaller then n.
Ex.:
Input:  n = 5
Output: 1 2
        1 4
        3 2
        3 4
 
[6]  (**complexity!**)
Write a method with one integer parameter n which will write out all possible different triples of positive integers.
Each element of the triple must be smaller then n and the first and the third element of the triple must be equal.
Ex.:
Input:  n = 3
Output: 1 1 1
        1 2 1
        2 1 2
        2 2 2
 
[7]
Verify if lengths of streaks of zeros in a sequence do not decrease, as we are moving from left to right and checking the streaks.
A streak is a contiguous subsequence of maximum length containing a repetitive single value.
Example: 5 2 0 2 3 0 0 4 0 0 2 6 1 0 0 0 3 --- Yes,  
         4 2 0 0 5 1 1 0 0 2 4 0 5 1 0 0 -- No
 
[8]  (**complexity!**)
Two lists A and B are given. Decide if each element A[i] in list A is equal to the sum 
of all such elements in array B which are smaller than A[i].
 
[9]
Find sum of all perfect squares in the interval [a, b]. The values a and b will be the input parameters of the function.
# Perfect squares are integers 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, ...
# For example, there are 5 perfect squares in the interval[50, 150].
 
[10]
Find sum of all perfect cubes in the interval [a, b]
# Perfect cubes are integers 0, 1, 8, 27, 64, 125, 216, 343, 512, 729, 1000, ...
# For example, there are 10 perfect cubes in the interval[500, 5000].
 
[11] (**complexity!**)
In array A, the element A[0] is given. For each next element A[i+1] (i >= 0) it holds that
A[i+1] is equal to the number of occurrences of the value A[i] among all elements A[0], A[1], ..., A[i-1].
Ex: A[0] = 2, A = [2 0 0 1 0 2 1 1 2 2 3 0 3 1 ... ]
Generate array A with length 10, 100, 1000, 10000, million.
 
[12] (**complexity!**)
Two indices i < j in an array A are called guards when each element with index
between i and j is smaller than min(A[i], A[j]). Find all guards in a given array.
Example: A = [8, 2, 7, 5, 2, 3, 4, 2, 6, 1]
Guards are 2, 8, because A[2] = 7, A[8] = 6 and all elements between  indices 2 and 8 are smaller than min(A[2], A[8]).

2D array problems

some basic reference for beginners

# ( Part of BE5B33PGE course, FEL CTU, 2019 )
#
#     2D array problems
#
# Use only arrays/lists to solve the following problems
# if the problems at the top of the list are too easy for you
# start at the bottom of the list.
#
#   Some tasks are marked with (**complexity!**) label.
#   Explain in a few sentences, how the size of the input data
#   affects the execution time of the code. 
#   A typical additional question related to the task might be:
#   "Can the code process the data of size 10^3, 10^4, 10^6, 
#   in about one second? Why?"   
 
 
'''
[1]  (**complexity!**)
Write a function which will check a 2d array. It will return
-1 ... if there are more odd values than even values in the array
 1 ... if there are more even values than odd values in the array
 0 ... if the number of odd and even values in the array is the same
 
[2]  (**complexity!**)
Write a function which will detect, in a 2d array, all columns
which contain an ascending sequence of values (the values should ascend
from top to bottom). The function will return a new 1D array containing
indices of all columns with ascending sequences.
 
[3]  (**complexity!**)
Write a function which decides whether a 2D array contains so called
"saddle point". A saddle point is an element X in the array with the property
 that it is the maximum value of all elements in its column and also
 it is the minimum value of all elements in its row.
 
[4]  (**complexity!**)
Write a function which generates a 2D array of size MxN with
values 1,2,3,...,M+N, diagonally in the following manner:
  1   2   3   4 ...   N
  2   3   4   5 ... N+1
  3   4   5   6 ... N+2
  . ... ... ... ... ...
  M M+1 M+2 M+3 ... M+N
The values of M and N are the input parameters of the function.
The function returns the generated array.
 
[5] (**complexity!**)
Write a function which flips 2D array horizontally.
  Example:
  2 0 6 5        5 6 0 2 
  4 1 7 2   -->  2 7 1 4
  8 4 4 1        1 4 4 8
 
[6]
Write a function which creates a 2D square array of size NxN
and fills it with values 1,2,3,...,N in the following way:
1 2 3 4 5 ... N
2 2 3 4 5 ... N
3 3 3 4 5 ... N
4 4 4 4 5 ... N
5 5 5 5 5 ... N
. . . . . ... .
N N N N N ... N
 
[7]
Write a function which creates a 2D square array of size NxN
and fills it with values 1,2,3,...,N in the following way:
1 1 1 1 1 ... 1
1 2 2 2 2 ... 2
1 2 3 3 3 ... 3
1 2 3 4 4 ... 4
1 2 3 4 5 ... 5
. . . . . ... .
1 2 3 4 5 ... 5
 
 
[8A]
Write a function which prints all diagonals of a given 2D array.
A diagonal starts at the left or at the top of the table representing the 
array and then it runs in the south-east direction.
Ex:
Input:
-1   0   8   1   3
 1  -2   4  -3  -3 
 6   1  -2   1   0
 1  -3   4   5  12
Output:
1
6 -3
1 1 4
-1 -2 -2 5
0 4 1 12
8 -3 0
1 -3
3
 
[8B]
Write a function which prints all antidiagonals of a given 2D array.
An antidiagonal starts at the left or at the bottom of the table representing the 
array and then it runs in the north-east direction.
Ex:
Input:
-1   0   8   1   3
 1  -2   4  -3  -3 
 6   1  -2   1   0
 1  -3   4   5  12
Output:
-1
1 0
6 -2 8
1 1 4 1
-3 -2 -3 3
4 1 -3
5 0
12
 
[9] (**complexity!**)
Write a function which finds in a 2D array a partial diagonal with
maximum sum. The diagonal may run in in "north-west" or in "north-east"
direction. The diagonal may contain just a few elements from the entire diagonal, 
nonetheless, the diagonal cannot be split into two or more separate, 
non-contiguous pieces.
Note that the diagonal need not to be aligned with the corners of the array,
it may be located anywhere in the array.
The array is not necessarily square, it may be rectangular.
A single element is considered to be a diagonal as well.
Ex1:
 4   7  -2  -3  
-1   0   2   1
-2   3  -4  -2
-1   0   8   1
 1  -2   4  -3
 6   1  -2   1 
 1  -3   4   5 
The maximum is attained at the diagonal with values 6 -2 8 -2 
(starting in the 6-th row and going upwards right) taking only
the sub-sequence 6 -2  8 which sum is 12.
Ex2:
-1 -1 -1
-1  6 -1
-1 -1 -1
The maximum is attained at the main diagonal or main antidiagonal (in SW - NE direction)
  -1, 6, -1, taking only the central item 6.
 
'''

Text files

Climate data

Each of the following data files contains a sequence of daily average temperatures at some meteorological station in Europe.
These data and many more are publicly accessible at the webpage of European Climate Assessment & Dataset project (http://www.ecad.eu) In particular, we are using excerpts from Daily mean temperature blended data set(You do not have to download it, it has about 372Mb.)

The data time span is quite large, the oldest recording started in late 18th century and all of them had been carried out daily since that. The format of the files is relatively simple: First, there are few lines/paragraphs of text describing the data set and then there are uniformly structured lines, each of which contains the date of the measurement and the temperature on the date plus some (relatively neglectable) additional technical information. Open a file in your text editor and see it for yourself.

Data files: Choose any data file for your exercises and code debugging, preferably process more of them.

tg_staid005464.txt, tg_staid000027.txt, tg_staid000048.txt, tg_staid000048.txt, tg_staid000169.txt, tg_staid000173.txt, tg_staid000271.txt, tg_staid005464.txt, tg_staid005465.txt, tg_staid011744.txt.

The tasks

1. Process a climate text file.
Find the average temperature on 1.1. during the whole 20th century 
at a selected meteorological station station. 
That is, compute the average of  temperatures 
on 1.1.1901, 1.1.1902, 1.1.1903, ..., 1.1.1998, 1.1.1999, 1.1.2000.

[See an example solution at the page bottom. ]


2. Process a climate text file.
Compute the average temperature of each month in the years
1800, 1810, 1820, 1830 , ... 1990, 2000, 2010, if the measurements
for those years which data are completely present in the data file.

3. Process a climate text file.
Find the longest continguous time interval (measured in days)
in which the the temperatures were negative on each day.

4. Process a climate text file.
Find the year in which the average temperature
was the lowest.
Do not consider years with incomplete information or
with missing data for some days of the year.

Plain text -- online book

Project Gutenberg (https://www.gutenberg.org/) offers over 58,000 free eBooks in various format.

You may download and experiment with any of them, preferably start with the books in plain .txt format.

In this practice we offer The Adventures of Sherlock Holmes by Arthur Conan Doyle linked here book . If you do not like this particular author/title go to the project webpage and download any other suitable book in plain .txt format.

The tasks

-----------------------------------------------------------------------------
1. Write out all longest words in the file. Solve cases A. and B. separately

A.
A word is sequence of characters which is surrounded by spaces.
Using this definition, punctuation symbols become part of a word.
For example, string "Hello, nice to see you again!"  will also contain words
"Hello," and "again!".

B.
A word is sequence of characters which is surrounded by spaces or by
punctuation marks.
Using this definition, punctuation symbols are not part of a word.
For example, string "Hello, nice to see you again!"  will contain words
"Hello", "nice", "to", "see", "you", "again".
Hint: First substitute each punctuation character by a space and then treat the
string as in case A.

In both cases A and B neglect the words which are divided at the end of the line.
Only if you want to challenge yourself include those words too in your search.  

-----------------------------------------------------------------------------
2. Create a list of sentences in the file

A plain sentence is a string which starts with capital letter
and ends with a dot, a question mark or an exclamation mark.
A sentence is either a plain sentence or a plain sentence enclosed in
double quotation marks.

Plain text -- list of topics

The following file contains a list of various computer science topics, each topic is listed on a single line of text.

List of topics .

The tasks

1. 
Extract and print all topics (= lines in the file) related to trees. 
A topic is related to a tree
if the corresponding text line contains either word "tree" or a word "trees".

2.
Extract and print all topics (= lines in the file) containing some abbreviation.
An abbreviation is a sequence of 2, 3 or 4  capital letters.
Example:
  reconstructing tree from DFS and BFS traversal
  Fast Fourier Transform (FFT) (will get TLE)
  finding LCA in a DAG

3.
Extract and print all topics (= lines in the file)
containing some text enclosed in round brackets.
Both brackets -- the opening and the closing one must be present.
Example:
  quadratic equations (or binary search)
  cycle finding (Floyd's cycle finding algorithm)
  negative (positive) cycles

String manipulation

The tasks

1.
Write a method that interleaves two strings: 
it should take one character from the first string, then one from the second string, 
another from the first string and so on. 
Once one string has no characters left it should carry on with the other string. 
For example, interleaving the strings "anna" and "patrik" 
should give the result "apnantarik" (or "paantnraik"). 

2.
Write a function which returns a list of strings.
When the strings are printed a "picture" should appear on the output.
The picture is a square with two diagonals. The size of the square
is a parameter of the function.
Example for size 8 and 9:
xxxxxxxx
xx    xx
x x  x x
x  xx  x
x  xx  x
x x  x x
xx    xx
xxxxxxxx

xxxxxxxxx
xx     xx
x x   x x
x  x x  x
x   x   x
x  x x  x
x x   x x
xx     xx
xxxxxxxxx

Searching, Sorting

A. Finish the problems from the previous sections which you have not solved yet.

B.

1.
Modify characteristic/histogram so that they can admit also negative integers in the given array.

1.99:
You can use and modify the given example to produce your own random lists: 
  # produce a randomly generated list of 1000 values in the range 50, 90
  randList = [ random.randint( 50, 90) for foo in range (1000) ]

2. 
Write a function which will move all suspicious values in the list L to the beginning of the list.
The mutual order of the suspicious values should not change. 
Also, the mutual order of the unsuspicious values should not change.
A value X is suspicious when it is in the range [10..20].
Ex: Input:  L = [ 5, 400, 11, 13, 2, 1, 15, 0, 18 ] 
    Output: L = [ 11, 13, 15, 18, 5, 400, 2, 1, 0 ] 
Demonstrate by an experiment that the task can be completed in less than 1 second when length(L) = 10^6.

The function header may look like:
def moveValuesToBegin( myList, rangeMin, rangeMax): # suspicious range is [rangeMin ... rangeMax] 
    ...
    

3.
You must find E = 1000 most expensive items 
in an unsorted price list containing N = 10 000 000 different items. 
Two schemes of solution are as follows.

Scheme A: Repeat E times the search for an item without changing order of items in the list.
Scheme B: Sort the list and extract the 1000 top items.
Determine by experiment which method is more effective.


4.
Write a function which takes as an input two strings A and B.
The function will return true if B consists only of some number of repetitions of A .
Otherwise, function will return false.
Examples: A = "one",  B = "oneoneoneone" -- true
          A = "one",  B = "oneon" -- false
          A = "banana", B = "banabanana" -- false
          A = "baba", B = "bababa" -- false
          A = "baba", B = "baba" -- true
          
5.
Write a function which takes as an input a list L of strings. 
The function should return a new string B satisfying the properties:
-- B is a continguous substring of each string in L
-- The length of B is maximum possible
Ex: L = ["xybcaz", "zbcaxyyy", "cccbcahh", "uuxbcazy"]
    return "bca" 

7.
Write a function which takes as an input a list L of strings and positive integer K. 
The function should return a new string B satisfying the properties:
-- B is a continguous substring of at least K strings in L
-- The length of B is maximum possible

Example 1:
L = ["abcdy", "dcba", "xbcde", "ycdxy", "aabcdx" ], K = 2
solution: string "abcd", it is a substring of K = 2 strings: "abcdy" and "abcdx".
Example 2:
L = ["abcdy", "dcba", "xbcde", "ycdxy", "aabcdx" ], K = 3
solution: string "bcd", it is a substring of K = 3 strings: "abcdy" and "xbcde" and "abcdx".
     


8. 
There is an array A filled randomly with integers from range 0..5.
Associate with each cell (with A[i]) the following information:
D[i] .. absolute value of the difference between the two neighbours of A[i]
M[i] .. sum of S[i] and A[i], modulo 4
S[i] .. the sum of the two neighbours of A[i]
P[i] .. the product of the two neighbours of A[i]
  The neighbour of the first/last element in the array, is the last/first
  element in the array (the array is "circular")

Sort A in such way that D is the most important criterion, the next criterion is
 M, the next is S, then P and finally, the last criterion is A
The elements of A should be sorted in ascending order by all criteria.

Example:
A = [ 2, 0, 1, 1, 5, 3, 4, 2, 3 ]
D = [ 3, 1, 1, 4, 2, 1, 1, 1, 0 ]
M = [ 1, 3, 2, 3, 1, 0, 1, 1, 3 ]
S = [ 3, 3, 1, 6, 4, 9, 5, 7, 4 ]
P = [ 0, 2, 0, 5, 3,20, 6,12, 4 ]

sorted 
A = [ 3, 3, 4, 2, 1, 0, 5, 2, 1 ]

Recursion I

Solutions

Upload your working solution of a problem below to the form PGE Practices 06 _ basic recursion.

Try to upload at least one solution in each of the sections [A], [B], [C], [D].

shared solutions: onlinegdb.com

The tasks

The problems 01., 02., 1. - 13. are (hopefully!) simple introductory problems, their difficulty should not vary too much. A little bit more of challenge might be presented in problems 14. and 15., though they also belong to the very standard repository of classical recursion tasks. Try to solve them just by yourself first, and only later consult the web.

Beginners
Another recursion explanation is on GeeksForGeeks. As a part of your training, you may consider also the problems listed in the “Output based practice problems for beginners” section (scroll down the page).

----------------------------------------------------------------------------------------
  P R O B L E M S   S E T    [A]
----------------------------------------------------------------------------------------

-- Problem 01. --

Take a recursive function in the lecture file  recursive1.py
and modify in such way that instead of printing the values
the function only returns a list of values without printing anything.
Use functions  rec_seq_down, rec_seq_up, rec_seq_updown.

An example of modification is below: 
# original function 
# -----------------
def rec_seq_downup( N ):
    if N < 4:
        print( " bottom reached ", end = '' )
        return
    print( N, end = ' ' )
    rec_seq_downup( N-1 )
    print( N, end = ' ' )


# modified function and its usage
# -------------------------------
def rec_list_downup( N ):
    if N < 0:
        return []
    partial_list = rec_list_downup( N-1 )
    result_list = [N] + partial_list + [N]    
    return result_list

downup = rec_list_downup( 12 )
print(downup)

-- Problem 02. --
Take a recursive function in the lecture file  recursive1.py
and modify in such way that instead of printing all values
the function only prints even numbers and each printed number
is followed by an underscore. 
Use functions  rec_seq_down, rec_seq_up, rec_seq_updown.

An example of modification is below: 
# modified function
# -----------------

def rec_even_downup( N ):
    if N < 0:
        print( " bottom reached ", end = '' )
        return
    if N%2 == 0: print( str(N)+"_ ", end = '' )
    rec_even_downup( N-1 )
    if N%2 == 0: print( str(N)+"_ ", end = '' )

----------------------------------------------------------------------------------------
  P R O B L E M S   S E T    [B]
----------------------------------------------------------------------------------------

Determine the output the function call WITHOUT running the program.

Example and analysis
====================

def rec1( n ):
    if n < 1: return 2
    return 1 + rec1( n-1 ) + rec1( n-2 )
# call:
rec1(5)

Each node n the tree diagram represents one function call,
the value in the node is the value of the parameter
                                  ____________________[5]____________________
                      ___________[4]____________                    _______[3]_____
           ________[3]_____             ______[2]___         ______[2]___      __[1]___
    ______[2]____      __[1]__      ___[1]___     [0]     __[1]___     [0]    [0]   [-1]
 __[1]__      [0]     [0]   [-1]   [0]    [-1]           [0]   [-1]
[0]   [-1]

The next tree scheme is based on the previous one,
the return value of the corresponding function call is appended
to each node.
                                  ____________________[5]38__________________
                      ___________[4]23__________                    _______[3]14___
           ________[3]14___             ______[2]8__         ______[2]8__      __[1]5__
    ______[2]8___      __[1]5_      ___[1]5__     [0]2    __[1]5__     [0]2   [0]2  [-1]2
 __[1]5_      [0]2    [0]2  [-1]2  [0]2   [-1]2          [0]2  [-1]2
[0]2  [-1]2

The function call rec1(5) returns value 38.

PROBLEMS
========

-- Problem 1. --
def rec2( n ):
    if n < 1: return 1
    return 2 + rec2( n-1 ) + rec2( n-2 )
# call:
rec1(5)

-- Problem 2. --
def rec3( n ):
    if n < 2: return 1
    return 1 + rec3( n-1 ) + rec3( n-2 )
# call:
rec1(5)

-- Problem 3. --
def rec4( n ):
    if n < 2: return 2
    return 1 + rec4( n-2 ) + rec4( n-3 )
# call:
rec1(6)

----------------------------------------------------------------------------------------
  P R O B L E M S   S E T    [C]
----------------------------------------------------------------------------------------

Describe how the return value of the function depends on
 the value(s) of its input parameter.

Example
def rec5( x, y ):
    if x >= y: return x
    return rec5( x+1, y )

The function returns maximum of x and y.
( If x is bigger than or equal to y then it is immediately returned
  as a result.
  If, on the other hand, x is smaller then y then the function
  repeatedly calls itself and increases the value of x until
  it is equal to y, which was originally the bigger of the two.)

Functions to analyze

-- Problem 4. --
def rec6( x, y ):
    if x < y: return rec6(x+1,y)
    return x

-- Problem 5. --
def rec7( x, y ):
    if y > 0: return rec7(x, y-1) + 1
    return x;

-- Problem 6. --
def rec8( x, y ):
    if y > 0: return rec8(x-1, y) - 1
    return y;

----------------------------------------------------------------------------------------
  P R O B L E M S   S E T    [D]
----------------------------------------------------------------------------------------

-- Problem 7. --
Determine the return value of the call f7(100).
Executing this call on an usual notebook would take about 10 000 000 000 000 000 years.
However, with little analysis, the exact return value can be easily found 
without running the function.

def f7( N ):
    if N < 1: return 2
    return f7( N-1 ) + f7( N-1 )
    
Hint:  Calculate f7(1), f7(2), f7(3), f7(4), etc. Observe the pattern and make a conclusion.     

-- Problem 8. --
Write a recursive function which will print an isosceles right triangle filled with character 'x'.
The number of lines in the triangle will be the parameter of the function.
(Isosceles triangle = the length of both shorter sides is the same )
Example:
f(5)
#
##
###
####
#####

-- Problem 9. --
Write a recursive function which will print an right triangle filled with character 'x'.
The number of x's along the longer leg will be twice as big as the
 number of of x's along the shorter leg.
The number of lines in the triangle will be the parameter of the function.
Example
f(5)
##########
  ########
    ######
      ####
        ##


-- Problem 10. --
Write a recursive function which will return the sum of the squares
of the first N positive integers
It holds for f:  f(1) == 1, f(2) = 1+4 = 5, f(3) = 1 + 4 + 9 = 14, f(4) = 30, etc.

-- Problem 11. --
Write a recursive function which will return the sum of all positive integers less than or
equal to N which are not divisible by 3.
It holds for f:  f(1) == 1, f(2) = 1+2 = 3, f(3) = 1 + 2 (3 is not included, it si divisible by 3),
f(4) = 1 + 2 + 4 = 7, f(5) = 1 + 2 + 4 + 5 = 12,
f(6) = 1 + 2 + 4 + 5 = 12  (6 is not included, it si divisible by 3),
f(7) = 1 + 2 + 4 + 5 + 7 = 19, etc.

-- Problem 12. --
Write a recursive function which will return a list of all pairs of the values
in a given list of integers. The list will be the input parameter of the function.
It must hold, for each pair in the output, that the sum of the pair is divisible by 3.
(You may consider implementing a loop in the body of the function.)

-- Problem 13. --
Write a recursive function which will return a list of all triples of the values
in a given list of integers. The list will be the input parameter of the function.
It must hold, for each triple in the output, that the sum of the triple is positive.

-- Problem 14. --
Write a recursive function which will return a list of tuples of the values
in a given list of integers. The list will be the input parameter of the function.
It must hold, for each tuple in the output, that the sum of the elements in the tuple
is equal to another given value K.
The tuples may be of different lengths.
Example
input: [ 2, 7, 3, 5, 11, 4, 8 ], K = 20
output:
[3, 5, 4, 8]
[7, 5, 8]
[2, 7, 3, 8]
The method you should avoid is to generate all possible tuples and only later
remove those which sum is not equal to K.

-- Problem 15. --
Determine the value of A(4, 4). Function A(m, n) is defined:
def A( m, n ):
    if m == 0: return  n + 1
    if n == 0: return A( m-1, 1 )
    if m > 0 and n > 0: return A( m-1, A(m, n-1) )

Recursive tree processing

With example solutions below.

Write a recursive function which will process a binary tree.
Check its performance by generating few trees with depth less than 7
and applying the function to each of the trees.
If a function changes the structure or the contents of the tree
print each tree twice: before the function is applied and after the function is applied.
Visually check if the function processed the tree correctly.

Follow the example in the file ADTtreeExample.py in the lecture notes.
Optionally, copy and modify the functions from the main lecture file ADTtree2.py.


1.
The function removes from the tree all leaves with odd key value.

2.
The function returns sum of all keys in leaves of the tree.

3.
The function returns sum of all keys in all nodes which depth is equal to a given value D.

4.
The function returns the number of all nodes which height (see definition in the lecture code)
is equal to a given value H.

5.
The function returns the biggest value of the keys of the nodes in the maximum depth in the tree.

6.
The function returns the smallest value of the keys of all **leaves** in the minimum depth in the tree.

7.
The function returns the smallest value of the keys of all **leaves** in the minimum depth in the tree.

8.
The function creates a tree which contains 2*N+1 nodes and contains only two leaves, the depth
 of both leaves is N.

9.
The function creates a duplicate of the given tree.
The input parameter is the root of the original tree.

10.
The function substitutes the key value of a node
by the sum of depths of all nodes in its left and right subtrees.

11.
The function adds to the tree some number of nodes. After the operation, the depths of all
 leaves in the tree is equal and also the number of added nodes is minimum possible.

Queue BFS

0.
Solve any problems in the previous tree related section(s),
as a kind of repetition and a reinforcement of your tree manipulation skills.

1.
Modify the BFS procedure in such way that it prints only the keys in the leaves of the tree,
it does not print the keys in the internal nodes of the tree.

2.
Modify the BFS procedure in such way that it prints the keys in each particular level
of the tree on a separate line. Specifically, the key of the root will be printed also on a separate line.

3.
Modify the BFS procedure in such way that it prints the keys in each particular level
in order from right to left.
Example:
_______24________
____12___ ____3____
13 17 __22__ 8
6 1
Output: 24 3 12 8 22 17 13 1 6

4.
Modify the BFS procedure in such way that it prints the keys in the tree
only up to a given depth D.
Example, using the tree in the previous Example:
D = 0
Output: 24
D = 1
Output: 24 12 3
D = 2
Output: 24 12 3 13 17 22 8

5.
Use BFS procedure to locate in the tree and print all such nodes
which key is bigger then the parent key. Suppose that a reference
to the parent is not part of the node representation.

6.
Use BFS procedure to create a physical duplicate of a tree.
The shape of the duplicate will be the same as that of the original
and each node key in the duplicate will be twice as big as the key in
the corresponding original node.

7.
Use BFS to check whether the shape of two given trees is identical.
The key values in both trees may be arbitrary and should not be considered in the check.

Example solutions of problems 2. and 4. in recursive tree processing paragraph above.
The functions and the main program are an extension of the tree manipulation code presented in Lecture 7 code ADT tree 2.

# ...
    # simple auxilliary function
    def isLeaf(self, node):
        if node == None: return True
        return node.left == None and node.right == None
 
    #2.
    #The function returns sum of all keys in the leaves of the tree.
    def f2(self, node):
        if node == None: return 0
        if self.isLeaf( node ):
            return node.key
        #else:
        return self.f2(node.left) + self.f2(node.right)
 
    #4.
    # The function returns the number of all nodes which height
    # (see definition in the lecture code) is equal to a given value H.
    # Solution:
    #   Each recursive call of the function returns two values:
    #      -- the number of the nodes with the given property
    #         in the whole sub-tree which root is in the current node,
    #      -- the height of the current node
    def f4(self, node, givenH ):
        # (Compare the returned -1 value to the analogous approach
        # in the keysToHeights function in the lecture code )
        if node == None: return 0, -1
 
        # Extract necessary information from the left and the right subtree
        countL, heightL = self.f4( node.left, givenH )
        countR, heightR = self.f4( node.right, givenH )
 
        # combine the extracted information from the subtrees
        # to construct the information relevant to the current node
 
        nodeHeight = 1+ max(heightL, heightR)
        if nodeHeight == givenH:
            nodeCount = 1 + countL + countR
        else:
            nodeCount = 0 + countL + countR
 
        return nodeCount, nodeHeight
 
    def markNodesWithHeight(self, node, givenH):
        if node == None: return -1
 
        heightL = self.markNodesWithHeight( node.left, givenH)
        heightR = self.markNodesWithHeight( node.right, givenH)
        nodeHeight = 1 + max( heightL, heightR )
 
        if nodeHeight == givenH:
            node.tag = '*'
 
        return nodeHeight
 
 
    def unmarkAllNodes(self, node):
        if node == None: return
        self.unmarkAllNodes( node.left )
        self.unmarkAllNodes( node.right )
        node.tag = ' '
 
# ............................................................................
#                M A I N   P R O G R A M
# ............................................................................
 
#...        
 
print( "sum of keys in the leaves")
print( t.f2(t.root) )
 
 
givenHeight = 2
t.markNodesWithHeight( t.root, givenHeight )
t.display()
print( "Number of nodes with the given height", givenHeight)
count, rootheight = t.f4(t.root, givenHeight)
print( count )
t.unmarkAllNodes( t.root )
 
#############################################################################################
turtle example(s)
b = []
posx, posy, dir, trc, penup = 0,0,2, 'o', True
ddir = [ [0,1], [-1,1], [-1,0], [-1,-1], [0,-1], [1,-1], [1,0], [1,1] ]
dirname = ["E", "NE", "N", "NW", "W", "SW", "S", "SE",  ]
def init( w, h ):
    global b
    b = [ ['.']*w for i in range(h) ]
 
def disp():    
    global b
    b[posy][posx] = 'T'
    for i in range(len(b)):
        line = b[i]
        for c in line:
            print( ' '+c, end = '')
        if i == posy:
            print( " dir = " + dirname[dir], end = '' )
        print()
    print("-" * len(b[0]))
 
def setpos( x, y ):
    global posx, posy
    posx, posy = x,y
 
 
def fd( n ):
    global posx, posy, b
    for i in range( n ):
        if 0<=posy<len(b) and 0<=posx<len(b[0]):
            b[posy][posx] = trc
        posy += ddir[dir][0]
        posx += ddir[dir][1]
 
def lt():
    global dir
    dir = (dir+1) % 8
 
def rt():
    global dir
    dir = (8+dir-1) % 8
 
def spir( n ):
    if n <= 0: return
    fd(n); lt(); #lt(); 
    spir( n-1 )
 
init( 40,40 )
setpos( 39,28 )
spir(14)
disp()
'''
fd(2)
lt()
fd(4)
rt()
fd( 2 )
disp()
'''

Using Matplotlib library

Recall the climate text files in the section Climate data above.

1.
Process a climate text file.
Find the average temperature on each day in all years on record.
That is, compute the average temperature for each day separately.
There will be an average temperature of 1.1., an average temperature of 2.1.,
etc. up to average temperature on 31.12.
Thus, there will be sequence of 365 average temperatures, for each day in a year.
For simplicity, skip and do not process 29.2. in the leap years.

Display all average temperatures in a plot
using  matplotlib library.
There will be 365 data points to display in the plot.
Choose yourself visual parameters of the plot
(points shape/color, grid, etc.)

The climate file will be a parameter of your solution.

( If you want to add some more features to the display,
consult the documentation references in the lecture or search the web. )  

[Apart form the lecture examples, see also another example solution at the page bottom.]

Using CSV files

Using data from https://www.gapminder.org

Downloaded data file: net_users_num.csv, population_total.csv, accessible also here: zipped .

Problems:

1.
Find those countries which population was between 1M and 2M in 2015.

2.
Find countries which had less than 100k internet users in 2015.

3. Find the year in which the worldwide increase 
   of the net user was the biggest, compared to the previous year.
   (the difference of the total number of net users between two
    successive years is maximized.)

Asymptotic complexity

1D array problems and 2D array problems.

 Determine the asymptotic complexity of a function which solves 
 a problem marked by the label  
 (**complexity!**) in the sections 1D array problems and 2D array problems above.
 
 You may either use some of your code solutions of those problems 
 or you may work completely analytically without coding.
 
 Explain your reasoning in a single short paragraph, 
 sometimes even 2-3 lines of explanation are sufficient.
 Send your finding to berezovs@fel.cvut.cz.

Graph processing

Write a program which processes a graph.
Use the graphs stored in the archive . Download the archive and run your program on all data.

The tasks:

Reminder: The degree of node X is the number of neighbours of X, that is, 
the number of nodes connected directly to X by a single edge.
We denote en edge connecting two nodes X and Y by symbol (X, Y)

1. An edge is a balanced edge if the degree of its both end nodes 
   is the same. 
   Print the number of balanced edges in the graph.

2. The slope of an edge is the absolute value of the difference between 
   the degrees of its and nodes. (The slope of a balanced edge is 0.)
   Count then number of edges with maximum slope in the graph.

3. Three different nodes X, Y, Z define an basic open path 
   if there are edges (X,Y) and (Y,Z) in the graph 
   and there is no edge between X and Z.
   Print the number of all basic open paths in the graph. 
   Each path is counted only once, that is, 
   consider the triples X, Y, Z and Z, Y, X  to be identical.      

4. Analogously to the previous problem, four different nodes X, Y, Z, W 
   define an open path of length 3 if there are edges (X,Y), (Y,Z), (Z,W) 
   and there is no edge (X, W) in the graph.
   Print the number of all open paths of length 3 in the graph. 
   
5. Count the number of triangles in the graph. 
    A triangle is a triple of nodes X, Y, Z, such that
   there are edges (X,Y), (Y,Z) and (Z,X) in the graph.
   
6. Calculate the distance between each two nodes in the graph using BFS. 
   Print out the maximum distance between two nodes.
   This value is called the diameter of the graph.

A graph and its picture:

Text files solved example

#  Solution of task 1 in file processing
 
def dateOfInterest( date ):
    # skip year 1900
    if date[0:4] == "1900": return False
 
    if   ( date[0:2] == "19" and date[4:8] == "0101" )\
        or date == "20000101" :
        return True
    else: return False
 
 
# -------------------------------------------------------------
# The avg20century funcion depends in all details
# on the exact format of the data.
# Check the data file visually to see how the data are organized.
 
def avg20century(fileName):
    # named constants do help
    dateColumn = 2
    temperColumn = 3
    qualityColumn = 4
    qualityValid = 0
    qualitySuspect = 1
    qualityMissing = 9
 
    # start reading
    file = open( fileName, "r" )
 
    # skip the file header
    while True:
        line = file.readline()
        if line[0:5] == "STAID": break
 
    minTemper = 10000 # start with big temperature
    dateMinTemper = 0
 
    # to compute average
    totalTemp = 0
    noOfDays = 0
 
    # process all lines
    while True:
        line = file.readline()
        # detect possible empty lines to stop
        if line == None or line.strip(" ") == "":
            break
 
        # extract info from a text line
        lineVals = line.split(",")
        date =  lineVals[dateColumn]
        #date =  lineVals[2]
 
        temper = int( lineVals[temperColumn] )
        quality = int( lineVals[qualityColumn] )
 
        # do not accept dubious data
        if quality != qualityValid:
            if dateOfInterest( date ):
                print( line, end = "" )
            continue
 
        # check 20 century
        if dateOfInterest( date ):
            totalTemp += temper
            noOfDays += 1
            #print(line, date, temper, totalTemp )
 
    file.close()
 
    print("total temp:", totalTemp)
    print("no of days:", noOfDays)
    # in data file temperatures are x 10
    print( "average ", (totalTemp/noOfDays)/10 )
 
    #end of coldestDay
 
# ------------------------------------
#   M A I N   P R O G R A M
 
path ="d:\\Iskola\\PGE2021\\data\\"
fname = "TG_STAID000169.txt"
 
avg20century(path + fname)
 
# if the data file is in your working directory
# it is enough to set 
# path = ""

Table of Contents

code

Maximum sum of a contiguous subsequence

Hi-Lo game and 1D battleships game, with variants

1D array problems

2D array problems

Text files

Climate data

Plain text -- online book

Plain text -- list of topics

String manipulation

Searching, Sorting

Recursion I

Recursive tree processing

Queue BFS

Using Matplotlib library

Using CSV files

Asymptotic complexity

Graph processing

Text files solved example