User:Alice/Code Exercises: Difference between revisions

From XPUB & Lens-Based wiki
No edit summary
Line 96: Line 96:


I wrote a script that would turn a list of words of different lengths into a pattern similar to the one Carl Andre typed by hand with a typewriter.
I wrote a script that would turn a list of words of different lengths into a pattern similar to the one Carl Andre typed by hand with a typewriter.
So far, all the tests pass. When receiving as input a long list of words, it raises a ValueError, which means it still needs debugging...
It takes a text file as input. In the context of Special Issue V, the text will be the result of OCR-ing a page scanned from my reader. The script selects all words whose length is shorter than or equal to the number given as the argument maxlength, then arranges them into an ascending/descending pattern.




Line 106: Line 106:
import pytest
import pytest
from math import ceil
from math import ceil
from pprint import pprint
from sys import stdout


def pop_items(words, num_items):
    ''' Removes num_items from words.'''
    if not words:
        return [], []


def grabber(words, numgrab):
    if num_items > len(words):
    grabbedwords = []
         raise ValueError('Not enough items!')
    for number in range(numgrab):
         grabbedwords.append(words.pop(0))
    return (grabbedwords, words)


    popped = []
    for number in range(num_items):
        removed = words.pop(0)
        popped.append(removed)
    return popped, words


def pattern(words, maxlength):
def all_words_less_than(words, maxlength):
    ''' Checks if the words have the correct length given in maxlength'''
    for word in words:
        if len(word) > maxlength:
            return False
    return True
 
def filterwords(words, maxlength):
    ''' Puts the words which have the correct length in a new list '''
     goodwords = []
     goodwords = []
     for word in words:right_one = ['a', 'aa', 'aaa', 'aa', 'a', 'b', 'bb', 'bbb', 'bb', 'b']
     for word in words:
         if len(word) <= maxlength:
         if len(word) <= maxlength and len(word) >=2:
             goodwords.append(word)
             goodwords.append(word)
    return goodwords
def pattern(words, maxlength):
    goodwords = filterwords(words, maxlength)
    items_pattern = maxlength + (maxlength -4)


    items_pattern = maxlength + (maxlength -1)
     if len(goodwords) % items_pattern != 0:
     if len(goodwords) % items_pattern != 0:
         raise ValueError
         rest = len(goodwords) % items_pattern
        difference = len(goodwords) - rest
        goodwords = goodwords[:difference]


     times = int(len(words) / items_pattern)
     times = int(len(words) / items_pattern)
     final_pattern = []
     final_pattern = []
     for each_time in range(times):
     for each_time in range(times):
         grabbed, whatisleft = grabber(goodwords, items_pattern)
         popped, whatisleft = pop_items(goodwords, items_pattern)
        if not popped:
            continue
         goodwords = whatisleft
         goodwords = whatisleft
         middle = ceil(len(grabbed)/2)
 
         sorted_pattern = (
         middle = ceil(len(popped)/2)
            sorted(grabbed[:middle]) +
 
            sorted(grabbed[middle:], reverse=True)
         ascending = sorted(popped[:middle], key=len)
         )
        descending = sorted(popped[middle:], key=len, reverse=True)        #print ("{} items".format(len(l)))
 
 
         sorted_pattern = ascending + descending
         final_pattern.append(sorted_pattern)
         final_pattern.append(sorted_pattern)


     return final_pattern
     return final_pattern


def test_pattern_returns_list():
def test_pattern_returns_list():
     assert type(pattern(['a', 'b', 'c', 'd', 'e'], 3)) == type([])
     list_items = ['a', 'b', 'c', 'd', 'e']
    assert type(pattern(list_items, 3)) == type([])


def test_pattern_removes_over_max_len():
def test_pattern_removes_over_max_len():
     right_one = [['a', 'aa', 'aaa', 'aa', 'a']]
     list_words_right_length = [['a', 'aa', 'aaa', 'aa', 'a']]
     assert pattern(right_one[0] + ['aaaaa'], 3) == right_one
     words_wrong_length = list_words_right_length[0] + ['aaaaa']
    assert pattern(words_wrong_length, 3) == list_words_right_length


def test_pattern_too_short_wont_work():
def test_pop_items():
    assert pop_items(['a', 'aaa'], 1) == (['a'], ['aaa'])
 
def test_pop_items_empty_list():
    assert pop_items([], 70) == ([], [])
 
def test_pop_items_num_too_big():
     with pytest.raises(ValueError):
     with pytest.raises(ValueError):
         pattern(['a', 'aa'], 3)
         pop_items(['a', 'b'], 3)
 
def test_cuts_for_pattern():
    list_with_nine = ['a'] * 9
    result = pattern(list_with_nine, 3)
    assert len(result[0]) == 5
 
def test_empty_list_for_pattern():
    result = pattern([], 3)
    assert result == []
 
def test_list_too_short_for_pattern():
    list_too_short = ['a', 'aa']
    result = pattern(list_too_short, 3)
    assert result == []
 
if __name__ == '__main__':
    with open('ghhh.txt', 'r') as handle:
        contents = handle.read()
    splitted = contents.split()
 
    ll = (pattern(splitted, 8))
    for l in ll:
        for x in l:
            print(x)
        #print()


def test_grabber():
      
     assert grabber(['a', 'aaa'], 1) == (['a'], ['aaa'])


def test_two_patterns():
    right_one = ['a', 'aa', 'aaa', 'aa', 'a', 'b', 'bb', 'bbb', 'bb', 'b']
    result = [['a', 'aa', 'aaa', 'aa', 'a'], ['b', 'bb', 'bbb', 'bb', 'b']]
    assert pattern(right_one, 3) == result
</source>
</source>

Revision as of 18:06, 11 March 2018

Oulipo exercise

Improved the N + 7 code we wrote in Prototyping, by debugging and adding some tests. Work in progress.

Improvements

Bug example: the for loop was skipping capitalized words, because it could not find them in the input file with nouns.

To fix, I added the lower method for strings to turn all text to lowercase.


 for word in separated:
        word = word.lower() + '\n'


To test it, I added a test.


def test_seven():
    """A capitalized noun is still found and replaced by its N+7 neighbour."""
    replaced = seven('Baboons')
    assert replaced == 'babushkas'


Full script (in progress)


def seven(sentence, nouns_path='91K nouns.txt'):
    """Apply the Oulipo N+7 substitution to *sentence*.

    Each whitespace-separated word is lowercased and looked up in the noun
    list.  A word found there is replaced by the noun seven entries further
    down the list; any other word is kept (in its lowercased form).

    Args:
        sentence: the text to transform.
        nouns_path: path of the noun list, one noun per line.

    Returns:
        The transformed words joined by single spaces.
    """
    # The context manager closes the file; stripping each line also lets the
    # final noun match when the file has no trailing newline.
    with open(nouns_path) as handle:
        nouns = [line.strip() for line in handle]

    # Map each noun to its first position (same choice list.index makes),
    # so every lookup is a single dictionary access instead of two scans.
    positions = {}
    for position, noun in enumerate(nouns):
        positions.setdefault(noun, position)

    new_separated = []
    for word in sentence.split():
        word = word.lower()
        position = positions.get(word)
        if position is None:
            new_separated.append(word)
        else:
            # Wrap around the end of the list so the last seven nouns do not
            # raise IndexError.
            new_separated.append(nouns[(position + 7) % len(nouns)])
    return ' '.join(new_separated)

#sentence = input('What is your sentence? ')
#seven(sentence)

# pytest requires that you name your tests with test_<your-name>
# run with the 'pytest' command in your terminal
def test_seven():
    """Check the N+7 replacement with and without trailing punctuation."""
    expected = 'babushkas'
    plain = seven('Baboons')
    assert plain == expected
    with_comma = seven('Baboons,')
    assert with_comma == expected

Tesseract exercise

Initial test to train tesseract to recognise an image as a character/word

First, using imagemagick, convert the jpg file into a tiff file, for better OCR results

convert -density 300 flower3.jpg -depth 8 -strip -background white -alpha off flower3.tiff

Using tesseract page segmentation modes 8 and 10 (the -psm 8 and -psm 10 options), I tested it to see what kind of text output I would get when it treats the image as a single word or as a single character.

tesseract flower.tiff  -psm 8 output
tesseract flower.tiff  -psm 10 output2

results were

a

<23

I created a boxfile for the best result (with -psm 10)

tesseract flower4.tiff -psm 10 flower4 makebox

I then opened the image/boxfile combination with moshpytt, and edited the content of the box, in order to recognise it as the word 'flower'.

Image: 700 pixels


python moshpytt.py


Python exercise inspired by the work of Carl Andre

I wrote a script that would turn a list of words of different lengths into a pattern similar to the one Carl Andre typed by hand with a typewriter. It takes a text file as input. In the context of Special Issue V, the text will be the result of OCR-ing a page scanned from my reader. The script selects all words whose length is shorter than or equal to the number given as the argument maxlength, then arranges them into an ascending/descending pattern.


import pytest
from math import ceil
from pprint import pprint
from sys import stdout

def pop_items(words, num_items):
    """Take the first num_items entries off the front of words.

    The list passed in is shortened in place.  Returns a tuple
    (removed items, remaining list).  An empty (or falsy) words gives
    ([], []); asking for more items than are available raises ValueError.
    """
    if not words:
        return [], []

    available = len(words)
    if available < num_items:
        raise ValueError('Not enough items!')

    # Pop from the front one at a time so the caller's list is mutated.
    taken = [words.pop(0) for _ in range(num_items)]
    return taken, words

def all_words_less_than(words, maxlength):
    """Return True when no word in words is longer than maxlength.

    A word whose length equals maxlength is accepted; an empty list
    gives True.
    """
    return all(len(candidate) <= maxlength for candidate in words)

def filterwords(words, maxlength):
    """Return a new list with the words of acceptable length.

    A word is kept when it has at least 2 characters and at most
    maxlength characters; the original order is preserved.
    """
    return [candidate for candidate in words
            if 2 <= len(candidate) <= maxlength]


def pattern(words, maxlength):
    """Arrange words into groups that rise and then fall in word length.

    The words are first filtered with filterwords.  The survivors are cut,
    in order, into consecutive groups of maxlength + (maxlength - 4) items;
    leftover words that do not fill a whole group are dropped.  Within each
    group the first half is sorted by increasing length and the second half
    by decreasing length.

    Args:
        words: list of strings to arrange.
        maxlength: longest word length to keep; also sets the group size.

    Returns:
        A list of groups (each a list of words).  The list is empty when
        not even one complete group can be formed.

    NOTE(review): the tests in this file (e.g. test_cuts_for_pattern)
    expect groups of 2 * maxlength - 1 items and keep one-letter words,
    which does not match this group size or filterwords; confirm which
    behaviour is intended.
    """
    goodwords = filterwords(words, maxlength)
    items_pattern = maxlength + (maxlength - 4)

    # A maxlength of 2 or less gives a group size of zero or below; no group
    # can be built, and dividing by it would raise ZeroDivisionError.
    if items_pattern <= 0:
        return []

    # Count whole groups from the filtered words, not from the raw input.
    times = len(goodwords) // items_pattern
    # Split point of a group: the first half gets the extra item if odd.
    middle = (items_pattern + 1) // 2

    final_pattern = []
    for each_time in range(times):
        start = each_time * items_pattern
        group = goodwords[start:start + items_pattern]

        ascending = sorted(group[:middle], key=len)
        descending = sorted(group[middle:], key=len, reverse=True)
        final_pattern.append(ascending + descending)

    return final_pattern


def test_pattern_returns_list():
    """pattern must hand back a plain list."""
    result = pattern(['a', 'b', 'c', 'd', 'e'], 3)
    assert type(result) == list

def test_pattern_removes_over_max_len():
    """A word longer than maxlength must not show up in the result."""
    expected = [['a', 'aa', 'aaa', 'aa', 'a']]
    candidates = expected[0] + ['aaaaa']
    result = pattern(candidates, 3)
    assert result == expected

def test_pop_items():
    """Popping one item returns it together with the remainder."""
    popped, remaining = pop_items(['a', 'aaa'], 1)
    assert (popped, remaining) == (['a'], ['aaa'])

def test_pop_items_empty_list():
    """An empty input gives two empty lists, whatever count is requested."""
    outcome = pop_items([], 70)
    assert outcome == ([], [])

def test_pop_items_num_too_big():
    """Requesting more items than the list holds raises ValueError."""
    too_few = ['a', 'b']
    with pytest.raises(ValueError):
        pop_items(too_few, 3)

def test_cuts_for_pattern():
    """With nine words and maxlength 3, the first group holds five items."""
    nine_words = ['a' for _ in range(9)]
    groups = pattern(nine_words, 3)
    first_group = groups[0]
    assert len(first_group) == 5

def test_empty_list_for_pattern():
    """No input words means no groups."""
    assert pattern([], 3) == []

def test_list_too_short_for_pattern():
    """Too few words to fill one group gives an empty result."""
    groups = pattern(['a', 'aa'], 3)
    assert groups == []

if __name__ == '__main__':
    # Read the whole input text and split it into whitespace-separated words.
    with open('ghhh.txt', 'r') as source:
        text = source.read()
    tokens = text.split()

    # Print every word of every group on its own line.
    for group in pattern(tokens, 8):
        for word in group:
            print(word)