PythonLabZalan: Difference between revisions

From XPUB & Lens-Based wiki
No edit summary
Line 43: Line 43:
[[File:Screen Shot 2018-03-24 at 16.14.04.png|thumb|Output 2]]
[[File:Screen Shot 2018-03-24 at 16.14.04.png|thumb|Output 2]]
[[File:Screen Shot 2018-03-24 at 16.12.30.png|thumb|NLTK Analysis outcome]]
[[File:Screen Shot 2018-03-24 at 16.12.30.png|thumb|NLTK Analysis outcome]]
<code>import nltk
from nltk import word_tokenize
from nltk import FreqDist
from nltk.tokenize import sent_tokenize
from sys import stdin,stdout
import re
import sys, string
#importing nltk library word_tokenize
from collections import Counter
# Read the whole input text. The context manager closes the file handle
# even if reading fails (the original never closed it).
with open("readertest.txt") as text:
    content = text.read()

# Split on single space characters only: newlines stay attached to the
# neighbouring words and consecutive spaces produce empty strings.
words = content.split(" ")

# Alphabetically sorted list of the distinct words.
splitting_statistic = sorted(set(words))
#print(splitting_statistic)

# Total number of space-separated words.
wordsamount_statistic = f'{len(words)} Amount of the words'
print(wordsamount_statistic)
# Count the lowercase and uppercase characters in the text.
# The text is iterated directly instead of being rebound to the name
# `string`, which shadowed the `string` module imported at the top.
count1 = sum(1 for ch in content if ch.islower())
count2 = sum(1 for ch in content if ch.isupper())
print("The number of lowercase characters is:")
print(count1)
print("The number of uppercase characters is:")
print(count2)
# Frequency distribution over single characters (iterating a str yields
# its characters); show the ten most frequent ones.
fdist = FreqDist(content)
print("10 most common characters:", fdist.most_common(10), sep="\n")

# Rebind fdist to the distribution over the space-separated words and
# show the ten most frequent words.
fdist = FreqDist(words)
print("10 most common words:", fdist.most_common(10), sep="\n")
def vowel_or_consonants(c):
    """Classify a character as 'Vowel', 'Consonant' or 'Neither'.

    Args:
        c: The string to classify; intended to be a single character.

    Returns:
        'Neither' if ``c`` is not alphabetic (this includes the empty
        string), 'Vowel' if its lowercase form is found in 'aeiou',
        otherwise 'Consonant'.
    """
    if not c.isalpha():
        return 'Neither'
    vowels = 'aeiou'
    if c.lower() in vowels:
        return 'Vowel'
    return 'Consonant'
#for c in (content):
#print(c, vowel_or_consonants(c))
 
#print(sent_tokenize(content))
#splitting text into sentences
#for word in (words):
#print(word)
#control structure, each word in a seperate line
#fdist = FreqDist(words)
#print("hapaxes:")
#print(fdist.hapaxes())
#words that occur once only, the so-called hapaxes
# Distinct words of the text, then those longer than 15 characters,
# printed in alphabetical order.
V = set(words)
long_words = list(filter(lambda candidate: len(candidate) > 15, V))
print("printing the more than 15 character long words of the text",
      sorted(long_words), sep="\n")
# Tokenize the text with NLTK's word tokenizer.
tokenized_content = word_tokenize(content)
# Tag every token with its part of speech. The result is iterated below
# as (word, tag) pairs and compared against tags such as 'VB', 'NN', 'JJ'.
statistic3 = nltk.pos_tag(tokenized_content)
#print(statistic3)
# Collect every token tagged as a verb (VB, VBD, VBG, VBN, VBP, VBZ),
# in text order, and report how many there are.
verblist = [word for word, tag in statistic3
            if tag in {'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'}]
verbscounter = len(verblist)
verb_statistic = f'{verbscounter} Verbs'
print(verb_statistic)
print(verblist)
# Collect every token tagged as a noun (NN, NNS, NNP, NNPS), in text
# order, and report how many there are.
nounlist = [word for word, tag in statistic3
            if tag in {'NNP', 'NNS', 'NN', 'NNPS'}]
nouncounter = len(nounlist)
nouns_statistic = f'{nouncounter} Nouns'
print(nouns_statistic)
print(nounlist)
# Pair the i-th verb with the i-th noun. zip() stops at the shorter list,
# so a text with more verbs than nouns no longer raises IndexError (the
# original index loop did, and its result was overwritten anyway).
# A verb that occurs several times keeps the noun of its last pairing.
verblist2 = verblist
nounlist2 = nounlist
verb_noun_dictionary = dict(zip(verblist2, nounlist2))
verblist_and_nounlists = dict(zip(verblist2, nounlist2))
print(verblist_and_nounlists)
print(len(verblist))
# One-element list wrapping the whole word list.
characters = [words]
#print(words)
'''from itertools import groupby
def n_letter_dictionary(string):
    result = {}
    for key, group in groupby(sorted(string.split(), key = lambda x: len(x)), lambda x: len(x)):
        result[key] = list(group)
    return result
print(n_letter_dictionary)'''
# Collect every token tagged as an adverb (RB, RBR, RBS, WRB), in text
# order, and report how many there are.
adverblist = [word for word, tag in statistic3
              if tag in {'RB', 'RBR', 'RBS', 'WRB'}]
adverbscounter = len(adverblist)
adverb_statistic = f'{adverbscounter} Adverbs'
print(adverb_statistic)
print(adverblist)
# Collect every token tagged as a personal or possessive pronoun
# (PRP, PRP$), in text order, and report how many there are.
pronounslist = [word for word, tag in statistic3
                if tag in {'PRP', 'PRP$'}]
pronounscounter = len(pronounslist)
pronoun_statistic = f'{pronounscounter} Pronouns'
print(pronoun_statistic)
print(pronounslist)
# Collect every token tagged as an adjective (JJ, JJR, JJS), in text
# order, and report how many there are.
adjectivslist = [word for word, tag in statistic3
                 if tag in {'JJ', 'JJR', 'JJS'}]
adjectivscounter = len(adjectivslist)
adjectiv_statistic = f'{adjectivscounter} Adjectives'
print(adjectiv_statistic)
print(adjectivslist)
# Count the tokens tagged 'CC' (coordinating conjunction).
coordinating_conjuction_counter = sum(1 for _, tag in statistic3 if tag == 'CC')
coordinating_conjuction_statistic = f'{coordinating_conjuction_counter} Coordinating conjuctions'
print(coordinating_conjuction_statistic)
# Count the tokens tagged 'CD' (cardinal number).
# Fixed: the original tested 'CC', so it re-counted coordinating conjunctions.
cardinal_number = sum(1 for _, tag in statistic3 if tag == 'CD')
cardinal_number_statistic = f'{cardinal_number} Cardinal numbers'
print(cardinal_number_statistic)
# Count the tokens tagged 'DT' (determiner).
# Fixed: the original tested 'D', which is not a tag the tagger emits,
# so the count was always 0.
determiner_counter = sum(1 for _, tag in statistic3 if tag == 'DT')
determiner_statistic = f'{determiner_counter} Determiners'
print(determiner_statistic)
# Count the tokens tagged 'EX' (existential "there").
existential_there_counter = sum(1 for _, tag in statistic3 if tag == 'EX')
existential_there_statistic = f'{existential_there_counter} Existential there'
print(existential_there_statistic)
# Count the tokens tagged 'FW' (foreign word).
foreing_words_counter = sum(1 for _, tag in statistic3 if tag == 'FW')
foreing_words_statistic = f'{foreing_words_counter} Foreing words'
print(foreing_words_statistic)
# Collect every token tagged 'IN' (preposition or subordinating
# conjunction), in text order, and report how many there are.
preposition_or_subordinating_conjunctionlist = [
    word for word, tag in statistic3 if tag == 'IN'
]
preposition_or_subordinating_conjunction_counter = len(preposition_or_subordinating_conjunctionlist)
preposition_or_subordinating_conjunction_statistic = f'{preposition_or_subordinating_conjunction_counter} Preposition or subordinating conjunctions'
print(preposition_or_subordinating_conjunction_statistic)
print(preposition_or_subordinating_conjunctionlist)
# Count the tokens tagged 'LS' (list item marker).
list_item_marker_counter = sum(1 for _, tag in statistic3 if tag == 'LS')
list_item_marker_statistic = f'{list_item_marker_counter} List item markers'
print(list_item_marker_statistic)
# Count the tokens tagged 'MD' (modal verb).
# Fixed: the original tested 'LS', so it re-counted list item markers.
modals_counter = sum(1 for _, tag in statistic3 if tag == 'MD')
modals_statistic = f'{modals_counter} Modals'
print(modals_statistic)
# Count the tokens tagged 'PDT' (predeterminer).
Predeterminer_counter = sum(1 for _, tag in statistic3 if tag == 'PDT')
Predeterminer_statistic = f'{Predeterminer_counter } Predeterminers'
print(Predeterminer_statistic)
# Count the tokens tagged 'POS' (possessive ending, e.g. "'s").
# Fixed: the original tested 'PDT', so it re-counted predeterminers.
Possessive_ending_counter = sum(1 for _, tag in statistic3 if tag == 'POS')
Possessive_ending_statistic = f'{Possessive_ending_counter} Possessive endings'
print(Possessive_ending_statistic)
# Count the tokens tagged 'RP' (particle).
# Fixed: the original incremented a differently-cased name
# (`Particle_counter`), so `particle_counter` always stayed 0.
particle_counter = sum(1 for _, tag in statistic3 if tag == 'RP')
particle_statistic = f'{particle_counter} Particles'
print(particle_statistic)
# Count the tokens tagged 'SYM' (symbol).
symbol_counter = sum(1 for _, tag in statistic3 if tag == 'SYM')
symbol_statistic = f'{symbol_counter} Symbols'
print(symbol_statistic)
# Count the tokens tagged 'TO' (the word "to").
to_counter = sum(1 for _, tag in statistic3 if tag == 'TO')
to_statistic = f'{to_counter} to'
print(to_statistic)
# Count the tokens tagged 'UH' (interjection).
# Fixed: the original tested 'TO', so it re-counted occurrences of "to".
interjection_counter = sum(1 for _, tag in statistic3 if tag == 'UH')
interjection_statistic = f'{interjection_counter} Interjections'
print(interjection_statistic)
# Count the tokens tagged 'WDT' (wh-determiner, e.g. "which").
# Fixed: the original tested 'TO', so it re-counted occurrences of "to".
Wh_determiner_counter = sum(1 for _, tag in statistic3 if tag == 'WDT')
Wh_determiner_statistic = f'{Wh_determiner_counter} Wh determiners'
print(Wh_determiner_statistic)
# Count the tokens tagged 'WP' (wh-pronoun, e.g. "who", "what").
# Fixed: the original tested 'TO', so it re-counted occurrences of "to".
Wh_pronoun_counter = sum(1 for _, tag in statistic3 if tag == 'WP')
Wh_pronoun_statistic = f'{Wh_pronoun_counter} Wh pronouns'
print(Wh_pronoun_statistic)
# Count the tokens tagged 'WP$' (possessive wh-pronoun, e.g. "whose").
# Fixed: the original tested 'TO', so it re-counted occurrences of "to".
Possessive_wh_pronoun_counter = sum(1 for _, tag in statistic3 if tag == 'WP$')
Possessive_wh_pronoun_statistic = f'{Possessive_wh_pronoun_counter} Possessive wh pronouns'
print(Possessive_wh_pronoun_statistic)
# Character length of every collected verb, noun, adjective and
# preposition/subordinating conjunction, in text order.
dic1 = list(map(len, verblist))
print(dic1)
dic2 = list(map(len, nounlist))
print(dic2)
dic3 = list(map(len, adjectivslist))
print(dic3)
dic4 = list(map(len, preposition_or_subordinating_conjunctionlist))
print(dic4)
# Scale every word length up by a factor of 100 (despite the name,
# the values are not doubled).
double_numbers1 = [n * 100 for n in dic1]
print(double_numbers1)
double_numbers2 = [n * 100 for n in dic2]
print(double_numbers2)
double_numbers3 = [n * 100 for n in dic3]
print(double_numbers3)
double_numbers4 = [n * 100 for n in dic4]
print(double_numbers4)
# Scale every word length down by a factor of 100 (true division, so
# the results are floats).
div_numbers1 = [n / 100 for n in dic1]
print(div_numbers1)
div_numbers2 = [n / 100 for n in dic2]
print(div_numbers2)
div_numbers3 = [n / 100 for n in dic3]
print(div_numbers3)
div_numbers4 = [n / 100 for n in dic4]
print(div_numbers4)
'''lst1 = [[double_numbers1], [double_numbers2], [double_numbers3], [double_numbers4]]
print((zip(*lst1))[0])'''
'''lst1 = [[double_numbers1], [double_numbers2], [double_numbers3], [double_numbers4]]
lst2 = []
lst2.append([x[0]for x in lst1])
print(lst2 [0])'''
'''lst1 = [[double_numbers1], [double_numbers2], [double_numbers3], [double_numbers4]]
outputlist = []
for values in lst1:
outputlist.append(values[-1])
print(outputlist)'''
# Pair up the first and second scaled lengths of each category:
# (verb, noun) and (adjective, preposition).
# NOTE: indexing assumes every list has at least two entries; a text
# with fewer raises IndexError here.
n1 = double_numbers1
n1_a = n1[0]
print(n1_a)
n2 = double_numbers2
n3 = double_numbers3
n4 = double_numbers4
n5 = double_numbers1
n6 = double_numbers2
n7 = double_numbers3
# Fixed: n8 aliased double_numbers3 (same as n7), so the last pair
# repeated the adjective value instead of using the preposition list.
n8 = double_numbers4
print((n1[0], n2[0]), (n3[0], n4[0]), (n5[1], n6[1]), (n7[1], n8[1]))
# Alias the scaled-down length lists and print the first entry of each
# on a single line, separated by spaces.
n1a, n2a, n3a, n4a = div_numbers1, div_numbers2, div_numbers3, div_numbers4
print(*(lengths[0] for lengths in (n1a, n2a, n3a, n4a)))
# Write the first scaled verb length to Output.txt.
# n1_a is an int and a text-mode write() only accepts str, so it is
# converted explicitly (the original raised TypeError). The context
# manager closes the file even if the write fails.
with open("Output.txt", "w") as text_file:
    text_file.write(str(n1_a))
# Number of whitespace-separated tokens (any run of whitespace counts as
# one separator).
wordsnumber_statistic = len(content.split())
#print(wordsnumber_statistic)

# Number of lines in the text.
numberoflines_statistic = len(content.splitlines())
print("Number of lines:", numberoflines_statistic, sep="\n")

# Number of characters in the text, whitespace included.
numberofcharacters_statistic = len(content)
print("Number of characters:", numberofcharacters_statistic, sep="\n")
# Map each word to the number of times it occurs in the text.
# Keys appear in order of first occurrence; nothing is sorted here.
d = {}
for word in words:
    d[word] = d.get(word, 0) + 1
#print(d)
# Convert the word-count dictionary into a list of (count, word) tuples
# and sort it, most frequent first. The original comment described the
# list as sorted, but no sorting was performed.
word_freq = sorted(((value, key) for key, value in d.items()), reverse=True)
#print(word_freq)
# Count how often each character occurs in the text. Iterating a str
# yields every character, so spaces, punctuation and newlines are
# counted as well, not only letters.
lettercounter = Counter(content)
#print(lettercounter)</code>





Revision as of 16:27, 24 March 2018

Terminal

First, I looked into basic command-line functions (File:Commands terminal.pdf) and their operations, to create a solid base for Python 3.

Optical character recognition + Tesseract

Secondly, I experimented in the Terminal with converting PDF or JPG files to .txt files using tesseract and imagemagick (convert).

Optical character recognition

Input 1
Output 1

Tesseract (with languages you will be using)

  • Mac brew install tesseract --all-languages

imagemagick

  • Mac brew install imagemagick

How to use it?

tesseract - png - name of the txt file

tesseracttest SZAKACS$ tesseract namefile.png text2.txt

Getting 1 page from PDF file with PDFTK burst

pdftk yourfile.pdf burst

Or use imagemagick

convert -density 300 Typewriter\ Art\ -\ Riddell\ Alan.pdf Typewriter-%03d.tiff

Choose the page you want to convert

Convert PDF to bit-map using imagemagick, with some options to optimize OCR

  • convert -density 300 page.pdf -depth 8 -strip -background white -alpha off output.tiff
  • -density 300: resolution of 300 DPI. Lower resolutions will create errors :)
  • -depth 8: number of bits for color. 8-bit depth == grey-scale
  • -strip -background white -alpha off: removes the alpha channel (opacity) and makes the background white
  • output.tiff: in previous versions Tesseract only accepted images as TIFFs, but currently more bitmap formats are accepted

Python3

Input 2
Output 2
NLTK Analysis outcome


import nltk

from nltk import word_tokenize

from nltk import FreqDist

from nltk.tokenize import sent_tokenize

from sys import stdin,stdout

import re

import sys, string

  1. importing nltk library word_tokenize

from collections import Counter

text = open ("readertest.txt") content = text.read()

  1. importing and reading the content
  1. print(content)

words = content.split(" ")

  1. the string content needs to signifier - needs to be splitted to be able to read it, it detects if a new words begins based on the " "


splitting_statistic = sorted (set (words))

  1. the content is splitted
  1. print(splitting_statistic)


wordsamount_statistic = f'{len(words)} Amount of the words'

  1. amount of the words

print(wordsamount_statistic)


string=(content) count1=0 count2=0 for i in string:

     if(i.islower()):
           count1=count1+1
     elif(i.isupper()):
           count2=count2+1

print("The number of lowercase characters is:") print(count1) print("The number of uppercase characters is:") print(count2)

  1. counts the lowercase and uppercase letters in the text


fdist = FreqDist(content)

print("10 most common characters:") print(fdist.most_common(10))

  1. print out the 10 most common letters


fdist = FreqDist(words)

print("10 most common words:") print(fdist.most_common(10))

  1. print out the 10 most common words


  1. new_list = fdist.most_common()
  1. print(new_list)


  1. for word, _ in new_list: #_ ignores the second variable, dictionary (key, value)
   #print(' ',_)

  1. prints a list of the most common words - how to make it better in one line


def vowel_or_consonants (c): if not c.isalpha(): return 'Neither' vowels = 'aeiou'

if c.lower() in vowels: return 'Vowel'

else: return 'Consonant'

  1. for c in (content):

#print(c, vowel_or_consonants(c))


  1. print(sent_tokenize(content))
  1. splitting text into sentences


  1. for word in (words):

#print(word)

  1. control structure, each word in a seperate line


  1. fdist = FreqDist(words)
  1. print("hapaxes:")
  2. print(fdist.hapaxes())
  1. words that occur once only, the so-called hapaxes


V = set(words) long_words = [w for w in V if len(w) > 15]

print("printing the more than 15 character long words of the text") print(sorted(long_words))

  1. printing the more than 15 character long words of the text


tokenized_content = word_tokenize(content)

  1. the content is tokenized (nltk library)


statistic3 = nltk.pos_tag(tokenized_content)

  1. each word becomes a tag if is a verb, noun, adverb, pronoun, adjective)
  1. print(statistic3)


verbscounter = 0

verblist = []


for word, tag in statistic3: if tag in {'VB','VBD','VBG','VBN','VBP','VBZ'}: verbscounter = verbscounter + 1 verblist.append(word)

verb_statistic = f'{verbscounter} Verbs'

  1. shows the amount of verbs in the text

print(verb_statistic)

print(verblist)

  1. creating a list from the verb counter


  1. creating a dictionary from a list


nouncounter = 0

nounlist = []

for word, tag in statistic3: if tag in {'NNP','NNS','NN', 'NNPS'}: nouncounter = nouncounter + 1 nounlist.append(word)

nouns_statistic = f'{nouncounter} Nouns'

  1. shows the amount of nouns in the text

print(nouns_statistic)

print(nounlist)


verblist2 = verblist

nounlist2 = nounlist

verb_noun_dictionary = {}

for i in range (len(verblist2)): verb_noun_dictionary[verblist2[i]] = nounlist2 [i]

verblist_and_nounlists = zip (verblist2, nounlist2)

verb_noun_dictionary = dict(verblist_and_nounlists)

verblist_and_nounlists = dict(zip(verblist2, nounlist2))

print(verblist_and_nounlists)

print(len(verblist))

characters = [words]

  1. print(words)


from itertools import groupby

def n_letter_dictionary(string):

   result = {}
   for key, group in groupby(sorted(string.split(), key = lambda x: len(x)), lambda x: len(x)):
       result[key] = list(group)
   return result
print(n_letter_dictionary)


adverbscounter = 0

adverblist = []

for word, tag in statistic3: if tag in {'RB','RBR','RBS','WRB'}: adverbscounter = adverbscounter + 1 adverblist.append(word)


adverb_statistic = f'{adverbscounter} Adverbs'

  1. shows the amount of adverbs in the text

print(adverb_statistic) print(adverblist)


pronounscounter = 0 pronounslist = []

for word, tag in statistic3: if tag in {'PRP','PRP$'}: pronounscounter = pronounscounter + 1 pronounslist.append(word)

pronoun_statistic = f'{pronounscounter} Pronouns'

  1. shows the amount of pronouns in the text

print(pronoun_statistic)

print(pronounslist)


adjectivscounter = 0

adjectivslist = []

for word, tag in statistic3: if tag in {'JJ','JJR','JJS'}: adjectivscounter = adjectivscounter + 1 adjectivslist.append(word)

adjectiv_statistic = f'{adjectivscounter} Adjectives'

  1. shows the amount of adjectives in the text

print(adjectiv_statistic) print(adjectivslist)

coordinating_conjuction_counter = 0

for word, tag in statistic3: if tag in {'CC'}: coordinating_conjuction_counter = coordinating_conjuction_counter + 1

coordinating_conjuction_statistic = f'{coordinating_conjuction_counter} Coordinating conjuctions'

  1. shows the amount of coordinating_conjuction in the text

print(coordinating_conjuction_statistic)


cardinal_number = 0

for word, tag in statistic3: if tag in {'CC'}: cardinal_number = cardinal_number + 1

cardinal_number_statistic = f'{cardinal_number} Cardinal numbers'

  1. shows the amount of cardinal_number in the text

print(cardinal_number_statistic)


determiner_counter = 0

for word, tag in statistic3: if tag in {'D'}: determiner_counter = determiner_counter + 1

determiner_statistic = f'{determiner_counter} Determiners'

  1. shows the amount of Determiners in the text

print(determiner_statistic)


existential_there_counter = 0

for word, tag in statistic3: if tag in {'EX'}: existential_there_counter = existential_there_counter + 1

existential_there_statistic = f'{existential_there_counter} Existential there'

  1. shows the amount of Existential there in the text

print(existential_there_statistic)


foreing_words_counter = 0

for word, tag in statistic3: if tag in {'FW'}: foreing_words_counter = foreing_words_counter + 1

foreing_words_statistic = f'{foreing_words_counter} Foreing words'

  1. shows the amount of foreing words in the text

print(foreing_words_statistic)


preposition_or_subordinating_conjunctionlist = []

preposition_or_subordinating_conjunction_counter = 0

for word, tag in statistic3: if tag in {'IN'}: preposition_or_subordinating_conjunction_counter = preposition_or_subordinating_conjunction_counter + 1 preposition_or_subordinating_conjunctionlist.append(word) preposition_or_subordinating_conjunction_statistic = f'{preposition_or_subordinating_conjunction_counter} Preposition or subordinating conjunctions'

  1. shows the amount of preposition_or_subordinating_conjunction in the text

print(preposition_or_subordinating_conjunction_statistic)

print(preposition_or_subordinating_conjunctionlist)


list_item_marker_counter = 0

for word, tag in statistic3: if tag in {'LS'}: list_item_marker_counter = list_item_marker_counter + 1

list_item_marker_statistic = f'{list_item_marker_counter} List item markers'

  1. shows the amount of list item markers in the text

print(list_item_marker_statistic )


modals_counter = 0

for word, tag in statistic3: if tag in {'LS'}: modals_counter = modals_counter + 1

modals_statistic = f'{modals_counter} Modals'

  1. shows the amount of modals in the text

print(modals_statistic)


Predeterminer_counter = 0

for word, tag in statistic3: if tag in {'PDT'}: Predeterminer_counter = Predeterminer_counter + 1

Predeterminer_statistic = f'{Predeterminer_counter } Predeterminers'

  1. shows the amount of Predeterminers in the text

print(Predeterminer_statistic)


Possessive_ending_counter = 0

for word, tag in statistic3: if tag in {'PDT'}: Possessive_ending_counter = Possessive_ending_counter + 1

Possessive_ending_statistic = f'{Possessive_ending_counter} Possessive endings'

  1. shows the amount of Possessive endings in the text

print(Possessive_ending_statistic)


particle_counter = 0

for word, tag in statistic3: if tag in {'RP'}: Particle_counter = particle_counter + 1

particle_statistic = f'{particle_counter} Particles'

  1. shows the amount of Particles endings in the text

print(particle_statistic)


symbol_counter = 0

for word, tag in statistic3: if tag in {'SYM'}: symbol_counter = symbol_counter + 1

symbol_statistic = f'{symbol_counter} Symbols'

  1. shows the amount of symbols in the text

print(symbol_statistic)


to_counter = 0

for word, tag in statistic3: if tag in {'TO'}: to_counter = to_counter + 1

to_statistic = f'{to_counter} to'

  1. shows the amount of to in the text

print(to_statistic)


interjection_counter = 0

for word, tag in statistic3: if tag in {'TO'}: interjection_counter = interjection_counter + 1

interjection_statistic = f'{interjection_counter} Interjections'

  1. shows the amount of interjections in the text

print(interjection_statistic)


Wh_determiner_counter = 0

for word, tag in statistic3: if tag in {'TO'}: Wh_determiner_counter = Wh_determiner_counter + 1

Wh_determiner_statistic = f'{Wh_determiner_counter} Wh determiners'

  1. shows the amount of Wh determiners in the text

print(Wh_determiner_statistic)


Wh_pronoun_counter = 0

for word, tag in statistic3: if tag in {'TO'}: Wh_pronoun_counter = Wh_pronoun_counter + 1

Wh_pronoun_statistic = f'{Wh_pronoun_counter} Wh pronouns'

  1. shows the amount of Wh pronouns in the text

print(Wh_pronoun_statistic)


Possessive_wh_pronoun_counter = 0

for word, tag in statistic3: if tag in {'TO'}: Possessive_wh_pronoun_counter = Possessive_wh_pronoun_counter + 1

Possessive_wh_pronoun_statistic = f'{Possessive_wh_pronoun_counter} Possessive wh pronouns'

  1. shows the amount of Possessive wh pronouns in the text

print(Possessive_wh_pronoun_statistic)

dic1 =([len (i) for i in verblist]) print(dic1)

dic2=([len (i) for i in nounlist]) print(dic2)

dic3=([len (i) for i in adjectivslist]) print(dic3)

dic4=([len (i) for i in preposition_or_subordinating_conjunctionlist]) print(dic4)

  1. print([len (i) for i in verblist_and_nounlists])
  2. print([len (i) for i in words])


double_numbers1 = [] for n in dic1: double_numbers1.append(n*100) print(double_numbers1)

double_numbers2 = [] for n in dic2: double_numbers2.append(n*100) print(double_numbers2)

double_numbers3 = [] for n in dic3: double_numbers3.append(n*100) print(double_numbers3)

double_numbers4 = [] for n in dic4: double_numbers4.append(n*100) print(double_numbers4)

div_numbers1= [] for n in dic1: div_numbers1.append(n/100) print(div_numbers1)

div_numbers2= [] for n in dic2: div_numbers2.append(n/100) print(div_numbers2)

div_numbers3= [] for n in dic3: div_numbers3.append(n/100) print(div_numbers3)

div_numbers4= [] for n in dic4: div_numbers4.append(n/100) print(div_numbers4)


lst1 = [[double_numbers1], [double_numbers2], [double_numbers3], [double_numbers4]] print((zip(*lst1))[0])

lst1 = [[double_numbers1], [double_numbers2], [double_numbers3], [double_numbers4]] lst2 = [] lst2.append([x[0]for x in lst1]) print(lst2 [0])

lst1 = [[double_numbers1], [double_numbers2], [double_numbers3], [double_numbers4]] outputlist = [] for values in lst1: outputlist.append(values[-1]) print(outputlist)


n1 = double_numbers1 n1_a = (n1[0]) print(n1_a)

n2 = double_numbers2

  1. print(n2[0])

n3 = double_numbers3

  1. print(n3[0])

n4 = double_numbers4

  1. print(n4[0])

n5 = double_numbers1

  1. print(n5[1])

n6 = double_numbers2

  1. print(n6[1])

n7 = double_numbers3

  1. print(n7[1])

n8 = double_numbers3

  1. print(n8[1])

print((n1[0], n2[0]), (n3[0], n4[0]), (n5[1], n6[1]), (n7[1], n8[1]))

n1a = div_numbers1

  1. print(n1a[0])

n2a = div_numbers2

  1. print(n2a[0])

n3a = div_numbers3

  1. print(n3a[0])

n4a = div_numbers4

  1. print(n4a[0])

print(n1a[0], n2a[0], n3a[0], n4a[0])

text_file = open ("Output.txt", "w")

text_file.write(n1_a) text_file.close()



wordsnumber_statistic = len(content.split())

  1. number of words
  1. print(wordsnumber_statistic)


numberoflines_statistic = len(content.splitlines())

  1. number of lines

print("Number of lines:") print(numberoflines_statistic)


numberofcharacters_statistic = len(content)

  1. number of characters

print("Number of characters:") print(numberofcharacters_statistic)


d ={}

for word in words: d[word] = d.get(word, 0) + 1

  1. how many times a word accuers in the text, not sorted yet(next step)
  1. print(d)


word_freq =[]

for key, value in d.items(): word_freq.append((value, key))

  1. sorted the word count - converting a dictionary into a list
  1. print(word_freq)


lettercounter = Counter(content)

  1. counts the letters in the text
  1. print(lettercounter)







Natural Language Tool Kit

DrawBot

ACCP (Analogue Circular Communication Protocol)