NLTK text analysis: Difference between revisions
(Replaced content with "=Natural Language Tool Kit_141020_Michael= url = "https://git.xpub.nl/XPUB/S13-Words-for-the-Future-notebooks/raw/branch/master/txt/words-for-the-future/UNDECIDABILITY.tx...") Tag: Replaced |
|||
Line 1: | Line 1: | ||
=Natural Language Tool Kit_141020_Michael= | =Natural Language Tool Kit_141020_Michael= | ||
url = "https://git.xpub.nl/XPUB/S13-Words-for-the-Future-notebooks/raw/branch/master/txt/words-for-the-future/UNDECIDABILITY.txt" | { | ||
from urllib.request import urlopen | "cells": [ | ||
r = urlopen(url) | { | ||
r | "cell_type": "code", | ||
rawtext = r.read() | "execution_count": null, | ||
text = rawtext.decode() | "metadata": {}, | ||
type(text) | "outputs": [], | ||
text = urlopen(url).read().decode() | "source": [] | ||
len(text) | }, | ||
text[-1] | { | ||
text[0] | "cell_type": "code", | ||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"url = \"https://git.xpub.nl/XPUB/S13-Words-for-the-Future-notebooks/raw/branch/master/txt/words-for-the-future/UNDECIDABILITY.txt\"" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 2, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"'https://git.xpub.nl/XPUB/S13-Words-for-the-Future-notebooks/raw/branch/master/txt/words-for-the-future/UNDECIDABILITY.txt'" | |||
] | |||
}, | |||
"execution_count": 2, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"url" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 3, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"from urllib.request import urlopen" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 4, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"<http.client.HTTPResponse at 0x7f2f424be6a0>" | |||
] | |||
}, | |||
"execution_count": 4, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"r = urlopen(url)\n", | |||
"r" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 5, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"rawtext = r.read()" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 6, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"text = rawtext.decode()" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 7, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"str" | |||
] | |||
}, | |||
"execution_count": 7, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"type(text)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 8, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"text = urlopen(url).read().decode()" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 9, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"15990" | |||
] | |||
}, | |||
"execution_count": 9, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"len(text)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 10, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"'\\r'" | |||
] | |||
}, | |||
"execution_count": 10, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"text[-1]" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 11, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"'U'" | |||
] | |||
}, | |||
"execution_count": 11, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"text[0]" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 33, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"words = text.split()" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 13, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"2548" | |||
] | |||
}, | |||
"execution_count": 13, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"len(words)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 14, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"'Undecidability'" | |||
] | |||
}, | |||
"execution_count": 14, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"words[0]" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 15, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"'Multiplying'" | |||
] | |||
}, | |||
"execution_count": 15, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"words[3]" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 16, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"'158-172.'" | |||
] | |||
}, | |||
"execution_count": 16, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"words[-1]" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 17, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"from nltk import word_tokenize, Text" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 18, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"tokens = word_tokenize(text)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 19, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"3047" | |||
] | |||
}, | |||
"execution_count": 19, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"len(tokens)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 20, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"'.'" | |||
] | |||
}, | |||
"execution_count": 20, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"tokens[-1]" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 21, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"['Undecidability',\n", | |||
" 'Silvia',\n", | |||
" 'Bottiroli',\n", | |||
" 'Multiplying',\n", | |||
" 'the',\n", | |||
" 'Visible',\n", | |||
" 'The',\n", | |||
" 'word',\n", | |||
" '[',\n", | |||
" 'i']" | |||
] | |||
}, | |||
"execution_count": 21, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"tokens[:10]" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 22, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"['Memos', 'for', 'the', 'Next', 'Millennium', '[', 'i', ']', 'written']" | |||
] | |||
}, | |||
"execution_count": 22, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"tokens[21:30] # slice end is exclusive: index 30 is not included" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 23, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"strengers = Text(tokens)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 38, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"<Text: Silvia Bottiroli Multiplying the Visible The word [ i>" | |||
] | |||
}, | |||
"execution_count": 38, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"strengers" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 40, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Displaying 11 of 11 matches:\n", | |||
" ] attempts to escape the vortex of multiplicity are useless. ” [ 6 ] In his fifth m\n", | |||
" , he subsequently focuses on [ i ] multiplicity [ i ] as a way for literature to co\n", | |||
"fore , let ’ s think visibility and multiplicity together , as : a multiplication of\n", | |||
"n the contrary , it is generating a multiplicity of different gazes that are all leg\n", | |||
"ed and thus incomplete and open . A Multiplicity of Gazes An undecidable artwork is \n", | |||
"ics today , is that they generate a multiplicity of gazes and of forms of spectators\n", | |||
" positions and points of view . The multiplicity of gazes produced and gathered by u\n", | |||
"tes a radical collectivity based on multiplicity and on conflicting positions that a\n", | |||
"ility and from its encounter with a multiplicity of gazes . Preserving it is possibl\n", | |||
"encounter between undecidable art , multiplicity of gazes , and a curatorial dimensi\n", | |||
" ibid , p. 98 . 7 . Italo Calvino , Multiplicity , [ i ] Six Memos for the Next Mill\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"strengers.concordance(\"multiplicity\", width = 84, lines = 72)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 26, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"idability Silvia Bottiroli Multiplying the Visible The word [ i ] undecidable [ i\n", | |||
"lvia Bottiroli Multiplying the Visible The word [ i ] undecidable [ i ] appears i\n", | |||
"e [ i ] appears in [ i ] Six Memos for the Next Millennium [ i ] written by Italo\n", | |||
"ry lectures at Harvard University . In the last months of his life Calvino worked\n", | |||
"rishly on these lectures , but died in the process . In the five memos he left be\n", | |||
"ectures , but died in the process . In the five memos he left behind , he did not\n", | |||
"i ] Visibility [ i ] , revolves around the capacity of literature to generate ima\n", | |||
"flow continuously . Calvino focuses on the imagination as “ the repertory of what\n", | |||
"alvino focuses on the imagination as “ the repertory of what is potential ; what \n", | |||
" exist but might have existed. ” [ 2 ] The main concern that he brings forth lies\n", | |||
"ncern that he brings forth lies within the relation between contemporary culture \n", | |||
"contemporary culture and imagination : the risk to definitely lose , in the overp\n", | |||
"ion : the risk to definitely lose , in the overproduction of images , the power o\n", | |||
"se , in the overproduction of images , the power of bringing visions into focus w\n", | |||
"g [ i ] in terms of images. ” [ 3 ] In the last pages of the lecture , he propose\n", | |||
"f images. ” [ 3 ] In the last pages of the lecture , he proposes a shift from und\n", | |||
"he proposes a shift from understanding the fantastic world of the artist , not as\n", | |||
"m understanding the fantastic world of the artist , not as indefinable , but as [\n", | |||
"th this word , Calvino means to define the coexistence and the relation , within \n", | |||
"no means to define the coexistence and the relation , within any literary work , \n", | |||
", between three different dimensions . The first dimension is the artist ’ s imag\n", | |||
"nt dimensions . The first dimension is the artist ’ s imagination – a world of po\n", | |||
"at no work will succeed in realizing . The second is the reality as we experience\n", | |||
"l succeed in realizing . The second is the reality as we experience it by living \n", | |||
"we experience it by living . Finally , the third is the world of the actual work \n", | |||
" it by living . Finally , the third is the world of the actual work , made by the\n", | |||
" . Finally , the third is the world of the actual work , made by the layers of si\n", | |||
"the world of the actual work , made by the layers of signs that accumulate in it \n", | |||
"ns that accumulate in it ; compared to the first two worlds , it is “ also infini\n", | |||
"ctory to formulation. ” [ 4 ] He calls the link between these three worlds “ the \n", | |||
" the link between these three worlds “ the undecidable , the paradox of an infini\n", | |||
"these three worlds “ the undecidable , the paradox of an infinite whole that cont\n", | |||
"ino , artistic operations involve , by the means of the infinity of linguistic po\n", | |||
"c operations involve , by the means of the infinity of linguistic possibilities ,\n", | |||
"infinity of linguistic possibilities , the infinity of the artist ’ s imagination\n", | |||
"uistic possibilities , the infinity of the artist ’ s imagination , and the infin\n", | |||
"ty of the artist ’ s imagination , and the infinity of contingencies . Therefore \n", | |||
"ity of contingencies . Therefore , “ [ the ] attempts to escape the vortex of mul\n", | |||
"erefore , “ [ the ] attempts to escape the vortex of multiplicity are useless. ” \n", | |||
" as a way for literature to comprehend the complex nature of the world that for t\n", | |||
"re to comprehend the complex nature of the world that for the author is a whole o\n", | |||
"e complex nature of the world that for the author is a whole of wholes , where th\n", | |||
"he author is a whole of wholes , where the acts of watching and knowing also inte\n", | |||
"watching and knowing also intervene in the observed reality and alter it . Calvin\n", | |||
"are readable as different narratives . The lecture revolves around some novels th\n", | |||
"ain multiple worlds and make space for the readers ’ imaginations . The common so\n", | |||
"space for the readers ’ imaginations . The common source to all these experiments\n", | |||
"all these experiments seems to rely in the understanding of the contemporary nove\n", | |||
" seems to rely in the understanding of the contemporary novel “ as an encyclopedi\n", | |||
" , as a network of connections between the events , the people , and the things o\n", | |||
"rk of connections between the events , the people , and the things of the world. \n", | |||
" between the events , the people , and the things of the world. ” [ 7 ] Therefore\n", | |||
"vents , the people , and the things of the world. ” [ 7 ] Therefore , let ’ s thi\n", | |||
"ic production and define a context for the undecidable , or rather for undecidabi\n", | |||
"le , or rather for undecidability , as the quality of being undecidable . Calvino\n", | |||
"tion modes and doesn ’ t fade out from the scene of the ‘ real ’ world . We might\n", | |||
"d doesn ’ t fade out from the scene of the ‘ real ’ world . We might stretch this\n", | |||
" s potentiality is that of multiplying the visible as an actual counterstrategy t\n", | |||
"isible as an actual counterstrategy to the proliferation of images that surrounds\n", | |||
"ly articulates , redefines , or alters the complex system of links , bounds , and\n", | |||
"specific to some artworks within which the three worlds that Calvino describes me\n", | |||
"tains and under certain terms performs the possibility of its actualisation , a w\n", | |||
"into one actual form . In particular , the potentiality generated by undecidable \n", | |||
"c of ‘ and… and… and… ’ as opposite to the logic of ‘ either… or… ’ that seems to\n", | |||
"ature and just exist as such . None of the images of an artwork are being more or\n", | |||
"twork are being more or less real than the others , no matter whether they come a\n", | |||
"vidual or collective fantasies . It is the art ( work ) as such that creates a gr\n", | |||
"s such that creates a ground where all the images that come into visibility share\n", | |||
"images that come into visibility share the same gradient of reality , no matter w\n", | |||
"itors or spectators to enter into – if the invitation of art is often that of los\n", | |||
"itation of art is often that of losing the contact with known worlds in order to \n", | |||
"Here , spectators are invited to enter the work ’ s fictional world carrying with\n", | |||
"ctional world carrying with themselves the so-called real world and all their oth\n", | |||
"ll these worlds are equally welcomed . The artwork may then be navigated either b\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"for line in strengers.concordance_list(\"the\", width=82, lines=74):\n", | |||
"    print (line.left_print, line.query, line.right_print)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 27, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Undecidability\n", | |||
"University\n", | |||
"visibility\n", | |||
"Visibility\n", | |||
"capacity\n", | |||
"reality\n", | |||
"infinity\n", | |||
"infinity\n", | |||
"infinity\n", | |||
"multiplicity\n", | |||
"multiplicity\n", | |||
"reality\n", | |||
"visibility\n", | |||
"multiplicity\n", | |||
"undecidability\n", | |||
"quality\n", | |||
"potentiality\n", | |||
"visibility\n", | |||
"undecidability\n", | |||
"undecidability\n", | |||
"quality\n", | |||
"possibility\n", | |||
"potentiality\n", | |||
"potentiality\n", | |||
"reality\n", | |||
"reality\n", | |||
"visibility\n", | |||
"reality\n", | |||
"undecidability\n", | |||
"reality\n", | |||
"contemporaneity\n", | |||
"possibility\n", | |||
"possibility\n", | |||
"possibility\n", | |||
"undecidability\n", | |||
"community\n", | |||
"possibility\n", | |||
"multiplicity\n", | |||
"Multiplicity\n", | |||
"multiplicity\n", | |||
"multiplicity\n", | |||
"community\n", | |||
"collectivity\n", | |||
"multiplicity\n", | |||
"reality\n", | |||
"responsibility\n", | |||
"undecidability\n", | |||
"potentiality\n", | |||
"undecidability\n", | |||
"collectivity\n", | |||
"visibility\n", | |||
"Undecidability\n", | |||
"possibility\n", | |||
"potentiality\n", | |||
"quality\n", | |||
"undecidability\n", | |||
"multiplicity\n", | |||
"intensity\n", | |||
"multiplicity\n", | |||
"Visibility\n", | |||
"University\n", | |||
"Multiplicity\n", | |||
"University\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"for w in strengers:\n", | |||
"    if w.endswith(\"ity\"):\n", | |||
"        print (w) # but this prints every occurrence, so repeated words show up more than once" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 41, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# and now collected in a list, squashing case, and using a \"set\" to remove duplicates.\n", | |||
"\n", | |||
"ity = []\n", | |||
"for w in strengers :\n", | |||
"    if w.endswith(\"ity\"):\n", | |||
"        #print(w)\n", | |||
"        ity.append(w.lower())\n", | |||
"        #strengers.concordance()\n", | |||
"ity = set(ity) \n", | |||
"\n" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 43, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"with open(\"nami_undecidibility_Michael_NLTK_141020.text\", \"w\") as output:\n", | |||
"\n", | |||
" s = 0\n", | |||
"\n", | |||
" for word in ity:\n", | |||
" #strengers.concordance(word, width = 84)\n", | |||
" for line in strengers.concordance_list(word, width=82, lines=74):\n", | |||
" t = line.left_print + \" \" * (2 + int(s)) + line.query + \" \" * (2 + int(s)) + line.right_print \n", | |||
" #print(s)\n", | |||
" print (t[:82], file = output)#0-82 limited\n", | |||
" s = s + 0.3\n", | |||
" \n", | |||
" \n", | |||
" \n", | |||
" " | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"for w in strengers:\n", | |||
"    if w.endswith(\"le\"):\n", | |||
"        print (w) # but this prints every occurrence, so repeated words show up more than once" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# and now collected in a list, squashing case, and using a set to remove duplicates\n", | |||
"\n", | |||
"le = []\n", | |||
"for w in strengers :\n", | |||
"    if w.endswith(\"le\"):\n", | |||
"        #print(w)\n", | |||
"        le.append(w.lower())\n", | |||
"        #strengers.concordance()\n", | |||
"le = set(le) \n", | |||
"\n" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"for word in le:\n", | |||
"    strengers.concordance(word, width = 84)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 45, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Displaying 11 of 11 matches:\n", | |||
"cape the vortex of multiplicity are useless. ” [ 6\n", | |||
"y focuses on [ i ] multiplicity [ i ] as a way for\n", | |||
"ink visibility and multiplicity together , as : a \n", | |||
"it is generating a multiplicity of different gazes\n", | |||
"plete and open . A Multiplicity of Gazes An undeci\n", | |||
"at they generate a multiplicity of gazes and of fo\n", | |||
"ints of view . The multiplicity of gazes produced \n", | |||
"lectivity based on multiplicity and on conflicting\n", | |||
"s encounter with a multiplicity of gazes . Preserv\n", | |||
" undecidable art , multiplicity of gazes , and a c\n", | |||
" . Italo Calvino , Multiplicity , [ i ] Six Memos \n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"strengers.concordance(\"multiplicity\", width = 50)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 44, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"kind world logic space practice undecidable visibility capacity images\n", | |||
"and repertory overproduction power thinking understanding means\n", | |||
"coexistence layers paradox whole\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"strengers.similar(\"multiplicity\")" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"strengers.common_contexts([\"undecidability\", \"multiplicity\"])" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"strengers.dispersion_plot([\"the\", \"multiplicity\", \"performance\"])" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"from IPython.core.pylabtools import figsize" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"figsize(20.0, 20.0) # make the graph larger" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"from nltk.probability import FreqDist" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"freq = FreqDist(tokens) # frequency distribution" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"freq.keys()" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"freq[\"the\"]" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"freq.plot()" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"freq.plot(50, cumulative = True)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"freq.plot(30)" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [] | |||
} | |||
], | |||
"metadata": { | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"language": "python", | |||
"name": "python3" | |||
}, | |||
"language_info": { | |||
"codemirror_mode": { | |||
"name": "ipython", | |||
"version": 3 | |||
}, | |||
"file_extension": ".py", | |||
"mimetype": "text/x-python", | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.7.3" | |||
} | |||
}, | |||
"nbformat": 4, | |||
"nbformat_minor": 4 | |||
} |
Revision as of 20:16, 21 October 2020
Natural Language Tool Kit_141020_Michael
{
"cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "url = \"https://git.xpub.nl/XPUB/S13-Words-for-the-Future-notebooks/raw/branch/master/txt/words-for-the-future/UNDECIDABILITY.txt\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'https://git.xpub.nl/XPUB/S13-Words-for-the-Future-notebooks/raw/branch/master/txt/words-for-the-future/UNDECIDABILITY.txt'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "url" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from urllib.request import urlopen" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<http.client.HTTPResponse at 0x7f2f424be6a0>" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "r = urlopen(url)\n", "r" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "rawtext = r.read()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "text = rawtext.decode()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "str" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(text)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "text = urlopen(url).read().decode()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "15990" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(text)" ] }, { "cell_type": "code", 
"execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'\\r'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "text[-1]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'U'" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "text[0]" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "words = text.split()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2548" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(words)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Undecidability'" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "words[0]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Multiplying'" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "words[3]" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'158-172.'" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "words[-1]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "from nltk import word_tokenize, Text" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "tokens = word_tokenize(text)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3047" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(tokens)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { 
"data": { "text/plain": [ "'.'" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokens[-1]" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Undecidability',\n", " 'Silvia',\n", " 'Bottiroli',\n", " 'Multiplying',\n", " 'the',\n", " 'Visible',\n", " 'The',\n", " 'word',\n", " '[',\n", " 'i']" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokens[:10]" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Memos', 'for', 'the', 'Next', 'Millennium', '[', 'i', ']', 'written']" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokens[21:30] # not including 30th word" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "strengers = Text(tokens)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<Text: Silvia Bottiroli Multiplying the Visible The word [ i>" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "strengers" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Displaying 11 of 11 matches:\n", " ] attempts to escape the vortex of multiplicity are useless. ” [ 6 ] In his fifth m\n", " , he subsequently focuses on [ i ] multiplicity [ i ] as a way for literature to co\n", "fore , let ’ s think visibility and multiplicity together , as : a multiplication of\n", "n the contrary , it is generating a multiplicity of different gazes that are all leg\n", "ed and thus incomplete and open . A Multiplicity of Gazes An undecidable artwork is \n", "ics today , is that they generate a multiplicity of gazes and of forms of spectators\n", " positions and points of view . 
The multiplicity of gazes produced and gathered by u\n", "tes a radical collectivity based on multiplicity and on conflicting positions that a\n", "ility and from its encounter with a multiplicity of gazes . Preserving it is possibl\n", "encounter between undecidable art , multiplicity of gazes , and a curatorial dimensi\n", " ibid , p. 98 . 7 . Italo Calvino , Multiplicity , [ i ] Six Memos for the Next Mill\n" ] } ], "source": [ "strengers.concordance(\"multiplicity\", width = 84, lines = 72)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "idability Silvia Bottiroli Multiplying the Visible The word [ i ] undecidable [ i\n", "lvia Bottiroli Multiplying the Visible The word [ i ] undecidable [ i ] appears i\n", "e [ i ] appears in [ i ] Six Memos for the Next Millennium [ i ] written by Italo\n", "ry lectures at Harvard University . In the last months of his life Calvino worked\n", "rishly on these lectures , but died in the process . In the five memos he left be\n", "ectures , but died in the process . In the five memos he left behind , he did not\n", "i ] Visibility [ i ] , revolves around the capacity of literature to generate ima\n", "flow continuously . Calvino focuses on the imagination as “ the repertory of what\n", "alvino focuses on the imagination as “ the repertory of what is potential ; what \n", " exist but might have existed. ” [ 2 ] The main concern that he brings forth lies\n", "ncern that he brings forth lies within the relation between contemporary culture \n", "contemporary culture and imagination : the risk to definitely lose , in the overp\n", "ion : the risk to definitely lose , in the overproduction of images , the power o\n", "se , in the overproduction of images , the power of bringing visions into focus w\n", "g [ i ] in terms of images. ” [ 3 ] In the last pages of the lecture , he propose\n", "f images. 
” [ 3 ] In the last pages of the lecture , he proposes a shift from und\n", "he proposes a shift from understanding the fantastic world of the artist , not as\n", "m understanding the fantastic world of the artist , not as indefinable , but as [\n", "th this word , Calvino means to define the coexistence and the relation , within \n", "no means to define the coexistence and the relation , within any literary work , \n", ", between three different dimensions . The first dimension is the artist ’ s imag\n", "nt dimensions . The first dimension is the artist ’ s imagination – a world of po\n", "at no work will succeed in realizing . The second is the reality as we experience\n", "l succeed in realizing . The second is the reality as we experience it by living \n", "we experience it by living . Finally , the third is the world of the actual work \n", " it by living . Finally , the third is the world of the actual work , made by the\n", " . Finally , the third is the world of the actual work , made by the layers of si\n", "the world of the actual work , made by the layers of signs that accumulate in it \n", "ns that accumulate in it ; compared to the first two worlds , it is “ also infini\n", "ctory to formulation. ” [ 4 ] He calls the link between these three worlds “ the \n", " the link between these three worlds “ the undecidable , the paradox of an infini\n", "these three worlds “ the undecidable , the paradox of an infinite whole that cont\n", "ino , artistic operations involve , by the means of the infinity of linguistic po\n", "c operations involve , by the means of the infinity of linguistic possibilities ,\n", "infinity of linguistic possibilities , the infinity of the artist ’ s imagination\n", "uistic possibilities , the infinity of the artist ’ s imagination , and the infin\n", "ty of the artist ’ s imagination , and the infinity of contingencies . Therefore \n", "ity of contingencies . 
Therefore , “ [ the ] attempts to escape the vortex of mul\n", "erefore , “ [ the ] attempts to escape the vortex of multiplicity are useless. ” \n", " as a way for literature to comprehend the complex nature of the world that for t\n", "re to comprehend the complex nature of the world that for the author is a whole o\n", "e complex nature of the world that for the author is a whole of wholes , where th\n", "he author is a whole of wholes , where the acts of watching and knowing also inte\n", "watching and knowing also intervene in the observed reality and alter it . Calvin\n", "are readable as different narratives . The lecture revolves around some novels th\n", "ain multiple worlds and make space for the readers ’ imaginations . The common so\n", "space for the readers ’ imaginations . The common source to all these experiments\n", "all these experiments seems to rely in the understanding of the contemporary nove\n", " seems to rely in the understanding of the contemporary novel “ as an encyclopedi\n", " , as a network of connections between the events , the people , and the things o\n", "rk of connections between the events , the people , and the things of the world. \n", " between the events , the people , and the things of the world. ” [ 7 ] Therefore\n", "vents , the people , and the things of the world. ” [ 7 ] Therefore , let ’ s thi\n", "ic production and define a context for the undecidable , or rather for undecidabi\n", "le , or rather for undecidability , as the quality of being undecidable . Calvino\n", "tion modes and doesn ’ t fade out from the scene of the ‘ real ’ world . We might\n", "d doesn ’ t fade out from the scene of the ‘ real ’ world . 
We might stretch this\n", " s potentiality is that of multiplying the visible as an actual counterstrategy t\n", "isible as an actual counterstrategy to the proliferation of images that surrounds\n", "ly articulates , redefines , or alters the complex system of links , bounds , and\n", "specific to some artworks within which the three worlds that Calvino describes me\n", "tains and under certain terms performs the possibility of its actualisation , a w\n", "into one actual form . In particular , the potentiality generated by undecidable \n", "c of ‘ and… and… and… ’ as opposite to the logic of ‘ either… or… ’ that seems to\n", "ature and just exist as such . None of the images of an artwork are being more or\n", "twork are being more or less real than the others , no matter whether they come a\n", "vidual or collective fantasies . It is the art ( work ) as such that creates a gr\n", "s such that creates a ground where all the images that come into visibility share\n", "images that come into visibility share the same gradient of reality , no matter w\n", "itors or spectators to enter into – if the invitation of art is often that of los\n", "itation of art is often that of losing the contact with known worlds in order to \n", "Here , spectators are invited to enter the work ’ s fictional world carrying with\n", "ctional world carrying with themselves the so-called real world and all their oth\n", "ll these worlds are equally welcomed . 
The artwork may then be navigated either b\n" ] } ], "source": [ "for line in strengers.concordance_list(\"the\", width=82, lines=74):\n", " print (line.left_print, line.query, line.right_print)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Undecidability\n", "University\n", "visibility\n", "Visibility\n", "capacity\n", "reality\n", "infinity\n", "infinity\n", "infinity\n", "multiplicity\n", "multiplicity\n", "reality\n", "visibility\n", "multiplicity\n", "undecidability\n", "quality\n", "potentiality\n", "visibility\n", "undecidability\n", "undecidability\n", "quality\n", "possibility\n", "potentiality\n", "potentiality\n", "reality\n", "reality\n", "visibility\n", "reality\n", "undecidability\n", "reality\n", "contemporaneity\n", "possibility\n", "possibility\n", "possibility\n", "undecidability\n", "community\n", "possibility\n", "multiplicity\n", "Multiplicity\n", "multiplicity\n", "multiplicity\n", "community\n", "collectivity\n", "multiplicity\n", "reality\n", "responsibility\n", "undecidability\n", "potentiality\n", "undecidability\n", "collectivity\n", "visibility\n", "Undecidability\n", "possibility\n", "potentiality\n", "quality\n", "undecidability\n", "multiplicity\n", "intensity\n", "multiplicity\n", "Visibility\n", "University\n", "Multiplicity\n", "University\n" ] } ], "source": [ "for w in strengers:\n", " if w.endswith(\"ity\"):\n", " print (w) # but then this will show overlapping, looping.." 
] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "# and now collected in a list, and squashing case, and using a \"set\" to remove duplicates.\n", "\n", "ity = []\n", "for w in strengers :\n", " if w.endswith(\"ity\"):\n", " #print(w)\n", " ity.append(w.lower())\n", " #strengers.concordance()\n", "ity = set(ity) \n", "\n" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "with open(\"nami_undecidibility_Michael_NLTK_141020.text\", \"w\") as output:\n", "\n", " s = 0\n", "\n", " for word in ity:\n", " #strengers.concordance(word, width = 84)\n", " for line in strengers.concordance_list(word, width=82, lines=74):\n", " t = line.left_print + \" \" * (2 + int(s)) + line.query + \" \" * (2 + int(s)) + line.right_print \n", " #print(s)\n", " print (t[:82], file = output)#0-82 limited\n", " s = s + 0.3\n", " \n", " \n", " \n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for w in strengers:\n", " if w.endswith(\"le\"):\n", " print (w) # but then this will show overlapping, looping.." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# and now collected in a list, and squashing case, and using a set to remove duplicates\n", "\n", "le = []\n", "for w in strengers :\n", " if w.endswith(\"le\"):\n", " #print(w)\n", " le.append(w.lower())\n", " #strengers.concordance()\n", "le = set(le) \n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for word in le:\n", " strengers.concordance(word, width = 84)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Displaying 11 of 11 matches:\n", "cape the vortex of multiplicity are useless. ” [ 6\n", "y focuses on [ i ] multiplicity [ i ] as a way for\n", "ink visibility and multiplicity together , as : a \n", "it is generating a multiplicity of different gazes\n", "plete and open . A Multiplicity of Gazes An undeci\n", "at they generate a multiplicity of gazes and of fo\n", "ints of view . The multiplicity of gazes produced \n", "lectivity based on multiplicity and on conflicting\n", "s encounter with a multiplicity of gazes . Preserv\n", " undecidable art , multiplicity of gazes , and a c\n", " . 
Italo Calvino , Multiplicity , [ i ] Six Memos \n" ] } ], "source": [ "strengers.concordance(\"multiplicity\", width = 50)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "kind world logic space practice undecidable visibility capacity images\n", "and repertory overproduction power thinking understanding means\n", "coexistence layers paradox whole\n" ] } ], "source": [ "strengers.similar(\"multiplicity\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "strengers.common_contexts([\"undecidability\", \"multiplicity\"])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "strengers.dispersion_plot([\"the\", \"multiplicity\", \"performance\"])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from IPython.core.pylabtools import figsize" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "figsize(20.0, 20.0) # make the graph longer" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from nltk.probability import FreqDist" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "freq = FreqDist(tokens) # frequency distribution" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "freq.keys()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "freq[\"the\"]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "freq.plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "freq.plot(50, cumulative = True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "freq.plot(30)" ] }, { "cell_type": 
"code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4
}