Whoosh: Difference between revisions
(Created page with "{{youtube|gRvZbYtwTeo}}") |
|||
(8 intermediate revisions by 2 users not shown) | |||
Line 1: | Line 1: | ||
{{youtube|gRvZbYtwTeo}} | Whoosh is a text indexing software (the core indexing part of a search engine), written in 100% Python (so it's easy to install and use on any platform where python is available). | ||
* [http://bitbucket.org/mchaput/whoosh/wiki/Home Project page] | |||
{{youtube|gRvZbYtwTeo}} <br> | |||
<source lang="python"> | |||
#make index for your search | |||
import os.path | |||
from whoosh.index import create_in | |||
from whoosh.fields import Schema, TEXT | |||
from whoosh.index import open_dir | |||
from whoosh.fields import Schema, STORED, ID, KEYWORD, TEXT | |||
schema = Schema(title=TEXT(stored=True), content=TEXT(stored=True)) | |||
if not os.path.exists("index"): | |||
os.mkdir("index") | |||
ix = create_in("index", schema) #only do this once | |||
ix = open_dir("index") | |||
writer = ix.writer() | |||
f = open("vulgar.txt").read().decode("utf-8") | |||
writer.add_document(title=u"Vulgar", content=f) | |||
f = open("liz.txt").read().decode("utf-8") | |||
writer.add_document(title=u"liz", content=f) | |||
writer.commit() | |||
</source> | |||
<source lang="python"> | |||
#search your index | |||
import os.path | |||
from whoosh.index import open_dir | |||
if not os.path.exists("index"): | |||
os.mkdir("index") | |||
ix = open_dir("index") | |||
searcher = ix.searcher() | |||
from whoosh.qparser import QueryParser | |||
parser = QueryParser("content", ix.schema) | |||
q = parser.parse(u"love monkey") | |||
r = searcher.search(q) | |||
print "searching ", ix.doc_count() | |||
for hit in r: | |||
print (hit["title"], hit.highlights("content")) #sq brackets dict/lists | |||
</source> | |||
== SEARCH CGI == | |||
<source lang ="python">#!/usr/bin/env python | |||
print "Content-type:text/html;charset=utf-8" | |||
print | |||
import os.path | |||
from whoosh.index import open_dir | |||
import cgi # for the FieldStorage, to read variables. | |||
from xml.sax.saxutils import quoteattr #formating it for a form | |||
inputs = cgi.FieldStorage() | |||
thesearch = inputs.getvalue("q", "love monkey").decode("utf-8") | |||
if not os.path.exists("index"): | |||
os.mkdir("index") | |||
ix = open_dir("index") | |||
searcher = ix.searcher() | |||
from whoosh.qparser import QueryParser | |||
parser = QueryParser("content", ix.schema) | |||
q = parser.parse(thesearch) | |||
r = searcher.search(q) | |||
print "<form>" | |||
print '<input type="text" name="q" value='+quoteattr(thesearch)+' />' | |||
print "</form>" | |||
# print "searching ", ix.doc_count() | |||
for hit in r: | |||
print "<div>" | |||
print "<h3>", hit["title"], "</h3>" #sq brackets dict/lists | |||
print hit.highlights("content") | |||
print "</div>" | |||
</source> |
Latest revision as of 12:18, 2 May 2017
Whoosh is a text-indexing library (the core indexing part of a search engine) written in pure Python, so it is easy to install and use on any platform where Python is available.
# Make the index for your search.
#
# Creates the "index" directory and a Whoosh index inside it on the first
# run, then adds the two example text files to it.
# NOTE: every run appends both documents again; delete the "index"
# directory first if you want to start from a fresh index.
import io
import os.path

from whoosh.fields import ID, KEYWORD, STORED, TEXT, Schema
from whoosh.index import create_in, exists_in, open_dir

INDEX_DIR = "index"

# Both fields are stored so that search hits can return the title and
# build highlighted excerpts from the content.
schema = Schema(title=TEXT(stored=True), content=TEXT(stored=True))

if not os.path.exists(INDEX_DIR):
    os.mkdir(INDEX_DIR)

# create_in() clears any index already present in the directory, so only
# do this once; on later runs re-open the existing index instead.
if exists_in(INDEX_DIR):
    ix = open_dir(INDEX_DIR)
else:
    ix = create_in(INDEX_DIR, schema)

# The writer context manager commits on success and cancels (releasing the
# write lock) if reading one of the files raises.
with ix.writer() as writer:
    for title, path in ((u"Vulgar", "vulgar.txt"), (u"liz", "liz.txt")):
        # io.open decodes to unicode text on both Python 2 and Python 3
        # and the with-block closes the file.
        with io.open(path, encoding="utf-8") as f:
            writer.add_document(title=title, content=f.read())
# Search your index.
#
# Runs a fixed example query against the "content" field of the index built
# by the indexing script and prints each hit's title with a highlighted
# excerpt.
import sys

from whoosh.index import exists_in, open_dir
from whoosh.qparser import QueryParser

INDEX_DIR = "index"

# Creating an empty directory here would only make open_dir() fail with an
# empty-index error, so stop with a clear message instead.
if not exists_in(INDEX_DIR):
    sys.exit("no index found in %r; run the indexing script first" % INDEX_DIR)

ix = open_dir(INDEX_DIR)
parser = QueryParser("content", ix.schema)
q = parser.parse(u"love monkey")

# The searcher holds open files; the with-block closes it when done.
with ix.searcher() as searcher:
    r = searcher.search(q)
    print("searching  %d" % ix.doc_count())
    for hit in r:
        # A hit is indexed like a dict: square brackets fetch stored fields.
        print(u"%s: %s" % (hit["title"], hit.highlights("content")))
SEARCH CGI
#!/usr/bin/env python
# CGI front end for the index: shows a search form pre-filled with the
# current query ("q" parameter, default "love monkey") followed by one
# <div> per hit with its title and a highlighted excerpt.
# NOTE: the stdlib cgi module is deprecated since Python 3.11 and removed
# in Python 3.13.
import cgi  # for the FieldStorage, to read variables.
import sys
from xml.sax.saxutils import escape, quoteattr  # HTML-safe text / attributes

from whoosh.index import exists_in, open_dir
from whoosh.qparser import QueryParser

INDEX_DIR = "index"

# Under CGI stdout is not a terminal, so printing non-ASCII unicode can
# raise UnicodeEncodeError. Write UTF-8 bytes explicitly instead, matching
# the charset declared in the header (sys.stdout.buffer exists on Python 3).
_stdout = getattr(sys.stdout, "buffer", sys.stdout)


def emit(text):
    """Write *text* followed by a newline to the response as UTF-8."""
    _stdout.write(text.encode("utf-8") + b"\n")


emit(u"Content-type:text/html;charset=utf-8")
emit(u"")

inputs = cgi.FieldStorage()
# getfirst() always yields a single value, even if "q" is repeated in the
# request (getvalue() would return a list in that case).
thesearch = inputs.getfirst("q", "love monkey")
if isinstance(thesearch, bytes):
    # Python 2 hands back a byte string; Python 3 already gives text.
    thesearch = thesearch.decode("utf-8", "replace")

emit(u"<form>")
emit(u'<input type="text" name="q" value=' + quoteattr(thesearch) + u" />")
emit(u"</form>")

if not exists_in(INDEX_DIR):
    # Creating an empty directory would only make open_dir() fail later.
    emit(u"<p>No search index found; run the indexing script first.</p>")
else:
    ix = open_dir(INDEX_DIR)
    q = QueryParser("content", ix.schema).parse(thesearch)
    # The with-block closes the searcher once the results are written.
    with ix.searcher() as searcher:
        for hit in searcher.search(q):
            emit(u"<div>")
            # The stored title is plain text, so escape it for HTML.
            emit(u"<h3> " + escape(hit["title"]) + u" </h3>")
            # highlights() returns an HTML fragment (matched terms wrapped
            # in tags), so it is written out as-is.
            emit(hit.highlights("content"))
            emit(u"</div>")