Whoosh: Difference between revisions
No edit summary |
No edit summary |
||
Line 3: | Line 3: | ||
* [http://bitbucket.org/mchaput/whoosh/wiki/Home Project page] | * [http://bitbucket.org/mchaput/whoosh/wiki/Home Project page] | ||
{{youtube|gRvZbYtwTeo}} | {{youtube|gRvZbYtwTeo}} <br> | ||
indexwhoosh.py #make index for your search | indexwhoosh.py #make index for your search | ||
Revision as of 16:34, 11 March 2014
Whoosh is text-indexing software (the core indexing part of a search engine), written in 100% Python, so it is easy to install and use on any platform where Python is available.
indexwhoosh.py #make index for your search
import os.path
from whoosh.index import create_in
from whoosh.fields import Schema, TEXT
from whoosh.index import open_dir
from whoosh.fields import Schema, STORED, ID, KEYWORD, TEXT
# Local import, placed here so the script's existing import lines stay as-is.
from whoosh.index import exists_in


def _read_utf8(path):
    """Return the contents of the file at *path* decoded as UTF-8 text."""
    # Binary mode plus an explicit decode behaves the same on Python 2 and 3,
    # and the ``with`` block closes the handle even if reading fails.
    with open(path, "rb") as handle:
        return handle.read().decode("utf-8")


# Index schema: a title and the full text, both kept in the index
# (stored=True) so they can be read back from search hits.
schema = Schema(title=TEXT(stored=True), content=TEXT(stored=True))

if not os.path.exists("index"):
    os.mkdir("index")

# Create the index only on the first run (the original note said "only do
# this once"); on later runs reuse the index that is already in the directory.
# NOTE(review): because the index is reused, running this script again adds
# the same two documents a second time -- confirm that is acceptable.
if exists_in("index"):
    ix = open_dir("index")
else:
    ix = create_in("index", schema)

# Read every input file before asking for a writer, so that a missing or
# undecodable file aborts the script without leaving an open, uncommitted
# writer behind.
documents = [
    (u"Vulgar", _read_utf8("vulgar.txt")),
    (u"liz", _read_utf8("liz.txt")),
]

writer = ix.writer()
for title, text in documents:
    writer.add_document(title=title, content=text)
writer.commit()
#search your index
import os.path
from whoosh.index import open_dir
# Searching needs an index that the indexing script has already built.
# Creating an empty "index" directory here (as the script used to) cannot
# help: open_dir() still has nothing to open, and the stray directory is left
# behind. Stop with a clear message instead.
if not os.path.isdir("index"):
    raise SystemExit("no 'index' directory found; run indexwhoosh.py first")
ix = open_dir("index")

from whoosh.qparser import QueryParser

# Parse the query against the "content" field of the index's own schema.
parser = QueryParser("content", ix.schema)
q = parser.parse(u"love monkey")

# The context manager closes the searcher when the block ends; the hits are
# consumed inside the block, while the searcher is still open.
with ix.searcher() as searcher:
    r = searcher.search(q)
    # Single-argument print calls produce the same output on Python 2 and 3.
    print("%s %s" % ("searching ", ix.doc_count()))
    for hit in r:
        # Printed as a (title, highlighted excerpt) tuple, as before.
        print((hit["title"], hit.highlights("content")))