Whoosh: Difference between revisions

From XPUB & Lens-Based wiki
(Created page with "{{youtube|gRvZbYtwTeo}}")
 
 
(8 intermediate revisions by 2 users not shown)
Line 1: Line 1:
{{youtube|gRvZbYtwTeo}}
Whoosh is text-indexing software (the core indexing part of a search engine) written in pure Python, so it is easy to install and use on any platform where Python is available.
 
* [http://bitbucket.org/mchaput/whoosh/wiki/Home Project page]
 
{{youtube|gRvZbYtwTeo}} <br>
 
<source lang="python">
#make index for your search
# Python 2 script: builds a Whoosh index in ./index from two text files.
import os.path
from whoosh.index import create_in, open_dir
from whoosh.fields import Schema, STORED, ID, KEYWORD, TEXT

# Both fields are stored so the search scripts can read hit["title"] and
# build highlights from the "content" text.
schema = Schema(title=TEXT(stored=True), content=TEXT(stored=True))

# Read and decode every source file BEFORE opening the index writer, so a
# missing or badly encoded file cannot leave a half-finished writer behind.
documents = []
for title, path in ((u"Vulgar", "vulgar.txt"), (u"liz", "liz.txt")):
    # "with" closes the file handle even if read() or decode() fails.
    with open(path, "rb") as source_file:
        documents.append((title, source_file.read().decode("utf-8")))

if not os.path.exists("index"):
    os.mkdir("index")
ix = create_in("index", schema) #only do this once

# presumably: on later runs, comment out create_in above and just reopen
# the existing index here
ix = open_dir("index")

writer = ix.writer()
for title, text in documents:
    writer.add_document(title=title, content=text)
writer.commit()
</source>
 
<source lang="python">
#search your index
import os.path
from whoosh.index import open_dir
 
if not os.path.exists("index"):
    os.mkdir("index")
ix = open_dir("index")
 
searcher = ix.searcher()
 
from whoosh.qparser import QueryParser
parser = QueryParser("content", ix.schema)
q = parser.parse(u"love monkey")
 
r = searcher.search(q)
 
print "searching ", ix.doc_count()
for hit in r:
    print (hit["title"], hit.highlights("content")) #sq brackets dict/lists
</source>
 
== SEARCH CGI ==
 
<source lang ="python">#!/usr/bin/env python
 
print "Content-type:text/html;charset=utf-8"
print
 
import os.path
from whoosh.index import open_dir
import cgi # for the FieldStorage, to read variables.
from xml.sax.saxutils import quoteattr #formating it for a form
 
inputs = cgi.FieldStorage()
thesearch = inputs.getvalue("q", "love monkey").decode("utf-8")
 
if not os.path.exists("index"):
    os.mkdir("index")
ix = open_dir("index")
 
searcher = ix.searcher()
 
from whoosh.qparser import QueryParser
parser = QueryParser("content", ix.schema)
q = parser.parse(thesearch)
 
r = searcher.search(q)
 
print "<form>"
print '<input type="text" name="q" value='+quoteattr(thesearch)+' />'
print "</form>"
 
# print "searching ", ix.doc_count()
for hit in r:
    print "<div>"
    print "<h3>", hit["title"], "</h3>" #sq brackets dict/lists
    print hit.highlights("content")
    print "</div>"
</source>

Latest revision as of 12:18, 2 May 2017

Whoosh is text-indexing software (the core indexing part of a search engine) written in pure Python, so it is easy to install and use on any platform where Python is available.


#make index for your search
# Python 2 script: builds a Whoosh index in ./index from two text files.
import os.path
from whoosh.index import create_in, open_dir
from whoosh.fields import Schema, STORED, ID, KEYWORD, TEXT

# Both fields are stored so the search scripts can read hit["title"] and
# build highlights from the "content" text.
schema = Schema(title=TEXT(stored=True), content=TEXT(stored=True))

# Read and decode every source file BEFORE opening the index writer, so a
# missing or badly encoded file cannot leave a half-finished writer behind.
documents = []
for title, path in ((u"Vulgar", "vulgar.txt"), (u"liz", "liz.txt")):
    # "with" closes the file handle even if read() or decode() fails.
    with open(path, "rb") as source_file:
        documents.append((title, source_file.read().decode("utf-8")))

if not os.path.exists("index"):
    os.mkdir("index")
ix = create_in("index", schema) #only do this once

# presumably: on later runs, comment out create_in above and just reopen
# the existing index here
ix = open_dir("index")

writer = ix.writer()
for title, text in documents:
    writer.add_document(title=title, content=text)
writer.commit()
#search your index
import os.path
from whoosh.index import open_dir

if not os.path.exists("index"):
    os.mkdir("index")
ix = open_dir("index")

searcher = ix.searcher()

from whoosh.qparser import QueryParser
parser = QueryParser("content", ix.schema)
q = parser.parse(u"love monkey")

r = searcher.search(q)

print "searching ", ix.doc_count()
for hit in r:
    print (hit["title"], hit.highlights("content")) #sq brackets dict/lists

SEARCH CGI

#!/usr/bin/env python 

print "Content-type:text/html;charset=utf-8"
print 

import os.path
from whoosh.index import open_dir
import cgi # for the FieldStorage, to read variables. 
from xml.sax.saxutils import quoteattr #formating it for a form

inputs = cgi.FieldStorage()
thesearch = inputs.getvalue("q", "love monkey").decode("utf-8")

if not os.path.exists("index"):
    os.mkdir("index")
ix = open_dir("index")

searcher = ix.searcher()

from whoosh.qparser import QueryParser
parser = QueryParser("content", ix.schema)
q = parser.parse(thesearch)

r = searcher.search(q)

print "<form>" 
print '<input type="text" name="q" value='+quoteattr(thesearch)+' />'
print "</form>"

# print "searching ", ix.doc_count()
for hit in r:
    print "<div>"
    print "<h3>", hit["title"], "</h3>" #sq brackets dict/lists 
    print hit.highlights("content")
    print "</div>"