MediaWiki REL1_33
captcha.py
Go to the documentation of this file.
1#!/usr/bin/python
2#
3# Script to generate distorted text images for a captcha system.
4#
5# Copyright (C) 2005 Neil Harris
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License along
18# with this program; if not, write to the Free Software Foundation, Inc.,
19# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20# http://www.gnu.org/copyleft/gpl.html
21#
22# Further tweaks by Brion Vibber <brion@pobox.com>:
23# 2006-01-26: Add command-line options for the various parameters
24# 2007-02-19: Add --dirs param for hash subdirectory splits
25# Tweaks by Greg Sabino Mullane <greg@turnstep.com>:
26# 2008-01-06: Add regex check to skip words containing other than a-z
27
28import random
29import math
30import hashlib
31from optparse import OptionParser
32import os
33import sys
34import re
35import multiprocessing
36import time
37
38try:
39 from PIL import Image
40 from PIL import ImageFont
41 from PIL import ImageDraw
42 from PIL import ImageEnhance
43 from PIL import ImageOps
44 from PIL import ImageMath
45except:
46 sys.exit("This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/")
47
# Matches any character outside lowercase a-z; used to reject words that
# are unsuitable for a captcha challenge.
nonalpha = re.compile('[^a-z]')
49
def wobbly_copy(src, wob, col, scale, ang):
    """Do an X-axis wobbly copy of an image, sandwiched between two rotates.

    The source is rotated by roughly ``ang`` degrees, each scan line inside
    the rotated image's bounding box is shifted sideways by a sine wave plus
    random jitter, and the result is rotated back and sharpened.

    src   -- source image (must provide ``size``, ``rotate`` and ``getbbox``)
    wob   -- amplitude of the sine displacement; the jitter is +/- wob/2
    col   -- unused; kept so existing callers keep working
    scale -- multiplier for the frequency of the sine wave
    ang   -- nominal rotation angle in degrees, varied by +/-10

    Returns ``src`` itself when the rotated image has no non-zero pixels,
    otherwise a new sharpened 'RGB' image of the same size.
    """
    x, y = src.size
    f = random.uniform(4*scale, 5*scale)
    p = random.uniform(0, math.pi*2)
    rr = ang + random.uniform(-10, 10)  # vary, but not too much
    int_d = Image.new('RGB', src.size, 0)  # a black rectangle
    rot = src.rotate(rr, Image.BILINEAR)
    # Do a cheap bounding-box op here to try to limit work below
    bbx = rot.getbbox()
    if bbx is None:
        return src
    top, bottom = bbx[1], bbx[3]
    # and only do lines with content on
    for i in range(top, bottom + 1):
        # Drop a scan line in
        xoff = int(math.sin(p + (i*f/y)) * wob)
        xoff += int(random.uniform(-wob*0.5, wob*0.5))
        int_d.paste(rot.crop((0, i, x, i + 1)), (xoff, i))
    # try to stop blurring from building up
    int_d = int_d.rotate(-rr, Image.BILINEAR)
    enh = ImageEnhance.Sharpness(int_d)
    return enh.enhance(2)
74
75
def gen_captcha(text, fontname, fontsize, file_name):
    """Generate a captcha image and save it to ``file_name``.

    The text is drawn white-on-black in the middle of a generously sized
    square, distorted by several passes of wobbly_copy(), cropped to its
    bounding box plus a border, overlaid with blocky noise that carries a
    left-to-right gradient, and finally inverted to black-on-white.

    text      -- the string to render
    fontname  -- font file passed to ImageFont.truetype
    fontsize  -- font size passed to ImageFont.truetype
    file_name -- output path; the image format is determined from it
    """
    # white text on a black background
    bgcolor = 0x0
    fgcolor = 0xffffff
    # create a font object
    font = ImageFont.truetype(fontname, fontsize)
    # determine dimensions of the text
    dim = font.getsize(text)
    # create a new image significantly larger than the text
    edge = max(dim[0], dim[1]) + 2*min(dim[0], dim[1])
    im = Image.new('RGB', (edge, edge), bgcolor)
    d = ImageDraw.Draw(im)
    x, y = im.size
    # add the text to the image; floor division keeps the coordinates
    # integral on Python 3 as well as Python 2
    d.text((x//2 - dim[0]//2, y//2 - dim[1]//2), text, font=font, fill=fgcolor)
    k = 2
    wob = 0.09*dim[1]
    rot = 45
    # Apply lots of small stirring operations, rather than a few large ones
    # in order to get some uniformity of treatment, whilst
    # maintaining randomness
    for i in range(k):
        im = wobbly_copy(im, wob, bgcolor, i*2+3, rot+0)
        im = wobbly_copy(im, wob, bgcolor, i*2+1, rot+45)
        im = wobbly_copy(im, wob, bgcolor, i*2+2, rot+90)
        rot += 30

    # now get the bounding box of the nonzero parts of the image
    bbox = im.getbbox()
    bord = min(dim[0], dim[1]) // 4  # a bit of a border
    im = im.crop((bbox[0]-bord, bbox[1]-bord, bbox[2]+bord, bbox[3]+bord))

    # Create noise at 1/nblock resolution; sizes must be integers
    nblock = 4
    nsize = (im.size[0] // nblock, im.size[1] // nblock)
    noise = Image.new('L', nsize, bgcolor)
    data = noise.load()
    for nx in range(nsize[0]):
        # brightness ramps up from left to right
        gradient = 70 * nx // nsize[0]
        for ny in range(nsize[1]):
            r = random.randint(0, 65)
            data[nx, ny] = r + gradient
    # Turn speckles into blobs: scale the noise up to the size of the
    # image so the two can be combined pixel for pixel
    noise = noise.resize(im.size, Image.BILINEAR)
    # Add to the image
    im = ImageMath.eval('convert(convert(a, "L") / 3 + b, "RGB")', a=im, b=noise)

    # and turn into black on white
    im = ImageOps.invert(im)

    # save the image, in format determined from filename
    im.save(file_name)
129
def gen_subdir(basedir, md5hash, levels):
    """Generate a subdirectory path out of the first _levels_
    characters of _md5hash_, and ensure the directories exist
    under _basedir_.

    Returns the relative subdirectory path (one path component per
    character), or None when _levels_ is 0.

    Raises OSError if a directory cannot be created, and IndexError if
    _levels_ exceeds the length of _md5hash_.
    """
    subdir = None
    for i in range(0, levels):
        char = md5hash[i]
        if subdir:
            subdir = os.path.join(subdir, char)
        else:
            subdir = char
        fulldir = os.path.join(basedir, subdir)
        # Several worker processes may try to create the same directory at
        # once, so attempt the mkdir and tolerate "already exists" instead
        # of checking first.
        try:
            os.mkdir(fulldir)
        except OSError:
            if not os.path.isdir(fulldir):
                raise
    return subdir
145
def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length):
    """Make one attempt at producing a captcha challenge string.

    words      -- list of candidate words, or None to build a string of
                  random lowercase letters instead
    blacklist  -- iterable of substrings that must not occur in the result
    verbose    -- print diagnostics about the candidate and any rejection
    nwords     -- number of words to concatenate (at least one is used)
    min_length -- minimum acceptable length of the result
    max_length -- maximum acceptable length; values <= 0 mean "no limit"
                  (random mode then generates at most 10 characters, or
                  min_length characters if that is larger)

    Returns the candidate string, or None if this attempt was rejected.
    """
    if words is not None:
        if not words:
            # Nothing to choose from; randint(0, -1) would raise otherwise
            if verbose:
                print("skipping because the word list is empty")
            return None
        word = ''.join(random.choice(words) for _ in range(max(1, nwords)))
    else:
        if max_length <= 0:
            max_length = max(10, min_length)
        if min_length > max_length:
            # Contradictory limits; no length can satisfy both
            if verbose:
                print("skipping because minimum length %d exceeds maximum length %d" % (min_length, max_length))
            return None
        length = random.randint(min_length, max_length)
        word = ''.join(chr(97 + random.randint(0, 25)) for _ in range(length))

    if verbose:
        print("word is %s" % word)

    if len(word) < min_length:
        if verbose:
            print("skipping word pair '%s' because it has fewer than %d characters" % (word, min_length))
        return None

    if max_length > 0 and len(word) > max_length:
        if verbose:
            print("skipping word pair '%s' because it has more than %d characters" % (word, max_length))
        return None

    if nonalpha.search(word):
        if verbose:
            print("skipping word pair '%s' because it contains non-alphabetic characters" % word)
        return None

    for naughty in blacklist:
        if naughty in word:
            if verbose:
                print("skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty))
            return None
    return word
183
def pick_word(words, blacklist, verbose, nwords, min_length, max_length):
    """Return a valid challenge string, retrying try_pick_word() as needed.

    All arguments are handed to try_pick_word() unchanged. The script is
    terminated via sys.exit() if no attempt succeeds.
    """
    # If we can't find a valid combination in 1000 tries, just give up
    attempts_left = 1000
    while attempts_left > 0:
        candidate = try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
        if candidate:
            return candidate
        attempts_left -= 1
    sys.exit("Unable to find valid word combinations")
190
def read_wordlist(filename):
    """Read a word list file (one word per line).

    Each line is stripped of surrounding whitespace and lower-cased.
    Blank lines are dropped: an empty entry in a blacklist would be a
    substring of every word and so reject everything.

    Returns the list of words in file order.
    """
    # 'with' guarantees the file is closed even if reading fails
    with open(filename) as f:
        stripped = (line.strip().lower() for line in f)
        return [word for word in stripped if word]
196
def run_in_thread(object):
    """Worker entry point: generate one chunk of captcha images.

    object -- a sequence of
              [count, words, blacklist, opts, font, fontsize], where
              count is the number of images to generate, words and
              blacklist are passed to pick_word(), opts is the parsed
              option object (its key, output, dirs, verbose,
              number_words, min_length and max_length attributes are
              read), and font / fontsize are passed to gen_captcha().

    Each image is named image_<salt>_<hash>.png, where <hash> is the first
    16 hex digits of md5(key + salt + word + key + salt).
    """
    count, words, blacklist, opts, font, fontsize = object[:6]
    # Take the settings from opts rather than from module globals: the
    # globals are only assigned under __main__ and are not available in a
    # worker process that re-imports this module.
    key = opts.key
    output = opts.output
    dirs = opts.dirs
    verbose = opts.verbose

    # int() tolerates a chunk size that was computed with true division
    for i in range(int(count)):
        word = pick_word(words, blacklist, verbose, opts.number_words, opts.min_length, opts.max_length)
        salt = "%08x" % random.randrange(2**32)
        # 64 bits of hash is plenty for this purpose
        md5hash = hashlib.md5((key+salt+word+key+salt).encode('utf-8')).hexdigest()[:16]
        filename = "image_%s_%s.png" % (salt, md5hash)
        if dirs:
            subdir = gen_subdir(output, md5hash, dirs)
            filename = os.path.join(subdir, filename)
        if verbose:
            print(filename)
        gen_captcha(word, font, fontsize, os.path.join(output, filename))
217
if __name__ == '__main__':
    """This grabs random words from the dictionary 'words' (one
    word per line) and generates a captcha image for each one,
    with a keyed salted hash of the correct answer in the filename.

    To check a reply, hash it in the same way with the same salt and
    secret key, then compare with the hash value given.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    parser = OptionParser()
    parser.add_option("--wordlist", help="A list of words (required)", metavar="WORDS.txt")
    parser.add_option("--random", help="Use random charcters instead of a wordlist", action="store_true")
    parser.add_option("--key", help="The passphrase set as $wgCaptchaSecret (required)", metavar="KEY")
    parser.add_option("--output", help="The directory to put the images in - $wgCaptchaDirectory (required)", metavar="DIR")
    parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
    parser.add_option("--font-size", help="The font size (default 40)", metavar="N", type='int', default=40)
    parser.add_option("--count", help="The maximum number of images to make (default 20)", metavar="N", type='int', default=20)
    parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE", default=os.path.join(script_dir, "blacklist"))
    parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
    parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
    parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
    parser.add_option("--number-words", help="Number of words from the wordlist which make a captcha challenge (default 2)", type='int', default=2)
    parser.add_option("--min-length", help="Minimum length for a captcha challenge", type='int', default=1)
    parser.add_option("--max-length", help="Maximum length for a captcha challenge", type='int', default=-1)
    parser.add_option("--threads", help="Maximum number of threads to be used to generate captchas.", type='int', default=1)

    opts, args = parser.parse_args()

    # Validate the required options
    if opts.wordlist:
        wordlist = opts.wordlist
    elif opts.random:
        wordlist = None
    else:
        sys.exit("Need to specify a wordlist")
    if opts.key:
        key = opts.key
    else:
        sys.exit("Need to specify a key")
    if opts.output:
        output = opts.output
    else:
        sys.exit("Need to specify an output directory")
    if opts.font and os.path.exists(opts.font):
        font = opts.font
    else:
        sys.exit("Need to specify the location of a font")

    # These names are module globals on purpose: run_in_thread() has
    # historically looked some of them up at module level.
    blacklist = read_wordlist(opts.blacklist)
    count = opts.count
    fill = opts.fill
    dirs = opts.dirs
    verbose = opts.verbose
    fontsize = opts.font_size
    # A pool needs at least one worker, and the chunk size below divides
    # by this value
    threads = max(1, opts.threads)

    if fill:
        # Only generate as many images as are missing from the directory
        count = max(0, fill - len(os.listdir(output)))

    words = None
    if wordlist:
        words = read_wordlist(wordlist)
        # Keep only 4-5 letter words that do not start with "f" and have
        # no doubled letter at either end
        words = [x for x in words
            if len(x) in (4,5) and x[0] != "f"
            and x[0] != x[1] and x[-1] != x[-2]]

    if count == 0:
        sys.exit("No need to generate CAPTCHA images.")

    if count < threads:
        # NOTE(review): this generates a single image when fewer images
        # than threads were requested - confirm that is intended.
        chunks = 1
        threads = 1
    else:
        # Floor division: the chunk size must be an integer. Any remainder
        # is dropped, so slightly fewer than count images may be made.
        chunks = count // threads

    p = multiprocessing.Pool(threads)
    print("Generating %s CAPTCHA images separated in %s image(s) per chunk run by %s threads..." % (count, chunks, threads))
    # One independent work item per worker
    data = [[chunks, words, blacklist, opts, font, fontsize] for _ in range(threads)]

    try:
        p.map(run_in_thread, data)
    finally:
        # Always release the worker processes, even if a worker failed
        p.close()
        p.join()
and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy
while(( $__line=Maintenance::readconsole()) !==false) print
Definition eval.php:64
try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
Definition captcha.py:146
gen_captcha(text, fontname, fontsize, file_name)
Definition captcha.py:76
pick_word(words, blacklist, verbose, nwords, min_length, max_length)
Definition captcha.py:184
wobbly_copy(src, wob, col, scale, ang)
Definition captcha.py:51
read_wordlist(filename)
Definition captcha.py:191
gen_subdir(basedir, md5hash, levels)
Definition captcha.py:130
run_in_thread(object)
Definition captcha.py:197