MediaWiki REL1_30
captcha-old.py
Go to the documentation of this file.
1#!/usr/bin/python
2#
3# Script to generate distorted text images for a captcha system.
4#
5# Copyright (C) 2005 Neil Harris
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License along
18# with this program; if not, write to the Free Software Foundation, Inc.,
19# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20# http://www.gnu.org/copyleft/gpl.html
21#
22# Further tweaks by Brion Vibber <brion@pobox.com>:
23# 2006-01-26: Add command-line options for the various parameters
24# 2007-02-19: Add --dirs param for hash subdirectory splits
25# Tweaks by Greg Sabino Mullane <greg@turnstep.com>:
26# 2008-01-06: Add regex check to skip words containing other than a-z
27
28import random
29import math
30import hashlib
31from optparse import OptionParser
32import os
33import sys
34import re
35import multiprocessing
36import time
37
38try:
39 from PIL import Image
40 from PIL import ImageFont
41 from PIL import ImageDraw
42 from PIL import ImageEnhance
43 from PIL import ImageOps
44except:
45 sys.exit("This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/")
46
# Suitability test for candidate words: matches any character that is
# not a lowercase ASCII letter.
nonalpha = re.compile('[^a-z]')
48
49# Does X-axis wobbly copy, sandwiched between two rotates
def wobbly_copy(src, wob, col, scale, ang):
    """Return a copy of *src* with its rows shifted sideways in a wave.

    The image is rotated by roughly ``ang`` degrees, every row inside the
    rotated image's bounding box is pasted onto a black canvas at a
    sine-plus-random horizontal offset, and the canvas is rotated back
    and sharpened.

    Parameters:
        src: PIL image to distort; it is not modified.
        wob: amplitude of the horizontal displacement.
        col: unused; kept so existing callers keep working.
        scale: multiplier for the frequency of the sine wave.
        ang: base rotation angle in degrees; up to 30 degrees of random
            jitter is added in either direction.

    Returns:
        A new distorted image, or ``src`` itself when the rotated image
        has no bounding box (it is entirely black).
    """
    width, height = src.size
    # The random draws stay in their original order so a seeded run
    # produces the same sequence as before.
    freq = random.uniform(4 * scale, 5 * scale)
    phase = random.uniform(0, math.pi * 2)
    rr = ang + random.uniform(-30, 30)  # vary, but not too much
    rot = src.rotate(rr, Image.BILINEAR)

    # Do a cheap bounding-box op here to try to limit work below
    bbx = rot.getbbox()
    if bbx is None:
        return src
    top, bottom = bbx[1], bbx[3]

    int_d = Image.new('RGB', src.size, 0)  # a black rectangle
    # Only do lines with content on
    for row in range(top, bottom + 1):
        # Drop a scan line in
        xoff = int(math.sin(phase + (row * freq / height)) * wob)
        xoff += int(random.uniform(-wob * 0.5, wob * 0.5))
        int_d.paste(rot.crop((0, row, width, row + 1)), (xoff, row))

    # Rotate back, then sharpen to try to stop blurring from building up
    int_d = int_d.rotate(-rr, Image.BILINEAR)
    return ImageEnhance.Sharpness(int_d).enhance(2)
73
74
def gen_captcha(text, fontname, fontsize, file_name):
    """Render *text* as a distorted captcha image and save it.

    The text is drawn white-on-black in the middle of a square canvas,
    passed through several rounds of ``wobbly_copy``, cropped to its
    content plus a small border, inverted to black-on-white and written
    to *file_name*.

    Parameters:
        text: the string to draw.
        fontname: path of the TrueType font handed to ``ImageFont.truetype``.
        fontsize: font size handed to ``ImageFont.truetype``.
        file_name: output path; the image format is determined from it.

    Raises:
        ValueError: if the distorted image has no non-black pixels, so
            there is nothing to crop to.
    """
    # white text on a black background
    bgcolor = 0x0
    fgcolor = 0xffffff
    # create a font object
    font = ImageFont.truetype(fontname, fontsize)
    # Determine dimensions of the text.  FreeTypeFont.getsize() is gone
    # in newer Pillow releases; the right/bottom edges of getbbox() give
    # the same (width, height) pair.
    if hasattr(font, 'getsize'):
        dim = font.getsize(text)
    else:
        dim = font.getbbox(text)[2:]
    # create a new image significantly larger than the text
    edge = max(dim[0], dim[1]) + 2 * min(dim[0], dim[1])
    im = Image.new('RGB', (edge, edge), bgcolor)
    d = ImageDraw.Draw(im)
    x, y = im.size
    # Add the text to the image.  Floor division keeps the coordinates
    # integral on Python 3, matching what "/" did on Python 2.
    d.text((x // 2 - dim[0] // 2, y // 2 - dim[1] // 2), text,
           font=font, fill=fgcolor)
    k = 3
    wob = 0.20 * dim[1] / k
    rot = 45
    # Apply lots of small stirring operations, rather than a few large ones
    # in order to get some uniformity of treatment, whilst
    # maintaining randomness
    for i in range(k):
        im = wobbly_copy(im, wob, bgcolor, i * 2 + 3, rot + 0)
        im = wobbly_copy(im, wob, bgcolor, i * 2 + 1, rot + 45)
        im = wobbly_copy(im, wob, bgcolor, i * 2 + 2, rot + 90)
        rot += 30

    # now get the bounding box of the nonzero parts of the image
    bbox = im.getbbox()
    if bbox is None:
        # getbbox() returns None for an all-black image; fail with a
        # readable message instead of an opaque TypeError below.
        raise ValueError("captcha text %r produced an empty image" % (text,))
    bord = min(dim[0], dim[1]) // 4  # a bit of a border
    im = im.crop((bbox[0] - bord, bbox[1] - bord,
                  bbox[2] + bord, bbox[3] + bord))
    # and turn into black on white
    im = ImageOps.invert(im)

    # save the image, in format determined from filename
    im.save(file_name)
112
def gen_subdir(basedir, md5hash, levels):
    """Build a nested subdirectory path from the first *levels*
    characters of *md5hash* and make sure each level exists under
    *basedir*.

    Parameters:
        basedir: existing directory under which the levels are created.
        md5hash: string whose leading characters name the directories.
        levels: number of nesting levels (one character per level).

    Returns:
        The path relative to *basedir* (e.g. ``a/b`` for two levels), or
        ``None`` when *levels* is 0.

    Raises:
        IndexError: if *md5hash* has fewer than *levels* characters.
        OSError: if a directory cannot be created and does not already
            exist.
    """
    subdir = None
    for i in range(levels):
        char = md5hash[i]
        subdir = os.path.join(subdir, char) if subdir else char
        fulldir = os.path.join(basedir, subdir)
        # Several worker processes may create the same directory at the
        # same time, so attempt the mkdir and tolerate "already exists"
        # rather than checking first and racing.
        try:
            os.mkdir(fulldir)
        except OSError:
            if not os.path.isdir(fulldir):
                raise
    return subdir
128
def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length):
    """Make one attempt at producing a captcha word.

    With a word list, *nwords* random entries (at least one) are
    concatenated.  With ``words`` set to ``None``, a random string of
    lowercase letters is generated with a length between *min_length*
    and *max_length*.

    Parameters:
        words: list of candidate words, or ``None`` for random letters.
        blacklist: iterable of substrings the result must not contain;
            empty entries are ignored.
        verbose: when true, print the candidate and any rejection reason.
        nwords: number of list entries to concatenate.
        min_length: minimum accepted length.
        max_length: maximum accepted length; values <= 0 mean "no limit"
            for list words and "10 or *min_length*, whichever is larger"
            for random letters.

    Returns:
        The accepted word, or ``None`` when this attempt was rejected or
        no candidate could be built.
    """
    if words is not None:
        if not words:
            # Nothing to choose from; report failure instead of crashing.
            return None
        picks = max(1, nwords)
        word = ''.join(random.choice(words) for _ in range(picks))
    else:
        if max_length <= 0:
            # The implicit upper bound must never fall below min_length.
            max_length = max(10, min_length)
        if min_length > max_length:
            # No length can satisfy both bounds.
            return None
        length = random.randint(min_length, max_length)
        word = ''.join(chr(97 + random.randint(0, 25)) for _ in range(length))

    if verbose:
        print("word is %s" % word)

    if len(word) < min_length:
        if verbose:
            print("skipping word pair '%s' because it has fewer than %d characters" % (word, min_length))
        return None

    if max_length > 0 and len(word) > max_length:
        if verbose:
            print("skipping word pair '%s' because it has more than %d characters" % (word, max_length))
        return None

    if nonalpha.search(word):
        if verbose:
            print("skipping word pair '%s' because it contains non-alphabetic characters" % word)
        return None

    for naughty in blacklist:
        # An empty entry is a substring of everything and would reject
        # every candidate, so skip it.
        if naughty and naughty in word:
            if verbose:
                print("skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty))
            return None
    return word
166
def pick_word(words, blacklist, verbose, nwords, min_length, max_length):
    """Return the first truthy result of ``try_pick_word``.

    All arguments are forwarded unchanged to ``try_pick_word``.  After
    1000 unsuccessful attempts the process is terminated through
    ``sys.exit`` with an error message.
    """
    attempts_left = 1000
    while attempts_left > 0:
        candidate = try_pick_word(words, blacklist, verbose, nwords,
                                  min_length, max_length)
        if candidate:
            return candidate
        attempts_left -= 1
    # Ran out of attempts without a usable word: give up.
    sys.exit("Unable to find valid word combinations")
173
def read_wordlist(filename):
    """Read a one-word-per-line file into a list of words.

    Each line is stripped of surrounding whitespace and lowercased.
    Blank lines are dropped: an empty entry is never a usable word, and
    as a blacklist entry it would match every candidate.

    Parameters:
        filename: path of the text file to read.

    Returns:
        List of non-empty, lowercased words in file order.
    """
    # "with" closes the file even if reading fails part-way through.
    with open(filename) as f:
        entries = [line.strip().lower() for line in f]
    return [entry for entry in entries if entry]
179
def run_in_thread(object):
    """Generate one chunk of captcha images; entry point for pool workers.

    Parameters:
        object: sequence ``[count, words, blacklist, opts, font,
            fontsize]`` where ``opts`` is the parsed option object
            providing ``key``, ``output``, ``dirs``, ``verbose``,
            ``number_words``, ``min_length`` and ``max_length``.

    Each image is written below ``opts.output`` as
    ``image_<salt>_<hash>.png``, where the hash is the first 16 hex
    digits of an MD5 over key, salt and word.
    """
    count = object[0]
    words = object[1]
    blacklist = object[2]
    opts = object[3]
    font = object[4]
    fontsize = object[5]

    # Take the settings from opts instead of module globals: the globals
    # are only assigned under "if __name__ == '__main__'", so a worker
    # that re-imports this module rather than inheriting the parent's
    # memory would not have them.
    key = opts.key
    output = opts.output
    dirs = opts.dirs
    verbose = opts.verbose

    for i in range(count):
        word = pick_word(words, blacklist, verbose, opts.number_words, opts.min_length, opts.max_length)
        salt = "%08x" % random.randrange(2**32)
        # 64 bits of hash is plenty for this purpose
        md5hash = hashlib.md5((key+salt+word+key+salt).encode('utf-8')).hexdigest()[:16]
        filename = "image_%s_%s.png" % (salt, md5hash)
        if dirs:
            subdir = gen_subdir(output, md5hash, dirs)
            filename = os.path.join(subdir, filename)
        if verbose:
            print(filename)
        gen_captcha(word, font, fontsize, os.path.join(output, filename))
200
if __name__ == '__main__':
    # This grabs random words from the dictionary 'words' (one
    # word per line) and generates a captcha image for each one,
    # with a keyed salted hash of the correct answer in the filename.
    #
    # To check a reply, hash it in the same way with the same salt and
    # secret key, then compare with the hash value given.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    parser = OptionParser()
    parser.add_option("--wordlist", help="A list of words (required)", metavar="WORDS.txt")
    parser.add_option("--random", help="Use random charcters instead of a wordlist", action="store_true")
    parser.add_option("--key", help="The passphrase set as $wgCaptchaSecret (required)", metavar="KEY")
    parser.add_option("--output", help="The directory to put the images in - $wgCaptchaDirectory (required)", metavar="DIR")
    parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
    parser.add_option("--font-size", help="The font size (default 40)", metavar="N", type='int', default=40)
    parser.add_option("--count", help="The maximum number of images to make (default 20)", metavar="N", type='int', default=20)
    parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE", default=os.path.join(script_dir, "blacklist"))
    parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
    parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
    parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
    parser.add_option("--number-words", help="Number of words from the wordlist which make a captcha challenge (default 2)", type='int', default=2)
    parser.add_option("--min-length", help="Minimum length for a captcha challenge", type='int', default=1)
    parser.add_option("--max-length", help="Maximum length for a captcha challenge", type='int', default=-1)
    parser.add_option("--threads", help="Maximum number of threads to be used to generate captchas.", type='int', default=1)

    opts, args = parser.parse_args()

    # Validate the required options, exiting with a message on the first
    # one that is missing.
    if opts.wordlist:
        wordlist = opts.wordlist
    elif opts.random:
        wordlist = None
    else:
        sys.exit("Need to specify a wordlist")
    if opts.key:
        key = opts.key
    else:
        sys.exit("Need to specify a key")
    if opts.output:
        output = opts.output
    else:
        sys.exit("Need to specify an output directory")
    if opts.font and os.path.exists(opts.font):
        font = opts.font
    else:
        sys.exit("Need to specify the location of a font")

    blacklist = read_wordlist(opts.blacklist)
    count = opts.count
    fill = opts.fill
    dirs = opts.dirs
    verbose = opts.verbose
    fontsize = opts.font_size
    threads = opts.threads

    if fill:
        # Only generate as many images as are missing from the directory.
        count = max(0, fill - len(os.listdir(output)))

    words = None
    if wordlist:
        words = read_wordlist(wordlist)
        # Keep 4- and 5-letter words that do not start with "f" and do
        # not begin or end with a doubled letter.
        words = [x for x in words
                 if len(x) in (4, 5) and x[0] != "f"
                 and x[0] != x[1] and x[-1] != x[-2]]

    if count <= 0:
        sys.exit("No need to generate CAPTCHA images.")

    # Never start more workers than there are images, and never fewer
    # than one.
    threads = max(1, min(threads, count))
    # Spread the images evenly; the first "leftover" workers take one
    # extra so the total is exactly "count".
    chunks, leftover = divmod(count, threads)

    data = []
    print("Generating %s CAPTCHA images separated in %s image(s) per chunk run by %s threads..." % (count, chunks, threads))
    for i in range(threads):
        size = chunks + 1 if i < leftover else chunks
        data.append([size, words, blacklist, opts, font, fontsize])

    p = multiprocessing.Pool(threads)
    try:
        p.map(run_in_thread, data)
    finally:
        # Shut the workers down cleanly whether or not map() succeeded.
        p.close()
        p.join()
282
print
Definition cleanup.php:99
pick_word(words, blacklist, verbose, nwords, min_length, max_length)
try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
read_wordlist(filename)
gen_subdir(basedir, md5hash, levels)
gen_captcha(text, fontname, fontsize, file_name)
wobbly_copy(src, wob, col, scale, ang)
run_in_thread(object)