MediaWiki REL1_33
captcha-old.py
Go to the documentation of this file.
1#!/usr/bin/python
2#
3# Script to generate distorted text images for a captcha system.
4#
5# Copyright (C) 2005 Neil Harris
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License along
18# with this program; if not, write to the Free Software Foundation, Inc.,
19# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20# http://www.gnu.org/copyleft/gpl.html
21#
22# Further tweaks by Brion Vibber <brion@pobox.com>:
23# 2006-01-26: Add command-line options for the various parameters
24# 2007-02-19: Add --dirs param for hash subdirectory splits
25# Tweaks by Greg Sabino Mullane <greg@turnstep.com>:
26# 2008-01-06: Add regex check to skip words containing other than a-z
27
28import random
29import math
30import hashlib
31from optparse import OptionParser
32import os
33import sys
34import re
35import multiprocessing
36import time
37
38try:
39 from PIL import Image
40 from PIL import ImageFont
41 from PIL import ImageDraw
42 from PIL import ImageEnhance
43 from PIL import ImageOps
44except:
45 sys.exit("This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/")
46
47nonalpha = re.compile('[^a-z]') # regex to test for suitability of words
48
49# Does X-axis wobbly copy, sandwiched between two rotates
def wobbly_copy(src, wob, col, scale, ang):
    """Return a horizontally "wobbled" copy of the image *src*.

    The image is rotated, every scan line inside the rotated bounding box
    is shifted sideways by a sine-wave offset plus random jitter, and the
    result is rotated back and sharpened.

    src   -- source image; its non-zero pixels are treated as content
    wob   -- amplitude of the sine displacement (jitter is up to half of it)
    col   -- unused by this function; kept so existing callers keep working
    scale -- multiplier for the randomly chosen sine frequency
    ang   -- base rotation angle in degrees, varied by up to +/-30

    Returns the distorted image, or *src* unchanged when the rotated image
    has no bounding box (i.e. it is entirely zero).
    """
    width, height = src.size
    freq = random.uniform(4*scale, 5*scale)
    phase = random.uniform(0, math.pi*2)
    angle = ang + random.uniform(-30, 30)  # vary, but not too much
    warped = Image.new('RGB', src.size, 0)  # a black rectangle
    rotated = src.rotate(angle, Image.BILINEAR)
    # Do a cheap bounding-box op here to try to limit work below
    bbox = rotated.getbbox()
    if bbox is None:
        return src
    top, bottom = bbox[1], bbox[3]
    # and only do lines with content on
    for row in range(top, bottom + 1):
        # Drop a scan line in, displaced sideways
        xoff = int(math.sin(phase + (row*freq/height))*wob)
        xoff += int(random.uniform(-wob*0.5, wob*0.5))
        warped.paste(rotated.crop((0, row, width, row + 1)), (xoff, row))
    warped = warped.rotate(-angle, Image.BILINEAR)
    # try to stop blurring from building up
    return ImageEnhance.Sharpness(warped).enhance(2)
73
74
def gen_captcha(text, fontname, fontsize, file_name):
    """Generate a captcha image of *text* and save it to *file_name*.

    The text is drawn white-on-black in the middle of a square canvas,
    distorted by repeated calls to wobbly_copy(), cropped to its bounding
    box plus a small border, inverted to black-on-white and saved.

    text      -- the string to render
    fontname  -- path of the TrueType font to load
    fontsize  -- font size passed to ImageFont.truetype()
    file_name -- output path; the image format is determined from it
    """
    # white text on a black background
    bgcolor = 0x0
    fgcolor = 0xffffff
    # create a font object
    font = ImageFont.truetype(fontname, fontsize)
    # determine dimensions of the text
    if hasattr(font, 'getsize'):
        dim = font.getsize(text)
    else:
        # Newer Pillow releases no longer provide getsize(); use the
        # right/bottom extents of the text bounding box instead.
        dim = font.getbbox(text)[2:]
    # create a new image significantly larger than the text
    edge = max(dim[0], dim[1]) + 2*min(dim[0], dim[1])
    im = Image.new('RGB', (edge, edge), bgcolor)
    d = ImageDraw.Draw(im)
    x, y = im.size
    # add the text to the image, centred
    d.text((x/2-dim[0]/2, y/2-dim[1]/2), text, font=font, fill=fgcolor)
    k = 3
    wob = 0.20*dim[1]/k
    rot = 45
    # Apply lots of small stirring operations, rather than a few large ones
    # in order to get some uniformity of treatment, whilst
    # maintaining randomness
    for i in range(k):
        im = wobbly_copy(im, wob, bgcolor, i*2+3, rot+0)
        im = wobbly_copy(im, wob, bgcolor, i*2+1, rot+45)
        im = wobbly_copy(im, wob, bgcolor, i*2+2, rot+90)
        rot += 30

    # now get the bounding box of the nonzero parts of the image
    bbox = im.getbbox()
    bord = min(dim[0], dim[1])/4  # a bit of a border
    im = im.crop((bbox[0]-bord, bbox[1]-bord, bbox[2]+bord, bbox[3]+bord))
    # and turn into black on white
    im = ImageOps.invert(im)

    # save the image, in format determined from filename
    im.save(file_name)
112
def gen_subdir(basedir, md5hash, levels):
    """Generate a subdirectory path out of the first _levels_
    characters of _md5hash_, and ensure the directories exist
    under _basedir_.

    Returns the relative subdirectory path (one path component per
    character), or None when _levels_ is zero or negative.
    Raises OSError when a directory cannot be created.
    """
    subdir = None
    for i in range(levels):
        char = md5hash[i]
        subdir = os.path.join(subdir, char) if subdir else char
        fulldir = os.path.join(basedir, subdir)
        try:
            os.mkdir(fulldir)
        except OSError:
            # Several worker processes may create the same directory at the
            # same time; an already existing directory is not an error.
            if not os.path.isdir(fulldir):
                raise
    return subdir
128
def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length):
    """Make one attempt at producing a captcha word.

    words      -- list of candidate words, or None to build a string of
                  random lowercase letters instead
    blacklist  -- substrings that must not occur in the result
    verbose    -- print the candidate and the reason for any rejection
    nwords     -- number of list words to concatenate (at least one is used)
    min_length -- minimum acceptable length
    max_length -- maximum acceptable length; values <= 0 mean "no limit"
                  for list words and a cap of 10 (or min_length, if that is
                  larger) for random strings

    Returns the word, or None when this attempt produced nothing usable.
    """
    if words is not None:
        if not words:
            # Nothing to choose from; report failure instead of crashing
            # inside the random number generator.
            return None
        word = ''.join(random.choice(words) for _ in range(max(nwords, 1)))
    else:
        if max_length <= 0:
            max_length = max(10, min_length)
        if min_length > max_length:
            # Contradictory limits can never be satisfied.
            return None
        length = random.randint(min_length, max_length)
        word = ''.join(chr(97 + random.randint(0, 25)) for _ in range(length))

    if verbose:
        print("word is %s" % word)

    if len(word) < min_length:
        if verbose:
            print("skipping word pair '%s' because it has fewer than %d characters" % (word, min_length))
        return None

    if max_length > 0 and len(word) > max_length:
        if verbose:
            print("skipping word pair '%s' because it has more than %d characters" % (word, max_length))
        return None

    if nonalpha.search(word):
        if verbose:
            print("skipping word pair '%s' because it contains non-alphabetic characters" % word)
        return None

    for naughty in blacklist:
        if naughty in word:
            if verbose:
                print("skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty))
            return None
    return word
166
def pick_word(words, blacklist, verbose, nwords, min_length, max_length):
    """Keep calling try_pick_word() until it yields a usable word.

    Exits the program if 1000 attempts in a row produce nothing.
    """
    attempts_left = 1000  # If we can't find a valid combination by then, just give up
    while attempts_left > 0:
        candidate = try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
        if candidate:
            return candidate
        attempts_left -= 1
    sys.exit("Unable to find valid word combinations")
173
def read_wordlist(filename):
    """Read *filename* and return its lines as a list of words.

    Each line is stripped of surrounding whitespace and lower-cased.
    Blank lines are kept as empty strings. The file is closed even if
    reading fails.
    """
    with open(filename) as f:
        return [line.strip().lower() for line in f]
179
def run_in_thread(object):
    """Worker entry point: generate one batch of captcha images.

    *object* is a sequence of
    [count, words, blacklist, opts, font, fontsize], where *opts* is the
    parsed command-line options object. (The parameter name shadows the
    builtin; it is kept for compatibility with existing callers.)

    For each image a word is picked, a random salt is generated and the
    file is named after the salt and a keyed, salted MD5 hash of the word.
    """
    count, words, blacklist, opts, font, fontsize = object[:6]
    # Take the settings from opts rather than from module globals: globals
    # assigned under the __main__ guard are not defined in worker processes
    # that re-import this module instead of being forked.
    key = opts.key
    output = opts.output
    dirs = opts.dirs
    verbose = opts.verbose

    for i in range(count):
        word = pick_word(words, blacklist, verbose, opts.number_words, opts.min_length, opts.max_length)
        salt = "%08x" % random.randrange(2**32)
        # 64 bits of hash is plenty for this purpose
        md5hash = hashlib.md5((key+salt+word+key+salt).encode('utf-8')).hexdigest()[:16]
        filename = "image_%s_%s.png" % (salt, md5hash)
        if dirs:
            subdir = gen_subdir(output, md5hash, dirs)
            filename = os.path.join(subdir, filename)
        if verbose:
            print(filename)
        gen_captcha(word, font, fontsize, os.path.join(output, filename))
200
201if __name__ == '__main__':
202 """This grabs random words from the dictionary 'words' (one
203 word per line) and generates a captcha image for each one,
204 with a keyed salted hash of the correct answer in the filename.
205
206 To check a reply, hash it in the same way with the same salt and
207 secret key, then compare with the hash value given.
208 """
209 script_dir = os.path.dirname(os.path.realpath(__file__))
210 parser = OptionParser()
211 parser.add_option("--wordlist", help="A list of words (required)", metavar="WORDS.txt")
212 parser.add_option("--random", help="Use random charcters instead of a wordlist", action="store_true")
213 parser.add_option("--key", help="The passphrase set as $wgCaptchaSecret (required)", metavar="KEY")
214 parser.add_option("--output", help="The directory to put the images in - $wgCaptchaDirectory (required)", metavar="DIR")
215 parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
216 parser.add_option("--font-size", help="The font size (default 40)", metavar="N", type='int', default=40)
217 parser.add_option("--count", help="The maximum number of images to make (default 20)", metavar="N", type='int', default=20)
218 parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE", default=os.path.join(script_dir, "blacklist"))
219 parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
220 parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
221 parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
222 parser.add_option("--number-words", help="Number of words from the wordlist which make a captcha challenge (default 2)", type='int', default=2)
223 parser.add_option("--min-length", help="Minimum length for a captcha challenge", type='int', default=1)
224 parser.add_option("--max-length", help="Maximum length for a captcha challenge", type='int', default=-1)
225 parser.add_option("--threads", help="Maximum number of threads to be used to generate captchas.", type='int', default=1)
226
227 opts, args = parser.parse_args()
228
229 if opts.wordlist:
230 wordlist = opts.wordlist
231 elif opts.random:
232 wordlist = None
233 else:
234 sys.exit("Need to specify a wordlist")
235 if opts.key:
237 else:
238 sys.exit("Need to specify a key")
239 if opts.output:
240 output = opts.output
241 else:
242 sys.exit("Need to specify an output directory")
245 else:
246 sys.exit("Need to specify the location of a font")
247
252 verbose = opts.verbose
253 fontsize = opts.font_size
254 threads = opts.threads
255
256 if fill:
257 count = max(0, fill - len(os.listdir(output)))
258
259 words = None
260 if wordlist:
261 words = read_wordlist(wordlist)
262 words = [x for x in words
263 if len(x) in (4,5) and x[0] != "f"
264 and x[0] != x[1] and x[-1] != x[-2]]
265
266 if count == 0:
267 sys.exit("No need to generate CAPTCHA images.")
268
269 if count < threads:
270 chunks = 1
271 threads = 1
272 else:
273 chunks = int(count / threads)
274
276 data = []
277 print("Generating %s CAPTCHA images separated in %s image(s) per chunk run by %s threads..." % (count, chunks, threads))
278 for i in range(0, threads):
279 data.append([chunks, words, blacklist, opts, font, fontsize])
280
281 p.map(run_in_thread, data)
282
and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy
while(( $__line=Maintenance::readconsole()) !==false) print
Definition eval.php:64
pick_word(words, blacklist, verbose, nwords, min_length, max_length)
try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
read_wordlist(filename)
gen_subdir(basedir, md5hash, levels)
gen_captcha(text, fontname, fontsize, file_name)
wobbly_copy(src, wob, col, scale, ang)
run_in_thread(object)