MediaWiki  1.29.2
captcha.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 #
3 # Script to generate distorted text images for a captcha system.
4 #
5 # Copyright (C) 2005 Neil Harris
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 # http://www.gnu.org/copyleft/gpl.html
21 #
22 # Further tweaks by Brion Vibber <brion@pobox.com>:
23 # 2006-01-26: Add command-line options for the various parameters
24 # 2007-02-19: Add --dirs param for hash subdirectory splits
25 # Tweaks by Greg Sabino Mullane <greg@turnstep.com>:
26 # 2008-01-06: Add regex check to skip words containing other than a-z
27 
28 import random
29 import math
30 import hashlib
31 from optparse import OptionParser
32 import os
33 import sys
34 import re
35 
try:
    from PIL import Image
    from PIL import ImageFont
    from PIL import ImageDraw
    from PIL import ImageEnhance
    from PIL import ImageOps
    from PIL import ImageMath
except ImportError:
    # Only a failed import should produce this message; a bare "except:"
    # would also intercept KeyboardInterrupt and SystemExit.
    sys.exit("This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/")

# Matches any character other than lowercase a-z; used to test words for
# suitability.
nonalpha = re.compile('[^a-z]')
47 
# Does X-axis wobbly copy, sandwiched between two rotates
def wobbly_copy(src, wob, col, scale, ang):
    """Return a distorted copy of src with its scan lines shifted sideways.

    The image is rotated, each row inside the bounding box of the rotated
    image is pasted into a black image at a horizontal offset made of a
    sine wave plus random jitter, and the result is rotated back and
    sharpened.

    src   -- image to distort (its size, rotate() and crop() are used)
    wob   -- amplitude of the horizontal offset
    col   -- unused; kept so the signature stays compatible with callers
    scale -- multiplier applied to the frequency of the sine wave
    ang   -- base rotation angle; a random value in [-10, 10] is added

    Returns src itself, unchanged, when the rotated image has no bounding
    box (getbbox() returned None).
    """
    width, height = src.size
    # The order of the random draws is kept as it always was.
    freq = random.uniform(4*scale, 5*scale)
    phase = random.uniform(0, math.pi*2)
    rr = ang + random.uniform(-10, 10)  # vary, but not too much
    rot = src.rotate(rr, Image.BILINEAR)
    # Do a cheap bounding-box op here to try to limit work below
    bbx = rot.getbbox()
    if bbx is None:
        return src
    top, bottom = bbx[1], bbx[3]
    # Allocate the scratch image only once we know there is work to do.
    int_d = Image.new('RGB', src.size, 0)  # a black rectangle
    # and only do lines with content on
    for i in range(top, bottom+1):
        # Drop a scan line in
        xoff = int(math.sin(phase+(i*freq/height))*wob)
        xoff += int(random.uniform(-wob*0.5, wob*0.5))
        int_d.paste(rot.crop((0, i, width, i+1)), (xoff, i))
    # try to stop blurring from building up
    int_d = int_d.rotate(-rr, Image.BILINEAR)
    enh = ImageEnhance.Sharpness(int_d)
    return enh.enhance(2)
72 
73 
def gen_captcha(text, fontname, fontsize, file_name):
    """Generate a captcha image for text and save it to file_name.

    text      -- the string to render
    fontname  -- font passed to ImageFont.truetype()
    fontsize  -- font size passed to ImageFont.truetype()
    file_name -- destination path; the image format is determined from it

    Pixel sizes and coordinates are computed with floor division (//) so
    they stay integers on Python 3 as well as Python 2. With "/" Python 3
    produces floats, which Image.new() and range() below cannot use.
    """
    # white text on a black background
    bgcolor = 0x0
    fgcolor = 0xffffff
    # create a font object
    font = ImageFont.truetype(fontname, fontsize)
    # determine dimensions of the text
    dim = font.getsize(text)
    # create a new image significantly larger than the text
    edge = max(dim[0], dim[1]) + 2*min(dim[0], dim[1])
    im = Image.new('RGB', (edge, edge), bgcolor)
    d = ImageDraw.Draw(im)
    x, y = im.size
    # add the text to the image
    d.text((x//2 - dim[0]//2, y//2 - dim[1]//2), text, font=font, fill=fgcolor)
    k = 2
    wob = 0.09*dim[1]
    rot = 45
    # Apply lots of small stirring operations, rather than a few large ones
    # in order to get some uniformity of treatment, whilst
    # maintaining randomness
    for i in range(k):
        im = wobbly_copy(im, wob, bgcolor, i*2+3, rot+0)
        im = wobbly_copy(im, wob, bgcolor, i*2+1, rot+45)
        im = wobbly_copy(im, wob, bgcolor, i*2+2, rot+90)
        rot += 30

    # now get the bounding box of the nonzero parts of the image
    bbox = im.getbbox()
    bord = min(dim[0], dim[1])//4  # a bit of a border
    im = im.crop((bbox[0]-bord, bbox[1]-bord, bbox[2]+bord, bbox[3]+bord))

    # Create noise
    nblock = 4
    nsize = (im.size[0] // nblock, im.size[1] // nblock)
    noise = Image.new('L', nsize, bgcolor)
    data = noise.load()
    for nx in range(nsize[0]):
        # left-to-right brightness ramp, kept as an integer pixel value
        gradient = 70 * nx // nsize[0]
        for ny in range(nsize[1]):
            r = random.randint(0, 65)
            data[nx, ny] = r + gradient
    # Turn speckles into blobs
    noise = noise.resize(im.size, Image.BILINEAR)
    # Add to the image
    im = ImageMath.eval('convert(convert(a, "L") / 3 + b, "RGB")', a=im, b=noise)

    # and turn into black on white
    im = ImageOps.invert(im)

    # save the image, in format determined from filename
    im.save(file_name)
127 
def gen_subdir(basedir, md5hash, levels):
    """Build a nested subdirectory path from the leading characters of
    md5hash, one character per level, creating each missing directory
    under basedir along the way.

    Returns the relative path (without basedir), or None when levels
    is 0."""
    relative = None
    for depth in range(levels):
        piece = md5hash[depth]
        relative = os.path.join(relative, piece) if relative else piece
        target = os.path.join(basedir, relative)
        if os.path.exists(target):
            continue
        os.mkdir(target)
    return relative
143 
def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length):
    """Make a single attempt at producing a captcha word.

    words      -- list of candidate words, or None to build a string of
                  random lowercase letters instead
    blacklist  -- strings that must not occur anywhere in the result
    verbose    -- print the candidate and the reason for any rejection
    nwords     -- number of list words to concatenate (at least one is
                  always used)
    min_length -- minimum accepted length
    max_length -- maximum accepted length; values <= 0 mean "no limit" for
                  list words and are replaced by 10 for random strings

    Returns the word, or None if this attempt was rejected. In random mode
    random.randint() raises ValueError if min_length exceeds the effective
    maximum length.
    """
    if words is not None:
        if not words:
            # Nothing to choose from: report a failed attempt rather than
            # letting the random module raise on an empty range.
            return None
        word = ''.join(random.choice(words) for _ in range(max(nwords, 1)))
    else:
        max_length = max_length if max_length > 0 else 10
        length = random.randint(min_length, max_length)
        word = ''.join(chr(97 + random.randint(0, 25)) for _ in range(length))

    if verbose:
        print("word is %s" % word)

    if len(word) < min_length:
        if verbose:
            print("skipping word pair '%s' because it has fewer than %d characters" % (word, min_length))
        return None

    if max_length > 0 and len(word) > max_length:
        if verbose:
            print("skipping word pair '%s' because it has more than %d characters" % (word, max_length))
        return None

    if nonalpha.search(word):
        if verbose:
            print("skipping word pair '%s' because it contains non-alphabetic characters" % word)
        return None

    for naughty in blacklist:
        # An empty entry is a substring of every word, so it is ignored;
        # otherwise one blank blacklist line would reject everything.
        if naughty and naughty in word:
            if verbose:
                print("skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty))
            return None
    return word
181 
def pick_word(words, blacklist, verbose, nwords, min_length, max_length):
    """Keep calling try_pick_word() until it yields a usable word.

    Exits the program if 1000 consecutive attempts all fail.
    """
    attempts_left = 1000
    while attempts_left > 0:
        attempts_left -= 1
        candidate = try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
        if not candidate:
            continue
        return candidate
    # No valid combination found within the attempt budget: give up
    sys.exit("Unable to find valid word combinations")
188 
def read_wordlist(filename):
    """Read a word list file with one entry per line.

    Every line is stripped of surrounding whitespace and lower-cased.
    Blank lines are skipped: an empty entry is never a usable word, and as
    a blacklist entry the empty string is a substring of every candidate.

    Returns the entries as a list, in file order.
    """
    # "with" closes the file even if reading fails part-way through
    with open(filename) as f:
        cleaned = (line.strip().lower() for line in f)
        return [entry for entry in cleaned if entry]
194 
if __name__ == '__main__':
    # This grabs random words from the dictionary 'words' (one
    # word per line) and generates a captcha image for each one,
    # with a keyed salted hash of the correct answer in the filename.
    #
    # To check a reply, hash it in the same way with the same salt and
    # secret key, then compare with the hash value given.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    parser = OptionParser()
    parser.add_option("--wordlist", help="A list of words (required)", metavar="WORDS.txt")
    parser.add_option("--random", help="Use random characters instead of a wordlist", action="store_true")
    parser.add_option("--key", help="The passphrase set as $wgCaptchaSecret (required)", metavar="KEY")
    parser.add_option("--output", help="The directory to put the images in - $wgCaptchaDirectory (required)", metavar="DIR")
    parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
    parser.add_option("--font-size", help="The font size (default 40)", metavar="N", type='int', default=40)
    parser.add_option("--count", help="The maximum number of images to make (default 20)", metavar="N", type='int', default=20)
    parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE", default=os.path.join(script_dir, "blacklist"))
    parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
    parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
    parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
    parser.add_option("--number-words", help="Number of words from the wordlist which make a captcha challenge (default 2)", type='int', default=2)
    parser.add_option("--min-length", help="Minimum length for a captcha challenge", type='int', default=1)
    parser.add_option("--max-length", help="Maximum length for a captcha challenge", type='int', default=-1)

    opts, args = parser.parse_args()

    # Validate the required options, in the same order as before
    if opts.wordlist:
        wordlist = opts.wordlist
    elif opts.random:
        wordlist = None
    else:
        sys.exit("Need to specify a wordlist")
    if opts.key:
        key = opts.key
    else:
        sys.exit("Need to specify a key")
    if opts.output:
        output = opts.output
    else:
        sys.exit("Need to specify an output directory")
    if opts.font and os.path.exists(opts.font):
        font = opts.font
    else:
        sys.exit("Need to specify the location of a font")

    blacklist = read_wordlist(opts.blacklist)
    count = opts.count
    fill = opts.fill
    dirs = opts.dirs
    verbose = opts.verbose
    fontsize = opts.font_size

    if fill:
        # Only make as many images as are needed to reach N directory entries.
        # NOTE(review): this counts the top-level entries of the output
        # directory, which is why the help text says --fill cannot be
        # combined with --dirs; the combination is not rejected here.
        count = max(0, fill - len(os.listdir(output)))

    words = None
    if wordlist:
        words = read_wordlist(wordlist)
        # Keep only 4- or 5-letter words that do not start with "f" and do
        # not begin or end with a doubled letter.
        words = [x for x in words
                 if len(x) in (4, 5) and x[0] != "f"
                 and x[0] != x[1] and x[-1] != x[-2]]

    for i in range(count):
        word = pick_word(words, blacklist, verbose, opts.number_words, opts.min_length, opts.max_length)
        salt = "%08x" % random.randrange(2**32)
        # 64 bits of hash is plenty for this purpose
        md5hash = hashlib.md5((key+salt+word+key+salt).encode('utf-8')).hexdigest()[:16]
        filename = "image_%s_%s.png" % (salt, md5hash)
        if dirs:
            subdir = gen_subdir(output, md5hash, dirs)
            filename = os.path.join(subdir, filename)
        if verbose:
            print(filename)
        gen_captcha(word, font, fontsize, os.path.join(output, filename))
270 
captcha.gen_subdir
def gen_subdir(basedir, md5hash, levels)
Definition: captcha.py:128
captcha.pick_word
def pick_word(words, blacklist, verbose, nwords, min_length, max_length)
Definition: captcha.py:182
captcha.read_wordlist
def read_wordlist(filename)
Definition: captcha.py:189
captcha.gen_captcha
def gen_captcha(text, fontname, fontsize, file_name)
Definition: captcha.py:74
Makefile.open
open
Definition: Makefile.py:18
captcha.try_pick_word
def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
Definition: captcha.py:144
captcha.wobbly_copy
def wobbly_copy(src, wob, col, scale, ang)
Definition: captcha.py:49