MediaWiki  1.30.0
captcha-old.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 #
3 # Script to generate distorted text images for a captcha system.
4 #
5 # Copyright (C) 2005 Neil Harris
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 # http://www.gnu.org/copyleft/gpl.html
21 #
22 # Further tweaks by Brion Vibber <brion@pobox.com>:
23 # 2006-01-26: Add command-line options for the various parameters
24 # 2007-02-19: Add --dirs param for hash subdirectory splits
25 # Tweaks by Greg Sabino Mullane <greg@turnstep.com>:
26 # 2008-01-06: Add regex check to skip words containing other than a-z
27 
28 import random
29 import math
30 import hashlib
31 from optparse import OptionParser
32 import os
33 import sys
34 import re
35 import multiprocessing
36 import time
37 
# The imaging library is a hard requirement. Only a failed import is turned
# into the friendly message below; anything else (Ctrl-C, SystemExit, an
# unrelated error raised while importing) is allowed to propagate instead of
# being masked by a catch-all handler.
try:
    from PIL import Image
    from PIL import ImageFont
    from PIL import ImageDraw
    from PIL import ImageEnhance
    from PIL import ImageOps
except ImportError:
    sys.exit("This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/")
46 
# Matches any character outside lowercase a-z; a hit means the word is
# unsuitable for a captcha.
nonalpha = re.compile(r'[^a-z]')
48 
# Does X-axis wobbly copy, sandwiched between two rotates
def wobbly_copy(src, wob, col, scale, ang):
    """Return a horizontally "wobbled" copy of the image ``src``.

    The image is rotated by roughly ``ang`` degrees, each scan line that
    falls inside the bounding box of the rotated content is pasted onto a
    black canvas with a horizontal offset (a sine wave plus random jitter),
    and the canvas is then rotated back and sharpened.

    Parameters:
        src   -- PIL image to distort.
        wob   -- amplitude of the horizontal displacement, in pixels; the
                 random jitter is drawn from +/- half of this value.
        col   -- unused by this function; kept so existing callers that
                 pass a background colour keep working.
        scale -- multiplier for the sine frequency, which is drawn
                 uniformly from [4*scale, 5*scale].
        ang   -- base rotation angle in degrees; a random value in
                 [-30, 30] is added to it.

    Returns ``src`` itself when the rotated image has no bounding box
    (``getbbox()`` returned None), otherwise a new sharpened image.
    """
    width, height = src.size
    freq = random.uniform(4*scale, 5*scale)
    phase = random.uniform(0, math.pi*2)
    angle = ang + random.uniform(-30, 30)  # vary, but not too much
    canvas = Image.new('RGB', src.size, 0)  # a black rectangle
    rotated = src.rotate(angle, Image.BILINEAR)

    # Do a cheap bounding-box op here to try to limit work below
    bbox = rotated.getbbox()
    if bbox is None:
        return src
    top, bottom = bbox[1], bbox[3]

    # and only do lines with content on
    for row in range(top, bottom + 1):
        # Drop a scan line in, shifted sideways by the wave plus some jitter
        xoff = int(math.sin(phase + (row*freq/height))*wob)
        xoff += int(random.uniform(-wob*0.5, wob*0.5))
        canvas.paste(rotated.crop((0, row, width, row + 1)), (xoff, row))

    # Undo the rotation, then sharpen to try to stop blurring from building
    # up over repeated calls
    canvas = canvas.rotate(-angle, Image.BILINEAR)
    return ImageEnhance.Sharpness(canvas).enhance(2)
73 
74 
def gen_captcha(text, fontname, fontsize, file_name):
    """Generate a captcha image for ``text`` and save it to ``file_name``.

    The text is drawn in white on a black square canvas, distorted with
    several passes of wobbly_copy(), cropped to the remaining content plus
    a small border, inverted to black-on-white and written to disk.

    Parameters:
        text      -- the string to render.
        fontname  -- path of the TrueType font passed to ImageFont.truetype().
        fontsize  -- font size passed to ImageFont.truetype().
        file_name -- output path; the image format is determined from it
                     by Image.save().
    """
    # white text on a black background
    bgcolor = 0x0
    fgcolor = 0xffffff
    # create a font object
    font = ImageFont.truetype(fontname, fontsize)
    # determine dimensions of the text
    if hasattr(font, 'getsize'):
        dim = font.getsize(text)
    else:
        # Pillow 10 removed getsize(); fall back to getbbox() and use the
        # right/bottom edges of the text box as its width/height.
        text_box = font.getbbox(text)
        dim = (text_box[2], text_box[3])
    # create a new image significantly larger than the text, so the
    # rotations and shifts below have room to work in
    edge = max(dim[0], dim[1]) + 2*min(dim[0], dim[1])
    im = Image.new('RGB', (edge, edge), bgcolor)
    d = ImageDraw.Draw(im)
    x, y = im.size
    # add the text to the image, centred
    d.text((x/2-dim[0]/2, y/2-dim[1]/2), text, font=font, fill=fgcolor)
    k = 3
    wob = 0.20*dim[1]/k
    rot = 45
    # Apply lots of small stirring operations, rather than a few large ones
    # in order to get some uniformity of treatment, whilst
    # maintaining randomness
    for i in range(k):
        im = wobbly_copy(im, wob, bgcolor, i*2+3, rot+0)
        im = wobbly_copy(im, wob, bgcolor, i*2+1, rot+45)
        im = wobbly_copy(im, wob, bgcolor, i*2+2, rot+90)
        rot += 30

    # now get the bounding box of the nonzero parts of the image
    bbox = im.getbbox()
    bord = min(dim[0], dim[1])/4  # a bit of a border
    im = im.crop((bbox[0]-bord, bbox[1]-bord, bbox[2]+bord, bbox[3]+bord))
    # and turn into black on white
    im = ImageOps.invert(im)

    # save the image, in format determined from filename
    im.save(file_name)
112 
def gen_subdir(basedir, md5hash, levels):
    """Generate a subdirectory path out of the first _levels_
    characters of _md5hash_, and ensure the directories exist
    under _basedir_.

    Each of the first ``levels`` characters becomes one nesting level,
    e.g. ("abcdef", 2) yields "a/b" (joined with the OS path separator).

    Returns the path relative to ``basedir``, or None when ``levels``
    is zero (or negative). Raises IndexError if ``md5hash`` is shorter
    than ``levels``, and OSError if a directory cannot be created.
    """
    subdir = None
    for i in range(levels):
        char = md5hash[i]
        subdir = os.path.join(subdir, char) if subdir else char
        fulldir = os.path.join(basedir, subdir)
        # Several worker processes may create the same directory at the same
        # time, so an "exists?" check followed by mkdir is racy. Just try to
        # create it and accept the failure if the directory is already there.
        try:
            os.mkdir(fulldir)
        except OSError:
            if not os.path.isdir(fulldir):
                raise
    return subdir
128 
def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length):
    """Make one attempt at producing a captcha word.

    Parameters:
        words      -- list of candidate words, or None to build a string of
                      random lowercase letters instead.
        blacklist  -- iterable of substrings that must not occur in the result.
        verbose    -- when true, print the candidate and the reason for any
                      rejection.
        nwords     -- number of words from ``words`` to concatenate (at
                      least one word is always used).
        min_length -- minimum accepted length of the result.
        max_length -- maximum accepted length; values <= 0 mean "no limit"
                      in wordlist mode and "10" in random mode.

    Returns the candidate string, or None if it was rejected (too short,
    too long, contains characters other than a-z, contains a blacklisted
    substring, or ``words`` is an empty list).
    """
    if words is not None:
        if not words:
            # Nothing to choose from; report a failed attempt instead of
            # letting the random pick raise on an empty sequence.
            if verbose:
                print("skipping because the word list is empty")
            return None
        word = ''.join(random.choice(words) for _ in range(max(1, nwords)))
    else:
        max_length = max_length if max_length > 0 else 10
        length = random.randint(min_length, max_length)
        # 97 is ord('a'): draw ``length`` random letters from a-z
        word = ''.join(chr(97 + random.randint(0, 25)) for _ in range(length))

    if verbose:
        print("word is %s" % word)

    if len(word) < min_length:
        if verbose:
            print("skipping word pair '%s' because it has fewer than %d characters" % (word, min_length))
        return None

    if max_length > 0 and len(word) > max_length:
        if verbose:
            print("skipping word pair '%s' because it has more than %d characters" % (word, max_length))
        return None

    if nonalpha.search(word):
        if verbose:
            print("skipping word pair '%s' because it contains non-alphabetic characters" % word)
        return None

    for naughty in blacklist:
        # An empty entry (e.g. a blank line in the blacklist file) is a
        # substring of every word and would reject everything, so ignore it.
        if naughty and naughty in word:
            if verbose:
                print("skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty))
            return None
    return word
166 
def pick_word(words, blacklist, verbose, nwords, min_length, max_length):
    """Return an acceptable captcha word, retrying try_pick_word() as needed.

    All arguments are forwarded unchanged to try_pick_word(). If no attempt
    out of 1000 yields a word, the process is terminated via sys.exit().
    """
    attempts_left = 1000  # If we can't find a valid combination in 1000 tries, just give up
    while attempts_left > 0:
        candidate = try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
        if candidate:
            return candidate
        attempts_left -= 1
    sys.exit("Unable to find valid word combinations")
173 
def read_wordlist(filename):
    """Read a one-entry-per-line text file into a list of words.

    Every line is stripped of surrounding whitespace and lowercased. Blank
    lines are kept and appear as empty strings in the result.

    The file is opened in a ``with`` block so the handle is released even
    if reading fails part-way through.
    """
    with open(filename) as f:
        return [line.strip().lower() for line in f]
179 
def run_in_thread(object):
    """Worker entry point: generate one chunk of captcha images.

    ``object`` is the work item built by the main script, a sequence of
    ``[count, words, blacklist, opts, font, fontsize]``:
        count     -- number of images this worker should generate.
        words     -- word list, or None for random-letter captchas.
        blacklist -- substrings that must not appear in a captcha.
        opts      -- the parsed command-line options.
        font      -- path of the font file.
        fontsize  -- font size.

    The key, output directory, directory depth and verbosity are read from
    ``opts`` rather than from module globals: those globals are only assigned
    under ``if __name__ == '__main__'``, which is not executed in worker
    processes created with the "spawn" start method.
    """
    count, words, blacklist, opts, font, fontsize = object[:6]
    key = opts.key
    output = opts.output
    dirs = opts.dirs
    verbose = opts.verbose

    for i in range(count):
        word = pick_word(words, blacklist, verbose, opts.number_words, opts.min_length, opts.max_length)
        salt = "%08x" % random.randrange(2**32)
        # 64 bits of hash is plenty for this purpose
        md5hash = hashlib.md5((key+salt+word+key+salt).encode('utf-8')).hexdigest()[:16]
        filename = "image_%s_%s.png" % (salt, md5hash)
        if dirs:
            subdir = gen_subdir(output, md5hash, dirs)
            filename = os.path.join(subdir, filename)
        if verbose:
            print(filename)
        gen_captcha(word, font, fontsize, os.path.join(output, filename))
200 
if __name__ == '__main__':
    """This grabs random words from the dictionary 'words' (one
    word per line) and generates a captcha image for each one,
    with a keyed salted hash of the correct answer in the filename.

    To check a reply, hash it in the same way with the same salt and
    secret key, then compare with the hash value given.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    parser = OptionParser()
    parser.add_option("--wordlist", help="A list of words (required)", metavar="WORDS.txt")
    parser.add_option("--random", help="Use random charcters instead of a wordlist", action="store_true")
    parser.add_option("--key", help="The passphrase set as $wgCaptchaSecret (required)", metavar="KEY")
    parser.add_option("--output", help="The directory to put the images in - $wgCaptchaDirectory (required)", metavar="DIR")
    parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
    parser.add_option("--font-size", help="The font size (default 40)", metavar="N", type='int', default=40)
    parser.add_option("--count", help="The maximum number of images to make (default 20)", metavar="N", type='int', default=20)
    parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE", default=os.path.join(script_dir, "blacklist"))
    parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
    parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
    parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
    parser.add_option("--number-words", help="Number of words from the wordlist which make a captcha challenge (default 2)", type='int', default=2)
    parser.add_option("--min-length", help="Minimum length for a captcha challenge", type='int', default=1)
    parser.add_option("--max-length", help="Maximum length for a captcha challenge", type='int', default=-1)
    parser.add_option("--threads", help="Maximum number of threads to be used to generate captchas.", type='int', default=1)

    opts, args = parser.parse_args()

    # Validate the required options. A wordlist is required unless --random
    # was given, in which case captchas are built from random letters.
    if opts.wordlist:
        wordlist = opts.wordlist
    elif opts.random:
        wordlist = None
    else:
        sys.exit("Need to specify a wordlist")
    if opts.key:
        key = opts.key
    else:
        sys.exit("Need to specify a key")
    if opts.output:
        output = opts.output
    else:
        sys.exit("Need to specify an output directory")
    if opts.font and os.path.exists(opts.font):
        font = opts.font
    else:
        sys.exit("Need to specify the location of a font")

    blacklist = read_wordlist(opts.blacklist)
    count = opts.count
    fill = opts.fill
    dirs = opts.dirs
    verbose = opts.verbose
    fontsize = opts.font_size
    # A process pool needs at least one worker.
    threads = max(1, opts.threads)

    if fill:
        # Only generate as many images as are missing from the directory.
        count = max(0, fill - len(os.listdir(output)))

    words = None
    if wordlist:
        words = read_wordlist(wordlist)
        # Keep only 4- and 5-letter words that do not start with "f" and
        # do not begin or end with a doubled letter.
        words = [x for x in words
                 if len(x) in (4, 5) and x[0] != "f"
                 and x[0] != x[1] and x[-1] != x[-2]]

    if count <= 0:
        sys.exit("No need to generate CAPTCHA images.")

    # Never start more workers than there are images to make, and spread
    # the images evenly: every worker gets ``chunks`` images and the first
    # ``remainder`` workers get one extra, so exactly ``count`` are made.
    if count < threads:
        threads = count
    chunks, remainder = divmod(count, threads)

    p = multiprocessing.Pool(threads)
    print("Generating %s CAPTCHA images separated in %s image(s) per chunk run by %s threads..." % (count, chunks, threads))
    data = [
        [(chunks + 1) if i < remainder else chunks, words, blacklist, opts, font, fontsize]
        for i in range(threads)
    ]

    try:
        p.map(run_in_thread, data)
    finally:
        # Release the worker processes once the work is done (or has failed).
        p.close()
        p.join()
282 
captcha-old.wobbly_copy
def wobbly_copy(src, wob, col, scale, ang)
Definition: captcha-old.py:50
captcha-old.try_pick_word
def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
Definition: captcha-old.py:129
captcha-old.run_in_thread
def run_in_thread(object)
Definition: captcha-old.py:180
Makefile.open
open
Definition: Makefile.py:18
captcha-old.read_wordlist
def read_wordlist(filename)
Definition: captcha-old.py:174
captcha-old.pick_word
def pick_word(words, blacklist, verbose, nwords, min_length, max_length)
Definition: captcha-old.py:167
captcha-old.gen_captcha
def gen_captcha(text, fontname, fontsize, file_name)
Definition: captcha-old.py:75
captcha-old.gen_subdir
def gen_subdir(basedir, md5hash, levels)
Definition: captcha-old.py:113