
Properly uploading files to Amazon S3

Here is a little script I wrote that I thought ought to be shared. I use it to upload static files like images, CSS and JavaScript so that they can be served by Amazon S3 instead of the main application server (like Google App Engine).

It’s written in Python and does interesting things like compressing and minifying what needs to be. It takes three arguments and has two options:

  • src_folder: path to the local folder containing the static files to upload
  • destination_bucket_name: name of the S3 bucket to upload to (e.g. static.example.com)
  • prefix: a prefix to use for the destination keys (a kind of folder on the destination bucket; I use it to specify a release version to defeat browser caching)
  • x: if set, the script sets a far-future expiry on all files, otherwise the S3 default is used (one day, if I remember correctly)
  • m: if set, the script minifies CSS and JavaScript files

First you will have to install some dependencies, namely boto, jsmin and cssmin. The installation procedure will depend on your OS, but on my Mac I do the following:
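
For example, with setuptools or pip (the exact command will depend on your setup, so treat this as one plausible way rather than the original one):

```sh
# One plausible way to get the dependencies, not necessarily the original commands;
# with pip it would be: sudo pip install boto jsmin cssmin
sudo easy_install boto jsmin cssmin
```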

And here is the script itself:
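
What follows is only a minimal sketch of what such a script could look like with boto, jsmin and cssmin; the -x / -m flag spelling, the credential handling and every other implementation detail are assumptions based on the description above, not the original code.

```python
#!/usr/bin/env python
# Sketch of an S3 upload script in the spirit of the description above (Python 2 era,
# boto + jsmin + cssmin). The -x/-m flag spelling, the credential handling and every
# other implementation detail are assumptions, not the original code.
import gzip
import io
import mimetypes
import os
from datetime import datetime, timedelta
from optparse import OptionParser

from boto.s3.connection import S3Connection
from boto.s3.key import Key
from cssmin import cssmin
from jsmin import jsmin

# Text formats worth gzipping before upload
COMPRESSIBLE = ('.css', '.js', '.html', '.htm', '.txt', '.xml')


def main():
    parser = OptionParser(usage='%prog [-x] [-m] src_folder destination_bucket_name prefix')
    parser.add_option('-x', dest='expires', action='store_true',
                      help='set a far future expiry on all files')
    parser.add_option('-m', dest='minify', action='store_true',
                      help='minify css and javascript files')
    options, args = parser.parse_args()
    src_folder, bucket_name, prefix = args

    # boto picks up AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY from the environment
    bucket = S3Connection().get_bucket(bucket_name)

    for dirpath, _, filenames in os.walk(src_folder):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            ext = os.path.splitext(filename)[1].lower()
            data = open(path, 'rb').read()

            # Minify JavaScript and CSS when asked to
            if options.minify and ext == '.js':
                data = jsmin(data)
            elif options.minify and ext == '.css':
                data = cssmin(data)

            headers = {'Content-Type': mimetypes.guess_type(filename)[0]
                       or 'application/octet-stream'}

            # Gzip the text formats and tell S3 to serve them as such
            if ext in COMPRESSIBLE:
                buf = io.BytesIO()
                gz = gzip.GzipFile(fileobj=buf, mode='wb')
                gz.write(data)
                gz.close()
                data = buf.getvalue()
                headers['Content-Encoding'] = 'gzip'

            # Far future expiry: ten years ahead, the prefix acts as a release number anyway
            if options.expires:
                expiry = datetime.utcnow() + timedelta(days=10 * 365)
                headers['Expires'] = expiry.strftime('%a, %d %b %Y %H:%M:%S GMT')

            key = Key(bucket)
            key.key = '/'.join([prefix, os.path.relpath(path, src_folder)])
            key.set_contents_from_string(data, headers=headers, policy='public-read')
            print('uploaded ' + key.key)


if __name__ == '__main__':
    main()
```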

Thanks to Nico for the expiry trick :)

Spare me the talk about privacy, they’re all clueless anyway…

With all the talks and posts and whatnot about privacy on the Internet it’s easy for anyone to turn into a privacy control freak.

And I really was starting to freak out myself. After all, a good bunch of my own life is on the Net: Facebook, Twitter, Flickr, LinkedIn, this blog, all the Google applications and all the other services I use, or I test… But this morning I received a letter, not an e-mail, a paper letter. From Google AdWords. Sent from France. In German!

I guess they just assumed that since I was living in Switzerland I spoke German, like when ebay.com redirects me to ebay.de, but I neither speak nor read German.

And it reminded me of something I learned a long time ago, when I was working for Singularis, a now defunct start-up that collected users’ preferences about TV programs: you can collect as much data as you want, but if you don’t know how to use it, it’s only worth the cost of its storage.

And the more you have the harder it is.

Image Credits: Michell Zappa

After-Sales Abuse (Sévices Après Vente)

In a world that is a little more digital every day, let’s not forget that it’s still the same old tricks that work: si vis pacem, para bellum!

!!Warning!! This post is long and boring…

Those who follow me on Twitter or Facebook will remember that last November (on Friday the 13th, to be precise) my apartment was burgled while I was at the dentist’s (that’s a lot for a Friday the 13th). Among the things that were stolen was my MacBook Pro, which is my one and only work tool.

So I quickly needed a new machine. After touring the region’s Apple resellers only to discover that nothing but base configurations were in stock, I went, without much hope, to the FNAC in Lausanne.

I never buy electronics in this price range at the FNAC: below CHF 200 the price-to-convenience ratio is good enough that I don’t look any further, but above that I have always been able to find things cheaper elsewhere. Yet, on that Saturday the 14th, I was delighted to discover that the FNAC had in stock a MacBook Pro whose configuration came very close to the one I was looking for: 15″, 3.06 GHz, 4 GB of RAM and a 500 GB hard drive at 7200 rpm. So I bought it, for CHF 3299 (minus the members’ discount).

On December 29th (a month and a half later), as I was going to bed, I decided to leave my MacBook running on the living-room table so it could take part in the BOINC network and dedicate a few cycles to the search for extraterrestrial intelligence. I have no pets, no children, and the machine was sitting in a clear spot where its ventilation was not obstructed.

The next morning, after breakfast, I went to check my e-mail. Strangely, while a simple caress is usually enough, my Mac would not wake up. Surprised, I checked that I had not left it unplugged: no, the cord was right there, plugged in and powered, so the batteries had not run down. Moreover, a very faint purr told me the machine still seemed to be running. So I forced a shutdown by holding down the power button, and I distinctly heard that small, characteristic noise of an electric motor stopping somewhere inside the machine. I turned it on again and a motor noise could be heard once more, but apart from that, nothing: the screen remained desperately blank. After one or two more equally fruitless attempts, I decided to take the machine to the FNAC’s after-sales service.


Experiencing viral growth

Viral growth is one thing to hear and talk about, but it is something totally different to experience: it’s thrilling, even on a modest scale.

Since my LibraryThing application for Facebook came out, it has clearly followed a viral growth curve. So far there are only 435 users, and every week I look for an inflection in this trend. I know there will be one, because there is a limited number of LibraryThing users on Facebook. My goal, right now, is to attract as many of them as possible to this application.

The next step will be to attract Facebook users to LibraryThing. But I know that for this I will need help from Tim Spalding and the LibraryThing team. I have always been grateful for their work, but I must admit I have been quite disappointed recently: I have been trying to contact them and they have constantly ignored me.

I am also thinking about open-sourcing the application, because I think it is a good use case both for people developing Python/Django applications on Google App Engine and for those developing for the Facebook platform. I still have to choose a license, but the GNU Affero General Public License seems like a good match.

Anyway, if you love books, have plenty of them and want to share what you read, do not forget to give LibraryThing a try and, once you are convinced, join the Facebook application. With this application you can:

  • Add a tab and a box to your profile, listing your most recent books
  • Choose the number of books to display in your profile tab
  • Choose whether you want to display them with covers only or as a list which will include your ratings and reviews
  • If you grant the application the right to publish to your stream it will publish books you add to LibraryThing on your wall
  • It will also publish reviews as you write them on LibraryThing

You can also:

  • Browse your Facebook friends’ books
  • Find books on the search page
  • Share a book you like or comment on it (these are Facebook-only features and will not appear in LibraryThing)
  • Add a book to your LibraryThing collection with a single click

Enjoy :)

A year in review

As this year is coming to an end, I thought I should do a post-mortem, like at the end of a project, to see what went well and what did not.

  • I left the office I was renting downtown and started working from home: It was a good move from a work perspective, as being alone at home allows me to be really productive. However, as the second part of the year got busier it became difficult to set limits and my work/life balance suffered.
  • I worked on the map editor of the DITA-OP but did not finish it: Not good at all, I have not been able to do a release this year. The other problem with the DITA-OP is that I don’t know my users. I know they are out there, somewhere, and I really need to find a way to gather the community.
  • I started two toy projects, SidewikiRSS.com and a Facebook application for LibraryThing: SidewikiRSS.com is rolling on its own, it does not cost me anything besides the domain name (thanks Google App Engine), it is used regularly and brings some traffic here. fbLibraryThing is slowly but steadily growing, but I am wondering whether I will be able to add new features: I am completely dependent on the LibraryThing API and I will need help from the LibraryThing team if I want to go further.
  • I completely put aside my super-secret Babelizr project: That’s not a good thing, for sure, but at least it was because of too much paid work. The positive side is that I greatly improved my Python and Django skills on other projects, and that will pay off for Babelizr.
  • I can now consider myself an Amazon Web Services and Google App Engine expert: That is a tremendous addition to my résumé. I now need to dedicate more time to their respective communities.
  • I accepted too many projects in the second part of the year: The beginning of the year had been slow and I thought I needed as many contacts as possible to build a sustainable business. Overall this is a good thing, especially since I exceeded my financial goals. The other positive side is that I only accepted interesting projects and that I met really nice people. But I was under a lot of pressure in the last quarter, and that was definitely not the point of becoming a freelancer: “working more to earn more” is not my motto.
  • I did not blog enough: Especially since I gained a lot of experience in many fields and with many tools, I should definitely have written more about it.

And last, but not least:

  • I swam with sharks: Biggest thrill ever! I swam with two oceanic whitetips (Carcharhinus longimanus) in the Red Sea. My only regret is that I was totally unable to take a decent picture or video of the event.
  • I skied almost every week-end of the winter season and hiked almost every week-end of the summer season: This prepared me really well for our two-week vacation in Peru.
  • I went to the movie theater more than 80 times: Thanks to the Pathé Pass Yang offered me last Christmas. It allowed me to see movies (good and bad) that I would not have seen otherwise.

I think I can say it was a good year: tiring, a bit stressful near the end, but a good year. However, I must say it did not bring me any closer to my biggest goal, which is to find ways to automate my revenue stream, so I really need to work on that next year.

My other plans for next year are:

  • Releasing the latest version of the DITA-OP and finding a way to build and animate the community.
  • More blogging (like every other blogger) and tweeting, and finding a better way to organize my Facebook presence.
  • Connecting with the LibraryThing team, although this has proved difficult so far.
  • Coming back to Babelizr, maybe starting by building external interest around the project in order to force commitment.
  • Dedicating more time to online communities: Google App Engine, AWS, Drupal, Django, etc. Maybe through Stack Overflow.
  • Planning a four-week vacation dedicated to hiking or diving.
  • Watching as many movies as possible with my renewed Pathé Pass.

And, of course, keeping my clients happy :)

I worked on only two public projects this year (the others are either private or still in stealth mode, so I cannot talk about them):

  • Fontself, a startup company which provides a revolutionary new experience of text through digital text personalization. It provides digital fonts that preserve the gestures of a given handwriting and the original look of the writing instrument (ball-point pen, pencil, ink, paper, etc.). I participated in the design of the font distribution system and its implementation on Amazon’s cloud infrastructure using Python and Django.
  • nouvo.ch, the multimedia magazine of the French-speaking Swiss television channel, which asked me to redevelop their website using the Drupal CMS and various media-management modules.

My Fontself is better than your font

Those of you who were at the Lift conference in 2008 might remember Fontself. Franz Hoffman and Marc Escher, the two founders of the company, were there to offer everyone the opportunity to fill in a grid with their own handwriting, scan it, and use it on the Lift website.

Today, the Fontself team has grown and is celebrating the first release of a product. Together with Netlog, the European online social portal, they are now giving Netlog community members the opportunity to send messages, post blog entries or post comments using personalized character fonts.

Congratulations to them: they have worked long and hard to get their ideas out, and I am proud I helped them make their dream come true.

And this also gives me some advantages, like being able to use a Fontself font on my own blog and give you a glimpse of what the future of web fonts might be!
Among other things, you will appreciate the ability to select, copy and paste the text :P

For now, the feature is only available in the French version of the platform, but there is no doubt that it will rapidly extend to the rest of the 35 million Netlog members throughout Europe and that the Fontself team will continue to develop their technology and enhance the web.

If you want to stay informed about Fontself and their technology, you can subscribe to their newsletter, become a friend of their Netlog page, follow them on Twitter or keep following this blog…

Image Credits: Fontself

Sidewiki RSS

Last week Google announced Google Sidewiki, a new service that enables anyone to comment on any web page.

There have been a lot of comments about Sidewiki already, but the thing that instantly struck me is that there’s no easy way to keep up with what others are saying about your own pages. So I took a look at the Sidewiki API and built the Sidewiki RSS service.

This free service (I hope you won’t mind the Google ads) enables webmasters to get a feed URL for the recent Sidewiki entries on their pages. There’s even a bookmarklet that you can drop into your browser’s toolbar and use to get the feed for the page you are browsing.

Hope you will like it ;)

JMeter distributed testing with Amazon EC2

Recently I had to set up a performance-testing infrastructure for one of my clients. The goal was to put their web application under heavy load, to prove it was able to scale properly and to do some capacity planning.

I chose Apache JMeter to generate the load, created a few test plans and started to nudge the application. Of course, I quickly understood that my MacBook alone would not be enough to make the server sweat.

To serve this application we are using Amazon EC2 instances managed with the Scalr.net service. One day I should write something about Scalr but, for now, suffice it to say that a Scalr farm defines the configuration of a cluster of EC2 instances. Each instance in a farm belongs to a role (an EC2 AMI), and the farm defines how many instances of each role should be launched.

Since JMeter can be used in a master/slave mode (see details here) I decided to use Scalr to create a farm of JMeter slaves that would put the application under pressure.

The first problem I faced is that the JMeter master and its slaves must be on the same sub-network to be able to communicate, so my JMeter farm had to define two different roles: one for the master (jmeter-master), with only one instance, and one for the slaves (jmeter-slave), with as many instances as necessary.

The second problem concerned the IP addresses of the slaves: I did not want to write down the slaves’ IPs and enter them manually on the JMeter command line. Luckily, with Scalr, each instance in a farm is informed of its peers’ IP addresses, so I wrote a small Python script that gets those IPs and launches the JMeter master with a given test plan.
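
A minimal sketch of the idea could look like the following; it assumes the slaves’ internal IPs can be read from a file Scalr maintains on the instance (the /etc/aws/hosts/jmeter-slave path, like every other detail below, is an assumption rather than the original script):

```python
#!/usr/bin/env python
# Sketch of the master-side launcher, not the original script. It assumes Scalr
# maintains a file on the instance listing the internal IPs of the jmeter-slave
# role, one per line; adjust the path to however your Scalr version exposes peers.
import subprocess
import sys

SLAVES_FILE = '/etc/aws/hosts/jmeter-slave'  # assumption: Scalr-maintained peer list


def slave_ips():
    with open(SLAVES_FILE) as peers:
        return [line.strip() for line in peers if line.strip()]


def main():
    test_plan = sys.argv[1]
    remotes = ','.join(slave_ips())
    # -n: non-GUI mode, -t: test plan, -R: comma-separated list of remote (slave) hosts
    subprocess.call(['jmeter', '-n', '-t', test_plan, '-R', remotes])


if __name__ == '__main__':
    main()
```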

This worked pretty nicely for my simpler test plans (like the one that only GETs the home page), but as soon as I tried to POST (like during the login process) it was not enough. The thing is that the POST data JMeter uses is not stored in the test plan itself but in companion .binary files, and those files are not sent from the master to the slaves the way the test plans are.

I thus had to find a way to send those files over myself before launching the test plans. Rsync seemed the easiest way to do it, so I wrote another Python script to synchronize the slaves.
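
Here again, only a sketch under the same assumptions (the Scalr-maintained peer list, plus the /var/testing folder and farm.key described just below) rather than the original script:

```python
#!/usr/bin/env python
# Sketch of the synchronization script, not the original one. The farm key and the
# /var/testing folder come from the requirements listed below; the peer-list file
# and the root login are assumptions.
import subprocess

SLAVES_FILE = '/etc/aws/hosts/jmeter-slave'  # assumption: Scalr-maintained peer list
FARM_KEY = '/var/testing/farm.key'           # RSA key downloaded from the Scalr.net farm menu
TESTING_DIR = '/var/testing/'                # must already exist on the slaves


def slave_ips():
    with open(SLAVES_FILE) as peers:
        return [line.strip() for line in peers if line.strip()]


def main():
    for ip in slave_ips():
        # -a: archive mode, -z: compress; ssh with the farm key, no host key prompt
        subprocess.check_call([
            'rsync', '-az',
            '-e', 'ssh -i %s -o StrictHostKeyChecking=no' % FARM_KEY,
            TESTING_DIR,
            'root@%s:%s' % (ip, TESTING_DIR),
        ])


if __name__ == '__main__':
    main()
```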

The above script requires only three things:

  • a valid RSA private key (here /var/testing/farm.key), which you can download using the Scalr.net farm’s menu
  • the /var/testing folder must already exist on the slaves
  • and, of course, you need to initially get the files on the master. I use svn up.

Once you have prepared and tested everything, using one master and one slave, you can rebundle the instances you used and then start to spawn tens of slaves to stress your application.

If you have already done something similar or have ideas for improving my setup, do not hesitate to let me know in the comments :)

UPDATE: With the release of the Amazon Virtual Private Cloud, it should now be possible to have the slaves running in the cloud and the master running on your workstation, since they would all be in your own sub-network. However, you will need to find another way to synchronize the POST data with the slaves.