#!/usr/bin/perl
#
# lang_srom.per -- periodic task for the Languages Sucks/Rules chart
#
# This code updates the content at:
#
#   http://www.mrob.com/lang_srom.html
#   http://www.mrob.com/lang_srom.txt
#
# It uses the SimpleGet.pl library, a stand-alone file containing
# the bare minimum of code necessary to implement the functionality
# of the LWP::Simple package. For more info about SimpleGet, go
# here:
#
#   http://www.mrob.com/SimpleGet.txt
#
# Important: If you publish web pages that contain links to Alta Vista
# (as does the page generated by this script) you must agree with terms
# of use specified by the AltaVista company. Their noncommercial use
# terms are quite reasonable. Find the "terms of use" link at the
# bottom of their home page.
#
#
#
# This script is based on the Operating System Sucks-Rules-O-Meter (SROM)
# Here is its original copyright notice:
#
#   Copyright 1998 Electric Lichen L.L.C.
#   Don Marti
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#   GNU General Public License for more details.
#
# If you got this from the website www.mrob.com, the GNU General Public
# License may be retrieved from the following URL:
#
#   http://www.mrob.com/ries/COPYING.txt
#
# Otherwise, or for more information, go to:
#
#   http://www.fsf.org/copyleft/copyleft.html
#
#
#
# Revision history:
#  Don Marti:
#   revised 19 Mar 1998 -- added $rule_offset
#   revised 3 June 1999 -- new AltaVista result page format
#
#  Robert Munafo:
# 20000113 Removed dependency on LWP; still depends on IO::Socket.
#   I also replaced OS names with language names because
#   http://orwant.www.media.mit.edu/tpj/rules is broken today and I wanted
#   to know which sucked worse: Fortran or Cobol.
# 20000214 Use SimpleGet.pl library. Copy to /rhtf; convert into a .per
#   script. Write code to generate 2-D scatter plot of results.
# 20000215 Add forth, lisp, postscript, tcl, smalltalk. Recognize
#   "1 page found" returns (gives better results for rare languages).
#   Add trademark notices. Add color to symbols, allowing for 5
#   different P's.
# 20000217 Add note about special cases Python, C.
# 20000414 "rightmost" example must have more than 100 hits
# 20000524 Fix formatting problems with MSIE 5 in MacOS
# 20000717 Add note about why Prolog isn't included.
# 20000801 Add similar note about REXX and Haskell.
# 20000919 Add logging of $maxhits to var/lang_srom_log, to track the
#   growth rate of the Internet
# 20001005 Fancier permanent logging of hit counts for C and Perl
# 20001110 Add explanation about Visual Basic.
# 20010505 New Alta Vista query URL format and results format
# 20010717 Yet another results format, remove 'true basic' because no hits
# 20020220 Add "touch ." at end
# 20021111 New results format
# 20030314 Add Ruby
# 20030318 Add %mandates to facilitate accuracy of Ruby
# 20031205 Alta Vista's hostname changed back to www.altavista.com
# 20050309 New query URL format; make it run fast, just for debugging

use strict;
require 5.004;
# SimpleGet.pl is loaded from an absolute path on the author's machine; per
# the header comment it provides the LWP::Simple-style fetch routine used by
# the main program.
require "/Users/munafo/bin/SimpleGet.pl";

###########################################################################
#
# Local variables containing static text

# Original and revised query URL formats
#my $avurl = "http://www.altavista.com"; # 20000113
#my $avurl = "http://www.altavista.digital.com"; # 20010505
my $avurl = "http://www.altavista.com"; # 20031205

# Each assignment below supersedes the one before it; they are kept in order
# as a record of how the query URL has changed. Only the last value of each
# variable is in effect when the queries are built.
my $SEARCH_PREFIX = qq{$avurl/cgi-bin/query?pg=q&what=web&kl=XX&q=}; # 20000113
$SEARCH_PREFIX = qq{$avurl/sites/search/web?kl=XX&pg=q&q=}; # 20010505
$SEARCH_PREFIX = qq{$avurl/sites/search/web?q=}; # 20010717
$SEARCH_PREFIX = qq{$avurl/web/results?q=}; # 20050309

my $SEARCH_SUFFIX = ''; # 20000113
$SEARCH_SUFFIX = qq{&pg=q&kl=XX}; # 20010717

# The list of languages is somewhat incomplete because many languages
# have names that don't lend themselves well to search engine lookups.
# %aliases maps each chart label to the list of search terms that are
# queried for it; the hit counts of all terms are summed under the label.
# An empty list means the label is never queried.
my %aliases = (
  # 'Assembler' => ['assembly language'], # only a few hits, all with "rules" as a noun
  'Basic' => ['visual basic'], # 'basic' alone hits too many pages
  # 'ms basic', 'microsoft basic', # no hits
  # 'true basic' # no hits as of 20010717
  # 'integer basic', 'applesoft basic', # this proves I'm old
  'Perl' => ['perl'],
  'Objective C' => ['objective c'],
  # 'prolog' => ['prolog'], # "rules" is a noun
  # 'Haskell' => ['haskell'], # "rules" is a noun
  # 'REXX' => ['rexx'], # "rules" is a noun
  'fortran' => ['fortran', 'f77', 'f90'],
  'COBOL' => ['cobol'],
  'C and C++' => ['c'],
  'Pascal' => ['pascal'],
  'Python' => ['python'],
  'AppleScript' => ['applescript'],
  'PHP' => ['php'],
  'Tcl' => ['tcl'],
  'lisp' => ['lisp', 'scheme'],
  'forth' => ['forth'],
  'Java' => ['java'],
  'JavaScript' => ['javascript'],
  "Maple" => ['maple'],
  'PostScript' => ['postscript'],
  'Smalltalk' => ['smalltalk'],
  'Ruby' => ['ruby'],
  'nonexist' => [ ]
);

# Phrases that are excluded from a language's queries (each one is added to
# the query as a -"phrase" term).
my %stops = (
  "C and C++" => ["a c sucks"],
  "Maple" => ["us maple", "rich maple", "concentric strata"],
  "Python" => ["monty"],
);

# These search terms are required for a match. They are useful for
# languages like 'python', which share a name with something else that
# is unrelated to the language, but where no obvious stop word like
# 'monty' is available.
my %mandates = (
  "Ruby" => ["language"],
);

# For each plotted quality, the words that are actually searched for; hits
# for every word are added to that quality's count.
my %synonyms = (
  'sucks' => ['sucks'],
  'rules' => ['rules', 'rocks']
);

# Search terms whose raw hit counts are also recorded in the permanent log.
# The value is a 1-based slot number; each slot holds three counts, one per
# entry of %pqidx.
my %permlog = ('c' => 1, 'perl' => 2); # ...not to be confused with
                                       # (knit 1, perl 2) ack!

# Position of each search word within a %permlog slot.
my %pqidx = ('sucks' => 0, 'rules' => 1, 'rocks' => 2);

# Flat array of logged counts, indexed by (slot - 1) * 3 + %pqidx position.
my @pqarr;

# %tag determines the letter and color each language will have on the
# chart. The first character is the letter and must be uppercase; the
# second character gives the color and must be lowercase.
my %tag = (
  # colors should be: k, r, o, g, b and then repeat
  # add new languages in alpha order and re-do the colors
  "AppleScript" => "Ak",
  "Basic" => "Br",
  "C and C++" => "Co",
  "COBOL" => "Cg",
  "forth" => "Fb",
  "fortran" => "Fk",
  "Haskell" => "Hr",
  "Java" => "Jo",
  "JavaScript" => "Jg",
  "lisp" => "Lb",
  "Maple" => "Mk",
  "Objective C" => "Or",
  "Pascal" => "Po",
  "Perl" => "Pg",
  "PHP" => "Pb",
  "PostScript" => "Pk",
  "Python" => "Pr",
  "Ruby" => "Ro",
  "Smalltalk" => "Sg",
  "Tcl" => "Tb",
  'nonexist' => '.'
);

# Text emitted before a chart letter, keyed by the color character of its
# tag, and ($lcoff) the text emitted after it.
# NOTE(review): every value here is an empty string, so maptag() currently
# adds no color markup at all -- confirm whether markup was meant to be here.
my %lcstyle = (
  "k" => "",
  "r" => "",
  "o" => "",
  "g" => "",
  "b" => "",
  "?" => ""
);
my $lcoff = "";

# I set $myambiv to the name of a language that I think both
# rules and sucks. It is used as an example in the text.
my $myambiv = "JavaScript";

# Largest combined (rules + sucks) hit count seen, and the language it
# belongs to; written to the permanent log at the end of the run.
my $maxhits = 0;
my $maxhwhat = "";

###########################################################################
#
# Subroutines

# maptag turns a two-character tag (uppercase letter + lowercase color code)
# into the text that is printed for it: the color's %lcstyle prefix, the
# letter, then $lcoff. A missing character is treated as '?'. A tag that
# does not have that shape is returned unchanged (the original code read
# stale $1/$2 in that case).
sub maptag
{
  my ($t) = @_;

  my ($letter, $color) = ("$t??" =~ m/^([A-Z\?])([a-z\?])/);
  return $t unless defined $letter;

  return($lcstyle{$color} . $letter . $lcoff);
}

# quoteit turns special characters into %-escapes for use as fields in an
# HTTP GET-style form submission. The string is lowercased first; only
# space, double quote and plus are escaped.
sub quoteit
{
  my ($s) = @_;

  $s = lc($s);
  $s =~ s/ /%20/g;   #   20
  $s =~ s/\"/%22/g;  # " 22
  $s =~ s/\+/%2B/g;  # + 2B

  return $s;
}

###########################################################################
#
# Main program

$| = 1;

my $servid = `/opt/mrob/bin/get-servid`;
# chomp, not chop: only strip a trailing newline, never a real character.
chomp $servid;
print "lang_srom started on service $servid\n";
system("/opt/mrob/bin/pe");

my $fast = 1; # (shift != 0);

my $greatest = 0;   # largest single per-quality count seen so far
my %count = ();     # $count{language}{'sucks' | 'rules'} = total hits
my $pop = "";       # language that owns $greatest

foreach my $lang (sort keys(%aliases)) {
  # The stop-word / mandatory-word part of the query depends only on the
  # language, so build it once instead of once per synonym.
  my $stop = "";
  foreach my $st (@{$stops{$lang}}) {
    $stop .= qq{ -"$st"};
  }
  foreach my $st (@{$mandates{$lang}}) {
    $stop .= qq{ +"$st"};
  }

  foreach my $alias (@{$aliases{$lang}}) {
    foreach my $quality ('sucks', 'rules') {
      $count{$lang}{$quality} += 0; # make sure it has a value to start with
      foreach my $synonym (@{$synonyms{$quality}}) {
        my $query = quoteit(qq{+"$alias $synonym"$stop});
        my $url = $SEARCH_PREFIX . $query . $SEARCH_SUFFIX;
        # print ">> $url\n";
        # get() is not defined in this file; presumably it comes from
        # SimpleGet.pl. A failed fetch is simply treated as "no hits".
        my $g1 = get($url);

        # Older result-page formats, kept for reference:
        # if ($g1 =~ /([\d\,]+)\D+pages?\s+found/i) {
        # if (
        #   ($g1 =~ /we found about ([\d\,]+) result/i) # 20010505
        #   || ($g1 =~ /we found ([\d\,]+) result/i) # 20010717
        # ) {
        if (defined($g1)
            && ($g1 =~ m/AltaVista found ([\d\,]+) results/i) # 20021111
        ) {
          my $raw = $1;
          $raw =~ s/\D//g;
          $raw += 0; # convert string to number
          $count{$lang}{$quality} += $raw;
          print STDERR "$alias $synonym (alias for: $lang $quality): $raw\n";
          print "$alias $synonym (alias for: $lang $quality): $raw\n";
          if ($permlog{$alias}) {
            my $idx = (($permlog{$alias}-1) * 3) + $pqidx{$synonym};
            $pqarr[$idx] = $raw;
          }
        }
        sleep(10) unless ($fast); # keep our impact low
      }
      if ($count{$lang}{$quality} > $greatest) {
        $greatest = $count{$lang}{$quality};
        $pop = $lang;
      }
    }
  }
  print STDERR "\n";
}

die "bad AltaVista, bad, bad " if $greatest == 0;

# plot them on a crude ASCII chart
my $vscale = 3.0; # see chartrows formula below
my $hlabel = "sucks . . . . . . . debatable . . . . . . . rules";
my $hscale = length($hlabel);
my $chartrows = int($vscale * log($greatest + $greatest));

# Every chart cell is two characters wide because a tag is two characters
# (letter + color code). The row printer later shrinks each cell to one
# visible column, so a row of $hscale cells lines up with $hlabel.
my $blank_cell = "  ";
my @chart = ($blank_cell x $hscale) x ($chartrows + 1);

my $rulezy = 0;      # rightmost column reached by a language with > 100 hits
my $ruleslang = "";  # the language in that column

foreach my $lang (sort(keys(%aliases))) {
  my $rules = $count{$lang}{'rules'};
  my $sucks = $count{$lang}{'sucks'};
  my $thits = $rules + $sucks;
  if ($thits > 0) {
    # compute horizontal and vertical positions
    my $hpos = int(($hscale - 1) * $rules / ($rules + $sucks));
    if (($hpos > $rulezy) && ($thits > 100)) {
      $rulezy = $hpos;
      $ruleslang = $lang;
    }
    my $vpos = $chartrows - int($vscale * log($rules + $sucks));
    my $row = $chart[$vpos];

    # Avoid plotting one symbol over another: slide right to the next free
    # cell, but never past the last column. If everything to the right is
    # taken, slide left instead so the write always stays inside the row.
    while (($hpos < $hscale - 1)
           && (substr($row, $hpos * 2, 2) ne $blank_cell)) {
      $hpos++;
    }
    while (($hpos > 0) && (substr($row, $hpos * 2, 2) ne $blank_cell)) {
      $hpos--;
    }
    substr($row, $hpos * 2, 2) = $tag{$lang};
    $chart[$vpos] = $row;

    if ($thits > $maxhits) {
      $maxhits = $thits;
      $maxhwhat = $lang;
    }
  }
}

# The OUT handle stays open across the rest of the script. Two-argument open
# is kept for compatibility with the "require 5.004" above; the file name is
# a literal, so there is nothing to inject.
open (OUT,"> lang_srom.rhtf")
  or die "cannot write lang_srom.rhtf: $!";

print OUT "Programming Languages: The Internet's Current Opinion tcc[Programming Languages: ` 
The Internet's Current Opinion] gr[(r)|®] gr[Java |JavaTM] ";

# This is the old table format, before I got the scatter chart working.
# It is deliberately disabled and kept only for reference.
if (0) {
  print OUT "\n";
  print OUT "*Language* | *Sucks* | *Rules* or *Rocks* \n";
  foreach my $lang (sort(keys(%aliases))) {
    my $sucks = $count{$lang}{'sucks'};
    my $rules = $count{$lang}{'rules'};
    my $suckage = int (100* $sucks/$greatest);
    my $suck_offset = 100 - $suckage;
    my $ruleage = int (100* $rules/$greatest);
    my $rule_offset = 100 - $ruleage;
    print OUT "-\n $lang | $sucks | $rules \n";
  }
  print OUT "
\n";
}

# Top of the chart: column label and upper border.
print OUT ("
  total |$hlabel|
   hits +" . ("-" x $hscale) . "+
");
# Print the body of the chart, one text line per entry of @chart, followed
# by the bottom border. $i and $hits stay at file scope because later code
# reuses $i.
my $i    = 0;
my $hits = 0;

while ($i <= $chartrows) {
  # The left margin is seven characters wide. Commas stand in for blanks
  # until the very end so that the margin is not touched when the
  # two-character chart cells are squeezed to one column.
  my $margin = ",,,,,,,";

  # Only every 4th row carries a numeric label.
  if (($i % 4) == 0) {
    # The +0.5 centers the label on the range of values plotted in this row.
    my $value = exp((0.5 + $chartrows - $i) / $vscale);
    my $shown;
    if ($value > 10) {
      $shown = int($value);
    }
    elsif ($value > 1) {
      $shown = (int($value * 10)) / 10;
    }
    else {
      $shown = "1.00";
    }
    # right-justify by keeping the last seven characters
    $margin = substr(",,,,,,," . $shown, -7);
  }

  $hits = $margin . ",|" . $chart[$i] . "|\n";

  # place color labels around any tags in this line
  $hits =~ s/([A-Z][a-z])/maptag($1)/ge;
  # each empty two-character cell becomes one column
  $hits =~ s/  / /g;
  # finally turn the placeholder commas into real spaces
  $hits =~ s/,/ /g;

  print OUT $hits;
}
continue {
  $i++;
}

# Bottom border. $i is now one past the last row; the border gets the
# "1.00" label only when that index falls in the second half of a
# four-row group.
if (($i % 4) < 2) {
  $hits = "       ";
}
else {
  $hits = "   1.00";
}
print OUT ("$hits +" . ("-" x $hscale) . "+

");

# Legend: one fixed-width "X = Language" entry per plotted language, sorted
# without regard to case and laid out three entries to a line.
$i = 0;
foreach my $name (sort { lc($a) cmp lc($b) } keys(%aliases)) {
  my $entry = $tag{$name};

  # a tag of "." marks a language that never appears on the chart
  next if ($entry eq ".");

  # start a new indented line before every third entry
  if ($i % 3 == 0) {
    print OUT "\n     ";
  }

  # pad or cut the entry to exactly 20 characters
  $entry = sprintf('%-20.20s', " $entry = $name");

  # swap the two-character tag for its colored letter
  $entry =~ s/ ([A-Z][a-z]) =/" " . maptag($1) . " ="/ge;

  print OUT $entry;
  $i++;
}

# Timestamp used both on the page and in the permanent log.
my $date = scalar(gmtime(time()));

print OUT "

 Updated $date GMT. (s$servid) 
";

# Explanatory text. Parts of it ($myambiv, $pop, $ruleslang and the chart
# symbols) are filled in from this run's results so the prose matches the
# chart.
print OUT "The numbers come from [AltaVista(r)|$avurl] queries. For example, a search for |+\"$myambiv sucks\"| will reveal that there are " . $count{$myambiv}{'sucks'} . " web pages containing the text '$myambiv sucks'. That number, along with the results of queries for '$myambiv rocks' and '$myambiv rules', is used to plot the position of the symbol '|" . maptag($tag{$myambiv}) . "|' on the chart. (As you can see, this paragraph rates $myambiv ambiguously -- and that reflects rather accurately how I feel about $myambiv :-) The chart is updated daily. ";

print OUT "Languages that appear *higher* on the chart (like $pop) appear on a greater total number of web pages. Languages that appear closer to the *right* side of the chart (like $ruleslang) have a greater 'rules/sucks' ratio -- that is, they appear more often with 'rules' than with 'sucks'. However, you should keep in mind that languages near the bottom of the chart are not mentioned on many web pages, so their horizontal position isn't as accurate an indicator of their true karma. You might notice that there aren't many *popular* languages that *suck* -- that is, there are not many languages near the upper-left corner of the chart. This confirms the theory that really sucky languages never become widespread enough to be mentioned on lots of web pages. On the other hand, there are plenty of *unpopular* languages that *rule* (these appear in the bottom-right portion of the chart). In most cases, these are specialized languages -- they do some limited job really well, but haven't become popular because they are limited to specific types of tasks. In rare cases, these are new languages that will someday rule the world but currently only rule their early adopters. Regarding the data collection: As already mentioned, these data points come from search engine results. Some languages, such as Python, require the use of *stop words* (such as 'monty') to prevent unrealistic results from being plotted. 
The languages C and C++ both appear on the chart as '|" . maptag($tag{"C and C++"}) . "|' because their names make it impossible to search for one without finding the other. Other languages are missing simply because I have overlooked them or do not consider them important. Suggestions are welcome, but will not always be accepted. In particular, I consider particular brands of a language to be insignificant except in cases (like Maple and AppleScript) where the brand *is* the language. I also don't care much about specialized languages (PHP is an example, but it rules so much I couldn't restrain myself :-) NOTE: The statistics for *Basic* actually consist almost entirely of references to Visual Basic. Since there are also a couple other Basics, I decided to lump them all together and call it just \"Basic\". NOTE: Prolog, REXX and Haskell are not included because the word *rules* has a special meaning in those languages (as a plural noun), making the 'rules' counts meaningless. ";

print OUT "This page is inspired by the awesome [Operating System Sucks-Rules-O-Meter|http://srom.zgp.org/] which you are encouraged to visit if you like this sort of thing. I also have a certain fondness for now-defunct [Tool of Objective Truth|http://www.zdnet.co.uk/athome/misc/toot/] formerly featured at ZDNet UK. ";

print OUT "If you are running Linux (Linux rules) and know Perl (Perl rules even more";
# Only compare Perl with the rightmost language when that language is not
# Perl itself.
print OUT " than $ruleslang" if ($ruleslang ne "Perl");
print OUT ") you might be interested in the [source``code|lang_srom.txt] for the program that generates this page. Note in particular how it recomputes parts of the explanatory text to match the chart. The output it generates is in RHTF (RILYBOT hypertext format), part of my automated web authoring system. 
---- (-The Programming Languages Internet Opinion Chart is made possible by-) ` (-[pb[AltaVista(r) logo|http://a12.g.akamai.net/7/12/282/09/av.com/static/i/2/logo_hmp.gif]|$avurl]-) AltaVista(r) is a registered trademark of AltaVista(r) Company, and the AltaVista(r) Logo is a trademark of AltaVista(r) Company. This site is not endorsed by, sponsored by, or affiliated with AltaVista(r) Company. AppleScript(r) is a registered trademark of Apple Computer, Inc. Java and JavaScript(r) are registered trademarks of Sun Microsystems. Linux(r) is a registered trademark of Linus Torvalds. Maple(r) is a registered trademark of Waterloo Maple, Inc. PostScript(r) is a registered trademark of Adobe Systems, Inc. Visual Basic(r) is a registered trademark of Microsoft Corporation. ZDNet is a trademark of Ziff-Davis Publishing Company. ";

# Buffered write errors only show up at close time, so report them.
close(OUT) or warn "error closing lang_srom.rhtf: $!";

# update public copy of myself
system("cp lang_srom.per lang_srom.txt");

# log max hits (this is to measure size of the Internet)
# chdir with no argument moves to the home directory; the log path below is
# relative to it. Logging is best-effort: a failure is reported but does not
# stop the final "touch".
chdir() or warn "cannot chdir to home directory: $!";
if (open(OUT, ">> var/lang_srom_log")) {
  print OUT "$date (m$servid) [$maxhits $maxhwhat]";
  # six logged counts: sucks/rules/rocks for each %permlog slot; a slot
  # that was never filled is logged as 0
  print OUT (" " . join(" ", map { ($pqarr[$_] || 0) + 0 } (0 .. 5)) . "\n");
  close(OUT) or warn "error closing var/lang_srom_log: $!";
}
else {
  warn "cannot append to var/lang_srom_log: $!";
}

# This causes my web-authoring software to notice that something has changed,
# which makes it compile the RHTF into HTML and upload to the web server.
system("touch .");

exit 0;