[116798] branches/gsoc11-statistics/stats-server

cal at macports.org cal at macports.org
Fri Feb 7 11:46:32 PST 2014


Revision: 116798
          https://trac.macports.org/changeset/116798
Author:   cal at macports.org
Date:     2014-02-07 11:46:31 -0800 (Fri, 07 Feb 2014)
Log Message:
-----------
macports stats: move bin directory from level above, adjust for deployment

Added Paths:
-----------
    branches/gsoc11-statistics/stats-server/bin/
    branches/gsoc11-statistics/stats-server/bin/add_ports
    branches/gsoc11-statistics/stats-server/bin/generate_portspy
    branches/gsoc11-statistics/stats-server/bin/generate_seed
    branches/gsoc11-statistics/stats-server/bin/new_ports
    branches/gsoc11-statistics/stats-server/bin/populate.py


Property changes on: branches/gsoc11-statistics/stats-server/bin
___________________________________________________________________
Added: svn:ignore
   + bundler
erubis
rackup
rails
rake
rdoc
ri
sprockets
thor
tilt
tt
uuid


Added: branches/gsoc11-statistics/stats-server/bin/add_ports
===================================================================
--- branches/gsoc11-statistics/stats-server/bin/add_ports	                        (rev 0)
+++ branches/gsoc11-statistics/stats-server/bin/add_ports	2014-02-07 19:46:31 UTC (rev 116798)
@@ -0,0 +1,78 @@
+#!/usr/bin/env ruby
+require 'rubygems'
+require 'active_record'
+require 'fileutils'
+#require 'trac4r'
+
+# Directory one level above the directory that contains this script.
+ROOT = File.expand_path(File.dirname(__FILE__) + "/../") 
+# File whose modification time is read as the time of the previous run.
+TIME_FILE = "#{ROOT}/sync-timestamp"
+# Helper script that is run below to list port records.
+NEW_PORTS = "#{ROOT}/bin/new_ports"
+# Directory passed to NEW_PORTS as its directory argument.
+PORT_INDEX = "/opt/local/var/macports/sources/rsync.macports.org/release/tarballs/ports"
+# Root used to locate the model files and config/database.yml.
+RAILS_ROOT = "#{ROOT}"
+# Key of the config/database.yml section used for the database connection.
+MODE = 'production'
+
+# Load the two model classes used below.
+require File.expand_path(RAILS_ROOT + '/app/models/category.rb',  __FILE__)
+require File.expand_path(RAILS_ROOT + '/app/models/port.rb',  __FILE__)
+
+
+# Time of the previous run: the mtime of TIME_FILE, or 0 when the file does
+# not exist yet. The value is handed to new_ports through its -m option.
+# File.exist? is used because File.exists? is deprecated and no longer
+# available in current Ruby releases.
+$mtime = File.exist?(TIME_FILE) ? File.stat(TIME_FILE).mtime.to_i : 0
+
+# Stamp the file now so the next run only looks at later changes.
+FileUtils.touch(TIME_FILE)
+
+$ports = Array.new
+$hashed_data = Hash.new
+
+db_info = YAML.load_file(File.expand_path(RAILS_ROOT + '/config/database.yml',  __FILE__))
+#db_info['development']['database'] = RAILS_ROOT + "/" + db_info['development']['database']
+#puts db_info[MODE].to_yaml
+ActiveRecord::Base.establish_connection(db_info[MODE])
+#trac = Trac.new(TRAC_URL, TRAC_USER, TRAC_PASS)
+
+fp = IO.popen("#{NEW_PORTS} -m #{$mtime} #{PORT_INDEX}")
+new_ports = fp.read.split("\n")
+
+if new_ports.count > 0
+  new_ports << "" #add last blank line
+end
+
+
+new_ports.each do |line|
+  unless (line == "")
+    data = line.match(/(\S+):\s+\{?(.+)\}?$/)
+    unless data.nil? #field missing, should record this if it happens
+      $hashed_data[data[1].to_sym] = data[2]
+    end
+  else
+    category_name = $hashed_data[:categories].try(:split, " ").try(:[], 0)
+    $category = Category.find_by_name(category_name)
+    if $category.nil?
+      $category = Category.new({:name => category_name})
+      $category.save
+    end
+
+    port = Port.find_by_name($hashed_data[:name])
+    if port.nil?
+      port = Port.new
+    end
+
+    port[:name] = $hashed_data[:name]
+    port[:path] = $hashed_data[:portdir]
+    port[:version] = $hashed_data[:version]
+    port[:description] = $hashed_data[:description]
+    port[:licenses] = $hashed_data[:license]
+    port[:category_id] = $category.id
+    port[:variants] = $hashed_data[:variants]
+    port[:maintainers] = $hashed_data[:maintainers]
+    port[:platforms] = $hashed_data[:platforms]
+    port[:categories] = $hashed_data[:categories]
+
+    $ports << [$hashed_data, port]
+    port.save
+	puts "Saved #{port[:name]}, version #{port[:version]}"
+    $hashed_data = {}
+  end
+end


Property changes on: branches/gsoc11-statistics/stats-server/bin/add_ports
___________________________________________________________________
Added: svn:executable
   + *

Added: branches/gsoc11-statistics/stats-server/bin/generate_portspy
===================================================================
--- branches/gsoc11-statistics/stats-server/bin/generate_portspy	                        (rev 0)
+++ branches/gsoc11-statistics/stats-server/bin/generate_portspy	2014-02-07 19:46:31 UTC (rev 116798)
@@ -0,0 +1,65 @@
+#!/usr/bin/env ruby
+
+### Generate a valid ports.py for use in populating the database with
+### sample submissions. ports.py will be used by populate.py
+
+# This file adapted from generate_seed which is adapted from add_ports from MPWA
+
+# Note - this file must be executed from inside RAILS_ROOT 
+# otherwise it won't be able to connect to the database
+
+require 'rubygems'
+require 'active_record'
+require 'fileutils'
+
+# Directory one level above the directory that contains this script.
+ROOT = File.expand_path(File.dirname(__FILE__) + "/../")
+# Directory that ports.py is written to.
+BIN_ROOT = "#{ROOT}/bin"
+# Root used to locate the model file and config/database.yml. This script
+# sits in the application's own bin directory, so the root is ROOT itself,
+# as in add_ports and generate_seed; the extra "/stats-server" component
+# pointed one level too deep.
+RAILS_ROOT = "#{ROOT}"
+require File.expand_path(RAILS_ROOT + '/app/models/port.rb',  __FILE__)
+
+$ports = Array.new
+$hashed_data = Hash.new
+
+# Connect using the 'development' section of config/database.yml.
+db_info = YAML.load_file(File.expand_path(RAILS_ROOT + '/config/database.yml',  __FILE__))
+ActiveRecord::Base.establish_connection(db_info['development'])
+ 
+
+# Reopens String to add a helper for embedding text in single-quoted literals.
+class String
+  # Escape single quotes
+  #
+  # Returns a copy of the string in which every ' is preceded by a backslash.
+  # The replacement is written with four backslashes because gsub itself
+  # treats a backslash in the replacement string as an escape character.
+  def escape_single_quotes
+    self.gsub(/'/, "\\\\'")
+  end
+end
+
+# Nil-safe wrapper around String#escape_single_quotes.
+#
+# str - a String, or nil.
+#
+# Returns the escaped string, or nil when str is nil.
+def esc(str)
+  if not str.nil?
+    str.escape_single_quotes
+  else
+    str
+  end
+end
+
+path = "#{BIN_ROOT}/ports.py"
+portfile = File.new(path, "w")
+
+if not portfile
+  puts "Unable to open #{path}" 
+end
+
+portfile.syswrite('port_list = [ ')
+
+# Output an array of dictionaries. Each dictionary represents a port
+# The dicts have the keys name, version, variants.
+
+# Load all ports
+ports = Port.all
+ports.each do | port |
+  # Write the port
+  portfile.syswrite('{')
+  portfile.syswrite("\'name\': \'#{port.name}\', \'version\': \'#{port.version}\', \'variants\': \'#{port.variants}'")
+  portfile.syswrite('},')
+end
+
+portfile.syswrite(']')
+
+portfile.close
\ No newline at end of file


Property changes on: branches/gsoc11-statistics/stats-server/bin/generate_portspy
___________________________________________________________________
Added: svn:executable
   + *

Added: branches/gsoc11-statistics/stats-server/bin/generate_seed
===================================================================
--- branches/gsoc11-statistics/stats-server/bin/generate_seed	                        (rev 0)
+++ branches/gsoc11-statistics/stats-server/bin/generate_seed	2014-02-07 19:46:31 UTC (rev 116798)
@@ -0,0 +1,113 @@
+#!/usr/bin/env ruby
+
+### Generate a valid seeds.rb for use in seeding the database with 
+### valid ports and categories
+
+# Note - this file must be executed from inside RAILS_ROOT 
+# otherwise it won't be able to connect to the database
+
+require 'rubygems'
+require 'active_record'
+require 'fileutils'
+
+# File whose modification time is read as the time of the previous run.
+TIME_FILE = "/var/tmp/gsoc11-mpwa-sync"
+# Directory one level above the directory that contains this script.
+ROOT = File.expand_path(File.dirname(__FILE__) + "/../") 
+# Helper script that is run below to list port records.
+NEW_PORTS = "#{ROOT}/bin/new_ports"
+# Directory passed to NEW_PORTS as its directory argument.
+PORT_INDEX = "/opt/local/var/macports/sources/rsync.macports.org/release/tarballs/ports"
+# Root used to locate the model files and config/database.yml.
+RAILS_ROOT = "#{ROOT}"
+# Key of the config/database.yml section used for the database connection.
+MODE = "production"
+
+# Load the two model classes used below.
+require File.expand_path(RAILS_ROOT + '/app/models/category.rb',  __FILE__)
+require File.expand_path(RAILS_ROOT + '/app/models/port.rb',  __FILE__)
+
+
+# Time of the previous run: the mtime of TIME_FILE, or 0 when the file does
+# not exist yet. The value is handed to new_ports through its -m option.
+# File.exist? is used because File.exists? is deprecated and no longer
+# available in current Ruby releases.
+$mtime = File.exist?(TIME_FILE) ? File.stat(TIME_FILE).mtime.to_i : 0
+
+# Stamp the file now so the next run only looks at later changes.
+FileUtils.touch(TIME_FILE)
+
+# Output encoding magic comment
+puts "# coding: UTF-8"
+
+$ports = Array.new
+$hashed_data = Hash.new
+
+db_info = YAML.load_file(File.expand_path(RAILS_ROOT + '/config/database.yml',  __FILE__))
+ActiveRecord::Base.establish_connection(db_info[MODE])
+
+fp = IO.popen("#{NEW_PORTS} -m #{$mtime} #{PORT_INDEX}")
+new_ports = fp.read.split("\n")
+
+if new_ports.count > 0
+  new_ports << "" #add last blank line
+end
+
+# Reopens String to add a helper for embedding text in single-quoted literals.
+class String
+  # Escape single quotes
+  #
+  # Returns a copy of the string in which every ' is preceded by a backslash.
+  # The replacement is written with four backslashes because gsub itself
+  # treats a backslash in the replacement string as an escape character.
+  def escape_single_quotes
+    self.gsub(/'/, "\\\\'")
+  end
+end
+
+# Nil-safe wrapper around String#escape_single_quotes.
+#
+# str - a String, or nil.
+#
+# Returns the escaped string, or nil when str is nil.
+def esc(str)
+  if not str.nil?
+    str.escape_single_quotes
+  else
+    str
+  end
+end
+
+new_ports.each do |line|
+  unless (line == "")
+    data = line.match(/(\S+):\s+\{?(.+)\}?$/)
+    unless data.nil? #field missing, should record this if it happens
+      $hashed_data[data[1].to_sym] = data[2]
+    end
+  else
+    category_name = $hashed_data[:categories].try(:split, " ").try(:[], 0)
+    $category = Category.find_by_name(category_name)
+    if $category.nil?
+      $category = Category.new({:name => category_name})
+      puts "category = Category.new({:name => \'#{category_name}\'})"
+      puts "category.save"
+      $category.save
+    end
+
+    port = Port.find_by_name($hashed_data[:name])
+    if port.nil?
+      port = Port.new
+    end
+
+    port[:name] = $hashed_data[:name]
+    port[:path] = $hashed_data[:portdir]
+    port[:version] = $hashed_data[:version]
+    port[:description] = $hashed_data[:description]
+    port[:licenses] = $hashed_data[:license]
+    port[:category_id] = $category.id
+    port[:variants] = $hashed_data[:variants]
+    port[:maintainers] = $hashed_data[:maintainers]
+    port[:platforms] = $hashed_data[:platforms]
+    port[:categories] = $hashed_data[:categories]
+
+    $ports << [$hashed_data, port]
+    
+    puts "port = Port.new"
+    puts "port[:name] = \'#{esc($hashed_data[:name])}\'"
+    puts "port[:path] = \'#{esc($hashed_data[:portdir])}\'"
+    puts "port[:version] = \'#{esc($hashed_data[:version])}\'"
+    puts "port[:description] = \'#{esc($hashed_data[:description])}\'"
+    puts "port[:licenses] = \'#{esc($hashed_data[:license])}\'"
+    puts "port[:category_id] = #{$category.id}"
+    puts "port[:variants] = \'#{esc($hashed_data[:variants])}\'"
+    puts "port[:maintainers] = \'#{esc($hashed_data[:maintainers])}\'"
+    puts "port[:platforms] = \'#{esc($hashed_data[:platforms])}\'"
+    puts "port[:categories] = \'#{esc($hashed_data[:categories])}\'"
+    puts "port.save"
+
+    port.save
+    $hashed_data = {}
+  end
+end


Property changes on: branches/gsoc11-statistics/stats-server/bin/generate_seed
___________________________________________________________________
Added: svn:executable
   + *

Added: branches/gsoc11-statistics/stats-server/bin/new_ports
===================================================================
--- branches/gsoc11-statistics/stats-server/bin/new_ports	                        (rev 0)
+++ branches/gsoc11-statistics/stats-server/bin/new_ports	2014-02-07 19:46:31 UTC (rev 116798)
@@ -0,0 +1,140 @@
+#!/bin/sh
+# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:filetype=tcl:et:sw=4:ts=4:sts=4
+# Run the Tcl interpreter \
+exec /usr/bin/tclsh "$0" "$@"
+
+# Source the MacPorts fastload script; any error while doing so is swallowed
+# by the catch.
+catch {source \
+    [file join "/opt/local/share/macports/Tcl" macports1.0 macports_fastload.tcl]}
+package require macports
+package require Pextlib
+
+# Globals
+array set ui_options        [list]
+array set global_options    [list]
+array set global_variations [list]
+set port_options            [list]
+
+# Pass global options into mportinit
+mportinit ui_options global_options global_variations
+
+# Standard procedures
+# Print a short usage summary to standard output. Any arguments are ignored.
+proc print_usage args {
+    global argv0
+    puts "Usage: $argv0 \[-d\] -m <time> \<directory\>"
+    puts "-d:\tOutput debugging information"
+    puts "-m:\tOutput ports newer than the given mtime"
+}
+
+# Callback handed to mporttraverse at the end of this script; portdir is the
+# port directory being visited.
+# When the quick index has an entry for the port and the Portfile's mtime is
+# greater than oldmtime, the entry is read back from the old index and
+# written to fd as "field: value" lines followed by one empty line.
+# Nothing is written for ports without an entry or with an older Portfile.
+proc pindex {portdir} {
+    global target oldfd oldmtime qindex fd directory outdir \
+           ui_options port_options
+
+    # only ports that have an entry in the quick index are considered
+    if {[info exists qindex([string tolower [file tail $portdir]])]} {
+        try {
+            set mtime [file mtime [file join $directory $portdir Portfile]]
+            if {$oldmtime < $mtime} {
+                # the quick index stores the offset of the entry in the old index
+                set offset $qindex([string tolower [file tail $portdir]])
+                seek $oldfd $offset
+                # first line of the entry: port name and length of the data after it
+                gets $oldfd line
+                set name [lindex $line 0]
+                set len [lindex $line 1]
+                # the data itself is a key/value list that is loaded into portinfo
+                set line [read $oldfd $len]
+                array set portinfo $line
+
+                if {[info exists ui_options(ports_debug)]} {
+                    puts "Found entry for $portdir"
+                }
+
+                foreach field [array names portinfo] {
+                    puts $fd "${field}: $portinfo($field)"
+                }
+                # an empty line ends the record
+                puts $fd ""
+
+                return
+            }
+        } catch {*} {
+            ui_warn "failed to open entry for ${portdir}"
+        }
+    }
+}
+
+# More than four arguments cannot be a valid invocation.
+if {[expr $argc > 4]} {
+    print_usage
+    exit 1
+}
+
+# Walk the arguments. One that starts with "-" must be -d or -m, and -m also
+# consumes the argument after it as the mtime. Any other argument is taken as
+# the directory, joined onto the current working directory.
+for {set i 0} {$i < $argc} {incr i} {
+    set arg [lindex $argv $i]
+    switch -regex -- $arg {
+        {^-.+} {
+            if {$arg == "-d"} { # Turn on debug output
+                set ui_options(ports_debug) yes
+            } elseif {$arg == "-m"} { # output ports newer than mtime
+                incr i
+                set oldmtime [lindex $argv $i]
+            } else {
+                puts stderr "Unknown option: $arg"
+                print_usage
+                exit 1
+            }
+        }
+        default {
+            set directory [file join [pwd] $arg]
+        }
+    }
+}
+
+# Defaults when the arguments did not supply a directory or an mtime.
+if {![info exists directory]} {
+    set directory .
+}
+
+if {![info exists oldmtime]} {
+    set oldmtime 0
+}
+
+# cd to input directory
+if {[catch {cd $directory} result]} {
+    puts stderr "$result"
+    exit 1
+} else {
+    # keep the absolute path of the input directory
+    set directory [pwd]
+}
+
+# Set output directory to full path
+# NOTE(review): nothing in this script assigns outdir before this point, so
+# as written only the else branch below runs.
+if {[info exists outdir]} {
+    if {[catch {file mkdir $outdir} result]} {
+        puts stderr "$result"
+        exit 1
+    }
+    if {[catch {cd $outdir} result]} {
+        puts stderr "$result"
+        exit 1
+    } else {
+        set outdir [pwd]
+    }
+} else {
+    set outdir $directory
+}
+
+# The existing index is expected at PortIndex in outdir, with its quick index
+# beside it at PortIndex.quick.
+set outpath [file join $outdir PortIndex]
+# open old index for comparison
+if {[file isfile $outpath] && [file isfile ${outpath}.quick]} {
+    if {![catch {set oldfd [open $outpath r]}] && ![catch {set quickfd [open ${outpath}.quick r]}]} {
+        if {![catch {set quicklist [read $quickfd]}]} {
+            # each quick index line is split into a key and the value stored for it
+            foreach entry [split $quicklist "\n"] {
+                set qindex([lindex $entry 0]) [lindex $entry 1]
+            }
+        }
+        close $quickfd
+    }
+} else {
+    # NOTE(review): newest is not read anywhere else in this script.
+    set newest 0
+}
+
+# Records are written to standard output while pindex is run over the tree.
+set fd stdout
+mporttraverse pindex $directory
+if {[info exists oldfd]} {
+    close $oldfd
+}
+close $fd


Property changes on: branches/gsoc11-statistics/stats-server/bin/new_ports
___________________________________________________________________
Added: svn:executable
   + *

Added: branches/gsoc11-statistics/stats-server/bin/populate.py
===================================================================
--- branches/gsoc11-statistics/stats-server/bin/populate.py	                        (rev 0)
+++ branches/gsoc11-statistics/stats-server/bin/populate.py	2014-02-07 19:46:31 UTC (rev 116798)
@@ -0,0 +1,149 @@
+#!/usr/bin/env python
+
+# populate.py
+# Simulates multiple users submitting realistic data
+# Uses curl to submit sample data and test rails submission code
+
+import random
+import string
+import uuid
+import json
+import subprocess
+import time
+from ports import port_list
+
+# Number of submissions made by main()
+max_users = 250
+# Upper bound on the number of ports chosen for one submission
+max_ports_per_user = 500
+
+# Submission endpoints; submit() currently posts to url_dev
+url_testdeploy = 'http://statsdeploy.heroku.com/submissions'
+url_dev    = 'http://127.0.0.1:3000/submissions' 
+
+# Candidate values; build_os() picks one entry from each list at random
+macports_versions = ['1.9.2', '1.9.99', '2.0']
+osx_versions = ['10.4', '10.5', '10.6']
+os_archs = ['i386', 'ppc']
+os_platforms = ['darwin']
+build_archs = ['x86_32', 'x86_64']
+gcc_versions = ['4.2.1', '4.3.6', '4.4.6', '4.5.3', '4.6.1']
+xcode_versions = ['2.5', '3.0', '3.1', '3.2', '4.0']
+
+# Generated user ids
+users = []
+
+# The probability that a new user will be added starts at prob_new_user
+prob_new_user = 95 # 95 %
+
+# Randomly choose entries for each category
+def build_os():
+	"""Return a dict describing a randomly assembled system configuration.
+
+	Each value is drawn independently with random.choice from the matching
+	module-level list of candidates.
+	"""
+	os = {}
+	os['macports_version'] = random.choice(macports_versions)
+	os['osx_version'] = random.choice(osx_versions)
+	os['os_arch'] = random.choice(os_archs)
+	os['os_platform'] = random.choice(os_platforms)
+	os['build_arch'] = random.choice(build_archs)
+	os['gcc_version'] = random.choice(gcc_versions)
+	os['xcode_version'] = random.choice(xcode_versions)
+	return os
+
+# Build up a space separated list of variants
+# It selects a random number of variants to include from a list of valid variants
+# for a port
+def build_variants(variants):
+	"""Return a random subset of the given variants as one string.
+
+	variants -- whitespace separated variant names.
+
+	Between zero and all of the names are picked without repetition and
+	joined with single spaces, in the order random.sample returns them.
+	"""
+	varlist = variants.split()
+	
+	# How many variants to keep (may be none, may be all of them)
+	size = random.randint(0, len(varlist))
+	randomlist = random.sample(varlist, size)
+	
+	return " ".join(randomlist)
+
+# Generate a list of ports for this user.
+def build_ports():
+	ports = []
+	
+	# Choose a random number between 0 and max_ports_per_user
+	n_ports = random.randint(0, max_ports_per_user)
+	
+	# Generate n_port ports
+	for i in range(n_ports):
+		
+		# Choose a random port from the list of all ports
+		port = random.choice(port_list)
+		
+		# Generate random version strings by appending a digit to the existing version
+		# eg: 2.2 -> 2.2_6
+		# Only append once, check if this port's version has already been modified
+		if not 'mod' in port:
+			append = ''.join(random.choice(string.digits) for i in xrange(1))
+			port['version'] = port['version'] + '_' + append
+			port['mod'] = True # Flag that this port's version has been modified
+		
+		# Build up a list of variants from all valid variants for this port
+		port['variants'] = build_variants(port['variants'])
+		
+		# Append to the list of ports to submit for this user
+		ports.append(port)
+	return ports
+
+def decay_probability():
+	"""Lower the global prob_new_user according to the current user count.
+
+	Every call subtracts len(users) * 0.0001 from prob_new_user and then
+	clamps the result so that it never drops below 5.
+	"""
+	global prob_new_user
+	
+	# Over time the probability that a new user will be added decreases as more users participate
+	decay_factor = 0.0001
+	n_users = len(users)
+	prob_decay = n_users * decay_factor
+	prob = prob_new_user - prob_decay
+	
+	prob_new_user = prob
+	
+	# Always keep a minimum 5% chance of growth to simulate people who are
+	# new to MacPorts coming in and participating
+	if prob <= 5:
+		prob_new_user = 5
+
+def generate_uuid():
+	"""Create a new random UUID string, record it in users and return it."""
+	idstr = str(uuid.uuid4())
+	users.append(idstr)
+	
+	return idstr
+
+def get_uuid():
+	"""Return the user id for the next submission.
+
+	The very first call always creates a new id. Later calls first decay
+	prob_new_user, then create a new id with that percentage chance and
+	otherwise return one of the ids already in users.
+	"""
+	# Check if there are any available uuids
+	if len(users) == 0:
+		return generate_uuid()
+	
+	decay_probability()
+	
+	# Add a new user 'prob_new_user' percent of the time
+	# This simulates a new user deciding to participate
+	x = random.uniform(1,100)
+	# Progress output (Python 2 print statement)
+	print str(prob_new_user) + " n_users = " + str(len(users))
+	if x <= prob_new_user:
+		return generate_uuid()
+	else:
+		# Get a random uuid from the list (simulate an existing user updating their info)
+		# Note: this local name hides the uuid module inside this function only
+		uuid = random.choice(users)
+		return uuid
+
+def submit():
+	#url = url_testdeploy
+	url = url_dev
+	idstr = get_uuid()
+	
+	data = {}
+	
+	data['id'] = idstr
+	data['os'] = build_os()
+	data['active_ports'] = build_ports()
+	
+	json_enc = json.dumps(data)
+	args = "-d \'submission[data]=%s\'" % json_enc
+	pid = subprocess.Popen('curl ' + args + ' ' + url, shell=True)
+	pid.wait()
+
+def main():
+	"""Send max_users submissions, pausing 5 ms after each one."""
+	# Seed the random number generator
+	random.seed()
+	for x in range(max_users):
+		submit()
+		time.sleep(0.005)
+
+# Run only when executed as a script, not when imported
+if __name__ == '__main__':
+	main()


Property changes on: branches/gsoc11-statistics/stats-server/bin/populate.py
___________________________________________________________________
Added: svn:executable
   + *
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.macosforge.org/pipermail/macports-changes/attachments/20140207/72080060/attachment.html>


More information about the macports-changes mailing list