+
+# Request parameters may arrive through more than one channel (presently the
+# CGI query string and PATH_INFO). They are all gathered into the single
+# %input_params hash during validation, so that later consumers (e.g. href())
+# never need to care where a given parameter came from.
+
+our %input_params = ();
+
+# %input_params is keyed by the long parameter name. href() also uses the
+# table below to turn those long names into their short CGI equivalents, and
+# because that is the most frequent kind of lookup, the table is stored as
+# name -> CGI key rather than the other way round. The array form keeps the
+# pairs in their declared order; the hash form is for direct lookups.
+#
+# XXX: Warning: if you touch this table, check whether the search form needs
+# updating, too.
+
+our @cgi_param_mapping = (
+	'project'           => 'p',
+	'action'            => 'a',
+	'file_name'         => 'f',
+	'file_parent'       => 'fp',
+	'hash'              => 'h',
+	'hash_parent'       => 'hp',
+	'hash_base'         => 'hb',
+	'hash_parent_base'  => 'hpb',
+	'page'              => 'pg',
+	'order'             => 'o',
+	'searchtext'        => 's',
+	'searchtype'        => 'st',
+	'snapshot_format'   => 'sf',
+	'extra_options'     => 'opt',
+	'search_use_regexp' => 'sr',
+);
+our %cgi_param_mapping = @cgi_param_mapping;
+
+# The set of possible actions, needed for validation: each action name maps
+# to a reference to the git_* subroutine of the same name.
+our %actions = (
+	'blame'            => \&git_blame,
+	'blobdiff'         => \&git_blobdiff,
+	'blobdiff_plain'   => \&git_blobdiff_plain,
+	'blob'             => \&git_blob,
+	'blob_plain'       => \&git_blob_plain,
+	'commitdiff'       => \&git_commitdiff,
+	'commitdiff_plain' => \&git_commitdiff_plain,
+	'commit'           => \&git_commit,
+	'forks'            => \&git_forks,
+	'heads'            => \&git_heads,
+	'history'          => \&git_history,
+	'log'              => \&git_log,
+	'patch'            => \&git_patch,
+	'patches'          => \&git_patches,
+	'rss'              => \&git_rss,
+	'atom'             => \&git_atom,
+	'search'           => \&git_search,
+	'search_help'      => \&git_search_help,
+	'shortlog'         => \&git_shortlog,
+	'summary'          => \&git_summary,
+	'tag'              => \&git_tag,
+	'tags'             => \&git_tags,
+	'tree'             => \&git_tree,
+	'snapshot'         => \&git_snapshot,
+	'object'           => \&git_object,
+	# those below don't need $project
+	'opml'             => \&git_opml,
+	'project_list'     => \&git_project_list,
+	'project_index'    => \&git_project_index,
+);
+
+# Finally, the extra_options that are allowed at all: each key is an option,
+# and its value lists the commands (actions) that allow it.
+our %allowed_options = (
+	'--no-merges' => [ 'rss', 'atom', 'log', 'shortlog', 'history' ],
+);
+
+# fill %input_params with the CGI parameters. All values except for 'opt'
+# should be single values, but opt can be an array. We should probably
+# build an array of parameters that can be multi-valued, but since for the time
+# being it's only this one, we just single it out
+#
+# Recent CGI.pm versions warn when param() is called in list context and
+# provide multi_param() for fetching every value of a parameter, so that is
+# used for 'opt' whenever the installed CGI.pm has it; older versions keep
+# using param(). Single-valued parameters are fetched in scalar context.
+while (my ($name, $symbol) = each %cgi_param_mapping) {
+	if ($symbol eq 'opt') {
+		my @values = $cgi->can('multi_param')
+			? $cgi->multi_param($symbol)
+			: $cgi->param($symbol);
+		$input_params{$name} = [ @values ];
+	} else {
+		$input_params{$name} = $cgi->param($symbol);
+	}
+}
+
+# now read PATH_INFO and update the parameter list for missing parameters
+#
+# Takes no arguments and returns nothing. It works entirely on globals:
+# $path_info is consumed (leading slashes, the project and the action are
+# stripped from it as they are recognized) and whatever is found is stored in
+# %input_params. Parameters that already carry a true value are left alone;
+# the one exception is 'hash', which loses its format extension in the
+# snapshot case at the end. In that same case the 'suffix' of the matching
+# entry in %known_snapshot_formats is overwritten with the extension that was
+# actually requested.
+#
+# Nothing is done if the project is already known or PATH_INFO is empty, and
+# nothing besides 'project' is set if an action was given in the query string.
+sub evaluate_path_info {
+	return if defined $input_params{'project'};
+	return if !$path_info;
+	$path_info =~ s,^/+,,;
+	return if !$path_info;
+
+	# find which part of PATH_INFO is project: drop trailing path
+	# components one at a time until what is left passes check_head_link()
+	my $project = $path_info;
+	$project =~ s,/+$,,;
+	while ($project && !check_head_link("$projectroot/$project")) {
+		$project =~ s,/*[^/]*$,,;
+	}
+	return unless $project;
+	$input_params{'project'} = $project;
+
+	# do not change any parameters if an action is given using the query string
+	return if $input_params{'action'};
+	$path_info =~ s,^\Q$project\E/*,,;
+
+	# next, check if we have an action
+	my $action = $path_info;
+	$action =~ s,/.*$,,;
+	if (exists $actions{$action}) {
+		# $action is a known key of %actions here; it is quoted anyway so
+		# that it can never be interpreted as a pattern
+		$path_info =~ s,^\Q$action\E/*,,;
+		$input_params{'action'} = $action;
+	}
+
+	# list of actions that want hash_base instead of hash, but can have no
+	# pathname (f) parameter
+	my @wants_base = (
+		'tree',
+		'history',
+	);
+
+	# we want to catch
+	# [$hash_parent_base[:$file_parent]..]$hash_parent[:$file_name]
+	my ($parentrefname, $parentpathname, $refname, $pathname) =
+		($path_info =~ /^(?:(.+?)(?::(.+))?\.\.)?(.+?)(?::(.+))?$/);
+
+	# NOTE: the captures above are tested with defined()/eq '' rather than
+	# for truth, so that a file or ref literally named "0" is not mistaken
+	# for a missing one.
+
+	# first, analyze the 'current' part
+	if (defined $pathname) {
+		# we got "branch:filename" or "branch:dir/"
+		# we could use git_get_type(branch:pathname), but:
+		# - it needs $git_dir
+		# - it does a git() call
+		# - the convention of terminating directories with a slash
+		#   makes it superfluous
+		# - embedding the action in the PATH_INFO would make it even
+		#   more superfluous
+		$pathname =~ s,^/+,,;
+		if ($pathname eq '' || substr($pathname, -1) eq "/") {
+			$input_params{'action'} ||= "tree";
+			$pathname =~ s,/$,,;
+		} else {
+			# the default action depends on whether we had parent info
+			# or not
+			if (defined $parentrefname) {
+				$input_params{'action'} ||= "blobdiff_plain";
+			} else {
+				$input_params{'action'} ||= "blob_plain";
+			}
+		}
+		$input_params{'hash_base'} ||= $refname;
+		$input_params{'file_name'} ||= $pathname;
+	} elsif (defined $refname) {
+		# we got "branch". In this case we have to choose if we have to
+		# set hash or hash_base.
+		#
+		# Most of the actions without a pathname only want hash to be
+		# set, except for the ones specified in @wants_base that want
+		# hash_base instead. It should also be noted that hand-crafted
+		# links having 'history' as an action and no pathname or hash
+		# set will fail, but that happens regardless of PATH_INFO.
+		$input_params{'action'} ||= "shortlog";
+		if (grep { $_ eq $input_params{'action'} } @wants_base) {
+			$input_params{'hash_base'} ||= $refname;
+		} else {
+			$input_params{'hash'} ||= $refname;
+		}
+	}
+
+	# next, handle the 'parent' part, if present
+	if (defined $parentrefname) {
+		# a missing pathspec defaults to the 'current' filename, allowing e.g.
+		# someproject/blobdiff/oldrev..newrev:/filename
+		if (defined $parentpathname) {
+			$parentpathname =~ s,^/+,,;
+			$parentpathname =~ s,/$,,;
+			$input_params{'file_parent'} ||= $parentpathname;
+		} else {
+			$input_params{'file_parent'} ||= $input_params{'file_name'};
+		}
+		# we assume that hash_parent_base is wanted if a path was specified,
+		# or if the action wants hash_base instead of hash
+		if (defined $input_params{'file_parent'} ||
+			grep { $_ eq $input_params{'action'} } @wants_base) {
+			$input_params{'hash_parent_base'} ||= $parentrefname;
+		} else {
+			$input_params{'hash_parent'} ||= $parentrefname;
+		}
+	}
+
+	# for the snapshot action, we allow URLs in the form
+	# $project/snapshot/$hash.ext
+	# where .ext determines the snapshot and gets removed from the
+	# passed $refname to provide the $hash.
+	#
+	# To be able to tell that $refname includes the format extension, we
+	# require the following two conditions to be satisfied:
+	# - the hash input parameter MUST have been set from the $refname part
+	#   of the URL (i.e. they must be equal)
+	# - the snapshot format MUST NOT have been defined already (e.g. from
+	#   CGI parameter sf)
+	# It's also useless to try any matching unless $refname has a dot,
+	# so we check for that too.
+	#
+	# hash may still be undefined at this point (the refname went to
+	# hash_base when a pathname was given), hence the defined() guard in
+	# front of the string comparison.
+	if (defined $input_params{'action'} &&
+		$input_params{'action'} eq 'snapshot' &&
+		defined $refname && index($refname, '.') != -1 &&
+		defined $input_params{'hash'} &&
+		$refname eq $input_params{'hash'} &&
+		!defined $input_params{'snapshot_format'}) {
+		# We loop over the known snapshot formats, checking for
+		# extensions. Allowed extensions are both the defined suffix
+		# (which includes the initial dot already) and the snapshot
+		# format key itself, with a prepended dot.
+		#
+		# The keys are walked in sorted order instead of with each():
+		# leaving an each() loop early would leave the hash's shared
+		# iterator half-way through for whoever iterates over it next,
+		# and the sorted order makes the winning format deterministic
+		# should more than one extension match.
+		for my $fmt (sort keys %known_snapshot_formats) {
+			my $suffix = $known_snapshot_formats{$fmt}{'suffix'};
+			my $hash = $refname;
+			next unless $hash =~ s/(\Q$suffix\E|\Q.$fmt\E)$//;
+			my $sfx = $1;
+			# a valid suffix was found, so set the snapshot format
+			# and reset the hash parameter
+			$input_params{'snapshot_format'} = $fmt;
+			$input_params{'hash'} = $hash;
+			# we also set the format suffix to the one requested
+			# in the URL: this way a request for e.g. .tgz returns
+			# a .tgz instead of a .tar.gz
+			$known_snapshot_formats{$fmt}{'suffix'} = $sfx;
+			last;
+		}
+	}
+
+	return;
+}
+evaluate_path_info();
+
+our $action = $input_params{'action'};