IT博客汇
  • 首页
  • 精华
  • 技术
  • 设计
  • 资讯
  • 扯淡
  • 权利声明
  • 登录 注册

    reviews.llvm.org became a read-only archive

    MaskRay发表于 2023-12-30 13:14:18
    love 0

    For approximately 10 years, reviews.llvm.org functioned as the code review site for the LLVM project, utilizing a Phabricator instance. This website hosted numerous invaluable code review discussions. However, following LLVM's transition to GitHub pull requests, there arises a necessity for a read-only archive of the existing Phabricator instance.

    The intent is to eliminate a SQL engine. Phabricator operates on a complex database schema. To minimize time investment, the most feasible approach seems to involve downloading the static HTML pages and employing a lightweight scraping process.

    Raphaël Gomès developed phab-archive to serve a read-only archive for Mercurial's Phabricator instance. I have modified the code to suit reviews.llvm.org.

    At this juncture, the only requirement is someone with domain access to redirect reviews.llvm.org to the archive website. Then we can obtain an HTTPS certificate.

    Data

    The file hierarchy is quite straightforward. archive/unprocessed/diffs contains raw HTML pages while templates/diffs contains scraped HTML pages alongside patch files.

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    % tree archive/unprocessed/diffs | head -n 12
    archive/unprocessed/diffs
    ├── 1
    │   ├── D1-4.html
    │   ├── D1-5.html
    │   └── D1.html
    ├── 10
    │   ├── D10-33.html
    │   └── D10.html
    ├── 100
    │   ├── D100000-335683.html
    │   ├── D100000-335688.html
    │   ├── D100000-335689.html
    % tree templates/diffs/ | head -n 20
    templates/diffs/
    ├── 1
    │   ├── D1-4.diff
    │   ├── D1-4.html
    │   ├── D1-5.diff
    │   ├── D1-5.html
    │   ├── D1.diff
    │   └── D1.html
    ├── 10
    │   ├── D10-33.diff
    │   ├── D10-33.html
    │   ├── D10.diff
    │   └── D10.html
    ├── 100
    │   ├── D100000-335683.diff
    │   ├── D100000-335683.html
    │   ├── D100000-335688.diff
    │   ├── D100000-335688.html
    │   ├── D100000-335689.diff
    │   ├── D100000-335689.html
    % cat templates/diffs/1/D1-4.diff
    Index: include/llvm/ADT/StringMap.h
    ===================================================================
    --- include/llvm/ADT/StringMap.h
    +++ include/llvm/ADT/StringMap.h
    @@ -34,7 +34,7 @@
    public:
    template <typename InitTy>
    static void Initialize(StringMapEntry<ValueTy> &T, InitTy InitVal) {
    - T.second = InitVal;
    + T.test= InitVal;
    }
    };
    1
    2
    3
    4
    % du -sh archive/unprocessed/
    270G archive/unprocessed/
    % du -sh templates/diffs
    282G templates/diffs

    Nginx

    I aim to utilize Nginx solely to serve URIs.

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    /D2 => /diffs/2/D2.html
    /D2?id=&download=true => /diffs/2/D2.diff
    /D2?id=10 => /diffs/2/D2-10.html
    /D2?id=10&download=true => /diffs/2/D2-10.diff

    /D123?id=5 => /diffs/123/D123-5.html
    /D1234?id=5 => /diffs/123/D1234-5.html

    /rL$svn_rev => https://github.com/llvm/llvm-project/commit/$git_commit
    /rG$git_commit => https://github.com/llvm/llvm-project/commit/$git_commit

    We just need URL mapping and some Nginx location directives.

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    # Raise the map hash limits; presumably required because the included
    # SVN-revision -> Git-commit table is large (verify against the file size).
    map_hash_max_size 400000;
    map_hash_bucket_size 128;

    # Capture the SVN revision number from /rL<rev> requests.
    # $svn_rev is empty for every other URI.
    map $request_uri $svn_rev {
        ~^/rL([0-9]+) $1;
    }
    # Translate the SVN revision to a Git commit using the included table.
    # $git_commit is empty when the revision has no entry.
    map $svn_rev $git_commit {
        include /var/www/phab-archive/svn_url_rewrite.conf;
    }

    server {
        listen 80 default_server;
        listen [::]:80 default_server;

        # /rL<svn_rev> => the matching commit on GitHub.
        if ($git_commit) {
            return 301 https://github.com/llvm/llvm-project/commit/$git_commit;
        }

        root /var/www/phab-archive/www;
        server_name _;

        types {
            text/html html;
            text/plain diff;
        }

        # /rG<git_commit> => the same commit on GitHub. No lookup table is
        # needed because the hash is already part of the URL.
        location ~ ^/rG(?<commit>[0-9a-fA-F]+)$ {
            return 301 https://github.com/llvm/llvm-project/commit/$commit;
        }

        # Revisions with 1-3 digits live in a directory named after the full
        # number: /D2 => /diffs/2/D2.html
        # Only digits are accepted so that arbitrary text never reaches the
        # filesystem path.
        location ~ "^/D(?<diff>[0-9]{1,3})$" {
            set $ext ".html";
            # ?download=true selects the raw patch instead of the HTML page.
            if ($arg_download) { set $ext ".diff"; }
            # ?id=<n> selects a specific diff; the trailing "?" drops the
            # original query string from the rewritten URI.
            if ($arg_id ~ ^(\d+)$) { rewrite ^ /diffs/$diff/D$diff-$arg_id$ext? last; }
            try_files /diffs/$diff/D$diff$ext =404;
        }

        # Revisions with 4+ digits are bucketed by their first three digits:
        # /D1234 => /diffs/123/D1234.html
        location ~ "^/D(?<dir>[0-9]{3})(?<tail>[0-9]+)$" {
            set $ext ".html";
            if ($arg_download) { set $ext ".diff"; }
            if ($arg_id ~ ^(\d+)$) { rewrite ^ /diffs/$dir/D$dir$tail-$arg_id$ext? last; }
            try_files /diffs/$dir/D$dir$tail$ext =404;
        }
    }


沪ICP备19023445号-2号
友情链接