GET/POST request to parse content of a page and obtain the output.

API documentation


Special:ApiHelp/parse

Example 1: Parse content of a page

GET request

Response

{
    "parse": {
        "title": "Pet door",
        "pageid": 3276454,
        "revid": 852892138,
        "text": {
            "*": "<div class=\"mw-parser-output\"><div class=\"thumb tright\"><div class=\"thumbinner\" style=\"width:222px;\"><a href=\"/wiki/File:Doggy_door_exit.JPG\" class=\"image\"><img alt=\"\" src=\"//upload.wikimedia.org/wikipedia/commons/thumb/7/71/Doggy_door_exit.JPG/220px-Doggy_door_exit.JPG\" width=\"220\" height=\"165\" class=\"thumbimage\" srcset=\"//upload.wikimedia.org/wikipedia/commons/thumb/7/71/Doggy_door_exit.JPG/330px-Doggy_door_exit.JPG 1.5x, 
            ...
        }
    }
}

Sample code

Python

#!/usr/bin/python3

"""
    parse.py

    MediaWiki API Demos
    Demo of `Parse` module: Parse content of a page

    MIT License
"""

import requests

SESSION = requests.Session()

API_ENDPOINT = "https://en.wikipedia.org/w/api.php"

REQUEST_PARAMS = {
    "action": "parse",
    "page": "Pet door",
    "format": "json",
}

# Issue the GET request and decode the JSON reply.
response = SESSION.get(url=API_ENDPOINT, params=REQUEST_PARAMS)
payload = response.json()

# The rendered HTML of the page lives under parse.text["*"].
print(payload["parse"]["text"]["*"])

PHP

<?php
/*
    parse.php

    MediaWiki API Demos
    Demo of `Parse` module: Parse content of a page

    MIT License
*/

$apiUrl = "https://en.wikipedia.org/w/api.php";
$query = [
    "action" => "parse",
    "page" => "Pet door",
    "format" => "json"
];

// Append the encoded query string to the endpoint URL.
$requestUrl = $apiUrl . "?" . http_build_query( $query );

// RETURNTRANSFER makes curl_exec() return the body instead of printing it.
$handle = curl_init( $requestUrl );
curl_setopt( $handle, CURLOPT_RETURNTRANSFER, true );
$body = curl_exec( $handle );
curl_close( $handle );

// Decode into an associative array and print the rendered HTML.
$decoded = json_decode( $body, true );

echo( $decoded["parse"]["text"]["*"] );

JavaScript

/**
 * parse.js
 *
 * MediaWiki API Demos
 * Demo of `Parse` module: Parse content of a page
 *
 * MIT License
 */

// origin=* enables anonymous cross-origin requests to the MediaWiki API.
const queryString = new URLSearchParams({
    origin: "*",
    action: "parse",
    page: "Pet door",
    format: "json",
});
const url = "https://en.wikipedia.org/w/api.php?" + queryString;

try {
    // Fetch the page and print its rendered HTML (parse.text["*"]).
    const response = await fetch(url);
    const data = await response.json();
    console.log(data.parse.text["*"]);
} catch (err) {
    console.error(err);
}

MediaWiki JS

/**
 * parse.js
 *
 * MediaWiki API Demos
 * Demo of `Parse` module: Parse content of a page
 * MIT License
 */

const api = new mw.Api();

// Request the parsed (rendered) content of the page and log the HTML,
// which the API returns under parse.text['*'].
api.get( {
	action: 'parse',
	page: 'Pet door',
	format: 'json'
} ).done( ( data ) => {
	console.log( data.parse.text[ '*' ] );
} );

Example 2: Parse a section of a page and fetch its table data

GET request

Response
{
    "parse": {
        "title": "Wikipedia:Unusual articles/Places and infrastructure",
        "pageid": 38664530,
        "wikitext": {
            "*": "===Antarctica===\n<!--[[File:Grytviken church.jpg|thumb|150px|right|A little church in [[Grytviken]] in the [[Religion in Antarctica|Antarctic]].]]-->\n{| class=\"wikitable\"\n|-\n| '''[[Emilio Palma]]'''\n| An Argentine national who is the first person known to be born on the continent of Antarctica.\n|-\n| '''[[Scouting in the Antarctic]]'''\n| Always be prepared for glaciers and penguins.\n|}"
        }
    }
}

Sample code

parse_wikitable.py
#!/usr/bin/python3

"""
    parse_wikitable.py

    MediaWiki Action API Code Samples
    Demo of `Parse` module: Parse a section of a page, fetch its table data and save
    it to a CSV file

    MIT license
"""

import csv
import requests

# Shared HTTP session so repeated requests reuse the same connection.
S = requests.Session()

# English Wikipedia Action API endpoint.
URL = "https://en.wikipedia.org/w/api.php"

# Page whose table data will be extracted.
TITLE = "Wikipedia:Unusual_articles/Places_and_infrastructure"

# prop=wikitext returns raw wiki markup; section=5 limits the parse to
# one numbered section of the page.
PARAMS = {
    'action': "parse",
    'page': TITLE,
    'prop': 'wikitext',
    'section': 5,
    'format': "json"
}


def get_table():
    """Parse a section of a page, fetch its table data and save it to a CSV file.

    Sends the API request defined by PARAMS, extracts (title, description)
    pairs from the wikitable markup in the returned wikitext, and writes
    them to places_and_infrastructure.csv.
    """
    res = S.get(url=URL, params=PARAMS)
    data = res.json()
    wikitext = data['parse']['wikitext']['*']

    entries = []
    # Rows of a wikitable are separated by the "|-" marker.
    for line in wikitext.split('|-'):
        line = line.strip()
        if line.startswith("|"):
            # Drop the leading "| " and split the row into its cells.
            cells = line[2:].split('||')
            parts = cells[0].split("|")
            # First piece is the title wrapped in '''[[...]]''' markup;
            # the next "|" piece is the description cell.
            entry = parts[0].strip("'''[[]]\n"), parts[1].strip("\n")
            entries.append(entry)

    # newline="" stops csv.writer from emitting blank rows on Windows, and
    # the context manager closes the file even if writing raises.
    with open("places_and_infrastructure.csv", "w",
              newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerows(entries)

if __name__ == '__main__':
    get_table()

Possible errors

Code Info
missingtitle The page you specified doesn't exist.
nosuchsection There is no section section in page.
pagecannotexist Namespace doesn't allow actual pages.
params The page parameter cannot be used together with the text and title parameters.

Parameter history

  • v1.38: Introduced showstrategykeys
  • v1.32: Deprecated disabletidy
  • v1.31: Introduced disablestylededuplication
  • v1.30: Introduced revid, useskin, wrapoutputclass

See also

  • API:Expandtemplates
  • API:Revisions
  • Extension:TextExtracts
This article is issued from Mediawiki. The text is licensed under Creative Commons - Attribution - Sharealike. Additional terms may apply for the media files.